In [None]:
import csv
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap
from sklearn.neighbors import KNeighborsClassifier

# Load the microchips dataset. Every row must hold at least three numeric
# fields, because the column slicing below reads columns 0, 1 and 2.
# newline='' is what the csv module documentation recommends for files
# that are handed to csv.reader.
with open("./resources/datasets/microchips.csv", 'r', newline='') as f:
    reader = csv.reader(f)
    # csv.reader yields an empty list for a blank line; drop those rows so a
    # trailing empty line cannot make the table ragged and break the float
    # conversion below.
    data = [row for row in reader if row]

# define global variables
X = np.array(data, dtype=float)  # whole table as floats: two features + label
x1 = X[:, 0]  # first feature column
x2 = X[:, 1]  # second feature column
z = X[:, 2]   # label column (used as class / colour value by the cells below)

## Exercise 4: k-NN Classification using scikit-learn
Repeat Exercise 1 but this time using the KNeighborsClassifier from scikit-learn.  

**1.** Plot the original microchip data using different markers for the two classes OK and Fail. Faulty microchips are shown in <span style="color:red">red</span> and OK microchips in <span style="color:green">green</span>.

In [None]:
# Scatter the data points coloured by their label value in z and show the plot.
# With this two-colour map the lower label value is drawn red and the higher
# one green. `cmap` is kept as a global because the later plotting cell reuses it.
cmap = ListedColormap(['r', 'g'])
fig, ax = plt.subplots()
ax.scatter(x1, x2, c=z, edgecolors='k', cmap=cmap)
# a figure should be readable on its own: name it and label both axes
ax.set_title("Original microchip data")
ax.set_xlabel("x1")
ax.set_ylabel("x2")
plt.show()

**2.** Implement and use k-NN to predict whether three unknown microchips are likely to be OK or Fail. The properties associated with the three unknown microchips are **(−0.3, 1.0)**, **(−0.5, −0.1)**, and **(0.6, 0.0)**, and you should repeat the experiments for **k = 1, 3, 5, 7**, giving a total of
12 predictions. For example, for k = 5 the printout might look like:
```python
    k = 5
    chip1: [-0.3, 1.0] ==> Fail
    chip2: [-0.5, -0.1] ==> OK
    chip3: [0.6, 0.0] ==> OK
```

In [None]:
def predictLabels(n_neighbors, points):
    """Classify ``points`` with a k-NN model fitted on the microchip data.

    A new ``KNeighborsClassifier`` is trained on every call, using the first
    two columns of the global ``X`` as features and its third column as the
    target.

    Args:
        n_neighbors: number of neighbours handed to the classifier.
        points: samples to classify, forwarded unchanged to ``predict``.

    Returns:
        The labels predicted for ``points``.
    """
    features, targets = X[:, :2], X[:, 2]
    classifier = KNeighborsClassifier(n_neighbors)
    # fit() returns the fitted estimator itself, so the calls can be chained
    return classifier.fit(features, targets).predict(points)

# neighbourhood sizes to evaluate and the three unknown microchips to classify
k_values = [1, 3, 5, 7]
chips = [[-0.3, 1.0], [-0.5, -0.1], [0.6, 0.0]]

# one report per k: a header line, one line per chip, then a blank separator
for k in k_values:
    labels = predictLabels(k, chips)
    print(f"k = {k}")
    for number, (chip, label) in enumerate(zip(chips, labels), start=1):
        # a predicted label of 1.0 is reported as OK, anything else as Fail
        verdict = 'OK' if label == 1.0 else 'Fail'
        print(f"chip{number}: {chip} ==> {verdict}")
    print()

**3.** Display a 2 x 2 plot similar to Figure 1 showing the decision boundary and the training error for **k = 1, 3, 5, 7**

In [None]:
# Create the mesh grid for the plot: a dense lattice covering the data range,
# padded by `margin` on every side, on which the classifier is evaluated to
# draw the decision regions.
margin = 0.1
x_min, x_max = x1.min() - margin, x1.max() + margin
y_min, y_max = x2.min() - margin, x2.max() + margin

x_mesh, y_mesh = np.meshgrid(
    np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01)
)

# one (x, y) row per grid node, so the grid can be classified in a single call
mesh_points = np.c_[x_mesh.ravel(), y_mesh.ravel()]

# 2 x 2 grid of axes, flattened so that subplot i belongs to k_values[i]
fig, axs = plt.subplots(2, 2, figsize=(10, 10))
axs = axs.ravel()

for i, k in enumerate(k_values):
    # decision regions: predicted class at every grid node, back in grid shape
    mesh_labels = predictLabels(k, mesh_points)
    reshaped_labels = mesh_labels.reshape(x_mesh.shape)
    axs[i].pcolormesh(x_mesh, y_mesh, reshaped_labels, cmap=cmap)
    axs[i].scatter(x1, x2, c=z, edgecolors='k', cmap=cmap)
    # training error requested by the exercise: the number of training points
    # whose predicted label differs from the known label in z
    training_errors = int(np.sum(predictLabels(k, X[:, :2]) != z))
    axs[i].set_title(f"k = {k}, training errors = {training_errors}")