### Importing Packages

In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
from matplotlib.colors import ListedColormap

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import metrics


### Loading and Plotting the Data

In [None]:
# Load the iris dataset that ships with scikit-learn
iris = datasets.load_iris()

# Only the first two features are plotted; the class labels drive the colouring
X_plot = iris.data[:, :2]
Y_plot = iris.target

# Light colours shade the decision regions in later cells, bold colours mark the points
cmap_light = ListedColormap(["orange", "cyan", "cornflowerblue"])
cmap_bold = ["darkorange", "c", "darkblue"]

fig, ax = plt.subplots()
ax.set_title('Iris Dataset')
ax.set_xlabel(iris.feature_names[0])
ax.set_ylabel(iris.feature_names[1])
sns.scatterplot(
    x=X_plot[:, 0],
    y=X_plot[:, 1],
    hue=iris.target_names[Y_plot],  # legend shows the class names instead of 0/1/2
    palette=cmap_bold,
    alpha=1.0,
    edgecolor="black",
    ax=ax,
)
plt.show()

## Logistic Regression

In [None]:
# Hold out 20% of the samples for testing; only the first two features are used
X_train, X_test, y_train, y_test = train_test_split(
    iris.data[:, :2],
    iris.target,
    test_size=0.2,
    random_state=25,
)

# Fit a logistic-regression classifier with default settings on the training split
logisticRegr = LogisticRegression().fit(X_train, y_train)

# Test-set predictions and mean accuracy, both reused by the cells below
y_pred = logisticRegr.predict(X_test)
acc = logisticRegr.score(X_test, y_test)

print('Accuracy: {:.2f}'.format(acc))




### Plotting result

In [None]:

# Classify every point of a dense grid so the decision regions can be shaded.
# The grid spans the training data, padded by 1 unit on each side.
h = 0.02  # grid step
x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = logisticRegr.predict(np.c_[xx.ravel(), yy.ravel()])

# Back to the grid's 2-D shape so contourf can draw one filled region per predicted class
Z = Z.reshape(xx.shape)
plt.figure(figsize=(8, 6))
plt.contourf(xx, yy, Z, cmap=cmap_light)

# Overlay the held-out test samples, coloured by their true class
sns.scatterplot(
    x = X_test[:,0],y = X_test[:,1],
    hue = iris.target_names[y_test],palette = cmap_bold, alpha = 1.0,
    edgecolor = "black"
)

plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])

plt.title(' Classification with Logistic Regression (acc = %1.2f)' %acc)

# Render explicitly so the cell does not end on the Text(...) repr returned by plt.title,
# matching the other plotting cells of this notebook
plt.show()




### Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the logistic-regression predictions on the test split
conf_mat = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(9, 9))
heat_ax = sns.heatmap(
    conf_mat,
    annot=True,
    fmt="d",
    linewidths=.5,
    square=True,
    cmap='Greens',
)
heat_ax.set_ylabel('Actual label')
heat_ax.set_xlabel('Predicted label')
all_sample_title = 'Accuracy Score: %.2f' % acc
heat_ax.set_title(all_sample_title, size=15)
plt.show()

## KNN

In [4]:
def knn_class(n,w):
  """Train a KNN classifier, plot its decision regions and return its test predictions.

  Parameters:
    n: number of neighbors.
    w: weighting method, 'uniform' or 'distance'.

  The model is fitted on the notebook-level X_train / y_train. The accuracy on
  X_test / y_test is printed and shown in the plot title.

  Returns:
    The labels predicted for X_test (not the accuracy).
  """
  model = KNeighborsClassifier(n_neighbors=n, weights=w)
  model.fit(X_train, y_train)
  y_pred_knn = model.predict(X_test)
  acc = metrics.accuracy_score(y_test, y_pred_knn)
  print("Accuracy:",acc)

  # Light colours for the shaded regions, bold colours for the sample points
  region_colours = ListedColormap(["orange", "cyan", "cornflowerblue"])
  point_colours = ["darkorange", "c", "darkblue"]

  # Dense grid over the training data, padded by 1 unit on each side
  step = 0.02
  x_lo = X_train[:, 0].min() - 1
  x_hi = X_train[:, 0].max() + 1
  y_lo = X_train[:, 1].min() - 1
  y_hi = X_train[:, 1].max() + 1
  grid_x, grid_y = np.meshgrid(
    np.arange(x_lo, x_hi, step),
    np.arange(y_lo, y_hi, step),
  )

  # Predict a class for every grid point, then restore the grid's 2-D shape
  grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]
  regions = model.predict(grid_points).reshape(grid_x.shape)

  _, ax = plt.subplots(figsize=(8, 6))
  ax.contourf(grid_x, grid_y, regions, cmap=region_colours)

  # Test samples on top of the regions, coloured by their true class
  sns.scatterplot(
    x=X_test[:, 0],
    y=X_test[:, 1],
    hue=iris.target_names[y_test],
    palette=point_colours,
    alpha=1.0,
    edgecolor="black",
    ax=ax,
  )

  ax.set_xlabel(iris.feature_names[0])
  ax.set_ylabel(iris.feature_names[1])
  ax.set_title(f'Classification with KNN ({w},k = {n}), acc = {acc:.2f}')

  plt.show()

  return y_pred_knn


 

In [5]:
def knn_acc(n,w):
  """Return the test-set accuracy of a KNN classifier.

  Parameters:
    n: number of neighbors.
    w: weighting method, 'uniform' or 'distance'.

  The model is fitted on the notebook-level X_train / y_train and scored
  against X_test / y_test.
  """
  model = KNeighborsClassifier(n_neighbors=n, weights=w).fit(X_train, y_train)
  return metrics.accuracy_score(y_test, model.predict(X_test))


# One row per candidate k, laid out as [accuracy, k]. Row i belongs to k = i,
# so row 0 stays an all-zero placeholder (k = 0 is not a valid neighbor count).
accuracy_uniform = np.zeros((len(X_train),2))
accuracy_distance = np.zeros((len(X_train),2))

# Score both weighting methods for every k from 1 to len(X_train) - 1
for k in range(1, len(X_train)):
  accuracy_uniform[k] = knn_acc(k, 'uniform'), k
  accuracy_distance[k] = knn_acc(k, 'distance'), k

# Number of neighbors that produces the best accuracy, derived from the sweep
# instead of being hard-coded so it stays correct if the split or data changes.
# Row 0 is skipped (hence the + 1); on ties argmax keeps the smallest k.
# NOTE: knn_acc scores on the test split, so choosing k this way makes the
# reported test accuracy optimistic.
bestk_uniform = int(np.argmax(accuracy_uniform[1:, 0])) + 1
bestk_distance = int(np.argmax(accuracy_distance[1:, 0])) + 1



In [None]:
# Producing a plot with decision boundaries for KNN (uniform).
# The trailing semicolon keeps the prediction array returned by knn_class
# from being echoed as cell output; only the plot is wanted here.
knn_class(bestk_uniform,'uniform');


In [None]:
# Producing a plot with decision boundaries for KNN (distance).
# The trailing semicolon keeps the prediction array returned by knn_class
# from being echoed as cell output; only the plot is wanted here.
knn_class(bestk_distance,'distance');

### Plotting Neighbors vs Accuracy

In [None]:

# Each accuracy array holds one [accuracy, k] row per candidate k, and row 0 is
# an unfilled placeholder. Passing the whole 2-D array to plot() would draw the
# k column as a second line and shift every accuracy by one neighbor, so row 0
# is skipped and the accuracy column is plotted against the k column.
fig, ax = plt.subplots()
ax.plot(accuracy_uniform[1:, 1], accuracy_uniform[1:, 0], label='Uniform')
ax.plot(accuracy_distance[1:, 1], accuracy_distance[1:, 0], label='Distance')

ax.set_xlabel('Neighbors')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy plot using KNN')
ax.legend(loc='center')

ax.set_ylim((0, 1.0))

plt.show()

### Confusion matrices

In [None]:
# knn_class draws the decision-region plot and returns the predictions for X_test
y_uni = knn_class(bestk_uniform,'uniform')

conf_mat_uniform = confusion_matrix(y_test,y_uni)
# Compute the accuracy from the predictions so the title cannot drift from the
# plotted matrix (it was previously a hard-coded 0.93)
acc_uniform = metrics.accuracy_score(y_test, y_uni)

plt.figure(figsize=(9,9))
sns.heatmap(conf_mat_uniform, annot=True, fmt="d", linewidths=.5, square = True, cmap = 'Greens')
plt.ylabel('Actual label')
plt.xlabel('Predicted label')
all_sample_title = 'KNN Uniform,Accuracy Score: %.2f' % acc_uniform
plt.title(all_sample_title, size = 15)
plt.show()

In [None]:
# knn_class draws the decision-region plot and returns the predictions for X_test
y_dist = knn_class(bestk_distance,'distance')

# Stored under its own name so the uniform-weights matrix is not overwritten
conf_mat_distance = confusion_matrix(y_test,y_dist)
# Compute the accuracy from the predictions so the title cannot drift from the
# plotted matrix (it was previously a hard-coded 0.87)
acc_distance = metrics.accuracy_score(y_test, y_dist)

plt.figure(figsize=(9,9))
sns.heatmap(conf_mat_distance, annot=True, fmt="d", linewidths=.5, square = True, cmap = 'Greens')
plt.ylabel('Actual label')
plt.xlabel('Predicted label')
all_sample_title = 'KNN Distance,Accuracy Score: %.2f' % acc_distance
plt.title(all_sample_title, size = 15)
plt.show()