In [None]:
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

from sklearn import datasets

# from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.linear_model import LogisticRegression
from sklearn.dummy import DummyClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import classification_report

In [None]:
# Single seed passed as `random_state` to the data generation and the train/test
# splits below, so that a full re-run reproduces the same samples and splits.
RANDOM_SEED = 0

## Nearest neighbor

In [None]:
# Sample a 2-D, two-class toy data set: one blob of points per class.
X, y = datasets.make_blobs(
    n_samples=1000,
    n_features=2,
    centers=np.array([[-2, -2], [2, 2]]),  # one row per blob centre
    cluster_std=2.5,                       # same spread for both blobs
    shuffle=True,
    random_state=RANDOM_SEED,              # fixed seed -> same samples on every run
)

In [None]:
# Collect features and labels in one frame so seaborn can colour the points by class.
df = pd.DataFrame({'x1': X[:, 0], 'x2': X[:, 1], 'class': y})

# Scatter of x1 vs x2 with the per-class marginal distributions on the sides.
ax = sns.jointplot(
    data=df,
    x="x1",
    y="x2",
    hue="class",
    s=80,
)
plt.show()

In [None]:
# Hold out 20% of the samples for evaluation; the split is seeded so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

In [None]:
# Chance-level baseline: with strategy='uniform' the classifier ignores the features
# and draws every predicted label uniformly at random from the classes seen in fit().
# Because the predictions are random draws, the estimator is seeded with RANDOM_SEED;
# otherwise the report printed below would change on every run, unlike the seeded
# data generation and train/test split above.
clf = DummyClassifier(strategy='uniform', random_state=RANDOM_SEED)
clf.fit(X_train, y_train)
y_hat_test = clf.predict(X_test)
print(classification_report(y_test, y_hat_test))

In [None]:
# k-nearest-neighbours classifier: every prediction is a vote among the 5 closest
# training samples, with the votes weighted by distance (weights='distance').
clf = KNeighborsClassifier(
    weights='distance',
    n_neighbors=5,
)
clf.fit(X_train, y_train)

In [None]:
# Predicted labels for the held-out samples, produced by the classifier fitted above.
y_hat_test = clf.predict(X=X_test)

In [None]:
# Confusion matrix on the test set; rows and columns follow the order of clf.classes_.
class_labels = clf.classes_
cm = confusion_matrix(y_true=y_test, y_pred=y_hat_test, labels=class_labels)
disp = ConfusionMatrixDisplay(cm, display_labels=class_labels)
disp.plot()
plt.show()

In [None]:
# Text summary of the test-set metrics for the current predictions.
report = classification_report(y_true=y_test, y_pred=y_hat_test)
print(report)

In [None]:
# Find the decision regions by evaluating the classifier on a dense, regular grid
# that covers the bounding box of the data extended by a 0.5 margin on every side.
h = .02       # grid spacing
margin = .5   # padding around the data's bounding box
x_min, y_min = X.min(axis=0) - margin
x_max, y_max = X.max(axis=0) + margin

xx, yy = np.meshgrid(
    np.arange(x_min, x_max, h),
    np.arange(y_min, y_max, h),
)

# One (x1, x2) row per grid node; Z holds the predicted label of each node (flat).
grid_points = np.column_stack([xx.ravel(), yy.ravel()])
Z = clf.predict(grid_points)

In [None]:
# Two-colour map: blue and orange, the first two entries of the default seaborn palette.
class_colors = sns.color_palette().as_hex()[:2]
my_cmap = matplotlib.colors.ListedColormap(class_colors)

# Bring the flat grid predictions back to the shape of the mesh.
Z = Z.reshape(xx.shape)

fig, ax = plt.subplots(figsize=[6, 6])

# Predicted class regions as a translucent background.
ax.pcolormesh(xx, yy, Z, cmap=my_cmap, alpha=0.2)

# Overlay all samples in X (train and test), coloured by their true label.
scatter = ax.scatter(X[:, 0], X[:, 1], c=y, s=80, cmap=my_cmap, edgecolors='white')
handles, labels = scatter.legend_elements()
legend = ax.legend(handles, labels, loc="upper right", title="Class", fontsize=14)
ax.add_artist(legend)

# Title, axis labels, and limits clipped to the extent of the grid.
ax.set_title('Nearest Neighbor', fontsize=18)
ax.set_xlabel('x1', fontsize=12)
ax.set_ylabel('x2', fontsize=12)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())

plt.show()

## Logistic regression

In [None]:
# Unregularised logistic regression (penalty=None) with an intercept, fitted by L-BFGS.
# `multi_class` is deliberately not passed: 'auto' is already its default value, and
# the parameter is deprecated since scikit-learn 1.5 (it emits a FutureWarning and is
# scheduled for removal). Omitting it yields the same model and stays forward-compatible.
clf = LogisticRegression(penalty=None, fit_intercept=True, solver='lbfgs', max_iter=1000)
clf.fit(X_train, y_train)

In [None]:
# Predicted labels for the held-out samples, produced by the classifier fitted above.
y_hat_test = clf.predict(X=X_test)

In [None]:
# Confusion matrix on the test set; rows and columns follow the order of clf.classes_.
class_labels = clf.classes_
cm = confusion_matrix(y_true=y_test, y_pred=y_hat_test, labels=class_labels)
disp = ConfusionMatrixDisplay(cm, display_labels=class_labels)
disp.plot()
plt.show()

In [None]:
# Text summary of the test-set metrics for the current predictions.
report = classification_report(y_true=y_test, y_pred=y_hat_test)
print(report)

In [None]:
# Find the decision regions by evaluating the classifier on a dense, regular grid
# that covers the bounding box of the data extended by a 0.5 margin on every side.
h = .02       # grid spacing
margin = .5   # padding around the data's bounding box
x_min, y_min = X.min(axis=0) - margin
x_max, y_max = X.max(axis=0) + margin

xx, yy = np.meshgrid(
    np.arange(x_min, x_max, h),
    np.arange(y_min, y_max, h),
)

# One (x1, x2) row per grid node; Z holds the predicted label of each node (flat).
grid_points = np.column_stack([xx.ravel(), yy.ravel()])
Z = clf.predict(grid_points)

Plot the predictions on the grid together with the data (all samples in `X`) and the analytical solution for the decision boundary.

In [None]:
# Two-colour map: blue and orange, the first two entries of the default seaborn palette.
my_cmap = matplotlib.colors.ListedColormap(sns.color_palette().as_hex()[:2])

# Bring the flat grid predictions back to the shape of the mesh.
Z = Z.reshape(xx.shape)

fig, ax = plt.subplots(figsize=[6, 6])

# Predicted class regions as a translucent background.
ax.pcolormesh(xx, yy, Z, cmap=my_cmap, alpha=0.2)

# Overlay all samples in X (train and test), coloured by their true label.
scatter = ax.scatter(X[:, 0], X[:, 1], c=y, edgecolors='white', cmap=my_cmap, s=80)
legend = ax.legend(*scatter.legend_elements(), loc="upper right", title="Class", fontsize=14)
# Register the class legend as its own artist so that the second ax.legend() call
# below (for the boundary line) does not replace it.
ax.add_artist(legend)

# Analytical decision boundary of the binary logistic regression fitted in this
# section: the model switches class where w1*x1 + w2*x2 + b = 0, i.e. on the line
# x2 = -(w1*x1 + b) / w2. For a binary problem coef_ has a single row.
w1, w2 = clf.coef_[0]
b = clf.intercept_[0]
if w2 != 0:
    x1_line = np.array([xx.min(), xx.max()])
    x2_line = -(w1 * x1_line + b) / w2
    (boundary,) = ax.plot(
        x1_line, x2_line,
        color='black', linestyle='--', linewidth=2, label='Analytical boundary',
    )
else:
    # Degenerate case: the boundary is the vertical line x1 = -b / w1.
    boundary = ax.axvline(
        -b / w1,
        color='black', linestyle='--', linewidth=2, label='Analytical boundary',
    )
ax.legend(handles=[boundary], loc="lower left", fontsize=12)

ax.set_xlabel('x1', fontsize=12)
ax.set_ylabel('x2', fontsize=12)
# Clip the view to the grid extent (the boundary line is drawn across the full x range).
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_title('Logistic Regression', fontsize=18)

plt.show()

In [None]:
# Text summary of the test-set metrics for the current predictions.
report = classification_report(y_true=y_test, y_pred=y_hat_test)
print(report)

## Softmax regression

In [None]:
# Three-class toy data set: unbalanced blob sizes (400 / 400 / 200 samples) and a
# larger spread for the third blob (std 2.5 versus 1.5 for the first two).
# Note: this replaces the two-class X and y used in the previous sections.
X, y = datasets.make_blobs(
    n_samples=[400, 400, 200],
    n_features=2,
    centers=np.array([[0, -2], [2, 2], [-2, 2]]),  # one row per blob centre
    cluster_std=[1.5, 1.5, 2.5],
    shuffle=True,
    random_state=RANDOM_SEED,
)

In [None]:
# First three colours of the default seaborn palette, one per class. The same list is
# reused further down to build the colormap of the decision-region plot.
c_palette_3 = sns.color_palette().as_hex()[:3]

# Collect features and labels in one frame so seaborn can colour the points by class.
df = pd.DataFrame(X, columns=['x1', 'x2'])
df['class'] = y

# Scatter of x1 vs x2 with the per-class marginal distributions on the sides.
ax = sns.jointplot(data=df, x="x1", y="x2", hue="class", palette=c_palette_3, s=80)
plt.show()

In [None]:
# Hold out 20% of the samples for evaluation; the split is seeded so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

In [None]:
# Unregularised softmax (multinomial logistic) regression fitted by L-BFGS.
# `multi_class='multinomial'` is deliberately not passed: the parameter is deprecated
# since scikit-learn 1.5 (FutureWarning, scheduled for removal). With the 'lbfgs'
# solver and more than two classes the default already fits the multinomial model,
# so the resulting classifier is the same.
clf = LogisticRegression(penalty=None, fit_intercept=True, solver='lbfgs', max_iter=1000)
clf.fit(X_train, y_train)

In [None]:
# Predicted labels for the held-out samples, produced by the classifier fitted above.
y_hat_test = clf.predict(X=X_test)

In [None]:
# Confusion matrix on the test set; rows and columns follow the order of clf.classes_.
class_labels = clf.classes_
cm = confusion_matrix(y_true=y_test, y_pred=y_hat_test, labels=class_labels)
disp = ConfusionMatrixDisplay(cm, display_labels=class_labels)
disp.plot()
plt.show()

In [None]:
# Find the decision regions by evaluating the classifier on a dense, regular grid
# that covers the bounding box of the data extended by a 0.5 margin on every side.
h = .02       # grid spacing
margin = .5   # padding around the data's bounding box
x_min, y_min = X.min(axis=0) - margin
x_max, y_max = X.max(axis=0) + margin

xx, yy = np.meshgrid(
    np.arange(x_min, x_max, h),
    np.arange(y_min, y_max, h),
)

# One (x1, x2) row per grid node; Z holds the predicted label of each node (flat).
grid_points = np.column_stack([xx.ravel(), yy.ravel()])
Z = clf.predict(grid_points)

In [None]:
# One colour per class, reusing the three-colour palette (c_palette_3) that was
# defined for the joint plot of this data set.
my_cmap = matplotlib.colors.ListedColormap(c_palette_3)

# Bring the flat grid predictions back to the shape of the mesh.
Z = Z.reshape(xx.shape)

fig, ax = plt.subplots(figsize=[6, 6])

# Predicted class regions as a translucent background.
ax.pcolormesh(xx, yy, Z, cmap=my_cmap, alpha=0.2)

# Overlay all samples in X (train and test), coloured by their true label.
scatter = ax.scatter(X[:, 0], X[:, 1], c=y, s=80, cmap=my_cmap, edgecolors='white')
handles, labels = scatter.legend_elements()
legend = ax.legend(handles, labels, loc="upper right", title="Expected Class", fontsize=14)
ax.add_artist(legend)

# Title, axis labels, tick sizes, and limits clipped to the extent of the grid.
ax.set_title('Softmax Regression on 3 Gaussian Clouds', fontsize=16)
ax.set_xlabel('x1', fontsize=18)
ax.set_ylabel('x2', fontsize=18)
ax.tick_params(axis='both', labelsize=14)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())

# Uncomment to export the figure to disk:
# plt.savefig('pics/softmax_3Gaussians.png', dpi=360)
plt.show()

In [None]:
# Text summary of the test-set metrics for the current predictions.
report = classification_report(y_true=y_test, y_pred=y_hat_test)
print(report)