# Dataset

In [None]:
from sklearn import datasets
# Load the scikit-learn digits dataset as pandas objects; the combined frame
# (pixel columns plus the `target` column) is what the rest of the notebook uses.
digits = datasets.load_digits(as_frame=True)
df = digits["frame"]

In [None]:
# Print a summary (columns, dtypes, non-null counts) of the freshly loaded frame.
df.info()

In [None]:
# Keep only the rows whose target is the digit 2 or the digit 7.
is_two_or_seven = df["target"].isin([2, 7])
df = df.drop(index=df.index[~is_two_or_seven])
df.info()

In [None]:
import matplotlib.pyplot as plt

# Names of the 64 flattened pixel columns; reused later for feature extraction.
pixels = [col for col in df.columns if col.startswith("pixel")]
sample = df.sample(12)

# Show the 12 sampled digits on a 3x4 grid, each titled with its class so the
# reader can tell which digit the image is supposed to be.
fig, axs = plt.subplots(3, 4)
for ax, index in zip(axs.flat, sample.index):
    img = sample.loc[index, pixels].values.reshape((8, 8))
    lbl = sample.loc[index, "target"]
    ax.imshow(img, cmap="gray")
    ax.set_title(f"target = {lbl}")
    # Pixel coordinates carry no information here, so hide ticks and labels.
    ax.tick_params(
        left=False,
        bottom=False,
        labelleft=False,
        labelbottom=False)
# Make room for the per-image titles so they do not overlap neighbouring rows.
fig.tight_layout()

# Features

In [None]:
import numpy as np

def symmetry(flat_pixels):
    """Score how much the top half of an 8x8 image differs from its mirrored bottom half.

    Parameters
    ----------
    flat_pixels : pandas.Series
        The 64 pixel values of one image, in row-major order.

    Returns
    -------
    float
        Mean absolute difference between rows 0-3 and rows 7-4 (the bottom
        half flipped vertically). 0 means the image is perfectly symmetric
        about its horizontal midline; larger values mean less symmetry.
    """
    grid = flat_pixels.values.reshape((8, 8))
    upper_half = grid[:4, :]
    # Reverse the row order of the bottom half so row 7 lines up with row 0.
    mirrored_lower_half = grid[4:, :][::-1, :]
    return np.abs(upper_half - mirrored_lower_half).mean()
    
# Collapse each image into two hand-crafted features plus a binary label
# (1 for the digit 2, 0 otherwise), and keep only those three columns.
pixel_values = df[pixels]
df = df.assign(
    symmetry=pixel_values.apply(symmetry, axis="columns"),
    intensity=pixel_values.mean(axis="columns"),
    label=df["target"].eq(2).astype(int),
)[["intensity", "symmetry", "label"]]
df.info()
df.sample(10)

In [None]:
def plot_data(ax, df):
    """Scatter intensity against symmetry on `ax`, one series per label value.

    Parameters
    ----------
    ax : matplotlib axes (anything with `scatter`, `legend` and `set`)
        Target axes to draw on.
    df : pandas.DataFrame
        Must contain the columns "intensity", "symmetry" and "label".

    Returns
    -------
    The same `ax`, to allow chaining.
    """
    for label_value, subset in df.groupby("label"):
        # Column names are resolved against `data`, so each group is plotted
        # as its own series and gets its own legend entry.
        ax.scatter("intensity", "symmetry", data=subset, label=label_value)
    ax.set(xlabel="Intensity", ylabel="Symmetry")
    ax.legend()
    return ax

# Scatter the whole two-class dataset in (intensity, symmetry) feature space.
fig, ax = plt.subplots()
plot_data(ax, df)

# Random Seed

In [None]:
# Seed NumPy's global RNG; pandas' `sample` draws from it when no
# `random_state` is passed, so the shuffle below becomes repeatable.
SEED = 934545
np.random.seed(SEED)

# Train-Val Split

In [None]:
# Shuffle all rows, then use the first `split` rows for training and the
# remainder for validation.
df = df.sample(frac=1).reset_index(drop=True)
split = 260
train = df.iloc[:split, :]
val = df.iloc[split:, :]

feature_cols = ["intensity", "symmetry"]
X_tr, Y_tr = train[feature_cols], train["label"]
X_val, Y_val = val[feature_cols], val["label"]

# Plotting Utilities

In [None]:
import pandas as pd

def plot_decision_boundary(ax, m, data=None, step=0.01):
    """Shade the regions of feature space that model `m` assigns to each class.

    A regular grid is laid over the (intensity, symmetry) plane, padded by one
    unit on every side of the data range; the model is evaluated at every grid
    point and the predictions are drawn as translucent filled contours.

    Parameters
    ----------
    ax : matplotlib axes (anything with `contourf`)
        Target axes to draw on.
    m : fitted estimator
        Must expose `predict` and accept a DataFrame with the columns
        "intensity" and "symmetry".
    data : pandas.DataFrame, optional
        Frame whose "intensity"/"symmetry" range defines the plotted area.
        Defaults to the notebook-level `df`, which is what the function used
        before this parameter existed.
    step : float, optional
        Grid spacing along both axes. The number of evaluated points grows
        with 1 / step**2, so coarser steps are much cheaper for large models.

    Returns
    -------
    The same `ax`, to allow chaining.

    Raises
    ------
    ValueError
        If `step` is not strictly positive.
    """
    if step <= 0:
        raise ValueError("step must be strictly positive")
    if data is None:
        # Backward-compatible default: fall back to the notebook-level frame.
        data = df

    x_min, x_max = data["intensity"].min() - 1, data["intensity"].max() + 1
    y_min, y_max = data["symmetry"].min() - 1, data["symmetry"].max() + 1
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, step),
        np.arange(y_min, y_max, step))

    # Use the training column names so pipelines fitted on a DataFrame see
    # the same feature names at prediction time.
    grid = pd.DataFrame({
        "intensity": xx.ravel(),
        "symmetry": yy.ravel()
    })

    Z = np.asarray(m.predict(grid)).reshape(xx.shape)
    ax.contourf(xx, yy, Z, alpha=0.2)

    return ax

def plot_results(ax1, ax2, m, train, val):
    """Draw the model's decision regions with the train and validation points.

    Parameters
    ----------
    ax1, ax2 : matplotlib axes
        Left panel receives the training data, right panel the validation data.
    m : fitted estimator
        Passed to `plot_decision_boundary`.
    train, val : pandas.DataFrame
        Frames accepted by `plot_data` (columns "intensity", "symmetry", "label").

    Returns
    -------
    tuple
        `(ax1, ax2)`, unchanged, to allow chaining.
    """
    panels = ((ax1, train, "Train"), (ax2, val, "Validation"))
    for ax, data, title in panels:
        plot_decision_boundary(ax, m)
        plot_data(ax, data)
        # Without a title the two panels cannot be told apart.
        ax.set_title(title)
    return ax1, ax2

# Model 1: A Simple Perceptron

In [None]:
from sklearn.linear_model import Perceptron
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Baseline: scale the two raw features, then fit a plain perceptron on them.
model_steps = [
    ("scaler", StandardScaler()),
    ("perceptron", Perceptron(eta0=0.03)),
]
model = Pipeline(model_steps)
model.fit(X_tr, Y_tr)

In [None]:
# Score on both splits, then draw the decision regions over each split.
train_acc = model.score(X_tr, Y_tr)
val_acc = model.score(X_val, Y_val)
print("Model 1")
print("Train:", train_acc)
print("Val:", val_acc)
fig, (ax1, ax2) = plt.subplots(ncols=2)
plot_results(ax1, ax2, model, train, val)

In [None]:
# Learned weights and bias of the perceptron step (second step of the pipeline).
perceptron = model.named_steps["perceptron"]
perceptron.coef_, perceptron.intercept_

# Feature Transforms: The Polynomial Case

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Compare the first five raw training rows with their degree-2 polynomial
# expansion (rounded to two decimals for readability).
poly = PolynomialFeatures(degree=2, include_bias=False)
X_tr_poly = poly.fit_transform(X_tr, Y_tr)
X_tr.head(5), X_tr_poly[:5].round(2)

# Model 2: Adding Model Complexity

In [None]:
# Same perceptron, but fed degree-2 polynomial combinations of the scaled features.
model2_steps = [
    ("scaler", StandardScaler()),
    ("poly", PolynomialFeatures(2, include_bias=False)),
    ("perceptron", Perceptron(eta0=0.03)),
]
model2 = Pipeline(model2_steps)
model2.fit(X_tr, Y_tr)

In [None]:
# Score on both splits, then draw the decision regions over each split.
train_acc = model2.score(X_tr, Y_tr)
val_acc = model2.score(X_val, Y_val)
print("Model 2")
print("Train:", train_acc)
print("Val:", val_acc)
fig, (ax1, ax2) = plt.subplots(ncols=2)
plot_results(ax1, ax2, model2, train, val)

# Model 3: Overfitting

In [None]:
# Raise the polynomial degree to 8 with no regularisation, for the overfitting demo.
model3_steps = [
    ("scaler", StandardScaler()),
    ("poly", PolynomialFeatures(8, include_bias=False)),
    ("perceptron", Perceptron(eta0=0.03)),
]
model3 = Pipeline(model3_steps)
model3.fit(X_tr, Y_tr)

In [None]:
# Score on both splits, then draw the decision regions over each split.
train_acc = model3.score(X_tr, Y_tr)
val_acc = model3.score(X_val, Y_val)
print("Model 3")
print("Train:", train_acc)
print("Val:", val_acc)
fig, (ax1, ax2) = plt.subplots(ncols=2)
plot_results(ax1, ax2, model3, train, val)

# Model 4: Regularization

In [None]:
# Same degree-8 feature expansion as model 3, but with an elastic-net penalty
# on the perceptron weights.
model4_steps = [
    ("scaler", StandardScaler()),
    ("poly", PolynomialFeatures(8, include_bias=False)),
    ("perceptron", Perceptron(eta0=0.03, penalty="elasticnet", alpha=1e-2)),
]
model4 = Pipeline(model4_steps)
model4.fit(X_tr, Y_tr)

In [None]:
# Score on both splits, then draw the decision regions over each split.
train_acc = model4.score(X_tr, Y_tr)
val_acc = model4.score(X_val, Y_val)
print("Model 4")
print("Train:", train_acc)
print("Val:", val_acc)
fig, (ax1, ax2) = plt.subplots(ncols=2)
plot_results(ax1, ax2, model4, train, val)