This notebook generates a GIF that visualizes the training progress of a logistic regression model, one frame per training step.

In [None]:
# Output path of the generated GIF; it is handed to imageio.mimsave in the
# GIF cell, so it has to be set to a real path before running the notebook.
GIF_PATH = None
# Expected number of training iterations. The GIF cell uses it to size the
# zero-padded frame counter; it should match the num_iterations the model is
# trained with.
NUM_ITERATIONS = 50

In [None]:
import os
import tempfile
import imageio
import numpy as np
from tqdm import tqdm, tqdm_notebook
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
# Seed passed as random_state when generating the synthetic dataset.
RANDOM_STATE = 48
from ml_with_numpy.logistic_regression import util
from ml_with_numpy.logistic_regression.logistic_regression import LogisticRegression

In [None]:
# generate a classification dataset
# Two informative features so that the points can be drawn directly in 2-D.
X, y = make_classification(n_samples=200,
                           n_features=2,
                           n_redundant=0,
                           n_clusters_per_class=2,
                           class_sep=1.5,
                           flip_y=0.05,
                           random_state=RANDOM_STATE)

# Seed the split as well: without random_state the train/test partition (and
# therefore every plot and confusion matrix below) changes on each run.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=RANDOM_STATE)
print("X_train.shape:", X_train.shape)
print("X_test.shape:", X_test.shape)
print("y_train.shape:", y_train.shape)
print("y_test.shape:", y_test.shape)

In [None]:
# Draw the train and test splits side by side in one figure.
fig = plt.figure(figsize=(10, 4))

# Left panel: training points.
plt.subplot(1, 2, 1)
util.visualize_points(X_train, y_train, title='Train')

# Right panel: held-out test points.
plt.subplot(1, 2, 2)
util.visualize_points(X_test, y_test, title='Test')

In [None]:
# fit and train model
# Use the notebook-level NUM_ITERATIONS instead of a hard-coded 50 so that the
# training length and the frame-counter width in the GIF cell cannot drift apart.
model = LogisticRegression(num_iterations=NUM_ITERATIONS, learning_rate=0.1, silent=False)
# The returned history is kept; the GIF cell iterates over it to draw one frame per step.
hist = model.fit(X_train, y_train, return_training_history=True)

In [None]:
def get_counter(i, n_slots):
    """Return ``i`` as a string left-padded with zeros to ``n_slots`` characters.

    Raises:
        ValueError: if the string form of ``i`` is longer than ``n_slots``.
    """
    digits = str(i)
    if len(digits) > n_slots:
        raise ValueError("i too long")
    # rjust pads purely on the left, exactly like prepending "0" characters.
    return digits.rjust(n_slots, "0")

In [None]:
# generate training gif
# Fail fast: GIF_PATH defaults to None, and it is only used after every frame
# has been rendered, so check it before doing any of that work.
if GIF_PATH is None:
    raise ValueError("GIF_PATH must be set to the output path of the GIF")

filenames = list()
# Fixed axis limits (with a margin) so that every frame shares the same view.
x1, x2 = util.get_lim_range(X, axis=0, margin_ratio=0.1)
y_lim = util.get_lim_range(X, axis=1, margin_ratio=0.2)
# Frames are written to a temporary directory that is removed when the block exits,
# so the GIF has to be assembled inside the `with` block.
with tempfile.TemporaryDirectory() as tempdir:
    # hist is unpacked into parallel sequences; each (w, b) pair yields one frame.
    for i, (w, b) in tqdm_notebook(enumerate(zip(*hist))):
        y1, y2 = util.get_points(x1, x2, w, b)
        # Plot
        fig = plt.figure()
        util.visualize_points(X_train, y_train, title="Iteration: {}".format(str(i)), x_lim=[x1, x2], y_lim=list(y_lim))
        util.visualize_line(x1, x2, y1, y2, color='g')
        # Zero-padded frame index, one slot wider than NUM_ITERATIONS has digits.
        filename = os.path.join(tempdir, get_counter(i, len(str(NUM_ITERATIONS)) + 1) + ".png")
        filenames.append(filename)
        fig.savefig(filename)
        # Close the figure that was just saved explicitly rather than whichever
        # figure happens to be current, so figures do not pile up across iterations.
        plt.close(fig)
    imageio.mimsave(GIF_PATH, [imageio.imread(f) for f in filenames])

In [None]:
# Results
# Threshold the flattened model outputs at 0.5 to obtain hard 0/1 labels.
# The builtin int replaces np.int: that alias was deprecated in NumPy 1.20 and
# removed in 1.24, where it raises AttributeError. It was only ever an alias
# for the builtin, so the resulting dtype is unchanged.
y_train_pred = (model.predict(X_train).flatten() > 0.5).astype(int)
y_test_pred = (model.predict(X_test).flatten() > 0.5).astype(int)

print("Train Confusion Matrix:")
print(confusion_matrix(y_train, y_train_pred))
print("")
print("Test Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred))