In [7]:
# === NOTEBOOK & IMPORT SETUP ===
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import scipy
import random
# Seed both Python's and NumPy's global RNGs. Seeding only `random` is not
# enough: the model code below draws its initial weights with
# np.random.randn, which reads NumPy's generator, not Python's.
random.seed(50)
np.random.seed(50)

# Show every column when a DataFrame is displayed (no truncation).
pd.set_option("display.max_columns", None)
%matplotlib inline

In [8]:
# Load the Fashion-MNIST train/test splits through the Keras dataset helper.
# load_data() hands back two (images, labels) pairs: train first, then test.
from tensorflow.keras.datasets import fashion_mnist
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Sanity-check the array shapes before any preprocessing.
print(f"X_train.shape = {X_train.shape}") # 60,000 training images, 28x28 pixel arrays
print(f"y_train.shape = {y_train.shape}") # 60,000 labels for the training images
print(f"X_test.shape = {X_test.shape}") # 10,000 test images, 28x28 pixel arrays
print(f"y_test.shape = {y_test.shape}") # 10,000 test labels

X_train.shape = (60000, 28, 28)
y_train.shape = (60000,)
X_test.shape = (10000, 28, 28)
y_test.shape = (10000,)


In [9]:
# Each 28x28 image is flattened into a single row of 784 pixel values so the
# data becomes a 2-D (samples, features) matrix.
num_features = 28 * 28

X_train = X_train.reshape(-1, num_features)
X_test = X_test.reshape(-1, num_features) # -1 lets NumPy infer the row count from the array size

print(f"X_train.shape = {X_train.shape}") # 60,000 training images, each flattened to 784 values
print(f"y_train.shape = {y_train.shape}") # 60,000 labels (unchanged by the reshape)
print(f"X_test.shape = {X_test.shape}") # 10,000 test images, each flattened to 784 values
print(f"y_test.shape = {y_test.shape}") # 10,000 test labels (unchanged by the reshape)

X_train.shape = (60000, 784)
y_train.shape = (60000,)
X_test.shape = (10000, 784)
y_test.shape = (10000,)


In [10]:
# Number of distinct label values that occur in the training targets.
distinct_labels = np.unique(y_train)
num_classes = len(distinct_labels)
del distinct_labels  # keep the notebook namespace identical to before
print(f"There are {num_classes} different classes")

# Lookup table: integer label -> human-readable description.
# The position of each name in the tuple is the numerical label it
# belongs to, so label_mapping[label] converts a label back into text.
label_mapping = dict(enumerate((
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle Boot",
)))

There are 10 different classes


In [11]:
# Prepend a column of ones so that the first row of a weight matrix
# multiplied with X acts as the bias term.
# The width check keeps the cell idempotent: re-running it must not stack a
# second ones column on top of the first.
if X_train.shape[1] != num_features + 1:
    X_train = np.c_[np.ones([len(X_train), 1]), X_train]
if X_test.shape[1] != num_features + 1:
    X_test = np.c_[np.ones([len(X_test), 1]), X_test]

from sklearn.model_selection import train_test_split

# Hold out 20% of the training data for validation. random_state pins the
# shuffle so the split is the same on every run.
# train_test_split returns (X_a, X_b, y_a, y_b), so despite their names:
#   XX_train         -> features used for fitting
#   validation_train -> features of the held-out validation part
#   yy_train         -> labels used for fitting
#   validation_test  -> labels of the held-out validation part
XX_train, validation_train, yy_train, validation_test = train_test_split(
    X_train, y_train, test_size=0.2, random_state=50
)

In [None]:

### Helper functions
def oneHotter(y):
    """One-hot encode a 1-D sequence of non-negative integer labels.

    Parameters
    ----------
    y : array-like of int, shape (m,)
        Class labels. The number of output columns is ``max(y) + 1``.

    Returns
    -------
    numpy.ndarray of float, shape (m, max(y) + 1)
        Row ``i`` is all zeros except for a 1 in column ``y[i]``.

    Raises
    ------
    ValueError
        If ``y`` is empty (there is no maximum to size the output by).
    """
    y = np.asarray(y)  # also accept plain lists / tuples
    # Convert the maximum to a Python int *before* adding 1. With small
    # unsigned dtypes (e.g. uint8 labels) NumPy 2 keeps the scalar's dtype,
    # so `y.max() + 1` could wrap around instead of growing.
    n_classes = int(y.max()) + 1
    m = len(y)
    Y_one_hot = np.zeros((m, n_classes))
    Y_one_hot[np.arange(m), y] = 1
    return Y_one_hot

def softmax(s):
  """Softmax over the last axis of a score array.

  Parameters
  ----------
  s : array-like, shape (m, k)
      Raw scores, one row per sample. The input is not modified.

  Returns
  -------
  numpy.ndarray, shape (m, k)
      Non-negative values; each row sums to 1.
  """
  s = np.asarray(s)
  # Shift every row by its own maximum (into a new array, leaving the
  # caller's data untouched). Softmax is invariant to a per-row shift, and
  # using the row maximum guarantees at least one exp(0) = 1 per row, so the
  # denominator can never underflow to zero.
  shifted = s - np.max(s, axis=-1, keepdims=True)
  exp_s = np.exp(shifted)  # computed once, reused for numerator and denominator
  return exp_s / np.sum(exp_s, axis=-1, keepdims=True)

def learning_schedule(t, t0=5, t1=50):
    """Evaluate the schedule ``t0 / (t + t1)``.

    For positive ``t0`` and ``t1`` the value starts at ``t0 / t1`` when
    ``t == 0`` and shrinks as ``t`` grows.
    """
    denominator = t + t1
    rate = t0 / denominator
    return rate

def custom_softmax(X,y):
  """Perform one full-batch gradient step of softmax regression.

  Parameters
  ----------
  X : numpy.ndarray, shape (m, n)
      Feature matrix. If its first column is the constant-ones column added
      earlier in the notebook, row 0 of the weights is the bias.
  y : numpy.ndarray of int, shape (m,)
      Class labels.

  Notes
  -----
  NOTE(review): the function only fills local history lists and does not
  return anything yet; it looks unfinished (no loop, no return value).
  NOTE(review): the weight matrix has ``len(np.unique(y))`` columns while
  ``oneHotter`` produces ``y.max() + 1`` columns; the two only agree when
  the labels are exactly 0..k-1.
  """
  m = X.shape[0]
  n = X.shape[1]
  k = len(np.unique(y))
  theta = np.random.randn(n,k)  # random initial weights, one column per class
  losses = []
  thetas = []
  grads = []
  g_bias = []
  g_theta = []

  y_hot = oneHotter(y)
  s = X@theta
  prob = softmax(s)
  # Gradient of the mean cross-entropy with respect to theta.
  gradient = (-1/m)*X.T@(y_hot-prob)
  theta = theta - gradient
  # Mean cross-entropy as a single scalar (sum over samples AND classes).
  # Probabilities are clipped away from 0 so that log() cannot produce -inf
  # and 0 * -inf = NaN when the softmax saturates.
  loss = (-1/m)*np.sum(y_hot * np.log(np.clip(prob, 1e-12, 1.0)))
  losses.append(loss)
  thetas.append(theta)
  grads.append(gradient)
  # Track two individual gradient entries for class 0. Row 0 of `gradient`
  # pairs with the first column of X, row 1 with the second column.
  # (The original read an undefined name `gradients` here.)
  g_bias.append(gradient[0, 0])
  g_theta.append(gradient[1, 0])
