### 1. Import Libraries

In [1]:
import os
import sys
import warnings

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.datasets import fetch_openml
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

%matplotlib inline
# Apply one consistent set of label font sizes to every figure in the notebook.
mpl.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

### 2. Import from mlcvlab

In [2]:
from mlcvlab.models.nn2 import NN2
from mlcvlab.nn.losses import l2
from mlcvlab.optim.sgd import SGD
from mlcvlab.optim.adam import Adam


### 3. Set Seed

In [3]:
# Seed NumPy's global random generator so np.random draws are repeatable.
SEED = 42
np.random.seed(SEED)

### 4. Helper functions

In [4]:
def load_dataset():
    """Download the OpenML ``mnist_784`` dataset (version 1).

    Returns:
        tuple: ``(features, labels)`` where ``features`` is the data array
        returned by ``fetch_openml`` (``as_frame=False``) and ``labels`` is
        the target converted to a NumPy array of ``np.int64``.
    """
    features, raw_labels = fetch_openml(
        "mnist_784",
        version=1,
        return_X_y=True,
        as_frame=False,
    )
    # Cast the labels to integers so arithmetic such as ``y % 2`` works on them.
    int_labels = np.array(raw_labels).astype(np.int64)
    return features, int_labels

def prepare_data(x, y):
    """Convert integer labels into a binary "is even" target.

    Args:
        x: Feature data; passed through untouched.
        y: Integer labels.

    Returns:
        tuple: ``(x, y_binary)`` where ``y_binary`` is ``True`` wherever the
        corresponding label is divisible by two.
    """
    remainder = y % 2
    is_even = remainder == 0
    return x, is_even

def split_train_test(x, y, test_size=10000, random_state=25):
    """Split features and labels into train and test partitions.

    Thin wrapper around ``train_test_split``. The hold-out size and the
    shuffling seed were previously hard-coded; they are now keyword
    parameters whose defaults reproduce the earlier behaviour, so existing
    two-argument calls are unaffected.

    Args:
        x: Feature data.
        y: Labels aligned with ``x``.
        test_size: Forwarded to ``train_test_split`` (default ``10000``).
        random_state: Forwarded to ``train_test_split`` (default ``25``).

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

def initialize_model(X_train, X_test, y_train, y_test, hidden_layer_neurons):
    """Create an ``NN2`` model with randomly initialised weight matrices.

    Only ``X_train`` (for its column count) and ``hidden_layer_neurons`` are
    read; ``X_test``, ``y_train`` and ``y_test`` are accepted but not used.

    Args:
        X_train: Training features; ``X_train.shape[1]`` gives the input width.
        X_test: Unused.
        y_train: Unused.
        y_test: Unused.
        hidden_layer_neurons: Width of the hidden layer before the extra unit.

    Returns:
        The ``NN2`` instance with ``layers[0].W`` and ``layers[1].W`` assigned.
    """
    n_inputs = X_train.shape[1]
    # One extra unit is added to the input and hidden widths
    # (presumably for a bias term - confirm against the NN2 implementation).
    sizes = (n_inputs + 1, hidden_layer_neurons + 1, 1)

    weights = []
    for position, (fan_in, fan_out) in enumerate(zip(sizes[:-1], sizes[1:])):
        # The first matrix is scaled by sqrt(1/fan_in), later ones by sqrt(2/fan_in).
        numerator = 1 if position == 0 else 2
        scale = np.sqrt(numerator / fan_in)
        weights.append(np.random.randn(fan_in, fan_out) * scale)

    W0, W1 = weights
    print(f"Size of W0 : {np.shape(W0)}, Size of W1 : {np.shape(W1)}")

    network = NN2()
    network.layers[0].W = W0
    network.layers[1].W = W1
    return network

def train_model(model, X_train, y_train, hidden_layer_neurons, lr=0.1, R=10):
    """Train ``model`` with ``Adam`` and return whatever ``Adam`` returns.

    Args:
        model: Model object handed to the optimiser.
        X_train: Training features.
        y_train: Training labels.
        hidden_layer_neurons: Forwarded to ``Adam`` as its fourth argument.
        lr: Not used by the Adam call; only the SGD alternative below takes it.
        R: Forwarded to ``Adam`` as its last argument (the run log suggests it
            is the number of training rounds - confirm in ``Adam``).

    Returns:
        The value returned by ``Adam``, which the caller treats as the final
        weights.
    """
    # SGD alternative: SGD(model, X_train, y_train, lr, R)
    return Adam(model, X_train, y_train, hidden_layer_neurons, R)

def test_model(model, X_test, y_test, final_W):
    model.W = final_W
    model.layers[0].W = final_W[0]
    model.layers[1].W = final_W[1]
    new_test_X = np.concatenate((np.transpose(X_test), -1*np.ones((1, len(X_test)))))
    y_hat = model.nn2(new_test_X)
    y_hat = ((y_hat.T) >= 0.5) * 1
    total = np.sum(y_hat == y_test)
    return total / len(X_test)

### 5. Run the program

In [5]:

# Fetch the dataset and replace the digit labels with the even/odd target.
x, y = load_dataset()
x, y = prepare_data(x, y)

# Hold out a test set, then convert the features to float32 and divide by 255.
X_train, X_test, y_train, y_test = split_train_test(x, y)
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# Wrap each label array in an outer axis and transpose it.
y_train = np.array([y_train]).T
y_test = np.array([y_test]).T

# Hidden-layer widths used with each optimiser.
SGD_Neurons = 500
ADAM_Neurons = 400

# Build and train the network with the Adam settings.
# To train with SGD instead, pass SGD_Neurons (plus lr=0.1) to the two calls
# below and switch the optimiser inside train_model.
model = initialize_model(X_train, X_test, y_train, y_test, ADAM_Neurons)
final_W = train_model(model, X_train, y_train, ADAM_Neurons, R=10)
print(f"Completed training model - final W : {final_W}")

# Evaluate on the held-out samples.
accuracy = test_model(model, X_test, y_test, final_W)
print(f"Completed testing model - Accuracy : {accuracy}")

Size of W0 : (785, 401), Size of W1 : (401, 1)
ADAM optimizer starts here:
R: 1 started
127.10618970510403
R: 1 completed
R: 2 started
173.96311642986538
R: 2 completed
R: 3 started
133.87924633871935
R: 3 completed
R: 4 started
136.45311337046604
R: 4 completed
R: 5 started
141.78693990360486
R: 5 completed
R: 6 started
142.32365960871397
R: 6 completed
R: 7 started
142.8078223025279
R: 7 completed
R: 8 started
141.9103403823493
R: 8 completed
R: 9 started
139.60724150710226
R: 9 completed
R: 10 started
136.53379234590037
R: 10 completed
Completed training model - final W : [array([[ 0.01772849, -0.00493486,  0.023117  , ..., -0.0040881 ,
         0.04417956, -0.05690756],
       [-0.02139261,  0.00018716,  0.00167681, ...,  0.00106204,
         0.03348879, -0.01841843],
       [ 0.0034307 , -0.01649931, -0.01550784, ...,  0.00446945,
        -0.01532614,  0.00436499],
       ...,
       [-0.02722255, -0.01179598,  0.01500827, ...,  0.04128264,
        -0.00623308,  0.03275561],
     