### 1. Import Libraries

In [11]:
import os
import sys
import warnings

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.datasets import fetch_openml
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

%matplotlib inline
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

### 2. Import from mlcblab

In [12]:
from mlcvlab.models.nn4 import NN4
from mlcvlab.models.nn4GPU import NN4_GPU
from mlcvlab.nn.activations import relu, sigmoid, sigmoid_grad, relu_grad
from mlcvlab.nn.basis import linear, linear_grad
from mlcvlab.nn.batchnorm import BatchNorm
from mlcvlab.nn.dropout import dropout, dropout_grad
from mlcvlab.nn.losses import l2, l2_grad, cross_entropy, cross_entropy_grad
from mlcvlab.optim.adam import Adam
from mlcvlab.optim.sgd import SGD
from mlcvlab.optim.sync_sgd import sync_sgd, sync_sgd_gpu
# TODO: Import all the necessary code from mlcvlab package as you need... 

### 3. Set Seed

In [13]:
np.random.seed(42)

### 4. Helper functions

In [14]:
def load_dataset():
    x, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
    y = y.astype(np.int64)
    return x,y

def prepare_data(x, y):
    y = (y % 2 == 0)
    return x, y

def split_train_test(x,y):
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=10000, random_state=25)
    return X_train, X_test, y_train, y_test

def minibatch(X_train,y_train,K):
    batches = X_train.shape[0] / K
    X_train_batches = np.array_split(X_train, batches)
    y_train_batches = np.array_split(y_train, batches)
    return X_train_batches, y_train_batches

def initialize_model():
    W0 = None
    W1 = None
    W2 = None
    W3 = None
    W0 = np.random.randn(785, 500) * np.sqrt(2/785)
    W1 = np.random.randn(500, 100) * np.sqrt(2/500)
    W2 = np.random.randn(100, 50) * np.sqrt(2/100)
    W3 = np.random.randn(50, 1) * np.sqrt(2/51)
    
    print(f"Size of W0 : {W0.shape}, Size of W1 : {W1.shape}, Size of W2 : {W2.shape}, Size of W3 : {W3.shape}")
    four_layer_nn  = NN4(True, 0.75)
    four_layer_nn.layers[0].W = W0
    four_layer_nn.layers[1].W = W1
    four_layer_nn.layers[2].W = W2
    four_layer_nn.layers[3].W = W3

    return four_layer_nn

def train_model(model, X_train_batches, y_train_batches):
    #Update both your models with final updated weights and return them
    model_async = sync_sgd(model, X_train_batches, y_train_batches, lr=0.01, mode='train')
    
    #return model_async, model_sync
    return model_async

def test_model(model, X_test, y_test):
    accuracy = None
    #TODO: Call model.nn4 to test model.
    total = 0
    for i in range(X_test.shape[0]):
        X_sample_test = X_test[i, :]
        X_sample_test = np.append(X_sample_test, -1)
        y_actual = y_test[i]
        y_hat = model.nn4(X_sample_test, mode='test')

        if y_hat[0] < 0.5:
            y_hat[0] = 0
        else:
            y_hat[0] = 1
             
        if y_hat == y_actual:
            total = total + 1

    accuracy = total/len(X_test)
    
    return accuracy

### 5. Run the program

In [15]:

#load data
x, y = load_dataset()

#prepare data
x, y = prepare_data(x,y)

# split data set
X_train, X_test, y_train, y_test = split_train_test(x,y)

#initialize model
model = initialize_model()

K = 100
X_train_batches, y_train_batches = minibatch(X_train,y_train,K)

#training model
# model_async, model_sync = train_model(model, X_train_batches, y_train_batches)
# print(f"Completed training, now testing...")
model_async = train_model(model, X_train_batches, y_train_batches)   

#testing model
# accuracy_async = test_model(model_async, X_test, y_test)
# print(f"Completed testing model using asynchronous SGD - Accuracy : {accuracy_async}")   

# accuracy_sync = test_model(model_sync, X_test, y_test)
# print(f"Completed testing model using synchronous SGD - Accuracy : {accuracy_sync}") 
accuracy = test_model(model_async, X_test, y_test)
print(f"Completed testing model - Accuracy : {accuracy}")  

Size of W0 : (785, 500), Size of W1 : (500, 100), Size of W2 : (100, 50), Size of W3 : (50, 1)
Starting SGD...
Iteration 1 completed with loss: 0.287308
Iteration 2 completed with loss: 0.325380
Iteration 3 completed with loss: 0.344122
Iteration 4 completed with loss: 0.298758
Iteration 5 completed with loss: 0.330383
Iteration 6 completed with loss: 0.277605
Iteration 7 completed with loss: 0.304946
Iteration 8 completed with loss: 0.277867
Iteration 9 completed with loss: 0.294481
Iteration 10 completed with loss: 0.288567
Completed training model - final W : [array([[ 2.50718687e-02, -6.97895234e-03,  3.26923682e-02, ...,
        -9.60742976e-03, -4.41972223e-02, -6.97974339e-02],
       [ 4.67492253e-02,  9.63786575e-02, -7.05933228e-02, ...,
         3.23468296e-02, -2.88305145e-02,  2.89013716e-02],
       [ 7.06330905e-02,  4.66712980e-02,  3.00986955e-03, ...,
         6.09928250e-02,  5.16900132e-02,  2.99080623e-02],
       ...,
       [ 4.11018115e-02,  1.30872926e-02,  1.7