### 1. Import Libraries

In [3]:
import os
import sys
import warnings

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.datasets import fetch_openml
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default font sizes applied to axis labels and to x/y tick labels.
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

### 2. Import from mlcvlab

In [4]:
from mlcvlab.models.nn4 import NN4
from mlcvlab.nn.activations import relu, sigmoid, sigmoid_grad, relu_grad
from mlcvlab.nn.basis import linear, linear_grad
from mlcvlab.nn.batchnorm import BatchNorm
from mlcvlab.nn.dropout import dropout, dropout_grad
from mlcvlab.nn.losses import l2, l2_grad, cross_entropy, cross_entropy_grad
from mlcvlab.optim.adam import Adam
from mlcvlab.optim.sgd import SGD
from mlcvlab.optim.sync_sgd import sync_sgd
# TODO: Import all the necessary code from mlcvlab package as you need...

### 3. Set Seed

In [5]:
# Seed NumPy's global RNG so the np.random.randn weight draws in
# initialize_model() are repeatable. (split_train_test passes its own
# random_state to train_test_split.)
np.random.seed(42)

### 4. Helper functions

In [6]:
def load_dataset():
    """Fetch the OpenML ``mnist_784`` dataset (version 1).

    Returns:
        tuple: ``(x, y)`` as produced by ``fetch_openml`` with
        ``return_X_y=True`` and ``as_frame=False``; the labels are cast to
        ``np.int64`` before being returned.
    """
    features, labels = fetch_openml(
        "mnist_784",
        version=1,
        return_X_y=True,
        as_frame=False,
    )
    return features, labels.astype(np.int64)

def prepare_data(x, y):
    """Replace the labels with a binary "label is even" target.

    Args:
        x: Feature data; passed through unchanged.
        y: Labels supporting ``%`` and ``==`` (e.g. an integer NumPy array).

    Returns:
        tuple: ``(x, is_even)`` where ``is_even`` is ``y % 2 == 0``.
    """
    remainder = y % 2
    is_even = remainder == 0
    return x, is_even

def split_train_test(x,y):
    """Split features and labels into train and test partitions.

    Delegates to ``train_test_split`` with ``test_size=10000`` and a fixed
    ``random_state=25`` so the split is the same on every run.

    Args:
        x: Feature data.
        y: Labels aligned with ``x``.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``.
    """
    parts = train_test_split(x, y, test_size=10000, random_state=25)
    features_train, features_test, labels_train, labels_test = parts
    return features_train, features_test, labels_train, labels_test

def minibatch(X_train,y_train,K):
    """Split the training data into mini-batches of roughly ``K`` samples.

    The data is cut into ``X_train.shape[0] // K`` nearly equal pieces with
    ``np.array_split``. When the sample count is not a multiple of ``K`` some
    batches hold one or more extra samples, so a batch is never smaller than
    ``K`` unless the whole data set is smaller than ``K``; in that case a
    single batch containing every sample is returned.

    Args:
        X_train: Array of training features, one sample per row.
        y_train: Labels aligned with the rows of ``X_train``.
        K: Target batch size; must be positive.

    Returns:
        tuple: ``(X_train_batches, y_train_batches)``, two lists of arrays
        with matching batch boundaries.

    Raises:
        ValueError: If ``K`` is not positive, the training set is empty, or
            ``X_train`` and ``y_train`` have different lengths.
    """
    if K <= 0:
        raise ValueError(f"Batch size K must be positive, got {K}")

    n_samples = X_train.shape[0]
    if n_samples == 0:
        raise ValueError("Cannot build mini-batches from an empty training set")
    if len(y_train) != n_samples:
        raise ValueError(
            f"X_train has {n_samples} samples but y_train has {len(y_train)}"
        )

    # n_samples // K is 0 when K exceeds the sample count, which
    # np.array_split rejects; fall back to one batch holding everything.
    batches = max(1, n_samples // K)
    X_train_batches = np.array_split(X_train, batches)
    y_train_batches = np.array_split(y_train, batches)
    return X_train_batches, y_train_batches

def initialize_model():
    """Create an ``NN4`` and install freshly sampled weights on its four layers.

    Every weight matrix is drawn with ``np.random.randn(fan_in, fan_out)`` and
    scaled by ``sqrt(2 / fan_in)`` (He-style scaling). The network is built as
    ``NN4(True, 0.25)``.

    NOTE(review): the assignment hint suggested Xavier initialisation for W3,
    but all four matrices currently use the same ``sqrt(2 / fan_in)`` scale.
    NOTE(review): the positional arguments are presumably
    ``use_batchnorm=True`` and a dropout parameter of 0.25 -- confirm against
    the ``NN4`` signature.

    Returns:
        The ``NN4`` instance with ``layers[0..3].W`` replaced.
    """
    # (fan_in, fan_out) per layer; 785 is presumably 784 pixels plus a bias
    # term (test_model appends a constant 1 to each sample).
    layer_shapes = [(785, 500), (500, 100), (100, 50), (50, 1)]

    # Sample in layer order so the global NumPy RNG is consumed W0 -> W3.
    weights = [
        np.random.randn(fan_in, fan_out) * np.sqrt(2/fan_in)
        for fan_in, fan_out in layer_shapes
    ]
    W0, W1, W2, W3 = weights
    print(f"Size of W0 : {W0.shape}, Size of W1 : {W1.shape}, Size of W2 : {W2.shape}, Size of W3 : {W3.shape}")

    four_layer_nn = NN4(True, 0.25)
    for index, weight in enumerate(weights):
        four_layer_nn.layers[index].W = weight

    return four_layer_nn

def train_model(model, X_train_batches, y_train_batches):
    """Train ``model`` on the given mini-batches using ``sync_sgd``.

    Only the synchronous optimiser is invoked, with ``lr=0.01`` and
    ``mode='train'``.

    Args:
        model: Network to train.
        X_train_batches: Sequence of feature batches.
        y_train_batches: Sequence of label batches aligned with
            ``X_train_batches``.

    Returns:
        Whatever ``sync_sgd`` returns for the trained model.
    """
    # TODO: also train with async SGD and compare outcome and runtime with
    # the synchronous run, returning both updated models.
    return sync_sgd(model, X_train_batches, y_train_batches, lr=0.01, mode='train')

def test_model(model, X_test, y_test):
    accuracy = None
    #TODO: Call model.nn4 to test model.
    total = 0
    for i in range(X_test.shape[0]):
        X_sample_test = np.append(X_test[i, :], 1)
        y_actual = y_test[i]
        y_hat = model.nn4(X_sample_test, mode='test')

        predicted_class = 1 if y_hat >= 0.5 else 0

        if predicted_class == y_actual:
            total += 1

    accuracy = total / len(X_test)

    return accuracy

### 5. Run the program

In [7]:

# Load MNIST features and integer labels.
x, y = load_dataset()

# Convert the labels into a boolean "label is even" target.
x, y = prepare_data(x,y)

# Hold out 10000 samples for testing (fixed random_state inside the helper).
X_train, X_test, y_train, y_test = split_train_test(x,y)

# Build the NN4 model with freshly sampled weights.
model = initialize_model()

# K is the mini-batch size handed to minibatch().
K = 100
X_train_batches, y_train_batches = minibatch(X_train,y_train,K)

# Train the model. NOTE: despite the variable name, train_model() trains
# with sync_sgd, not an asynchronous optimiser.
model_async = train_model(model, X_train_batches, y_train_batches)

# Evaluate accuracy on the held-out test set.
accuracy = test_model(model_async, X_test, y_test)
print(f"Completed testing model - Accuracy : {accuracy}")

  warn(


Size of W0 : (785, 500), Size of W1 : (500, 100), Size of W2 : (100, 50), Size of W3 : (50, 1)
Starting SGD...
Iteration 1 completed with loss: 0.287700
Iteration 2 completed with loss: 0.325896
Iteration 3 completed with loss: 0.344710
Iteration 4 completed with loss: 0.299171
Iteration 5 completed with loss: 0.329399
Iteration 6 completed with loss: 0.279759
Iteration 7 completed with loss: 0.305589
Iteration 8 completed with loss: 0.279359
Iteration 9 completed with loss: 0.296616
Iteration 10 completed with loss: 0.288702
Iteration 11 completed with loss: 0.258477
Iteration 12 completed with loss: 0.259837
Iteration 13 completed with loss: 0.230985
Iteration 14 completed with loss: 0.248296
Iteration 15 completed with loss: 0.281530
Iteration 16 completed with loss: 0.253921
Iteration 17 completed with loss: 0.292131
Iteration 18 completed with loss: 0.274164
Iteration 19 completed with loss: 0.260414
Iteration 20 completed with loss: 0.253582
Iteration 21 completed with loss: 0.27