In [24]:
import numpy as np
import matplotlib.pyplot as plt

from mytorch.nn.activation import ReLU, Softmax, Tanh, LinearActivation
from mytorch.nn.initialization import Xavier, He
from mytorch.nn.linear import Linear
from mytorch.nn.loss import CrossEntropyLoss, L2Loss
from mytorch.optim.optimizer import SGD, Adam
from models.mlp import MLP
import numpyNN

In [25]:
def train_mlp(mlp, x_train, y_train, opt_loss, opt_optim, num_epoch = 20):
    """
    Train ``mlp`` with full-batch updates and log per-epoch metrics.

    Each epoch reshuffles the samples with ``np.random.shuffle``, runs
    ``mlp.forward`` on the shuffled batch, scores it with
    ``opt_loss.forward``, back-propagates ``opt_loss.backward`` through
    ``mlp.backward`` and then calls ``opt_optim.step()`` followed by
    ``opt_optim.zero_grad()``.

    Parameters
    ----------
    mlp
        Model exposing ``forward``, ``backward`` and ``get_parameters``.
    x_train : np.ndarray
        Training inputs, one sample per row.
    y_train : np.ndarray
        Training targets with one row per sample: either one-hot / score rows
        of shape (N, C), or class ids of shape (N,) or (N, 1).
    opt_loss
        Loss object exposing ``forward(y_pred, y_true)`` and
        ``backward(y_pred, y_true)``.
    opt_optim
        Optimizer exposing ``initialize``, ``step`` and ``zero_grad``.
    num_epoch : int
        Number of full passes over the training set.

    Returns
    -------
    dict
        ``{"train_loss": [...], "train_accuracy": [...]}`` with one entry per
        epoch in each list.

    Raises
    ------
    AssertionError
        If ``x_train`` and ``y_train`` do not have the same number of rows.
    """
    # Compare sample counts (axis 0 on both); axis 1 of y_train is the class
    # dimension and must not take part in this check.
    assert x_train.shape[0] == y_train.shape[0], "x_train and y_train must have same length"

    num_samples = len(x_train)
    index = np.arange(num_samples)
    train_loss = []
    train_accuracy = []
    opt_optim.initialize(mlp.get_parameters())

    for epoch in range(num_epoch):
        np.random.shuffle(index)
        train_x = x_train[index]
        train_y = y_train[index]

        y_pred = mlp.forward(train_x)

        loss = opt_loss.forward(y_pred, train_y)
        train_loss.append(loss)

        predicted_labels = np.argmax(y_pred, axis=1)
        # Labels must come from the *shuffled* targets so they line up row by
        # row with y_pred.
        if train_y.ndim > 1 and train_y.shape[1] > 1:
            true_labels = np.argmax(train_y, axis=1)
        else:
            true_labels = train_y.astype(int).reshape(-1)
        accuracy = np.sum(predicted_labels == true_labels) / num_samples
        train_accuracy.append(accuracy)

        dLdZ = opt_loss.backward(y_pred, train_y)
        mlp.backward(dLdZ)
        opt_optim.step()
        opt_optim.zero_grad()

        print(f"Epoch: {epoch}, Loss: {loss}, Accuracy: {accuracy}")

    train_logs = {"train_loss": train_loss, "train_accuracy": train_accuracy}
    return train_logs

In [3]:
def test_mlp(mlp, x_test, y_test, opt_loss, num_epoch = 20):
    """
    Parameters
    ----------
    Returns
    -------
        [0] Mean test loss.
        [1] Test accuracy.
    """
    assert(x_test.shape[0] == y_test.shape[0]) # "x_test and y_test must have same length"
    
    test_loss = []
    test_accuracy = [ ]
    
    for epoch in range(num_epoch):
        y_pred = mlp.forward(x_test)
        # y_label = np.argmax(y_pred, axis=1)
        loss = opt_loss.forward(y_pred, y_test)
        # y_pred = np.argmax(y_pred, axis=1)
        accuracy = np.sum(y_pred == y_test)/len(x_test)
        test_loss.append(loss)
        test_accuracy.append(accuracy)
        print(f"Epoch: {epoch}, Loss: {loss}, Accuracy: {accuracy}")
    
    test_logs = {"test_loss": loss, "test_accuracy": accuracy}
    return test_logs

In [4]:
# based on dataset.py from IML HW 6 
def one_hot_encoding(y, num_classes=2):
    """Turn an array of class ids into one-hot rows.

    ``y`` is cast to ``int`` and flattened, so the result always has shape
    ``(y.size, num_classes)``; row ``i`` is the identity-matrix row selected
    by the ``i``-th class id.
    """
    class_ids = y.astype(int).ravel()
    identity = np.eye(num_classes)
    return identity[class_ids]

In [5]:
# Draw a tiny toy dataset: two training and two test samples of the
# 'linear-separable' kind.
x_train, y_train, x_test, y_test = numpyNN.sample_data(
    data_name='linear-separable',
    nTrain=2,
    nTest=2,
)

In [6]:
# Display the shapes of the four arrays returned by sample_data.
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((2, 2), (2, 1), (2, 2), (2, 1))

In [7]:
# Network geometry: the input width is taken from the data, the output has
# two units.
dim_in = x_train.shape[1]
dim_out = 2
hidden_neuron_list = [4, 16]
activation_list = ['ReLU', 'ReLU', 'Softmax']  # one activation per Linear layer
opt_init = 'xavier'

# Loss, model and optimizer objects used by the step-by-step cells that follow.
opt_loss = L2Loss()
mlp = MLP(dim_in, dim_out, hidden_neuron_list, activation_list, opt_init)
opt_optim = SGD(lr_decay=1, decay_iter=30)
opt_optim.initialize(mlp.get_parameters())

In [8]:

# NOTE(review): the recorded output ends with a stray "None", which suggests
# summary() prints the table itself and returns None — confirm, and if so
# call it without print().
print(mlp.summary())

Model Summary
-------------
Layer 1: Linear - Input Dim: 2, Output Dim: 4, Parameters: 12
Layer 2: ReLU
Layer 3: Linear - Input Dim: 4, Output Dim: 16, Parameters: 80
Layer 4: ReLU
Layer 5: Linear - Input Dim: 16, Output Dim: 2, Parameters: 34
Layer 6: Softmax
Total Parameters: 126
None


In [9]:
# Dump every parameter dict of the freshly built model; each entry carries
# 'params' and 'grad' ('grad' is None in the recorded output).
print(mlp.get_parameters())


[{'params': array([[-0.71929844,  0.74017452, -0.05278391,  0.6018215 ],
       [ 0.04095496,  0.35775906,  0.44126531,  0.16403958]]), 'grad': None}, {'params': array([[0., 0., 0., 0.]]), 'grad': None}, {'params': array([[ 0.04094032,  0.28329922, -0.43170659, -0.02891929, -0.3436057 ,
         0.25953086, -0.31050353, -0.39959847, -0.19264387, -0.38376196,
        -0.30418168, -0.12434509,  0.44102453, -0.05482704,  0.12385481,
         0.44075079],
       [-0.43896638,  0.51465005,  0.1677565 , -0.36050049, -0.15538652,
         0.27461291,  0.11812258, -0.19165116, -0.50562961,  0.14708986,
         0.50275373,  0.16737341,  0.14794958,  0.54257349,  0.08966254,
        -0.09380451],
       [-0.0277175 ,  0.13529854, -0.17745377,  0.19143158, -0.20024546,
         0.3049122 ,  0.49248041,  0.17803926, -0.53285558,  0.13457116,
         0.19023459,  0.51698985,  0.41429019,  0.01054298, -0.48669017,
        -0.0535024 ],
       [-0.52582717, -0.06385249,  0.52536094, -0.15397088, -0

In [10]:

# Forward pass on the training inputs.
y_pred = mlp.forward(x_train)
# One-hot encode the labels only while they are still a single column (or
# vector) of class ids, so re-running this cell does not re-encode the
# already encoded array and double its number of rows.
if y_train.ndim == 1 or y_train.shape[1] == 1:
    y_train = one_hot_encoding(y_train)
loss = opt_loss.forward(y_pred, y_train)

In [11]:
# Peek at rows 1..3 of the predictions, the one-hot targets and the loss
# (with two training samples only row 1 exists, as the recorded output shows).
y_pred[1:4], y_train[1:4], loss[1:4]

(array([[0.38317163, 0.61682837]]), array([[1., 0.]]), array([0.38047723]))

In [12]:
# Collapse score rows and one-hot rows to class ids, then report the share of
# matching rows.
predicted_labels = y_pred.argmax(axis=1)
true_labels = y_train.argmax(axis=1)  # assumes y_train is one-hot encoded at this point
accuracy = (predicted_labels == true_labels).sum() / len(x_train)
print(accuracy)

0.0


In [13]:
# Mean of the first layer's first parameter array, recorded before backprop
# as a reference value for the later comparisons.
print("Before backward:", mlp.layers[0].parameters[0]['params'].mean())

Before backward: 0.1967415731239063


In [14]:
# Ask the loss for its backward signal (presumably dL/d(output) — confirm in
# L2Loss.backward) and display rows 1..9; only row 1 exists here.
dLdZ = opt_loss.backward(y_pred, y_train)
dLdZ[1:10]

array([[-0.61682837,  0.61682837]])

In [15]:
# Shape check on the loss gradient (recorded as (2, 2)).
dLdZ.shape

(2, 2)

In [16]:
# Parameters right before mlp.backward(): the recorded output still shows
# 'grad': None for the visible entries.
mlp.get_parameters()

[{'params': array([[-0.71929844,  0.74017452, -0.05278391,  0.6018215 ],
         [ 0.04095496,  0.35775906,  0.44126531,  0.16403958]]),
  'grad': None},
 {'params': array([[0., 0., 0., 0.]]), 'grad': None},
 {'params': array([[ 0.04094032,  0.28329922, -0.43170659, -0.02891929, -0.3436057 ,
           0.25953086, -0.31050353, -0.39959847, -0.19264387, -0.38376196,
          -0.30418168, -0.12434509,  0.44102453, -0.05482704,  0.12385481,
           0.44075079],
         [-0.43896638,  0.51465005,  0.1677565 , -0.36050049, -0.15538652,
           0.27461291,  0.11812258, -0.19165116, -0.50562961,  0.14708986,
           0.50275373,  0.16737341,  0.14794958,  0.54257349,  0.08966254,
          -0.09380451],
         [-0.0277175 ,  0.13529854, -0.17745377,  0.19143158, -0.20024546,
           0.3049122 ,  0.49248041,  0.17803926, -0.53285558,  0.13457116,
           0.19023459,  0.51698985,  0.41429019,  0.01054298, -0.48669017,
          -0.0535024 ],
         [-0.52582717, -0.06385249

In [17]:
# Back-propagate the loss gradient through the network; the array displayed
# below the cell is the value backward() returns.
mlp.backward(dLdZ)

array([[1.28921211, 0.96306395],
       [1.28921211, 0.96306395]])

In [18]:
# Parameters after mlp.backward(): the 'grad' entries are now populated.
# NOTE(review): in the recorded output the second entry's gradient prints as
# integers ([[0, 2, 2, 2]]) and the first entry's gradient has three identical
# columns — looks suspicious; verify the layer backward implementations.
mlp.get_parameters()

[{'params': array([[-0.71929844,  0.74017452, -0.05278391,  0.6018215 ],
         [ 0.04095496,  0.35775906,  0.44126531,  0.16403958]]),
  'grad': array([[0.        , 0.78397459, 0.78397459, 0.78397459],
         [0.        , 0.05325339, 0.05325339, 0.05325339]])},
 {'params': array([[0., 0., 0., 0.]]), 'grad': array([[0, 2, 2, 2]])},
 {'params': array([[ 0.04094032,  0.28329922, -0.43170659, -0.02891929, -0.3436057 ,
           0.25953086, -0.31050353, -0.39959847, -0.19264387, -0.38376196,
          -0.30418168, -0.12434509,  0.44102453, -0.05482704,  0.12385481,
           0.44075079],
         [-0.43896638,  0.51465005,  0.1677565 , -0.36050049, -0.15538652,
           0.27461291,  0.11812258, -0.19165116, -0.50562961,  0.14708986,
           0.50275373,  0.16737341,  0.14794958,  0.54257349,  0.08966254,
          -0.09380451],
         [-0.0277175 ,  0.13529854, -0.17745377,  0.19143158, -0.20024546,
           0.3049122 ,  0.49248041,  0.17803926, -0.53285558,  0.13457116,
    

In [19]:
# Same statistic as before backprop, printed again right before the optimizer
# step.
print("Before optimization:", mlp.layers[0].parameters[0]['params'].mean())

Before optimization: 0.1967415731239063


In [20]:
# Apply one optimizer update.
# NOTE(review): the recorded "Before optimization" and "After optimization"
# means are identical, so this call did not change the first layer's
# parameters — verify the learning rate used by SGD and that the optimizer
# updates the arrays the layers actually hold.
opt_optim.step()

In [21]:
# Re-print the statistic after step(); it should differ from the value printed
# before the step if the update took effect.
print("After optimization:", mlp.layers[0].parameters[0]['params'].mean())

After optimization: 0.1967415731239063


In [22]:
# Reset the stored gradients.
# NOTE(review): the parameter dump recorded after this cell still shows the
# populated 'grad' arrays — confirm that zero_grad() really clears them.
opt_optim.zero_grad()

In [23]:
# Inspect the parameter dicts once more, after step() and zero_grad().
mlp.get_parameters()

[{'params': array([[-0.71929844,  0.74017452, -0.05278391,  0.6018215 ],
         [ 0.04095496,  0.35775906,  0.44126531,  0.16403958]]),
  'grad': array([[0.        , 0.78397459, 0.78397459, 0.78397459],
         [0.        , 0.05325339, 0.05325339, 0.05325339]])},
 {'params': array([[0., 0., 0., 0.]]), 'grad': array([[0, 2, 2, 2]])},
 {'params': array([[ 0.04094032,  0.28329922, -0.43170659, -0.02891929, -0.3436057 ,
           0.25953086, -0.31050353, -0.39959847, -0.19264387, -0.38376196,
          -0.30418168, -0.12434509,  0.44102453, -0.05482704,  0.12385481,
           0.44075079],
         [-0.43896638,  0.51465005,  0.1677565 , -0.36050049, -0.15538652,
           0.27461291,  0.11812258, -0.19165116, -0.50562961,  0.14708986,
           0.50275373,  0.16737341,  0.14794958,  0.54257349,  0.08966254,
          -0.09380451],
         [-0.0277175 ,  0.13529854, -0.17745377,  0.19143158, -0.20024546,
           0.3049122 ,  0.49248041,  0.17803926, -0.53285558,  0.13457116,
    