In [2]:
from numpyNN import *
from torchUtil import *
import torch
import torch.nn as nn 
import torch.functional as F
from torch.utils.data import DataLoader, Dataset, random_split

# Q1 

In [17]:
class Model(nn.Module):
    """Fully connected network: ``Linear`` layers with one shared hidden activation.

    The output layer is always left linear (no activation module after it).

    Args:
        inputDim (int): Number of input features.
        outputDim (int): Number of output features.
        hiddenLayers (sequence of int, optional): Width of each hidden layer, in
            order. May be empty, which yields a single ``Linear`` layer.
            Defaults to [128, 128].
        opt_act (str, optional): Hidden activation, matched case-insensitively:
            "relu", "sigmoid", "tanh" or "linear" (no activation module). Any
            other value prints a notice and falls back to ReLU.
            Defaults to "ReLU".
        opt_init (str, optional): Weight initialization, matched
            case-insensitively: "xavier", or "he"/"kaiming". Any other value
            leaves the weights as created by ``nn.Linear`` (biases are still
            zeroed). Defaults to "xavier".
    """

    # Lower-cased activation name -> module class. ``None`` means that no
    # module is inserted between layers (identity / linear activation).
    _ACTIVATIONS = {
        "relu": nn.ReLU,
        "sigmoid": nn.Sigmoid,
        "tanh": nn.Tanh,
        "linear": None,
    }

    def __init__(
            self, 
            inputDim, 
            outputDim, 
            hiddenLayers=[128, 128], 
            opt_act="ReLU", 
            opt_init="xavier"
            ):
        super(Model, self).__init__()

        self.opt_act = opt_act.lower()
        self.opt_init = opt_init.lower()

        # Validate the activation BEFORE computing the gain: calculate_gain
        # raises on unknown names, which would make the ReLU fallback unreachable.
        if self.opt_act not in self._ACTIVATIONS:
            print("Invalid activation, automatically used ReLU as default")
            self.opt_act = "relu"
        # Gain used by the xavier initialization for the chosen activation.
        self.gain = nn.init.calculate_gain(self.opt_act)

        # Plain Python list describing the model structure; the modules are
        # registered with PyTorch through the nn.Sequential built below.
        self.layers = []

        # Consecutive pairs of widths give (in_features, out_features) per
        # layer. The default list is only read here, never mutated.
        widths = [inputDim] + list(hiddenLayers) + [outputDim]
        output_index = len(widths) - 2
        activation_cls = self._ACTIVATIONS[self.opt_act]

        for i in range(len(widths) - 1):
            layer = nn.Linear(widths[i], widths[i + 1])
            self._init_weights(layer)  # apply initialization
            self.layers.append(layer)

            # Hidden layers are followed by the activation; the output layer is not.
            if i != output_index and activation_cls is not None:
                self.layers.append(activation_cls())

        self.model = nn.Sequential(*self.layers)

    # Parameters initialization
    def _init_weights(self, layer):
        """Initialize ``layer`` in place according to ``self.opt_init``.

        Args:
            layer (nn.Linear): Layer whose weight (and bias, if present) is set.
        """
        if self.opt_init == "xavier":  # xavier initialization
            nn.init.xavier_uniform_(layer.weight, gain=self.gain)
        elif self.opt_init == "he" or self.opt_init == "kaiming":  # he initialization
            nn.init.kaiming_uniform_(layer.weight)
        # Any other opt_init keeps the weights nn.Linear was created with.
        if layer.bias is not None:
            nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.model(x)

In [18]:
# Sanity check: a 10-feature input, two hidden layers of 50 units, 2 outputs.
model = Model(inputDim=10, outputDim=2, hiddenLayers=[50, 50])
print(model)  # shows the generated nn.Sequential structure

Model(
  (model): Sequential(
    (0): Linear(in_features=10, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): ReLU()
    (4): Linear(in_features=50, out_features=2, bias=True)
  )
)


In [19]:
import numpy as np
import matplotlib.pyplot as plt
import random


In [66]:

class MLP():
    """Minimal NumPy multi-layer perceptron: initialization, forward pass, losses.

    Layout: weights are stored as ``(out_features, in_features)`` and biases as
    ``(out_features, 1)``, so ``forward`` works on column-major batches of shape
    ``(in_features, n_samples)``.

    Training and back-propagation are not implemented yet (``train`` only
    initializes the parameters and ``backwards`` is a stub).
    """

    def __init__(self, 
            structures,
            opt_act="ReLU", 
            opt_init="xavier",
            seed = 0
            ):
        """Store the configuration; parameters are created by ``initialize_mlp``.

        Args:
            structures (iterable): Layer sizes in order, including the input and
                output sizes (e.g. ``[2, 10, 5]``).
            opt_act (str, optional): Hidden activation, matched
                case-insensitively: "relu", "tanh", "sigmoid" or "linear".
                Defaults to "ReLU".
            opt_init (str, optional): Initialization method, matched
                case-insensitively: "xavier", or "he"/"kaiming".
                Defaults to "xavier".
            seed (int, optional): Random seed. Defaults to 0.
        """
        self.structures = structures
        self.weights = dict() # key = str(layer index), starting at "0"
        self.biases = dict()
        self.model = {"W":self.weights, "b": self.biases} # the entire model structure
        self.opt_act = opt_act.lower()
        self.opt_init = opt_init.lower()
        self.seed = seed
        self.cache = dict() # store the forward pass values

    # model training 
    def train(self, training_data, num_epoch, opt_loss, opt_optim):
        """Prepare the model for training.

        Only the lazy parameter initialization is implemented so far; the
        arguments are accepted but not used yet.
        """
        if len(self.weights) == 0:
            self.initialize_mlp()

    # forward pass 
    def forward(self, x):
        """Propagate ``x`` through every layer and return the output layer values.

        Hidden layers apply the configured activation; the last layer is linear.
        Each layer's output is stored in ``self.cache`` under ``str(layer index)``.

        Args:
            x (numpy array): Input data of shape ``(in_features, n_samples)``.
                A 1-D array is treated as a single sample (one column).

        Returns:
            numpy array: Output of the final layer, ``(out_features, n_samples)``.

        Raises:
            ValueError: If ``structures`` does not define at least one layer.
        """
        # Same lazy initialization as train(), so forward() works on a fresh model.
        if len(self.weights) == 0:
            self.initialize_mlp()
        n_layers = len(self.weights)
        if n_layers == 0:
            raise ValueError("structures must contain at least an input and an output size")

        a = np.asarray(x)
        if a.ndim == 1:
            # Biases are (out, 1) columns; a flat vector would broadcast to (out, out).
            a = a.reshape(-1, 1)

        last = n_layers - 1
        # The activation is only needed when there is at least one hidden layer.
        activation = self._resolve_activation() if last > 0 else None

        for i in range(n_layers):
            w, b = self.weights[str(i)], self.biases[str(i)]
            u = np.dot(w, a) + b
            # Each layer consumes the previous layer's output, not the raw input.
            a = u if i == last else activation(u)
            self.cache[str(i)] = a

        return a # values of the final output layer

    def _resolve_activation(self):
        """Return the activation callable for ``self.opt_act`` (ReLU if unknown)."""
        activations = {
            "relu": self.ReLU,
            "tanh": self.tanh,
            "sigmoid": self.Sigmoid,
            "linear": self.Linear,
        }
        activation = activations.get(self.opt_act)
        if activation is None:
            print("Invalid activation, automatically use ReLU activation")
            activation = self.ReLU
        return activation

    # back propagation 
    def backwards(self):
        """Back-propagation placeholder; not implemented yet."""
        pass

    # loss functions 
    def l2_loss(self,pred, label):
        """Sum of squared errors between predictions and labels.

        Args:
            pred (numpy array): model predictions
            label (numpy array): ground truth labels

        Returns:
            float: ``sum((label - pred) ** 2)`` over all elements.
        """
        pred = pred.reshape((pred.shape[0], -1))
        label = label.reshape((label.shape[0], -1))
        loss = np.sum(np.square(label-pred))
        return loss

    def cross_entropy(self,pred, label):
        """Summed cross-entropy between ``softmax(pred)`` and ``label``.

        Args:
            pred (numpy array): Raw scores with classes along axis 0.
            label (numpy array): Target distribution with the same layout
                (e.g. one-hot columns).

        Returns:
            float: ``-sum(label * log(softmax(pred)))``.
        """
        probs = self.softmax(pred).reshape((pred.shape[0], -1)) # apply softmax function
        label = label.reshape((label.shape[0], -1))
        # Keep probabilities away from 0 so log() cannot yield -inf
        # (and 0 * -inf cannot yield NaN).
        probs = np.clip(probs, 1e-12, 1.0)
        loss = -np.sum(label * np.log(probs))
        return loss

    # model parameters initialization 
    def initialize_mlp(self):
        """Create weights and biases for every consecutive pair in ``structures``.

        Seeds NumPy's global random generator with ``self.seed`` first, so
        repeated calls produce the same parameters.

        Returns:
            dict: ``self.model``, i.e. ``{"W": weights, "b": biases}``.
        """
        np.random.seed(self.seed)
        for i in range(len(self.structures)-1):
            self.weights[str(i)], self.biases[str(i)] = self._init_single_layer(self.structures[i], self.structures[i+1])
        print("Successfully initilaize MLP")
        return self.model

    # parameters initialization of a single layer perceptrons
    def _init_single_layer(self, in_shape, out_shape):
        """Draw one layer's parameters.

        Weights are standard-normal samples scaled by ``sqrt(1 / in_shape)``
        ("xavier", also the fallback) or ``sqrt(2 / in_shape)`` ("he"/"kaiming").

        Args:
            in_shape (int): Number of inputs to the layer.
            out_shape (int): Number of outputs of the layer.

        Returns:
            tuple: ``(weight, bias)`` with shapes ``(out_shape, in_shape)`` and
            ``(out_shape, 1)``; the bias is all zeros.
        """
        if self.opt_init == "he" or self.opt_init == "kaiming":
            numerator = 2.
        else:
            if self.opt_init != "xavier":
                print("Invalid initialization method, automatically use xavier initialization")
            numerator = 1.
        weight = np.random.randn(out_shape, in_shape)*np.sqrt(numerator / in_shape)
        bias = np.zeros((out_shape,1))
        return weight, bias

    # Activation functions
    def ReLU(self, x):
        """Element-wise ``max(0, x)``."""
        return np.maximum(0, x)

    def tanh(self,x):
        """Element-wise hyperbolic tangent."""
        return np.tanh(x)

    def Sigmoid(self,x):
        """Element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1./(1.+np.exp(-x))

    def Linear(self,x):
        """Identity activation."""
        return x

    def softmax(self, x):
        """Softmax along axis 0 (one distribution per column).

        The maximum is subtracted per column: with a single global maximum, a
        column far below it would underflow to all zeros and divide 0 by 0.
        """
        e_x = np.exp(x - np.max(x, axis=0, keepdims=True))
        return e_x / e_x.sum(axis=0, keepdims=True)




In [67]:
# NumPy MLP with 2 inputs, one hidden layer of 10 units and 5 outputs.
# Note: this rebinds `model`, which earlier referred to the PyTorch Model.
model = MLP(structures=[2, 10, 5])

In [68]:
# Create the weights/biases (seeded, so the values are reproducible) and
# show the returned {"W": ..., "b": ...} parameter dictionary.
mlp = model.initialize_mlp()
print(mlp)

Successfully initilaize MLP
{'W': {'0': array([[ 1.24737338,  0.28295388],
       [ 0.69207227,  1.58455078],
       [ 1.32056292, -0.69103982],
       [ 0.67181396, -0.10702571],
       [-0.07298675,  0.29033699],
       [ 0.10185419,  1.02832666],
       [ 0.53813494,  0.08603723],
       [ 0.3138587 ,  0.23594338],
       [ 1.05647344, -0.1450688 ],
       [ 0.22137229, -0.60393689]]), '1': array([[-0.80732627,  0.20669235,  0.27335873, -0.23469319,  0.71775943,
        -0.45991081,  0.01447011, -0.05919273,  0.48470735,  0.46465204],
       [ 0.04899868,  0.11958549, -0.2807425 , -0.62638284, -0.11001948,
         0.04944189,  0.38905207,  0.38022589, -0.12248349, -0.09559652],
       [-0.33158156, -0.4490491 , -0.53957001,  0.61688935, -0.16116617,
        -0.13853126, -0.39616868,  0.24586404, -0.51035931, -0.06727438],
       [-0.28317139,  0.12234931, -0.16153077, -0.37334868, -0.008912  ,
         0.13545043,  0.02103459,  0.09565001, -0.20059026, -0.11470883],
       [-0.2126

In [60]:
# Scratch check: a dict of empty lists keyed "0".."2" is not None.
a = {key: [] for key in ("0", "1", "2")}
a is None

False

In [62]:
# Scratch check: an empty dict has length 0 (the test MLP.train relies on).
b = {}
len(b)

0