In [9]:
import pandas as pd
import numpy as np
import torch

In [12]:
# Load the preprocessed congressional voting records.
cong_voting = pd.read_csv('./preprocessed-datasets/CongressionVoting_prepro.csv')

# Party name -> integer code, plus the inverse lookup derived from it.
label2num = {"democrat": 0, "republican": 1}
num2label = {code: party for party, code in label2num.items()}

# Encode the target column as integers; an unknown party name raises KeyError.
cong_voting["class"] = cong_voting["class"].map(label2num.__getitem__)

In [13]:
# Preview the first rows; "class" now holds the integer codes from label2num.
cong_voting.head()

Unnamed: 0,ID,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-crporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa,class
0,140,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0
1,383,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0
2,201,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0
3,297,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1
4,309,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1


In [4]:
# Scratch check: create a 2x3 tensor of random normal samples and display it.
tt = torch.randn(2, 3)
tt

tensor([[-1.0276,  0.4698,  0.4415],
        [-0.0025, -0.3821, -2.0435]])

In [14]:
# Feature matrix as a tensor: every column except the target ("class") and "ID".
# Rows are samples and columns are features; the result is only displayed, not stored.
torch.tensor(cong_voting.drop(["class","ID"], axis=1).values)

tensor([[1., 0., 1.,  ..., 0., 1., 1.],
        [1., 1., 0.,  ..., 1., 0., 1.],
        [0., 0., 1.,  ..., 1., 1., 1.],
        ...,
        [1., 0., 0.,  ..., 1., 1., 1.],
        [0., 0., 1.,  ..., 1., 0., 1.],
        [1., 1., 1.,  ..., 1., 0., 1.]], dtype=torch.float64)

In [15]:
# Scratch check of torch's mean-squared-error loss on two random 2x3 tensors.
pred = torch.randn(2, 3)
y_true = torch.randn(2, 3)
print(pred, y_true, sep="\n")
# Functional form of torch.nn.MSELoss() with its default "mean" reduction.
torch.nn.functional.mse_loss(pred, y_true)

tensor([[-0.8636,  1.3856,  0.0692],
        [-0.7670, -0.3111, -0.3322]])
tensor([[ 1.3987,  0.2334, -2.1896],
        [ 1.5004,  0.7900, -1.3502]])


tensor(3.1562)

# Implement a Neural Network Class

The implementation is based on this code: https://github.com/enesozeren/machine_learning_from_scratch/blob/main/neural_networks/deep_feedforward_neural_network_model.py
which accompanies the article: https://medium.com/@enozeren/building-a-neural-network-from-scratch-with-python-905e20553b53


Naming convention for the parameters and intermediate values:
* b - bias (vector)
* W - weight (matrix)
* NV - node value
* CV - calculated value (the input to the activation function)
* AV - activated value (the output of the activation function)

In [24]:
class NN:
    """Feed-forward neural network for binary classification, written from scratch.

    Hidden layers use ReLU, the output layer uses a sigmoid, and training
    minimises the log loss with mini-batch gradient descent.

    Data layout is column-wise: inputs are ``(n_features, n_samples)`` and
    targets are ``(n_outputs, n_samples)``.

    ``layer_dims`` lists the width of every layer *including* the input and
    output layers: ``layer_dims[0]`` must equal the number of input features
    and ``layer_dims[-1]`` the number of outputs. ``input_size`` and
    ``output_size`` are stored on the instance but the computations below are
    driven by ``layer_dims`` only.

    Parameters are kept in ``self.parameters`` under the keys ``"W<i>"`` and
    ``"b<i>"`` for layer ``i`` (1-based). One loss value per processed
    mini-batch is appended to ``self.loss_training``.
    """

    def __init__(self, input_size: int, layer_dims, output_size: int, epoch: int, learning_rate: float, mini_batch_size: int):
        self.input_size = input_size
        self.layer_dims = layer_dims
        self.output_size = output_size
        self.layers = []
        self.epoch = epoch
        self.learning_rate = learning_rate
        self.mini_batch_size = mini_batch_size
        self.parameters = self.init_param()
        self.loss_training = []

    def fit(self, X, Y):
        """Train the network with mini-batch gradient descent.

        Args:
            X: Tensor of shape ``(n_features, n_samples)`` (one column per
                sample). A ``(n_samples, n_features)`` matrix has to be
                transposed before it is passed in.
            Y: Tensor of shape ``(n_outputs, n_samples)``. A 1-D tensor of
                labels is accepted and treated as a single output row.

        Raises:
            ValueError: If X and Y disagree on the number of samples.
        """
        if Y.dim() == 1:
            Y = Y.reshape(1, -1)
        n_samples = X.shape[1]
        if Y.shape[1] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples (columns) but Y has {Y.shape[1]}"
            )

        last_layer = len(self.layer_dims) - 1
        for epoch_i in range(self.epoch):
            # `start` is already a sample offset (the range steps by the batch
            # size), so it must not be multiplied by the batch size again.
            for start in range(0, n_samples, self.mini_batch_size):
                stop = start + self.mini_batch_size
                # Slicing clamps at the end of the tensor, so the last batch is
                # simply shorter. No padding is needed because the loss and
                # the gradients are averaged over the actual batch size.
                X_mini = X[:, start:stop]
                Y_mini = Y[:, start:stop]

                forward_var = self.forward_forprop(X_mini)
                predictions = forward_var["AV"+str(last_layer)]  # output-layer activation

                # calculate the loss
                loss_val = self.loss(predictions, Y_mini)
                self.loss_training.append(loss_val)

                # gradient-descent step
                grads = self.backward_prop(self.parameters, forward_var, Y_mini)
                for l_i in range(1, last_layer + 1):
                    w_key, b_key = "W"+str(l_i), "b"+str(l_i)
                    self.parameters[w_key] = self.parameters[w_key] - self.learning_rate * grads["d"+w_key]
                    self.parameters[b_key] = self.parameters[b_key] - self.learning_rate * grads["d"+b_key]
        print(f"Losses: {self.loss_training}")

    def init_param(self):
        """Create randomly initialised weights and biases for every layer.

        Returns:
            dict with ``"W<i>"`` of shape ``(layer_dims[i], layer_dims[i-1])``
            and ``"b<i>"`` of shape ``(layer_dims[i], 1)`` for each layer i >= 1.
        """
        param = {}
        for i in range(1, len(self.layer_dims)):
            fan_in = self.layer_dims[i-1]
            # He initialisation: standard-normal weights are scaled by
            # sqrt(2 / fan_in) so the variance of the ReLU activations stays
            # roughly constant from layer to layer, which counters
            # vanishing/exploding gradients in deeper networks.
            # https://medium.com/@shauryagoel/kaiming-he-initialization-a8d9ed0b5899
            param['W'+str(i)] = torch.randn(self.layer_dims[i], fan_in) * (2 / fan_in) ** 0.5
            # define the bias
            param['b'+str(i)] = torch.randn(self.layer_dims[i], 1)

        return param

    def forward_forprop(self, batch_X):
        """Run a forward pass and keep every intermediate value.

        Args:
            batch_X: Tensor of shape ``(layer_dims[0], batch_size)``. It is
                cast to the dtype of the weights, so float64 or integer data
                can be passed directly.

        Returns:
            dict with ``"NV0"`` (the input), and for each layer i >= 1
            ``"NV<i>"`` (``W<i> @ previous activation + b<i>``, i.e. the value
            before the activation function) and ``"AV<i>"`` (the activated
            value).
        """
        n_layers = len(self.layer_dims)
        layer_input = batch_X
        if n_layers > 1:
            layer_input = batch_X.to(self.parameters["W1"].dtype)

        forward_var = {"NV0": layer_input}
        for l_i in range(1, n_layers):
            # Each layer consumes the *activated* output of the previous layer
            # (the raw input for the first layer).
            pre_activation = torch.mm(self.parameters["W"+str(l_i)], layer_input) + self.parameters["b"+str(l_i)]
            forward_var["NV"+str(l_i)] = pre_activation
            # The activation is chosen based on which layer it is:
            # ReLU in the hidden layers, sigmoid in the output layer.
            # https://towardsdatascience.com/activation-functions-neural-networks-1cbd9f8d91d6
            if l_i == n_layers - 1:
                layer_input = torch.sigmoid(pre_activation)
            else:
                layer_input = torch.relu(pre_activation)
            forward_var["AV"+str(l_i)] = layer_input
        return forward_var

    def loss(self, predictions, batch_Y):
        """Log loss: summed over the output units, averaged over the samples.

        Args:
            predictions: Probabilities of shape ``(n_outputs, batch_size)``.
            batch_Y: Targets (0 or 1) of the same shape.

        Returns:
            Scalar tensor with the mean loss of the batch.
        """
        # https://towardsdatascience.com/intuition-behind-log-loss-score-4e0c9979680a
        # Keep the probabilities strictly inside (0, 1): log(0) is -inf and
        # 0 * -inf is NaN, which would poison the recorded losses.
        eps = torch.finfo(predictions.dtype).eps
        clipped = torch.clamp(predictions, eps, 1 - eps)
        targets = batch_Y.to(predictions.dtype)
        per_sample = -(targets * torch.log(clipped) + (1 - targets) * torch.log(1 - clipped)).sum(dim=0)
        return per_sample.mean()

    def backward_prop(self, parameters, foward_vars, Y):
        """Compute the gradient of the log loss for every weight and bias.

        Args:
            parameters: dict of ``"W<i>"`` / ``"b<i>"`` tensors used in the
                forward pass.
            foward_vars: dict returned by ``forward_forprop`` for the batch.
            Y: Targets of shape ``(n_outputs, batch_size)``.

        Returns:
            dict with ``"dW<i>"`` and ``"db<i>"``, each shaped like the
            parameter it belongs to.
        """
        grads = {}
        last_layer = len(self.layer_dims) - 1
        if last_layer < 1:
            return grads

        predictions = foward_vars["AV"+str(last_layer)]
        batch_size = Y.shape[1]
        # For a sigmoid output combined with the log loss the gradient with
        # respect to the pre-activation collapses to (prediction - target);
        # the division mirrors the mean over samples taken in `loss`.
        d_pre = (predictions - Y.to(predictions.dtype)) / batch_size

        for l_i in range(last_layer, 0, -1):
            if l_i > 1:
                prev_activation = foward_vars["AV"+str(l_i-1)]
            else:
                prev_activation = foward_vars["NV0"]
            grads["dW"+str(l_i)] = torch.mm(d_pre, prev_activation.t())
            grads["db"+str(l_i)] = d_pre.sum(dim=1, keepdim=True)

            if l_i > 1:
                # Push the gradient through the weights, then through the ReLU
                # of the previous layer (derivative 1 where its input was
                # positive, 0 elsewhere).
                d_prev_activation = torch.mm(parameters["W"+str(l_i)].t(), d_pre)
                relu_mask = (foward_vars["NV"+str(l_i-1)] > 0).to(d_prev_activation.dtype)
                d_pre = d_prev_activation * relu_mask
        return grads

# Training the model

In [17]:
# Show the first rows of the encoded frame again before it is split into train and test sets.
cong_voting.head()

Unnamed: 0,ID,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-crporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa,class
0,140,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0
1,383,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0
2,201,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0
3,297,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1
4,309,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1


In [25]:
# Training configuration. NOTE(review): presumably the epoch count intended for NN(...);
# no visible cell reads this variable yet - confirm it is still needed.
num_epochs = 5
def train_test_split(data: pd.DataFrame, target_label : str, test_size=0.2, return_torch=True, random_state=200):
    """Randomly split a DataFrame into train/test features and targets.

    Args:
        data: Frame holding the feature columns and the target column. Test
            rows are selected by dropping the sampled index labels, so the
            index should be unique.
        target_label: Name of the target column.
        test_size: Fraction of the rows that goes into the test set.
        return_torch: If True the four parts are returned as torch tensors,
            otherwise as NumPy arrays.
        random_state: Seed passed to ``DataFrame.sample``. The default (200)
            reproduces the split that used to be hard-coded.

    Returns:
        Tuple ``(train_X, train_Y, test_X, test_Y)``. The X parts have shape
        ``(n_rows, n_features)`` (one row per sample) and the Y parts have
        shape ``(n_rows,)``.
    """
    # split the data into train and test
    train = data.sample(frac=(1 - test_size), random_state=random_state)
    test = data.drop(train.index)

    # split the train and test into X and Y
    parts = (
        train.drop([target_label], axis=1).values,
        train[target_label].values,
        test.drop([target_label], axis=1).values,
        test[target_label].values,
    )

    if return_torch:
        parts = tuple(torch.tensor(part) for part in parts)

    return parts

# "ID" is excluded from the feature matrix (as in the feature-tensor cell above):
# train_test_split only removes the target column, so without this drop the ID
# values would be passed to the model as a feature.
train_X, train_Y, test_X, test_Y = train_test_split(cong_voting.drop(columns=["ID"]), "class")


In [26]:
# NN(input_size, layer_dims, output_size, epoch, learning_rate, mini_batch_size)
# train_X has one row per sample, so the feature count is shape[1] (shape[0] is
# the number of samples). NN builds W1 with shape (layer_dims[1], layer_dims[0]),
# so layer_dims must start with the number of input features.
n_features = train_X.shape[1]
test_NN = NN(input_size=n_features, layer_dims=[n_features, 10, 5, 1], output_size=1, epoch=num_epochs, learning_rate=0.01, mini_batch_size=10)

Check the randn dimensions: 5 4.47213595499958
Check the randn dimensions: 1 3.1622776601683795
