In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the dataset from "data.csv"; the relative path is resolved against the
# notebook's current working directory.
data = pd.read_csv("data.csv")

In [None]:
# Preview the first rows to sanity-check that the file was parsed as expected.
data.head()

In [None]:
# Sequential (unshuffled) row split: first 70% for training, next 20% for
# cross-validation, final 10% for testing. Each slice starts where the previous
# one ended, so the three parts are contiguous and non-overlapping.
training_data = data.iloc[:int(0.7*len(data))]
cv_data = data.iloc[len(training_data):int(0.9*len(data))]
testing_data = data.iloc[len(training_data) + len(cv_data):]

In [None]:
# Training feature matrix: drop the 'id' and 'diagnosis' columns plus whatever
# column sits at position 32 of `data` (presumably a non-feature column such as
# an empty trailing column -- TODO confirm against the CSV).
X_T = training_data.drop(['id','diagnosis',data.columns[32]],axis=1)

In [None]:
# Preview the remaining feature columns.
X_T.head()

In [None]:
# Centre every feature column at zero by subtracting its training-set mean.
X_dash_T = X_T - np.mean(X_T,axis=0)

In [None]:
# Convert the centred data to a plain NumPy array (rows = samples, columns = features).
X_dash_T = np.array(X_dash_T)

In [None]:
# Scale every column by its standard deviation (np.std default, i.e. ddof=0).
# NOTE: a constant (zero-variance) column would cause a division by zero here.
X_dash_T = X_dash_T/np.std(X_dash_T,axis=0)

In [None]:
# (number of training samples, number of features)
X_dash_T.shape

In [None]:
# Covariance matrix of the standardised features; rowvar=False tells np.cov that
# columns are the variables and rows are the observations.
sigma = np.cov(X_dash_T,rowvar=False)

In [None]:
# Should be square: (number of features, number of features).
sigma.shape

In [None]:
# Display the full covariance matrix.
sigma

In [None]:
# Singular value decomposition of the covariance matrix; np.linalg.svd returns
# the left singular vectors, the singular values and the right singular vectors.
sigma_factors = np.linalg.svd(sigma)

In [None]:
# Shape of the left-singular-vector matrix.
sigma_factors[0].shape

In [None]:
# Q holds the left singular vectors of the covariance matrix as its columns.
Q = sigma_factors[0]

In [None]:
# Confirm Q's dimensions.
Q.shape

In [None]:
# Encoder: the first three columns of Q, transposed so that each row is one direction.
encoding_matrix = Q[:,0:3].T

In [None]:
# (3, number of features)
encoding_matrix.shape

In [None]:
# Decoder: the same three columns of Q, untransposed (it is the transpose of the encoder).
decoding_matrix = Q[:,0:3]

In [None]:
# (number of features, 3)
decoding_matrix.shape

In [None]:
# Euclidean norm of each decoder column; the singular vectors returned by SVD are
# unit length, so every entry should be (numerically) 1.
np.linalg.norm(decoding_matrix,axis=0)

In [None]:
# encoder @ decoder: because the selected columns of Q are orthonormal, this
# 3x3 product should be close to the identity matrix.
np.matmul(encoding_matrix,decoding_matrix)

In [None]:
# Network layout, indexed from 1 (slot 0 is an unused placeholder):
# layer 1 = input features, layer 2 = 3-unit bottleneck, layer 3 = reconstruction.
neurons = [None, X_dash_T.shape[1], 3, X_dash_T.shape[1]]

# One standard-normal weight matrix per non-input layer, shaped (fan_in, fan_out).
# Slots 0 and 1 stay None because the input layer has no incoming weights.
theta_hat_initial = [None, None] + [np.random.randn(fan_in, fan_out)
                                    for fan_in, fan_out in zip(neurons[1:-1], neurons[2:])]

# Activation name per layer (again only layers 2 and 3 are real entries).
activation = [None, None] + ["relu"]*2

# Per-layer caches filled during training: layer outputs, forward-pass
# derivatives and backward-pass derivatives. Three independent lists.
H, frwrd_dels, backwrd_dels = ([None]*len(neurons) for _ in range(3))

In [None]:
def G(theta_hat_l,Hl_minus1):
    """Return the pre-activation of a layer.

    Computes ``theta_hat_l.T @ Hl_minus1``: the transposed weight matrix of the
    layer applied to the output of the previous layer.

    Parameters
    ----------
    theta_hat_l : np.ndarray
        Weight matrix of the layer.
    Hl_minus1 : np.ndarray
        Output of the previous layer.

    Returns
    -------
    np.ndarray
        The matrix product of ``theta_hat_l`` transposed and ``Hl_minus1``.
    """
    weights_T = theta_hat_l.T
    return weights_T @ Hl_minus1

In [None]:
def f(theta_hat_l,Hl_minus1,a):
    """Return the activated output of a layer.

    The pre-activation ``G(theta_hat_l, Hl_minus1)`` is computed once and then
    passed through the activation named by ``a``.

    Parameters
    ----------
    theta_hat_l : np.ndarray
        Weight matrix of the layer.
    Hl_minus1 : np.ndarray
        Output of the previous layer; when 2-D, each column is treated as one
        sample by the softmax branch.
    a : str
        One of ``"linear"``, ``"sigmoid"``, ``"softmax"`` or ``"relu"``.

    Returns
    -------
    np.ndarray
        Activated layer output with the same shape as the pre-activation
        (a 1-D softmax result is returned as a column vector).

    Raises
    ------
    ValueError
        If ``a`` is not a supported activation name.
    """
    pre_activation = G(theta_hat_l,Hl_minus1)
    if a == "linear":
        return pre_activation
    elif a == "sigmoid":
        return 1/(1 + np.exp(-pre_activation))
    elif a == "softmax":
        # Normalise each column (sample) on its own; subtracting the column
        # maximum first leaves the result unchanged but avoids overflow in exp.
        shifted = pre_activation - np.max(pre_activation,axis=0,keepdims=True)
        exp_shifted = np.exp(shifted)
        layer_activation = exp_shifted/np.sum(exp_shifted,axis=0,keepdims=True)
        if layer_activation.ndim == 1:
            # Keep returning a column vector for 1-D input.
            layer_activation = layer_activation.reshape(layer_activation.shape[0],1)
        return layer_activation
    elif a == "relu":
        return (pre_activation > 0)*pre_activation
    # Fail loudly instead of silently returning None for a misspelt name.
    raise ValueError("Unsupported activation: {!r}".format(a))

In [None]:
def f_prime(theta_hat_l,Hl_minus1,a):
    """Return the element-wise derivative of the activation at the pre-activation.

    Parameters
    ----------
    theta_hat_l : np.ndarray
        Weight matrix of the layer.
    Hl_minus1 : np.ndarray
        Output of the previous layer.
    a : str
        One of ``"linear"``, ``"sigmoid"``, ``"softmax"`` or ``"relu"``.

    Returns
    -------
    np.ndarray
        Array shaped like the layer output. For ``"relu"`` this is a boolean
        mask that is True where the layer output is positive. For
        ``"softmax"`` only the element-wise term ``s*(1-s)`` is returned, not
        the full Jacobian.

    Raises
    ------
    ValueError
        If ``a`` is not a supported activation name.
    """
    if a == "linear":
        return np.ones_like(G(theta_hat_l,Hl_minus1))
    elif a == "sigmoid" or a == "softmax":
        # Evaluate the forward pass once and reuse it.
        activated = f(theta_hat_l,Hl_minus1,a)
        return activated*(1-activated)
    elif a == "relu":
        return f(theta_hat_l,Hl_minus1,a) > 0
    # Returning None here would only surface later as an obscure attribute error.
    raise ValueError("Unsupported activation: {!r}".format(a))

In [None]:
def del_Hl(theta_hat_l,Hl_minus1,a):
    """Return the per-sample forward-pass derivative of a layer w.r.t. its weights.

    For every sample (column of ``Hl_minus1``) this is the outer product of the
    activation derivative of the layer with the previous layer's output.

    Parameters
    ----------
    theta_hat_l : np.ndarray
        Weight matrix of the layer.
    Hl_minus1 : np.ndarray
        Previous layer output, 2-D with one column per sample.
    a : str
        Activation name understood by ``f_prime``.

    Returns
    -------
    np.ndarray
        Array of shape ``(batch, n_l, n_l_minus_1)``.
    """
    f_dash = f_prime(theta_hat_l,Hl_minus1,a)
    # (n_l, batch) -> (batch, n_l, 1). This must be a transpose: a plain reshape
    # keeps the flat element order and would mix values from different samples
    # whenever the batch holds more than one column.
    f_dash = f_dash.T[:,:,np.newaxis]
    # (n_l_minus_1, batch) -> (batch, 1, n_l_minus_1)
    Hl_minus1_T = Hl_minus1.T[:,np.newaxis,:]
    return np.matmul(f_dash,Hl_minus1_T)

In [None]:
def del_J_del_Yi_hat(Yi_hat,gt_label,loss_fn):
    """Return the derivative of the loss with respect to the network output.

    Parameters
    ----------
    Yi_hat : np.ndarray
        Network output, 2-D with one row per output unit and one column per
        sample.
    gt_label : np.ndarray
        Ground-truth targets, broadcastable against ``Yi_hat``.
    loss_fn : str
        ``"mse"`` or ``"cross entropy"``.

    Returns
    -------
    np.ndarray
        Element-wise derivative with the shape of ``Yi_hat``.

    Raises
    ------
    ValueError
        If ``loss_fn`` is not a supported loss name.
    """
    if loss_fn == "mse":
        return (Yi_hat - gt_label)
    elif loss_fn == "cross entropy":
        # The number of output units is axis 0 (axis 1 is the batch), so the
        # binary / multi-class decision must look at shape[0].
        if Yi_hat.shape[0] == 1:
            return (Yi_hat - gt_label)/(Yi_hat * (1-Yi_hat))
        return -gt_label/Yi_hat
    raise ValueError("Unsupported loss function: {!r}".format(loss_fn))

In [None]:
def del_J(theta_hat_lplus1,del_J_del_Hlplus1,Hl,a):
    """Back-propagate the loss derivative from layer l+1 to the output of layer l.

    Parameters
    ----------
    theta_hat_lplus1 : np.ndarray
        Weight matrix of layer l+1.
    del_J_del_Hlplus1 : np.ndarray
        Derivative of the loss w.r.t. the output of layer l+1, shaped
        ``(batch, n_l_plus_1, 1)``.
    Hl : np.ndarray
        Output of layer l, 2-D with one column per sample.
    a : str
        Activation name of layer l+1, as understood by ``f_prime``.

    Returns
    -------
    np.ndarray
        Derivative of the loss w.r.t. the output of layer l, shaped
        ``(batch, n_l, 1)``.
    """
    f_dash = f_prime(theta_hat_lplus1,Hl,a)
    # (n_l_plus_1, batch) -> (batch, n_l_plus_1, 1). A transpose is required; a
    # plain reshape would pair each sample's upstream gradient with activation
    # derivatives belonging to other samples when batch > 1.
    f_dash = f_dash.T[:,:,np.newaxis]
    J_dash = np.matmul(theta_hat_lplus1,(del_J_del_Hlplus1*f_dash))
    return J_dash

In [None]:
def J(X_dash,theta_hat_initial,gt_label,activations,loss_fn):
    """Run a forward pass and return the loss of the network output.

    Parameters
    ----------
    X_dash : np.ndarray
        Network input, 2-D with one column per sample.
    theta_hat_initial : list
        Per-layer weight matrices; entries from index 2 onwards are used, so
        the number of layers is taken from the length of this list.
    gt_label : np.ndarray
        Ground-truth targets.
    activations : list
        Per-layer activation names, indexed like ``theta_hat_initial``.
    loss_fn : str
        ``"mse"`` or ``"cross entropy"``.

    Returns
    -------
    float or np.ndarray
        For ``"mse"``: half the mean, over samples, of the squared Euclidean
        reconstruction error. For ``"cross entropy"``: the value of the
        respective expression below.

    Raises
    ------
    ValueError
        If ``loss_fn`` is not a supported loss name.
    """
    # Walk the layers using the weight list itself rather than a module-level
    # variable, so the function only depends on its arguments.
    layer_output = X_dash
    for current_layer in range(2,len(theta_hat_initial)):
        layer_output = f(theta_hat_initial[current_layer],
                         layer_output,activations[current_layer])

    Y_hat = layer_output

    if loss_fn == "mse":
        # Squared norm: matches the gradient (Y_hat - gt_label) used for "mse".
        return (1/2)*np.mean(np.linalg.norm(gt_label-Y_hat,axis=0)**2)
    elif loss_fn == "cross entropy":
        if Y_hat.shape[0] == 1:
            return -(np.matmul(gt_label,np.log(Y_hat).T) + np.matmul((1-gt_label),np.log(1-Y_hat).T))/X_dash.shape[1]
        elif Y_hat.shape[0] > 1:
            # NOTE(review): gt_label is transposed here but not in the binary
            # branch -- confirm the expected label layout for multi-class use.
            return -np.mean(gt_label.T*np.log(Y_hat),axis=0)
    raise ValueError("Unsupported loss function: {!r}".format(loss_fn))

In [None]:
# Hyper-parameters and bookkeeping for the training loop.
epsilon = 1e-2                              # gradient-descent step size
epochs = 10000                              # number of passes of the outer loop
epoch_counter = 0                           # epochs completed so far
loss_fn_history = []                        # container for recorded loss values
layer_indices = list(range(len(neurons)))   # 0 .. number of layer slots - 1
batch_size = 398                            # samples drawn per update step

# The backpropagation algorithm is coded next.

In [None]:
# Mini-batch gradient descent for the autoencoder: the network is trained to
# reproduce its own (standardised) input, so the batch doubles as the target.
while epoch_counter < epochs:
    
    l = 1
    
    for i in range(X_dash_T.shape[0]//batch_size):
        # Draw a batch of distinct rows at random.
        random_index = np.random.choice(np.arange(X_dash_T.shape[0]),size=batch_size,replace=False)
        Xi_dash_T = X_dash_T[random_index,:]
        # (batch, features) -> (features, batch). This has to be a transpose:
        # reshaping to the swapped shape keeps the flat element order and would
        # produce columns that mix features from different samples.
        Xi_dash = Xi_dash_T.T
        H[l] = Xi_dash
        
        #The below loop is implementing Forward Propagation, computing Forward Pass Derivatives
        #as well as computing the output of all the hidden layers including output layer
        
        for current_layer in layer_indices[(l+1):]:
            frwrd_dels[current_layer] = del_Hl(theta_hat_initial[current_layer],H[current_layer-1],
                                              activation[current_layer])
            H[current_layer] = f(theta_hat_initial[current_layer],H[current_layer-1],
                                 activation[current_layer])
            
        #The two line code below is computing the Backward Pass Derivative of the loss function wrt 
        #output of the output layer (Yi_hat)
            
        backwrd_dels[current_layer] = del_J_del_Yi_hat(H[current_layer],Xi_dash,"mse")
        # (outputs, batch) -> (batch, outputs, 1); transpose for the same reason as above.
        backwrd_dels[current_layer] = backwrd_dels[current_layer].T[:,:,np.newaxis]
        
        l = current_layer
        
        #The below loop is implementing Backward Propagation, computing Backward Pass Derivatives of 
        #loss function wrt the output of all the hidden layers except output layer
        
        for current_layer in layer_indices[(l-1):1:-1]:
            backwrd_dels[current_layer] = del_J(theta_hat_initial[current_layer+1],
                                                backwrd_dels[current_layer+1],H[current_layer],
                                               activation[current_layer+1])
            
        l = 1
            
        
        #The below loop is implementing Backpropagation of Error computed by the loss function computed
        #at the end of Neural Network Output, by adjusting the connection weight matrices from their 
        #initial guess to the final guess, using Gradient Descent Algorithm. 
        
        for current_layer in layer_indices[(l+1):]:
            # Per-sample gradient = backward derivative * forward derivative,
            # averaged over the batch and transposed to the weight-matrix layout.
            theta_hat_initial[current_layer] = theta_hat_initial[current_layer] - epsilon*np.mean(
                                                backwrd_dels[current_layer]*frwrd_dels[current_layer],axis=0).T
        
    epoch_counter = epoch_counter + 1
    # Evaluate the reconstruction loss on the whole training set and record it.
    epoch_loss = J(X_dash_T.T,theta_hat_initial,X_dash_T.T,activation,"mse")
    loss_fn_history.append(epoch_loss)
    print("Epoch number =",epoch_counter,"Loss Function =",epoch_loss)

In [None]:
# Learned encoder: transpose of the layer-2 weights, giving (bottleneck units, features).
trained_encoding_matrix = theta_hat_initial[2].T

In [None]:
# Confirm the learned encoder's dimensions.
trained_encoding_matrix.shape

In [None]:
# Learned decoder: transpose of the layer-3 weights, giving (features, bottleneck units).
trained_decoding_matrix = theta_hat_initial[3].T

In [None]:
# Confirm the learned decoder's dimensions.
trained_decoding_matrix.shape

In [None]:
# decoder @ encoder for the learned weights (a features-by-features matrix).
# NOTE(review): the SVD check earlier multiplies in the opposite order
# (encoder @ decoder) -- confirm which product is intended here.
np.matmul(trained_decoding_matrix,trained_encoding_matrix)

In [None]:
# Rescale every column of the learned decoder to unit Euclidean norm.
trained_decoding_matrix = trained_decoding_matrix/np.linalg.norm(trained_decoding_matrix,axis=0)

In [None]:
# First two (unit-norm) columns of the learned decoder.
a = trained_decoding_matrix[:,0]
b = trained_decoding_matrix[:,1]

In [None]:
# Angle (in radians) between the two unit-norm decoder columns. The dot product
# is clipped to [-1, 1] because rounding can push it marginally outside that
# range for nearly parallel vectors, which would make arccos return NaN.
np.arccos(np.clip(np.dot(a,b),-1.0,1.0))