In [None]:
pip install pandas # type: ignore

In [21]:
import pandas as pd
import numpy as np

Data Import

In [22]:
df_train = pd.read_csv('/Users/harshsiddharthmalgatte/Desktop/PClub_ML/Lattice_Physics_Model/raw.csv')
df_test = pd.read_csv('/Users/harshsiddharthmalgatte/Desktop/PClub_ML/Lattice_Physics_Model/test.csv')

Data Processing

In [23]:
data_train = np.array(df_train)
X_train = []
y_train = []
for arr in data_train:
    arr_ = str(arr).removeprefix("['").removesuffix("']").split()
    arr_ = [float(i) for i in arr_]
    X_train.append(arr_[2:])
    y_train.append(arr_[0:2])
X_train = np.array(X_train).T
y_train = np.array(y_train).T

In [24]:
data_test = np.array(df_test)
X_test = []
y_test = []
for arr in data_test:
    arr_ = str(arr).removeprefix("['").removesuffix("']").split()
    arr_ = [float(i) for i in arr_]
    X_test.append(arr_[2:])
    y_test.append(arr_[0:2])
X_test = np.array(X_test,dtype=np.float64).T
y_test = np.array(y_test,dtype=np.float64).T    

In [25]:
m,n = X_train.shape

Forward and Backward Proppogation

In [26]:
layers = None
init_weight  = lambda shape: np.random.randn(*shape) * np.sqrt(2/shape[0])
init_bias = lambda shape: np.zeros(shape)

def init_params(input_size,hidden_size,output_size):
    parameters = {}
    parameters['Weights'] = []
    parameters['Biases'] = []
    for hidden in hidden_size:
        parameters['Weights'].append(init_weight((hidden, input_size)))
        parameters['Biases'].append(init_bias((hidden, 1)))
        input_size = hidden
    return parameters

def forward_prop(parameters,X,dropout_rate):
    layer_nodes = {}
    layer_nodes['Z'] = []
    layer_nodes['A'] = []
    index_ = 0
    X_ = X
    for weight, bias in zip(parameters['Weights'], parameters['Biases']):
        Z = np.dot(weight,X_) + bias
        A = leaky_relu(Z)
        A = apply_dropout(A, dropout_rate)
        if index_ == layers - 1:
            A = Z
        layer_nodes['Z'].append(Z)
        layer_nodes['A'].append(A)
        X_ = A
        index_ += 1    
    return layer_nodes

def compute_cost(output_layer, y):
    cost = (1/n) * np.sum(np.square(output_layer - y))
    return cost

def backward_prop(parameters,layer_nodes,X,y) :
    grad_params = {}
    grad_params['dWeights'] = []
    grad_params['dBiases'] = []
    dL = layer_nodes['A'][-1] - y
    k = layers - 1
    for node_Z,node_A in zip(layer_nodes['Z'][::-1],layer_nodes['A'][:-1][::-1]):
        if k == layers - 1:
            dZ = dL
        else :
            dZ = dA * grad_relu(node_Z)        
        dW = np.dot(dZ,node_A.T) / n
        db = np.sum(dZ,axis=1,keepdims=True) / n
        dA = np.dot(parameters['Weights'][k].T,dZ)
        grad_params['dWeights'].append(dW)
        grad_params['dBiases'].append(db)
        k -= 1
    dZ = dA * grad_relu(layer_nodes['Z'][0])
    dW = np.dot(dZ,X.T) / n
    db = np.sum(dZ,axis=1,keepdims=True) / n   
    grad_params['dWeights'].append(dW)
    grad_params['dBiases'].append(db)
    grad_params['dWeights'] = grad_params['dWeights'][::-1]
    grad_params['dBiases'] = grad_params['dBiases'][::-1]
    return grad_params

Activation Functions

In [27]:
def apply_dropout(A, dropout_rate):
    D = np.random.rand(*A.shape) > dropout_rate
    A = A * D
    A = A / (1 - dropout_rate)
    return A

def leaky_relu(x,alpha=0.01):
    return np.maximum(x*alpha,x)

def grad_relu(x,alpha=0.01):    
    grad = np.ones_like(x)
    grad[x < 0] = alpha
    return grad

def batch_norm(X):
    mu = np.mean(X,axis=1,keepdims=True)
    var = np.var(X,axis=1,keepdims=True)
    X_norm = (X - mu) / np.sqrt(var + 1e-5)
    return X_norm

def lr_decay(epoch, initial_lr, decay_rate):
    return initial_lr * np.exp(-decay_rate * epoch)

Adam Optimizer

In [28]:
def init_adam_params(parameters):
    adam_params = {
        'Weights_adam':
            {'m_t':[],'v_t':[]},
        'Biases_adam' :
            {'m_t':[],'v_t':[]}
    }
    for weight,bias in zip(parameters['Weights'],parameters['Biases']) :
        mat_W = np.zeros(weight.shape)
        mat_b = np.zeros(bias.shape)
        adam_params['Weights_adam']['m_t'].append(mat_W)
        adam_params['Weights_adam']['v_t'].append(mat_W) 
        adam_params['Biases_adam']['m_t'].append(mat_b)
        adam_params['Biases_adam']['v_t'].append(mat_b)
    return adam_params

def Adam_values(W,dW,m_t,v_t,t,lr,beta1,beta2,epsilon):
    m_t = beta1*m_t + (1-beta1)*dW
    v_t = beta2*v_t + (1-beta2)*(dW**2)
    m_cap = m_t/(1-beta1**t+epsilon)
    v_cap = v_t/(1-beta2**t+epsilon)
    W = W - (lr*m_cap)/((np.sqrt(v_cap))+epsilon)
    return W,m_t,v_t
    
def Adam_optimizer(parameters,grad_params,adam_params,lr,t): 
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-8
    for index in range(layers) :
        weight = parameters['Weights'][index]
        bias = parameters['Biases'][index]
        dW = grad_params['dWeights'][index]
        db = grad_params['dBiases'][index]
        m_t_W = adam_params['Weights_adam']['m_t'][index]
        v_t_W = adam_params['Weights_adam']['v_t'][index]
        m_t_b = adam_params['Biases_adam']['m_t'][index]
        v_t_b = adam_params['Biases_adam']['v_t'][index]
        weight,m_t_W,v_t_W = Adam_values(weight,dW,m_t_W,v_t_W,t,lr,beta1,beta2,epsilon)
        parameters['Weights'][index] = weight
        adam_params['Weights_adam']['m_t'][index] = m_t_W
        adam_params['Weights_adam']['v_t'][index] = v_t_W
        bias,m_t_b,v_t_b = Adam_values(bias,db,m_t_b,v_t_b,t,lr,beta1,beta2,epsilon)
        parameters['Biases'][index] = bias
        adam_params['Biases_adam']['m_t'][index] = m_t_b
        adam_params['Biases_adam']['v_t'][index] = v_t_b
    return parameters, adam_params   

In [29]:
input_size = m
hidden_size = [128, 64, 32, 2]
output_size = 2
layers = 4
batch_size = 64

Stocastic Gradient Descent 

In [30]:
def gradient_descent(X, y, iterations):
    parameters = init_params(input_size,hidden_size,output_size)
    adam_params = init_adam_params(parameters)
    for i in range(iterations):
        lr = lr_decay(i, initial_lr=0.000001, decay_rate=0.0001)
        permutation = np.random.permutation(n)
        X_shuffled = X[:, permutation]
        y_shuffled = y[:, permutation]
        for j in range(0, n, batch_size):
            X_batch = X_shuffled[:, j:j+batch_size]
            y_batch = y_shuffled[:, j:j+batch_size]
            layer_nodes = forward_prop(parameters, X_batch,dropout_rate=0.5)
            output_layer = layer_nodes['A'][-1]
            cost = compute_cost(output_layer, y_batch)
            for Z in layer_nodes['Z'] :
                Z = batch_norm(Z)
            grad_params= backward_prop(parameters,layer_nodes,X_batch, y_batch)
            max_grad_norm = 1.0
            for dW in grad_params['dWeights']:
                np.clip(dW, -max_grad_norm, max_grad_norm, out=dW)
            for db in grad_params['dBiases']:
                np.clip(db, -max_grad_norm, max_grad_norm, out=db)
            parameters,adam_params = Adam_optimizer(parameters,grad_params,adam_params,lr,i)    
        if i % 100 == 0:
            print("Gradient norms:", np.linalg.norm(grad_params['dWeights'][0]), np.linalg.norm(grad_params['dBiases'][0]), ...)
            print(f"Cost at iteration {i}",cost)           
    return parameters 

In [31]:
parameters = gradient_descent(X_train, y_train, 1000)

Gradient norms: 0.031165877903511836 0.0013496801554936382 Ellipsis
Cost at iteration 0 16.465128508776505
Gradient norms: 0.005902055451072678 0.0002891682712743081 Ellipsis
Cost at iteration 100 5.380289567323654
Gradient norms: 0.0010971269408025033 5.8827944157533214e-05 Ellipsis
Cost at iteration 200 1.0936762395295243
Gradient norms: 4.244198961236396e-06 2.505797703438888e-07 Ellipsis
Cost at iteration 300 0.0013781753720844762
Gradient norms: 7.11152753763953e-06 4.2207790962503373e-07 Ellipsis
Cost at iteration 400 0.0012153319383883632
Gradient norms: 6.936291967263826e-06 4.1509321355046834e-07 Ellipsis
Cost at iteration 500 0.0012095302399412163
Gradient norms: 3.1749174082585673e-06 1.955851928977455e-07 Ellipsis
Cost at iteration 600 0.0008349787024757671
Gradient norms: 2.5600922204793753e-06 1.5529455422926136e-07 Ellipsis
Cost at iteration 700 0.0008580386388621978
Gradient norms: 1.9209880212258782e-06 1.226689304195233e-07 Ellipsis
Cost at iteration 800 0.00102167963

In [32]:
layer_nodes = forward_prop(parameters, X_test,0)
output_layer = layer_nodes['A'][-1]
mse = compute_cost(output_layer, y_test)
print(mse)

0.0010077686519980942


Accuracy for Testing Data

In [33]:
accuracy = (1-np.sqrt(mse))*100
print(accuracy)

96.82546278648667
