In [41]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [42]:
# Load the dataset from the CSV file into a DataFrame
ds = pd.read_csv('nba_logreg.csv')

In [44]:
# Separate the features (first 18 columns) from the target (last column).
# NOTE(review): columns are picked by position; if the CSV carries a saved
# index column (e.g. 'Unnamed: 0') it ends up among the features - confirm.
X = ds.iloc[:, :18]
Y = ds.iloc[:, -1]


Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X14,X15,X16,X17,X18,X19,X20,X21,X22,X23
0,20000,2,2,2,53,4,4,3,3,2,...,18547,19118,18679,19812,0,1300,1170,0,1600,0
1,100000,2,1,2,28,0,0,0,2,0,...,14479,10775,10998,12655,1300,3005,0,700,2000,1500
2,200000,2,2,1,35,0,0,0,0,0,...,74770,75826,77741,79597,4000,3010,2600,3000,3000,3000
3,50000,2,1,1,43,0,0,0,0,2,...,29975,30559,29433,30045,2449,1778,2382,0,1251,1051
4,100000,2,6,1,29,0,0,0,0,0,...,58083,88344,49443,23159,5712,3503,59000,1600,1000,1000


In [45]:
# Feature scaling: divide every column by that column's maximum value
ma = X.to_numpy().max(axis=0)
X = pd.DataFrame(X.to_numpy() / ma)

In [46]:
# Split the data into a training set (60%) and a test set (40%).
# A fixed random_state keeps the split identical across kernel restarts,
# so the notebook reproduces under "Restart & Run All".
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.4, random_state=42
)
# Convert from DataFrame to NumPy; features are transposed so that every
# column holds one example, i.e. shape (n_features, n_examples).
X_train = X_train.to_numpy().T
X_test = X_test.to_numpy().T
# Labels become row vectors of shape (1, n_examples) to line up with the
# column-per-example layout of the features.
Y_train = Y_train.to_numpy().reshape((1, X_train.shape[1]))
Y_test = Y_test.to_numpy().reshape((1, X_test.shape[1]))

In [47]:
# Initialize parameters
def init_params(dims):
    """Create the initial weights and biases for every layer.

    Parameters
    ----------
    dims : sequence of int
        Layer sizes, input layer first. Layer ``k`` (1-based) connects
        ``dims[k-1]`` units to ``dims[k]`` units.

    Returns
    -------
    dict
        ``'W<k>'`` of shape ``(dims[k-1], dims[k])`` and ``'b<k>'`` of shape
        ``(dims[k], 1)``, each drawn uniformly from [0, 1) and scaled by 0.01.
    """
    parameters = {}
    layer_sizes = zip(dims[:-1], dims[1:])
    for layer, (n_in, n_out) in enumerate(layer_sizes, start=1):
        # Draw W before b so the random stream is consumed layer by layer.
        parameters[f'W{layer}'] = np.random.rand(n_in, n_out) * 0.01
        parameters[f'b{layer}'] = np.random.rand(n_out, 1) * 0.01
    return parameters

In [126]:
# Forward propagation
def for_prop(parameters,dims,X):
    """Run a forward pass through the network.

    Every layer except the last uses a ReLU activation; the last layer uses
    a sigmoid.

    Parameters
    ----------
    parameters : dict
        Holds ``'W<k>'`` and ``'b<k>'`` for every layer ``k``.
    dims : sequence
        Layer sizes; only its length is used here, to count the layers.
    X : ndarray
        Input activations, one column per example.

    Returns
    -------
    tuple
        ``(A, caches)`` where ``A`` is the sigmoid output of the last layer
        and ``caches`` has one ``((A_in, W, b), Z)`` entry per layer.
    """
    def _affine(layer, A_in):
        # Linear step Z = W^T . A_in + b, plus the tuple kept for back-prop.
        W = parameters['W' + str(layer)]
        b = parameters['b' + str(layer)]
        return np.dot(W.T, A_in) + b, (A_in, W, b)

    last_layer = len(dims) - 1
    caches = []
    activation = X

    # Hidden layers: affine step followed by ReLU
    for layer in range(1, last_layer):
        Z, linear_cache = _affine(layer, activation)
        caches.append((linear_cache, Z))
        activation = np.maximum(0, Z)

    # Output (classifier) layer: affine step followed by sigmoid
    Z, linear_cache = _affine(last_layer, activation)
    caches.append((linear_cache, Z))
    return 1 / (1 + np.exp(-Z)), caches

In [127]:
# Cost function
def cost(A,Y):
    """Binary cross-entropy cost, averaged over the examples.

    Parameters
    ----------
    A : ndarray
        Predicted probabilities, same shape as ``Y``.
    Y : ndarray of shape (1, m)
        Ground-truth labels (0 or 1); ``m`` is read from ``Y.shape[1]``.

    Returns
    -------
    float
        ``-(1/m) * sum(Y*log(A) + (1-Y)*log(1-A))``.
    """
    m = Y.shape[1]
    # Keep predictions strictly inside (0, 1): log(0) would otherwise turn the
    # cost into NaN/inf as soon as the sigmoid saturates.
    eps = 1e-15
    A = np.clip(A, eps, 1 - eps)
    # Use the labels passed in, not the notebook-level Y_train, so the cost
    # is also correct for the test set or any other data.
    J = np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / (-m)
    return J

In [128]:
# Gradient of the cost w.r.t. Z for a ReLU layer: dA * ReLU'(Z)
def relu_deri(dA_prev,drZ):
    """Back-propagate a gradient through a ReLU activation.

    Parameters
    ----------
    dA_prev : ndarray
        Gradient of the cost with respect to the ReLU output.
    drZ : ndarray
        Pre-activation values ``Z`` of the layer. This array is NOT modified.

    Returns
    -------
    ndarray
        ``dA_prev`` where ``Z > 0`` and 0 where ``Z <= 0``.
    """
    # Work on a copy: the caller passes the Z stored in the forward-pass
    # cache, and overwriting it in place would silently corrupt that cache.
    relu_grad = np.array(drZ, copy=True)
    relu_grad[relu_grad <= 0] = 0
    relu_grad[relu_grad > 0] = 1
    return np.multiply(dA_prev, relu_grad)
    

In [129]:
# Backward propagation
def back_prop(A,caches,Y,dims):
    """Compute the gradients of the cost for every layer.

    Layers are walked from the output back to the input. For the output
    layer ``dZ = A - Y``; for every earlier layer ``dZ`` comes from
    ``relu_deri``.

    Parameters
    ----------
    A : ndarray
        Output of the forward pass.
    caches : list
        One ``((A_in, W, b), Z)`` entry per layer, as built by ``for_prop``.
    Y : ndarray of shape (1, m)
        Ground-truth labels; ``m`` is read from ``Y.shape[1]``.
    dims : sequence
        Layer sizes; only its length is used here.

    Returns
    -------
    dict
        ``'dW<k>'`` (computed as ``dZ . A_in^T / m``, i.e. transposed
        relative to ``W<k>``) and ``'db<k>'`` of shape ``(dZ.shape[0], 1)``.
    """
    grades = {}
    m = Y.shape[1]
    output_idx = len(dims) - 2
    dA_prev = None
    for idx in range(output_idx, -1, -1):
        linear_cache, Z = caches[idx]
        A_in, W = linear_cache[0], linear_cache[1]

        # Only the source of dZ differs between the output and hidden layers.
        if idx == output_idx:
            dZ = A - Y
        else:
            dZ = relu_deri(dA_prev, Z)

        layer = str(idx + 1)
        grades["dW" + layer] = (1 / m) * np.dot(dZ, A_in.T)
        bias_grad = np.sum(dZ, axis=1, keepdims=True) / m
        grades["db" + layer] = bias_grad.reshape((dZ.shape[0], 1))

        # Gradient handed to the layer below
        dA_prev = np.dot(W, dZ)
    return grades

In [130]:
def update_param(dims,parameters,grades,learning_rate):
    """Apply one gradient-descent step to every layer's parameters.

    Parameters
    ----------
    dims : sequence
        Layer sizes; only its length is used, to enumerate the layers.
    parameters : dict
        ``'W<k>'`` / ``'b<k>'`` entries; the dict is updated in place with
        newly created arrays.
    grades : dict
        ``'dW<k>'`` / ``'db<k>'`` gradients. ``dW<k>`` is transposed before
        it is subtracted from ``W<k>``.
    learning_rate : float
        Step size.

    Returns
    -------
    dict
        The same ``parameters`` dict that was passed in.
    """
    for layer in map(str, range(1, len(dims))):
        w_key, b_key = "W" + layer, "b" + layer
        parameters[w_key] = parameters[w_key] - (learning_rate * grades["d" + w_key].T)
        parameters[b_key] = parameters[b_key] - (learning_rate * grades["d" + b_key])
    return parameters

In [137]:
# X_train has one row per feature and one column per example
features, m = X_train.shape

# Network layout: the input features feed a single output unit
dims = [features, 1]
parameters = init_params(dims)

# Hyperparameters
iteration = 1000
learning_rate = 0.005

In [138]:
# Training: full-batch gradient descent for `iteration` steps

for i in range(iteration):
    # Forward propagation on the whole training set
    A, caches = for_prop(parameters, dims, X_train)

    # Current value of the cost
    J = cost(A, Y_train)

    # Backward propagation: gradients for every layer
    grades = back_prop(A, caches, Y_train, dims)

    # Gradient-descent step
    parameters = update_param(dims, parameters, grades, learning_rate)

    # Report the cost every 100th iteration
    if not i % 100:
        print(J)
        

0.6977636224907671
0.6252027197720322
0.5836550628459994
0.5592452462082868
0.5444620748661323
0.5352260948526977
0.5292718694445211
0.5253064962655433
0.5225721262034176
0.5206137309843136
