In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
import sys

In [2]:
df=pd.read_csv("AdmissionDataset/data.csv")

In [3]:
X = df.drop(['Chance of Admit ','Serial No.'],axis=1)
Y = df['Chance of Admit ']
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size = 0.2)
# Y_train

In [4]:
lr = LinearRegression()
lr.fit(X_train, Y_train) 
y_pred = lr.predict(X_test)
print(r2_score(Y_test,y_pred))

0.8294259977879623


In [5]:
X_train = (X_train - X_train.mean())/X_train.std()
# Y_train = (Y_train - Y_train.mean())/Y_train.std()
X_test = (X_test - X_test.mean())/X_test.std()
# Y_test = (Y_test - Y_test.mean())/Y_test.std()
# X_train

In [6]:
X_train = pd.concat([X_train,Y_train],axis=1)
ones = np.ones([X_train.shape[0],1])
Y_train = X_train.iloc[:,7:8].values
X_train = X_train.iloc[:,0:7]
X_train = np.concatenate((ones,X_train),axis=1)

In [7]:
learning_rate = 0.01
iterations = 1000
# theta = np.zeros(8) # 7 is the number of features
theta = np.zeros([1,8])
# theta.shape

In [8]:
def gradient_decent(X_train,Y_train,theta,learning_rate,iterations):
    
    for i in range(iterations):
        theta = theta - (learning_rate/len(X_train)) * np.sum(X_train * (X_train @ theta.T - Y_train), axis=0)
    
    return theta

In [9]:
g = gradient_decent(X_train,Y_train,theta,learning_rate,iterations)
g = g[0]

In [10]:
y_pred = []
for index,rows in X_test.iterrows():
    y = 0
    rows = list(rows)
    for i in range(len(rows)):
        y = y + rows[i]*g[i+1]
    y = y + g[0]
    y_pred.append(y)

# Vectorized form
# ones = np.ones([X_test.shape[0],1])
# X_test = np.concatenate((X_test,ones),axis=1)
# y_pred = X_test @ g

In [11]:
print(y_pred)

[0.580209372380562, 0.8174663082331977, 0.6380528424135073, 0.6437258208020699, 0.627484968836145, 0.5026333520753599, 0.7314218983249491, 0.6726924592227783, 0.8271644163075924, 0.9301287210535112, 0.4749799043567742, 0.6392495624113357, 0.7780647514632768, 0.60663522252327, 0.7827132475730468, 0.8112060892993826, 0.91321815824792, 0.5197485811987709, 0.8069928303069489, 0.9011051991720072, 0.546822469607684, 0.9383653067582796, 0.8724987866220567, 0.6147448806777315, 0.7776405252909252, 0.9313599405044819, 0.6046134506113371, 0.7476585445408777, 0.8865767206347845, 0.7196498737267768, 0.6010253362106257, 0.5548350639681567, 0.6432698912073904, 0.5950304033452021, 0.6361964230628612, 0.49564158653771473, 0.5565498593096841, 0.6255234989086026, 0.6199440555092041, 0.6598104657607714, 0.6275869119201886, 0.5994555038983269, 0.7895496346131367, 0.5787372340071049, 0.7799072960308695, 0.9721014639654368, 0.5780112838502347, 0.8081922034387534, 0.8414863212417345, 0.8368327677534255, 0.650

In [12]:
def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [13]:
print(r2_score(Y_test,y_pred))
print(mean_absolute_error(Y_test,y_pred))
print(mean_squared_error(Y_test, y_pred))
print(mean_absolute_percentage_error(Y_test, y_pred))

0.7843658853502914
0.04581066521162901
0.0035306731965439684
6.8091936442692225


In [14]:
try:
    testfilename = "admission-test.csv"
    #     testfilename = "data.csv"
    testing = pd.read_csv(testfilename)
    testing = testing.drop(['Serial No.','Chance of Admit '],axis=1)
    testing = (testing - testing.mean())/testing.std()
    y_pred = []
    for index,rows in testing.iterrows():
        y = 0
        rows = list(rows)
        for i in range(len(rows)):
            y = y + rows[i]*g[i+1]
        y = y + g[0]
        y_pred.append(y)
    print(y_pred)
except:
    pass

[0.5152546875262412, 0.7887492615023002, 0.7199257419300292, 0.46054835686781426, 0.8083913399858191, 0.7317471527645103, 0.7796926522960748, 0.7770177046655201, 0.6839571442714988, 0.9361081599798966, 0.6732399637046894, 0.740592642053913, 0.9379435214209059, 0.6973701235411706, 0.9024150123235536, 0.6920352338103427, 0.5185849800265809, 0.6010416040412933, 0.7011590684164157, 0.795147118503308, 0.6513739740111106, 0.9355679031699045, 0.8669437765500304, 0.6558301508054405, 0.7208357701056805, 0.6165968815985754, 0.747746681980834, 0.8454351382483624, 0.9450228991408741, 0.5206066115662595, 0.7167162980911914, 0.6092013227208829, 0.5975155568010483, 0.8243627272369759, 0.8862071856223553, 0.8382641294224873, 0.6024740960791318, 0.5352136071230873, 0.6511555418397684, 0.8343280077746789, 0.7278950270438125, 0.5821845447403249, 0.7263850353130148, 0.8895058187977146, 0.773380912397265, 0.5899626062588005, 0.5623016428513492, 0.4727786130303726, 0.6892203599436881, 0.715576034801677]
