In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [2]:
df=pd.read_csv("AdmissionDataset/data.csv")

In [3]:
X = df.drop(['Chance of Admit ','Serial No.'],axis=1)
Y = df['Chance of Admit ']
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size = 0.2)
# Y_train

In [4]:
lr = LinearRegression()
lr.fit(X_train, Y_train) 
y_pred = lr.predict(X_test)
print(r2_score(Y_test,y_pred))

0.8045886472085619


In [5]:
X_train = (X_train - X_train.mean())/X_train.std()
# Y_train = (Y_train - Y_train.mean())/Y_train.std()
X_test = (X_test - X_test.mean())/X_test.std()
# Y_test = (Y_test - Y_test.mean())/Y_test.std()
# X_train

In [6]:
X_train = pd.concat([X_train,Y_train],axis=1)
ones = np.ones([X_train.shape[0],1])
Y_train = X_train.iloc[:,7:8].values
X_train = X_train.iloc[:,0:7]
X_train = np.concatenate((ones,X_train),axis=1)

In [7]:
learning_rate = 0.01
iterations = 1000
# theta = np.zeros(8) # 7 is the number of features
theta = np.zeros([1,8])
# theta.shape

In [8]:
def gradient_descent_mean_absolute(X_train,Y_train,theta,learning_rate,iterations):
    
    for i in range(iterations):
        p = X_train @ theta.T - Y_train
        theta = theta - (learning_rate/len(X_train)) * np.sum((X_train * (p/np.absolute(p))), axis=0)
    
    return theta

In [9]:
g = gradient_descent_mean_absolute(X_train,Y_train,theta,learning_rate,iterations)
g = g[0]

In [10]:
y_pred = []
for index,rows in X_test.iterrows():
    y = 0
    rows = list(rows)
    for i in range(len(rows)):
        y = y + rows[i]*g[i+1]
    y = y + g[0]
    y_pred.append(y)


In [11]:
def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [12]:
print("Mean Absolute Error")
print(r2_score(Y_test,y_pred))
print(mean_absolute_error(Y_test,y_pred))
print(mean_squared_error(Y_test, y_pred))
print(mean_absolute_percentage_error(Y_test, y_pred))

Mean Absolute Error
0.7734825224998861
0.04677773137602631
0.004357346127433548
7.882525484095408


In [13]:
mse = np.sum(np.power((Y_test-y_pred),2))/len(Y_test)
print(mse)
mae = np.sum(abs(Y_test-y_pred))/len(Y_test)
print(mae)

0.004357346127433548
0.04677773137602631


## Difference between MSE and MAE
* Both mean squared error (MSE) and mean absolute error (MAE) are used in predictive modeling. Because of the square, large errors have relatively greater influence on MSE than do the smaller error. Therefore, MAE is more robust to outliers since it does not make use of square.

## Difference between MAE and MAPE
* MAPE is used to normalize – or weight – errors by the inverse of the actual observation value, This could be useful if we are interested in minimizing the relative – rather than absolute - error.

In [14]:
def gradient_decent_mean_square(X_train,Y_train,theta,learning_rate,iterations):
    
    for i in range(iterations):
        theta = theta - (learning_rate/len(X_train)) * np.sum(X_train * (X_train @ theta.T - Y_train), axis=0)
    
    return theta

In [15]:
theta = np.zeros([1,8])
g = gradient_decent_mean_square(X_train,Y_train,theta,learning_rate,iterations)
g = g[0]

In [16]:
y_pred = []
for index,rows in X_test.iterrows():
    y = 0
    rows = list(rows)
    for i in range(len(rows)):
        y = y + rows[i]*g[i+1]
    y = y + g[0]
    y_pred.append(y)

# Vectorized form
# ones = np.ones([X_test.shape[0],1])
# X_test = np.concatenate((X_test,ones),axis=1)
# y_pred = X_test @ g

In [17]:
print("Mean Square Error")
print(r2_score(Y_test,y_pred))
print(mean_absolute_error(Y_test,y_pred))
print(mean_squared_error(Y_test, y_pred))
print(mean_absolute_percentage_error(Y_test, y_pred))

Mean Square Error
0.7845453149965098
0.046944040402229756
0.0041445395194147925
7.814233276803395
