In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
folder = "/content/gdrive/My Drive/RAship/ML/Dataset"

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

from math import sqrt

In [0]:
pd_data_train = pd.read_csv('{0}/training.csv'.format(folder))
pd_data_test = pd.read_csv('{0}/testing.csv'.format(folder))

In [5]:
pd_data_train.head()

Unnamed: 0,K,Psi,Th,SV,Y
0,253.15,17.7,0.026827,394.108539,75.5
1,253.15,17.7,0.02697,392.974592,74.5
2,253.15,17.7,0.026912,390.922824,73.2
3,253.15,17.7,0.026815,389.732156,72.4
4,253.15,17.7,0.026731,396.447898,77.1


In [0]:
np_data_train = np.array(pd_data_train)
np_data_test = np.array(pd_data_test)

In [0]:
X_train = np_data_train[:,:4]
Y_train = np_data_train[:,4:]

X_test = np_data_test[:,:4]
Y_test = np_data_test[:,4:]

# Linear Regression

In [8]:
linear_regressor = LinearRegression()
linear_regressor.fit(X_train, Y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

# Prediction and rmse

In [0]:
Y_pred = linear_regressor.predict(X_test)

In [10]:
#Root mean square for Linear regression
rme = sqrt(mean_squared_error(Y_test, Y_pred))
print(rme)

1.0639925446241791


## Coefficients and intercept

In [11]:
print("\n 1 intercept and {0} number of coefficients in model".format(linear_regressor.coef_.shape[1]))


 1 intercept and 4 number of coefficients in model


In [12]:
print("Coefficients are -")
print(linear_regressor.coef_)

print("Intercept is -")
print(linear_regressor.intercept_)

Coefficients are -
[[-3.59066139e-01 -2.24421320e-02 -5.42573580e+02  7.03657160e-01]]
Intercept is -
[-97.54654457]


## Initial weights for gradient descent

In [13]:
weight_K = linear_regressor.coef_[0][0]
weight_Psi = linear_regressor.coef_[0][1]
weight_Th = linear_regressor.coef_[0][2]
weight_SV = linear_regressor.coef_[0][3]
Intercept = linear_regressor.intercept_[0]

print(weight_K,  weight_Psi, weight_Th, weight_SV, Intercept)

-0.35906613939579773 -0.022442132003658077 -542.5735802058828 0.7036571600288333 -97.54654456625236


# Gradient descent

In [0]:
data_size = len(X_train)

In [0]:
import math
def loss_function(predicted, actual):
  #return math.sqrt((predicted - actual) ** 2)
  return ((predicted - actual) ** 2) / data_size

In [0]:
def predict_using_weights(X):
  return X[0] * weight_K + X[1] * weight_Psi + X[2] * weight_Th + X[3] * weight_SV + Intercept

In [0]:
def getGradients(N, y_hat, y, K, Psi, Th, SV, Int):
  grad_K   = 1/N * ( 2*K   * (y_hat - y) )
  grad_Psi = 1/N * ( 2*Psi * (y_hat - y) )
  grad_Th  = 1/N * ( 2*Th  * (y_hat - y) )
  grad_SV  = 1/N * ( 2*SV  * (y_hat - y) )
  grad_Int = 1/N * ( 2     * (y_hat - y) )
  
  return grad_K, grad_Psi, grad_Th, grad_SV, Int

In [0]:
alpha = 0.1
num_iterations = 1000


# Run through test dataset and keep on learning weights
for i in range(X_test.shape[0]):
  pred_value = predict_using_weights(X_test[i])
  actual_value = Y_test[i][0]
  
  error = loss_function(pred_value, actual_value)
  
  #for z in range(num_iterations):
  K_grad, Psi_grad, Th_grad, SV_grad, Intercept_grad = getGradients(data_size, pred_value, actual_value, weight_K, weight_Psi, weight_Th, weight_SV, Intercept)

  weight_K    = weight_K   - alpha * K_grad
  weight_Psi  = weight_Psi - alpha * Psi_grad
  weight_Th   = weight_Th  - alpha * Th_grad
  weight_SV   = weight_SV  - alpha * SV_grad
  Intercept   = weight_K   - alpha * Intercept_grad
    
  

## Final weights after Gradient descent through entire dataset

In [19]:
print(weight_K,  weight_Psi, weight_Th, weight_SV, Intercept)

-0.15813566246367974 -0.009883698355593863 -238.95383921615567 0.30989636431803513 -0.14375972145047264


## Compare performance of original Linear model and New weights on Training dataset

In [0]:
Y_pred = linear_regressor.predict(X_test)

In [21]:
#Root mean square for Linear regression
rme = sqrt(mean_squared_error(Y_test, Y_pred))
print(rme)

1.0639925446241791


In [0]:
Y_new_pred = []
for n in range(len(X_test)):
  Y_new_pred.append( predict_using_weights(X_test[n]) )

In [23]:
#Root mean square for online learning prediction
rme = sqrt(mean_squared_error(Y_test, Y_new_pred))
print(rme)

1.7464790345243426
