In [1]:
import numpy as np
import pandas as pd

In [2]:
# Settings shared by the cells below.
batch_size = 1_000                          # rows used per mini-batch gradient step
learning_rate = 1e-6                        # step size applied to every theta update
num_of_epoch = 10_000                       # number of passes over the training rows
test_training_ratio = 0.2                   # fraction of all rows reserved for testing
dataset_path = './dataset/dataset_csv.csv'  # CSV file read into the DataFrame

In [3]:
# Load the CSV at dataset_path into a DataFrame.
df = pd.read_csv(dataset_path)
# Preview the first rows; as the last expression of the cell it is rendered as the cell output.
df.head()

Unnamed: 0,AT,V,AP,RH,PE
0,8.34,40.77,1010.84,90.01,480.48
1,23.64,58.49,1011.4,74.2,445.75
2,29.74,56.9,1007.15,41.91,438.76
3,19.07,49.69,1007.22,76.79,453.09
4,11.8,40.66,1017.13,97.2,464.43


In [4]:
# Column-wise split of the frame: every column except the last is an input
# feature, the last column is the value to predict.
n_columns = df.shape[1]
total_X = df.iloc[:, : n_columns - 1].values  # feature matrix (9568 x 4 for this dataset, per the original notes)
total_y = df.iloc[:, n_columns - 1].values    # target vector (length 9568, per the original notes)

In [5]:
# Build the design matrix: a leading column of ones (so theta[0] acts as the
# intercept term) followed by the feature columns.
# The matrix is rebuilt from `df` instead of from the current `total_X`, so
# re-running this cell cannot stack a second column of ones onto a matrix that
# already has one.
features = df.iloc[:, :-1].values
total_X = np.column_stack((np.ones(features.shape[0]), features))
print(total_X[:5])

[[1.00000e+00 8.34000e+00 4.07700e+01 1.01084e+03 9.00100e+01]
 [1.00000e+00 2.36400e+01 5.84900e+01 1.01140e+03 7.42000e+01]
 [1.00000e+00 2.97400e+01 5.69000e+01 1.00715e+03 4.19100e+01]
 [1.00000e+00 1.90700e+01 4.96900e+01 1.00722e+03 7.67900e+01]
 [1.00000e+00 1.18000e+01 4.06600e+01 1.01713e+03 9.72000e+01]]


In [6]:
# Reserve the trailing `test_training_ratio` share of the rows so the model can
# be evaluated later on data it was not trained on.
n_rows = total_X.shape[0]
num_of_testing_example = int(test_training_ratio * n_rows)  # int() truncates: 0.2 * 9568 = 1913.6 -> 1913
num_of_training_example = n_rows - num_of_testing_example   # 9568 - 1913 = 7655

# Rows are split in file order (no shuffling): the first 7655 rows train the
# model, the remaining 1913 test it. X / X_test keep all 5 columns
# (bias + 4 features); y / y_test are the matching target vectors.
X, X_test = total_X[:num_of_training_example], total_X[num_of_training_example:]
y, y_test = total_y[:num_of_training_example], total_y[num_of_training_example:]

In [7]:
# Start from a random theta with one entry per column of X (bias included),
# drawn uniformly from [0, 1).
# A locally seeded generator keeps the starting point, and therefore every
# loss value computed from it, identical across "Restart & Run All" without
# touching numpy's global random state.
theta_seed = 42
theta = np.random.RandomState(theta_seed).rand(X.shape[1])

In [8]:
def find_y_hat(data_point, theta):
    """Return the linear model's prediction (y_hat, i.e. h(X)) for a batch of rows.

    Parameters
    ----------
    data_point : batch of rows taken from the training-set matrix
    theta : parameter vector

    Returns
    -------
    The product ``np.dot(data_point, theta)``: one prediction per row when
    ``data_point`` is a matrix and ``theta`` is a vector.
    """
    predictions = np.dot(data_point, theta)
    return predictions

In [9]:
def testing():
    """Print the loss of the current theta on the held-out test split.

    Reads the notebook-level ``X_test``, ``y_test`` and ``theta``.
    The loss is (1 / 2m) * sum((h(x) - y)^2) with m = number of test rows.
    Nothing is returned; the value is only printed.
    """
    residual = X_test.dot(theta) - y_test    # h(x) - y for every test row
    scale = 1 / (2 * X_test.shape[0])        # the 1 / (2m) factor
    loss = scale * residual.dot(residual)    # dot of the residual with itself = sum of squares
    print("Testing loss : ", loss)

testing()  # baseline: loss of the untrained (random) theta, before training begins

Testing loss :  55120.60536675611


In [10]:
# Mini-batch gradient descent on the loss (1 / 2m) * sum((h(x) - y)^2).
log_every = 1000  # print the training loss once per this many epochs, starting at epoch 0

for epoch in range(num_of_epoch):

    if epoch % log_every == 0:
        # Loss over the whole training set, measured before this epoch's
        # updates. It is only needed for the report, so it is only computed
        # on the epochs that actually print it.
        diff = find_y_hat(X, theta) - y  # h(x) - y
        loss = (1/(2*X.shape[0]))*(diff.dot(diff))
        print("Epoch : ", epoch)
        print("Loss : ", loss)

    for index_first in range(0, X.shape[0], batch_size):  # walk the training rows one batch at a time
        # A slice that runs past the end of the array is clamped to the array
        # length, so the final batch is simply shorter than batch_size.
        index_last = index_first + batch_size
        X_batch = X[index_first:index_last]
        y_batch = y[index_first:index_last]

        m = X_batch.shape[0]  # number of rows actually in this batch

        y_hat_diff = find_y_hat(X_batch, theta) - y_batch  # h(x) - y for the batch

        # All partial derivatives at once: entry j is
        # (1/m) * sum_i (h(x_i) - y_i) * x_ij.
        # The residual is computed once per batch, before any theta entry
        # changes, so this matches updating theta[j] one coordinate at a time.
        gradient = X_batch.T.dot(y_hat_diff) / m

        # In-place update keeps `theta` the same array object that other
        # cells read.
        theta -= learning_rate * gradient
    

Epoch :  0
Loss :  54839.88671357097
Epoch :  1000
Loss :  49.94277561537955
Epoch :  2000
Loss :  31.298206821681976
Epoch :  3000
Loss :  27.32794839360615
Epoch :  4000
Loss :  25.211742373666564
Epoch :  5000
Loss :  23.52886720134371
Epoch :  6000
Loss :  22.089506234577374
Epoch :  7000
Loss :  20.846454078385648
Epoch :  8000
Loss :  19.771758230337596
Epoch :  9000
Loss :  18.842546445773362


In [11]:
# Show the learned parameters: theta[0] multiplies the all-ones column of X,
# the remaining entries multiply the feature columns in their original order.
print(theta)

[ 0.30281359 -0.71658911 -0.72144305  0.49756628  0.04508065]


In [12]:
# Re-evaluate on the held-out test split now that theta has been trained;
# compare with the testing loss printed before training.
# NOTE(review): the original remark states that gradient descent is expected to
# do slightly worse than the normal-equation method. That comparison is not
# part of this notebook -- confirm against the normal-equation results.
testing()

Testing loss :  17.878823308549265


In [15]:
# Per-row residuals (prediction minus actual value) on the test split.
X_test.dot(theta)- y_test

array([-1.89733071, -1.17424785, -2.08685487, ..., -0.20501947,
        4.61016901,  1.33292009])