# Gradient Descent

## Importing Libraries

In [4]:
import pandas as pd
import random

In [5]:
df = pd.read_csv('linear_regression.csv')

split_ratio = 0.8 #test-train

print(df)

            X          Y
0   32.502345  31.707006
1   53.426804  68.777596
2   61.530358  62.562382
3   47.475640  71.546632
4   59.813208  87.230925
..        ...        ...
95  50.030174  81.536991
96  49.239765  72.111832
97  50.039576  85.232007
98  48.149859  66.224958
99  25.128485  53.454394

[100 rows x 2 columns]


In [6]:
def train_test_split(dataset, split):
    train_set = []
    test_set = []

    list_of_records = dataset.to_numpy().tolist()
    
    number_of_records = list(dataset.shape)[0]
    
    train_set_length = round(split*number_of_records,0)
    test_set_length = number_of_records-train_set_length

    for row in list_of_records:
        test_or_train = round(random.uniform(0,1),1)

        if len(train_set) == train_set_length:
            test_set.append(row)
        elif len(test_set) == test_set_length:
            train_set.append(row)
        else:    
            if test_or_train < split:
                train_set.append(row)
            else:
                test_set.append(row)
    return train_set,test_set

In [59]:
def gradient_descent(train_set):
    
    x = [row[0] for row in train_set]
    y = [row[1] for row in train_set]
    
    # Building the model
    m = 0
    c = 0

    learning_rate = 0.0001  # The learning Rate
    epochs = 1000  # The number of iterations to perform gradient descent

    n = float(len(x)) # Number of elements in X

    
    # Performing Gradient Descent 
    for i in range(epochs): 
        y_pred = list(map(lambda j:m*j+c,x))
        
        y_diff = [y[i]-y_pred[i] for i in range(len(y))]
        
        x_times_y_diff = [x[i]*y_diff[i] for i in range(len(y))]
        
        partial_derivative_m = (-2/n) * sum(x_times_y_diff)  # Derivative wrt m
        
        partial_derivative_c = (-2/n) * sum(y_diff)  # Derivative wrt c
        
        m -= learning_rate * partial_derivative_m  # Update m
        
        c -= learning_rate * partial_derivative_c  # Update c

    return m,c

In [60]:
def mse_metric(actual, predicted):
    sum_error = 0.0
    for i in range(len(actual)):
        prediction_error = predicted[i] - actual[i]
        sum_error += (prediction_error ** 2)
    mean_error = sum_error / float(len(actual))
    return mean_error

In [61]:
def test_model(test_set,slope,intercept):
    
    test_input = list(map(lambda row:row[0],test))
    
    test_target = [row[1] for row in test]

    test_output = list(map(lambda x: slope*x+intercept,test_input))

    mse = mse_metric(actual=test_target, predicted=test_output)

    return mse

In [62]:
def evaluation_algorithm(dataset,split):
    train,test = train_test_split(dataset,split)
    m,c = gradient_descent(train)
    mse = test_model(test_set=test,slope=m,intercept=c)
    print(f'Slope is {m}\nIntercept is {c}\nMean Square Error is {mse}')

In [63]:
evaluation_algorithm(dataset=df,split=split_ratio)

Slope is 1.4882505651344307
Intercept is 0.13550858633013768
Mean Square Error is 135.60886354395467
