# Linear Regression 

## Importing Libraries

In [6]:
import pandas as pd
import random
from math import sqrt

In [7]:
# Load the (X, Y) observations used to fit and evaluate the regression.
df = pd.read_csv('linear_regression.csv')

# Fraction of the rows that ends up in the training set; the rest is test.
split_ratio = 0.8

# The row count is the first entry of the (rows, columns) shape tuple.
number_of_records = df.shape[0]

print(df)

            X          Y
0   32.502345  31.707006
1   53.426804  68.777596
2   61.530358  62.562382
3   47.475640  71.546632
4   59.813208  87.230925
..        ...        ...
95  50.030174  81.536991
96  49.239765  72.111832
97  50.039576  85.232007
98  48.149859  66.224958
99  25.128485  53.454394

[100 rows x 2 columns]


## Train Test Split

In [14]:
def train_test_split(dataset, split):
    """Randomly partition the rows of ``dataset`` into train and test lists.

    Args:
        dataset: Object exposing ``to_numpy().tolist()`` (for example a
            pandas DataFrame); every inner list it yields is one record.
        split: Fraction of the records to place in the training set.
            Fractions that would give a size below zero or above the
            number of records are clamped to "no rows" / "all rows".

    Returns:
        A ``(train_set, test_set)`` tuple of lists of rows. Inside each
        list the rows keep the relative order they had in ``dataset``.
    """
    list_of_records = dataset.to_numpy().tolist()

    # Size the split from the dataset that was actually passed in instead of
    # a module-level record count, so the function works for any dataset.
    total = len(list_of_records)
    train_set_length = max(0, min(total, round(split * total)))

    # Draw the training rows in one uniform sample. This yields exactly
    # ``train_set_length`` rows and gives every row the same chance of being
    # selected, regardless of where it sits in the dataset.
    train_indices = set(random.sample(range(total), train_set_length))

    train_set = []
    test_set = []
    for index, row in enumerate(list_of_records):
        if index in train_indices:
            train_set.append(row)
        else:
            test_set.append(row)
    return train_set, test_set

## Utility Functions

In [54]:
def mean(values):
    """Return the arithmetic mean of a non-empty sequence of numbers.

    Raises:
        ZeroDivisionError: If ``values`` is empty.
    """
    total = sum(values)
    count = float(len(values))
    return total / count

def variance(values, mean):
    """Return the sum of squared deviations of ``values`` from ``mean``.

    The total is not divided by the number of values, so this is the
    unnormalised sum of squares rather than a per-sample variance.
    """
    squared_deviations = ((value - mean) ** 2 for value in values)
    return sum(squared_deviations)

def covariance(x, mean_x, y, mean_y):
    """Return the sum of products of the paired deviations of ``x`` and ``y``.

    The total is not divided by the number of pairs. ``y`` is indexed by
    position for every element of ``x``, so it must be at least as long.
    """
    total = 0.0
    for position, x_value in enumerate(x):
        total += (x_value - mean_x) * (y[position] - mean_y)
    return total

def coefficients(dataset):
    """Estimate the least-squares line ``y = b0 + b1 * x`` for ``dataset``.

    Args:
        dataset: Iterable of rows whose first item is x and second is y.

    Returns:
        A ``(b0, b1)`` tuple holding the intercept and the slope.
    """
    xs = [row[0] for row in dataset]
    ys = [row[1] for row in dataset]
    x_bar = mean(xs)
    y_bar = mean(ys)
    # Both helpers return unnormalised sums, so the missing 1/n cancels out.
    slope = covariance(xs, x_bar, ys, y_bar) / variance(xs, x_bar)
    intercept = y_bar - slope * x_bar
    return intercept, slope

In [58]:
def simple_linear_regression(train, test):
    """Fit a line on ``train`` and predict a y value for each row of ``test``.

    The fitted intercept and slope are printed as a side effect.

    Args:
        train: Rows passed to ``coefficients`` to fit the line.
        test: Rows whose first item is the x value to predict for.

    Returns:
        A list with one prediction per row of ``test``, in order.
    """
    b0, b1 = coefficients(train)
    print(f'Intercept: {b0}\nSlope:{b1}\n')
    return [b0 + b1 * row[0] for row in test]

In [59]:
# Calculate mean squared error
def mse_metric(actual, predicted):
    """Return the mean of the squared differences ``predicted - actual``.

    No square root is taken: the result is the MSE, not the RMSE.
    ``predicted`` is indexed by position for every element of ``actual``.

    Raises:
        ZeroDivisionError: If ``actual`` is empty.
    """
    squared_error_total = 0.0
    for position, target in enumerate(actual):
        residual = predicted[position] - target
        squared_error_total += residual ** 2
    return squared_error_total / float(len(actual))
 
# Evaluate an algorithm using a train/test split
def evaluate_algorithm(dataset, algorithm, split):
    """Score ``algorithm`` on a random train/test split of ``dataset``.

    Args:
        dataset: Data handed to ``train_test_split``.
        algorithm: Callable taking ``(train_rows, test_inputs)`` and
            returning one prediction per test input.
        split: Split fraction forwarded to ``train_test_split``.

    Returns:
        The value of ``mse_metric`` for the predictions on the test rows.
    """
    train_rows, test_rows = train_test_split(dataset, split)

    # Strip the last column so the algorithm never sees the target values.
    features = [row[:-1] for row in test_rows]
    predictions = algorithm(train_rows, features)

    # The last column of every test row is the value to predict.
    targets = [row[-1] for row in test_rows]
    return mse_metric(actual=targets, predicted=predictions)

In [1]:
# Fit on the training portion and report the error on the held-out rows.
mse = evaluate_algorithm(
    dataset=df,
    algorithm=simple_linear_regression,
    split=split_ratio,
)

print(f'MSE: {mse}')

NameError: name 'evaluate_algorithm' is not defined