# Implementing Linear Regression Functions from Scratch for 1-D Input

In [1]:
import numpy as np

In [2]:
# Load the comma-separated dataset; the cells below index it as a 2-D array
# (column 0 = input feature, column 1 = target).
data = np.loadtxt('Datasets/data.csv',delimiter=",")

In [8]:
# Split the table into the input feature (first column) and the target (second column).
x, y = data[:, 0], data[:, 1]

In [9]:
# Sanity check: how many samples were loaded.
x.shape

(100,)

In [13]:
from sklearn import model_selection
# Hold out 30% of the samples for testing. The shuffle seed is fixed so that the
# split -- and therefore every score printed further down -- is identical on
# each Restart & Run All.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y, test_size=0.3, random_state=42
)

In [14]:
x_train.shape  # training split size: test_size = 0.3 leaves 70% of the samples here

(70,)

In [15]:
def fit(x_train,y_train):
    """Fit the least-squares line ``y = m * x + c`` to 1-d data.

    Parameters
    ----------
    x_train, y_train : array-like of numbers with identical shape
        Training inputs and targets. Lists/tuples are accepted and are
        converted to float arrays.

    Returns
    -------
    (m, c) : tuple
        Slope and intercept of the fitted line.

    Raises
    ------
    ValueError
        If the inputs are empty, have different shapes, or if every value in
        ``x_train`` is the same (the slope is undefined in that case).
    """
    x_arr = np.asarray(x_train, dtype=float)
    y_arr = np.asarray(y_train, dtype=float)

    if x_arr.shape != y_arr.shape:
        raise ValueError("x_train and y_train must have the same shape")
    if x_arr.size == 0:
        raise ValueError("cannot fit a line to empty data")

    x_mean = x_arr.mean()
    y_mean = y_arr.mean()

    # Work with deviations from the mean instead of E[xy] - E[x]E[y]: the
    # expanded form subtracts two huge, nearly equal numbers when x has a
    # large offset and loses most of its precision.
    x_dev = x_arr - x_mean
    numerator = (x_dev * (y_arr - y_mean)).sum()
    denominator = (x_dev ** 2).sum()

    # The min/max comparison catches constant input even when rounding in the
    # mean leaves tiny non-zero deviations.
    if x_arr.max() == x_arr.min() or denominator == 0:
        raise ValueError("x_train has zero variance; the slope is undefined")

    m = numerator / denominator
    c = y_mean - m * x_mean
    return m,c

In [16]:
def predict(x, m, c):
    """Evaluate the line with slope ``m`` and intercept ``c`` at ``x``.

    ``x`` may be a scalar or a numpy array; the result has the same form.
    """
    slope_term = m * x
    return slope_term + c

In [17]:
def score(y_truth,y_pred):
    """Coefficient of determination, ``R^2 = 1 - u / v``.

    ``u`` is the residual sum of squares between ``y_truth`` and ``y_pred``;
    ``v`` is the total sum of squares of ``y_truth`` around its own mean.

    Parameters
    ----------
    y_truth, y_pred : array-like of numbers
        Observed targets and the corresponding predictions. Lists/tuples are
        accepted and are converted to float arrays.

    Returns
    -------
    float
        1.0 for a perfect fit, 0.0 for a model no better than predicting the
        mean, negative for a worse one. When ``y_truth`` is constant the ratio
        is undefined; 1.0 is returned if the predictions are exact and 0.0
        otherwise (the same convention scikit-learn's ``r2_score`` applies by
        default), instead of nan / -inf.

    Raises
    ------
    ValueError
        If ``y_truth`` is empty.
    """
    truth = np.asarray(y_truth, dtype=float)
    pred = np.asarray(y_pred, dtype=float)
    if truth.size == 0:
        raise ValueError("cannot score empty data")

    u = ((truth - pred) ** 2).sum()
    v = ((truth - truth.mean()) ** 2).sum()

    # Constant target: avoid 0/0 and x/0. The min/max comparison also covers
    # the case where rounding in the mean leaves v tiny but non-zero.
    if v == 0 or truth.max() == truth.min():
        return 1.0 if u == 0 else 0.0
    return 1-u/v

In [20]:
def cost(x,y,m,c):
    """Mean squared error of the line ``m * x + c`` against the targets ``y``."""
    residuals = y - m * x - c
    squared_errors = residuals**2
    # Averaging is a choice: summing instead would only rescale the cost.
    return squared_errors.mean()

In [21]:
# Learn slope and intercept from the training split only.
m, c = fit(x_train, y_train)

# Predict both splits with the same fitted line.
y_test_pred = predict(x_test, m, c)
y_train_pred = predict(x_train, m, c)

# Report R^2 on held-out and training data, then the learned parameters
# and the mean squared error on the training split.
print("Test Score: ", score(y_test, y_test_pred))
print("Train Score: ", score(y_train, y_train_pred))
print("M, C", m, c)
print("Cost on training data is ", cost(x_train, y_train, m, c))

Test Score:  0.648634820833
Train Score:  0.573241562385
M, C 1.3131276419 8.4038802997
Cost on training data is  111.388902668
