In [45]:
import numpy as np
from sklearn.model_selection import train_test_split

In [46]:
# Read the comma-separated numeric table into a NumPy array.
csv_path = "../datasets/data.csv"
data = np.loadtxt(csv_path, delimiter=',')

In [47]:
# Column 0 is used as the single input feature, column 1 as the target.
x = data[:,0]
y = data[:,1]

# Fixed seed: without it every run draws a different random split, so the
# scores printed further down could never be reproduced by Restart & Run All.
# Outputs saved in the notebook before this seed was introduced will differ
# until the notebook is re-run.
RANDOM_STATE = 42
X_train,X_test,y_train,y_test = train_test_split(x,y,random_state=RANDOM_STATE)

### From-scratch implementation of simple linear regression

In [48]:
def fit(X_train,y_train):
    """Fit a one-feature least-squares line ``y = m*x + c``.

    The slope is the covariance of x and y divided by the variance of x,
    and the intercept makes the line pass through the point of means.

    Parameters
    ----------
    X_train : array-like
        Feature values (anything ``np.asarray`` can turn into floats).
    y_train : array-like
        Target values, same shape as ``X_train``.

    Returns
    -------
    (m, c) : tuple
        Slope and intercept of the fitted line.

    Notes
    -----
    If every x value is identical, the variance of x is zero, the slope is
    undefined, and the returned values are not meaningful (typically nan).
    """
    x_arr = np.asarray(X_train, dtype=float)
    y_arr = np.asarray(y_train, dtype=float)

    x_mean = x_arr.mean()
    y_mean = y_arr.mean()

    # Centre the data *before* multiplying. The algebraically equivalent
    # E[xy] - E[x]E[y] and E[x^2] - E[x]^2 subtract two huge, nearly equal
    # numbers when the values are large relative to their spread, which
    # destroys the significant digits of the result.
    x_centered = x_arr - x_mean
    num  = (x_centered * (y_arr - y_mean)).mean()
    deno = (x_centered ** 2).mean()

    m = num / deno
    c = y_mean - m * x_mean
    return m,c

In [49]:
def predict(x,m,c):
    """Evaluate the line with slope ``m`` and intercept ``c`` at ``x``.

    ``x`` may be a scalar or an array; the arithmetic is applied
    element-wise by whatever type ``x`` is.
    """
    scaled = m * x
    return scaled + c

In [50]:
def score(y_true,y_pred):
    """Return the coefficient of determination, ``1 - SS_res / SS_tot``.

    ``SS_res`` is the sum of squared differences between ``y_true`` and
    ``y_pred``; ``SS_tot`` is the sum of squared deviations of ``y_true``
    from its own mean. The ratio is undefined when ``y_true`` is constant,
    because ``SS_tot`` is then zero.
    """
    residuals = y_true - y_pred
    ss_res = (residuals ** 2).sum()

    deviations = y_true - (y_true.mean())
    ss_tot = (deviations ** 2).sum()

    return 1 - (ss_res / ss_tot)

In [51]:
def cost(x,y,m,c):
    """Mean squared error of the line ``m*x + c`` against the targets ``y``."""
    line_values = m * x + c
    errors = y - line_values
    return (errors ** 2).mean()

In [52]:
# Fit the from-scratch model on the training split.
m,c = fit(X_train,y_train)

# R^2 on the data the line was fitted to.
y_pred_training = predict(X_train,m,c)
training_score = score(y_train,y_pred_training)
print("Score on training data:",training_score)

# R^2 on the held-out split.
y_pred_testing = predict(X_test,m,c)
testing_score = score(y_test,y_pred_testing)
print("Score on testing data:",testing_score)

print("M(slope):",m,"C(intercept):",c)

# Mean squared error of the fitted line on the training split.
training_cost = cost(X_train,y_train,m,c)
print("Cost Function on training data:",training_cost)

Score on training data: 0.563562295309294
Score on testing data: 0.6478679200661972
M(slope): 1.3364076870104948 C(intercept): 8.076711405046566
Cost Function on training data: 112.42946928230148


### Comparing the results with scikit-learn's built-in `LinearRegression` estimator

In [53]:
from sklearn.linear_model import LinearRegression

In [54]:
# The estimator is given the feature as a 2-D column, so the 1-D training
# array is reshaped to (n_samples, 1) before fitting.
linearModel = LinearRegression()
X_train_column = X_train.reshape(-1,1)
linearModel.fit(X_train_column,y_train)

LinearRegression()

In [55]:
# Same column reshape as used for fitting, applied to the held-out feature.
X_test_column = X_test.reshape(-1,1)
y_predict = linearModel.predict(X_test_column)

In [57]:
# Score the scikit-learn predictions with the same hand-written R^2
# function so the two models are compared on identical terms.
sklearn_testing_score = score(y_test,y_predict)
print("Score on testing data:",sklearn_testing_score)
print("M(Slope):",linearModel.coef_)
print("Intercept:",linearModel.intercept_)

Score on testing data: 0.6478679200661974
M(Slope): [1.33640769]
Intercept: 8.076711405046865
