In [22]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression
import pandas as pd

In [56]:
bias=100
X, y, coef = make_regression(100,coef=True,bias=bias,n_features=1,random_state=42,noise=15)

In [57]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [60]:
#plt.scatter(X,y)
y_train.shape

(67,)

In [48]:
class LinearRegression:
    
    def __init__(self, lr=0.01, iters=1000):
        self.lr = lr
        self.iters = iters
    
    def fit(self, X, y):
        self.X = X
        self.y = y
        
        self.coef = 0.0
        self.bias = 0.0
        
        errors = []
        
        for i in range(self.iters):
            self.gradient_dec()
            errors.append(self.error())
            
        return errors
    
    def gradient_dec(self):
        d_coef, d_bias = self.gradient()
        
        self.coef -= d_coef * self.lr
        self.bias -= d_bias * self.lr
    
    def gradient(self):
        yh = LinearRegression.hypothesis(self.bias, self.coef, self.X)
        diff = yh - self.y.reshape(-1, 1)
        d_coef = np.sum(diff * self.X) / len(self.X)
        d_bias = np.sum(diff) / len(self.X)
        
        return d_coef, d_bias
        
    def error(self):
        yh = LinearRegression.hypothesis(self.bias, self.coef, self.X)
        diff = yh - self.y.reshape(-1, 1)
        return np.sum(diff ** 2)
        
    @classmethod
    def hypothesis(cls, bias, coef, X):
        return X * coef + bias
        
    def predict(self, X):
        return LinearRegression.hypothesis(self.bias, self.coef, X).flatten()
        

In [50]:
X_train = pd.read_csv("DataSets/hardwork/Linear_X_Train.csv")
X_train.shape

(3750, 1)

In [51]:
y_train = pd.read_csv("DataSets/hardwork/Linear_Y_Train.csv")

In [52]:
X_test =  pd.read_csv("DataSets/hardwork/Test/Linear_X_Test.csv")
X_test.shape

(1250, 1)

In [53]:
sample =  pd.read_csv("DataSets/hardwork/Test/sample_submission_linear.csv")
sample.head()

Unnamed: 0,y
0,0
1,0
2,0
3,0
4,0


In [54]:
model = Linear(lr=0.01,iters=1000)

In [61]:
err = model.fit(X_train,y_train)

In [62]:
#plt.plot(err)

In [63]:
yh = model.predict(X_train)

In [64]:
y_test=model.score(X_test)

In [65]:
y_out = y_test.flatten()

In [66]:
y_out

array([ 39.41347264, 165.6859991 , 135.74468465,  81.53661843,
        88.64452279,  86.05522801,  78.22367468,  19.84520487,
       136.16663174, 141.08858239, 145.57591724, 115.25126327,
        44.24383937,  75.34232902,  47.3870658 ,  77.61867805,
       143.08049075, 110.069544  , 132.41204719, 116.08760124,
       114.64402577,  53.60432331,  72.15282436, 167.71709329,
       133.71560277, 103.31053442, 115.44399816,  35.06821355,
       144.33992968,  32.64841302, 168.39238948,  70.15119354,
        93.97720643])

In [67]:
df = pd.DataFrame.from_dict({"y":y_out})

In [68]:
df.to_csv("hardwork.csv", index=False)