In [1]:
import numpy as np
from sklearn.datasets import load_diabetes

# Load Diabetes dataset from sklearn

In [3]:
# return_X_y=True unpacks directly into the feature matrix X and the target vector y.
X,y = load_diabetes(return_X_y=True)

In [6]:
X[:2,:]

array([[ 0.03807591,  0.05068012,  0.06169621,  0.02187235, -0.0442235 ,
        -0.03482076, -0.04340085, -0.00259226,  0.01990842, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, -0.02632783, -0.00844872,
        -0.01916334,  0.07441156, -0.03949338, -0.06832974, -0.09220405]])

In [10]:
y[:10]

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310.])

In [11]:
print(X.shape,"and ",y.shape)

(442, 10) and  (442,)


# Using Sklearn's Linear Regression

In [18]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across reruns.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2
)
X_train

array([[-0.00188202, -0.04464164, -0.06979687, ..., -0.03949338,
        -0.06291295,  0.04034337],
       [-0.00914709, -0.04464164,  0.01103904, ..., -0.03949338,
         0.01703713, -0.0052198 ],
       [ 0.02354575,  0.05068012, -0.02021751, ..., -0.03949338,
        -0.09643322, -0.01764613],
       ...,
       [ 0.06350368,  0.05068012, -0.00405033, ..., -0.00259226,
         0.08449528, -0.01764613],
       [-0.05273755,  0.05068012, -0.01806189, ...,  0.1081111 ,
         0.03605579, -0.04249877],
       [ 0.00175052,  0.05068012,  0.05954058, ...,  0.1081111 ,
         0.06898221,  0.12732762]])

In [21]:
y_train.shape,"and ", X_train.shape

((353,), 'and ', (353, 10))

In [24]:
from sklearn.linear_model import LinearRegression

# Reference model: scikit-learn's linear regression fitted on the training split.
lr = LinearRegression()
lr.fit(X_train, y_train)

LinearRegression()

In [26]:
# Predictions of the scikit-learn model on the held-out rows.
y_pred=lr.predict(X_test)

In [33]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [41]:
# Score the scikit-learn predictions against the held-out targets.
for label, metric in (
    ("MAE", mean_absolute_error),
    ("MSE", mean_squared_error),
    ("R2_score", r2_score),
):
    print(label, metric(y_test, y_pred))

MAE 45.21292481299676
MSE 3094.4295991207027
R2_score 0.4399387660024644


In [42]:
lr.coef_

array([  -9.16088483, -205.46225988,  516.68462383,  340.62734108,
       -895.54360867,  561.21453306,  153.88478595,  126.73431596,
        861.12139955,   52.41982836])

In [43]:
lr.intercept_

151.88334520854633

# Making our own Linear Regression Class

In [73]:
class mylr:
    """Ordinary least-squares linear regression solved in closed form.

    Attributes
    ----------
    coef : ndarray or None
        Per-feature weights; ``None`` until ``fit`` has been called.
    intercept : float or None
        Bias term; ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        self.coef = None
        self.intercept = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients from the training data.

        Parameters
        ----------
        X_train : array-like, 2-D
            One row per sample, one column per feature.
        y_train : array-like
            Target values, one per row of ``X_train``.
        """
        # Prepend a column of ones so the first beta plays the role of the
        # intercept. np.insert(array, index, value_to_insert, axis)
        design = np.insert(np.asarray(X_train, dtype=float), 0, 1, axis=1)
        targets = np.asarray(y_train, dtype=float)

        # Solve min ||design @ betas - targets||^2. When X^T X is invertible
        # this equals betas = (X^T X)^-1 X^T y, but lstsq never forms the
        # explicit inverse, so collinear features no longer raise
        # LinAlgError or blow up numerically; the minimum-norm solution is
        # returned instead.
        betas, *_ = np.linalg.lstsq(design, targets, rcond=None)
        self.intercept = betas[0]
        self.coef = betas[1:]

    def predict(self, X_test):
        """Return ``X_test @ coef + intercept``; ``fit`` must be called first."""
        return np.dot(X_test, self.coef) + self.intercept
        


In [75]:
lrr = mylr()

In [76]:
lrr.fit(X_train,y_train)

In [79]:
y_pred2 = lrr.predict(X_test)


In [80]:
# Baseline shown again for side-by-side comparison: y_pred holds the
# scikit-learn model's predictions.
for label, metric in (
    ("MAE", mean_absolute_error),
    ("MSE", mean_squared_error),
    ("R2_score", r2_score),
):
    print(label, metric(y_test, y_pred))

MAE 45.21292481299676
MSE 3094.4295991207027
R2_score 0.4399387660024644


In [81]:
# Metrics for the hand-written model (y_pred2).
print("MAE",mean_absolute_error(y_test,y_pred2))
print("MSE",mean_squared_error(y_test,y_pred2))
print("R2_score",r2_score(y_test,y_pred2))

# Make the equivalence explicit rather than eyeballing printed digits: fail
# loudly if the custom predictions ever drift from scikit-learn's.
np.testing.assert_allclose(y_pred2, y_pred, rtol=1e-6)

MAE 45.21292481299676
MSE 3094.429599120699
R2_score 0.43993876600246506
