In [1]:
import pandas as pd
from sklearn.datasets import load_diabetes

In [2]:
# Load the diabetes dataset as a (features, target) pair of NumPy arrays
# rather than a Bunch object (that is what return_X_y=True requests).
X,y = load_diabetes(return_X_y=True)

In [3]:
X

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]])

In [4]:
X.shape

(442, 10)

In [5]:
y.shape

(442,)

## Using Sklearn Linear Regression

In [62]:
from sklearn.model_selection import train_test_split

In [63]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# shuffle (and therefore every score below) reproducible across runs.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [64]:
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((353, 10), (89, 10), (353,), (89,))

In [65]:
from sklearn.linear_model import LinearRegression

In [66]:
lr=LinearRegression()

In [67]:
# Fit the sklearn reference model on the training split, then predict the
# held-out rows so the result can be compared with the hand-written class.
lr.fit(X_train,y_train)
y_pred=lr.predict(X_test)


In [68]:
y_pred

array([139.5475584 , 179.51720835, 134.03875572, 291.41702925,
       123.78965872,  92.1723465 , 258.23238899, 181.33732057,
        90.22411311, 108.63375858,  94.13865744, 168.43486358,
        53.5047888 , 206.63081659, 100.12925869, 130.66657085,
       219.53071499, 250.7803234 , 196.3688346 , 218.57511815,
       207.35050182,  88.48340941,  70.43285917, 188.95914235,
       154.8868162 , 159.36170122, 188.31263363, 180.39094033,
        47.99046561, 108.97453871, 174.77897633,  86.36406656,
       132.95761215, 184.53819483, 173.83220911, 190.35858492,
       124.4156176 , 119.65110656, 147.95168682,  59.05405241,
        71.62331856, 107.68284704, 165.45365458, 155.00975931,
       171.04799096,  61.45761356,  71.66672581, 114.96732206,
        51.57975523, 167.57599528, 152.52291955,  62.95568515,
       103.49741722, 109.20751489, 175.64118426, 154.60296242,
        94.41704366, 210.74209145, 120.2566205 ,  77.61585399,
       187.93203995, 206.49337474, 140.63167076, 105.59

In [35]:
from sklearn.metrics import r2_score

In [23]:
r2_score(y_test,y_pred)

0.4526027629719196

In [47]:
lr.intercept_

151.34560453985995

## Making our own Linear Regression class

In [70]:
import numpy as np
class Mylr:
    """Ordinary least-squares linear regression with an intercept term.

    The model is ``y ≈ intercept_ + X @ cof_``. Both attributes are ``None``
    until :meth:`fit` has been called.

    Attributes:
        cof_: 1-D array with one coefficient per feature column.
        intercept_: Scalar bias term.
    """

    def __init__(self):
        self.cof_=None
        self.intercept_=None

    def fit(self,X_train,y_train):
        """Estimate the intercept and coefficients from training data.

        Args:
            X_train: 2-D array-like of shape (n_samples, n_features).
            y_train: 1-D array-like of shape (n_samples,).

        Returns:
            None. The results are stored on ``intercept_`` and ``cof_``.
        """
        # Prepend a column of ones so the first fitted weight is the intercept.
        design=np.insert(X_train,0,1,axis=1)
        # Solve the least-squares problem directly instead of forming and
        # inverting X^T X: explicit inversion squares the condition number and
        # fails outright when features are collinear. lstsq yields the same
        # solution on well-conditioned data and the minimum-norm solution when
        # the design matrix is rank-deficient.
        betas,_residuals,_rank,_sv=np.linalg.lstsq(design,y_train,rcond=None)
        self.intercept_=betas[0]
        self.cof_=betas[1:]

    def predict(self,X_test):
        """Return predictions for ``X_test`` using the fitted parameters.

        Args:
            X_test: 2-D array-like of shape (n_samples, n_features), with the
                same number of feature columns that ``fit`` was given.

        Returns:
            1-D array of predicted target values, one per row of ``X_test``.
        """
        y_pred=np.dot(X_test,self.cof_)+self.intercept_
        return y_pred
        



        

In [71]:
my=Mylr()

In [72]:
my.fit(X_train,y_train)

In [73]:
my.intercept_

151.34560453986

In [74]:
my.cof_

array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
       -931.48884588,  518.06227698,  163.41998299,  275.31790158,
        736.1988589 ,   48.67065743])

In [75]:
# NOTE: this rebinds y_pred, replacing the sklearn predictions assigned
# earlier; the r2_score call further down therefore scores the custom model.
y_pred=my.predict(X_test)

In [76]:
y_pred

array([139.5475584 , 179.51720835, 134.03875572, 291.41702925,
       123.78965872,  92.1723465 , 258.23238899, 181.33732057,
        90.22411311, 108.63375858,  94.13865744, 168.43486358,
        53.5047888 , 206.63081659, 100.12925869, 130.66657085,
       219.53071499, 250.7803234 , 196.3688346 , 218.57511815,
       207.35050182,  88.48340941,  70.43285917, 188.95914235,
       154.8868162 , 159.36170122, 188.31263363, 180.39094033,
        47.99046561, 108.97453871, 174.77897633,  86.36406656,
       132.95761215, 184.53819483, 173.83220911, 190.35858492,
       124.4156176 , 119.65110656, 147.95168682,  59.05405241,
        71.62331856, 107.68284704, 165.45365458, 155.00975931,
       171.04799096,  61.45761356,  71.66672581, 114.96732206,
        51.57975523, 167.57599528, 152.52291955,  62.95568515,
       103.49741722, 109.20751489, 175.64118426, 154.60296242,
        94.41704366, 210.74209145, 120.2566205 ,  77.61585399,
       187.93203995, 206.49337474, 140.63167076, 105.59

In [60]:
y_test.shape

(89,)

In [77]:
r2_score(y_test,y_pred)

0.45260276297191904

In [80]:
np.random.seed(42)

In [81]:
 np.linspace(3, 5, 100)

array([3.        , 3.02020202, 3.04040404, 3.06060606, 3.08080808,
       3.1010101 , 3.12121212, 3.14141414, 3.16161616, 3.18181818,
       3.2020202 , 3.22222222, 3.24242424, 3.26262626, 3.28282828,
       3.3030303 , 3.32323232, 3.34343434, 3.36363636, 3.38383838,
       3.4040404 , 3.42424242, 3.44444444, 3.46464646, 3.48484848,
       3.50505051, 3.52525253, 3.54545455, 3.56565657, 3.58585859,
       3.60606061, 3.62626263, 3.64646465, 3.66666667, 3.68686869,
       3.70707071, 3.72727273, 3.74747475, 3.76767677, 3.78787879,
       3.80808081, 3.82828283, 3.84848485, 3.86868687, 3.88888889,
       3.90909091, 3.92929293, 3.94949495, 3.96969697, 3.98989899,
       4.01010101, 4.03030303, 4.05050505, 4.07070707, 4.09090909,
       4.11111111, 4.13131313, 4.15151515, 4.17171717, 4.19191919,
       4.21212121, 4.23232323, 4.25252525, 4.27272727, 4.29292929,
       4.31313131, 4.33333333, 4.35353535, 4.37373737, 4.39393939,
       4.41414141, 4.43434343, 4.45454545, 4.47474747, 4.49494