In [1]:
import numpy as np
from sklearn.datasets import load_diabetes

In [2]:
# Load the diabetes regression dataset as a (features, target) pair instead of a Bunch object.
X,y = load_diabetes(return_X_y=True)

In [15]:
# Inspect the feature matrix dimensions as (rows, columns).
X.shape

(442, 10)

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=2)

In [7]:
from sklearn.linear_model import LinearRegression

In [8]:
# Reference model: scikit-learn's LinearRegression with default settings.
lr = LinearRegression()

In [9]:
# Fit the reference model on the training split.
lr.fit(X_train, y_train)

In [10]:
# Predict targets for the held-out test rows with the reference model.
y_pred = lr.predict(X_test)

In [11]:
from sklearn.metrics import r2_score

In [12]:
# R^2 of the reference model's predictions on the test split.
r2_score(y_test, y_pred)

0.4399338661568968

In [13]:
# Fitted per-feature coefficients of the reference model (one per input column).
lr.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [14]:
# Fitted intercept (bias term) of the reference model.
lr.intercept_

151.88331005254167

# Implementing Our Own Linear Regression Class

In [20]:
class MeraLR:
    """Ordinary least-squares linear regression with an intercept term.

    Mirrors the small part of scikit-learn's ``LinearRegression`` API used in
    this notebook: ``fit`` learns the weights, ``predict`` applies them, and
    the results are exposed as ``coef_`` and ``intercept_``.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # Both stay None until fit() has been called.
        self.coef_ = None       # weights for each feature column, set by fit()
        self.intercept_ = None  # bias term, set by fit()

    def fit(self, X_train, y_train):
        """Learn the intercept and coefficients that minimise squared error.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features. Anything ``np.asarray`` can turn into a 2-D
            float array is accepted (ndarray, DataFrame, nested lists).
        y_train : array-like of shape (n_samples,)
            Training targets.

        Returns
        -------
        None
            The fitted values are stored on ``self.intercept_`` and
            ``self.coef_``.
        """
        features = np.asarray(X_train, dtype=float)
        targets = np.asarray(y_train, dtype=float)

        # Prepend a column of ones so the first solved weight is the intercept.
        design = np.insert(features, 0, 1, axis=1)

        # Solve the least-squares problem directly rather than forming and
        # inverting X^T X: the explicit inverse fails (or is numerically
        # unreliable) when columns are collinear or there are fewer samples
        # than features. lstsq returns the same solution for well-conditioned
        # data and the minimum-norm solution when the system is rank deficient.
        betas, _residuals, _rank, _singular_values = np.linalg.lstsq(
            design, targets, rcond=None
        )

        self.intercept_ = betas[0]
        self.coef_ = betas[1:]

    def predict(self, X_test):
        """Return predictions ``X_test @ coef_ + intercept_``.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)
            Rows to predict for; must have the same number of columns that
            ``fit`` was given.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Predicted target values.
        """
        features = np.asarray(X_test, dtype=float)
        return np.dot(features, self.coef_) + self.intercept_

In [27]:
# Instantiate the hand-written regressor defined above.
mylr = MeraLR()

In [28]:
# Fit it on the same diabetes training split used for the scikit-learn model.
mylr.fit(X_train, y_train)

In [29]:
# Predict on the test split; this overwrites the y_pred produced earlier by the scikit-learn model.
y_pred = mylr.predict(X_test)

In [30]:
# R^2 of the hand-written model on the test split, for comparison with the scikit-learn score.
r2_score(y_test, y_pred)

0.43993386615689756

In [32]:
# Coefficients learned by the hand-written model; compare with lr.coef_.
mylr.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [33]:
# Intercept learned by the hand-written model; compare with lr.intercept_.
mylr.intercept_

151.8833100525417

# Testing Our Class on Simple (Single-Feature) Linear Regression

In [34]:
import pandas as pd

In [35]:
# Load the placement data; the relative path means placement.csv must be in the working directory.
df = pd.read_csv('placement.csv')

In [36]:
# Preview the first five rows.
df.head()

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25
3,7.42,3.67
4,6.94,3.57


In [38]:
# Feature matrix: the first column only, sliced as 0:1 so the result stays a 2-D DataFrame.
# Note: this rebinds X, which previously held the diabetes features.
X = df.iloc[:, 0:1]

In [40]:
# Target: the second column, selected with a scalar index so the result is a 1-D Series.
y = df.iloc[:, 1]

In [37]:
# Fresh MeraLR instance for the single-feature data.
newlr = MeraLR()

In [42]:
# 80/20 split with a fixed seed; rebinds the train/test names used earlier for the diabetes data.
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=2)

In [43]:
# Fit the hand-written model on the single-feature training split.
newlr.fit(X_train, y_train)

In [48]:
# Predict for one test row: take the row at position 1 and reshape its values to (1, 1),
# i.e. one sample with one feature, which is the 2-D layout predict() expects.
newlr.predict(X_test.iloc[1].values.reshape(1,1))

array([3.09324469])

In [45]:
# Display the held-out features.
X_test

Unnamed: 0,cgpa
112,8.58
29,7.15
182,5.88
199,6.22
193,4.57
85,4.79
10,5.32
54,6.86
115,8.35
35,6.87


In [46]:
# Display the held-out targets.
y_test

112    4.10
29     3.49
182    2.08
199    2.33
193    1.94
85     1.48
10     1.86
54     3.09
115    4.21
35     2.87
12     3.65
92     4.00
13     2.89
126    2.60
174    2.99
2      3.25
44     1.86
3      3.67
113    2.37
14     3.42
23     2.48
25     3.65
6      2.60
134    2.83
165    4.08
173    2.56
45     3.58
65     3.81
48     4.09
122    2.01
178    3.63
64     2.92
9      3.51
57     1.94
78     2.21
71     3.34
128    3.34
176    3.23
131    2.01
53     2.61
Name: package, dtype: float64