# Math Behind Simple Linear Regression

In [1]:
# importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

This is our sample data: `x` is our input feature, and `y` is the value we are predicting.

In [2]:
# Five paired observations: x is the input feature, y is the target.
x = [6, 2, 10, 4, 8]
y = [9, 11, 5, 8, 7]

# Assemble both columns into the working table in a single step.
df = pd.DataFrame({"x": x, "y": y})

In [3]:
# Show the sample table (one row per observation).
df

Unnamed: 0,x,y
0,6,9
1,2,11
2,10,5
3,4,8
4,8,7


Calculate the **square** of `x` and of `y` for each example

In [4]:
# Element-wise squares of both columns, stored as new columns so they can be
# summed later in the variance calculation.
df["x_sqr"] = df["x"] ** 2
df["y_sqr"] = df["y"] ** 2
df

Unnamed: 0,x,y,x_sqr,y_sqr
0,6,9,36,81
1,2,11,4,121
2,10,5,100,25
3,4,8,16,64
4,8,7,64,49


Calculate the product $x_i \cdot y_i$ for each $i$-th example

In [5]:
# Row-wise product of x and y, stored as a new column for the covariance step.
df["xy"] = df["x"] * df["y"]
df

Unnamed: 0,x,y,x_sqr,y_sqr,xy
0,6,9,36,81,54
1,2,11,4,121,22
2,10,5,100,25,50
3,4,8,16,64,32
4,8,7,64,49,56


Now we will calculate $\bar{x}$ and $\bar{y}$

In [6]:
# Arithmetic mean of each column: column total divided by the number of rows.
x_bar = df["x"].sum() / len(df)
y_bar = df["y"].sum() / len(df)

print(x_bar, y_bar)

6.0 8.0


Calculate $\bar{x}^2$ and $\bar{y}^2$

In [7]:
# Squares of the two means; they are subtracted from the mean of squares
# when the variances are computed.
x_bar_sqr = x_bar * x_bar
y_bar_sqr = y_bar * y_bar

print(x_bar_sqr, y_bar_sqr)

36.0 64.0


In [8]:
# Population variance via the shortcut formula:
#   sigma^2 = (1/n) * sum(v_i^2) - v_bar^2
# applied to x and to y, using the squared columns and squared means
# computed earlier.
sigma_x_sqr = df["x_sqr"].sum() * (1 / len(df)) - x_bar_sqr
sigma_y_sqr = df["y_sqr"].sum() * (1 / len(df)) - y_bar_sqr

print(sigma_x_sqr, sigma_y_sqr)

8.0 4.0


In [9]:
# Population covariance of x and y: the mean of x*y minus x_bar * y_bar.
# The product of the means is subtracted row by row before averaging. This is
# algebraically identical to (1/n) * sum(xy) - x_bar * y_bar, but for this
# integer-valued sample every intermediate before the final division is exact,
# so the result comes out as -5.2 without any rounding.
# The value is deliberately kept at full precision: rounding it to one decimal
# would distort the slope and intercept derived from it on other data.
covariance = np.mean(df["xy"] - x_bar * y_bar)
print(covariance)

-5.2


Calculate the **Coefficient**

In [13]:
# Slope of the regression line of y on x:
#   byx = covariance(x, y) / variance(x)
byx = covariance / sigma_x_sqr # coefficient

print(byx)

-0.65


Calculate the **Intercept**

In [14]:
# Solve y_bar = byx * x_bar + intercept for the intercept, i.e. place the
# line so that it passes through the point of means (x_bar, y_bar).
intercept = y_bar - byx * x_bar

print(intercept)

11.9


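The regression equation is $y = b_{yx} \cdot x + a$, where $b_{yx}$ is the coefficient and $a$ is the intercept; in code: `y = byx * x + intercept`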

### Testing on Example

In [15]:
# Evaluate the hand-derived regression line at a single new input.
# Note: from here on `x` and `y` are scalars, no longer the sample lists.
x = 2

y = intercept + byx * x
print(y)

10.6


In [16]:
# Same hand-derived line, evaluated at a second input value.
x = 3

y = intercept + byx * x
print(y)

9.95


# Training on Model Class

Now we will build the same model using scikit-learn's `LinearRegression` class

In [18]:
from sklearn.linear_model import LinearRegression

# Create an instance of the linear regression model with its default settings
lr = LinearRegression()

Separate the data into the input feature `X` and the target `y`

In [19]:
# Pull the feature and target columns out of the table as NumPy arrays.
X = df["x"].to_numpy()
y = df["y"].to_numpy()

In [20]:
# Turn both arrays into single-column 2-D arrays (one row per sample);
# -1 lets NumPy infer the number of rows.
X = np.reshape(X, (-1, 1))
y = np.reshape(y, (-1, 1))

In [21]:
# Inspect the reshaped feature array: one column, one row per sample.
X

array([[ 6],
       [ 2],
       [10],
       [ 4],
       [ 8]], dtype=int64)

In [22]:
# Inspect the reshaped target array: one column, one row per sample.
y

array([[ 9],
       [11],
       [ 5],
       [ 8],
       [ 7]], dtype=int64)

Fit the Model to data

In [24]:
# Fit the model on the five samples; fit() returns the estimator itself,
# which is why its repr is echoed as the cell output.
lr.fit(X, y)

LinearRegression()

### Testing Model on Example

In [25]:
# Predict for x = 2; the input is a 2-D list (one sample, one feature).
# The result should agree with the manual calculation for x = 2 above.
lr.predict([[2]])

array([[10.6]])

In [26]:
# Predict for x = 9 (one sample, one feature).
lr.predict([[9]])

array([[6.05]])

Coefficient of the Model

In [20]:
# Fitted slope; compare with the hand-computed `byx`.
lr.coef_

array([[-0.65]])

Intercept of the Model

In [21]:
# Fitted intercept; compare with the hand-computed `intercept`.
lr.intercept_

array([11.9])