__Simple Linear Regression__

In [87]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes

In [57]:
# Load the placement dataset from a CSV file in the working directory.
data = pd.read_csv('placement.csv')
# Preview the first rows to check the columns that were read.
data.head()

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25
3,7.42,3.67
4,6.94,3.57


In [58]:
# Feature = first column, target = second column, both as NumPy arrays
# (presumably cgpa and package respectively -- confirm against the CSV).
X = data.iloc[:, 0].values
y = data.iloc[:, 1].values

# Hold out 20% of the rows for testing; random_state=2 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [59]:
# Implementation of Simple Linear Regression (OLS)

class SimpelLinearRegression:
    '''
    Simple (single-feature) linear regression fitted by ordinary least squares.

    Predictions use the line y = m*x + c. The x and y values come from the
    data; the slope m and the intercept c are calculated by fit().
    '''

    def __init__(self):

        self.m = None  # slope (feature weight); set by fit()
        self.c = None  # intercept; set by fit()

    def fit(self, X_train, y_train):
        '''
        Estimate the slope and intercept from the training data.

        m = sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean)^2)
        c = y_mean - m * x_mean

        Parameters
        ----------
        X_train : array-like holding one feature value per sample.
        y_train : array-like holding one target value per sample.

        The results are stored on self.m and self.c; nothing is returned.
        '''
        # Convert once so lists and pandas Series work too, and flatten so
        # an (n, 1) column behaves like a plain 1-D vector.
        x = np.asarray(X_train, dtype=float).ravel()
        y = np.asarray(y_train, dtype=float).ravel()

        # The means do not change between samples, so compute them once
        # instead of once per loop iteration.
        x_mean = x.mean()
        y_mean = y.mean()
        x_dev = x - x_mean
        y_dev = y - y_mean

        # for calculation of m (feature weight or slope)
        num = np.sum(x_dev * y_dev)
        den = np.sum(x_dev * x_dev)

        self.m = num / den
        self.c = y_mean - (self.m * x_mean)

    def predict(self, X_test):
        '''
        Return the predicted target for each value in X_test using y = m*x + c.

        fit() must have been called first so that m and c are set.
        '''
        return (self.m * np.asarray(X_test, dtype=float)) + self.c

In [60]:
# Create an unfitted model; m and c are None until fit() is called.
lr = SimpelLinearRegression()

In [61]:
# Learn the slope and intercept from the training split.
lr.fit(X_train, y_train)

In [76]:
# Intercept learned by fit().
c = lr.c
c

-0.8961119222429152

In [77]:
# Slope (feature weight) learned by fit().
m = lr.m
m

0.5579519734250721

In [64]:
# Predict the target for every sample in the held-out test split.
pred = lr.predict(X_test)

In [65]:
# Display the predictions so they can be compared with y_test.
pred

array([3.89111601, 3.09324469, 2.38464568, 2.57434935, 1.6537286 ,
       1.77647803, 2.07219258, 2.93143862, 3.76278706, 2.93701814,
       4.09197872, 3.51170867, 2.97049525, 2.40138424, 3.18809652,
       3.46707251, 1.94386362, 3.24389172, 2.97607477, 3.41685683,
       2.55761079, 3.16577844, 2.85890486, 3.12114229, 3.68467378,
       2.8700639 , 3.49497011, 3.34432308, 3.91901361, 1.96060218,
       3.65119666, 3.2104146 , 3.74046898, 2.7863711 , 2.78079158,
       3.27178932, 3.52844723, 2.61340599, 2.65804215, 2.71383735])

In [66]:
# Display the true test targets for comparison with the predictions.
y_test

array([4.1 , 3.49, 2.08, 2.33, 1.94, 1.48, 1.86, 3.09, 4.21, 2.87, 3.65,
       4.  , 2.89, 2.6 , 2.99, 3.25, 1.86, 3.67, 2.37, 3.42, 2.48, 3.65,
       2.6 , 2.83, 4.08, 2.56, 3.58, 3.81, 4.09, 2.01, 3.63, 2.92, 3.51,
       1.94, 2.21, 3.34, 3.34, 3.23, 2.01, 2.61])

In [68]:
# Display the first five test feature values.
X_test[:5]

array([8.58, 7.15, 5.88, 6.22, 4.57])

__Multiple Linear Regression__

In [88]:
# Load the diabetes dataset; return_X_y=True gives (features, target)
# directly. This rebinds X and y from the simple-regression section.
X, y = load_diabetes(return_X_y=True)

In [89]:
# Print the dimensions of the feature matrix, then display it.
print(X.shape)
X

(442, 10)


array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]])

In [90]:
# Display the target vector.
y

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,
       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,
       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,
        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,
        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,
       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,
       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,
        60., 174., 259., 178., 128.,  96., 126., 28

In [91]:
# Hold out 20% of the rows for testing; random_state=2 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

# Confirm the sizes of the two partitions.
print(X_train.shape)
print(X_test.shape)

(353, 10)
(89, 10)


## Multiple Linear Regression Formula

The formula for multiple linear regression is expressed as:

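**Y = b0 + b1X1 + b2X2 + ... + bkXk + ε**

Where:

- **Y** is the observed value of the dependent variable. The predicted value **Y'** is the same expression without the error term: Y' = b0 + b1X1 + b2X2 + ... + bkXk.
- **b0** is the y-intercept.
- **b1 to bk** are the regression coefficients for each independent variable (X1 to Xk).
- **ε** represents the error term, i.e. the difference between the observed value Y and the predicted value Y'.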

---

## Formula for Calculating β in Multiple Linear Regression

In multiple linear regression, the β coefficients can be calculated using the following formula:

**β = (X'X)^{-1}X'Y**

Where:
- **X** is the design matrix that includes all the independent variables, with a column of ones added for the intercept.
- **Y** is the vector of observed values of the dependent variable.
- **X'** is the transpose of the design matrix X.
- **(X'X)^{-1}** is the inverse of the matrix product X'X.

### Calculation Steps

1. **Construct the Design Matrix (X)**: Include all independent variables and a column for the intercept.
2. **Compute Transpose (X')**: Calculate the transpose of the design matrix.
3. **Calculate Matrix Product**: Find the product of X' and X.
4. **Inverse Calculation**: Compute the inverse of the product from the previous step.
5. **Multiply with X'Y**: Multiply the inverse from step 4 with X' and then with the dependent variable vector Y.

This process yields the estimated coefficients β, which describe the relationship between the independent variables and the dependent variable in the model.


| Aspect                | Simple Linear Regression             | Multiple Linear Regression              |
|-----------------------|-------------------------------------|----------------------------------------|
| **Equation**          | Y = b0 + b1X + ε                    | Y = b0 + b1X1 + b2X2 + ... + bkXk + ε  |
| **Number of Variables**| One independent variable (X)       | Multiple independent variables (X1, X2, ..., Xk) |
| **Slope Calculation** | b1 = (Σ(X - X̄)(Y - Ȳ)) / (Σ(X - X̄)²) | β = (X'X)^{-1}X'Y                      |
| **Intercept Calculation** | b0 = Ȳ - b1X̄                  | Part of the β coefficients calculation   |



In [92]:
class MultipleLinearRegression:

    '''
    Implementation of Multiple Linear Regression

    Fitted by ordinary least squares using the normal equation
    beta = (X'X)^{-1} X'y, where X is the feature matrix with a leading
    column of ones for the intercept.
    '''

    def __init__(self):
        self.coef_ = None       # weights b1..bk, one per feature; set by fit()
        self.intercept_ = None  # b0; set by fit()

    def fit(self, X_train, y_train):
        '''
        Estimate the intercept and coefficients from the training data.

        Parameters
        ----------
        X_train : 2-D array-like, one row per sample and one column per feature.
        y_train : array-like of targets, one per row of X_train.

        The results are stored on self.intercept_ and self.coef_; nothing
        is returned.
        '''
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)

        # insert 1 at the 0th index as column so betas[0] is the intercept
        design = np.insert(X_train, 0, 1, axis=1)

        # calculating the coefficients: solve (X'X) beta = X'y directly,
        # which gives the same beta as (X'X)^{-1} X'y without forming the
        # explicit inverse.
        gram = np.dot(design.T, design)
        betas = np.linalg.solve(gram, np.dot(design.T, y_train))
        self.intercept_ = betas[0]
        self.coef_ = betas[1:]

    def predict(self, X_test):
        '''
        Return predictions for X_test (a single sample or a 2-D batch).

        fit() must have been called first so that the coefficients are set.
        '''
        y_pred = np.dot(np.asarray(X_test, dtype=float), self.coef_) + self.intercept_
        return y_pred

In [93]:
# Create an unfitted model; coef_ and intercept_ are None until fit() is called.
mlr  = MultipleLinearRegression()

In [94]:
# Fit on the training split; arguments are passed positionally as (features, targets).
mlr.fit(X_train, y_train)

In [95]:
# Predict the target for the first test sample only.
mlr.predict(X_test[0])

154.12138809538362

In [96]:
# Display the learned feature weights (one per input column).
mlr.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [97]:
# Display the learned intercept term.
mlr.intercept_

151.8833100525417