# Linear Regression - OLS

## Data - https://drive.google.com/file/d/1ItJT3faKTNi4eZNo3GGnUORDO2_7C6JZ/view?usp=sharing

## 1. Linear Regression Scratch - OLS Method

In [27]:
import numpy as np
class LinearRegressionScratchOLS:
    
    def __init__(self):
        self.coef_ = None
        self.intercpet_ = None
    
    def fit(self, X, y):
        X = np.array(X)
        y = np.array(y)
        newX = []
        for i in X:
            newX.append([1]+list(i))
        X = np.array(newX)
        self.wT = (np.linalg.inv(X.T @ X) @ X.T) @ y.T # WT = (inv(XT*X) * XT) * yT
        self.coef_ = self.wT[1:]
        self.intercept_ = self.wT[0]
    
    def predict(self, X):
        X = np.array(X)
        newX = []
        for i in X:
            newX.append([1]+list(i))
        X = np.array(newX)
        return X @ self.wT

In [28]:
import pandas as pd
link = 'https://drive.google.com/file/d/1ItJT3faKTNi4eZNo3GGnUORDO2_7C6JZ/view?usp=sharing'
df = pd.read_csv(f'https://drive.google.com/uc?id={link.split("/")[-2]}')
df.head()

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25
3,7.42,3.67
4,6.94,3.57


In [29]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df.drop('package',axis=1),df['package'],test_size=0.2,random_state=42)

In [30]:
lr_scratch_ols = LinearRegressionScratchOLS()

In [31]:
lr_scratch_ols.fit(X_train, y_train)

In [32]:
lr_scratch_ols.coef_

array([0.57425647])

In [33]:
lr_scratch_ols.intercept_

-1.0270069374541317

In [34]:
lr_scratch_ols_preds = lr_scratch_ols.predict(X_test)

In [35]:
from sklearn.metrics import r2_score
r2_score(lr_scratch_ols_preds,y_test)

0.7337973382944322

## 2. Linear Regression Sklearn Library - OLS Method

In [36]:
from sklearn.linear_model import LinearRegression

In [37]:
lr_ols = LinearRegression()

In [38]:
lr_ols.fit(X_train,y_train)

LinearRegression()

In [39]:
lr_ols.coef_

array([0.57425647])

In [40]:
lr_ols.intercept_

-1.0270069374542108

In [41]:
lr_ols_preds = lr_ols.predict(X_test)

In [42]:
from sklearn.metrics import r2_score
r2_score(lr_ols_preds,y_test)

0.733797338294438