In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Creating synthetic data using sklearn

In [26]:
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=100, n_features=2, noise=60, random_state=28)

# Print some samples to verify the ranges
print("X1:", X[:, 0].min(), X[:, 0].max())
print("X2:", X[:, 1].min(), X[:, 1].max())
print("y:", y.min(), y.max())

X1: -2.8774226651402866 3.9966026385775546
X2: -2.807462041315097 2.272766915940227
y: -287.32396147127196 280.28961285532785


In [27]:
df = pd.DataFrame({'feat1':X[:,0],
                  'feat2': X[:,1],
                  'target':y})
df

Unnamed: 0,feat1,feat2,target
0,0.059188,1.514620,115.099758
1,-2.877423,0.299694,-29.028436
2,-0.039369,2.272767,219.435196
3,1.765612,-0.311381,32.236944
4,0.658188,0.439482,96.772946
...,...,...,...
95,0.376421,1.332930,133.229923
96,-0.565638,-2.807462,-116.654337
97,1.525564,-0.014732,73.186527
98,-0.183725,0.176809,-8.608231


In [28]:
import plotly.express as px
px.scatter_3d(df,x = 'feat1',y = 'feat2',z = 'target')

## Linear Regression Model fit

In [29]:
# Splitting 70/30 train and test data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=28)

In [30]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(70, 2)
(70,)
(30, 2)
(30,)


In [31]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(X_train,y_train)

# Predictions
y_pred = lr.predict(X_test)

### Difference between test and predicted target

In [46]:
np.vstack((y_test,y_pred)).T

array([[ 1.12266252e+02,  7.13536307e+01],
       [-6.65749524e-01,  1.21630489e+01],
       [ 1.15099758e+02,  1.25274918e+02],
       [-6.62114937e+01, -1.20356400e+02],
       [ 3.09732954e+01, -3.45785331e+01],
       [ 1.83818911e+02,  2.27388156e+02],
       [-2.66199534e+02, -1.70473964e+02],
       [-8.18613352e-02, -6.58973009e+01],
       [ 1.29223583e+02,  9.93723396e+01],
       [ 1.33229923e+02,  1.27306000e+02],
       [-2.23026301e-01, -5.15421205e+01],
       [-5.53823862e+01, -6.26202458e+01],
       [-6.01363906e+01, -8.70080767e+01],
       [-1.30807068e+02, -3.02203933e+01],
       [-7.92331582e+01, -8.07787472e+01],
       [-2.08326032e+02, -1.79720424e+02],
       [-6.87202525e+01, -4.34570306e+01],
       [-1.53767250e+02, -8.69178997e+01],
       [-1.35581672e+02, -1.32018453e+02],
       [-4.66813809e+01, -3.90060097e+01],
       [ 1.61280421e+01,  8.31538170e+00],
       [ 1.79159737e+02,  1.08157449e+02],
       [ 7.60640508e+01,  1.39411386e+02],
       [-6.

### Model parameters

In [32]:
print('Coeffecients:',np.round(lr.coef_,2))
print('Intercept:',np.round(lr.intercept_,2))

Coeffecients: [51.51 78.76]
Intercept: 2.93


## Model Validation

In [33]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [34]:
mae = mean_absolute_error(y_test,y_pred)
mse = mean_squared_error(y_test,y_pred)
rmse  = np.sqrt(mse)
r2 = r2_score(y_test,y_pred)

In [35]:
print("Mean Absolute error",mae)
print("Mean Square error",mse)
print("Root Mean Squared error",rmse)
print("R2 score",r2)

Mean Absolute error 40.03239929164605
Mean Square error 2453.1961629587067
Root Mean Squared error 49.52975028161062
R2 score 0.8183379384459435


# Build Own Multiple Regression Model from scratch

In [51]:
class MyMultipleRegressor:
    
    def __init__(self):
        self.coef = None
        self.intercept = None
        
    def fit(self, X,y):
        X_T = np.transpose(X)
        self.coef = np.linalg.solve(np.dot(X_T,X),np.dot(X_T,y))
        self.intercept = np.mean(y) - np.dot(np.mean(X,axis = 0),self.coef)
           
    def predict(self,X):
        y_pred = np.dot(X,self.coef) + self.intercept
        return y_pred

In [52]:
model = MyMultipleRegressor()

In [53]:
X_train.shape

(70, 2)

In [54]:
model.fit(X_train,y_train)

In [56]:
print(model.coef)
print(model.intercept)

[51.45476841 78.79501114]
2.925548022215872


In [57]:
y_pred2 = model.predict(X_test)

In [58]:
mae = mean_absolute_error(y_test,y_pred2)
mse = mean_squared_error(y_test,y_pred2)
rmse  = np.sqrt(mse)
r2 = r2_score(y_test,y_pred2)

In [59]:
print("Mean Absolute error",mae)
print("Mean Square error",mse)
print("Root Mean Squared error",rmse)
print("R2 score",r2)

Mean Absolute error 40.021785352405864
Mean Square error 2452.866519134312
Root Mean Squared error 49.526422434235165
R2 score 0.8183623489588989
