In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import make_regression

In [3]:
# Synthetic regression problem: 100 samples, 2 features (both informative), a single
# target and a noise level of 50. The fixed random_state makes every
# Restart & Run All reproduce the same data, and therefore the same split and metrics.
x, y = make_regression(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_targets=1,
    noise=50,
    random_state=42,
)

In [4]:
# Put the two feature columns and the target side by side in a single frame
# so the data can be inspected and plotted by column name.
df = pd.DataFrame(
    np.column_stack((x, y)),
    columns=['feature1', 'feature2', 'target'],
)

In [5]:
df.head()

Unnamed: 0,feature1,feature2,target
0,-0.488219,-1.462185,-99.134063
1,0.868534,1.288374,170.869497
2,0.017216,2.086581,185.201041
3,1.789081,0.367957,89.424139
4,-0.32751,-0.781059,-43.178988


In [6]:
df.shape

(100, 3)

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   feature1  100 non-null    float64
 1   feature2  100 non-null    float64
 2   target    100 non-null    float64
dtypes: float64(3)
memory usage: 2.5 KB


In [9]:
import plotly.express as px

In [10]:
import plotly.graph_objects as go

In [11]:
# Interactive 3-D view of the raw data: both features against the target.
# The title lets the figure stand on its own when the notebook is skimmed.
fig = px.scatter_3d(
    df,
    x='feature1',
    y='feature2',
    z='target',
    title='Synthetic regression data: target vs. feature1 and feature2',
)
fig.show()

In [12]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=3,
)

In [13]:
from sklearn.linear_model import LinearRegression

In [14]:
lr = LinearRegression()

In [15]:
lr.fit(x_train,y_train)

In [16]:
y_pred = lr.predict(x_test)

In [18]:
y_pred

array([ -56.75935106,  -74.58622033,   -7.2331017 ,  129.3674798 ,
          5.11606515, -121.70261964,   84.57374071,   -7.40860392,
        -64.52553285,  -23.23353223, -103.07727191,  138.43783133,
        134.22352875,   -1.14758111,    6.84525027,  -68.39589531,
        -10.29056571,   -3.24677547, -112.21605807,   23.75908221])

In [17]:
y_test

array([-103.23401059,  -93.9155566 ,  -99.05848946,   55.51913442,
         15.4289639 ,  -97.94883504,   98.50402259,   38.79499144,
        -32.4728174 ,   42.52582101,  -89.23140103,  177.65653534,
        164.76098722,  -45.77721743,   55.45607466,   -2.72981368,
        -15.60127325,   55.98428281, -131.56627167,    0.98929132])

In [19]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Hold-out metrics for the sklearn model. The second line reports the mean
# *squared* error, so it is labelled MSE (it was previously mislabelled MAE).
print("MAE mean_absolute_error ", mean_absolute_error(y_test,y_pred))
print("MSE mean_squared_error ",mean_squared_error(y_test,y_pred))
print("R2 score",r2_score(y_test,y_pred))

MAE mean_absolute_error  38.63303521136727
MAE mean_squared_error  2026.4353828938051
R2 score 0.727649065718208


In [20]:
# Evaluate the fitted plane on a regular 10x10 grid covering [-5, 5] on both feature axes.
# The grid vectors get their own names so the dataset arrays `x` and `y` (which the
# train/test split cell reads) are not overwritten by this cell.
grid_f1 = np.linspace(-5, 5, 10)
grid_f2 = np.linspace(-5, 5, 10)

# xGrid[i, j] == grid_f1[j] and yGrid[i, j] == grid_f2[i]
xGrid, yGrid = np.meshgrid(grid_f1, grid_f2)

# One row per grid point, columns ordered (feature1, feature2) like the training matrix.
final = np.column_stack((xGrid.ravel(), yGrid.ravel()))

# Back to grid shape: z_final[i, j] is the prediction at (grid_f1[j], grid_f2[i]).
z_final = lr.predict(final).reshape(xGrid.shape)

# Overlay the fitted plane on the scatter of the raw data.
fig = px.scatter_3d(df, x='feature1', y='feature2',z='target')

fig.add_trace(go.Surface(x=grid_f1, y=grid_f2, z=z_final))
fig.show()

In [21]:
lr.coef_

array([15.26103782, 89.96897551])

In [22]:
lr.intercept_

np.float64(1.594093168564236)

In [23]:
class MyLR:
  """Ordinary least-squares linear regression with an intercept term.

  After ``fit`` the learned parameters are available as ``coef_`` (one weight
  per feature column) and ``intercept_`` (the constant term). Both are ``None``
  until ``fit`` has been called.
  """

  def __init__(self):
    self.coef_ = None
    self.intercept_ = None

  def fit(self,x_train,y_train):
    """Estimate the intercept and feature weights by least squares.

    Parameters:
      x_train: 2-D array-like of shape (n_samples, n_features).
      y_train: array-like whose first dimension has length n_samples.

    Raises:
      ValueError: if ``x_train`` is not 2-D or the sample counts differ.
    """
    x_train = np.asarray(x_train, dtype=float)
    y_train = np.asarray(y_train, dtype=float)

    if x_train.ndim != 2:
      raise ValueError("x_train must be 2-D with shape (n_samples, n_features)")
    if y_train.shape[:1] != x_train.shape[:1]:
      raise ValueError("x_train and y_train must contain the same number of samples")

    # Prepend a column of ones so the first fitted coefficient is the intercept.
    design = np.insert(x_train,0,1,axis=1)

    # Solve the least-squares problem directly instead of forming and inverting
    # X^T X: this is better conditioned and still yields a (minimum-norm)
    # solution when feature columns are collinear, where the inverse does not exist.
    betas, *_ = np.linalg.lstsq(design, y_train, rcond=None)
    self.intercept_ = betas[0]
    self.coef_ = betas[1:]

  def predict(self,x_test):
    """Return predictions ``x_test @ coef_ + intercept_``.

    Parameters:
      x_test: array-like whose last dimension has length n_features.

    Raises:
      RuntimeError: if called before ``fit``.
    """
    if self.coef_ is None or self.intercept_ is None:
      raise RuntimeError("MyLR is not fitted yet; call fit() before predict()")
    y_pred = np.dot(x_test,self.coef_) + self.intercept_
    return y_pred

In [24]:
LR = MyLR()

In [25]:
LR.fit(x_train,y_train)

In [26]:
Y_pred = LR.predict(x_test)
Y_pred

array([ -56.75935106,  -74.58622033,   -7.2331017 ,  129.3674798 ,
          5.11606515, -121.70261964,   84.57374071,   -7.40860392,
        -64.52553285,  -23.23353223, -103.07727191,  138.43783133,
        134.22352875,   -1.14758111,    6.84525027,  -68.39589531,
        -10.29056571,   -3.24677547, -112.21605807,   23.75908221])

In [27]:
# Score the hand-written model: Y_pred holds MyLR's predictions, whereas y_pred
# (lower case) holds the sklearn model's predictions from earlier in the notebook.
r2_score(y_test,Y_pred)

0.727649065718208

In [28]:
LR.coef_

array([15.26103782, 89.96897551])

In [29]:
LR.intercept_

np.float64(1.5940931685642308)