In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression

In [3]:
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [4]:
import plotly.express as px

In [5]:
import plotly.graph_objects as go

In [6]:
# Synthetic regression problem: 200 samples, 2 features, 1 target.
# `noise` is left at its default (0.0), so y is an exact linear function of X.
# A fixed random_state makes the generated data reproducible across kernel
# restarts; re-run the notebook to refresh the stored outputs.
X, y = make_regression(n_samples=200, n_features=2, n_targets=1, random_state=42)

In [7]:
# Confirm the container type of the generated feature matrix.
type(X)

numpy.ndarray

In [8]:
# Confirm the container type of the generated target vector.
type(y)

numpy.ndarray

In [9]:
# Hold out 20% of the samples for evaluation. The fixed random_state keeps the
# same rows in train/test on every run, so metrics and sample lookups below are
# reproducible; re-run the notebook to refresh the stored outputs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [10]:
# Training features: 80% of the 200 samples, 2 columns each.
X_train.shape

(160, 2)

In [11]:
# Training targets: one value per training row.
y_train.shape

(160,)

In [12]:
# Held-out features: the remaining 20% of the samples.
X_test.shape

(40, 2)

In [13]:
# Held-out targets: one value per test row.
y_test.shape

(40,)

### Checking for missing data

In [14]:
# Count missing values in each feature column (columns get default integer labels).
features_frame = pd.DataFrame(X)
features_frame.isna().sum()

0    0
1    0
dtype: int64

In [15]:
# Same missing-value count, with the two feature columns labelled 'a' and 'b'.
named_features = pd.DataFrame(X, columns=['a', 'b'])
named_features.isna().sum()

a    0
b    0
dtype: int64

In [16]:
# Count missing values in the target vector.
target_frame = pd.DataFrame(y)
target_frame.isna().sum()

0    0
dtype: int64

### Training the model

In [17]:
# Linear regression model with scikit-learn's default settings.
lin_reg = LinearRegression()

In [18]:
# Fit the model on the training split only; the test split stays unseen.
lin_reg.fit(X=X_train, y=y_train)

In [45]:
# Learned weights, one per input feature.
lin_reg.coef_

array([78.33275109, 84.42172609])

In [46]:
# Learned bias term (about 1e-14 in the recorded run, i.e. effectively zero).
lin_reg.intercept_

1.1546319456101628e-14

In [47]:
# Predict targets for the held-out rows.
y_pred = lin_reg.predict(X=X_test)

In [48]:
# Show the predictions for all 40 test rows.
y_pred

array([ 2.59551294e+02, -5.35490216e+01,  8.89001264e+01,  1.55313396e+02,
        7.69756706e+01,  1.05313514e+02, -1.89464382e+02,  5.54200320e+01,
        1.38440078e+02,  1.00586534e+02, -2.38632936e+01,  1.43212442e+02,
        9.66388567e+00, -4.71704631e+01,  9.31211024e+01,  7.98422913e+01,
       -9.34250406e+01,  3.86301247e+01,  1.32055362e+02,  7.73881141e+01,
        2.84876122e+02, -1.10522465e+02,  1.02377389e+01,  1.18377236e+02,
       -5.65396857e+01, -7.24222206e+01, -2.00930662e+01, -2.18454169e+02,
        1.53372862e+02, -8.08826145e+01, -7.85798142e+01, -3.75080708e+01,
        2.55987710e-01,  8.46208568e+01,  1.87332363e+01,  1.18675437e+02,
       -3.86646732e+01,  6.52802107e+01,  1.67466570e+02,  2.26009431e+02])

In [21]:
# Ground-truth targets for the same test rows, for side-by-side comparison with y_pred.
y_test

array([ 2.59551294e+02, -5.35490216e+01,  8.89001264e+01,  1.55313396e+02,
        7.69756706e+01,  1.05313514e+02, -1.89464382e+02,  5.54200320e+01,
        1.38440078e+02,  1.00586534e+02, -2.38632936e+01,  1.43212442e+02,
        9.66388567e+00, -4.71704631e+01,  9.31211024e+01,  7.98422913e+01,
       -9.34250406e+01,  3.86301247e+01,  1.32055362e+02,  7.73881141e+01,
        2.84876122e+02, -1.10522465e+02,  1.02377389e+01,  1.18377236e+02,
       -5.65396857e+01, -7.24222206e+01, -2.00930662e+01, -2.18454169e+02,
        1.53372862e+02, -8.08826145e+01, -7.85798142e+01, -3.75080708e+01,
        2.55987710e-01,  8.46208568e+01,  1.87332363e+01,  1.18675437e+02,
       -3.86646732e+01,  6.52802107e+01,  1.67466570e+02,  2.26009431e+02])

In [49]:
# Coefficient of determination on the held-out rows (1.0 means a perfect fit).
r2_score(y_true=y_test, y_pred=y_pred)

1.0

In [23]:
# Mean absolute error between true and predicted test targets.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

In [24]:
# Display the MAE (about 1e-13 in the recorded run).
mae

1.0565853747479537e-13

In [25]:
# Root mean squared error: square root of the MSE on the test split.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

In [26]:
# Display the RMSE (about 1e-13 in the recorded run).
rmse

1.2976915833634956e-13

In [27]:
# Feature values of the first held-out sample.
X_test[0]

array([2.5123236 , 0.74334034])

In [28]:
# True target of the first held-out sample.
y_test[0]

259.55129385195517

In [29]:
# Predict for one hand-typed sample; predict() takes a 2-D (n_samples, n_features)
# input, hence the nested list.
lin_reg.predict([[0.36198852, -1.20341552]])

array([-73.23885876])

### Visualizing the Data

In [30]:
# Combine both feature columns and the target into one frame for plotting.
df = pd.DataFrame(
    np.column_stack((X, y)),
    columns=['feature1', 'feature2', 'target'],
)

In [31]:
# df.shape

In [32]:
# 3-D scatter of both features against the target. Columns are referenced by
# name (as in the final plot of this notebook) so the axes are labelled
# feature1 / feature2 / target.
px.scatter_3d(df, x='feature1', y='feature2', z='target')

+ ### In the plot above, the z-axis shows the output (target) column of the dataset. The points lie on a roughly linear (planar) structure, which means Linear Regression is a suitable model to train on this data.

In [33]:
# Learned weight of each input feature
feature_weights = lin_reg.coef_
print('Weights of input Features are:', feature_weights)

Weights of input Features are: [78.33275109 84.42172609]


In [34]:
# Unpack the two feature weights into w1 (first feature) and w2 (second feature).
w1, w2 = lin_reg.coef_

In [35]:
# Bias (intercept) of the fitted hyperplane/plane/line, kept as w0
w0 = lin_reg.intercept_
print('Bias / Intercept of the trained Model:', w0)

Bias / Intercept of the trained Model: 1.1546319456101628e-14


In [36]:
# So the model hyperplane equation looks like:
# y = w0 + w1*x1 + w2*x2

### Let's test the above equation against the model's prediction

In [37]:
# Feature values of the second held-out sample, used for the manual check.
X_test[1]

array([-0.22488441, -0.42563933])

In [38]:
# True target of that sample.
y_test[1]

-53.5490215762206

In [39]:
# Model prediction for that same sample.
y_pred[1]

-53.549021576220596

In [40]:
# Split the second test sample into its two feature values.
x1, x2 = X_test[1]

In [41]:
y = w0 + w1*x1 + w2*x2

In [42]:
y

-53.5490215762206

### Let's draw the hyperplane/plane found after training the model

In [43]:

x = np.linspace(-5, 5, 10)
y = np.linspace(-5, 5, 10)
xGrid, yGrid = np.meshgrid(y, x)

final = np.vstack((xGrid.ravel().reshape(1,100),yGrid.ravel().reshape(1,100))).T

z_final = lin_reg.predict(final).reshape(10,10)

z = z_final

In [44]:

fig = px.scatter_3d(df, x='feature1', y='feature2', z='target')

fig.add_trace(go.Surface(x = x, y = y, z =z ))

fig.show()