In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression

In [3]:
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [4]:
import plotly.express as px

In [63]:
import plotly.graph_objects as go

In [5]:
X, y = make_regression(n_samples=200, n_features=2, n_targets=1)

In [6]:
type(X)

numpy.ndarray

In [7]:
type(y)

numpy.ndarray

In [8]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

In [9]:
X_train.shape

(160, 2)

In [10]:
y_train.shape

(160,)

In [11]:
X_test.shape

(40, 2)

In [12]:
y_test.shape

(40,)

### Checking for the missing data

In [13]:
pd.DataFrame(X).isnull().sum()

0    0
1    0
dtype: int64

In [14]:
pd.DataFrame(X,columns=['a','b']).isnull().sum()

a    0
b    0
dtype: int64

In [15]:
pd.DataFrame(y).isnull().sum()

0    0
dtype: int64

### Training the model

In [16]:
lin_reg = LinearRegression()

In [17]:
lin_reg.fit(X_train,y_train)

In [18]:
y_pred = lin_reg.predict(X_test)

In [19]:
y_pred

array([ -81.86765936,   45.47997628,   44.77264743,   73.21020801,
        -93.54035228,  -56.33156871,  -91.36387354,  -53.26881269,
        -21.30704725,   38.57184405,    3.49750206,   23.39303838,
         58.47607598,  -26.56378586, -166.66457915,   25.64939931,
        -17.67361872,  -54.14005857,  -83.51005271,  -97.57896025,
         47.34008485,  111.82517878,   -8.27488268,  -26.76536044,
         65.12610054,  -13.47789581,  -40.92797799,   17.78222631,
       -172.84881649,  206.0374376 , -100.43251563,   43.5652829 ,
        -64.02337614,   39.67618037,  -99.72661865,   39.41948284,
        -74.31492119,  -94.989516  ,   36.42367369,   31.65367614])

In [20]:
y_test

array([ -81.86765936,   45.47997628,   44.77264743,   73.21020801,
        -93.54035228,  -56.33156871,  -91.36387354,  -53.26881269,
        -21.30704725,   38.57184405,    3.49750206,   23.39303838,
         58.47607598,  -26.56378586, -166.66457915,   25.64939931,
        -17.67361872,  -54.14005857,  -83.51005271,  -97.57896025,
         47.34008485,  111.82517878,   -8.27488268,  -26.76536044,
         65.12610054,  -13.47789581,  -40.92797799,   17.78222631,
       -172.84881649,  206.0374376 , -100.43251563,   43.5652829 ,
        -64.02337614,   39.67618037,  -99.72661865,   39.41948284,
        -74.31492119,  -94.989516  ,   36.42367369,   31.65367614])

In [21]:
r2_score(y_test,y_pred)

1.0

In [22]:
mae = mean_absolute_error(y_test,y_pred)

In [23]:
mae

1.1124434706744069e-14

In [24]:
rmse = np.sqrt(mean_squared_error(y_test,y_pred))

In [25]:
rmse

1.3996209640077836e-14

In [26]:
X_test[0]

array([ 0.36198852, -1.20341552])

In [27]:
y_test[0]

-81.867659358228

In [30]:
lin_reg.predict([[0.36198852, -1.20341552]])

array([-81.86765932])

### Visulizing the Data

In [32]:
df = pd.DataFrame({'feature1':X[:,0],'feature2':X[:,1],'target':y})

In [37]:
# df.shape

In [36]:
px.scatter_3d(df,x=df.iloc[:,0],y=df.iloc[:,1],z=df.iloc[:,2])

+ ### Here in the above plot, the z-column defines the output column in the dataset and we could see that the data is spread in a "sort of linear" structure which means we can use Linear Regression to train the model.

In [41]:
#Weights of the input features
print('Weights of input Features are:',lin_reg.coef_)

Weights of input Features are: [27.12972295 76.19006574]


In [46]:
w1 = lin_reg.coef_[0]
w2 = lin_reg.coef_[1]

In [45]:
# Bias of the hyperplane/plane/line
print('Bias / Intercept of the trained Model:',lin_reg.intercept_)
w0=lin_reg.intercept_

Bias / Intercept of the trained Model: 1.7763568394002505e-15


In [47]:
# 'So the model hyperplane equation looks like:' 
# y = w0 + w1*x1 + w2+x2

### let's test the above equation with model's prediction

In [50]:
X_test[1]

array([-0.5407594 ,  0.78948126])

In [53]:
y_test[1]

45.47997628373005

In [57]:
y_pred[1]

45.47997628373006

In [54]:
x1 = X_test[1][0]
x2 = X_test[1][1]

In [55]:
y = w0 + w1*x1 + w2*x2

In [56]:
y

45.47997628373007

### Let's draw the hyperplane/plane found after training the model

In [69]:

x = np.linspace(-5, 5, 10)
y = np.linspace(-5, 5, 10)
xGrid, yGrid = np.meshgrid(y, x)

final = np.vstack((xGrid.ravel().reshape(1,100),yGrid.ravel().reshape(1,100))).T

z_final = lin_reg.predict(final).reshape(10,10)

z = z_final

In [70]:

fig = px.scatter_3d(df, x='feature1', y='feature2', z='target')

fig.add_trace(go.Surface(x = x, y = y, z =z ))

fig.show()