In [21]:
# Multiple linear regression using scikit-learn's built-in LinearRegression
# No external data set is used; the data is synthesized with make_regression
# Author: Muhammad Humayun

from sklearn.datasets import make_regression
import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

# make_regression() synthesizes a regression problem: here 100 samples (rows),
# 2 input features and a single output column (n_targets = 1).
# n_informative = 2 means both features have a linear relationship with the target.
# noise is the standard deviation of the Gaussian scatter added to the output; real-world
# data is never perfect, and without it every point would lie exactly on a plane.
# random_state pins the generator so that Restart & Run All reproduces the same data
# (and therefore the same metrics further down).
X,y = make_regression(n_samples = 100, n_features = 2, n_informative = 2, n_targets = 1,
                      noise = 50, random_state = 42)

df = pd.DataFrame({'input_one':X[:,0],'input_two':X[:,1],'output':y})
df.shape    # should be 100 rows with 3 columns as output



(100, 3)

In [22]:
# Preview the first five rows: the two input features and the output column.
df.head()

Unnamed: 0,input_one,input_two,output
0,-1.613005,1.607182,-95.298279
1,-0.859418,-0.052794,26.203228
2,-0.019103,-0.00931,9.925661
3,1.107946,0.928453,76.26188
4,1.073119,-0.290504,1.193018


In [23]:
# Interactive 3-D scatter of the generated data: the two inputs on the x / y axes
# and the output on the z axis.
fig = px.scatter_3d(
    df,
    x="input_one",
    y="input_two",
    z="output",
)
fig.show()

In [24]:
# Hold out 20% of the samples as a test set (random_state fixes the shuffle),
# then fit a linear regression model on the remaining 80%.
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2
)

lr = LinearRegression()
lr.fit(X_train, y_train)


In [25]:
# Predict the output for the held-out test inputs using the fitted model.
y_pred = lr.predict(X_test)

In [26]:
# Evaluate the test-set predictions with MAE, MSE and R2.
# mean_absolute_error, mean_squared_error and r2_score are already imported in the
# first cell, so they are not imported again here.
print("MAE",mean_absolute_error(y_test,y_pred)) # average absolute error, in the same (arbitrary) units as `output`
print("MSE",mean_squared_error(y_test,y_pred))  # average squared error; lower means predictions are closer to the actual values
print("R2 score",r2_score(y_test,y_pred))       # share of the output variance explained by the model; 1.0 is a perfect fit

MAE 50.10329263705246
MSE 4017.4903251312508
R2 score 0.25183116310824216


In [29]:
# Draw the fitted regression plane on top of the data -- the 3-D counterpart of the
# fitted line in simple linear regression.
#
# The grid variables deliberately do not use the names `x` / `y`: `y` holds the
# regression target created in the first cell, and overwriting it here would break
# any later re-run of the train/test split cell (100 inputs vs. 10 targets).
GRID_SIZE = 10
grid_one = np.linspace(-5, 5, GRID_SIZE)   # input_one values along the surface's x axis
grid_two = np.linspace(-5, 5, GRID_SIZE)   # input_two values along the surface's y axis

# mesh_one[i, j] == grid_one[j] and mesh_two[i, j] == grid_two[i], which matches
# go.Surface's convention that z[i][j] is the height at (x[j], y[i]).
mesh_one, mesh_two = np.meshgrid(grid_one, grid_two)

# One row per grid point, columns in the same order the model was trained on
# (input_one, input_two).
grid_points = np.column_stack((mesh_one.ravel(), mesh_two.ravel()))
z_plane = lr.predict(grid_points).reshape(mesh_one.shape)

fig = px.scatter_3d(df, x='input_one', y='input_two', z='output')

fig.add_trace(go.Surface(x = grid_one, y = grid_two, z = z_plane))

fig.show()
