In [2]:
from sklearn.datasets import make_regression
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

#### Creating our own dataset using make_regression()

In [3]:
# Synthetic regression problem: 100 samples, 2 features (both informative), 1 target,
# with noise=50 added to the target.
# random_state is fixed so the generated data -- and every metric and coefficient
# derived from it -- is the same on each Restart & Run All.
# NOTE: outputs saved before the seed was added will not match a fresh run.
X, y = make_regression(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_targets=1,
    noise=50,
    random_state=42,
)

In [4]:
# Peek at the first five rows of the feature matrix.
pd.DataFrame(data=X).head(n=5)

Unnamed: 0,0,1
0,-0.904885,-0.969127
1,0.591837,0.462579
2,0.666411,-1.548457
3,0.082301,-0.776581
4,-0.622693,-1.278264


In [5]:
# Peek at the first five target values.
pd.DataFrame(data=y).head(n=5)

Unnamed: 0,0
0,-119.442916
1,-27.967118
2,-148.835739
3,-64.252278
4,-81.343782


In [6]:
# Gather both feature columns and the target into one labelled frame for plotting.
df = pd.DataFrame(
    dict(
        feature1=X[:, 0],
        feature2=X[:, -1],
        target=y,
    )
)

df.head(n=5)

Unnamed: 0,feature1,feature2,target
0,-0.904885,-0.969127,-119.442916
1,0.591837,0.462579,-27.967118
2,0.666411,-1.548457,-148.835739
3,0.082301,-0.776581,-64.252278
4,-0.622693,-1.278264,-81.343782


#### We have created a dataset with 100 rows and 3 columns: 2 input features and 1 target (output)

In [7]:
# Interactive 3-D scatter of the raw data: the two features on x/y, the target on z.
px.scatter_3d(
    data_frame=df,
    x="feature1",
    y="feature2",
    z="target",
)

In [8]:
from sklearn.model_selection import train_test_split
# Hold out a 0.2 fraction of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=3, test_size=0.2)

In [9]:
from sklearn.linear_model import LinearRegression

In [10]:
# Ordinary least-squares model; the intercept is fitted (the estimator's default, spelled out here).
lr = LinearRegression(fit_intercept=True)

In [11]:
# Fit on the training split only; the fitted estimator is the cell's displayed value.
lr.fit(X=X_train, y=y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [12]:
# Predictions for the held-out samples, compared against y_test in the next cell.
y_pred = lr.predict(X=X_test)

In [13]:
# Report each held-out metric as "<label> <value>", in the order MAE, MSE, R^2.
for label, metric in (
    ("mae =", mean_absolute_error),
    ("mse =", mean_squared_error),
    ("r2 score =", r2_score),
):
    print(label, metric(y_test, y_pred))

mae = 44.57568698704804
mse = 2784.445381181433
r2 score = 0.818686263002259


#### Visualising the regression plane

In [14]:
# Evaluate the fitted plane on a regular grid spanning both features and draw it
# over the data. Dedicated names are used for the grid so the target vector `y`
# (needed by the DataFrame and train/test-split cells) is not overwritten.
grid_axis = np.linspace(-5, 5, 10)
f1_grid, f2_grid = np.meshgrid(grid_axis, grid_axis)

# One row per grid point: column 0 is feature1, column 1 is feature2,
# i.e. the same column order as the X the model was trained on.
grid_points = np.column_stack((f1_grid.ravel(), f2_grid.ravel()))

# Put the predictions back on the grid. With meshgrid's default indexing,
# row i varies feature2 (grid_axis[i]) and column j varies feature1 (grid_axis[j]),
# which is the z[i][j] <-> (x[j], y[i]) layout go.Surface uses for 1-D x and y.
z_plane = lr.predict(grid_points).reshape(f1_grid.shape)

fig = px.scatter_3d(df, x="feature1", y="feature2", z="target")
fig.add_trace(go.Surface(x=grid_axis, y=grid_axis, z=z_plane))
fig.show()

#### In simple linear regression we read the slope $m$ and the intercept $b$ from `lr.coef_` and `lr.intercept_`
#### In multiple regression (with 2 inputs) we actually have 3 values to find: $\beta_0$, which is the intercept, and $\beta_1$ and $\beta_2$, which are the slopes (the counterparts of $m$), one per input feature

$\beta_0 =$

In [17]:

# Intercept (beta_0) of the plane fitted by lr.fit on the training split.
lr.intercept_

np.float64(-6.097722223327274)

$\beta_1$ and $\beta_2$

In [18]:
# Fitted coefficients in the column order of X: [beta_1 (feature1), beta_2 (feature2)].
lr.coef_

array([42.4890473 , 85.69318484])