In [2]:
from sklearn.datasets import make_regression
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

The **make_regression** function is used to generate a synthetic regression dataset with the following parameters:

n_samples: The number of samples in the dataset (100).
n_features: The total number of features (independent variables) in the dataset (2).
n_informative: The number of informative features, i.e., the number of features used to build the linear model used to generate the output (2).
n_targets: The number of regression targets (output variables) in the dataset (1).
noise: The standard deviation of the Gaussian noise applied to the output (50).

**n_informative**
In the example `make_regression(n_samples=100, n_features=2, n_informative=2, n_targets=1, noise=50)`, both features are informative (`n_informative=2`), meaning both features contribute to determining the target variable.

**noise**
A higher noise value results in a more scattered distribution of target values around the linear relationship defined by the informative features.
Lower values of noise make the relationship between features and target more apparent, while higher values introduce more randomness.

In [6]:
# Generate a synthetic two-feature regression problem.
# random_state pins the random draw so the generated data is identical on
# every run (Restart Kernel -> Run All reproduces the same samples).
x, y = make_regression(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_targets=1,
    noise=50,
    random_state=42,
)

In [14]:
# Gather both feature columns and the target into a single table so the
# samples can be inspected and plotted by column name.
df = pd.DataFrame(
    {
        'feature1': x[:, 0],
        'feature2': x[:, 1],
        'target': y,
    }
)

In [15]:
# Sanity check: one row per generated sample, three columns
# (two features plus the target).
df.shape

(100, 3)

In [16]:
# Preview the first rows to confirm the column layout and value ranges.
df.head()

Unnamed: 0,feature1,feature2,target
0,0.750203,0.640324,10.262066
1,0.156785,1.035743,84.359782
2,-1.220283,1.582544,35.932238
3,-1.023591,0.284579,-69.863916
4,0.08357,0.505882,8.186747


In [18]:
# Interactive 3-D scatter of the raw samples: one marker per row of df,
# positioned by the two features and the target.
fig = px.scatter_3d(data_frame=df, x='feature1', y='feature2', z='target')
fig.show()

**Training Model**

In [19]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; random_state fixes the shuffle so
# the same rows land in each split on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2,
)

In [20]:
from sklearn.linear_model import LinearRegression

In [21]:
# Linear regression estimator with default settings (not yet fitted).
model = LinearRegression()

In [22]:
# Fit on the training split only; the test split stays held out.
model.fit(x_train,y_train)

In [26]:
# Predict the target for every held-out test row and display the values.
y_pred = model.predict(x_test)
y_pred

array([ 142.46699789,   78.81325812, -113.32776439,  -14.04382176,
         64.62471061,  -52.07920965,   42.72092949,   88.20291402,
         26.00493488,  -41.07179159,  142.47539303,  -27.93361053,
        -38.38413121,  -75.13431654,   97.57673465,  -20.79329156,
        -43.37611038,  166.38663057, -125.12952612,    1.43400998])

In [37]:
# Build a regular 10 x 10 evaluation grid spanning [-5, 5] on both axes; the
# fitted model is evaluated on it to draw the regression surface.
# NOTE: this rebinds `x` and `y`, which earlier held the make_regression
# output. Re-running an earlier cell that reads `x`/`y` after this one will
# pick up the grid axes instead of the dataset.
x = np.linspace(-5, 5, 10)
y = np.linspace(-5, 5, 10)

# Pass the axes in (x, y) order so that xGrid really holds x-coordinates and
# yGrid holds y-coordinates. With meshgrid's default 'xy' indexing, xGrid
# varies along columns and yGrid varies along rows. (The previous (y, x) order
# only worked because both axes happen to be identical.)
xGrid, yGrid = np.meshgrid(x, y)

# Show the grid shape rather than dumping the full coordinate arrays.
xGrid.shape


(array([[-5.        , -3.88888889, -2.77777778, -1.66666667, -0.55555556,
          0.55555556,  1.66666667,  2.77777778,  3.88888889,  5.        ],
        [-5.        , -3.88888889, -2.77777778, -1.66666667, -0.55555556,
          0.55555556,  1.66666667,  2.77777778,  3.88888889,  5.        ],
        [-5.        , -3.88888889, -2.77777778, -1.66666667, -0.55555556,
          0.55555556,  1.66666667,  2.77777778,  3.88888889,  5.        ],
        [-5.        , -3.88888889, -2.77777778, -1.66666667, -0.55555556,
          0.55555556,  1.66666667,  2.77777778,  3.88888889,  5.        ],
        [-5.        , -3.88888889, -2.77777778, -1.66666667, -0.55555556,
          0.55555556,  1.66666667,  2.77777778,  3.88888889,  5.        ],
        [-5.        , -3.88888889, -2.77777778, -1.66666667, -0.55555556,
          0.55555556,  1.66666667,  2.77777778,  3.88888889,  5.        ],
        [-5.        , -3.88888889, -2.77777778, -1.66666667, -0.55555556,
          0.55555556,  1.6666666

**xGrid.ravel() and yGrid.ravel():**

xGrid and yGrid are 2D arrays created by np.meshgrid based on the x and y arrays.
.ravel() is used to flatten these 2D arrays, converting them into 1D arrays.

**Vertical stacking using np.vstack():**

--> np.vstack((xGrid.ravel().reshape(1, 100), yGrid.ravel().reshape(1, 100))) vertically stacks the two row vectors.
--> The resulting array has a shape of (2, 100) because it stacks two row vectors vertically, forming a 2-row by 100-column array.
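--> The first row contains the flattened and reshaped x coordinates, and the second row contains the flattened and reshaped y coordinates.
--> The trailing `.T` then transposes the result to shape (100, 2), giving one row per grid point with one column per feature, which is the (n_samples, n_features) layout that `model.predict` expects.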

In [32]:
# Flatten both coordinate grids into row vectors, stack them into a
# (2, n_points) array, then transpose to (n_points, 2): one row per grid point,
# first column taken from xGrid and second column from yGrid.
# reshape(1, -1) lets NumPy infer the point count instead of hard-coding 100.
final = np.vstack((xGrid.ravel().reshape(1, -1), yGrid.ravel().reshape(1, -1))).T

# Evaluate the fitted model at every grid point and fold the flat predictions
# back into the grid's own shape, so changing the grid resolution in the
# grid-building cell cannot break or silently mis-shape this step.
z_final = model.predict(final).reshape(xGrid.shape)

# Short alias read by the surface-plot cell.
z = z_final


In [38]:
# Redraw the sample scatter, then overlay the model's predictions as a surface
# so the fitted plane can be compared against the raw points.
fig = px.scatter_3d(data_frame=df, x='feature1', y='feature2', z='target')

# x and y are the 1-D grid axes; z is the 2-D grid of predictions over them.
fig.add_trace(go.Surface(x=x, y=y, z=z))

fig.show()

In [39]:
# Fitted coefficients, one per feature column in the order used for training.
model.coef_

array([64.20922648, 76.06247516])

In [40]:
# Fitted intercept: the model's prediction when every feature is zero.
model.intercept_

0.7021587468609205