# `Multiple Linear Regression`

In [2]:
# import necessary libraries
import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [3]:
# generate regression data: 100 samples, 2 features (both informative), noise=50
# random_state pins the random draw so a fresh Restart & Run All reproduces the
# same data; re-run the notebook once to refresh the stored outputs
X, y = make_regression(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_targets=1,
    noise=50,
    random_state=42,
)

In [4]:
# wrap the two feature columns and the target in one DataFrame for easier handling
df = pd.DataFrame(
    {
        'feature_1': X[:, 0],
        'feature_2': X[:, 1],
        'target': y,
    }
)

In [5]:
# preview the first few rows of the DataFrame (features and target side by side)
df.head()

Unnamed: 0,feature_1,feature_2,target
0,1.479447,1.221182,170.850147
1,-0.016721,-0.29099,26.158218
2,-0.540549,-0.310802,-57.838249
3,0.223394,0.121083,-55.200344
4,-0.177972,-0.211727,-66.024126


In [6]:
# display the shape of the DataFrame as (rows, columns)
df.shape

(100, 3)

In [7]:
# 3D scatter: the two features on the horizontal axes, the target on the vertical axis
fig = px.scatter_3d(
    df,
    x='feature_1',
    y='feature_2',
    z='target',
    title='3D Scatter Plot of Features vs Target',
)
fig.show()

In [8]:
# split the data into training and testing sets (20% held out for testing);
# random_state fixes the shuffle so the split is the same on every run
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# create the Linear Regression model (unfitted, default settings)
lr = LinearRegression()

In [10]:
# fit the model on the training split only; the test split stays unseen for evaluation
lr.fit(X_train, y_train)

In [11]:
# make predictions for the held-out test features
y_pred = lr.predict(X_test)

In [12]:
# calculate and print evaluation metrics, comparing test targets with predictions
for label, metric in (
    ("Mean Absolute Error (MAE):", mean_absolute_error),
    ("Mean Squared Error (MSE):", mean_squared_error),
    ("R-squared (R2 ):", r2_score),
):
    print(label, metric(y_test, y_pred))

Mean Absolute Error (MAE): 37.695624925315045
Mean Squared Error (MSE): 2524.4165198372048
R-squared (R2 ): 0.7814266223027518


In [13]:
# axis values for a 10x10 evaluation grid spanning [-5, 5] in both features
# NOTE(review): `y` is rebound here from the regression target to grid axis
# values; any later cell that needs the synthetic target must not read `y`.
x = np.linspace(start=-5, stop=5, num=10)
y = np.linspace(start=-5, stop=5, num=10)

# meshgrid lays x out along axis 1 and y along axis 0
x_grid, y_grid = np.meshgrid(x, y)

# flatten both grids and pair them up: one (feature_1, feature_2) row per grid point
final = np.vstack([x_grid.ravel(), y_grid.ravel()]).T

# evaluate the fitted model on every grid point, then fold back into the grid shape
z = z_final = lr.predict(final).reshape(x_grid.shape)

In [14]:
# scatter of the raw data with the fitted regression plane overlaid
fig = px.scatter_3d(
    df,
    x='feature_1',
    y='feature_2',
    z='target',
    title='3D Scatter plot of features vs target',
)
# semi-transparent surface so the points behind the plane stay visible
regression_plane = go.Surface(
    x=x,
    y=y,
    z=z,
    colorscale='Viridis',
    opacity=0.5,
    name='Regression Plane',
)
fig.add_traces([regression_plane])
fig.show()

In [15]:
# display the fitted coefficients of the model (one per input feature)
lr.coef_

array([41.71430139, 62.703515  ])

In [16]:
# display the fitted intercept of the model
lr.intercept_

np.float64(3.1566125313261324)

# `Practice on Housing Dataset`

In [17]:
# load the housing dataset; the path is relative to the notebook's working directory
df1 = pd.read_csv('../Dataset/Housing.csv')

In [18]:
# preview the first few rows of the housing dataset (numeric and yes/no columns)
df1.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [None]:
# keep only the numeric columns used below; the yes/no and furnishing columns are dropped
df1 = df1[
    [
        'area',
        'bedrooms',
        'price',
        'bathrooms',
        'stories',
        'parking',
    ]
]

In [20]:
# confirm that only the selected columns remain
df1.head()

Unnamed: 0,area,bedrooms,price,bathrooms,stories,parking
0,7420,4,13300000,2,3,2
1,8960,4,12250000,4,4,3
2,9960,3,12250000,2,2,2
3,7500,4,12215000,2,2,3
4,7420,4,11410000,1,2,2


In [None]:
# 3D scatter of area and bedrooms against price;
# marker colour encodes bathrooms and marker size encodes price
fig = px.scatter_3d(
    df1,
    x='area',
    y='bedrooms',
    z='price',
    color='bathrooms',
    color_continuous_scale='Viridis',
    size='price',
    title='3D Scatter Plot of Area vs Bedrooms vs Price',
)
fig.show()

In [None]:
# feature matrix X: every selected column except the price
X = df1.loc[:, ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']]
# target vector y: the price column
y = df1.loc[:, 'price']

In [31]:
# inspect the feature matrix
X

Unnamed: 0,area,bedrooms,bathrooms,stories,parking
0,7420,4,2,3,2
1,8960,4,4,4,3
2,9960,3,2,2,2
3,7500,4,2,2,3
4,7420,4,1,2,2
...,...,...,...,...,...
540,3000,2,1,1,2
541,2400,3,1,1,0
542,3620,2,1,1,0
543,2910,3,1,1,0


In [None]:
# hold out 20% of the housing rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# create a fresh Linear Regression model for the housing data
# (LinearRegression is already imported earlier in the notebook; this rebinds `lr`)
lr = LinearRegression()

In [None]:
# fit the model on the housing training split
lr.fit(X_train, y_train)

In [None]:
# make price predictions for the held-out housing test features
y_pred = lr.predict(X_test)

In [None]:
# inspect the raw predicted prices
y_pred

array([6178627.50326817, 6370140.85865473, 3283148.1570249 ,
       4226007.94816032, 3409685.55116741, 4262158.3499693 ,
       5493440.53152357, 5559897.74293016, 3373715.42245305,
       3020513.18649699, 9519985.01490602, 3250408.25967961,
       3184310.73107686, 3639160.7315938 , 3907874.98339005,
       6174323.64572264, 3148160.32926787, 4872535.73454622,
       4826205.69113307, 3888527.39095668, 5222386.81260559,
       4294589.38035851, 3162381.17719464, 4193028.43115341,
       5747707.28367817, 7619041.42465273, 2989724.54874164,
       4794005.74968623, 7434038.64867549, 3457027.28535551,
       5102872.70577387, 3982998.90101381, 6215707.38061091,
       4724828.08224186, 4307759.63367012, 5625853.70972471,
       4791990.70630181, 3815116.83871033, 3261429.41224535,
       4879463.83866908, 5326652.37523625, 3156512.70502897,
       6345431.50216772, 4316575.21358846, 4120177.60802241,
       4190668.52130486, 6710808.14327267, 4603723.42489655,
       4521704.65093634,

In [30]:
# score the housing model on the test split, then report each metric
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared (R2 ):", r2)

Mean Absolute Error (MAE): 1127483.3523235186
Mean Squared Error (MSE): 2292721545725.3613
R-squared (R2 ): 0.5464062355495873
