# Multilinear Regression

In [61]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from sklearn.linear_model import LinearRegression, Ridge
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score

In [75]:
# taking data
X,y = make_regression(n_samples=10000, n_features=2, n_targets=1, n_informative=2)

In [76]:
data = pd.DataFrame(X)

In [77]:
data.columns = ['s1','s2']

In [78]:
# train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [79]:
# model building
model = LinearRegression()
model.fit(X_train, y_train) # model understanding the data patterns

In [80]:
x,y = model.coef_

In [81]:
c = model.intercept_

In [82]:
a,b = X_test[0]

In [83]:
# Predicting manually after extracting values of general equation
# ax + by + c = 0
(a*x) + (b*y) + c

np.float64(-103.55039747102788)

In [84]:
y_pred = model.predict(X_test)

In [85]:
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
score_ = model.score(X_test, y_test)

In [86]:
mae,mse,r2,score_

(np.float64(3.5071336113023934e-14),
 np.float64(1.9437190032452967e-27),
 1.0,
 1.0)

### Retraining same model with **L2-Regularization**

##### Alpha = 1.0

In [87]:
model2 = Ridge(alpha=1.0)
model2.fit(X_train, y_train)

In [88]:
y_pred_2 = model2.predict(X_test)

In [89]:
mae_2 = mean_absolute_error(y_test, y_pred_2)
mse_2 = mean_squared_error(y_test, y_pred_2)
r2_2 = r2_score(y_test, y_pred_2)
score_2 = model.score(X_test, y_test)

In [90]:
mae_2, mse_2, r2_2, score_2

(np.float64(0.007662322363424363),
 np.float64(9.089576838840671e-05),
 0.9999999811272114,
 1.0)

In [95]:
def my_model():
    for i in range(1, 6):
        model2 = Ridge(alpha=i)
        model2.fit(X_train, y_train)
        y_pred_2 = model2.predict(X_test)
        mae = mean_absolute_error(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        score_ = model.score(X_test, y_test)
        print(f"Alpha = {i}\n\tMAE = {mae}\n\tMSE = {mse}\n\tR2 Score = {r2}\n\tAccuracy = {score_}")


In [96]:
my_model()

Alpha = 1
	MAE = 3.5071336113023934e-14
	MSE = 1.9437190032452967e-27
	R2 Score = 1.0
	Accuracy = 1.0
Alpha = 2
	MAE = 3.5071336113023934e-14
	MSE = 1.9437190032452967e-27
	R2 Score = 1.0
	Accuracy = 1.0
Alpha = 3
	MAE = 3.5071336113023934e-14
	MSE = 1.9437190032452967e-27
	R2 Score = 1.0
	Accuracy = 1.0
Alpha = 4
	MAE = 3.5071336113023934e-14
	MSE = 1.9437190032452967e-27
	R2 Score = 1.0
	Accuracy = 1.0
Alpha = 5
	MAE = 3.5071336113023934e-14
	MSE = 1.9437190032452967e-27
	R2 Score = 1.0
	Accuracy = 1.0


In [98]:
X_train.shape

(7500, 2)

In [97]:
sns.scatterplot(x=X_train, y=y_train)

ValueError: Per-column arrays must each be 1-dimensional