# Multiple Linear Regression

In [1]:
from sklearn.datasets import make_regression
import numpy as np
import pandas as pd

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
# Generate a synthetic regression problem: 100 samples, 2 features (both
# informative), a single target, and noise=50 added to the output.
# random_state pins the generator so a Restart & Run All reproduces the same data.
X, Y = make_regression(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_targets=1,
    noise=50,
    random_state=42,
)

In [3]:
# Model being fitted:  y = b0 + b1*x1 + b2*x2
#   y  -> target,  x1 -> feature_1,  x2 -> feature_2
#   b0, b1, b2 are unknown at this point; the regression model estimates them.

# Collect both feature columns and the target into a single pandas DataFrame.
df = pd.DataFrame(
    dict(
        feature_1=X[:, 0],
        feature_2=X[:, 1],
        target=Y,
    )
)

In [4]:
# Display the frame; the notebook renders a truncated view (first and last rows).
df

Unnamed: 0,feature_1,feature_2,target
0,0.943969,0.175286,-1.137200
1,0.989661,1.427477,-98.390540
2,-1.048333,0.199346,13.957103
3,0.075944,0.646055,38.300546
4,-0.356845,0.567626,-8.898535
...,...,...,...
95,-1.327597,0.166389,-75.568036
96,-0.962810,0.850888,-57.711805
97,-0.828934,-1.382310,31.488306
98,-0.523189,-1.123741,-35.059807


In [5]:
# Preview the first five rows of the frame.
df.head()

Unnamed: 0,feature_1,feature_2,target
0,0.943969,0.175286,-1.1372
1,0.989661,1.427477,-98.39054
2,-1.048333,0.199346,13.957103
3,0.075944,0.646055,38.300546
4,-0.356845,0.567626,-8.898535


In [6]:
# (rows, columns): one row per sample; two feature columns plus the target.
df.shape

(100, 3)

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation. random_state fixes the shuffle so
# the split (and every metric computed from it) is reproducible across re-runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [8]:
from sklearn.linear_model import LinearRegression

In [9]:
lr = LinearRegression()    # instantiate the linear regression estimator with default settings

In [10]:
lr.fit(X_train, Y_train)  # train on the training split; this estimates coef_ and intercept_

In [11]:
y_pred = lr.predict(X_test)    # predicted targets for the held-out test features

In [12]:
# Show the predictions: one value per test sample.
y_pred

array([ 35.68267176,  10.05394599, -29.13422756,  13.02680585,
        42.14214851, -15.3646929 ,   0.21474275,  38.541769  ,
       -23.53265592, -26.93657129, -33.26326985, -28.90512339,
       -21.11431231,  26.01968872, -21.85937058,   8.40899565,
        -3.56469943,   9.31525108,  30.7714338 ,  -2.80766784])

In [13]:
# Evaluation metrics on the held-out test set:
#   MAE - Mean Absolute Error: average of |true value - predicted value|.
#   MSE - Mean Squared Error: average of (true value - predicted value)^2.
#   R2  - coefficient of determination: 1 is a perfect fit, 0 matches always
#         predicting the mean of the true values, and it is negative when the
#         model does worse than that baseline.
mae = mean_absolute_error(Y_test, y_pred)
mse = mean_squared_error(Y_test, y_pred)
r2 = r2_score(Y_test, y_pred)

print("MAE = ", mae)
print("MSE = ", mse)
print("R2 score = ", r2)

MAE =  44.8669675267166
MSE =  3603.0731733771186
R2 score =  -0.15271543616365268


In [14]:
lr.coef_    # fitted slopes [b1, b2], one per feature column of X

array([28.2908817 ,  7.66396851])

In [15]:
lr.intercept_  # fitted intercept term b0

-0.39675746289392766