# Multilinear Regression

In [13]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn import preprocessing

from sklearn.model_selection import train_test_split
import pandas as pd

## Read clean dataset

In [35]:
df = pd.read_csv('clean.csv')
df.head()

Unnamed: 0,to_Bangalore,to_Chennai,to_Delhi,to_Hyderabad,to_Kolkata,to_Mumbai,arr_time_Afternoon,arr_time_Evening,arr_time_Morning,arr_time_Night,...,dep_time_Morning,dep_time_Night,airline_Air India,airline_Vistara,time_taken,stop,price,distance,dia,mes
0,0,0,0,0,0,1,0,1,0,0,...,0,0,1,0,2.0,0,25612,1146.935458,11,2
1,0,0,0,0,0,1,0,0,0,1,...,0,0,1,0,2.25,0,25612,1146.935458,11,2
2,0,0,0,0,0,1,0,1,0,0,...,0,0,1,0,24.75,1,42220,1146.935458,11,2
3,0,0,0,0,0,1,0,0,0,1,...,0,1,1,0,26.5,1,44450,1146.935458,11,2
4,0,0,0,0,0,1,0,0,0,1,...,0,0,1,0,6.666667,1,46690,1146.935458,11,2


## Normalize data

In [36]:
scaler = preprocessing.MinMaxScaler()
df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
df.head()

Unnamed: 0,to_Bangalore,to_Chennai,to_Delhi,to_Hyderabad,to_Kolkata,to_Mumbai,arr_time_Afternoon,arr_time_Evening,arr_time_Morning,arr_time_Night,...,dep_time_Morning,dep_time_Night,airline_Air India,airline_Vistara,time_taken,stop,price,distance,dia,mes
0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.01259,0.0,0.051601,0.587407,0.333333,0.0
1,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.017986,0.0,0.051601,0.587407,0.333333,0.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.503597,0.5,0.325222,0.587407,0.333333,0.0
3,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,0.0,1.0,1.0,0.0,0.541367,0.5,0.361962,0.587407,0.333333,0.0
4,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.113309,0.5,0.398867,0.587407,0.333333,0.0


In [37]:
target_scaler = preprocessing.MinMaxScaler()
target_scaler.fit_transform(df['price'].values.reshape(-1, 1))

array([[0.05160057],
       [0.05160057],
       [0.325222  ],
       ...,
       [0.93281381],
       [0.97377136],
       [0.97377136]])

## Split dataset into training and testing subsets

In [38]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns='price'), df['price'], test_size=0.3, random_state=0)

## Fit the LinearRegression model

In [39]:
model = LinearRegression()
model.fit(X_train, y_train)

## Predict using the trained model

In [40]:
y_pred = model.predict(X_test)
y_pred

array([0.56436157, 0.57946777, 0.52270508, ..., 0.55911255, 0.50500488,
       0.54544067])


## Scale back the data

In [42]:
y_pred = target_scaler.inverse_transform(y_pred.reshape(-1, 1))
y_pred

array([[0.56436157],
       [0.57946777],
       [0.52270508],
       ...,
       [0.55911255],
       [0.50500488],
       [0.54544067]])

## Assess the performance of the model

In [31]:

print( 
  'mean_squared_error : ', mean_squared_error(y_test, y_pred)) 
print( 
  'mean_absolute_error : ', mean_absolute_error(y_test, y_pred)) 

mean_squared_error :  0.018409441486903384
mean_absolute_error :  0.10624428998624136


In [32]:
for test, pred in zip(y_test, y_pred):
    print(f"Predicted: {pred} \t Original: {test}\n")

Predicted: [0.56436157] 	 Original: 0.6291414732194343

Predicted: [0.57946777] 	 Original: 0.4180766759477404

Predicted: [0.52270508] 	 Original: 0.33018106331449654

Predicted: [0.57223511] 	 Original: 0.6169827174324924

Predicted: [0.60443115] 	 Original: 0.7704499398652322

Predicted: [0.23629761] 	 Original: 0.1182266009852217

Predicted: [0.60122681] 	 Original: 0.6246766726526847

Predicted: [0.67514038] 	 Original: 0.660839909715472

Predicted: [0.54467773] 	 Original: 0.35916107880125875

Predicted: [0.46472168] 	 Original: 0.36354350297378785

Predicted: [0.65722656] 	 Original: 0.4910786365059228

Predicted: [0.48345947] 	 Original: 0.561938810814373

Predicted: [0.64624023] 	 Original: 0.9014613572334711

Predicted: [0.24679565] 	 Original: 0.23447616850915204

Predicted: [0.18475342] 	 Original: 0.23447616850915204

Predicted: [0.50427246] 	 Original: 0.561938810814373

Predicted: [0.42971802] 	 Original: 0.6105244081256076

Predicted: [0.53887939] 	 Original: 0.30955401