In [1]:
# Library Imports.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns

# Allows plots to appear directly in the notebook.
%matplotlib inline

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score 

import pickle

In [2]:
# Load the cleaned trips CSV into a DataFrame.
trips = pd.read_csv(
    'cleaned_trips.csv',
    sep=',',
    keep_default_na=True,
    skipinitialspace=True,
)

In [3]:
# Preview the first rows to confirm the CSV parsed as expected.
trips.head()

Unnamed: 0,DAYOFSERVICE,TRIPID,LINEID,ROUTEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,LASTUPDATE,...,month,day,dayOfWeek,arrival_diff,departure_diff,planned_duration,actual_duration,duration_diff,weekend,rushHour
0,2018-02-07,6253783,68,68_80,1,87245,84600,87524,84600,28-FEB-18 12:05:11,...,2,7,2,279,0,2645,2924,279,0,0
1,2018-02-07,6262138,25B,25B_271,2,30517,26460,32752,26460,28-FEB-18 12:05:11,...,2,7,2,2235,0,4057,6292,2235,0,0
2,2018-02-07,6254942,45A,45A_70,2,35512,32100,36329,32082,28-FEB-18 12:05:11,...,2,7,2,817,-18,3412,4247,835,0,0
3,2018-02-07,6259460,25A,25A_273,1,57261,54420,58463,54443,28-FEB-18 12:05:11,...,2,7,2,1202,23,2841,4020,1179,0,0
4,2018-02-07,6253175,14,14_15,1,85383,81600,84682,81608,28-FEB-18 12:05:11,...,2,7,2,-701,8,3783,3074,-709,0,0


In [4]:
# List the dtype pandas assigned to each column on load.
trips.dtypes

DAYOFSERVICE        object
TRIPID               int64
LINEID              object
ROUTEID             object
DIRECTION            int64
PLANNEDTIME_ARR      int64
PLANNEDTIME_DEP      int64
ACTUALTIME_ARR       int64
ACTUALTIME_DEP       int64
LASTUPDATE          object
NOTE                object
planDep_time         int64
month                int64
day                  int64
dayOfWeek            int64
arrival_diff         int64
departure_diff       int64
planned_duration     int64
actual_duration      int64
duration_diff        int64
weekend              int64
rushHour             int64
dtype: object

In [5]:
# Cast the listed integer columns to int32.
for int_col in ('DIRECTION', 'planDep_time', 'month', 'day', 'dayOfWeek', 'weekend', 'rushHour'):
    trips[int_col] = trips[int_col].astype('int32')

# Store the line identifier as a pandas categorical.
trips['LINEID'] = trips['LINEID'].astype('category')

## Encoding Route Numbers

In [6]:
# Categorical encoding: add the integer code of each LINEID category as a new column.
# NOTE(review): the column is named num_routeID but is derived from LINEID, not ROUTEID.
trips = trips.assign(num_routeID=trips['LINEID'].cat.codes)
trips.head()

Unnamed: 0,DAYOFSERVICE,TRIPID,LINEID,ROUTEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,LASTUPDATE,...,day,dayOfWeek,arrival_diff,departure_diff,planned_duration,actual_duration,duration_diff,weekend,rushHour,num_routeID
0,2018-02-07,6253783,68,68_80,1,87245,84600,87524,84600,28-FEB-18 12:05:11,...,7,2,279,0,2645,2924,279,0,0,106
1,2018-02-07,6262138,25B,25B_271,2,30517,26460,32752,26460,28-FEB-18 12:05:11,...,7,2,2235,0,4057,6292,2235,0,0,39
2,2018-02-07,6254942,45A,45A_70,2,35512,32100,36329,32082,28-FEB-18 12:05:11,...,7,2,817,-18,3412,4247,835,0,0,85
3,2018-02-07,6259460,25A,25A_273,1,57261,54420,58463,54443,28-FEB-18 12:05:11,...,7,2,1202,23,2841,4020,1179,0,0,38
4,2018-02-07,6253175,14,14_15,1,85383,81600,84682,81608,28-FEB-18 12:05:11,...,7,2,-701,8,3783,3074,-709,0,0,13


In [7]:
# Re-check the dtypes after the casts and the added num_routeID column.
trips.dtypes

DAYOFSERVICE          object
TRIPID                 int64
LINEID              category
ROUTEID               object
DIRECTION              int32
PLANNEDTIME_ARR        int64
PLANNEDTIME_DEP        int64
ACTUALTIME_ARR         int64
ACTUALTIME_DEP         int64
LASTUPDATE            object
NOTE                  object
planDep_time           int32
month                  int32
day                    int32
dayOfWeek              int32
arrival_diff           int64
departure_diff         int64
planned_duration       int64
actual_duration        int64
duration_diff          int64
weekend                int32
rushHour               int32
num_routeID            int16
dtype: object

## Shuffling Dataset:

In [8]:
# Row shuffle inspired from Geeks for Geeks: https://www.geeksforgeeks.org/pandas-how-to-shuffle-a-dataframe-rows/
# frac=1 returns every row in random order. The fixed random_state keeps that
# order, and the cross-validation folds later cut from shuf_trips, identical
# across kernel restarts.
shuf_trips = trips.sample(frac=1, random_state=42)
shuf_trips.head()

Unnamed: 0,DAYOFSERVICE,TRIPID,LINEID,ROUTEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,LASTUPDATE,...,day,dayOfWeek,arrival_diff,departure_diff,planned_duration,actual_duration,duration_diff,weekend,rushHour,num_routeID
1521470,2018-12-17,8461196,25B,25B_274,1,44204,41520,44455,41520,27-DEC-18 08:21:23,...,17,0,251,0,2684,2935,251,0,0,39
1302658,2018-02-19,6266698,27A,27A_4,1,24937,23700,25053,23709,28-FEB-18 14:04:41,...,19,0,116,9,1237,1344,107,0,0,45
816153,2018-05-23,6778752,39A,39A_43,2,39259,34200,39407,34200,14-JUN-18 12:49:16,...,23,2,148,0,5059,5207,148,0,0,67
1451794,2018-11-06,8110145,1,1_37,1,70676,68400,71521,68442,15-NOV-18 09:08:26,...,6,1,845,42,2276,3079,803,0,0,0
1268240,2018-04-11,6623767,102,102_9,2,41870,38700,41822,38684,20-APR-18 14:41:36,...,11,2,-48,-16,3170,3138,-32,0,0,1


## Splitting Shuffled Data: Train (70%) & Test (30%)

In [9]:
# Hold out 30% of the shuffled rows for testing; the remaining 70% is the training set.
# train_test_split shuffles again by itself (shuffle=True), seeded with random_state=42.
train, test = train_test_split(
    shuf_trips,
    test_size=0.3,
    shuffle=True,
    random_state=42,
)

In [10]:
# Display the training split; the rendered output elides the middle rows.
train

Unnamed: 0,DAYOFSERVICE,TRIPID,LINEID,ROUTEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,LASTUPDATE,...,day,dayOfWeek,arrival_diff,departure_diff,planned_duration,actual_duration,duration_diff,weekend,rushHour,num_routeID
1469526,2018-02-13,6248239,77A,77A_29,1,36029,31200,36674,31276,28-FEB-18 13:29:11,...,13,1,645,76,4829,5398,569,0,0,117
1153412,2018-07-13,7153343,16,16_20,1,74973,70200,76056,70375,24-JUL-18 14:23:17,...,13,4,1083,175,4773,5681,908,0,0,24
44974,2018-12-30,8576784,122,122_14,1,34815,32400,34839,32417,16-JAN-19 18:16:31,...,30,6,24,17,2415,2422,7,1,0,9
777352,2018-12-15,8167774,83A,83A_23,2,85710,81600,86249,81606,24-DEC-18 09:43:35,...,15,5,539,6,4110,4643,533,1,0,125
1956133,2018-07-19,7170397,4,4_10,1,76578,72900,76373,72871,28-JUL-18 10:09:47,...,19,3,-205,-29,3678,3502,-176,0,0,69
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1940735,2018-05-09,6740514,84,84_32,2,86166,84600,86502,84603,26-JUN-18 08:08:03,...,9,2,336,3,1566,1899,333,0,0,126
1653547,2018-06-01,6842786,59,59_11,2,47373,46200,47478,46156,14-JUN-18 14:15:55,...,1,4,105,-44,1173,1322,149,0,0,95
375778,2018-07-27,7172274,54A,54A_12,2,27322,24300,27791,24322,17-AUG-18 03:39:00,...,27,4,469,22,3022,3469,447,0,0,93
1519971,2018-07-23,7178872,75,75_19,2,81084,77400,81499,77180,16-AUG-18 14:11:40,...,23,0,415,-220,3684,4319,635,0,0,114


In [11]:
# Display the test split; the rendered output elides the middle rows.
test

Unnamed: 0,DAYOFSERVICE,TRIPID,LINEID,ROUTEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,LASTUPDATE,...,day,dayOfWeek,arrival_diff,departure_diff,planned_duration,actual_duration,duration_diff,weekend,rushHour,num_routeID
1144726,2018-07-13,7160912,29A,29A_15,2,36041,33600,36345,33620,24-JUL-18 14:23:17,...,13,4,304,20,2441,2725,284,0,0,48
1476551,2018-06-11,6869596,120,120_9,2,70841,69600,70992,69610,21-JUN-18 08:14:53,...,11,0,151,10,1241,1382,141,0,0,8
1514419,2018-03-09,6396821,239,239_26,1,72415,70200,73384,70213,19-MAR-18 14:35:19,...,9,4,969,13,2215,3171,956,0,0,36
1359215,2018-01-19,6107616,54A,54A_12,2,37005,33300,37112,33302,26-JAN-18 21:36:42,...,19,4,107,2,3705,3810,105,0,0,93
374449,2018-04-20,6635460,25B,25B_274,1,39220,36120,39280,36106,25-JUN-18 09:00:42,...,20,4,60,-14,3100,3174,74,0,0,39
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
572639,2018-09-24,7766115,38A,38A_20,1,58612,54000,58935,54018,22-OCT-18 17:24:51,...,24,0,323,18,4612,4917,305,0,0,63
167165,2018-05-24,6777680,130,130_10,1,72359,70500,72534,70571,14-JUN-18 13:01:06,...,24,3,175,71,1859,1963,104,0,0,12
538824,2018-08-12,7322111,32,32_57,1,83299,81300,83360,81267,28-AUG-18 10:23:52,...,12,6,61,-33,1999,2093,94,1,0,53
1260019,2018-10-29,8068472,38,38_14,2,37487,34200,38108,34200,09-NOV-18 23:25:32,...,29,0,621,0,3287,3908,621,0,0,62


## Multiple Linear Regression

<h2>Training</h2>

In [16]:
X = train[['month', 'day']]
y = train.duration_diff

In [17]:
linreg = LinearRegression().fit(X, y)

# Weights for each Feature
print("Features: \n", X)
print("Coeficients: \n", linreg.coef_)
print("\nIntercept: \n", linreg.intercept_)

feature_importance = pd.DataFrame({'feature': ['month', 'day'], 'importance':linreg.coef_})
feature_importance.sort_values('importance', ascending=False)

Features: 
          month  day
1469526      2   13
1153412      7   13
44974       12   30
777352      12   15
1956133      7   19
...        ...  ...
1940735      5    9
1653547      6    1
375778       7   27
1519971      7   23
1786807     11   14

[1425971 rows x 2 columns]
Coeficients: 
 [15.96078112  1.21030455]

Intercept: 
 211.80994258535623


Unnamed: 0,feature,importance
0,month,15.960781
1,day,1.210305


In [18]:
# Persist the fitted linear model to 'dummy_linreg_model.pkl' using the highest pickle protocol.
with open('dummy_linreg_model.pkl', mode='wb') as model_file:
    pickle.dump(linreg, model_file, protocol=pickle.HIGHEST_PROTOCOL)

<h2>Prediction & Evaluation on Training Data</h2>

In [None]:
# Predict on the same rows the model was fitted on.
train_linreg_predictions = linreg.predict(X)

# Put each actual duration_diff next to its prediction, aligned on the training index.
train_pred_column = pd.Series(train_linreg_predictions, index=y.index, name='pred_duration_diff')
train_actual_vs_pred_linreg = pd.concat([y, train_pred_column], axis=1)
train_actual_vs_pred_linreg.head(10)

In [None]:
# Function to output evaluation metrics
def printMetrics(testActualVal, predictions):
    """Print MAE, MSE, RMSE and R2 for a set of predictions.

    Parameters
    ----------
    testActualVal : observed target values.
    predictions : predicted values, in the same order as testActualVal.

    Returns
    -------
    None. The four scores are written to stdout, one per line.
    """
    # Regression error measures; RMSE is the square root of the MSE.
    mae = metrics.mean_absolute_error(testActualVal, predictions)
    mse = metrics.mean_squared_error(testActualVal, predictions)
    r2 = metrics.r2_score(testActualVal, predictions)
    for label, score in (("MAE: ", mae), ("MSE: ", mse), ("RMSE: ", mse**0.5), ("R2: ", r2)):
        print(label, score)

In [None]:
# Error measures of the model on the rows it was fitted on.
printMetrics(testActualVal=y, predictions=train_linreg_predictions)

<h2>Prediction & Evaluation on Testing Data</h2>

In [None]:
# Feature matrix and target taken from the held-out test split.
test_feature_cols = ['dayOfWeek', 'weekend', 'rushHour', 'planDep_time', 'planned_duration', 'DIRECTION', 'num_routeID', 'TRIPID']
X_test = test.loc[:, test_feature_cols]
y_test = test['duration_diff']

In [None]:
# Predict on the held-out test rows.
test_linreg_predictions = linreg.predict(X_test)

# Put each actual duration_diff next to its prediction, aligned on the test index.
test_pred_column = pd.Series(test_linreg_predictions, index=y_test.index, name='pred_duration_diff')
test_actual_vs_pred_linreg = pd.concat([y_test, test_pred_column], axis=1)
test_actual_vs_pred_linreg.head(10)

In [None]:
# Error measures of the model on the held-out test rows.
printMetrics(testActualVal=y_test, predictions=test_linreg_predictions)

<h2>Prediction & Evaluation on Full Data (5-Fold Cross-Validation):</h2>

<h3>1st Fold</h3>

In [None]:
# Cut the shuffled frame into five consecutive, near-equal folds.
# The row positions are split with NumPy and the rows taken with iloc, rather
# than handing the DataFrame itself to np.array_split: that route goes through
# DataFrame.swapaxes, which newer pandas releases deprecate.
fold_positions = np.array_split(np.arange(len(shuf_trips)), 5)
cv_lin1, cv_lin2, cv_lin3, cv_lin4, cv_lin5 = (shuf_trips.iloc[pos] for pos in fold_positions)

In [None]:
cvlin = cv_lin2
cvlin = cvlin.append(cv_lin3)
cvlin = cvlin.append(cv_lin4)
cvlin = cvlin.append(cv_lin5)
cvlin.shape

In [None]:
X_cvlin = cvlin[['dayOfWeek', 'weekend', 'rushHour', 'planDep_time', 'planned_duration', 'DIRECTION', 'num_routeID', 'TRIPID']]
y_cvlin = cvlin.duration_diff

In [None]:
full_linreg_predictions = linreg.predict(X_cvlin)

full_actual_vs_pred_linreg = pd.concat([y_cvlin, pd.DataFrame(full_linreg_predictions, columns=['pred_duration_diff'], index=y_cvlin.index)], axis=1)
full_actual_vs_pred_linreg.head(10)

In [None]:
X_cvlin = cv_lin1[['dayOfWeek', 'weekend', 'rushHour', 'planDep_time', 'planned_duration', 'DIRECTION', 'num_routeID', 'TRIPID']]
y_cvlin = cv_lin1.duration_diff

In [None]:
full_linreg_predictions = linreg.predict(X_cvlin)

full_actual_vs_pred_linreg = pd.concat([y_cvlin, pd.DataFrame(full_linreg_predictions, columns=['pred_duration_diff'], index=y_cvlin.index)], axis=1)
full_actual_vs_pred_linreg.head(10)

In [None]:
fold1_MAE = metrics.mean_absolute_error(y_cvlin, full_linreg_predictions)
fold1_MSE = metrics.mean_squared_error(y_cvlin, full_linreg_predictions)
fold1_RMSE = metrics.mean_squared_error(y_cvlin, full_linreg_predictions)**0.5
fold1_R2 = metrics.r2_score(y_cvlin, full_linreg_predictions)

print('Fold 1 MAE: ', fold1_MAE)
print('Fold 1 MSE: ', fold1_MSE)
print('Fold 1 RMSE: ', fold1_RMSE)
print('Fold 1 R2: ', fold1_R2)

<h3>2nd Fold</h3>

In [None]:
cvlin2 = cv_lin1
cvlin2 = cvlin2.append(cv_lin3)
cvlin2 = cvlin2.append(cv_lin4)
cvlin2 = cvlin2.append(cv_lin5)
cvlin2.shape

In [None]:
X_cvlin = cvlin2[['dayOfWeek', 'weekend', 'rushHour', 'planDep_time', 'planned_duration', 'DIRECTION', 'num_routeID', 'TRIPID']]
y_cvlin = cvlin2.duration_diff

In [None]:
full_linreg_predictions = linreg.predict(X_cvlin)

full_actual_vs_pred_linreg = pd.concat([y_cvlin, pd.DataFrame(full_linreg_predictions, columns=['pred_duration_diff'], index=y_cvlin.index)], axis=1)
full_actual_vs_pred_linreg.head(10)

In [None]:
X_cvlin = cv_lin2[['dayOfWeek', 'weekend', 'rushHour', 'planDep_time', 'planned_duration', 'DIRECTION', 'num_routeID', 'TRIPID']]
y_cvlin = cv_lin2.duration_diff

In [None]:
full_linreg_predictions = linreg.predict(X_cvlin)

full_actual_vs_pred_linreg = pd.concat([y_cvlin, pd.DataFrame(full_linreg_predictions, columns=['pred_duration_diff'], index=y_cvlin.index)], axis=1)
full_actual_vs_pred_linreg.head(10)

In [None]:
fold2_MAE = metrics.mean_absolute_error(y_cvlin, full_linreg_predictions)
fold2_MSE = metrics.mean_squared_error(y_cvlin, full_linreg_predictions)
fold2_RMSE = metrics.mean_squared_error(y_cvlin, full_linreg_predictions)**0.5
fold2_R2 = metrics.r2_score(y_cvlin, full_linreg_predictions)

print('Fold 2 MAE: ', fold2_MAE)
print('Fold 2 MSE: ', fold2_MSE)
print('Fold 2 RMSE: ', fold2_RMSE)
print('Fold 2 R2: ', fold2_R2)

<h3>3rd Fold</h3>

In [None]:
cvlin3 = cv_lin1
cvlin3 = cvlin3.append(cv_lin2)
cvlin3 = cvlin3.append(cv_lin4)
cvlin3 = cvlin3.append(cv_lin5)
cvlin3.shape

In [None]:
X_cvlin = cvlin3[['dayOfWeek', 'weekend', 'rushHour', 'planDep_time', 'planned_duration', 'DIRECTION', 'num_routeID', 'TRIPID']]
y_cvlin = cvlin3.duration_diff

In [None]:
full_linreg_predictions = linreg.predict(X_cvlin)

full_actual_vs_pred_linreg = pd.concat([y_cvlin, pd.DataFrame(full_linreg_predictions, columns=['pred_duration_diff'], index=y_cvlin.index)], axis=1)
full_actual_vs_pred_linreg.head(10)

In [None]:
X_cvlin = cv_lin3[['dayOfWeek', 'weekend', 'rushHour', 'planDep_time', 'planned_duration', 'DIRECTION', 'num_routeID', 'TRIPID']]
y_cvlin = cv_lin3.duration_diff

In [None]:
full_linreg_predictions = linreg.predict(X_cvlin)

full_actual_vs_pred_linreg = pd.concat([y_cvlin, pd.DataFrame(full_linreg_predictions, columns=['pred_duration_diff'], index=y_cvlin.index)], axis=1)
full_actual_vs_pred_linreg.head(10)

In [None]:
fold3_MAE = metrics.mean_absolute_error(y_cvlin, full_linreg_predictions)
fold3_MSE = metrics.mean_squared_error(y_cvlin, full_linreg_predictions)
fold3_RMSE = metrics.mean_squared_error(y_cvlin, full_linreg_predictions)**0.5
fold3_R2 = metrics.r2_score(y_cvlin, full_linreg_predictions)

print('Fold 3 MAE: ', fold3_MAE)
print('Fold 3 MSE: ', fold3_MSE)
print('Fold 3 RMSE: ', fold3_RMSE)
print('Fold 3 R2: ', fold3_R2)

<h3>4th Fold</h3>

In [None]:
cvlin4 = cv_lin1
cvlin4 = cvlin4.append(cv_lin2)
cvlin4 = cvlin4.append(cv_lin3)
cvlin4 = cvlin4.append(cv_lin5)
cvlin4.shape

In [None]:
X_cvlin = cvlin4[['dayOfWeek', 'weekend', 'rushHour', 'planDep_time', 'planned_duration', 'DIRECTION', 'num_routeID', 'TRIPID']]
y_cvlin = cvlin4.duration_diff

In [None]:
full_linreg_predictions = linreg.predict(X_cvlin)

full_actual_vs_pred_linreg = pd.concat([y_cvlin, pd.DataFrame(full_linreg_predictions, columns=['pred_duration_diff'], index=y_cvlin.index)], axis=1)
full_actual_vs_pred_linreg.head(10)

In [None]:
X_cvlin = cv_lin4[['dayOfWeek', 'weekend', 'rushHour', 'planDep_time', 'planned_duration', 'DIRECTION', 'num_routeID', 'TRIPID']]
y_cvlin = cv_lin4.duration_diff

In [None]:
full_linreg_predictions = linreg.predict(X_cvlin)

full_actual_vs_pred_linreg = pd.concat([y_cvlin, pd.DataFrame(full_linreg_predictions, columns=['pred_duration_diff'], index=y_cvlin.index)], axis=1)
full_actual_vs_pred_linreg.head(10)

In [None]:
fold4_MAE = metrics.mean_absolute_error(y_cvlin, full_linreg_predictions)
fold4_MSE = metrics.mean_squared_error(y_cvlin, full_linreg_predictions)
fold4_RMSE = metrics.mean_squared_error(y_cvlin, full_linreg_predictions)**0.5
fold4_R2 = metrics.r2_score(y_cvlin, full_linreg_predictions)

print('Fold 4 MAE: ', fold4_MAE)
print('Fold 4 MSE: ', fold4_MSE)
print('Fold 4 RMSE: ', fold4_RMSE)
print('Fold 4 R2: ', fold4_R2)

<h3>5th Fold</h3>

In [None]:
cvlin5 = cv_lin1
cvlin5 = cvlin5.append(cv_lin2)
cvlin5 = cvlin5.append(cv_lin3)
cvlin5 = cvlin5.append(cv_lin4)
cvlin5.shape

In [None]:
X_cvlin = cvlin5[['dayOfWeek', 'weekend', 'rushHour', 'planDep_time', 'planned_duration', 'DIRECTION', 'num_routeID', 'TRIPID']]
y_cvlin = cvlin5.duration_diff

In [None]:
full_linreg_predictions = linreg.predict(X_cvlin)

full_actual_vs_pred_linreg = pd.concat([y_cvlin, pd.DataFrame(full_linreg_predictions, columns=['pred_duration_diff'], index=y_cvlin.index)], axis=1)
full_actual_vs_pred_linreg.head(10)

In [None]:
X_cvlin = cv_lin5[['dayOfWeek', 'weekend', 'rushHour', 'planDep_time', 'planned_duration', 'DIRECTION', 'num_routeID', 'TRIPID']]
y_cvlin = cv_lin5.duration_diff

In [None]:
full_linreg_predictions = linreg.predict(X_cvlin)

full_actual_vs_pred_linreg = pd.concat([y_cvlin, pd.DataFrame(full_linreg_predictions, columns=['pred_duration_diff'], index=y_cvlin.index)], axis=1)
full_actual_vs_pred_linreg.head(10)

In [None]:
fold5_MAE = metrics.mean_absolute_error(y_cvlin, full_linreg_predictions)
fold5_MSE = metrics.mean_squared_error(y_cvlin, full_linreg_predictions)
fold5_RMSE = metrics.mean_squared_error(y_cvlin, full_linreg_predictions)**0.5
fold5_R2 = metrics.r2_score(y_cvlin, full_linreg_predictions)

print('Fold 5 MAE: ', fold5_MAE)
print('Fold 5 MSE: ', fold5_MSE)
print('Fold 5 RMSE: ', fold5_RMSE)
print('Fold 5 R2: ', fold5_R2)

<h3>5-Fold Cross-Validation Metrics:</h3>

In [None]:
# Average each score over the five folds.
# The RMSE reported here is the mean of the per-fold RMSEs, not the square root of the mean MSE.
n_folds = 5
cv_MAE = (fold1_MAE + fold2_MAE + fold3_MAE + fold4_MAE + fold5_MAE) / n_folds
cv_MSE = (fold1_MSE + fold2_MSE + fold3_MSE + fold4_MSE + fold5_MSE) / n_folds
cv_RMSE = (fold1_RMSE + fold2_RMSE + fold3_RMSE + fold4_RMSE + fold5_RMSE) / n_folds
cv_R2 = (fold1_R2 + fold2_R2 + fold3_R2 + fold4_R2 + fold5_R2) / n_folds

for label, score in (("MAE: ", cv_MAE), ("MSE: ", cv_MSE), ("RMSE: ", cv_RMSE), ("R2: ", cv_R2)):
    print(label, score)