In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.pipeline import Pipeline, make_pipeline
from category_encoders import OrdinalEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, r2_score, mean_squared_log_error
import joblib
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
df = pd.read_csv('nyc_clean.csv')

In [120]:
df.head()

Unnamed: 0,trip_duration,distance_km,DayOfWeek,TimeOfDay
0,455.0,1.502172,Monday,Afternoon
1,663.0,1.80866,Sunday,Night
2,429.0,1.483632,Wednesday,Evening
3,435.0,1.187038,Saturday,Afternoon
4,443.0,1.099625,Saturday,Evening


In [None]:
sns.set(style="whitegrid")  
plt.figure(figsize=(12, 8))  
sns.boxplot(data=df, x='distance_km')           
plt.show() 

In [None]:
sns.set(style="whitegrid")  
plt.figure(figsize=(12, 8))  
sns.boxplot(data=df, x='trip_duration')           
plt.show() 

In [3]:
target = 'trip_duration'
X = df.drop(columns = target)
y = df[target]

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [5]:
X_train, X_val, y_train, y_val = train_test_split(X_train,y_train, test_size=0.2, random_state=42)

## Baseline

In [6]:
baseline_train = [y_train.median()] * len(y_train)
baseline_val = [y_train.median()] * len(y_val)
baseline_test = [y_train.median()] * len(y_test)

In [7]:
r2_train = r2_score(y_train,baseline_train)
r2_val = r2_score(y_val,baseline_val)
r2_test = r2_score(y_test,baseline_test)
rmsle_train = np.sqrt(mean_squared_log_error(y_train, baseline_train))
rmsle_val = np.sqrt(mean_squared_log_error(y_val, baseline_val))
rmsle_test = np.sqrt(mean_squared_log_error(y_test, baseline_test))

In [8]:
print("Baseline Model Evaluation")
print('--------------------------')
print("Training Accuracy:", round(r2_train, 4))
print("Validation Accuracy:", round(r2_val, 4))
print("Testing Accuracy:", round(r2_test, 4))
print("Training Error:", round(rmsle_train, 4))
print("Validation Error:", round(rmsle_val, 4))
print("Testing Error:", round(rmsle_test, 4))

Baseline Model Evaluation
--------------------------
Training Accuracy: -0.0443
Validation Accuracy: -0.0458
Testing Accuracy: -0.0441
Training Error: 0.801
Validation Error: 0.8028
Testing Error: 0.8087


## Support Vector Regression

In [6]:
from sklearn.svm import SVR

In [8]:
svr_m = make_pipeline(
    OneHotEncoder(),
    StandardScaler(),
    SVR(C=1.0, epsilon=0.2, max_iter=5000, verbose=True))

In [9]:
svr_m.fit(X_train, y_train)

[LibSVM]



Pipeline(steps=[('onehotencoder',
                 OneHotEncoder(cols=['DayOfWeek', 'TimeOfDay'])),
                ('standardscaler', StandardScaler()),
                ('svr', SVR(epsilon=0.2, max_iter=5000, verbose=True))])

In [10]:
r2_train = r2_score(y_train, svr_m.predict(X_train))
r2_val = r2_score(y_val, svr_m.predict(X_val))
rmsle_train = np.sqrt(mean_squared_log_error(y_train, svr_m.predict(X_train)))
rmsle_val = np.sqrt(mean_squared_log_error(y_val, svr_m.predict(X_val)))

In [11]:
print("Support Vector Regression Model Evaluation")
print('-------------------------------------------')
print("Training Accuracy:", round(r2_train, 4))
print("Validation Accuracy:", round(r2_val, 4))
print("Training Error:", round(rmsle_train, 4))
print("Validation Error:", round(rmsle_val, 4))

Support Vector Regression Model Evaluation
-------------------------------------------
Training Accuracy: -0.2865
Validation Accuracy: -0.2797
Training Error: 0.9402
Validation Error: 0.9409


## Decision Tree Regressor

In [124]:
from sklearn.tree import DecisionTreeRegressor, plot_tree

In [131]:
X_train = onehot.fit_transform(X_train)
X_train = minmax.fit_transform(X_train)

In [132]:
dtr_m = DecisionTreeRegressor()

In [136]:
param_dist = {
    'max_depth': randint(3, 20),  
    'min_samples_split': randint(2, 20),
    'min_samples_leaf': randint(1, 20),
    'max_features': ['auto', 'sqrt', 'log2', None]
}

In [137]:
random_search = RandomizedSearchCV(
    dtr_m, param_distributions=param_dist, n_iter=100, cv=5, verbose=1, n_jobs=-1
)

In [138]:
random_search.fit(X_train, y_train)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  6.0min
[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed: 15.4min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 17.6min finished


RandomizedSearchCV(cv=5, estimator=DecisionTreeRegressor(), n_iter=100,
                   n_jobs=-1,
                   param_distributions={'max_depth': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000020A96C9B4F0>,
                                        'max_features': ['auto', 'sqrt', 'log2',
                                                         None],
                                        'min_samples_leaf': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000020A96E77B80>,
                                        'min_samples_split': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000020A971C3520>},
                   verbose=1)

In [139]:
best_params = random_search.best_params_
print("Best Hyperparameters:", best_params)

Best Hyperparameters: {'max_depth': 10, 'max_features': 'auto', 'min_samples_leaf': 19, 'min_samples_split': 6}


In [140]:
dtr_model = random_search.best_estimator_

In [142]:
X_val = onehot.fit_transform(X_val)
X_val = minmax.fit_transform(X_val)

In [143]:
X_test = onehot.fit_transform(X_test)
X_test = minmax.fit_transform(X_test)

In [144]:
train_pred = dtr_model.predict(X_train)
val_pred = dtr_model.predict(X_val)
test_pred = dtr_model.predict(X_test)

In [145]:
r2_train = r2_score(y_train, train_pred)
r2_val = r2_score(y_val, val_pred)
r2_test = r2_score(y_test, test_pred)
rmsle_train = np.sqrt(mean_squared_log_error(y_train, train_pred))
rmsle_val = np.sqrt(mean_squared_log_error(y_val, val_pred))
rmsle_test = np.sqrt(mean_squared_log_error(y_test, test_pred))

In [146]:
print("Decision Tree Model using Randomized Search Evaluation")
print('-------------------------------------------------------')
print("Training Accuracy:", round(r2_train, 4))
print("Validation Accuracy:", round(r2_val, 4))
print("Testing Accuracy:", round(r2_test, 4))
print("Training Error:", round(rmsle_train, 4))
print("Validation Error:", round(rmsle_val, 4))
print("Testing Error:", round(rmsle_test, 4))

Decision Tree Model Evaluation
-------------------------------
Training Accuracy: 0.6421
Validation Accuracy: 0.4076
Testing Accuracy: 0.4971
Training Error: 0.4274
Validation Error: 0.5187
Testing Error: 0.4807


In [148]:
dtr = make_pipeline(
    OneHotEncoder(),
    MinMaxScaler(),
    DecisionTreeRegressor(max_depth=12, random_state=42)
)

In [152]:
dtr.fit(X_train, y_train)

Pipeline(steps=[('onehotencoder',
                 OneHotEncoder(cols=['DayOfWeek', 'TimeOfDay'])),
                ('minmaxscaler', MinMaxScaler()),
                ('decisiontreeregressor',
                 DecisionTreeRegressor(max_depth=12, random_state=42))])

In [153]:
train_pred = dtr.predict(X_train)
val_pred = dtr.predict(X_val)
test_pred = dtr.predict(X_test)

In [154]:
r2_train = r2_score(y_train, train_pred)
r2_val = r2_score(y_val, val_pred)
r2_test = r2_score(y_test, test_pred)
rmsle_train = np.sqrt(mean_squared_log_error(y_train, train_pred))
rmsle_val = np.sqrt(mean_squared_log_error(y_val, val_pred))
rmsle_test = np.sqrt(mean_squared_log_error(y_test, test_pred))

In [155]:
print("Decision Tree ModelEvaluation")
print('-------------------------------')
print("Training Accuracy:", round(r2_train, 4))
print("Validation Accuracy:", round(r2_val, 4))
print("Testing Accuracy:", round(r2_test, 4))
print("Training Error:", round(rmsle_train, 4))
print("Validation Error:", round(rmsle_val, 4))
print("Testing Error:", round(rmsle_test, 4))

Decision Tree ModelEvaluation
-------------------------------
Training Accuracy: 0.6473
Validation Accuracy: 0.6384
Testing Accuracy: 0.638
Training Error: 0.4231
Validation Error: 0.4299
Testing Error: 0.4301


In [156]:
joblib.dump(dtr, 'DT_final.pkl')

['DT_final.pkl']

## XGBoost

In [9]:
from xgboost import XGBRegressor

In [10]:
xgb = make_pipeline(
    OneHotEncoder(),
    MinMaxScaler(),
    XGBRegressor(objective ='reg:squarederror', n_estimators= 100, max_depth=6, learning_rate=0.2, subsample=0.8, random_state=42))

In [11]:
xgb.fit(X_train, y_train)

Pipeline(steps=[('onehotencoder',
                 OneHotEncoder(cols=['DayOfWeek', 'TimeOfDay'])),
                ('minmaxscaler', MinMaxScaler()),
                ('xgbregressor',
                 XGBRegressor(base_score=None, booster=None, callbacks=None,
                              colsample_bylevel=None, colsample_bynode=None,
                              colsample_bytree=None, early_stopping_rounds=None,
                              enable_categorical=False, eval_metric=None,
                              feature_types=None, gamma=None...id=None,
                              grow_policy=None, importance_type=None,
                              interaction_constraints=None, learning_rate=0.2,
                              max_bin=None, max_cat_threshold=None,
                              max_cat_to_onehot=None, max_delta_step=None,
                              max_depth=6, max_leaves=None,
                              min_child_weight=None, missing=nan,
                  

In [12]:
after_pred = X_train.copy()

In [13]:
after_pred['ground_truth'] = y_train
after_pred['pred'] = xgb.predict(X_train)

In [14]:
after_pred[after_pred['pred']<0]

Unnamed: 0,distance_km,DayOfWeek,TimeOfDay,ground_truth,pred
466022,0.0,Friday,Afternoon,0.0,-5.884634
689819,0.0,Saturday,Morning,0.0,-4.541495
1090246,0.0,Thursday,Night,0.0,-9.539333
1088256,0.0,Sunday,Afternoon,0.0,-10.877093
695154,0.0,Saturday,Afternoon,0.0,-3.150624
...,...,...,...,...,...
40353,0.0,Tuesday,Evening,0.0,-2.453859
852090,0.0,Monday,Night,0.0,-10.824129
1064004,0.0,Monday,Night,0.0,-10.824129
577360,0.0,Sunday,Evening,0.0,-2.649105


In [15]:
after_pred[(after_pred['pred']<0) & (after_pred['ground_truth']==0)]

Unnamed: 0,distance_km,DayOfWeek,TimeOfDay,ground_truth,pred
466022,0.0,Friday,Afternoon,0.0,-5.884634
689819,0.0,Saturday,Morning,0.0,-4.541495
1090246,0.0,Thursday,Night,0.0,-9.539333
1088256,0.0,Sunday,Afternoon,0.0,-10.877093
695154,0.0,Saturday,Afternoon,0.0,-3.150624
...,...,...,...,...,...
40353,0.0,Tuesday,Evening,0.0,-2.453859
852090,0.0,Monday,Night,0.0,-10.824129
1064004,0.0,Monday,Night,0.0,-10.824129
577360,0.0,Sunday,Evening,0.0,-2.649105


In [16]:
after_pred[(after_pred['pred']<0) & (after_pred['ground_truth']!=0)]

Unnamed: 0,distance_km,DayOfWeek,TimeOfDay,ground_truth,pred
32272,0.000424,Tuesday,Night,31.0,-0.734958
555033,0.000424,Tuesday,Night,36.0,-16.29228
574694,0.000424,Sunday,Afternoon,10.0,-2.570963
142525,0.000424,Tuesday,Night,5.0,-0.734958
307685,0.000424,Monday,Afternoon,8.0,-2.448628
2591,0.000424,Friday,Night,6.0,-1.592067


In [17]:
after_pred.loc[after_pred['pred'] < 0, 'pred'] = 0

In [18]:
val_after_pred = X_val.copy()

In [19]:
val_after_pred['ground_truth'] = y_val
val_after_pred['pred'] = xgb.predict(X_val)

In [20]:
val_after_pred[val_after_pred['pred']<0]

Unnamed: 0,distance_km,DayOfWeek,TimeOfDay,ground_truth,pred
478044,0.0,Saturday,Afternoon,0.0,-3.150624
607448,0.0,Thursday,Night,0.0,-9.539333
1048529,0.0,Monday,Afternoon,0.0,-10.754762
1240659,0.0,Friday,Afternoon,0.0,-5.884634
338503,0.0,Sunday,Afternoon,0.0,-10.877093
...,...,...,...,...,...
628670,0.0,Friday,Afternoon,0.0,-5.884634
405056,0.0,Saturday,Morning,0.0,-4.541495
1274168,0.0,Monday,Afternoon,0.0,-10.754762
1063024,0.0,Tuesday,Night,0.0,-30.453445


In [21]:
val_after_pred[(val_after_pred['pred']<0) & (val_after_pred['ground_truth']==0)]

Unnamed: 0,distance_km,DayOfWeek,TimeOfDay,ground_truth,pred
478044,0.0,Saturday,Afternoon,0.0,-3.150624
607448,0.0,Thursday,Night,0.0,-9.539333
1048529,0.0,Monday,Afternoon,0.0,-10.754762
1240659,0.0,Friday,Afternoon,0.0,-5.884634
338503,0.0,Sunday,Afternoon,0.0,-10.877093
...,...,...,...,...,...
628670,0.0,Friday,Afternoon,0.0,-5.884634
405056,0.0,Saturday,Morning,0.0,-4.541495
1274168,0.0,Monday,Afternoon,0.0,-10.754762
1063024,0.0,Tuesday,Night,0.0,-30.453445


In [22]:
val_after_pred.loc[val_after_pred['pred'] < 0, 'pred'] = 0

In [23]:
test_after_pred = X_test.copy()

In [24]:
test_after_pred['ground_truth'] = y_test
test_after_pred['pred'] = xgb.predict(X_test)

In [25]:
test_after_pred.loc[test_after_pred['pred'] < 0, 'pred'] = 0

In [26]:
r2_train = r2_score(y_train, after_pred['pred'])
r2_val = r2_score(y_val, val_after_pred['pred'])
r2_test = r2_score(y_test, test_after_pred['pred'])
rmsle_train = np.sqrt(mean_squared_log_error(y_train, after_pred['pred']))
rmsle_val = np.sqrt(mean_squared_log_error(y_val, val_after_pred['pred']))
rmsle_test = np.sqrt(mean_squared_log_error(y_test, test_after_pred['pred']))

In [27]:
print("Random Forest Model Evaluation")
print('-------------------------------')
print("Training Accuracy:", round(r2_train, 4))
print("Validation Accuracy:", round(r2_val, 4))
print("Testing Accuracy:", round(r2_test, 4))
print("Training Error:", round(rmsle_train, 4))
print("Validation Error:", round(rmsle_val, 4))
print("Testing Error:", round(rmsle_test, 4))

Random Forest Model Evaluation
-------------------------------
Training Accuracy: 0.6455
Validation Accuracy: 0.6428
Testing Accuracy: 0.6427
Training Error: 0.438
Validation Error: 0.4423
Testing Error: 0.4433


In [28]:
joblib.dump(xgb, 'XGB_final.pkl')

['XGB_final.pkl']

## Random Forest

In [29]:
from sklearn.ensemble import RandomForestRegressor

In [39]:
forest = RandomForestRegressor()

In [43]:
param_dist = {
    'n_estimators': randint(100, 800),  # Randomly sample from 100 to 1000
    'max_depth': randint(5, 20),
    'min_samples_split': randint(2, 20),
    'min_samples_leaf': randint(1, 20),
    'max_features': ['auto', 'sqrt', 'log2'],
    'bootstrap': [True, False]
}

In [44]:
random_search = RandomizedSearchCV(
    forest, param_distributions=param_dist, n_iter=50, cv=5, verbose=1, n_jobs=-1
)

In [47]:
X_train = onehot.fit_transform(X_train)
X_train = minmax.fit_transform(X_train)

In [None]:
random_search.fit(X_train, y_train)

Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


In [None]:
best_params = random_search.best_params_
print("Best Hyperparameters:", best_params)

In [None]:
rf_model = random_search.best_estimator_

In [None]:
X_val = onehot.fit_transform(X_val)
X_val = minmax.fit_transform(X_val)

In [None]:
X_test = onehot.fit_transform(X_test)
X_test = minmax.fit_transform(X_test)

In [None]:
train_pred = rf_model.predict(X_train)
val_pred = rf_model.predict(X_val)
test_pred = rf_model.predict(X_test)

In [None]:
r2_train = r2_score(y_train, train_pred)
r2_val = r2_score(y_val, val_pred)
r2_test = r2_score(y_test, test_pred)
rmsle_train = np.sqrt(mean_squared_log_error(y_train, train_pred))
rmsle_val = np.sqrt(mean_squared_log_error(y_val, val_pred))
rmsle_test = np.sqrt(mean_squared_log_error(y_test, test_pred))

In [None]:
print("Random Forest Model using Randomized Search Evaluation")
print('-------------------------------------------------------')
print("Training Accuracy:", round(r2_train, 4))
print("Validation Accuracy:", round(r2_val, 4))
print("Testing Accuracy:", round(r2_test, 4))
print("Training Error:", round(rmsle_train, 4))
print("Validation Error:", round(rmsle_val, 4))
print("Testing Error:", round(rmsle_test, 4))

In [30]:
rf_m = make_pipeline(
    OneHotEncoder(),
    MinMaxScaler(),
    RandomForestRegressor(n_estimators=100, max_depth=12, random_state=42))

In [31]:
rf_m.fit(X_train, y_train)

Pipeline(steps=[('onehotencoder',
                 OneHotEncoder(cols=['DayOfWeek', 'TimeOfDay'])),
                ('minmaxscaler', MinMaxScaler()),
                ('randomforestregressor',
                 RandomForestRegressor(max_depth=12, random_state=42))])

In [32]:
train_pred =  rf_m.predict(X_train)
val_pred =  rf_m.predict(X_val)
test_pred = rf_m.predict(X_test)

In [103]:
train_pred[train_pred<0] = 0
val_pred[val_pred<0] = 0
test_pred[test_pred<0] = 0

In [35]:
r2_train = r2_score(y_train, train_pred)
r2_val = r2_score(y_val, val_pred)
r2_test = r2_score(y_test, test_pred)

In [36]:
rmsle_train = np.sqrt(mean_squared_log_error(y_train, train_pred))
rmsle_val = np.sqrt(mean_squared_log_error(y_val, val_pred))
rsmle_test = np.sqrt(mean_squared_log_error(y_test, test_pred))

In [37]:
print("Random Forest Model Evaluation")
print('-------------------------------')
print("Training Accuracy:", round(r2_train, 4))
print("Validation Accuracy:", round(r2_val, 4))
print("Testing Accuracy:", round(r2_test, 4))
print("Training Error:", round(rmsle_train, 4))
print("Validation Error:", round(rmsle_val, 4))
print("Testing Error:", round(rmsle_test, 4))

Random Forest Model Evaluation
-------------------------------
Training Accuracy: 0.6503
Validation Accuracy: 0.6423
Testing Accuracy: 0.6421
Training Error: 0.4219
Validation Error: 0.4285
Testing Error: 0.4433


In [38]:
joblib.dump(rf_m, 'RF_final.pkl')

['RF_final.pkl']

In [73]:
#r2_train = r2_score(y_train, rf_m.predict(X_train))
#r2_val = r2_score(y_val, rf_m.predict(X_val))
#rmsle_train = np.sqrt(mean_squared_log_error(y_train, rf_m.predict(X_train)))
#rmsle_val = np.sqrt(mean_squared_log_error(y_val, rf_m.predict(X_val)))

In [74]:
#print("Random Forest Model Evaluation")
#print('-------------------------------')
#print("Training Accuracy:", round(r2_train, 4))
#print("Validation Accuracy:", round(r2_val, 4))
#print("Training Error:", round(rmsle_train, 4))
#print("Validation Error:", round(rmsle_val, 4))

Random Forest Model Evaluation
-------------------------------
Training Accuracy: 0.6503
Validation Accuracy: 0.6423
Training Error: 0.4219
Validation Error: 0.4285


## Neural Network

In [12]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.optimizers import SGD,Adam

In [46]:
onehot = OneHotEncoder()
standard = StandardScaler()
minmax = MinMaxScaler()

In [15]:
X = onehot.fit_transform(X)



In [16]:
X = minmax.fit_transform(X)

In [17]:
X

array([[0.04827898, 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.05812934, 0.        , 1.        , ..., 1.        , 0.        ,
        0.        ],
       [0.04768313, 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       ...,
       [0.25137973, 0.        , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.03511312, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.03643506, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [18]:
y = np.array(y)

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [20]:
X_train, X_val, y_train, y_val = train_test_split(X_train,y_train, test_size=0.2, random_state=42)

In [88]:
model = Sequential()
model.add(Dense(64, input_shape=(X_train.shape[1],), activation='relu')) 
#model.add(Dense(256, activation='relu'))
#model.add(Dropout(.4, seed=42))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))
model.summary() 

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 64)                832       
_________________________________________________________________
dense_16 (Dense)             (None, 32)                2080      
_________________________________________________________________
dense_17 (Dense)             (None, 1)                 33        
Total params: 2,945
Trainable params: 2,945
Non-trainable params: 0
_________________________________________________________________


In [107]:
learn_rate=.2
adam = Adam(lr=learn_rate, beta_1=0.8, beta_2=0.8, epsilon=None, decay=0.01, amsgrad=False)
sgd = SGD(lr=learn_rate,momentum=.4,decay=0.01)



In [108]:
lr_reduction = ReduceLROnPlateau(monitor='val_loss', patience=5, verbose=1, factor=0.5, min_lr=0.00001,min_delta=0.001)

In [109]:
es = EarlyStopping(monitor='val_loss',
                   mode='min',
                   patience=5,
                   min_delta=0.01,
                   restore_best_weights = True)

In [110]:
model.compile(optimizer=adam, loss='mean_squared_logarithmic_error')

In [111]:
history = model.fit(X_train, y_train,
                    validation_data = (X_val, y_val),
                    epochs=30,
                    callbacks=[es,lr_reduction],
                    batch_size=50,
                    verbose=1)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


In [112]:
train_pred =  model.predict(X_train)
val_pred =  model.predict(X_val)
test_pred = model.predict(X_test)

In [113]:
train_pred[train_pred<0] = 0
val_pred[val_pred<0] = 0
test_pred[test_pred<0] = 0

In [114]:
r2_train = r2_score(y_train, train_pred)
r2_val = r2_score(y_val, val_pred)
r2_test = r2_score(y_test, test_pred)

In [115]:
rmsle_train = np.sqrt(mean_squared_log_error(y_train, train_pred))
rmsle_val = np.sqrt(mean_squared_log_error(y_val, val_pred))
rmsle_test = np.sqrt(mean_squared_log_error(y_test, test_pred))

In [117]:
print("Neural Network Model Evaluation")
print('-------------------------------')
print("Training Accuracy:", round(r2_train, 4))
print("Validation Accuracy:", round(r2_val, 4))
print("Testing Accuracy:", round(r2_test, 4))
print("Training Error:", round(rmsle_train, 4))
print("Validation Error:", round(rmsle_val, 4))
print("Testing Error:", round(rmsle_test, 4))

Neural Network Model Evaluation
-------------------------------
Training Accuracy: 0.6297
Validation Accuracy: 0.6307
Testing Accuracy: 0.6304
Training Error: 0.4375
Validation Error: 0.4376
Testing Error: 0.4369


In [None]:
#model.save("NN_final.h5")