In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Reproducibility set-up: pin every pseudo-random source this cell knows about.
import os
import random

import numpy as np

# One seed shared by all generators below (each could be given its own value).
seed_value = 0

# 1. Hash seed exposed through the process environment.
# NOTE(review): Python is understood to read PYTHONHASHSEED at interpreter
# start-up, so this assignment is expected to influence only processes
# launched afterwards -- confirm that this is sufficient here.
os.environ['PYTHONHASHSEED'] = str(seed_value)

# 2. Python's built-in `random` module.
random.seed(seed_value)

# 3. NumPy's global pseudo-random generator.
np.random.seed(seed_value)

In [3]:
def load_data():
    """Read ``dataset_final.csv`` and build the train/test arrays for the trip model.

    Features: a fixed list of weather, calendar and lagged-demand columns is
    selected, the categorical ones are one-hot encoded, and the rows are then
    averaged per ``trip_start_timestamp`` so that there is one feature row per
    timestamp.

    Targets: the ``Trips`` column is reshaped, in file order, into rows of
    ``n_areas`` (77) values. The code therefore assumes the file holds 77
    consecutive rows for every distinct timestamp.

    Returns
    -------
    tuple
        ``(x, x_test, y, y_test)`` NumPy arrays from an 85 % / 15 % split
        with a fixed ``random_state``.

    NOTE(review): this function is handed to ``hyperas.optim.minimize`` as its
    ``data`` argument. hyperas is understood to re-read the function's source
    text, so keep the ``def`` line and the final result line on single lines
    and keep the result names identical to the parameters of ``create_model``
    -- confirm against the hyperas documentation before restructuring.
    """

    def dummie_and_drop(df, name):
        """Replace column ``name`` of ``df`` by one indicator column per distinct value.

        The new columns are called ``<name>_<value>`` and are appended at the
        end; a new frame is returned.
        """
        dummies = pd.get_dummies(df[name]).rename(columns = lambda x: name + '_' + str(x))
        # Concatenate and drop in one expression; no in-place mutation is needed.
        return pd.concat([df, dummies], axis = 1).drop(columns = [name])


    df = pd.read_csv('dataset_final.csv')

    # Take the features needed, and build new ones

    x = df[['temperature', 'relative_humidity', 'wind_direction', 'wind_speed', 'precipitation', 'sky_level',
            'daytype', 'Day Name', 'Month', 'Hour', 'Quarter', 'Fare Last Month', 'Tips Last Month', 'Trips Last Hour',
            'Trips Last Week (Same Hour)', 'Trips 2 Weeks Ago (Same Hour)', 'trip_start_timestamp']]

    categorical_variables = ['sky_level', 'daytype', 'Day Name', 'Month', 'Hour', 'Quarter']
    for var in categorical_variables:
        x = dummie_and_drop(x, name = var)

    n_areas = 77
    # Targets: one row per timestamp, one column per area, taken in file order.
    y = df['Trips'].to_numpy()
    y = np.reshape(y, [-1, n_areas])
    # sort = False keeps the groups in order of first appearance, i.e. in the
    # same file order used for y above. With the default (sorted keys) the
    # feature rows could end up in a different order than the target rows.
    x = x.groupby('trip_start_timestamp', sort = False).mean()
    x = x.to_numpy()
    assert len(y) == len(x), 'feature rows and target rows differ in number'
    x, x_test, y, y_test = train_test_split(x,y, test_size = 0.15, random_state = 2020)


    return (x, x_test, y, y_test)

In [4]:
# x, x_test, y, y_test = load_data()
# print(x.shape)
# print(y.shape)

In [5]:
def plot_results(history):
    """Show the training curves stored on a Keras-style history object.

    Two figures are displayed one after the other -- mean absolute error,
    then loss -- each with the training series and the validation series
    plotted against the epoch index.

    Parameters
    ----------
    history : object
        Must expose a ``history`` mapping with the keys ``'mae'``,
        ``'val_mae'``, ``'loss'`` and ``'val_loss'``.
    """
    import matplotlib.pyplot as plt

    curves = history.history

    # One entry per figure: (training key, validation key, title, y-axis label).
    figure_specs = (
        ('mae', 'val_mae', 'Model mean absolute error', 'MAE'),
        ('loss', 'val_loss', 'Model loss', 'Loss'),
    )

    for train_key, val_key, heading, y_label in figure_specs:
        plt.plot(curves[train_key])
        plt.plot(curves[val_key])
        plt.title(heading)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='upper left')
        plt.show()

In [6]:
def create_model(x, x_test, y, y_test): #n_areas, features, x_train, y_train):
    """Model template for the hyperas search: build, train and score one dense network.

    The double-brace ``choice(...)`` expressions below are search-space
    placeholders; this function is passed as ``model`` to
    ``hyperas.optim.minimize`` elsewhere in the notebook rather than being
    called directly. ``choice``, ``STATUS_OK`` and ``np`` are taken from the
    notebook's global scope.

    Searched options: activation (relu / sigmoid), whether two extra
    1024-unit layers are inserted, optimizer (adam / rmsprop / sgd) and batch
    size (32 / 64).

    Parameters
    ----------
    x, y : arrays used for fitting; 15 % of them is held out by Keras through
        ``validation_split``. ``x.shape[1]`` gives the input width and
        ``y.shape[1]`` the number of output units.
    x_test, y_test : accepted but not used inside this function.

    Returns
    -------
    dict
        ``'loss'`` is the smallest validation MAE seen over the 20 epochs,
        ``'status'`` is ``STATUS_OK`` and ``'model'`` is the fitted model.
        Note that the model is returned in its state after the last epoch,
        which is not necessarily the epoch that produced the reported loss.
    """
    from keras.models import Sequential
    from keras.layers import Dense
    
    # In order to get reproducible results
    # (TensorFlow is seeded with 2020; NumPy's global generator is re-seeded with 1.)
    import tensorflow as tf
    tf.random.set_seed(2020)
    from numpy.random import seed
    seed(1)
    
    # Output width and input width are derived from the data itself.
    n_areas = y.shape[1]
    features = x.shape[1]
    
    act = {{choice(['relu', 'sigmoid'])}} # Choose the activation function
    
    # Fixed front part of the network: 256 -> 512 -> 512.
    model = Sequential()
    model.add(Dense(256, activation = act, input_shape = (features,)))
    model.add(Dense(512, activation = act))
    model.add(Dense(512, activation = act))
    
    if {{choice(['three', 'four'])}} == 'four': # Choose whether to enlarge the architecture with two 1024-unit layers
        model.add(Dense(1024, activation = act))
        model.add(Dense(1024, activation = act))

    
    # Fixed back part: 512 -> 256 -> one output per area (no activation argument given).
    model.add(Dense(512, activation = act))
    model.add(Dense(256, activation = act))
    model.add(Dense(n_areas))
    
    
    # Mean squared error is optimised; mean absolute error is tracked as a metric.
    model.compile(optimizer = {{choice(['adam','rmsprop' , 'sgd'])}}, loss = 'mse', metrics = ['mae'])
    model.summary()
    
    # checkpoint
#     filepath="weights-improvement-{epoch:02d}-{val_mae:.2f}.hdf5"
#     checkpoint = ModelCheckpoint(filepath, monitor='val_mae', verbose=1, save_best_only=True, mode='min')
#     callbacks_list = [checkpoint]
    
#     model.fit(X, Y, validation_split=0.33, epochs=150, batch_size=10, verbose = 0) #callbacks=callbacks_list, verbose=0)
    
    result = model.fit(x = x, y = y, validation_split = 0.15, 
                        batch_size = {{choice([32,64])}},
                        epochs = 20, verbose = 2)
    
    # Score of this trial: the best (lowest) validation MAE over all epochs.
    validation_mae = np.amin(result.history['val_mae']) 
    print('Best validation mae of epoch:', validation_mae)
    return {'loss': validation_mae, 'status': STATUS_OK, 'model': model}


In [7]:
# NOTE(review): a __future__ import run through exec() is expected to apply only
# to the exec'd snippet, not to this cell -- confirm whether this line is still needed.
exec('from __future__ import absolute_import, division, print_function')
import numpy as np
from hyperas import optim
# `choice` and `STATUS_OK` are the global names referenced inside create_model.
from hyperas.distributions import choice, uniform
from hyperopt import Trials, STATUS_OK, tpe

# Run the hyper-parameter search: create_model is the model template, load_data
# supplies the arrays, TPE proposes the candidates and 5 candidates are evaluated.
# NOTE(review): notebook_name presumably has to match this notebook's file name
# (without extension) -- verify before renaming the notebook.
best_run, best_model = optim.minimize(model = create_model,
                                      data = load_data,
                                      algo = tpe.suggest, 
                                      max_evals = 5,
                                      trials=Trials(),
                                      notebook_name = 'Model')
# Load the arrays again at notebook level so the held-out test split can be scored.
x, x_test, y, y_test= load_data()
print("Evalutation of best performing model:")
print(best_model.evaluate(x_test, y_test))
print("Best performing model chosen hyper-parameters:")
print(best_run)

# Left disabled: no `history` object is defined at notebook level (create_model
# keeps the fit result in a local variable and does not hand it back).
# plot_results(history)

Using TensorFlow backend.


>>> Imports:
#coding=utf-8

try:
    import pandas as pd
except:
    pass

try:
    import numpy as np
except:
    pass

try:
    from sklearn.model_selection import train_test_split
except:
    pass

try:
    import os
except:
    pass

try:
    import random
except:
    pass

try:
    import numpy as np
except:
    pass

try:
    import matplotlib.pyplot as plt
except:
    pass

try:
    from keras.models import Sequential
except:
    pass

try:
    from keras.layers import Dense
except:
    pass

try:
    import tensorflow as tf
except:
    pass

try:
    from numpy.random import seed
except:
    pass

try:
    import numpy as np
except:
    pass

try:
    from hyperas import optim
except:
    pass

try:
    from hyperas.distributions import choice, uniform
except:
    pass

try:
    from hyperopt import Trials, STATUS_OK, tpe
except:
    pass

try:
    from keras.models import model_from_json
except:
    pass

>>> Hyperas search space:

def get_space():
    return {
        'act': 

nan                                                  
Model: "sequential_2"                                           
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 256)               17152     
_________________________________________________________________
dense_8 (Dense)              (None, 512)               131584    
_________________________________________________________________
dense_9 (Dense)              (None, 512)               262656    
_________________________________________________________________
dense_10 (Dense)             (None, 1024)              525312    
_________________________________________________________________
dense_11 (Dense)             (None, 1024)              1049600   
_________________________________________________________________
dense_12 (Dense)             (None, 512)               524800    
_______________________

Epoch 16/20                                                      
 - 6s - loss: 56.1425 - mae: 1.9198 - val_loss: 44.2484 - val_mae: 1.7352

Epoch 17/20                                                      
 - 6s - loss: 54.6179 - mae: 1.9018 - val_loss: 39.7754 - val_mae: 1.6194

Epoch 18/20                                                      
 - 7s - loss: 53.2844 - mae: 1.8784 - val_loss: 48.4155 - val_mae: 1.6832

Epoch 19/20                                                      
 - 6s - loss: 52.2510 - mae: 1.8567 - val_loss: 36.1228 - val_mae: 1.5936

Epoch 20/20                                                      
 - 6s - loss: 51.8083 - mae: 1.8502 - val_loss: 42.8103 - val_mae: 1.8304

Best validation mae of epoch:                                    
1.5935717821121216                                               
Model: "sequential_4"                                            
_________________________________________________________________
Layer (type)                 Ou

 - 3s - loss: 98.1327 - mae: 2.3640 - val_loss: 96.4749 - val_mae: 2.3622

Epoch 11/20                                                      
 - 3s - loss: 98.1366 - mae: 2.3638 - val_loss: 96.4646 - val_mae: 2.3556

Epoch 12/20                                                      
 - 3s - loss: 98.1391 - mae: 2.3651 - val_loss: 96.4782 - val_mae: 2.3451

Epoch 13/20                                                      
 - 3s - loss: 98.1449 - mae: 2.3638 - val_loss: 96.4676 - val_mae: 2.3514

Epoch 14/20                                                      
 - 3s - loss: 98.1360 - mae: 2.3649 - val_loss: 96.4738 - val_mae: 2.3424

Epoch 15/20                                                      
 - 3s - loss: 98.1462 - mae: 2.3633 - val_loss: 96.4650 - val_mae: 2.3524

Epoch 16/20                                                      
 - 3s - loss: 98.1398 - mae: 2.3647 - val_loss: 96.4688 - val_mae: 2.3478

Epoch 17/20                                                      
 - 3s - loss:

In [8]:
# Prediction for the first training sample, rounded to whole numbers (one value per output unit).
np.round(best_model.predict(x[0:1])).astype('int')

array([[ 2,  2,  4,  2,  2, 12,  6, 51,  0,  0,  1,  0,  0,  1,  1,  2,
         0,  0,  0,  0,  1,  2,  0,  4,  0,  0,  0, 13,  0,  0,  0, 22,
         3,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 11,  3]])

In [9]:
# Target row of the first training sample, to compare by eye with the rounded prediction.
y[0]

array([ 2,  0,  3,  2,  1, 14,  5, 60,  0,  0,  0,  0,  0,  3,  0,  3,  0,
        0,  1,  0,  0,  1,  0,  6,  1,  0,  0,  7,  0,  0,  0, 26,  3,  0,
        2,  0,  0,  1,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  2,  0,  0,
        0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,
        0,  1,  0,  0,  0,  0,  0,  9,  6], dtype=int64)

In [16]:
# Score a single held-out sample; the model was compiled with loss 'mse' and metric 'mae'.
best_model.evaluate(x_test[0:1], y_test[0:1])



[11.31408977508545, 1.3076363801956177]

In [17]:
# Shape check: the slice keeps the leading batch axis, so this is (1, number of features).
x_test[0:1].shape

(1, 66)

In [18]:
# Raw (unrounded) model output for the first held-out sample.
best_model.predict(x_test[0:1])

array([[ 2.37974119e+00,  2.24573636e+00,  3.78148508e+00,
         2.38664103e+00,  2.00838447e+00,  1.27829924e+01,
         6.88590622e+00,  5.11086655e+01, -2.03334481e-01,
         4.65536833e-01,  6.12488925e-01, -2.29467943e-01,
         3.05308819e-01,  1.66380143e+00,  6.24546885e-01,
         1.87013221e+00,  1.96223632e-01, -1.15499526e-01,
         8.94508213e-02,  1.47701412e-01,  6.10566974e-01,
         2.25085664e+00,  1.65941089e-01,  5.27327442e+00,
         3.54929894e-01,  8.10648575e-02,  1.00148916e-02,
         1.21229258e+01,  2.68596530e-01, -4.55120504e-02,
         6.19136810e-01,  2.08747215e+01,  2.68774939e+00,
         1.59481615e-01,  1.83791831e-01,  2.04227477e-01,
         6.04685321e-02,  1.46532863e-01,  1.84166983e-01,
        -3.69561613e-01,  3.91467571e-01, -2.94932425e-02,
         8.51429939e-01,  1.13504231e-01,  4.07641351e-01,
         1.56543374e-01,  5.24115264e-02,  1.81086332e-01,
         7.37116933e-02, -7.44217634e-02,  2.51282096e-0

# Save the model

In [11]:
# NOTE(review): model_from_json is imported but not used in this cell -- presumably
# kept for reloading the files written below; confirm or drop.
from keras.models import model_from_json
# Serialize the model architecture to JSON and write it next to the notebook.
model_json = best_model.to_json()
with open("model_neural_network.json", "w") as json_file:
    json_file.write(model_json)
# Serialize the weights to HDF5; they are stored separately from the architecture file.
best_model.save_weights("model_neural_network.h5")
print("Saved model to disk")

Saved model to disk
