In [1]:
# Label for this network variant. It is used below as the column name of the
# per-place results frame and as the prefix of the output CSV file names.
NN_TYPE = 'simple_lstm_only_flows'

In [2]:
import os
# Restrict CUDA to the device with index 1. This is set in its own cell so that
# it takes effect before TensorFlow is imported further down.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [3]:
from place_groups import selected_junctions, selected_middle_of_roads
from flow_data_maker import default_bootstrap_flow_data_maker
from common import *

import random
from sklearn.linear_model import LinearRegression
from datetime import timedelta
from keras.callbacks import ModelCheckpoint
import numpy as np
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
import pandas as pd
from keras.layers.core import Dense, Activation, Dropout, Flatten
from keras.layers.recurrent import LSTM
from keras.layers.convolutional import Conv3D
from keras.layers.convolutional_recurrent import ConvLSTM2D
from keras.layers import BatchNormalization
from keras.models import Sequential
from keras import regularizers

from matplotlib import pyplot as plt
%matplotlib inline
# Notebook-wide matplotlib defaults: ggplot theme, large figures, larger font.
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (16, 10)
plt.rcParams['font.size'] = 16

# prevent tensorflow from allocating the entire GPU memory at once
import tensorflow as tf
config = tf.ConfigProto()
# allow_growth asks TensorFlow to claim GPU memory incrementally, as needed.
config.gpu_options.allow_growth = True
# NOTE(review): `sess` is not referenced again in the visible cells -- confirm
# whether creating it is sufficient or whether it should also be registered
# with Keras (e.g. keras.backend.set_session(sess)).
sess = tf.Session(config=config)

Using TensorFlow backend.


In [4]:
def build_model(lstm_state_size, num_lags, num_outs, loss):
    """Assemble and compile a single-LSTM-layer regression network.

    Args:
        lstm_state_size: number of units given to the LSTM layer.
        num_lags: length of each input sequence; every time step carries a
            single feature, so the input shape is ``(num_lags, 1)``.
        num_outs: number of units in the final Dense layer.
        loss: loss specification forwarded to ``compile``.

    Returns:
        The compiled ``Sequential`` model (optimizer: rmsprop).
    """
    # Only the last LSTM state is passed on (return_sequences=False).
    recurrent_layer = LSTM(lstm_state_size, input_shape=(num_lags, 1), return_sequences=False)
    # Linear activation, because speed RESIDUALS can have any sign.
    output_layer = Dense(units=num_outs, activation="linear")

    network = Sequential([recurrent_layer, output_layer])
    network.compile(loss=loss, optimizer="rmsprop")
    return network

In [5]:
def plot_model_training(history):
    """Plot per-epoch training and validation loss.

    Args:
        history: object whose ``history`` attribute maps ``'loss'`` and
            ``'val_loss'`` to per-epoch sequences (as returned by ``model.fit``
            in ``create_and_train_model``).
    """
    per_epoch = history.history
    # Training loss is dashed, validation loss uses the default solid line.
    curves = (('loss', {'linestyle': '--'}), ('val_loss', {}))
    for metric_key, line_kwargs in curves:
        plt.plot(per_epoch[metric_key], **line_kwargs)

    plt.title('Model Loss in Train Phase')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

In [6]:
def create_and_train_model(loss, lstm_state_size, num_lags, num_outs,
                           place_id, data_maker, mini_batch_size, num_epochs, validation_split):
    """Build an LSTM model, train it, and restore its best-validation weights.

    Args:
        loss: loss specification forwarded to ``build_model``.
        lstm_state_size: number of LSTM units.
        num_lags: input sequence length.
        num_outs: number of output units.
        place_id: not used inside this function; kept so existing callers
            that pass it keep working.
        data_maker: object exposing ``get_train_and_test_inputs()``, which
            returns a 4-tuple whose first two items are the training inputs
            and targets (both accessed through ``.values``).
        mini_batch_size: batch size passed to ``fit``.
        num_epochs: number of epochs passed to ``fit``.
        validation_split: fraction of the training data held out by ``fit``.

    Returns:
        Tuple ``(model, history, data_maker)``; ``model`` carries the weights
        of the checkpoint with the lowest ``val_loss``.
    """
    checkpoint_path = './hdf5/%d.hdf5' % random.randint(2**40, 2**41)
    # Make sure the checkpoint directory exists; otherwise the first
    # checkpoint save fails on a fresh checkout.
    checkpoint_dir = os.path.dirname(checkpoint_path)
    if not os.path.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    print('Creating model %s' % checkpoint_path)

    x_train, y_train, _, _ = data_maker.get_train_and_test_inputs()
    model = build_model(lstm_state_size=lstm_state_size, num_lags=num_lags, num_outs=num_outs, loss=loss)

    # Keep only the weights of the epoch with the lowest validation loss.
    best_weights_checkpoint = ModelCheckpoint(
        checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
    history = model.fit(
        # Add a trailing feature axis: (samples, lags) -> (samples, lags, 1).
        np.expand_dims(np.array(x_train.values), 2),
        y_train.values,
        batch_size=mini_batch_size,
        epochs=num_epochs,
        validation_split=validation_split,
        callbacks=[best_weights_checkpoint],
        verbose=2)

    # Roll back from the last-epoch weights to the best checkpointed ones.
    model.load_weights(checkpoint_path)
    plot_model_training(history)
    return model, history, data_maker

In [7]:
# model, history, data_maker = create_and_train_model(
#     loss='mse', 
#     lstm_state_size=20, 
#     num_lags=NUM_LAGS, 
#     num_outs=1,
#     place_id=EXAMPLE_PLACE_ID, 
#     data_maker=default_bootstrap_flow_data_maker(EXAMPLE_PLACE_ID),
#     mini_batch_size=512, 
#     num_epochs=100, 
#     validation_split=0.2)

Creating model ./hdf5/1415746309628.hdf5
Train on 34780 samples, validate on 8695 samples
Epoch 1/100
Epoch 00001: val_loss improved from inf to 17.07306, saving model to ./hdf5/1415746309628.hdf5
 - 4s - loss: 31.3648 - val_loss: 17.0731
Epoch 2/100
Epoch 00002: val_loss improved from 17.07306 to 3.31355, saving model to ./hdf5/1415746309628.hdf5
 - 2s - loss: 8.8522 - val_loss: 3.3135
Epoch 3/100
Epoch 00003: val_loss improved from 3.31355 to 1.15412, saving model to ./hdf5/1415746309628.hdf5
 - 2s - loss: 1.9657 - val_loss: 1.1541
Epoch 4/100
Epoch 00004: val_loss improved from 1.15412 to 0.74032, saving model to ./hdf5/1415746309628.hdf5
 - 2s - loss: 0.8812 - val_loss: 0.7403
Epoch 5/100
Epoch 00005: val_loss improved from 0.74032 to 0.51572, saving model to ./hdf5/1415746309628.hdf5
 - 2s - loss: 0.6575 - val_loss: 0.5157
Epoch 6/100
Epoch 00006: val_loss improved from 0.51572 to 0.47583, saving model to ./hdf5/1415746309628.hdf5
 - 2s - loss: 0.5761 - val_loss: 0.4758
Epoch 7/10

KeyboardInterrupt: 

In [None]:
def predict(model, speed_data_maker):
    """Run ``model`` on the data maker's test inputs and summarise the errors.

    Args:
        model: object with a ``predict`` method accepting a 3-D array.
        speed_data_maker: object providing ``get_train_and_test_inputs()``
            (a 4-tuple; the third item is the test input) and
            ``individual_errors_without_interpolated_values(predictions)``.

    Returns:
        Whatever ``compute_error_statistics`` returns for the
        ``'speed_km_hr_true'`` / ``'speed_km_hr_predicted'`` columns.
    """
    _x_train, _y_train, test_inputs, _test_targets = speed_data_maker.get_train_and_test_inputs()

    # Add a trailing feature axis so the input matches the LSTM's (lags, 1) shape.
    lstm_ready_inputs = np.expand_dims(test_inputs, 2)
    flat_predictions = model.predict(lstm_ready_inputs).flatten()

    per_sample_errors = speed_data_maker.individual_errors_without_interpolated_values(flat_predictions)
    return compute_error_statistics(per_sample_errors, 'speed_km_hr_true', 'speed_km_hr_predicted')

In [None]:
def one_place(place_id):
    """Train and evaluate the model for a single place.

    Args:
        place_id: identifier of the place; forwarded to the data maker and
            stored in the result's index.

    Returns:
        A DataFrame indexed by ``('place_id', 'stat')`` with a single column
        named ``NN_TYPE`` holding the values returned by ``predict``.
    """
    # `create_and_train_model` takes the maker through its `data_maker`
    # parameter, and this notebook imports the flow data maker.
    model, history, data_maker = create_and_train_model(
        loss='mse',
        lstm_state_size=20,
        num_lags=NUM_LAGS,
        num_outs=1,
        place_id=place_id,
        data_maker=default_bootstrap_flow_data_maker(place_id),
        mini_batch_size=512,
        num_epochs=100,
        validation_split=0.2)
    # Turn the statistics index into a 'stat' column, tag every row with the
    # place, then index by (place_id, stat) so per-place frames can be stacked.
    return pd.DataFrame({NN_TYPE: predict(model, data_maker)})\
        .reset_index()\
        .rename(columns={'index': 'stat'})\
        .assign(place_id=place_id)\
        .set_index(['place_id', 'stat'])

def all_places(group, name):
    """Evaluate every place in ``group`` and write per-place and summary CSVs.

    Args:
        group: iterable of place ids, each passed to ``one_place``.
        name: file-name stem; results go to ``<name>.csv`` and the per-stat
            averages to ``<name>_summary.csv``.

    Returns:
        DataFrame of the per-``stat`` means computed from the written CSV.
    """
    per_place_csv = name + '.csv'
    summary_csv = '%s_summary.csv' % name

    per_place_results = pd.concat(map(one_place, group))
    per_place_results.to_csv(per_place_csv)

    # The summary is computed from the file just written (not the in-memory
    # frame), so `place_id` and `stat` are read back as ordinary columns.
    results = pd.read_csv(per_place_csv).groupby('stat').mean()
    results.to_csv(summary_csv)
    return results

In [None]:
# Train and evaluate one model per place in `selected_middle_of_roads`; writes
# '<NN_TYPE>_middles.csv' and '<NN_TYPE>_middles_summary.csv'.
results_middles = all_places(selected_middle_of_roads, NN_TYPE + '_middles')

In [None]:
results_middles

In [None]:
# results_junctions = all_places(selected_junctions, NN_TYPE + '_junctions')

In [None]:
# results_junctions