In [1]:
import numpy as np
import pandas as pd
import keras
import tensorflow as tf
from datetime import datetime
from sklearn import datasets, linear_model
from matplotlib import pyplot as plt
from matplotlib import cm
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.layers import BatchNormalization, Input, Embedding, Concatenate, Conv1D, MaxPooling1D, Flatten, merge
from keras.layers import merge, Concatenate, Permute, RepeatVector, Reshape
from keras.models import Sequential, Model
import keras.backend as K
import statsmodels.formula.api as smf
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# prevent tensorflow from allocating the entire GPU memory at once
# config = tf.ConfigProto()
# config.gpu_options.allow_growth=True
# sess = tf.Session(config=config)

## GLOBAL PARAMETERS

In [2]:
NUM_LAGS = 10
sel = [5,7] # weather features to use
sel2 = [0,1,2,7] # eventlags featurs to use

# word embeddings parameters
#GLOVE_DIR = "/home/fmpr/datasets/glove.6B/"
GLOVE_DIR = "/mnt/sdb1/datasets/glove.6B/"
MAX_SEQUENCE_LENGTH = 350 #600
MAX_NB_WORDS = 600 #5000
EMBEDDING_DIM = 300 #300

## Load weather data

In [40]:
print("loading weather data...")

# load data
df = pd.read_csv("central_park_weather.csv")
df = df.set_index("date")
df.index = pd.to_datetime(df.index, format='%Y-%m-%d')

# replace predefined values with NaN
df = df.replace(99.99, np.nan)
df = df.replace(999.9, np.nan)
df = df.replace(9999.9, np.nan)

# replace NaN with 0 for snow depth
df["snow_depth"] = df["snow_depth"].fillna(0)

# do interpolation for the remaining NaNs
df = df.interpolate()

# standardize data
removed_mean = df.mean()
removed_std = df.std()
weather = (df - removed_mean) / removed_std
weather

loading weather data...


Unnamed: 0_level_0,min_temp,max_temp,wind_speed,wind_gust,visibility,pressure,precipitation,snow_depth,fog,rain_drizzle,snow_ice,thunder
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2009-01-01,-2.016735,-2.128488,2.946112,2.512634,0.666065,0.571023,-0.273606,-0.240596,-0.326133,-0.709709,-0.255411,-0.037018
2009-01-02,-1.602221,-1.739134,0.276908,-0.188044,0.236614,-0.040850,-0.378683,-0.240596,-0.326133,-0.709709,3.913921,-0.037018
2009-01-03,-1.108462,-1.472148,1.211129,0.018115,0.666065,-0.503153,-0.378683,-0.240596,-0.326133,-0.709709,-0.255411,-0.037018
2009-01-04,-1.413252,-1.238535,1.033182,-0.579745,0.666065,0.244691,-0.378683,-0.240596,-0.326133,-0.709709,-0.255411,-0.037018
2009-01-05,-0.626895,-1.188475,0.588315,-1.218837,0.666065,-0.272002,-0.378683,-0.240596,-0.326133,-0.709709,-0.255411,-0.037018
...,...,...,...,...,...,...,...,...,...,...,...,...
2016-12-27,-0.919493,-0.237338,0.677289,1.852927,0.482014,-0.394376,-0.326145,-0.240596,-0.326133,-0.709709,-0.255411,-0.037018
2016-12-28,-0.742715,-0.237338,0.098961,-0.579745,0.666065,0.122316,-0.378683,-0.240596,-0.326133,-0.709709,-0.255411,-0.037018
2016-12-29,-0.864630,-0.999360,-0.701800,-0.373587,-0.560938,-0.639125,-0.378683,-0.240596,-0.326133,1.408545,-0.255411,-0.037018
2016-12-30,-0.919493,-1.021609,1.166643,1.069524,0.604715,-1.781287,0.645816,-0.240596,3.065185,-0.709709,3.913921,-0.037018


## Load events data

In [5]:
print("loading events data...")

events = pd.read_csv("terminal5_events_preprocessed.tsv", sep="\t")
events.head()

events['start_time'] = pd.to_datetime(events['start_time'], format='%Y-%m-%d %H:%M')
events['date'] = events['start_time'].dt.strftime("%Y-%m-%d")
events = events[["date","start_time","title","url","description"]]

loading events data...


## Load taxi data (and merge with others and detrend)

In [84]:
print("loading taxi data (and merging and detrending)...")

df = pd.read_csv("pickups_terminal_5_0.003.csv")

df_sum = pd.DataFrame(df.groupby("date")["pickups"].sum())
df_sum["date"] = df_sum.index
df_sum.index = pd.to_datetime(df_sum.index, format='%Y-%m-%d %H:%M')
df_sum["dow"] = df_sum.index.weekday

# add events information
event_col = np.zeros((len(df_sum)))
late_event = np.zeros((len(df_sum)))
really_late_event = np.zeros((len(df_sum)))
event_desc_col = []
for i in range(len(df_sum)):
    if df_sum.iloc[i].date in events["date"].values:
        event_col[i] = 1
        event_descr = ""
        for e in events[events.date == df_sum.iloc[i].date]["description"]:
            event_descr += str(e) + " "
        event_desc_col.append(event_descr)
        for e in events[events.date == df_sum.iloc[i].date]["start_time"]:
            if e.hour >= 20:
                late_event[i] = 1
            if e.hour >= 21:
                really_late_event[i] = 1
    else:
        event_desc_col.append("None")

df_sum["event"] = event_col
df_sum["late_event"] = late_event
df_sum["really_late_event"] = really_late_event
df_sum["event_desc"] = event_desc_col
df_sum["event_next_day"] = pd.Series(df_sum["event"]).shift(-1)
df_sum["late_event_next_day"] = pd.Series(df_sum["late_event"]).shift(-1)
df_sum["really_late_event_next_day"] = pd.Series(df_sum["really_late_event"]).shift(-1)
df_sum["event_next_day_desc"] = pd.Series(df_sum["event_desc"]).shift(-1)

# merge with weather data

df_sum = df_sum.rename({'date': 'date_col'}, axis=1)
df_sum = df_sum.join(weather, how="inner", on="date")

loading taxi data (and merging and detrending)...


In [85]:
# keep only data after 2013
START_YEAR = 2013
df_sum = df_sum.loc[df_sum.index.year >= START_YEAR]
df_sum.head()

df_sum["year"] = df_sum.index.year

trend_mean = df_sum[df_sum.index.year < 2015].groupby(["dow"]).mean()["pickups"]
trend_std = df_sum["pickups"].std()

# build vectors with trend to remove and std
trend = []
std = []
for ix, row in df_sum.iterrows():
    trend.append(trend_mean[row.dow])
    std.append(trend_std)

df_sum["trend"] = trend
df_sum["std"] = std

# detrend data
df_sum["detrended"] = (df_sum["pickups"] - df_sum["trend"]) / df_sum["std"]

## Build lags and features

In [86]:
print("building lags...")

lags = pd.concat([pd.Series(df_sum["detrended"]).shift(x) for x in range(0, NUM_LAGS)], axis=1).to_numpy()
event_feats = np.concatenate([df_sum["event_next_day"].to_numpy()[:,np.newaxis],
                             df_sum["late_event"].to_numpy()[:,np.newaxis],
                             df_sum["really_late_event"].to_numpy()[:,np.newaxis],
                             df_sum["really_late_event_next_day"].to_numpy()[:,np.newaxis]], axis=1)
lags_event_feats = pd.concat([pd.Series(df_sum["event_next_day"]).shift(x) for x in range(0,NUM_LAGS)],axis=1).to_numpy()
event_texts = df_sum["event_next_day_desc"].to_numpy()
weather_feats = df_sum[['min_temp', u'max_temp', u'wind_speed',
       u'wind_gust', u'visibility', u'pressure', u'precipitation',
       u'snow_depth', u'fog', u'rain_drizzle', u'snow_ice', u'thunder']].to_numpy()
preds = pd.Series(df_sum["detrended"]).shift(-1).to_numpy()
trends = df_sum["trend"].to_numpy()
stds = df_sum["std"].to_numpy()

lags = lags[NUM_LAGS:-1,:]
event_feats = event_feats[NUM_LAGS:-1,:]
lags_event_feats = lags_event_feats[NUM_LAGS:-1,:]
event_texts = event_texts[NUM_LAGS:-1]
weather_feats = weather_feats[NUM_LAGS:-1,:]
preds = preds[NUM_LAGS:-1]
trends = trends[NUM_LAGS:-1]
stds = stds[NUM_LAGS:-1]

building lags...


## Train/test split

In [87]:
print("loading train/val/test split...")

i_train = 365*2 # 2013 and 2014
i_val = 365*3
i_test = -1 # 2015 and 2016 (everything else)

lags_train = lags[:i_train,:] # time series lags
event_feats_train = event_feats[:i_train,:] # event/no_event
lags_event_feats_train = lags_event_feats[:i_train,:] # lags for event/no_event
event_texts_train = event_texts[:i_train] # event text descriptions
weather_feats_train = weather_feats[:i_train,:] # weather data
y_train = preds[:i_train] # target values

lags_val = lags[i_train:i_val,:] # time series lags
event_feats_val = event_feats[i_train:i_val,:] # event/no_event
lags_event_feats_val = lags_event_feats[i_train:i_val,:] # lags for event/no_event
event_texts_val = event_texts[i_train:i_val] # event text descriptions
weather_feats_val = weather_feats[i_train:i_val,:] # weather data
y_val = preds[i_train:i_val] # target values

lags_test = lags[i_val:i_test,:]
event_feats_test = event_feats[i_val:i_test,:]
lags_event_feats_test = lags_event_feats[i_val:i_test,:]
event_texts_test = event_texts[i_val:i_test]
weather_feats_test = weather_feats[i_val:i_test,:]
y_test = preds[i_val:i_test]
trend_test = trends[i_val:i_test]
std_test = stds[i_val:i_test]

loading train/val/test split...


## Evaluation functions

In [88]:
def compute_error(trues, predicted):
    corr = np.corrcoef(predicted, trues)[0,1]
    mae = np.mean(np.abs(predicted - trues))
    rae = np.sum(np.abs(predicted - trues)) / np.sum(np.abs(trues - np.mean(trues)))
    rmse = np.sqrt(np.mean((predicted - trues)**2))
    rrse = np.sqrt(np.sum((predicted - trues)**2) / np.sum((trues - np.mean(trues))**2))
    mape = np.mean(np.abs((predicted - trues) / trues)) * 100
    r2 = max(0, 1 - np.sum((predicted - trues)**2) / np.sum((trues - np.mean(trues))**2))
    return corr, mae, rae, rmse, rrse, mape, r2


def compute_error_filtered(trues, predicted, filt):
    trues = trues[filt]
    predicted = predicted[filt]
    corr = np.corrcoef(predicted, trues)[0,1]
    mae = np.mean(np.abs(predicted - trues))
    mse = np.mean((predicted - trues)**2)
    rae = np.sum(np.abs(predicted - trues)) / np.sum(np.abs(trues - np.mean(trues)))
    rmse = np.sqrt(np.mean((predicted - trues)**2))
    r2 = max(0, 1 - np.sum((trues-predicted)**2) / np.sum((trues - np.mean(trues))**2))
    return corr, mae, rae, rmse, rrse, mape, r2

## MLP (just lags)

In [89]:
def build_model(num_inputs, num_lags, num_preds):
    input_lags = Input(shape=(num_lags,))
    
    x = input_lags
    x = BatchNormalization()(x)
    x = Dense(units=100, activation="tanh", kernel_regularizer=keras.regularizers.l2(0.05))(x)
    x = Dropout(0.5)(x)
    preds = Dense(units=num_preds)(x)
    
    model = Model(input_lags, preds)
    model.compile(loss="mse", optimizer="adam")
    
    return model, input_lags, preds


print("\nrunning MLP with just lags...")

# checkpoint best model
checkpoint = ModelCheckpoint("weights.best.hdf5", monitor='val_loss', verbose=0, save_best_only=True, mode='min')

model, input_lags, preds = build_model(1, NUM_LAGS, 1)
model.fit(
    np.concatenate([lags_train], axis=1),
    y_train,
    batch_size=64,
    epochs=500,
    validation_data=(np.concatenate([lags_val], axis=1), y_val),
    callbacks=[checkpoint],
    verbose=0)   

print("Total number of iterations:  ", len(model.history.history["loss"]))
print("Best loss at iteratation:    ", np.argmin(model.history.history["loss"]), "   Best:", np.min(model.history.history["loss"]))
print("Best val_loss at iteratation:", np.argmin(model.history.history["val_loss"]), "   Best:", np.min(model.history.history["val_loss"]))

# load weights
model.load_weights("weights.best.hdf5")

# make predictions
preds_lstm = model.predict(np.concatenate([lags_test[:,:]], axis=1))
preds_lstm = preds_lstm[:,0] * std_test + trend_test
y_true = y_test * std_test + trend_test
corr, mae, rae, rmse, rrse, mape, r2 = compute_error(y_true, preds_lstm)
print("MAE:  %.3f\tRMSE: %.3f\tR2:   %.3f" % (mae, rmse, r2))


running MLP with just lags...
Total number of iterations:   500
Best loss at iteratation:     342    Best: 0.47571131587028503
Best val_loss at iteratation: 292    Best: 0.39400947093963623
MAE:  182.178	RMSE: 250.563	R2:   0.444


## MLP lags + weather

In [90]:
print("\nrunning MLP with lags + weather...")

# checkpoint best model
checkpoint = ModelCheckpoint("weights.best.hdf5", monitor='val_loss', verbose=0, save_best_only=True, mode='min')

model, input_lags, preds = build_model(1, NUM_LAGS+len(sel), 1)
model.fit(
    #lags_train,
    np.concatenate([lags_train, weather_feats_train[:,sel]], axis=1),
    y_train,
    batch_size=64,
    epochs=500,
    validation_data=(np.concatenate([lags_val, weather_feats_val[:,sel]], axis=1), y_val),
    callbacks=[checkpoint],
    verbose=0)   

print("Total number of iterations:  ", len(model.history.history["loss"]))
print("Best loss at iteratation:    ", np.argmin(model.history.history["loss"]), "   Best:", np.min(model.history.history["loss"]))
print("Best val_loss at iteratation:", np.argmin(model.history.history["val_loss"]), "   Best:", np.min(model.history.history["val_loss"]))

# load weights
model.load_weights("weights.best.hdf5")

# make predictions
preds_lstm = model.predict(np.concatenate([lags_test[:,:], weather_feats_test[:,sel]], axis=1))
preds_lstm = preds_lstm[:,0] * std_test + trend_test
corr, mae, rae, rmse, rrse, mape, r2 = compute_error(y_true, preds_lstm)
print("MAE:  %.3f\tRMSE: %.3f\tR2:   %.3f" % (mae, rmse, r2))


running MLP with lags + weather...
Total number of iterations:   500
Best loss at iteratation:     302    Best: 0.47776007652282715
Best val_loss at iteratation: 462    Best: 0.39925527572631836
MAE:  183.752	RMSE: 252.426	R2:   0.436


## MLP with weather + events information (no text) + late + event_lags

In [91]:
print("\nrunning MLP with lags + weather + event + late + event lags...")

def build_model_events(num_inputs, num_lags, num_feat, num_preds):
    input_lags = Input(shape=(num_lags,))
    input_events = Input(shape=(num_feat,))
    
    feat = Concatenate(axis=1)([input_lags, input_events])
    
    x = feat
    x = BatchNormalization()(x)
    x = Dense(units=100, activation="tanh", kernel_regularizer=keras.regularizers.l2(0.05))(x)
    x = Dropout(0.5)(x)
    
    preds = Dense(units=num_preds)(x)
    preds = Activation("linear")(preds)
    
    model = Model([input_lags, input_events], preds)
    model.compile(loss="mse", optimizer="adam")
    
    return model, input_lags, preds

# checkpoint best model
checkpoint = ModelCheckpoint("weights.best.hdf5", monitor='val_loss', verbose=0, save_best_only=True, mode='min')

# fit model to the mean
model, input_lags, preds = build_model_events(1, NUM_LAGS+len(sel), 4+len(sel2), 1)
model.fit(
    [np.concatenate([lags_train, weather_feats_train[:,sel]], axis=1), 
     np.concatenate([event_feats_train[:,:], lags_event_feats_train[:,sel2]], axis=1)],
    y_train,
    batch_size=64,
    epochs=500,
    validation_data=([np.concatenate([lags_val, weather_feats_val[:,sel]], axis=1), 
                      np.concatenate([event_feats_val[:,:], lags_event_feats_val[:,sel2]], axis=1)], y_val),
    callbacks=[checkpoint],
    verbose=0)   

print("Total number of iterations:  ", len(model.history.history["loss"]))
print("Best loss at iteratation:    ", np.argmin(model.history.history["loss"]), "   Best:", np.min(model.history.history["loss"]))
print("Best val_loss at iteratation:", np.argmin(model.history.history["val_loss"]), "   Best:", np.min(model.history.history["val_loss"]))

# load weights
model.load_weights("weights.best.hdf5")

print(model.evaluate([np.concatenate([lags_test[:,:], weather_feats_test[:,sel]], axis=1), 
                      np.concatenate([event_feats_test[:,:], lags_event_feats_test[:,sel2]], axis=1)], 
                      y_test, verbose=2))

# make predictions
preds_lstm = model.predict([np.concatenate([lags_test[:,:], weather_feats_test[:,sel]], axis=1), 
                            np.concatenate([event_feats_test[:,:], lags_event_feats_test[:,sel2]], axis=1)])
preds_lstm = preds_lstm[:,0] * std_test + trend_test
corr, mae, rae, rmse, rrse, mape, r2 = compute_error(y_true, preds_lstm)
print("MAE:  %.3f\tRMSE: %.3f\tR2:   %.3f" % (mae, rmse, r2))


running MLP with lags + weather + event + late + event lags...
Total number of iterations:   500
Best loss at iteratation:     326    Best: 0.4455840289592743
Best val_loss at iteratation: 476    Best: 0.3511611223220825
6/6 - 0s - loss: 0.3940 - 49ms/epoch - 8ms/step
0.3940379023551941
MAE:  164.943	RMSE: 241.907	R2:   0.482


## MLP with weather + events information (no text) + event_lags + TEXT

In [100]:
print("\npreparing word embeddings for NNs with text...")

# Build index mapping words in the embeddings set to their embedding vector
embeddings_index = {}
# f = open(GLOVE_DIR + 'glove.6B.%dd.txt' % (EMBEDDING_DIM,))
f = open('glove.6B.300d.txt')
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()

# Vectorize the text samples into a 2D integer tensor and pad sequences
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(event_texts)
sequences_train = tokenizer.texts_to_sequences(event_texts_train)
sequences_val = tokenizer.texts_to_sequences(event_texts_val)
sequences_test = tokenizer.texts_to_sequences(event_texts_test)

word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

data_train = pad_sequences(sequences_train, maxlen=MAX_SEQUENCE_LENGTH)
data_val = pad_sequences(sequences_val, maxlen=MAX_SEQUENCE_LENGTH)
data_test = pad_sequences(sequences_test, maxlen=MAX_SEQUENCE_LENGTH)
print('Shape of train tensor:', data_train.shape)
print('Shape of val tensor:', data_val.shape)
print('Shape of test tensor:', data_test.shape)

# Prepare embedding matrix
print('Preparing embedding matrix.')
num_words = min(MAX_NB_WORDS, len(word_index)+1)
embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
for word, i in word_index.items():
    if i >= MAX_NB_WORDS:
        continue
    embedding_vector = embeddings_index.get(word)
    #print i
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector


def build_model_text(num_inputs, num_lags, num_feat, num_preds):
    input_lags = Input(shape=(num_lags,))
    input_events = Input(shape=(num_feat,))
    
    x_lags = Concatenate(axis=1)([input_lags, input_events])
    #x_lags = BatchNormalization()(x_lags)
    
    x = x_lags
    x = BatchNormalization()(x)
    x = Dense(units=100, activation="tanh", kernel_regularizer=keras.regularizers.l2(0.05))(x)
    #x = Dense(units=50, activation="tanh", kernel_regularizer=keras.regularizers.l2(0.1))(x)
    #x = BatchNormalization()(x)
    #x = Dropout(0.5)(x)
    #x = BatchNormalization()(x)
    #x = Dense(units=50, activation="tanh", kernel_regularizer=keras.regularizers.l2(0.1))(x)
    #x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x_lags = BatchNormalization()(x)
    #x_lags = x
    
    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedding_layer = Embedding(num_words,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=True)
    embedded_sequences = embedding_layer(sequence_input)
    x = Conv1D(50, 3, activation='relu')(embedded_sequences)
    x = MaxPooling1D(3)(x)
    x = Dropout(0.5)(x)
    x = Conv1D(30, 3, activation='relu')(x)
    x = MaxPooling1D(3)(x)
    x = Dropout(0.5)(x)
    x = Conv1D(30, 5, activation='relu')(x)
    x = MaxPooling1D(5)(x)
    x = Dropout(0.5)(x)
    #x = Conv1D(50, 5, activation='relu')(x)
    #x = MaxPooling1D(5)(x)
    text_embedding = Flatten()(x)
    #text_embedding = Dropout(0.5)(text_embedding)
    #text_embedding = Dense(units=100, activation='relu')(text_embedding)
    #text_embedding = Dropout(0.5)(text_embedding)
    
    print("text_embedding:", text_embedding)
    #temp1 = Permute([1,2])(RepeatVector(180)(BatchNormalization()(text_embedding)))
    temp1 = Reshape((1,180))(text_embedding)
    temp1 = Permute([2,1])(temp1)
    print("temp1:", temp1)
    
    print("x_lags:", x_lags)
    temp2 = Permute([1,2])(RepeatVector(180)(BatchNormalization()(x_lags)))
    temp2 = Dropout(0.5)(temp2)
    print("temp2:", temp2)
    
    temp = Concatenate(axis=2)([temp1, temp2])
    print("concatenated:", temp)
    temp = Dense(1, activation="tanh")(temp)
    #temp = Permute([2,1])(temp)
    print("after tanh:", temp)
    temp = Reshape((180,))(temp)
    temp = BatchNormalization()(temp)
    print(temp)
    attention_probs = Activation("softmax")(temp)
    #print fail

    #attention_probs = Dense(180, activation='softmax', name='attention_vec')(text_embedding)
    #attention_mul = merge([text_embedding, attention_probs], output_shape=180, name='attention_mul', mode='mul')
    attention_mul = Concatenate([text_embedding, attention_probs])
    attention_mul = BatchNormalization()(attention_mul)
    #attention_mul = Dropout(0.5)(attention_mul)
    
    feat = Concatenate(axis=1)([x_lags, attention_mul])
    
    feat = BatchNormalization()(feat)
    #feat = Dense(units=50, activation='relu')(feat)
    #feat = Dropout(0.5)(feat)
    
    preds = Dense(units=num_preds)(feat)
    #preds = Dense(units=num_preds, kernel_regularizer=keras.regularizers.l2(0.2))(feat)
    preds = Activation("linear")(preds)
    
    model = Model([input_lags, input_events, sequence_input], preds)
    
    rmsp = keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
    #model.compile(loss="mse", optimizer=rmsp)
    model.compile(loss="mse", optimizer="adam")
    
    return model, input_lags, preds


print("\nrunning MLP with lags + weather + events + late + text...")

# checkpoint best model
checkpoint = ModelCheckpoint("weights.best.hdf5", monitor='val_loss', verbose=0, save_best_only=True, mode='min')

# fit model to the mean
model, input_lags, preds = build_model_text(1, NUM_LAGS+len(sel), 4, 1)
model.fit(
    [np.concatenate([lags_train, weather_feats_train[:,sel]], axis=1), 
     np.concatenate([event_feats_train[:,:]], axis=1),
     data_train],
    y_train,
    batch_size=64,
    epochs=700,
    #validation_split=0.2,
    validation_data=([np.concatenate([lags_val, weather_feats_val[:,sel]], axis=1), 
                      np.concatenate([event_feats_val[:,:]], axis=1),
                      data_val], y_val),
    callbacks=[checkpoint],
    verbose=0)   

print("Total number of iterations:  ", len(model.history.history["loss"]))
print("Best loss at iteratation:    ", np.argmin(model.history.history["loss"]), "   Best:", np.min(model.history.history["loss"]))
print("Best val_loss at iteratation:", np.argmin(model.history.history["val_loss"]), "   Best:", np.min(model.history.history["val_loss"]))

# load weights
model.load_weights("weights.best.hdf5")

print(model.evaluate([np.concatenate([lags_test[:,:], weather_feats_test[:,sel]], axis=1), 
                      np.concatenate([event_feats_test[:,:]], axis=1),
                      data_test],
                      y_test, verbose=2))

# make predictions
preds_lstm = model.predict([np.concatenate([lags_test[:,:], weather_feats_test[:,sel]], axis=1), 
                            np.concatenate([event_feats_test[:,:]], axis=1),
                            data_test])
preds_lstm = preds_lstm[:,0] * std_test + trend_test
y_true = y_test * std_test + trend_test
corr, mae, rae, rmse, rrse, mape, r2 = compute_error(y_true, preds_lstm)
print("MAE:  %.3f\tRMSE: %.3f\tR2:   %.3f" % (mae, rmse, r2))


# ---------------------------------------- MLP with weather + events information (no text) + event_lags + TEXT

def build_model_text_v2(num_inputs, num_lags, num_feat, num_preds):
    input_lags = Input(shape=(num_lags,))
    input_events = Input(shape=(num_feat,))
    
    x_lags = Concatenate(axis=1)([input_lags, input_events])
    #x_lags = BatchNormalization()(x_lags)
    
    x = x_lags
    x = BatchNormalization()(x)
    x = Dense(units=100, activation="tanh", kernel_regularizer=keras.regularizers.l2(0.05))(x)
    #x = Dense(units=50, activation="tanh", kernel_regularizer=keras.regularizers.l2(0.1))(x)
    #x = BatchNormalization()(x)
    #x = Dropout(0.5)(x)
    #x = BatchNormalization()(x)
    #x = Dense(units=50, activation="tanh", kernel_regularizer=keras.regularizers.l2(0.1))(x)
    #x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x_lags = BatchNormalization()(x)
    #x_lags = x
    
    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedding_layer = Embedding(num_words,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=True)
    embedded_sequences = embedding_layer(sequence_input)
    x = Conv1D(50, 3, activation='relu')(embedded_sequences)
    x = MaxPooling1D(3)(x)
    x = Dropout(0.5)(x)
    x = Conv1D(30, 3, activation='relu')(x)
    x = MaxPooling1D(3)(x)
    x = Dropout(0.5)(x)
    x = Conv1D(30, 5, activation='relu')(x)
    x = MaxPooling1D(5)(x)
    x = Dropout(0.5)(x)
    #x = Conv1D(50, 5, activation='relu')(x)
    #x = MaxPooling1D(5)(x)
    text_embedding = Flatten()(x)
    #text_embedding = Dropout(0.5)(text_embedding)
    #text_embedding = Dense(units=100, activation='relu')(text_embedding)
    #text_embedding = Dropout(0.5)(text_embedding)
    
    print("text_embedding:", text_embedding)
    #temp1 = Permute([1,2])(RepeatVector(180)(BatchNormalization()(text_embedding)))
    temp1 = Reshape((1,180))(text_embedding)
    temp1 = Permute([2,1])(temp1)
    print("temp1:", temp1)
    
    print("x_lags:", x_lags)
    temp2 = Permute([1,2])(RepeatVector(180)(BatchNormalization()(x_lags)))
    temp2 = Dropout(0.5)(temp2)
    print("temp2:", temp2)
    
    temp = Concatenate(axis=2)([temp1, temp2])
    print("concatenated:", temp)
    temp = Dense(1, activation="tanh")(temp)
    #temp = Permute([2,1])(temp)
    print("after tanh:", temp)
    temp = Reshape((180,))(temp)
    temp = BatchNormalization()(temp)
    print(temp)
    attention_probs = Activation("softmax")(temp)
    #print fail

    #attention_probs = Dense(180, activation='softmax', name='attention_vec')(text_embedding)
    #attention_mul = merge([text_embedding, attention_probs], output_shape=180, name='attention_mul', mode='mul')
    attention_mul = Concatenate([text_embedding, attention_probs])
    attention_mul = BatchNormalization()(attention_mul)
    #attention_mul = Dropout(0.5)(attention_mul)
    
    feat = Concatenate(axis=1)([x_lags, attention_mul])
    
    feat = BatchNormalization()(feat)
    #feat = Dense(units=50, activation='relu')(feat)
    #feat = Dropout(0.5)(feat)
    
    preds = Dense(units=num_preds)(feat)
    #preds = Dense(units=num_preds, kernel_regularizer=keras.regularizers.l2(0.2))(feat)
    preds = Activation("linear")(preds)
    
    model = Model([input_lags, input_events, sequence_input], preds)
    
    rmsp = keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
    #model.compile(loss="mse", optimizer=rmsp)
    model.compile(loss="mse", optimizer="adam")
    
    return model, input_lags, preds


print("\nrunning MLP with lags + weather + events + late + event_lags + text...")

# checkpoint best model
checkpoint = ModelCheckpoint("weights.best.hdf5", monitor='val_loss', verbose=0, save_best_only=True, mode='min')

# fit model to the mean
model, input_lags, preds = build_model_text_v2(1, NUM_LAGS+len(sel), 4+len(sel2), 1)
model.fit(
    [np.concatenate([lags_train, weather_feats_train[:,sel]], axis=1), 
     np.concatenate([event_feats_train[:,:], lags_event_feats_train[:,sel2]], axis=1),
     data_train],
    y_train,
    batch_size=64,
    epochs=700,
    #validation_split=0.2,
    validation_data=([np.concatenate([lags_val, weather_feats_val[:,sel]], axis=1), 
                      np.concatenate([event_feats_val[:,:], lags_event_feats_val[:,sel2]], axis=1),
                      data_val], y_val),
    callbacks=[checkpoint],
    verbose=0)   

print("Total number of iterations:  ", len(model.history.history["loss"]))
print("Best loss at iteratation:    ", np.argmin(model.history.history["loss"]), "   Best:", np.min(model.history.history["loss"]))
print("Best val_loss at iteratation:", np.argmin(model.history.history["val_loss"]), "   Best:", np.min(model.history.history["val_loss"]))

# load weights
model.load_weights("weights.best.hdf5")

print(model.evaluate([np.concatenate([lags_test[:,:], weather_feats_test[:,sel]], axis=1), 
                      np.concatenate([event_feats_test[:,:], lags_event_feats_test[:,sel2]], axis=1),
                      data_test],
                      y_test, verbose=2))

# make predictions
preds_lstm = model.predict([np.concatenate([lags_test[:,:], weather_feats_test[:,sel]], axis=1), 
                            np.concatenate([event_feats_test[:,:], lags_event_feats_test[:,sel2]], axis=1),
                            data_test])
preds_lstm = preds_lstm[:,0] * std_test + trend_test
corr, mae, rae, rmse, rrse, mape, r2 = compute_error(y_true, preds_lstm)
print("MAE:  %.3f\tRMSE: %.3f\tR2:   %.3f" % (mae, rmse, r2))


preparing word embeddings for NNs with text...
Found 965 unique tokens.
Shape of train tensor: (730, 350)
Shape of val tensor: (365, 350)
Shape of test tensor: (170, 350)
Preparing embedding matrix.

running MLP with lags + weather + events + late + text...
text_embedding: KerasTensor(type_spec=TensorSpec(shape=(None, 180), dtype=tf.float32, name=None), name='flatten_5/Reshape:0', description="created by layer 'flatten_5'")
temp1: KerasTensor(type_spec=TensorSpec(shape=(None, 180, 1), dtype=tf.float32, name=None), name='permute_10/transpose:0', description="created by layer 'permute_10'")
x_lags: KerasTensor(type_spec=TensorSpec(shape=(None, 100), dtype=tf.float32, name=None), name='batch_normalization_28/batchnorm/add_1:0', description="created by layer 'batch_normalization_28'")
temp2: KerasTensor(type_spec=TensorSpec(shape=(None, 180, 100), dtype=tf.float32, name=None), name='dropout_36/Identity:0', description="created by layer 'dropout_36'")
concatenated: KerasTensor(type_spec=Te

ValueError: Input has undefined rank. Received: input_shape=<unknown>.

In [82]:
from keras.layers import merge