In [1]:
from sys import version
import tensorflow
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder

print(version)

Using TensorFlow backend.
3.6.2 |Anaconda custom (64-bit)| (default, Sep 21 2017, 18:29:43) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]


In [2]:
# Load and prepare training and xval data
TRAINING_FILE, XVAL_FILE = "../combined_data/15min/train.tsv.gz", "../combined_data/15min/xval.tsv.gz"
train, xval = pd.read_csv(TRAINING_FILE, sep='\t'), pd.read_csv(XVAL_FILE, sep='\t')
print(f'Training dimension: {train.shape}')
print(f'Xval dimension: {xval.shape}')
print('\n'.join(train.columns))
train.head(n=2).T

Training dimension: (542954, 20)
Xval dimension: (67876, 20)
opd_date
trip_start_hr_15
rte
dir
day_of_week
is_ns
is_rapid
is_weekend
orca_total
frac_disabled
frac_youth
frac_senior
frac_li
frac_uw
ons
region
start
end
type
summer


Unnamed: 0,0,1
opd_date,2019-01-07,2019-01-07
trip_start_hr_15,00_0,00_0
rte,1,2
dir,N,N
day_of_week,0,0
is_ns,1,1
is_rapid,0,0
is_weekend,0,0
orca_total,2,3
frac_disabled,0.0666667,0.00641026


In [3]:

X_NUM_COLS = [
    'orca_total', 'frac_disabled', 'frac_youth', 'frac_senior', 'frac_li', 'frac_uw'
]
X_CAT_COLS = ['is_ns', 'is_rapid', 'is_weekend', 'trip_start_hr_15', 'rte', 'dir', 'day_of_week', 'region', 'start', 'end', 'summer']


label_encoders = {col: LabelEncoder() for col in X_CAT_COLS}
one_hot_encoder = OneHotEncoder()
scaler = StandardScaler()

X_train = np.concatenate((
    scaler.fit_transform(train[X_NUM_COLS]),
    one_hot_encoder.fit_transform(
        np.stack([label_encoders[col].fit_transform(train[col]) for col in X_CAT_COLS]).T
    ).todense()
), axis=1)
X_xval = np.concatenate((
    scaler.transform(xval[X_NUM_COLS]),
    one_hot_encoder.transform(
        np.stack([label_encoders[col].transform(xval[col]) for col in X_CAT_COLS]).T
    ).todense()
), axis=1)

y_train = train['ons']
y_xval = xval['ons']

column_labels = list()
column_labels.extend(X_NUM_COLS)
for cat in X_CAT_COLS:
    for clazz in label_encoders[cat].classes_:
        column_labels.append(f'{cat}: {clazz}')

In [4]:
assert len(column_labels) == X_train.shape[1]

for i, label in enumerate(column_labels):
    print(f'{label}: {np.squeeze(X_train[:5, i])}')

orca_total: [[-0.76657383 -0.72243036 -0.45756958 -0.85486075 -0.6782869 ]]
frac_disabled: [[ 1.36376967 -0.41035794 -0.54689108 -0.59909492  0.01430026]]
frac_youth: [[-0.50030876 -0.50030876 -0.50030876 -0.50030876 -0.50030876]]
frac_senior: [[-0.53708954 -0.53708954 -0.53708954 -0.53708954 -0.53708954]]
frac_li: [[-0.65231668 -0.30054282 -0.31176964 -0.65231668 -0.65231668]]
frac_uw: [[-0.4245457  -0.4245457  -0.2131194  -0.4245457  -0.01050253]]
is_ns: 0.0: [[0. 0. 0. 0. 0.]]
is_ns: 1.0: [[1. 1. 1. 1. 1.]]
is_rapid: 0.0: [[1. 1. 1. 1. 1.]]
is_rapid: 1.0: [[0. 0. 0. 0. 0.]]
is_weekend: 0.0: [[1. 1. 1. 1. 1.]]
is_weekend: 1.0: [[0. 0. 0. 0. 0.]]
trip_start_hr_15: 00_0: [[1. 1. 1. 1. 1.]]
trip_start_hr_15: 00_15: [[0. 0. 0. 0. 0.]]
trip_start_hr_15: 00_30: [[0. 0. 0. 0. 0.]]
trip_start_hr_15: 00_45: [[0. 0. 0. 0. 0.]]
trip_start_hr_15: 01_0: [[0. 0. 0. 0. 0.]]
trip_start_hr_15: 01_15: [[0. 0. 0. 0. 0.]]
trip_start_hr_15: 01_30: [[0. 0. 0. 0. 0.]]
trip_start_hr_15: 01_45: [[0. 0. 0. 0.

In [27]:
# Attempt 1: long thin neural net
xval_perf = list()
for n_middle_layers in (1,2,4,8,10):
    print(f"{n_middle_layers} middle layers")
    model = Sequential()
    model.add(Dense(4, activation="linear", input_dim=426))
    model.add(Dropout(0.5))
    for i in range(n_middle_layers-1):
        model.add(Dense(4, activation='sigmoid'))
        model.add(Dropout(0.5))
    model.add(Dense(1, activation='linear'))
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
    model.fit(X_train, y_train, epochs=10, batch_size=256)
    xval_mae = model.evaluate(X_xval, y_xval, batch_size=128)[0]
    xval_perf.append(xval_mae)
    print(xval_mae)

1 middle layers
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
9.860205568023265
2 middle layers
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
15.237501715748275
4 middle layers
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
17.95892187204293
8 middle layers
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
22.89282713942961
10 middle layers
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
22.887278550135814


In [28]:
xval_perf   # 9.86 is pretty good

[9.860205568023265,
 15.237501715748275,
 17.95892187204293,
 22.89282713942961,
 22.887278550135814]

In [46]:
# transfer learning approach. Train a neural net to classify up to n clusters.
from sklearn.cluster import KMeans
from numpy.random import uniform, seed

n_clusters = 24
# Best I could do at 6: 21 mae on training

seed(10)
X_samp = X_train[uniform(size=X_train.shape[0]) < 0.1,:]
kmeans = KMeans(n_clusters, n_jobs=-1)
cluster_preds = kmeans.fit_predict(X_samp)
X_classes = kmeans.predict(X_train)

In [47]:
pd.Series(X_classes).value_counts()

3     44631
13    39301
1     39136
19    37222
0     33689
4     33535
12    31647
14    31437
17    30908
9     30571
11    28349
16    22873
6     20808
2     20480
20    19412
5     15292
22    11544
15     9966
18     9062
7      8623
8      7331
23     6517
21     5576
10     5044
dtype: int64

In [49]:
from keras.utils import np_utils

classifier = Sequential()
classifier.add(Dense(32, activation="relu", input_dim=X_train.shape[1]))
classifier.add(Dropout(0.5))
classifier.add(Dense(n_clusters, activation='sigmoid'))
classifier.add(Dropout(0.5))
classifier.add(Dense(n_clusters, activation='sigmoid'))
classifier.add(Dropout(0.5))
classifier.add(Dense(n_clusters, activation='softmax'))

classifier.compile(
    loss='binary_crossentropy', 
    #optimizer='adam',
    optimizer=SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True),
    metrics=['accuracy']
)
classifier.fit(
    X_train, 
    np_utils.to_categorical(X_classes), 
    epochs=5, 
    batch_size=256)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1a47b82dd8>

In [50]:

# predictor = Sequential()
# predictor.add(Dense(4, activation="relu", input_dim=n_clusters))
# predictor.add(Dropout(0.5))
# predictor.add(Dense(4, activation="sigmoid"))
# predictor.add(Dropout(0.5))
# predictor.add(Dense(4, activation="sigmoid"))
# predictor.add(Dropout(0.5))
# predictor.add(Dense(1, activation="linear"))
# 
# predictor.compile(
#     loss='mean_absolute_error', 
#     optimizer=SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True), 
#     metrics=['mean_squared_error']
# )

In [52]:
#_train_fwd = classifier.predict(X_train)  # (n, n_clusters)
#predictor.fit(X_train_fwd, y_train, epochs=15, batch_size=256)

In [53]:
X_train_nums = X_train[:,:len(X_NUM_COLS)]
X_train_fwd = np.concatenate([classifier.predict(X_train), X_train_nums], axis=1)  # (n, n_clusters + len(X_NUM_COLS))
print(X_train_fwd.shape)

(542954, 30)


In [55]:
# So that didn't work the way I wanted. Let's add the numeric variables back in addition to the classifier outputs
predictor = Sequential()
predictor.add(Dense(4, activation="relu", input_dim=X_train_fwd.shape[1]))
predictor.add(Dropout(0.3))
predictor.add(Dense(4, activation="tanh"))
predictor.add(Dropout(0.3))
predictor.add(Dense(2, activation="sigmoid"))
predictor.add(Dropout(0.3))
predictor.add(Dense(2, activation="sigmoid"))
predictor.add(Dropout(0.3))
predictor.add(Dense(1, activation="linear"))
predictor.compile(
    loss='mean_absolute_error', 
    optimizer=SGD(lr=0.05, decay=1e-5, momentum=0.9, nesterov=True), 
    metrics=['mean_squared_error']
)

predictor.fit(X_train_fwd, y_train, epochs=20, batch_size=256)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1a5b706630>

In [56]:
forward = np.concatenate([classifier.predict(X_xval), X_xval[:,:len(X_NUM_COLS)]], axis=1)
predictor.evaluate(forward, y_xval, batch_size=128)[0]



19.639980235682316

In [59]:
# Try a smaller predictor network
predictor = Sequential()
predictor.add(Dense(8, activation="linear", input_dim=X_train_fwd.shape[1]))
predictor.add(Dropout(0.4))
predictor.add(Dense(4, activation="tanh"))
predictor.add(Dropout(0.4))
predictor.add(Dense(1, activation="linear"))
predictor.compile(
    loss='mean_absolute_error', 
    optimizer=SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True), 
    metrics=['mean_squared_error']
)

predictor.fit(X_train_fwd, y_train, epochs=20, batch_size=256)

forward = np.concatenate([classifier.predict(X_xval), X_xval[:,:len(X_NUM_COLS)]], axis=1)
predictor.evaluate(forward, y_xval, batch_size=128)[0]

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


14.663555800342216

In [61]:
# Next try again with only a few clusters (4)
# transfer learning approach. Train a neural net to classify up to n clusters.
from sklearn.cluster import KMeans
from numpy.random import uniform, seed
from keras.utils import np_utils

n_clusters = 4
# Best I could do at 6: 21 mae on training

seed(10)
print("K MEANS")
X_samp = X_train[uniform(size=X_train.shape[0]) < 0.1,:]
kmeans = KMeans(n_clusters, n_jobs=-1)
cluster_preds = kmeans.fit_predict(X_samp)
X_classes = kmeans.predict(X_train)

print("CLASSIFIER")
classifier = Sequential()
classifier.add(Dense(32, activation="relu", input_dim=X_train.shape[1]))
classifier.add(Dropout(0.5))
classifier.add(Dense(n_clusters, activation='sigmoid'))
classifier.add(Dropout(0.5))
classifier.add(Dense(n_clusters, activation='sigmoid'))
classifier.add(Dropout(0.5))
classifier.add(Dense(n_clusters, activation='softmax'))

classifier.compile(
    loss='binary_crossentropy', 
    #optimizer='adam',
    optimizer=SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True),
    metrics=['accuracy']
)
classifier.fit(
    X_train, 
    np_utils.to_categorical(X_classes), 
    epochs=5, 
    batch_size=256)

X_train_nums = X_train[:,:len(X_NUM_COLS)]
X_train_fwd = np.concatenate([classifier.predict(X_train), X_train_nums], axis=1)  # (n, n_clusters + len(X_NUM_COLS))
print(X_train_fwd.shape)

print("PREDICTOR")
# Try a smaller predictor network
predictor = Sequential()
predictor.add(Dense(8, activation="linear", input_dim=X_train_fwd.shape[1]))
predictor.add(Dropout(0.4))
predictor.add(Dense(4, activation="tanh"))
predictor.add(Dropout(0.4))
predictor.add(Dense(1, activation="linear"))
predictor.compile(
    loss='mean_absolute_error', 
    optimizer=SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True), 
    metrics=['mean_squared_error']
)

predictor.fit(X_train_fwd, y_train, epochs=20, batch_size=256)

forward = np.concatenate([classifier.predict(X_xval), X_xval[:,:len(X_NUM_COLS)]], axis=1)
predictor.evaluate(forward, y_xval, batch_size=128)[0]

K MEANS
CLASSIFIER
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
(542954, 10)
PREDICTOR
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


15.65815137514376

In [77]:

model = Sequential()
model.add(Dense(64, activation="linear", input_dim=426))
model.add(Dropout(0.5))
model.add(Dense(8, activation='tanh'))
model.add(Dense(1, activation='linear'))
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
model.fit(X_train, y_train, epochs=10, batch_size=256)

xval_mae = model.evaluate(X_xval, y_xval, batch_size=128)[0]
print(xval_mae)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
13.026481550720122


In [78]:

model = Sequential()
model.add(Dense(64, activation="sigmoid", input_dim=426))
model.add(Dropout(0.5))
model.add(Dense(1, activation='linear'))
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
model.fit(X_train, y_train, epochs=5, batch_size=256)

xval_mae = model.evaluate(X_xval, y_xval, batch_size=128)[0]
print(xval_mae)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
7.448609103815916


In [79]:
# Try other activation functions
for act in ['relu', 'tanh', 'linear']:
    print(f"ACTIVATION: {act}")
    model = Sequential()
    model.add(Dense(64, activation=act, input_dim=426))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='linear'))
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
    model.fit(X_train, y_train, epochs=5, batch_size=256)
    xval_mae = model.evaluate(X_xval, y_xval, batch_size=128)[0]
    print(f"XVAL MAE: {xval_mae}")


ACTIVATION: relu
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
XVAL MAE: 8.229848937734253
ACTIVATION: tanh
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
XVAL MAE: 8.778570294288862
ACTIVATION: linear
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
XVAL MAE: 9.406058834946368


In [80]:
# Sigmoid is best. Experiment with different 1st layer sizes
for sz in (8, 64, 128, 256):
    print(f"1st layer size: {sz}")
    model = Sequential()
    model.add(Dense(sz, activation="sigmoid", input_dim=426))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='linear'))
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
    model.fit(X_train, y_train, epochs=5, batch_size=256)
    xval_mae = model.evaluate(X_xval, y_xval, batch_size=128)[0]
    print(f"XVAL MAE: {xval_mae}")


1st layer size: 8
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
XVAL MAE: 10.520857586001625
1st layer size: 64
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
XVAL MAE: 7.486555921061234
1st layer size: 128
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
XVAL MAE: 7.211624796315084
1st layer size: 256
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
XVAL MAE: 7.065202585183582


In [81]:
# ok, let's try a few options smaller and larger of 256 larger. With 1 more epoch for an additional boost.
for sz in (200, 512, 600, 1024):
    print(f"1st layer size: {sz}")
    model = Sequential()
    model.add(Dense(sz, activation="sigmoid", input_dim=426))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='linear'))
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
    model.fit(X_train, y_train, epochs=6, batch_size=256)
    xval_mae = model.evaluate(X_xval, y_xval, batch_size=128)[0]
    print(f"XVAL MAE: {xval_mae}")


1st layer size: 200
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
XVAL MAE: 7.077427285457586
1st layer size: 512
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
XVAL MAE: 7.004053317184714
1st layer size: 600
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
XVAL MAE: 6.979730711146821
1st layer size: 1024
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
XVAL MAE: 7.004750417366187


In [84]:
# So 600 is the best in class so far. 
# What about adding another layer now that I think the combinations are being represented?
    
model = Sequential()
model.add(Dense(600, activation="sigmoid", input_dim=426))
model.add(Dropout(0.5))
model.add(Dense(4, activation="linear"))
model.add(Dropout(0.5))
model.add(Dense(1, activation='linear'))
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
model.fit(X_train, y_train, epochs=6, batch_size=256)
xval_mae = model.evaluate(X_xval, y_xval, batch_size=128)[0]
print(f"XVAL MAE: {xval_mae}")

Epoch 1/6

KeyboardInterrupt: 

In [85]:
# lol NOPE. What about less dropout?
model = Sequential()
model.add(Dense(600, activation="sigmoid", input_dim=426))
model.add(Dropout(0.3))
model.add(Dense(1, activation='linear'))
sgd = SGD(lr=0.1, decay=1e-6, momentum=a0.9, nesterov=True)
model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
model.fit(X_train, y_train, epochs=6, batch_size=256)
xval_mae = model.evaluate(X_xval, y_xval, batch_size=128)[0]
print(f"XVAL MAE: {xval_mae}")

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
XVAL MAE: 6.8486833875029935


In [87]:
# lol NOPE. What about less dropout?
model = Sequential()
model.add(Dense(600, activation="sigmoid", input_dim=426))
model.add(Dropout(0.1))
model.add(Dense(1, activation='linear'))
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
model.fit(X_train, y_train, epochs=6, batch_size=256)
xval_mae = model.evaluate(X_xval, y_xval, batch_size=128)[0]
print(f"XVAL MAE: {xval_mae}")

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
XVAL MAE: 6.733853124014033


In [88]:

model = Sequential()
model.add(Dense(600, activation="sigmoid", input_dim=426))
model.add(Dropout(0.1))
model.add(Dense(1, activation='linear'))
sgd = SGD(lr=0.2, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
model.fit(X_train, y_train, epochs=8, batch_size=256)
xval_mae = model.evaluate(X_xval, y_xval, batch_size=128)[0]
print(f"XVAL MAE: {xval_mae}")

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
XVAL MAE: 6.526257157178217


In [89]:
model.fit(X_train, y_train, epochs=2, batch_size=256)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1a72c22a90>

In [90]:
model.fit(X_train, y_train, epochs=3, batch_size=256)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x1a72a50080>

In [91]:
model.fit(X_train, y_train, epochs=2, batch_size=256)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1a72a50048>

In [92]:
xval_mae = model.evaluate(X_xval, y_xval, batch_size=128)[0]
print(f"XVAL MAE: {xval_mae}")

XVAL MAE: 6.399070236831684


In [93]:
model.fit(X_train, y_train, epochs=4, batch_size=256)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x1a47d61be0>

In [95]:
xval_mae = model.evaluate(X_xval, y_xval, batch_size=128)[0]
print(f"XVAL MAE: {xval_mae}")

model.fit(X_train, y_train, epochs=4, batch_size=256)

XVAL MAE: 6.331358876153304
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x1a47d79b00>

In [96]:
xval_mae = model.evaluate(X_xval, y_xval, batch_size=128)[0]
print(f"XVAL MAE: {xval_mae}")


XVAL MAE: 6.4226108718982


In [97]:
#best fit
model = Sequential()
model.add(Dense(600, activation="sigmoid", input_dim=426))
model.add(Dropout(0.1))
model.add(Dense(1, activation='linear'))
sgd = SGD(lr=0.2, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
model.fit(X_train, y_train, epochs=15, batch_size=256)
xval_mae = model.evaluate(X_xval, y_xval, batch_size=128)[0]
print(f"XVAL MAE: {xval_mae}")

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
XVAL MAE: 6.391801264650083


In [100]:
preds = model.predict(X_xval)
with open("../predictions/nn_15min_xval.txt", 'wt') as out:
    for pred in preds:
        out.write(str(pred)+'\n')


In [104]:
# Ok. Produce predictions from the best model so far, also fit and predict on :
# combined, 30 min (also re-scale model)
train, xval = pd.read_csv('../combined_data/30min/train.tsv.gz', sep='\t'), pd.read_csv("../combined_data/30min/xval.tsv.gz", sep='\t')

X_NUM_COLS = [
    'orca_total', 'frac_disabled', 'frac_youth', 'frac_senior', 'frac_li', 'frac_uw'
]
X_CAT_COLS = ['is_ns', 'is_rapid', 'is_weekend', 'trip_start_hr_30', 'rte', 'dir', 'day_of_week', 'region', 'start', 'end', 'summer']


label_encoders = {col: LabelEncoder() for col in X_CAT_COLS}
one_hot_encoder = OneHotEncoder()
scaler = StandardScaler()

X_train = np.concatenate((
    scaler.fit_transform(train[X_NUM_COLS]),
    one_hot_encoder.fit_transform(
        np.stack([label_encoders[col].fit_transform(train[col]) for col in X_CAT_COLS]).T
    ).todense()
), axis=1)
X_xval = np.concatenate((
    scaler.transform(xval[X_NUM_COLS]),
    one_hot_encoder.transform(
        np.stack([label_encoders[col].transform(xval[col]) for col in X_CAT_COLS]).T
    ).todense()
), axis=1)

y_train = train['ons']
y_xval = xval['ons']

column_labels = list()
column_labels.extend(X_NUM_COLS)
for cat in X_CAT_COLS:
    for clazz in label_encoders[cat].classes_:
        column_labels.append(f'{cat}: {clazz}')

#best fit
model = Sequential()
model.add(Dense(512, activation="sigmoid", input_dim=378))
model.add(Dropout(0.1))
model.add(Dense(1, activation='linear'))
sgd = SGD(lr=0.2, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
model.fit(X_train, y_train, validation_data=(X_xval, y_xval), epochs=15, batch_size=256)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
XVAL MAE: 7.789570094983333


In [105]:
preds = model.predict(X_xval)
with open("../predictions/nn_30min_xval.txt", 'wt') as out:
    for pred in preds:
        out.write(str(pred)+'\n')



In [111]:
# winter only, 15 min
train, xval = pd.read_csv('../winter_data/aggregates/15min/train.tsv.gz', sep='\t'), pd.read_csv("../winter_data/aggregates/15min/xval.tsv.gz", sep='\t')

X_NUM_COLS = [
    'orca_total', 'frac_disabled', 'frac_youth', 'frac_senior', 'frac_li', 'frac_uw'
]
X_CAT_COLS = ['is_ns', 'is_rapid', 'is_weekend', 'trip_start_hr_15', 'rte', 'dir', 'day_of_week', 'region', 'start', 'end', 'summer']


label_encoders = {col: LabelEncoder() for col in X_CAT_COLS}
one_hot_encoder = OneHotEncoder()
scaler = StandardScaler()

X_train = np.concatenate((
    scaler.fit_transform(train[X_NUM_COLS]),
    one_hot_encoder.fit_transform(
        np.stack([label_encoders[col].fit_transform(train[col]) for col in X_CAT_COLS]).T
    ).todense()
), axis=1)
X_xval = np.concatenate((
    scaler.transform(xval[X_NUM_COLS]),
    one_hot_encoder.transform(
        np.stack([label_encoders[col].transform(xval[col]) for col in X_CAT_COLS]).T
    ).todense()
), axis=1)

y_train = train['ons']
y_xval = xval['ons']

column_labels = list()
column_labels.extend(X_NUM_COLS)
for cat in X_CAT_COLS:
    for clazz in label_encoders[cat].classes_:
        column_labels.append(f'{cat}: {clazz}')

#less aggressive fit for more homogenous data
model = Sequential()
model.add(Dense(600, activation="sigmoid", input_dim=425))
model.add(Dropout(0.1))
model.add(Dense(1, activation='linear'))
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
model.fit(X_train, y_train, validation_data=(X_xval, y_xval), epochs=5, batch_size=256)


Train on 225708 samples, validate on 28498 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1a4690d7b8>

In [112]:
model.fit(X_train, y_train, validation_data=(X_xval, y_xval), epochs=2, batch_size=256)

Train on 225708 samples, validate on 28498 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1a45f9d630>

In [113]:
model.fit(X_train, y_train, validation_data=(X_xval, y_xval), epochs=2, batch_size=256)

Train on 225708 samples, validate on 28498 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1a460cc470>

In [114]:
preds = model.predict(X_xval)
with open("../predictions/nn_15min_winter_xval.txt", 'wt') as out:
    for pred in preds:
        out.write(str(pred)+'\n')


In [116]:
# winter only, 30 min
train, xval = pd.read_csv('../winter_data/aggregates/30min/train.tsv.gz', sep='\t'), pd.read_csv("../winter_data/aggregates/30min/xval.tsv.gz", sep='\t')

X_NUM_COLS = [
    'orca_total', 'frac_disabled', 'frac_youth', 'frac_senior', 'frac_li', 'frac_uw'
]
X_CAT_COLS = ['is_ns', 'is_rapid', 'is_weekend', 'trip_start_hr_30', 'rte', 'dir', 'day_of_week', 'region', 'start', 'end', 'summer']


label_encoders = {col: LabelEncoder() for col in X_CAT_COLS}
one_hot_encoder = OneHotEncoder()
scaler = StandardScaler()

X_train = np.concatenate((
    scaler.fit_transform(train[X_NUM_COLS]),
    one_hot_encoder.fit_transform(
        np.stack([label_encoders[col].fit_transform(train[col]) for col in X_CAT_COLS]).T
    ).todense()
), axis=1)
X_xval = np.concatenate((
    scaler.transform(xval[X_NUM_COLS]),
    one_hot_encoder.transform(
        np.stack([label_encoders[col].transform(xval[col]) for col in X_CAT_COLS]).T
    ).todense()
), axis=1)

y_train = train['ons']
y_xval = xval['ons']

column_labels = list()
column_labels.extend(X_NUM_COLS)
for cat in X_CAT_COLS:
    for clazz in label_encoders[cat].classes_:
        column_labels.append(f'{cat}: {clazz}')

#best fit
model = Sequential()
model.add(Dense(512, activation="sigmoid", input_dim=377))
model.add(Dropout(0.1))
model.add(Dense(1, activation='linear'))
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
model.fit(X_train, y_train, validation_data=(X_xval, y_xval), epochs=15, batch_size=256)

Train on 175074 samples, validate on 22386 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15

KeyboardInterrupt: 

In [117]:
preds = model.predict(X_xval)
with open("../predictions/nn_30min_winter_xval.txt", 'wt') as out:
    for pred in preds:
        out.write(str(pred)+'\n')


In [121]:
# summer only, 15 min
train, xval = pd.read_csv('../summer_data/aggregates/15min/train.tsv.gz', sep='\t'), pd.read_csv("../summer_data/aggregates/15min/xval.tsv.gz", sep='\t')

X_NUM_COLS = [
    'orca_total', 'frac_disabled', 'frac_youth', 'frac_senior', 'frac_li', 'frac_uw'
]
X_CAT_COLS = ['is_ns', 'is_rapid', 'is_weekend', 'trip_start_hr_15', 'rte', 'dir', 'day_of_week', 'region', 'start', 'end', 'summer']


label_encoders = {col: LabelEncoder() for col in X_CAT_COLS}
one_hot_encoder = OneHotEncoder()
scaler = StandardScaler()

X_train = np.concatenate((
    scaler.fit_transform(train[X_NUM_COLS]),
    one_hot_encoder.fit_transform(
        np.stack([label_encoders[col].fit_transform(train[col]) for col in X_CAT_COLS]).T
    ).todense()
), axis=1)
X_xval = np.concatenate((
    scaler.transform(xval[X_NUM_COLS]),
    one_hot_encoder.transform(
        np.stack([label_encoders[col].transform(xval[col]) for col in X_CAT_COLS]).T
    ).todense()
), axis=1)

y_train = train['ons']
y_xval = xval['ons']

column_labels = list()
column_labels.extend(X_NUM_COLS)
for cat in X_CAT_COLS:
    for clazz in label_encoders[cat].classes_:
        column_labels.append(f'{cat}: {clazz}')

#less aggressive fit for more homogenous data
model = Sequential()
model.add(Dense(600, activation="sigmoid", input_dim=416))
model.add(Dropout(0.1))
model.add(Dense(1, activation='linear'))
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
model.fit(X_train, y_train, validation_data=(X_xval, y_xval), epochs=10, batch_size=256)


Train on 316444 samples, validate on 39948 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
 40704/316444 [==>...........................] - ETA: 23s - loss: 7.3036 - mean_absolute_error: 7.3036 - mean_squared_error: 187.1380

KeyboardInterrupt: 

In [122]:
preds = model.predict(X_xval)
with open("../predictions/nn_15min_summer_xval.txt", 'wt') as out:
    for pred in preds:
        out.write(str(pred)+'\n')


In [125]:
# summer only, 30 min
train, xval = pd.read_csv('../summer_data/aggregates/30min/train.tsv.gz', sep='\t'), pd.read_csv("../summer_data/aggregates/30min/xval.tsv.gz", sep='\t')

X_NUM_COLS = [
    'orca_total', 'frac_disabled', 'frac_youth', 'frac_senior', 'frac_li', 'frac_uw'
]
X_CAT_COLS = ['is_ns', 'is_rapid', 'is_weekend', 'trip_start_hr_30', 'rte', 'dir', 'day_of_week', 'region', 'start', 'end', 'summer']


label_encoders = {col: LabelEncoder() for col in X_CAT_COLS}
one_hot_encoder = OneHotEncoder()
scaler = StandardScaler()

X_train = np.concatenate((
    scaler.fit_transform(train[X_NUM_COLS]),
    one_hot_encoder.fit_transform(
        np.stack([label_encoders[col].fit_transform(train[col]) for col in X_CAT_COLS]).T
    ).todense()
), axis=1)
X_xval = np.concatenate((
    scaler.transform(xval[X_NUM_COLS]),
    one_hot_encoder.transform(
        np.stack([label_encoders[col].transform(xval[col]) for col in X_CAT_COLS]).T
    ).todense()
), axis=1)

y_train = train['ons']
y_xval = xval['ons']

column_labels = list()
column_labels.extend(X_NUM_COLS)
for cat in X_CAT_COLS:
    for clazz in label_encoders[cat].classes_:
        column_labels.append(f'{cat}: {clazz}')

#best fit
model = Sequential()
model.add(Dense(512, activation="sigmoid", input_dim=368))
model.add(Dropout(0.1))
model.add(Dense(1, activation='linear'))
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_absolute_error', optimizer=sgd, metrics=['mean_absolute_error', 'mean_squared_error'])
model.fit(X_train, y_train, validation_data=(X_xval, y_xval), epochs=5, batch_size=256)

Train on 248579 samples, validate on 31495 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1a525290f0>

In [126]:
preds = model.predict(X_xval)
with open("../predictions/nn_30min_summer_xval.txt", 'wt') as out:
    for pred in preds:
        out.write(str(pred)+'\n')