In [15]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from keras.models import Sequential, load_model
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping
from sklearn.metrics import fbeta_score, precision_recall_curve, auc, roc_curve
from modeling_helper import transform_xy
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

In [2]:
flights = pd.read_csv('/Users/user/Documents/Github/flightstatus/data/dev/flights_processed.csv')

The dataset includes US flights in 2015. We will use flights 
- from January to June for training
- from July to September for validation
- October to December for test.

In [3]:
datatrain = flights[flights.MONTH<=6]
dataval = flights[(flights.MONTH>=7)&(flights.MONTH<=9)]
datatest = flights[flights.MONTH<=10]

In [4]:
X_train, y_train = transform_xy(datatrain)
X_val, y_val = transform_xy(dataval)
X_test, y_test = transform_xy(datatest)

In [5]:
n_cols = X_train.shape[1]

In [6]:
ss = StandardScaler()
X_train_s = ss.fit_transform(X_train)
X_val_s = ss.transform(X_val)
X_test_s = ss.transform(X_test)

In [7]:
# set up early stopping and class weight
early_stopping_monitor = EarlyStopping(patience=5)
spw = (sum(y_train==0) + sum(y_val==0))/(sum(y_train==1)+sum(y_val==1))
cw = {0:1., 1:spw[1]} # This gives the positive class 1.9 times the weight of the negative class

### Starting model

In [15]:
model = Sequential()
model.add(Dense(100, activation='relu', input_shape = (n_cols,))) 
model.add(Dropout(rate = 0.8))
model.add(Dense(100, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer=SGD(lr=0.001), loss='categorical_crossentropy', metrics=[tf.keras.metrics.AUC(curve='PR')])
model.fit(X_train_s, y_train, callbacks = [early_stopping_monitor], validation_data=(X_val_s, y_val),
         batch_size=64, class_weight=cw, epochs=50)

Train on 2889506 samples, validate on 1496200 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50


<keras.callbacks.callbacks.History at 0x7f7cb8424610>

__Note__: `class_weight` is used for weighting the loss function during training only. Therefore, we see that the validation loss is lower than the train loss.

In [19]:
y_train_prob = model.predict(X_train_s)[:,1]
y_val_prob = model.predict(X_val_s)[:,1]

In [22]:
precision, recall, thresholds = precision_recall_curve(y_train[:,1], y_train_prob)
auc(recall, precision)

0.5043923450759831

In [23]:
precision, recall, thresholds = precision_recall_curve(y_val[:,1], y_val_prob)
auc(recall, precision)

0.476082238189684

### Increase model capacity
- adding 2 fully connected layers with 100 nodes

In [24]:
model = Sequential()
model.add(Dense(100, activation='relu', input_shape = (n_cols,))) 
model.add(Dropout(rate = 0.8))
model.add(Dense(100, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer=SGD(lr=0.001), loss='categorical_crossentropy', metrics=[tf.keras.metrics.AUC(curve='PR')])
model.fit(X_train_s, y_train, callbacks = [early_stopping_monitor], validation_data=(X_val_s, y_val),
         batch_size=64, class_weight=cw, epochs=50)

Train on 2889506 samples, validate on 1496200 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50


<keras.callbacks.callbacks.History at 0x7f7cb87a8cd0>

In [25]:
y_train_prob = model.predict(X_train_s)[:,1]
y_val_prob = model.predict(X_val_s)[:,1]

In [26]:
precision, recall, thresholds = precision_recall_curve(y_train[:,1], y_train_prob)
auc(recall, precision)

0.5030946071485047

In [27]:
precision, recall, thresholds = precision_recall_curve(y_val[:,1], y_val_prob)
auc(recall, precision)

0.4804807127748579

In [43]:
model.save('neuralnetwork.h5')

### Increase model capacity
- adding to 500 nodes in the middle layers

In [8]:
model = Sequential()
model.add(Dense(100, activation='relu', input_shape = (n_cols,))) 
model.add(Dropout(rate = 0.8))
model.add(Dense(100, activation='relu'))
model.add(Dense(500, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer=SGD(lr=0.001), loss='categorical_crossentropy', metrics=[tf.keras.metrics.AUC(curve='PR')])
model.fit(X_train_s, y_train, callbacks = [early_stopping_monitor], validation_data=(X_val_s, y_val),
         batch_size=64, class_weight=cw, epochs=50)

Train on 2889506 samples, validate on 1496200 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50


<keras.callbacks.callbacks.History at 0x7fcd5589fa90>

In [9]:
y_train_prob = model.predict(X_train_s)[:,1]
y_val_prob = model.predict(X_val_s)[:,1]

In [10]:
precision, recall, thresholds = precision_recall_curve(y_train[:,1], y_train_prob)
auc(recall, precision)

0.5052536719926125

In [11]:
precision, recall, thresholds = precision_recall_curve(y_val[:,1], y_val_prob)
auc(recall, precision)

0.4749822763163157

In [12]:
model.save('neuralnetwork.h5')

### Increase model capacity
- adding 2 more fully connected layers of 500 nodes
- increase the middle layer's nodes to 1000

In [32]:
model = Sequential()
model.add(Dense(100, activation='relu', input_shape = (n_cols,))) 
model.add(Dropout(rate = 0.8))
model.add(Dense(100, activation='relu'))
model.add(Dense(500, activation='relu'))
model.add(Dense(1000, activation='relu'))
model.add(Dense(500, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer=SGD(lr=0.001), loss='categorical_crossentropy', metrics=[tf.keras.metrics.AUC(curve='PR')])
model.fit(X_train_s, y_train, callbacks = [early_stopping_monitor], validation_data=(X_val_s, y_val),
         batch_size=64, class_weight=cw, epochs=50)

Train on 2889506 samples, validate on 1496200 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50


<keras.callbacks.callbacks.History at 0x7f7cb8801fa0>

In [33]:
yy_train_prob = model.predict(X_train_s)[:,1]
y_val_prob = model.predict(X_val_s)[:,1]

In [34]:
precision, recall, thresholds = precision_recall_curve(y_train[:,1], y_train_prob)
auc(recall, precision)

0.5024298687072728

In [35]:
precision, recall, thresholds = precision_recall_curve(y_val[:,1], y_val_prob)
auc(recall, precision)

0.4766038409793225

**The validation loss did not improve in the last round. We will decrease the model capacity by removing a layer.**

In [36]:
model = Sequential()
model.add(Dense(100, activation='relu', input_shape = (n_cols,))) 
model.add(Dropout(rate = 0.8))
model.add(Dense(100, activation='relu'))
model.add(Dense(500, activation='relu'))
model.add(Dense(500, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer=SGD(lr=0.001), loss='categorical_crossentropy', metrics=[tf.keras.metrics.AUC(curve='PR')])
model.fit(X_train_s, y_train, callbacks = [early_stopping_monitor], validation_data=(X_val_s, y_val),
         batch_size=64, class_weight=cw, epochs=50)

Train on 2889506 samples, validate on 1496200 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50


<keras.callbacks.callbacks.History at 0x7f7cb887ce80>

### Make predictions

In [13]:
y_train_prob = model.predict(X_train_s)[:,1]
y_val_prob = model.predict(X_val_s)[:,1]
y_test_prob = model.predict(X_test_s)[:,1]

### ROC

In [16]:
fpr, tpr, thresholds = roc_curve(y_train[:,1], y_train_prob)
auc(fpr, tpr)

0.6633633772331589

In [17]:
fpr, tpr, thresholds = roc_curve(y_val[:,1], y_val_prob)
auc(fpr, tpr)

0.6622402962712145

In [18]:
fpr, tpr, thresholds = roc_curve(y_test[:,1], y_test_prob)
auc(fpr, tpr)

0.6628200490374877

### Precision-Recall Curve

In [19]:
precision, recall, thresholds = precision_recall_curve(y_train[:,1], y_train_prob)
auc(recall, precision)

0.5052536719926125

In [20]:
precision, recall, thresholds = precision_recall_curve(y_val[:,1], y_val_prob)
auc(recall, precision)

0.4749822763163157

In [21]:
precision, recall, thresholds = precision_recall_curve(y_test[:,1], y_test_prob)
auc(recall, precision)

0.49407089543558397

### Reload the final model

In [6]:
#reload the model
m_nn = load_model('neuralnetwork.h5')