In [40]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report
from modeling_helper import transform_xy
from sklearn.preprocessing import StandardScaler

In [4]:
# Load flights_processed.csv into a DataFrame; every later split is taken from it.
# NOTE(review): this is an absolute, machine-specific path, so the cell only runs
# on the original author's machine -- consider a configurable data directory.
flights = pd.read_csv('/Users/user/Documents/Github/flightstatus/data/dev/flights_processed.csv')

In [5]:
# Chronological split by calendar month so the three sets never share rows:
#   train      -> months 1-6
#   validation -> months 7-9
#   test       -> months 10 and later
datatrain = flights[flights.MONTH <= 6]
dataval = flights[(flights.MONTH >= 7) & (flights.MONTH <= 9)]
# The test filter used to be `MONTH <= 10`, which selected months 1-10 and so
# contained every training and validation row (data leakage into the test set).
datatest = flights[flights.MONTH >= 10]

In [6]:
# Run the shared helper over each split, in train / validation / test order,
# and unpack every (features, labels) pair it returns.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    transform_xy(split) for split in (datatrain, dataval, datatest)
)

In [33]:
# Size of X_train along axis 1; used below as the input_shape of the first
# Dense layer of every network.
n_cols = X_train.shape[1]

In [42]:
# Fit the scaler on the training features only, then apply that same fitted
# transform to all three splits so validation/test statistics never leak in.
ss = StandardScaler().fit(X_train)
X_train_s, X_val_s, X_test_s = (
    ss.transform(features) for features in (X_train, X_val, X_test)
)

## Starting model

In [49]:
# Early-stopping callback shared by every fit() call in this notebook. It was
# referenced below but never defined, so a fresh kernel failed with NameError.
# EarlyStopping monitors val_loss by default.
# NOTE(review): patience=2 is an assumption -- the original value was not
# recorded in the notebook; adjust if the recorded runs used something else.
early_stopping_monitor = EarlyStopping(patience=2)

# Baseline network: 100-unit ReLU layer -> dropout -> 100-unit ReLU layer ->
# 2-way softmax over the two label columns.
model = Sequential()
model.add(Dense(100, activation='relu', input_shape=(n_cols,)))
model.add(Dropout(rate=0.8))  # rate is the fraction of units dropped
model.add(Dense(100, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer=SGD(lr=0.0001), loss='categorical_crossentropy')

# epochs is spelled out instead of relying on fit()'s default: the recorded log
# for this cell shows 10 epochs, whereas newer Keras releases default to 1.
# class_weight makes class 1 count twice as much as class 0 in the training loss.
model.fit(
    X_train_s,
    y_train,
    epochs=10,
    batch_size=64,
    class_weight={0: 1., 1: 2.},
    validation_data=(X_val_s, y_val),
    callbacks=[early_stopping_monitor],
)

Train on 2889506 samples, validate on 1496200 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7ffb39ea8630>

**class_weight**: weights each class's contribution to the loss function during training only; the validation loss is computed without these weights.   
Therefore, we see that the val_loss is lower than the training loss.

In [56]:
# Score the validation set, turn the second softmax column into hard 0/1
# labels with a 0.5 cut-off, and report per-class precision/recall.
y_val_pred = model.predict(X_val_s)
y_val_pred_abs = np.where(y_val_pred[:, 1] > 0.5, 1, 0)
print(classification_report(y_val[:, 1], y_val_pred_abs))

             precision    recall  f1-score   support

        0.0       0.81      0.33      0.47   1004446
        1.0       0.38      0.84      0.52    491754

avg / total       0.67      0.50      0.49   1496200



## Increase model capacity: more layers

In [57]:
# Deeper variant of the baseline: after the dropout layer there are now three
# hidden ReLU layers (100, 100, 50) before the 2-way softmax output.
model = Sequential()
model.add(Dense(100, activation='relu', input_shape=(n_cols,)))
model.add(Dropout(rate=0.8))  # rate is the fraction of units dropped
model.add(Dense(100, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer=SGD(lr=0.0001), loss='categorical_crossentropy')

# epochs is spelled out instead of relying on fit()'s default: the recorded log
# for this cell shows an upper bound of 10 epochs, whereas newer Keras releases
# default to 1. Early stopping may still end training sooner.
model.fit(
    X_train_s,
    y_train,
    epochs=10,
    batch_size=64,
    class_weight={0: 1., 1: 2.},
    validation_data=(X_val_s, y_val),
    callbacks=[early_stopping_monitor],
)

Train on 2889506 samples, validate on 1496200 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10


<keras.callbacks.History at 0x7ffb7e49f7b8>

In [58]:
# Validation report for the current model: threshold the second softmax column
# at 0.5 and compare against the second label column.
y_val_pred = model.predict(X_val_s)
y_val_pred_abs = (y_val_pred[:, 1] > 0.5).astype(int)
print(classification_report(y_val[:, 1], y_val_pred_abs))

             precision    recall  f1-score   support

        0.0       0.78      0.39      0.52   1004446
        1.0       0.39      0.78      0.52    491754

avg / total       0.65      0.52      0.52   1496200



## Increase model capacity: more nodes

In [61]:
# Wider variant: the hidden stack after dropout is 1000 -> 200 -> 100 units.
# The layers are handed to Sequential as one list instead of repeated add() calls.
model = Sequential([
    Dense(100, activation='relu', input_shape=(n_cols,)),
    Dropout(rate=0.8),
    Dense(1000, activation='relu'),
    Dense(200, activation='relu'),
    Dense(100, activation='relu'),
    Dense(2, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.0001))

# Up to 50 epochs; the early-stopping callback may end training sooner.
model.fit(
    X_train_s,
    y_train,
    epochs=50,
    batch_size=64,
    class_weight={0: 1., 1: 2.},
    validation_data=(X_val_s, y_val),
    callbacks=[early_stopping_monitor],
)

Train on 2889506 samples, validate on 1496200 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50


<keras.callbacks.History at 0x7ffaa7120d68>

In [63]:
# Predict on the scaled validation features, binarise the second output column
# at 0.5, and print the per-class metrics.
y_val_pred = model.predict(X_val_s)
y_val_pred_abs = np.where(y_val_pred[:, 1] > 0.5, 1, 0)
print(classification_report(y_val[:, 1], y_val_pred_abs))

             precision    recall  f1-score   support

        0.0       0.80      0.43      0.56   1004446
        1.0       0.40      0.78      0.53    491754

avg / total       0.67      0.54      0.55   1496200



## Increase model capacity: even more layers

In [65]:
# Largest network tried: five hidden ReLU layers after the dropout layer,
# narrowing from 1000 down to 50 units, then the 2-way softmax output.
model = Sequential(
    [
        Dense(100, activation='relu', input_shape=(n_cols,)),
        Dropout(rate=0.8),
    ]
    + [Dense(width, activation='relu') for width in (1000, 500, 200, 100, 50)]
    + [Dense(2, activation='softmax')]
)
model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.0001))

# Up to 50 epochs; the early-stopping callback may end training sooner.
model.fit(
    X_train_s,
    y_train,
    epochs=50,
    batch_size=64,
    class_weight={0: 1., 1: 2.},
    validation_data=(X_val_s, y_val),
    callbacks=[early_stopping_monitor],
)

Train on 2889506 samples, validate on 1496200 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50


<keras.callbacks.History at 0x7ffaa39840b8>

In [67]:
# Validation metrics for the current model, using a 0.5 cut-off on the second
# softmax column to obtain hard 0/1 predictions.
y_val_pred = model.predict(X_val_s)
y_val_pred_abs = (y_val_pred[:, 1] > 0.5).astype(int)
print(classification_report(y_val[:, 1], y_val_pred_abs))

             precision    recall  f1-score   support

        0.0       0.66      0.04      0.08   1004446
        1.0       0.33      0.95      0.49    491754

avg / total       0.55      0.34      0.21   1496200



**The validation loss did not improve in the last round, and class-0 recall collapsed to 0.04, so we will decrease the model capacity.**

In [71]:
# Scaled-back network: the hidden stack after dropout is 1000 -> 500 -> 100
# units, followed by the 2-way softmax output.
model = Sequential([
    Dense(100, activation='relu', input_shape=(n_cols,)),
    Dropout(rate=0.8),
    Dense(1000, activation='relu'),
    Dense(500, activation='relu'),
    Dense(100, activation='relu'),
    Dense(2, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.0001))

# Up to 50 epochs; the early-stopping callback may end training sooner.
model.fit(
    X_train_s,
    y_train,
    epochs=50,
    batch_size=64,
    class_weight={0: 1., 1: 2.},
    validation_data=(X_val_s, y_val),
    callbacks=[early_stopping_monitor],
)

Train on 2889506 samples, validate on 1496200 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50


<keras.callbacks.History at 0x7ffaa2413438>

In [73]:
# Final validation report: binarise the second softmax column at 0.5 and
# compare it with the second label column.
y_val_pred = model.predict(X_val_s)
y_val_pred_abs = np.where(y_val_pred[:, 1] > 0.5, 1, 0)
print(classification_report(y_val[:, 1], y_val_pred_abs))

             precision    recall  f1-score   support

        0.0       0.79      0.44      0.56   1004446
        1.0       0.40      0.77      0.53    491754

avg / total       0.66      0.54      0.55   1496200



In [None]:
# Save whichever network `model` currently refers to (the name is rebound by
# every model cell above) to 'neuralnetwork.h5', a path relative to the
# current working directory.
model.save('neuralnetwork.h5')