# Nueral Network
This is another model we are testing for our application. A nueral network may perform better than an XG Boost model, so we will load the same data and see if we can get better results.

In [22]:
# required imports
import pandas as pd
import numpy as np
import gc
from sklearn.model_selection import train_test_split

# tf imports
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow_addons.optimizers import RectifiedAdam, Lookahead
from sklearn.base import BaseEstimator
from tensorflow_addons.activations import gelu
from keras.callbacks import ModelCheckpoint


# sklearn imports
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, RobustScaler, OrdinalEncoder, FunctionTransformer, QuantileTransformer
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

Using TensorFlow backend.


## Model
Now we will build a customized and adaptable nueral network with keras. Since the input size can vary based on the attribute we are training on, we need to be able to change it based on the input shape. We preset the chosen hyperparemteres such as loss functions and activation functions. 

The first function "create_model" will allow us to build a model with the proper number of layers we want as well as the correct input sizes. This is called by the "KerasModel" function whic

In [11]:
def create_model(grid_params, in_dim, out_dim, patience=20, loss='rmse', activation='sigmoid'):
    
    mul_input = grid_params['mul_input']
    n_layer = grid_params['n_layer']
    
    first_layer_size = int(in_dim*mul_input)
    hidden_layers = []
    for i_layer in range(n_layer, 0, -1):
        layer_size = int(((first_layer_size - out_dim) / n_layer) * i_layer + out_dim)
        hidden_layers.append(layer_size)

    print("Input dim:" + str(in_dim))
    print("Hidden Layers:" + str(hidden_layers))
    print("Output dim:" + str(out_dim))

    model = Sequential()
    
    model.add(Dense(in_dim,input_shape=[in_dim],activation=gelu))
    #model.add(BatchNormalization())
    model.add(Dropout(.5))
    
    for layer in hidden_layers:
        model.add(Dense(layer,activation=gelu))
        #model.add(BatchNormalization())
        model.add(Dropout(.5))
    
    model.add(Dense(out_dim, activation=activation))
    
    radam = RectifiedAdam()
    ranger = Lookahead(radam, sync_period=6, slow_step_size=0.5)
    optimizer = ranger#Adam(learning_rate=0.001)
    
    es = EarlyStopping(monitor='val_loss', verbose=1, mode='min', patience=patience, restore_best_weights=True)
    es.set_model(model)

    model.compile(optimizer=optimizer, loss=[loss], metrics=[])
    
    return model, [ es ]

In [30]:
class KerasModel(BaseEstimator):

    def __init__(
        self, 
        n_layer=1, 
        mul_input=1.75, 
        patience=5,
        batch_size=32,
        loss='msle',
        activation='sigmoid'
        ):
        self._estimator_type = 'reg' 
        self.n_layer = n_layer
        self.mul_input = mul_input
        self.patience = patience
        self.loss = loss
        self.activation = activation
        self.batch_size = batch_size
        #self.__name__ = self._wrapped_obj.__class__.__name__ + "PredictWrapper"

    def __repr__(self):
        if not hasattr(self, 'model'):
            return "Empty"
        return self.model.__repr__()

    def __str__(self):
        if not hasattr(self, 'model'):
            return "Empty"
        return self.model.__str__()
        
    def fit(self, X, Y, x_val, y_val):
        model, cbs = create_model(
            self.get_params(),
            X.shape[1],
            Y.shape[0],
            patience=self.patience,
            loss=self.loss,
            activation=self.activation
        )
#         X_train, X_valid, y_train, y_valid = train_test_split(X, Y, test_size=0.2, random_state=42, shuffle=True)
        self.model = model
        self.model.fit(X_train,y_train, batch_size=self.batch_size,epochs=10000, validation_data=[X_val,y_val], verbose=2, callbacks=cbs)
        return self

    def predict(self, *args, **kwargs):
        return self.model.predict(*args, **kwargs)

In [20]:
# load data
# This is the same loading code as before, so refer to the other notebooks for reference.
full = pd.read_csv('./data/features_v3.csv.gz')
full.head()

full['random'] = np.random.rand(len(full))

TRAIN_SAMPLE_SIZE = 0.75

train = full[full.Istrain == 1]
test = full[full.Istrain == 0]

column_stats = pd.concat([
    pd.DataFrame(full.count()).rename(columns={0: 'cnt'}),
    pd.DataFrame(full.nunique()).rename(columns={0: 'unique'}),
], sort=True, axis=1)
column_stats.sort_values(by='unique')

train_columns = list(column_stats[column_stats.cnt < 10 ** 6].index)
print(train_columns)

target_columns = [
    'TotalTimeStopped_p20',
    'TotalTimeStopped_p50',
    'TotalTimeStopped_p80',
    'DistanceToFirstStop_p20',
    'DistanceToFirstStop_p50',
    'DistanceToFirstStop_p80',
]

do_not_use = train_columns + ['IsTrain', 'Path', 'RowId', 'IntersectionId',
                              'random', 'intersection_random', 'ValidationGroup']

feature_columns = [c for c in full.columns if c not in do_not_use]
print(len(feature_columns))
print(feature_columns)

['DistanceToFirstStop_p20', 'DistanceToFirstStop_p40', 'DistanceToFirstStop_p50', 'DistanceToFirstStop_p60', 'DistanceToFirstStop_p80', 'TimeFromFirstStop_p20', 'TimeFromFirstStop_p40', 'TimeFromFirstStop_p50', 'TimeFromFirstStop_p60', 'TimeFromFirstStop_p80', 'TotalTimeStopped_p20', 'TotalTimeStopped_p40', 'TotalTimeStopped_p50', 'TotalTimeStopped_p60', 'TotalTimeStopped_p80']
35
['City', 'EntryHeading', 'EntryStreetName', 'ExitHeading', 'ExitStreetName', 'Hour', 'Istrain', 'Latitude', 'Longitude', 'Month', 'Weekend', 'Latitude3', 'Longitude3', 'EntryStreetMissing', 'ExitStreetMissing', 'CMWH', 'DiffHeading', 'Rainfall', 'Temperature', 'EntryType', 'ExitType', 'Intersection', 'SameStreet', 'LatitudeDist', 'LongitudeDist', 'CenterDistL1', 'CenterDistL2', 'Longitude3Count', 'Latitude3Count', 'ExitStreetNameCount', 'EntryStreetNameCount', 'IntersectionCount', 'Longitude3UniqueIntersections', 'Latitude3UniqueIntersections', 'ExitStreetNameUniqueIntersections']


## Training
First we will initialize the model with the chosen hyperparameters. Then we will train it on the specific attributes and save the best model, and finally we will run validation.

In [31]:
model = KerasModel(n_layer=3, mul_input=8, batch_size=1024, patience=10, activation=None, loss='mse')

We will loop through the target columns and train the model iteratively on each. We will also save the best model each time.

In [None]:
for i, target in enumerate(target_columns):
    print(f'Training and predicting for target {target}')
    train_idx = train.random < TRAIN_SAMPLE_SIZE
    valid_idx = train.random >= TRAIN_SAMPLE_SIZE

    Xtr = train[train_idx][feature_columns]
    Xv = train[valid_idx][feature_columns]
    ytr = train[train_idx][target].values
    yv = train[valid_idx][target].values
    print(Xtr.shape, ytr.shape, Xv.shape, yv.shape)

    mc = ModelCheckpoint('.models/nn.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)
    
    history = model.fit(Xtr, ytr, Xv, yv)

Training and predicting for target TotalTimeStopped_p20
(642086, 35) (642086,) (214301, 35) (214301,)
Input dim:35
Hidden Layers:[280, 214215, 428150]
Output dim:642086


In [None]:
# validation code
Y_test = pipeline.predict(X_test)
res_df = pd.DataFrame(data=Y_test, columns=targets)
res_df['RowId'] = X_test['RowId']