# Nueral Network
This is another model we are testing for our application. A nueral network may perform better than an XG Boost model, so we will load the same data and see if we can get better results.

In [22]:
# required imports
import pandas as pd
import numpy as np
import gc
from sklearn.model_selection import train_test_split

# tf imports
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow_addons.optimizers import RectifiedAdam, Lookahead
from sklearn.base import BaseEstimator
from tensorflow_addons.activations import gelu
from keras.callbacks import ModelCheckpoint


# sklearn imports
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, RobustScaler, OrdinalEncoder, FunctionTransformer, QuantileTransformer
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

Using TensorFlow backend.


In [15]:
def pre_process(X, Y=None):
    X['IntersectionId'] = X['IntersectionId'].astype('str') + X['City']
    X['city_month'] = X["City"] + X["Month"].astype(str)
    # Creating a new column by mapping the city_month variable to it's corresponding average monthly temperature
    X["average_temp"] = X['city_month'].map(monthly_av)
    # Creating a new column by mapping the city_month variable to it's corresponding average monthly rainfall
    X["average_rainfall"] = X['city_month'].map(monthly_rainfall)
    # Creating a new column by mapping the city_month variable to it's corresponding average monthly snowfall
    X["average_snowfall"] = X['city_month'].map(monthly_snowfall)
    # Creating a new column by mapping the city_month variable to it's corresponding average monthly daylight
    X["average_daylight"] = X['city_month'].map(monthly_daylight)
    # Creating a new column by mapping the city_month variable to it's corresponding average monthly sunshine
    X["average_sunshine"] = X['city_month'].map(monthly_sunshine)
    
    
    X["Center_Latitude"] = X['City'].map(center_latitude)
    X["Center_Longitude"] = X['City'].map(center_longitude)
    X["CenterDistance"] = np.sqrt((X['Latitude'] - X["Center_Latitude"]) ** 2 + (X['Center_Longitude'] - X["Longitude"]) ** 2)
    
    X['SameStreet'] = X['EntryStreetName'] ==  X['ExitStreetName']
    X['SameHeading'] = X['EntryHeading'] ==  X['ExitHeading']
    X['Vector'] = X['EntryHeading'] + X['ExitHeading']
    X['Hour_x'] = np.cos(X['Hour'] * np.pi/12.)
    X['Hour_y'] = np.sin(X['Hour'] * np.pi/12.)
    X['Month_x'] = np.cos(X['Month'] * np.pi/6.)
    X['Month_y'] = np.sin(X['Month'] * np.pi/6.)
    X['is_day'] = 0
    X.iloc[X[(X['Hour'] > 5) & (X['Hour'] < 20)].index, X.columns.get_loc('is_day')] = 1 
    
    for street_dir in ['Entry', 'Exit']:
        data = np.char.lower(X[street_dir + 'Heading'].values.astype('str'))
        # N => Y +1
        # S => Y -1
        # E => X +1
        # W => X -1
        X['NS_' + street_dir] = np.where(np.char.rfind(data, 'N') > -1, 1, 0)
        X['NS_' + street_dir] = np.where(np.char.rfind(data, 'S') > -1, -1, X['NS_' + street_dir].values)
        X['EW_' + street_dir] = np.where(np.char.rfind(data, 'E') > -1, 1, 0)
        X['EW_' + street_dir] = np.where(np.char.rfind(data, 'W') > -1, -1, X['EW_' + street_dir].values)
        X[street_dir + '_Angle'] = X[street_dir + 'Heading'].map(directions)

    X['Angle'] = X['Exit_Angle'] - X['Entry_Angle'] 
    X['x_Angle'] = np.cos(X['Angle'].values)
    X['y_Angle'] = np.sin(X['Angle'].values)

    X['NS'] = X['NS_Exit'] - X['NS_Entry'] 
    X['EW'] = X['EW_Exit'] - X['EW_Entry']
    
    for street_dir in ['Entry', 'Exit']:
        data = np.char.lower(X[street_dir + 'StreetName'].values.astype('str'))
        for type_cat in ['road', 'way', 'street', 'avenue', 'boulevard', 'lane', 'drive', 'terrace', 'place', 'court', 'plaza', 'square']:
            X['Is' + street_dir + type_cat] = np.char.rfind(data, type_cat) > -1
            
    #X = X.drop(columns=['IntersectionId', 'Center_Latitude', 'Center_Longitude', 'city_month', 'Latitude', 'Longitude', 'CenterDistance' ])
    #X = X.drop(columns=['EntryStreetName', 'ExitStreetName' ])

    road_type = []
    for street_dir in ['Entry', 'Exit']:
        for type_cat in ['road', 'way', 'street', 'avenue', 'boulevard', 'lane', 'drive', 'terrace', 'place', 'court', 'plaza', 'square']:
            road_type.append('Is' + street_dir + type_cat)
    
    return X[[
        'CenterDistance',
        'EntryHeading',
        'ExitHeading',
        'NS_Entry',
        'EW_Entry',
        'NS_Exit',
        'EW_Exit',
        'Entry_Angle',
        'Exit_Angle',
        'NS',
        'EW',
        'Angle',
        'x_Angle',
        'y_Angle',
        'is_day',
        'SameStreet',
        'SameHeading',
        'Vector',
        'Hour_x',
        'Hour_y',
        'Month_x',
        'Month_y',
        'City',
        'average_temp',
        'average_rainfall',
        'average_snowfall',
        'average_daylight',
        'average_sunshine',
        *road_type
    ]]

In [11]:
def create_model(grid_params, in_dim, out_dim, patience=20, loss='rmse', activation='sigmoid'):
    
    mul_input = grid_params['mul_input']
    n_layer = grid_params['n_layer']
    
    first_layer_size = int(in_dim*mul_input)
    hidden_layers = []
    for i_layer in range(n_layer, 0, -1):
        layer_size = int(((first_layer_size - out_dim) / n_layer) * i_layer + out_dim)
        hidden_layers.append(layer_size)

    print("Input dim:" + str(in_dim))
    print("Hidden Layers:" + str(hidden_layers))
    print("Output dim:" + str(out_dim))

    model = Sequential()
    
    model.add(Dense(in_dim,input_shape=[in_dim],activation=gelu))
    #model.add(BatchNormalization())
    model.add(Dropout(.5))
    
    for layer in hidden_layers:
        model.add(Dense(layer,activation=gelu))
        #model.add(BatchNormalization())
        model.add(Dropout(.5))
    
    model.add(Dense(out_dim, activation=activation))
    
    radam = RectifiedAdam()
    ranger = Lookahead(radam, sync_period=6, slow_step_size=0.5)
    optimizer = ranger#Adam(learning_rate=0.001)
    
    es = EarlyStopping(monitor='val_loss', verbose=1, mode='min', patience=patience, restore_best_weights=True)
    es.set_model(model)

    model.compile(optimizer=optimizer, loss=[loss], metrics=[])
    
    return model, [ es ]

In [30]:
class KerasModel(BaseEstimator):

    def __init__(
        self, 
        n_layer=1, 
        mul_input=1.75, 
        patience=5,
        batch_size=32,
        loss='msle',
        activation='sigmoid'
        ):
        self._estimator_type = 'reg' 
        self.n_layer = n_layer
        self.mul_input = mul_input
        self.patience = patience
        self.loss = loss
        self.activation = activation
        self.batch_size = batch_size
        #self.__name__ = self._wrapped_obj.__class__.__name__ + "PredictWrapper"

    def __repr__(self):
        if not hasattr(self, 'model'):
            return "Empty"
        return self.model.__repr__()

    def __str__(self):
        if not hasattr(self, 'model'):
            return "Empty"
        return self.model.__str__()
        
    def fit(self, X, Y, x_val, y_val):
        model, cbs = create_model(
            self.get_params(),
            X.shape[1],
            Y.shape[0],
            patience=self.patience,
            loss=self.loss,
            activation=self.activation
        )
#         X_train, X_valid, y_train, y_valid = train_test_split(X, Y, test_size=0.2, random_state=42, shuffle=True)
        self.model = model
        self.model.fit(X_train,y_train, batch_size=self.batch_size,epochs=10000, validation_data=[X_val,y_val], verbose=2, callbacks=cbs)
        return self

    def predict(self, *args, **kwargs):
        return self.model.predict(*args, **kwargs)

In [31]:
model = KerasModel(n_layer=3, mul_input=8, batch_size=1024, patience=10, activation=None, loss='mse')

In [20]:
# load data
full = pd.read_csv('./data/features_v3.csv.gz')
full.head()

full['random'] = np.random.rand(len(full))

TRAIN_SAMPLE_SIZE = 0.75

train = full[full.Istrain == 1]
test = full[full.Istrain == 0]

column_stats = pd.concat([
    pd.DataFrame(full.count()).rename(columns={0: 'cnt'}),
    pd.DataFrame(full.nunique()).rename(columns={0: 'unique'}),
], sort=True, axis=1)
column_stats.sort_values(by='unique')

train_columns = list(column_stats[column_stats.cnt < 10 ** 6].index)
print(train_columns)

target_columns = [
    'TotalTimeStopped_p20',
    'TotalTimeStopped_p50',
    'TotalTimeStopped_p80',
    'DistanceToFirstStop_p20',
    'DistanceToFirstStop_p50',
    'DistanceToFirstStop_p80',
]

do_not_use = train_columns + ['IsTrain', 'Path', 'RowId', 'IntersectionId',
                              'random', 'intersection_random', 'ValidationGroup']

feature_columns = [c for c in full.columns if c not in do_not_use]
print(len(feature_columns))
print(feature_columns)

['DistanceToFirstStop_p20', 'DistanceToFirstStop_p40', 'DistanceToFirstStop_p50', 'DistanceToFirstStop_p60', 'DistanceToFirstStop_p80', 'TimeFromFirstStop_p20', 'TimeFromFirstStop_p40', 'TimeFromFirstStop_p50', 'TimeFromFirstStop_p60', 'TimeFromFirstStop_p80', 'TotalTimeStopped_p20', 'TotalTimeStopped_p40', 'TotalTimeStopped_p50', 'TotalTimeStopped_p60', 'TotalTimeStopped_p80']
35
['City', 'EntryHeading', 'EntryStreetName', 'ExitHeading', 'ExitStreetName', 'Hour', 'Istrain', 'Latitude', 'Longitude', 'Month', 'Weekend', 'Latitude3', 'Longitude3', 'EntryStreetMissing', 'ExitStreetMissing', 'CMWH', 'DiffHeading', 'Rainfall', 'Temperature', 'EntryType', 'ExitType', 'Intersection', 'SameStreet', 'LatitudeDist', 'LongitudeDist', 'CenterDistL1', 'CenterDistL2', 'Longitude3Count', 'Latitude3Count', 'ExitStreetNameCount', 'EntryStreetNameCount', 'IntersectionCount', 'Longitude3UniqueIntersections', 'Latitude3UniqueIntersections', 'ExitStreetNameUniqueIntersections']


In [None]:
for i, target in enumerate(target_columns):
    print(f'Training and predicting for target {target}')
    train_idx = train.random < TRAIN_SAMPLE_SIZE
    valid_idx = train.random >= TRAIN_SAMPLE_SIZE

    Xtr = train[train_idx][feature_columns]
    Xv = train[valid_idx][feature_columns]
    ytr = train[train_idx][target].values
    yv = train[valid_idx][target].values
    print(Xtr.shape, ytr.shape, Xv.shape, yv.shape)

    mc = ModelCheckpoint('.models/nn.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)
    
    history = model.fit(Xtr, ytr, Xv, yv)

Training and predicting for target TotalTimeStopped_p20
(642086, 35) (642086,) (214301, 35) (214301,)
Input dim:35
Hidden Layers:[280, 214215, 428150]
Output dim:642086


In [None]:
Y_test = pipeline.predict(X_test)
res_df = pd.DataFrame(data=Y_test, columns=targets)
res_df['RowId'] = X_test['RowId']