In [None]:
import datetime

import pandas as pd
import numpy as np

np.random.seed(0)

import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.gridspec as gridspec

from sklearn.manifold import TSNE
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold, train_test_split, KFold
from sklearn.metrics import f1_score,mean_squared_error

import tensorflow as tf
tf.random.set_seed(
    0
)
from tensorflow.keras.layers import Input, Dense, concatenate,Dropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras import regularizers
import tensorflow_addons as tfa

from catboost import Pool, cv,CatBoostClassifier,CatBoostRegressor

from hyperopt import hp
from hyperopt import fmin, tpe, space_eval

from tqdm import tqdm

In [None]:
train_df = pd.read_csv('train_df_final.csv')
test_df = pd.read_csv('test_df_final.csv')
submission_df = pd.read_csv('sample_submission.csv')

In [None]:
train_df.head()

In [None]:
train_df = train_df.fillna(value=0)
test_df = test_df.fillna(value=0)

# EDA

In [None]:
# Model inputs: every column whose name contains neither 'predicted' nor
# 'anomaly', excluding the 'label' target itself.
features = [
    column
    for column in train_df.columns
    if 'predicted' not in column and 'anomaly' not in column and column != 'label'
]

In [None]:
train = train_df[features+['label']]
test = test_df[features]

In [None]:
# One subplot per feature, overlaying the distribution of label == 1 rows and
# label == 0 rows. The grid is sized from the feature list instead of a fixed
# 45 rows, so it neither overflows (more features) nor leaves blank space
# (fewer features).
n_features = len(features)
plt.figure(figsize=(12, n_features * 4))
gs = gridspec.GridSpec(n_features, 1)
for i, cn in enumerate(features):
    ax = plt.subplot(gs[i])
    sns.distplot(train[cn][train.label == 1], bins=50)
    sns.distplot(train[cn][train.label == 0], bins=50)
    ax.set_xlabel('')
    ax.set_title('histogram of feature: ' + str(cn))
plt.show()

In [None]:
X = train_df[features]
Y = train_df['label']

In [None]:
def tsne_plot(x1, y1, name="graph.png", random_state=0):
    """Embed `x1` in 2-D with t-SNE and scatter-plot it coloured by label.

    Parameters
    ----------
    x1 : array-like
        Samples to embed (passed straight to `TSNE.fit_transform`).
    y1 : array-like
        One label per row of `x1`; rows equal to 1 are drawn green
        ('Correct'), rows equal to 0 red ('Incorrect').
    name : str
        Path the figure is saved to before it is shown.
    random_state : int
        Seed forwarded to `TSNE`.
    """
    tsne = TSNE(n_components=2, random_state=random_state)
    X_t = tsne.fit_transform(x1)

    # Positional boolean masks: independent of any pandas index on `y1` and
    # they select plain 1-D coordinate vectors.
    labels = np.asarray(y1)
    is_correct = labels == 1
    is_incorrect = labels == 0

    plt.figure(figsize=(12, 8))

    # linewidth is given as a number (the original passed the string '1').
    plt.scatter(X_t[is_correct, 0], X_t[is_correct, 1], marker='o', color='g', linewidth=1, alpha=0.8, label='Correct')
    plt.scatter(X_t[is_incorrect, 0], X_t[is_incorrect, 1], marker='o', color='r', linewidth=1, alpha=0.8, label='Incorrect')

    plt.legend(loc='best')
    plt.savefig(name)
    plt.show()
    

In [None]:
%%time
tsne_plot(X, Y, "original.png")

# Auto encoder

In [None]:
scaler = MinMaxScaler()
x_scale = scaler.fit_transform(X)
x_correct, x_incorrect = x_scale[Y == 1], x_scale[Y == 0]

In [None]:
def get_model():
    """Build and compile the dense autoencoder over the columns of the global `X`.

    Layout: input -> 100 (tanh, L1 activity penalty) -> 50 (relu)
    -> 50 (tanh) -> 100 (tanh) -> output (relu, same width as the input).
    Compiled with the 'adadelta' optimizer and MSE reconstruction loss.
    """
    n_inputs = X.shape[1]
    inputs = Input(shape=(n_inputs,))

    # Encoder.
    hidden = Dense(100, activation='tanh', activity_regularizer=regularizers.l1(10e-5))(inputs)
    hidden = Dense(50, activation='relu')(hidden)

    # Decoder.
    hidden = Dense(50, activation='tanh')(hidden)
    hidden = Dense(100, activation='tanh')(hidden)
    reconstruction = Dense(n_inputs, activation='relu')(hidden)

    autoencoder = Model(inputs, reconstruction)
    autoencoder.compile(optimizer="adadelta", loss="mse")
    return autoencoder

In [None]:
def run_iterations(iterations,size=2000):
    """Train a fresh autoencoder and save a t-SNE plot of its latent space.

    The autoencoder is fitted for `iterations` epochs on the first `size`
    rows of `x_correct`. Its first three layers are then reused as an encoder
    to project the first 3000 `x_correct` rows and all of `x_incorrect`; the
    plot is written to 'latent_representation_{size}_{iterations}.png'.
    """
    autoencoder = get_model()
    training_rows = x_correct[:size]
    autoencoder.fit(
        training_rows, training_rows,
        batch_size=256, epochs=iterations,
        shuffle=True, validation_split=0.20, verbose=False,
    )

    # Encoder = the autoencoder's first three layers (autoencoder.layers[0:3]).
    hidden_representation = Sequential()
    for encoder_layer in autoencoder.layers[:3]:
        hidden_representation.add(encoder_layer)

    correct_hid_rep = hidden_representation.predict(x_correct[:3000])
    incorrect_hid_rep = hidden_representation.predict(x_incorrect)

    # Stack both groups; labels are 1 for the 'correct' rows, 0 for the rest.
    rep_x = np.append(correct_hid_rep, incorrect_hid_rep, axis=0)
    rep_y = np.append(
        np.ones(correct_hid_rep.shape[0]),
        np.zeros(incorrect_hid_rep.shape[0]),
    )
    tsne_plot(rep_x, rep_y, f"latent_representation_{size}_{iterations}.png")

In [None]:
# for size in range(2_000,14_000,5_000):
#     for it in range(10,101,10):
#         run_iterations(it,size)

In [None]:
autoencoder = get_model()
autoencoder.fit(x_correct, x_correct, 
                batch_size = 256, epochs = 50, 
                shuffle = True, validation_split = 0.20,verbose=False)

In [None]:
# Encoder: reuse the first three layers of the trained autoencoder
# (autoencoder.layers[0], [1] and [2]) as a standalone model.
hidden_representation = Sequential(autoencoder.layers[:3])

In [None]:
correct_hid_rep = hidden_representation.predict(x_correct)
incorrect_hid_rep = hidden_representation.predict(x_incorrect)

In [None]:
# Latent features for all rows (label-1 rows first) and the matching labels.
rep_x = np.concatenate([correct_hid_rep, incorrect_hid_rep], axis=0)
y_c = np.ones(len(correct_hid_rep))
y_i = np.zeros(len(incorrect_hid_rep))
rep_y = np.concatenate([y_c, y_i])

In [None]:
# Project the scaled test set and the scaled training set into the encoder's
# latent space.
x_test = hidden_representation.predict(scaler.transform(test_df[features]))
# The assignment target was missing on this line (a syntax error). `x_train`
# is the name the CatBoost cross-validation loop reads when it scores the
# whole training set.
x_train = hidden_representation.predict(x_scale)

In [None]:
# Keyword arguments forwarded to CatBoostClassifier in the CV loop.
params = dict(
    loss_function='Logloss',
    random_state=0,
    early_stopping_rounds=50,
    eval_metric='F1',
)

In [None]:
# Stratified 3-fold CatBoost on the autoencoder's latent features.
folds = 3
validation_scores, models = [], []

# Running sums of each fold model's predictions (one term per fold).
train_preds = np.zeros(train_df.shape[0])
test_preds = np.zeros(test_df.shape[0])

skf = StratifiedKFold(n_splits=folds)
for train_index, test_index in skf.split(rep_x, rep_y):
    X_train, y_train = rep_x[train_index], rep_y[train_index]
    X_test, y_test = rep_x[test_index], rep_y[test_index]

    model = CatBoostClassifier(**params)
    model.fit(X=X_train, y=y_train, eval_set=(X_test, y_test), verbose=10)

    # Best F1 on this fold's eval set, as recorded by CatBoost.
    validation_score = model.best_score_['validation']['F1']
    print('Validation f1',validation_score)
    validation_scores.append(validation_score)
    models.append(model)

    # NOTE(review): train_preds also covers rows this fold's model was fitted
    # on, so it is not an out-of-fold estimate — confirm that is intended.
    test_preds += model.predict(x_test)
    train_preds += model.predict(x_train)

In [None]:
# Load the stacking tables and attach the CatBoost-on-autoencoder predictions
# as a new column.
stacking_train_df = pd.read_csv('stacking_train_df.csv').assign(catboost_autoencoder=train_preds)
stacking_test_df = pd.read_csv('stacking_test_df.csv').assign(catboost_autoencoder=test_preds)

In [None]:
np.mean(validation_scores), np.std(validation_scores)

# Neural network

In [None]:
# Stop once validation loss has not improved for 5 epochs and roll the model
# back to its best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='auto',
    patience=5,
    min_delta=0,
    baseline=None,
    restore_best_weights=True,
    verbose=0,
)

# Callback list shared by the classifier training cells.
callbacks = [early_stopping]

def get_model(input_size,layers=[40,20,10]):
    """Build and compile a dense binary classifier.

    Parameters
    ----------
    input_size : int
        Number of input features.
    layers : list of int
        Width of each hidden ReLU layer, in order (at least one entry).

    Returns
    -------
    A Keras Model with one sigmoid output, compiled with Adam, sigmoid focal
    cross-entropy loss and a micro-averaged F1 metric.
    """
    inputs = Input(shape=(input_size,))

    hidden = Dense(layers[0], activation='relu')(inputs)
    for width in layers[1:]:
        hidden = Dense(width, activation='relu')(hidden)
    probability = Dense(1, activation='sigmoid')(hidden)

    classifier = Model(inputs, probability)
    # NOTE(review): the metric is declared with num_classes=2 while the model
    # has a single sigmoid output — confirm the reported F1 is meaningful.
    f1_metric = tfa.metrics.F1Score(num_classes=2,average='micro')
    classifier.compile(
        optimizer='adam',
        loss=tfa.losses.SigmoidFocalCrossEntropy(),
        metrics=[f1_metric],
    )
    return classifier

## Without using linear predictions

In [None]:
# Inputs without the linear-model outputs: drop every column whose name
# contains 'predicted' or 'anomaly', plus the 'label' target.
features = [
    column
    for column in train_df.columns
    if 'predicted' not in column and 'anomaly' not in column and column != 'label'
]
train, test = train_df[features + ['label']], test_df[features]

In [None]:
X = train_df[features]
y = train_df['label']
scaler = MinMaxScaler()
X_scale = scaler.fit_transform(X)
X_test = scaler.transform(test_df[features])

In [None]:
# Stratified 3-fold training of the base classifier (no linear predictions).
folds = 3

validation_scores = []
models = []

# Running sums of each fold model's predicted probabilities.
test_preds = np.zeros(test_df.shape[0])
train_preds = np.zeros(train_df.shape[0])

# Plain array of labels so the fold indices are applied positionally, whatever
# index the label Series carries.
y_values = np.asarray(y)

skf = StratifiedKFold(n_splits=folds)
for fold, (train_index, test_index) in enumerate(skf.split(X_scale, y_values), start=1):
    print('fold:',fold)

    X_train, X_valid = X_scale[train_index], X_scale[test_index]
    y_train, y_valid = y_values[train_index], y_values[test_index]
    model = get_model(X.shape[1],[100,50,50])
    # `callbacks` is already a list, so it is passed as-is rather than wrapped
    # in a second list.
    model.fit(x=X_train,y=y_train,batch_size=512,epochs=100,validation_data=(X_valid,y_valid),callbacks=callbacks)

    # Threshold the flattened (n, 1) probabilities at 0.5 before scoring.
    y_hat = np.where(model.predict(X_valid).ravel() > 0.5,1,0)
    score = f1_score(y_valid, y_hat, average='micro')
    validation_scores.append(score)
    print('validation score:', score)

    preds = model.predict(X_test).reshape(test_preds.shape)
    test_preds += preds
    # NOTE(review): this scores every training row, including rows the fold
    # model was fitted on, so train_preds is not out-of-fold — confirm intended.
    train_preds += model.predict(X_scale).reshape(train_preds.shape)
    models.append(model)

In [None]:
np.mean(validation_scores), np.std(validation_scores)

In [None]:
stacking_train_df['nn_base'] = train_preds
stacking_test_df['nn_base'] = test_preds

In [None]:
# `test_preds` is the sum of one probability per fold model, so the majority
# cut-off is half the number of models (1.5 for the 3 folds trained above)
# rather than a hard-coded constant.
submission_df['prediction'] = np.where(test_preds > len(models) / 2,1,0)
submission_df.to_csv('submission_nn.csv',index=False)

## With linear predictions

In [None]:
# Inputs including the linear-model outputs: only columns containing 'anomaly'
# and the 'label' target are dropped.
features = [
    column
    for column in train_df.columns
    if 'anomaly' not in column and column != 'label'
]

In [None]:
X = train_df[features]
y = train_df['label']
scaler = MinMaxScaler()
X_scale = scaler.fit_transform(X)
X_test = scaler.transform(test_df[features])

In [None]:
# Stratified 3-fold training of the classifier that also sees the linear
# predictions.
folds = 3

validation_scores = []
models = []

# Running sums of each fold model's predicted probabilities.
test_preds = np.zeros(test_df.shape[0])
train_preds = np.zeros(train_df.shape[0])

# Plain array of labels so the fold indices are applied positionally, whatever
# index the label Series carries.
y_values = np.asarray(y)

skf = StratifiedKFold(n_splits=folds)
for fold, (train_index, test_index) in enumerate(skf.split(X_scale, y_values), start=1):
    print('fold:',fold)

    X_train, X_valid = X_scale[train_index], X_scale[test_index]
    y_train, y_valid = y_values[train_index], y_values[test_index]
    model = get_model(X.shape[1],[50,100,150])
    # `callbacks` is already a list, so it is passed as-is rather than wrapped
    # in a second list.
    model.fit(x=X_train,y=y_train,batch_size=512,epochs=100,validation_data=(X_valid,y_valid),callbacks=callbacks)

    # Threshold the flattened (n, 1) probabilities at 0.5 before scoring.
    y_hat = np.where(model.predict(X_valid).ravel() > 0.5,1,0)
    score = f1_score(y_valid, y_hat, average='micro')
    validation_scores.append(score)
    print('validation score:', score)

    preds = model.predict(X_test).reshape(test_preds.shape)
    test_preds += preds
    # NOTE(review): this scores every training row, including rows the fold
    # model was fitted on, so train_preds is not out-of-fold — confirm intended.
    train_preds += model.predict(X_scale).reshape(train_preds.shape)
    models.append(model)

In [None]:
stacking_train_df['nn_linear_pred'] = train_preds
stacking_test_df['nn_linear_pred'] = test_preds

In [None]:
np.mean(validation_scores), np.std(validation_scores)

In [None]:
# `test_preds` is the sum of one probability per fold model, so the majority
# cut-off is half the number of models (1.5 for the 3 folds trained above)
# rather than a hard-coded constant.
# NOTE(review): this writes to the same path as the base-NN submission cell
# and therefore replaces that file — confirm that is intended.
submission_df['prediction'] = np.where(test_preds > len(models) / 2,1,0)
submission_df.to_csv('submission_nn.csv',index=False)

### Hyperparameter tuning

In [None]:
def get_mean_validation_score(params):
    """Hyperopt objective: negated mean micro-F1 of a 3-hidden-layer classifier.

    Parameters
    ----------
    params : dict
        Must contain 'folds' (number of stratified CV splits) and 'layer_1',
        'layer_2', 'layer_3' (hidden-layer widths).

    Returns
    -------
    float
        Mean validation F1 over the folds multiplied by -1, so that
        minimising this value maximises F1.

    Reads the globals `X`, `X_scale`, `y` and `callbacks`.
    """
    folds = params['folds']
    hidden_layers = [params['layer_1'], params['layer_2'], params['layer_3']]

    # Positional label array: fold indices must not be interpreted as labels
    # of a pandas index.
    y_values = np.asarray(y)

    validation_scores = []
    skf = StratifiedKFold(n_splits=folds)
    for train_index, test_index in skf.split(X_scale, y_values):
        X_train, X_valid = X_scale[train_index], X_scale[test_index]
        y_train, y_valid = y_values[train_index], y_values[test_index]

        model = get_model(X.shape[1], hidden_layers)
        # `callbacks` is already a list; pass it directly instead of nesting it.
        model.fit(x=X_train,y=y_train,batch_size=512,epochs=100,validation_data=(X_valid,y_valid),verbose=False,callbacks=callbacks)

        # Threshold the flattened (n, 1) probabilities at 0.5 before scoring.
        y_hat = np.where(model.predict(X_valid).ravel() > 0.5,1,0)
        validation_scores.append(f1_score(y_valid, y_hat, average='micro'))

    return np.mean(validation_scores) * -1

In [None]:
# Candidate hidden-layer widths: 10, 20, ..., 290.
layer_nodes = list(range(10, 300, 10))
# Candidate numbers of CV folds.
folds = [2,3,5]

# Search space: one categorical choice for the fold count and one per layer.
space = {'folds': hp.choice('folds', folds)}
space.update({
    name: hp.choice(name, layer_nodes)
    for name in ('layer_1', 'layer_2', 'layer_3')
})


In [None]:
best = fmin(get_mean_validation_score, space, algo=tpe.suggest, max_evals=100)

In [None]:
# For hp.choice dimensions, fmin reports the 0-based index of the selected
# option, so the option lists are indexed directly (no -1 offset).
folds[best['folds']], layer_nodes[best['layer_1']], layer_nodes[best['layer_2']], layer_nodes[best['layer_3']]

In [None]:
folds[best['folds']] 

# Neural networks for predictions

In [None]:
# Early stopping for the regression models: watch validation loss, give up
# after 5 epochs without improvement and restore the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto',
    baseline=None, restore_best_weights=True
)

# Callback list shared by the regression training cells below.
callbacks = [early_stopping]

BATCH_SIZE = 512

# NOTE(review): 3400 is presumably an approximate training-set size — confirm;
# it is only used to scale the learning-rate decay below.
STEPS_PER_EPOCH = 3400//BATCH_SIZE

# Learning rate starts at 0.001 and decays continuously (staircase=False)
# following an inverse-time schedule over STEPS_PER_EPOCH*1000 decay steps.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
      0.001,
      decay_steps=STEPS_PER_EPOCH*1000,
      decay_rate=1,
      staircase=False)

def get_log_dir(model,dropout,residual):
    """Return the log directory for one (architecture, dropout, residual) run.

    The layer widths in `model` are joined with '-', then the dropout and
    residual values are appended, each separated by ':', under './logs/'.
    """
    widths = '-'.join(str(width) for width in model)
    return f'./logs/{widths}:{dropout}:{residual}'

def get_pred_model(input_size,layers=[40,20,10],drop_out=0.2,residual=True):
    """Build a dense regressor together with its feature extractor.

    Parameters
    ----------
    input_size : int
        Number of input features.
    layers : list of int
        Width of each hidden ReLU layer, in order (at least one entry).
        Every hidden layer is followed by dropout.
    drop_out : float
        Dropout rate applied after each hidden layer.
    residual : bool
        If True the output unit reads the concatenation of all hidden
        activations (taken before dropout); otherwise it reads only the
        last dropout output.

    Returns
    -------
    (feature_extractor, model)
        `feature_extractor` maps the input to the concatenated hidden
        activations; `model` is the compiled regressor (Adam with the global
        `lr_schedule`, MSE loss, MSE and RMSE metrics).
    """
    input_layer = Input(shape=(input_size,))
    intermediate_layers = []

    X = Dense(layers[0],activation='relu')(input_layer)
    intermediate_layers.append(X)
    X = Dropout(drop_out)(X)
    for nodes in layers[1:]:
        X = Dense(nodes, activation='relu')(X)
        intermediate_layers.append(X)
        X = Dropout(drop_out)(X)

    # With a single hidden layer there is nothing to concatenate; use that
    # activation directly instead of calling `concatenate` on a one-element
    # list, which not every Keras version accepts.
    if len(intermediate_layers) > 1:
        conc = concatenate(intermediate_layers)
    else:
        conc = intermediate_layers[0]

    if residual:
        output_layer = Dense(1, activation='relu')(conc)
    else:
        output_layer = Dense(1, activation='relu')(X)

    model = Model(input_layer, output_layer)
    model.compile(optimizer=tf.keras.optimizers.Adam(lr_schedule),
                  loss=tf.keras.losses.MeanSquaredError(),
                  metrics=[tf.keras.metrics.MeanSquaredError(),tf.keras.metrics.RootMeanSquaredError()])
    feature_extractor = Model(input_layer, conc)
    return feature_extractor,model

## Fare

In [None]:
# Regression target and its inputs (the target column itself is left out).
target = 'fare'
cols = (
    ['additional_fare', 'duration', 'meter_waiting', 'meter_waiting_fare',
     'meter_waiting_till_pickup']
    + ['pickup_date', 'pickup_hour', 'pickup_minute']
    + ['drop_date', 'drop_hour', 'drop_minute']
    + ['pick_cluster', 'is_more_than_one_day', 'distance_km', 'fare_per_km',
       'pickup_timeslot', 'day_of_week', 'is_weekday', 'cal_time_difference']
)

In [None]:
# Train the regressor only on rows labelled 1; inputs are min-max scaled.
X = train_df.loc[train_df['label'] == 1, cols]
scaler = MinMaxScaler()
X_scale = scaler.fit_transform(X)

y = train_df.loc[train_df['label'] == 1, target].values

In [None]:
# Reset TensorFlow/Keras state and reseed before cross-validating.
tf.compat.v1.reset_default_graph()
tf.keras.backend.clear_session()
tf.random.set_seed(0)

folds = 3
model_def = [60,80,50,20,15]

validation_scores, models = [], []
test_preds = np.zeros(test_df.shape[0])

kf = KFold(n_splits=folds)
fold = 1
for train_index, test_index in kf.split(X_scale, y):
    print('fold:',fold)
    fold += 1

    X_train, y_train = X_scale[train_index], y[train_index]
    X_valid, y_valid = X_scale[test_index], y[test_index]

    _, model = get_pred_model(X.shape[1], model_def)
    model.fit(
        x=X_train, y=y_train,
        batch_size=512, epochs=500,
        validation_data=(X_valid, y_valid),
        callbacks=callbacks,
    )

    # RMSE on the held-out fold.
    y_hat = model.predict(X_valid)
    score = mean_squared_error(y_valid, y_hat) ** 0.5
    validation_scores.append(score)
    print('validation score:', score)

    models.append(model)

In [None]:
np.mean(validation_scores) , np.std(validation_scores),validation_scores

In [None]:
tf.compat.v1.reset_default_graph()
tf.keras.backend.clear_session()
tf.random.set_seed(0)

fare_representation, fare_model = get_pred_model(X_scale.shape[1],model_def)

fare_model.fit(x=X_scale,y=y,batch_size=512,epochs=150)

In [None]:
tf.keras.utils.plot_model(fare_representation, "fare_representation_model.png", show_shapes=True)

In [None]:
fare_representation.save('models/fare_representation')

## Duration

In [None]:
# Regression target and its inputs (the target column itself is left out).
target = 'duration'
cols = (
    ['additional_fare', 'meter_waiting', 'meter_waiting_fare',
     'meter_waiting_till_pickup', 'fare']
    + ['pickup_date', 'pickup_hour', 'pickup_minute']
    + ['drop_date', 'drop_hour', 'drop_minute']
    + ['pick_cluster', 'is_more_than_one_day', 'distance_km', 'fare_per_km',
       'pickup_timeslot', 'day_of_week', 'is_weekday', 'cal_time_difference']
)

In [None]:
# Train the regressor only on rows labelled 1; inputs are min-max scaled.
X = train_df.loc[train_df['label'] == 1, cols]
scaler = MinMaxScaler()
X_scale = scaler.fit_transform(X)

y = train_df.loc[train_df['label'] == 1, target].values

#### Hyperparameter tuning

In [None]:
X_train, X_valid, y_train, y_valid = train_test_split(X_scale, y, test_size=0.33, random_state=0)

In [None]:
def get_score(params):
    """Train one regressor for a single configuration and return validation RMSE.

    `params` must provide 'model' (list of hidden-layer widths), 'dropout'
    and 'residual'. Uses the global X_train / y_train / X_valid / y_valid
    split and logs the run to TensorBoard under `get_log_dir(...)`.
    """
    architecture = params['model']
    dropout_rate = params['dropout']
    use_residual = params['residual']

    # Fresh TF/Keras state and a fixed seed for every configuration.
    tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()
    tf.random.set_seed(0)

    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=get_log_dir(architecture, dropout_rate, use_residual))

    _, regressor = get_pred_model(X.shape[1], architecture, dropout_rate, use_residual)
    regressor.fit(
        x=X_train, y=y_train,
        batch_size=512, epochs=500,
        validation_data=(X_valid, y_valid),
        verbose=False,
        callbacks=callbacks + [tensorboard_callback],
    )
    predictions = regressor.predict(X_valid)
    return mean_squared_error(y_valid, predictions) ** 0.5

In [None]:
# Exhaustive grid: every architecture x dropout rate x residual flag.
models = [
    # funnels
    [60, 80, 80, 60, 40, 20, 10],
    [60, 80, 80, 60, 40, 20, 10, 5],
    [60, 80, 100, 80, 60, 40, 20, 10],
    [60, 80, 100, 80, 60, 40, 20, 10, 5],
    [60, 80, 60, 40, 20, 10],
    [60, 80, 60, 40, 20, 10, 5],
    # alternating 60/80
    [60, 80, 60, 80],
    [60, 80, 60, 80, 60],
    [60, 80, 60, 80, 60, 80],
    # alternating 60/40
    [60, 40, 60, 40],
    [60, 40, 60, 40, 60],
    [60, 40, 60, 40, 60, 40],
    # constant width
    [60, 60, 60],
    [60, 60, 60, 60],
    [80, 80, 80],
    [80, 80, 80, 80],
    [40, 40, 40],
    [40, 40, 40, 40],
    # small funnels
    [40, 30, 20, 10],
    [40, 30, 20, 10, 5],
]
dropouts = [0.2, 0.4, 0.6, 0.8]
residuals = [True, False]

best_config = {}
best_score = float('inf')
# Maps each observed score to the configuration that produced it.
score_config = {}
for model in tqdm(models):
    for dropout in dropouts:
        for residual in residuals:
            params = dict(model=model, dropout=dropout, residual=residual)
            score = get_score(params)
            score_config[score] = params
            if not (score < best_score):
                continue
            best_config, best_score = params, score

In [None]:
best_score ,best_config

In [None]:
get_score(best_config)

In [None]:
# Configurations ordered from lowest to highest validation RMSE.
# `score_config` is keyed by score, so sorting its keys yields the ranking
# directly. This replaces a helper that was also named `get_score`: it
# shadowed the training function defined earlier and scanned the whole dict
# for every element being sorted.
[score_config[score] for score in sorted(score_config)]

In [None]:
# Reset TensorFlow/Keras state and reseed before cross-validating.
tf.compat.v1.reset_default_graph()
tf.keras.backend.clear_session()
tf.random.set_seed(0)

folds = 3

# Configuration evaluated with 3-fold CV.
model_def = [60, 40, 60, 40, 60, 40]
dropout = 0.2
residual = True

validation_scores, models = [], []
test_preds = np.zeros(test_df.shape[0])

kf = KFold(n_splits=folds)
fold = 1
for train_index, test_index in kf.split(X_scale, y):
    print('fold:',fold)
    fold += 1

    X_train, y_train = X_scale[train_index], y[train_index]
    X_valid, y_valid = X_scale[test_index], y[test_index]

    _, model = get_pred_model(X.shape[1], model_def, dropout, residual)
    model.fit(
        x=X_train, y=y_train,
        batch_size=512, epochs=500,
        validation_data=(X_valid, y_valid),
        callbacks=callbacks,
    )

    # RMSE on the held-out fold.
    y_hat = model.predict(X_valid)
    score = mean_squared_error(y_valid, y_hat) ** 0.5
    validation_scores.append(score)
    print('validation score:', score)

    models.append(model)

In [None]:
np.mean(validation_scores) , np.std(validation_scores),validation_scores

In [None]:
tf.compat.v1.reset_default_graph()
tf.keras.backend.clear_session() 
tf.random.set_seed(0)

duration_representation,duration_model = get_pred_model(X_scale.shape[1],model_def,dropout,residual)

duration_model.fit(x=X_scale,y=y,batch_size=512,epochs=35)

In [None]:
tf.keras.utils.plot_model(duration_representation, "duration_representation.png", show_shapes=True)

In [None]:
duration_representation.save('models/duration_representation')

## Meter waiting

In [None]:
# Regression target and its inputs (the target column itself is left out).
target = 'meter_waiting'
cols = (
    ['additional_fare', 'meter_waiting_fare', 'meter_waiting_till_pickup',
     'fare', 'duration']
    + ['pickup_date', 'pickup_hour', 'pickup_minute']
    + ['drop_date', 'drop_hour', 'drop_minute']
    + ['pick_cluster', 'is_more_than_one_day', 'distance_km', 'fare_per_km',
       'pickup_timeslot', 'day_of_week', 'is_weekday', 'cal_time_difference']
)

In [None]:
# Train the regressor only on rows labelled 1; inputs are min-max scaled.
X = train_df.loc[train_df['label'] == 1, cols]
scaler = MinMaxScaler()
X_scale = scaler.fit_transform(X)

y = train_df.loc[train_df['label'] == 1, target].values

#### Hyperparameter tuning

In [None]:
X_train, X_valid, y_train, y_valid = train_test_split(X_scale, y, test_size=0.33, random_state=0)

In [None]:
def get_score(params):
    """Train one regressor for a single configuration and return validation RMSE.

    `params` must provide 'model' (list of hidden-layer widths), 'dropout'
    and 'residual'. Uses the global X_train / y_train / X_valid / y_valid
    split and logs the run to TensorBoard under `get_log_dir(...)`.
    """
    architecture = params['model']
    dropout_rate = params['dropout']
    use_residual = params['residual']

    # Fresh TF/Keras state and a fixed seed for every configuration.
    tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()
    tf.random.set_seed(0)

    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=get_log_dir(architecture, dropout_rate, use_residual))

    _, regressor = get_pred_model(X.shape[1], architecture, dropout_rate, use_residual)
    regressor.fit(
        x=X_train, y=y_train,
        batch_size=512, epochs=500,
        validation_data=(X_valid, y_valid),
        verbose=False,
        callbacks=callbacks + [tensorboard_callback],
    )
    predictions = regressor.predict(X_valid)
    return mean_squared_error(y_valid, predictions) ** 0.5

In [None]:
# Exhaustive grid: every architecture x dropout rate x residual flag.
models = [
    # funnels
    [60, 80, 80, 60, 40, 20, 10],
    [60, 80, 80, 60, 40, 20, 10, 5],
    [60, 80, 100, 80, 60, 40, 20, 10],
    [60, 80, 100, 80, 60, 40, 20, 10, 5],
    [60, 80, 60, 40, 20, 10],
    [60, 80, 60, 40, 20, 10, 5],
    # alternating 60/80
    [60, 80, 60, 80],
    [60, 80, 60, 80, 60],
    [60, 80, 60, 80, 60, 80],
    # alternating 60/40
    [60, 40, 60, 40],
    [60, 40, 60, 40, 60],
    [60, 40, 60, 40, 60, 40],
    # constant width
    [60, 60, 60],
    [60, 60, 60, 60],
    [80, 80, 80],
    [80, 80, 80, 80],
    [40, 40, 40],
    [40, 40, 40, 40],
    # small funnels
    [40, 30, 20, 10],
    [40, 30, 20, 10, 5],
]
dropouts = [0.2, 0.4, 0.6, 0.8]
residuals = [True, False]

best_config = {}
best_score = float('inf')
# Maps each observed score to the configuration that produced it.
score_config = {}
for model in tqdm(models):
    for dropout in dropouts:
        for residual in residuals:
            params = dict(model=model, dropout=dropout, residual=residual)
            score = get_score(params)
            score_config[score] = params
            if not (score < best_score):
                continue
            best_config, best_score = params, score

In [None]:
best_score ,best_config

In [None]:
# Configurations ordered from lowest to highest validation RMSE.
# `score_config` is keyed by score, so sorting its keys yields the ranking
# directly. This replaces a helper that was also named `get_score`: it
# shadowed the training function defined earlier and scanned the whole dict
# for every element being sorted.
[score_config[score] for score in sorted(score_config)]

In [None]:
# Reset TensorFlow/Keras state and reseed before cross-validating.
tf.compat.v1.reset_default_graph()
tf.keras.backend.clear_session()
tf.random.set_seed(0)

folds = 3

# Configuration evaluated with 3-fold CV.
model_def = [60, 40, 60, 40, 60, 40]
dropout = 0.2
residual = True

validation_scores, models = [], []
test_preds = np.zeros(test_df.shape[0])

kf = KFold(n_splits=folds)
fold = 1
for train_index, test_index in kf.split(X_scale, y):
    print('fold:',fold)
    fold += 1

    X_train, y_train = X_scale[train_index], y[train_index]
    X_valid, y_valid = X_scale[test_index], y[test_index]

    _, model = get_pred_model(X.shape[1], model_def, dropout, residual)
    model.fit(
        x=X_train, y=y_train,
        batch_size=512, epochs=500,
        validation_data=(X_valid, y_valid),
        callbacks=callbacks,
    )

    # RMSE on the held-out fold.
    y_hat = model.predict(X_valid)
    score = mean_squared_error(y_valid, y_hat) ** 0.5
    validation_scores.append(score)
    print('validation score:', score)

    models.append(model)

In [None]:
np.mean(validation_scores) , np.std(validation_scores),validation_scores

In [None]:
tf.compat.v1.reset_default_graph()
tf.keras.backend.clear_session() 
tf.random.set_seed(0)

meter_waiting_representation,meter_waiting_model = get_pred_model(X_scale.shape[1],model_def,dropout,residual)

meter_waiting_model.fit(x=X_scale,y=y,batch_size=512,epochs=35)

In [None]:
tf.keras.utils.plot_model(meter_waiting_representation, "meter_waiting_representation.png", show_shapes=True)

In [None]:
meter_waiting_representation.save('models/meter_waiting_representation')

## Meter waiting fare

In [None]:
# Regression target and its inputs (the target column itself is left out).
target = 'meter_waiting_fare'
cols = (
    ['additional_fare', 'meter_waiting', 'meter_waiting_till_pickup',
     'fare', 'duration']
    + ['pickup_date', 'pickup_hour', 'pickup_minute']
    + ['drop_date', 'drop_hour', 'drop_minute']
    + ['pick_cluster', 'is_more_than_one_day', 'distance_km', 'fare_per_km',
       'pickup_timeslot', 'day_of_week', 'is_weekday', 'cal_time_difference']
)

In [None]:
# Train the regressor only on rows labelled 1; inputs are min-max scaled.
X = train_df.loc[train_df['label'] == 1, cols]
scaler = MinMaxScaler()
X_scale = scaler.fit_transform(X)

y = train_df.loc[train_df['label'] == 1, target].values

In [None]:
X_train, X_valid, y_train, y_valid = train_test_split(X_scale, y, test_size=0.33, random_state=0)

In [None]:
def get_score(params):
    """Train one regressor for a single configuration and return validation RMSE.

    `params` must provide 'model' (list of hidden-layer widths), 'dropout'
    and 'residual'. Uses the global X_train / y_train / X_valid / y_valid
    split and logs the run to TensorBoard under `get_log_dir(...)`.
    """
    architecture = params['model']
    dropout_rate = params['dropout']
    use_residual = params['residual']

    # Fresh TF/Keras state and a fixed seed for every configuration.
    tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()
    tf.random.set_seed(0)

    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=get_log_dir(architecture, dropout_rate, use_residual))

    _, regressor = get_pred_model(X.shape[1], architecture, dropout_rate, use_residual)
    regressor.fit(
        x=X_train, y=y_train,
        batch_size=512, epochs=500,
        validation_data=(X_valid, y_valid),
        verbose=False,
        callbacks=callbacks + [tensorboard_callback],
    )
    predictions = regressor.predict(X_valid)
    return mean_squared_error(y_valid, predictions) ** 0.5

In [None]:
# Exhaustive grid: every architecture x dropout rate x residual flag.
models = [
    # funnels
    [60, 80, 80, 60, 40, 20, 10],
    [60, 80, 80, 60, 40, 20, 10, 5],
    [60, 80, 100, 80, 60, 40, 20, 10],
    [60, 80, 100, 80, 60, 40, 20, 10, 5],
    [60, 80, 60, 40, 20, 10],
    [60, 80, 60, 40, 20, 10, 5],
    # alternating 60/80
    [60, 80, 60, 80],
    [60, 80, 60, 80, 60],
    [60, 80, 60, 80, 60, 80],
    # alternating 60/40
    [60, 40, 60, 40],
    [60, 40, 60, 40, 60],
    [60, 40, 60, 40, 60, 40],
    # constant width
    [60, 60, 60],
    [60, 60, 60, 60],
    [80, 80, 80],
    [80, 80, 80, 80],
    [40, 40, 40],
    [40, 40, 40, 40],
    # small funnels
    [40, 30, 20, 10],
    [40, 30, 20, 10, 5],
]
dropouts = [0.2, 0.4, 0.6, 0.8]
residuals = [True, False]

best_config = {}
best_score = float('inf')
# Maps each observed score to the configuration that produced it.
score_config = {}
for model in tqdm(models):
    for dropout in dropouts:
        for residual in residuals:
            params = dict(model=model, dropout=dropout, residual=residual)
            score = get_score(params)
            score_config[score] = params
            if not (score < best_score):
                continue
            best_config, best_score = params, score

In [None]:
best_score ,best_config

In [None]:
# Configurations ordered from lowest to highest validation RMSE.
# `score_config` is keyed by score, so sorting its keys yields the ranking
# directly. This replaces a helper that was also named `get_score`: it
# shadowed the training function defined earlier and scanned the whole dict
# for every element being sorted.
[score_config[score] for score in sorted(score_config)]

In [None]:
# Reset TensorFlow/Keras state and reseed before cross-validating.
tf.compat.v1.reset_default_graph()
tf.keras.backend.clear_session()
tf.random.set_seed(0)

folds = 3

# Configuration evaluated with 3-fold CV.
model_def = [60,60,60]
dropout = 0.2
residual = False

validation_scores, models = [], []
test_preds = np.zeros(test_df.shape[0])

kf = KFold(n_splits=folds)
fold = 1
for train_index, test_index in kf.split(X_scale, y):
    print('fold:',fold)
    fold += 1

    X_train, y_train = X_scale[train_index], y[train_index]
    X_valid, y_valid = X_scale[test_index], y[test_index]

    _, model = get_pred_model(X.shape[1], model_def, dropout, residual)
    model.fit(
        x=X_train, y=y_train,
        batch_size=512, epochs=500,
        validation_data=(X_valid, y_valid),
        callbacks=callbacks,
    )

    # RMSE on the held-out fold.
    y_hat = model.predict(X_valid)
    score = mean_squared_error(y_valid, y_hat) ** 0.5
    validation_scores.append(score)
    print('validation score:', score)

    models.append(model)

In [None]:
np.mean(validation_scores) , np.std(validation_scores),validation_scores

In [None]:
# Reset TensorFlow/Keras state and reseed before the final fit, as the other
# final-fit cells in this notebook do, so this model does not depend on
# whatever was trained before it.
tf.compat.v1.reset_default_graph()
tf.keras.backend.clear_session()
tf.random.set_seed(0)

# Final model trained on all label-1 rows; the feature extractor is what gets
# saved and reused by the combined model.
meter_waiting_fare_representation ,meter_waiting_fare_model = get_pred_model(X_scale.shape[1],model_def,dropout, residual)

meter_waiting_fare_model.fit(x=X_scale,y=y,batch_size=512,epochs=60)

In [None]:
tf.keras.utils.plot_model(meter_waiting_fare_representation, "meter_waiting_fare_representation.png", show_shapes=True)

In [None]:
meter_waiting_fare_representation.save('models/meter_waiting_fare_representation')

## Combined model

In [None]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto',
    baseline=None, restore_best_weights=True
)

def get_log_dir(model):
    """Return the log directory for a combined-model architecture.

    The layer widths in `model` are joined with '-' under './logs/'.
    """
    return './logs/' + '-'.join(str(width) for width in model)

callbacks = [early_stopping]

def get_combined_model(fare_model, duration_model, meter_waiting_model, meter_waiting_fare_model, model_def=[100,50], freeze_input=True):
    """Build a binary classifier on top of the four representation models.

    Each representation model gets its own named input ('fare_input',
    'duration_input', 'meter_waiting_input', 'meter_waiting_fare_input'); the
    four outputs are concatenated and fed through dense ReLU layers to a
    single sigmoid unit.

    Parameters
    ----------
    fare_model, duration_model, meter_waiting_model, meter_waiting_fare_model
        Keras models used as feature extractors for the respective inputs.
    model_def : sequence of int
        Widths of the dense layers stacked on the concatenated features.
    freeze_input : bool
        If True, the four sub-models are marked non-trainable.

    Returns
    -------
    A Keras Model compiled with Adam (global `lr_schedule`), sigmoid focal
    cross-entropy loss and a micro-averaged F1 metric.
    """
    branches = [
        ('fare', fare_model),
        ('duration', duration_model),
        ('meter_waiting', meter_waiting_model),
        ('meter_waiting_fare', meter_waiting_fare_model),
    ]

    if freeze_input:
        for _, sub_model in branches:
            sub_model.trainable = False

    branch_inputs = []
    branch_features = []
    for prefix, sub_model in branches:
        # A functional model needs unique layer names, and separately built or
        # loaded sub-models may share an auto-generated name, so give each one
        # a distinct name before nesting it.
        sub_model._name = f'{prefix}_representation'
        branch_input = Input(shape=(sub_model.input.shape[1],), name=f'{prefix}_input')
        branch_inputs.append(branch_input)
        # Each input goes through its OWN representation model (previously all
        # four inputs were routed through `fare_model`).
        branch_features.append(sub_model(branch_input))

    X = concatenate(branch_features)

    for nodes in model_def:
        X = Dense(nodes, activation='relu')(X)
    output_layer = Dense(1, activation='sigmoid')(X)

    model = Model(branch_inputs, output_layer)

    model.compile(optimizer=tf.keras.optimizers.Adam(lr_schedule),
                  loss=tfa.losses.SigmoidFocalCrossEntropy(),
                  metrics=[tfa.metrics.F1Score(num_classes=2,average='micro')])
    return model

In [None]:
tf.compat.v1.reset_default_graph()
tf.keras.backend.clear_session()
tf.random.set_seed(0)

fare_representation = tf.keras.models.load_model('models/fare_representation')
duration_representation = tf.keras.models.load_model('models/duration_representation')
meter_waiting_representation = tf.keras.models.load_model('models/meter_waiting_representation')
meter_waiting_fare_representation = tf.keras.models.load_model('models/meter_waiting_fare_representation')

In [None]:
# Columns shared, in this order, by every feature list below.
_trip_context_features = [
    'pickup_date', 'pickup_hour', 'pickup_minute',
    'drop_date', 'drop_hour', 'drop_minute',
    'pick_cluster',
    'is_more_than_one_day',
    'distance_km',
    'fare_per_km',
    'pickup_timeslot',
    'day_of_week',
    'is_weekday',
    'cal_time_difference',
]

# All columns scaled for the combined model.
features = [
    'additional_fare', 'duration', 'meter_waiting', 'meter_waiting_fare',
    'meter_waiting_till_pickup', 'fare',
] + _trip_context_features

# Per-branch inputs: each list omits the column its representation model was
# trained to predict; the leading columns keep the order used at training time.
fare_features = [
    'additional_fare', 'duration', 'meter_waiting', 'meter_waiting_fare',
    'meter_waiting_till_pickup',
] + _trip_context_features

duration_features = [
    'additional_fare', 'meter_waiting', 'meter_waiting_fare',
    'meter_waiting_till_pickup', 'fare',
] + _trip_context_features

meter_waiting_features = [
    'additional_fare', 'meter_waiting_fare', 'meter_waiting_till_pickup',
    'fare', 'duration',
] + _trip_context_features

meter_waiting_fare_features = [
    'additional_fare', 'meter_waiting', 'meter_waiting_till_pickup',
    'fare', 'duration',
] + _trip_context_features

In [None]:
X = train_df[features]
y = train_df['label']
scaler = MinMaxScaler()
X_scale = scaler.fit_transform(X)
X_test = scaler.transform(test_df[features])

In [None]:
train = train_df.copy()
train[features] = X_scale

test = test_df.copy()
test[features] = X_test

### Hyperparameter tuning

In [None]:
# Stratified 67/33 hold-out split of the scaled training frame.
X_train, X_valid, y_train, y_valid = train_test_split(
    train[features], y,
    test_size=0.33, random_state=0, stratify=y,
)

# Column views of the training part, one per branch of the combined model.
X_train_fare = X_train[fare_features]
X_train_duration = X_train[duration_features]
X_train_meter_waiting = X_train[meter_waiting_features]
X_train_meter_waiting_fare = X_train[meter_waiting_fare_features]

# Same views for the validation part.
X_valid_fare = X_valid[fare_features]
X_valid_duration = X_valid[duration_features]
X_valid_meter_waiting = X_valid[meter_waiting_features]
X_valid_meter_waiting_fare = X_valid[meter_waiting_fare_features]

In [None]:
def get_score(model_def):
    """Train one combined model on the hold-out split and score it.

    Parameters
    ----------
    model_def
        Forwarded to ``get_log_dir`` and to ``get_combined_model(model_def=...)``.
        The search cell passes tuples of integers (presumably layer widths;
        confirm against ``get_combined_model``).

    Returns
    -------
    Micro-averaged F1 on the validation split, computed from the model's
    predictions thresholded at 0.5.

    Reads the module-level split (``X_train_*``, ``X_valid_*``, ``y_train``,
    ``y_valid``) and the module-level ``callbacks`` list.
    """
    train_inputs = {
        'fare_input': X_train_fare,
        'duration_input': X_train_duration,
        'meter_waiting_input': X_train_meter_waiting,
        'meter_waiting_fare_input': X_train_meter_waiting_fare,
    }
    valid_inputs = {
        'fare_input': X_valid_fare,
        'duration_input': X_valid_duration,
        'meter_waiting_input': X_valid_meter_waiting,
        'meter_waiting_fare_input': X_valid_meter_waiting_fare,
    }

    # One TensorBoard log directory per candidate definition.
    tb_logger = tf.keras.callbacks.TensorBoard(log_dir=get_log_dir(model_def))
    network = get_combined_model(
        fare_representation,
        duration_representation,
        meter_waiting_representation,
        meter_waiting_fare_representation,
        model_def=model_def,
    )

    network.fit(
        train_inputs,
        y_train,
        batch_size=512,
        epochs=500,
        validation_data=(valid_inputs, y_valid),
        callbacks=callbacks + [tb_logger],
        verbose=False,
    )

    # Turn the raw predictions into hard 0/1 labels before scoring.
    hard_labels = np.where(network.predict(valid_inputs) > 0.5, 1, 0)
    return f1_score(y_valid, hard_labels, average='micro')

In [None]:
# Candidate definitions tried by the search loop; each tuple is passed to
# get_score() as `model_def`.
# Every family is a fixed base followed by a progressively longer prefix of
# its tail: base, base + tail[:1], base + tail[:2], ...
_search_families = [
    ((800, 400, 200, 100, 50), (25, 10, 5)),
    ((400, 800, 400), (100, 50, 20, 10)),
    ((1000, 800, 400, 200, 100, 50), (25, 10, 5)),
    ((450, 900, 450), (100, 50, 20, 10)),
    ((400, 800, 400, 800), (100, 50, 20, 10)),
    ((400, 800, 400, 800, 400), (100, 50, 20, 10)),
    ((450, 900, 450, 900), (100, 50, 20, 10)),
]
models = [
    base + tail[:depth]
    for base, tail in _search_families
    for depth in range(len(tail) + 1)
]
# Score every candidate; keep all scores in `model_score` and remember the
# first candidate that reaches the highest score (strict '>' comparison).
model_score = {}
best_model, best_score = None, 0
for candidate in tqdm(models):
    candidate_score = get_score(candidate)
    model_score[candidate] = candidate_score
    if candidate_score > best_score:
        best_model, best_score = candidate, candidate_score

In [None]:
# Candidates ordered from lowest to highest validation score.
sorted(models, key=model_score.__getitem__)

In [None]:
# Start from a clean Keras/TensorFlow state and re-seed before cross-validation.
tf.compat.v1.reset_default_graph()
tf.keras.backend.clear_session()
tf.random.set_seed(0)

folds = 3

validation_scores = []  # one micro-F1 score per fold
models = []             # the fitted model of each fold

model_def=(400, 800, 400, 100, 50, 20, 10)
# alternative definition: model_def=(400, 800, 400, 100, 50, 20)


def _model_inputs(frame, rows=None):
    """Build the dict of named inputs expected by the combined model.

    frame: DataFrame that contains all four feature-column sets.
    rows:  optional array of positional row indices; None selects every row.
    """
    def pick(columns):
        subset = frame[columns]
        return subset if rows is None else subset.iloc[rows, :]

    return {
        'fare_input': pick(fare_features),
        'duration_input': pick(duration_features),
        'meter_waiting_input': pick(meter_waiting_features),
        'meter_waiting_fare_input': pick(meter_waiting_fare_features),
    }


# Accumulators: each fold's predictions are ADDED, so after the loop these hold
# the sum over all folds (not the mean).
test_preds = np.zeros(test_df.shape[0])
train_preds = np.zeros(train_df.shape[0])

# Inputs for the complete test / training frames do not depend on the fold,
# so build them once instead of once per iteration.
test_inputs = _model_inputs(test)
full_train_inputs = _model_inputs(train)

skf = StratifiedKFold(n_splits=folds)
for fold, (train_index, test_index) in enumerate(skf.split(train, y), start=1):
    print('fold:',fold)

    fold_train_inputs = _model_inputs(train, train_index)
    fold_valid_inputs = _model_inputs(train, test_index)

    # The splitter yields row positions, so select positionally with .iloc
    # rather than by index label.
    # NOTE(review): this rebinds the module-level y_train / y_valid that
    # get_score() reads; re-running get_score() after this cell would pair
    # them with the hold-out X_train_* frames.
    y_train, y_valid = y.iloc[train_index], y.iloc[test_index]

    model = get_combined_model(fare_representation,
                               duration_representation,
                               meter_waiting_representation,
                               meter_waiting_fare_representation,
                               model_def=model_def)
    model.fit(fold_train_inputs,
              y_train,
              batch_size=512,
              epochs=500,
              validation_data=(fold_valid_inputs, y_valid),
              callbacks=callbacks)

    # Hard 0/1 labels on the held-out fold, scored with micro-averaged F1.
    y_hat = np.where(model.predict(fold_valid_inputs) > 0.5, 1, 0)
    score = f1_score(y_valid, y_hat, average='micro')
    validation_scores.append(score)
    print('validation score:', score)

    # Add this fold's raw predictions for the test set and for all training rows.
    test_preds += model.predict(test_inputs).reshape(test_preds.shape)
    models.append(model)
    train_preds += model.predict(full_train_inputs).reshape(train_preds.shape)

In [None]:
# Cross-validation summary: (mean, standard deviation, per-fold scores).
(
    np.mean(validation_scores),
    np.std(validation_scores),
    validation_scores,
)

In [None]:
# Re-display of the cross-validation summary: (mean, std, per-fold scores).
(np.mean(validation_scores), np.std(validation_scores), validation_scores)

In [None]:
# Keep the fold model with the highest validation score.
best_model = models[np.argmax(validation_scores)]

# Plot the selected model instead of `model`, which is only the variable left
# over from the last cross-validation iteration.
tf.keras.utils.plot_model(best_model, "multi_input_and_output_model.png", show_shapes=True)

In [None]:
# test_preds holds the SUM of the per-fold predictions, so "mean prediction
# above 0.5" means "sum above 0.5 * folds" (1.5 for 3 folds). Deriving the
# threshold from `folds` keeps it correct if the number of folds changes.
submission_df['prediction'] = np.where(test_preds > 0.5 * folds, 1, 0)
submission_df.to_csv('submission.csv',index=False)

In [None]:
# Fraction of submission rows predicted as class 1.
submission_df['prediction'].sum() / len(submission_df)

In [None]:
# Display the (rows, columns) shape of the submission frame.
submission_df.shape