In [1]:
import tensorflow as tf
import keras
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import normalize
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score
import time
import warnings 
# Silence warnings for the rest of the notebook.
# NOTE(review): the first call carries no category filter, so the
# DeprecationWarning-specific call below looks redundant — confirm before removing.
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def accuracy_f_score(y_pred, y_true):
    """Print the accuracy (as a percentage, two decimals) and the F1 score.

    Note the argument order: predicted labels come first, true labels second.
    The F1 line is wrapped in terminal colour escape codes.
    """
    accuracy_pct = round(accuracy_score(y_true, y_pred) * 100, 2)
    print(f"Accuracy score: {accuracy_pct}%")

    f1 = f1_score(y_true, y_pred)
    colour_on, colour_off = '\033[92m', '\033[0m'
    print(colour_on + f"F1 score: {f1}" + colour_off)
    

def in_city(y_pred,
            x_bounds=(3750901.5068, 3770901.5069),
            y_bounds=(-19268905.6133, -19208905.6133)):
    """Flag which predicted points fall inside a rectangular bounding box.

    Parameters
    ----------
    y_pred : iterable of indexable pairs
        Each element supplies an x value at index 0 and a y value at index 1.
    x_bounds : (float, float), optional
        Inclusive (min, max) range for the x value. Defaults to the box that
        was previously hard-coded in this function.
    y_bounds : (float, float), optional
        Inclusive (min, max) range for the y value. Defaults to the box that
        was previously hard-coded in this function.

    Returns
    -------
    list of int
        1 for every point inside the box (bounds included), 0 otherwise,
        in the same order as ``y_pred``. An empty input yields an empty list.
    """
    x_min, x_max = x_bounds
    y_min, y_max = y_bounds
    return [
        1 if (x_min <= pred[0] <= x_max) and (y_min <= pred[1] <= y_max) else 0
        for pred in y_pred
    ]

def journey_time(x, y):
    """Return the elapsed time from ``x`` to ``y`` in seconds.

    Both arguments are parsed with ``pd.to_datetime``; the result is
    negative when ``y`` is earlier than ``x``.
    """
    entry_time = pd.to_datetime(x)
    exit_time = pd.to_datetime(y)
    elapsed = exit_time - entry_time
    return elapsed.total_seconds()

In [3]:
"""
df = pd.read_csv('data_train/data_train.csv')
train_df = df[df["time_exit"][:].str[:2] == '15']
train_df["j_time"] = list(map(journey_time, train_df["time_entry"], train_df["time_exit"]))
train_df.drop(["vmax","vmin","vmean","time_entry","time_exit","hash","Unnamed: 0"], axis=1, inplace=True)
train_df.to_csv('data_train/3_features_final.csv')
"""

'\ndf = pd.read_csv(\'data_train/data_train.csv\')\ntrain_df = df[df["time_exit"][:].str[:2] == \'15\']\ntrain_df["j_time"] = list(map(journey_time, train_df["time_entry"], train_df["time_exit"]))\ntrain_df.drop(["vmax","vmin","vmean","time_entry","time_exit","hash","Unnamed: 0"], axis=1, inplace=True)\ntrain_df.to_csv(\'data_train/3_features_final.csv\')\n'

In [4]:
# Load the pre-computed feature table.
train_df = pd.read_csv('data_train/3_features_final.csv')

# Regression targets: one row per sample, columns are (x_exit, y_exit).
y = np.array([train_df["x_exit"].values, train_df["y_exit"].values]).T

# Index by trajectory and strip the target / leftover index columns so that
# only the input features remain.
train_df = (
    train_df
    .set_index("trajectory_id")
    .drop(columns=["x_exit", "y_exit", "Unnamed: 0"])
)
X = train_df.values


In [5]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# NOTE(review): the scaler and PCA are fitted on all of X, before the
# train/test split performed later in the notebook, so held-out rows
# influence the transform — consider fitting on the training split only.
# NOTE(review): X is overwritten in place, so re-running this cell alone
# feeds the already-projected data back in.

# Standardise every feature column.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Project the standardised features onto two principal components.
pca = PCA(n_components=2)
X = pca.fit_transform(X)

In [20]:
from tensorflow.keras.callbacks import LearningRateScheduler
import math
def step_decay(epoch):
    """Step-wise learning-rate schedule.

    Starts at 0.1 and halves the rate each time ``1 + epoch`` crosses a
    multiple of 20.

    Parameters
    ----------
    epoch : int
        Epoch index supplied by the caller.

    Returns
    -------
    float
        Learning rate for that epoch.
    """
    base_rate, decay_factor, epochs_per_drop = 0.1, 0.5, 20
    completed_drops = math.floor((1 + epoch) / epochs_per_drop)
    return base_rate * math.pow(decay_factor, completed_drops)
# Keras callback that wraps step_decay; it is passed to model.fit via `callbacks`.
lrate = LearningRateScheduler(step_decay)

class LossHistory(tf.keras.callbacks.Callback):
    """Keras callback that records the training loss and scheduled rate per epoch.

    Attributes (both reset at the start of every training run):
        losses: value of ``logs['loss']`` reported at the end of each epoch
            (``None`` when the key is missing).
        lr: ``step_decay`` value for the epoch that produced the matching
            entry in ``losses``.
    """

    def on_train_begin(self, logs=None):
        """Reset both histories so a new fit() call starts from empty lists."""
        self.losses = []
        self.lr = []

    def on_epoch_end(self, batch, logs=None):
        """Append this epoch's loss and the learning rate scheduled for it.

        ``batch`` keeps its original name for interface compatibility, but
        Keras passes the zero-based *epoch* index in this position.
        """
        # `logs=None` replaces the former mutable `{}` default.
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        # Use the index of the epoch that just finished. The previous
        # `step_decay(len(self.losses))` was evaluated after the append and
        # therefore stored the rate of the *next* epoch next to this loss.
        self.lr.append(step_decay(batch))

# Callback instance that collects per-epoch loss / learning-rate history.
# `lrate` (the LearningRateScheduler around step_decay) is already created
# directly after step_decay is defined, so the identical second construction
# that used to follow here has been dropped.
loss_history = LossHistory()

In [21]:
#X = X.reshape((134037,3))
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=410,
    shuffle=True,
)



In [22]:
# Fully connected regression network: three L2-regularised hidden layers, each
# followed by a LeakyReLU activation, then a 2-unit linear output (one unit
# per target column).
L2_PENALTY = 0.001
HIDDEN_LAYERS = (('Dense_1', 256), ('Dense_2', 1024), ('Dense_3', 1024))

model = Sequential()

for layer_name, units in HIDDEN_LAYERS:
    # The regulariser comes from tf.keras, like every other layer here;
    # the standalone `keras` package is no longer mixed into a tf.keras model.
    # The penalty is passed positionally because the keyword name differs
    # between Keras releases.
    model.add(Dense(units, kernel_initializer='normal', activation=None, name=layer_name,
                    kernel_regularizer=tf.keras.regularizers.l2(L2_PENALTY)))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.3))

# Layer name kept as-is so existing checkpoints / logs still match.
model.add(Dense(2, kernel_initializer='normal', activation='linear', name='Classifier'))

# The optimiser used to be created with lr=0.00, which cannot update any
# weight unless a scheduler overrides it. A non-zero rate keeps the model
# trainable on its own; when the LearningRateScheduler callback is passed to
# fit(), it still overrides this value every epoch.
optimiser = tf.keras.optimizers.Adam(lr=0.001)

In [23]:
# Training hyper-parameters consumed by model.fit.
EPOCHS = 100
BATCH_SIZE = 512

# Timestamped run name so every run writes to its own TensorBoard directory.
NAME = f"Atlanta-CC-DNN-{int(time.time())}"
tensorboard = TensorBoard(log_dir=f"logs/{NAME}")

In [24]:
#gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
# NOTE(review): this session is created with device-placement logging, but
# nothing visible in this notebook hands it to Keras — confirm whether it
# should be registered with the backend or removed.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

# Write a checkpoint whenever the validation loss improves; the epoch number
# and val_loss are embedded in the file name.
checkpoint_name = 'Weights-{epoch:03d}--{val_loss:.5f}.hdf5'
checkpoint = ModelCheckpoint(checkpoint_name, monitor='val_loss', verbose=1,
                             save_best_only=True, mode='auto')

# The targets are continuous (x, y) coordinates, so the classification metric
# 'accuracy' carried no information; mean absolute error is reported instead.
model.compile(loss='mean_absolute_percentage_error',
              optimizer=optimiser,
              metrics=['mae'])

# Keep the returned History object so the curves can be inspected afterwards.
# The last 20% of the training split is used for validation.
history = model.fit(
    x_train, y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.2,
    callbacks=[checkpoint, tensorboard, loss_history, lrate],
)

Train on 85783 samples, validate on 21446 samples
Epoch 1/100
Epoch 00001: val_loss improved from inf to 13.32528, saving model to Weights-001--13.32528.hdf5
Epoch 2/100
Epoch 00002: val_loss improved from 13.32528 to 8.98538, saving model to Weights-002--8.98538.hdf5
Epoch 3/100
Epoch 00003: val_loss improved from 8.98538 to 6.73349, saving model to Weights-003--6.73349.hdf5
Epoch 4/100
Epoch 00004: val_loss improved from 6.73349 to 5.50423, saving model to Weights-004--5.50423.hdf5
Epoch 5/100
Epoch 00005: val_loss did not improve from 5.50423
Epoch 6/100
Epoch 00006: val_loss did not improve from 5.50423
Epoch 7/100
Epoch 00007: val_loss did not improve from 5.50423
Epoch 8/100
Epoch 00008: val_loss did not improve from 5.50423
Epoch 9/100
Epoch 00009: val_loss improved from 5.50423 to 3.93519, saving model to Weights-009--3.93519.hdf5
Epoch 10/100
Epoch 00010: val_loss did not improve from 3.93519
Epoch 11/100
Epoch 00011: val_loss did not improve from 3.93519
Epoch 12/100
Epoch 00

KeyboardInterrupt: 

In [25]:
# Restore the chosen checkpoint and re-compile the model for evaluation.
# NOTE(review): this file name is picked by hand and does not appear among the
# checkpoints listed in the training log shown above — confirm it exists and
# belongs to the current architecture before running on a fresh kernel.
weights_file = 'Weights-027--2.06749.hdf5' # choose the best checkpoint
model.load_weights(weights_file) # load it

# 'accuracy' is a classification metric and is meaningless for this
# coordinate-regression output; report mean absolute error instead.
model.compile(
    loss='mean_absolute_percentage_error',
    optimizer=optimiser,
    metrics=['mae'],
)

In [11]:
# pred = model.predict(x_test)

# pred_b = in_city(pred)
# y_test_b = in_city(y_test)

# accuracy_f_score(pred_b,y_test_b)