In [1]:
#imports
import pandas as pd
import numpy as np
from datetime import datetime
from statistics import mode
import matplotlib.pyplot as plt
import pickle
from collections import Counter
from tensorflow.keras.preprocessing import sequence
from scipy import stats

In [2]:
import time
import warnings
# Suppress every Python warning for the rest of the kernel session (no
# category or module filter is given, so nothing is exempt).
# NOTE(review): this also hides deprecation warnings from the libraries used
# below; consider narrowing the filter once the notebook is stable.
warnings.filterwarnings("ignore")

In [3]:
# Load the pickled object from test_data.pkl and unpack it into two names.
# NOTE(review): pickle.load can execute arbitrary code -- only open this file
# if it was produced by a trusted source.
# NOTE(review): test_set and y_test are not referenced again in the cells
# visible here; confirm they are still needed.
with open("test_data.pkl", "rb") as f:
    test_set, y_test = pickle.load(f)

In [4]:
from tensorflow.keras.models import load_model
# NOTE(review): K is not used in any cell visible here.
import tensorflow.keras.backend as K
# Restore the previously saved model from best_model.hdf5; it is re-compiled
# with a new optimizer and then trained further in the cells below.
model = load_model("best_model.hdf5")

In [5]:
from numpy import savez_compressed, load
import random
def getData(filename1, filename2, filename3, batch_size=512, skip=0.2, train=True):
    """Yield an endless stream of fixed-size batches grouped by trip length.

    Trips are bucketed by ``len(trip_sequence)`` so that every batch stacks
    into a rectangular array without padding. A batch is emitted as soon as
    one bucket holds ``batch_size`` trips; partially filled buckets are kept
    and continue to fill on the next pass over the data.

    Args:
        filename1: path of an ``.npz`` file whose ``arr_0`` holds one
            sequence per trip.
        filename2: path of an ``.npz`` file whose ``arr_0`` holds one info
            entry per trip.
        filename3: path of an ``.npz`` file whose ``arr_0`` holds one target
            per trip (cast to int32).
        batch_size: number of trips in every yielded batch.
        skip: unused; kept so existing callers keep working.
        train: when True, the visiting order is reshuffled before each pass.

    Yields:
        ``([X_trip, X_info], y)`` where ``X_trip`` and ``X_info`` are float32
        arrays built from the bucket and ``y`` has shape ``(batch_size,)``.

    Raises:
        ValueError: if the files contain no trips or disagree on the number
            of trips.
    """
    # Buckets are created on demand, keyed by trip length, so trips of any
    # length are accepted (a fixed 0..49 table raised KeyError beyond that).
    inputs = {}
    targets = {}

    # The files do not change while the generator is alive: read them once
    # instead of on every pass, and close the archives when done.
    with load(filename1, allow_pickle=True) as archive:
        trip_sequences = archive["arr_0"]
    with load(filename2, allow_pickle=True) as archive:
        trip_infos = archive["arr_0"]
    with load(filename3, allow_pickle=True) as archive:
        predictions = np.array(archive["arr_0"], dtype="int32")

    n_trips = len(trip_sequences)
    if n_trips == 0:
        # Without this guard the loop below would spin forever without yielding.
        raise ValueError("getData: %r contains no trips" % (filename1,))
    if len(trip_infos) != n_trips or len(predictions) != n_trips:
        raise ValueError(
            "getData: input files disagree on the number of trips "
            "(%d sequences, %d infos, %d targets)"
            % (n_trips, len(trip_infos), len(predictions))
        )

    # Shuffling an index list keeps the three arrays aligned without copying.
    order = list(range(n_trips))

    while True:
        if train:
            random.seed()
            random.shuffle(order)

        for i in order:
            trip_sequence = trip_sequences[i]
            len_trip = len(trip_sequence)

            # Add the trip to the bucket matching its length.
            bucket_inputs = inputs.setdefault(len_trip, [])
            bucket_targets = targets.setdefault(len_trip, [])
            bucket_inputs.append([trip_sequence, trip_infos[i]])
            bucket_targets.append(predictions[i])

            if len(bucket_inputs) == batch_size:
                X_trip = np.array([t[0] for t in bucket_inputs], dtype='float32')
                X_info = np.array([t[1] for t in bucket_inputs], dtype='float32')
                y = np.array(bucket_targets).reshape(batch_size,)
                # Start a fresh bucket before handing the batch to the caller.
                inputs[len_trip] = []
                targets[len_trip] = []
                yield [X_trip, X_info], y
               
               

In [6]:
import tensorflow as tf
# NOTE(review): this function is imported but the metric is referenced by its
# string name in compile() below, so the import is not used in this cell.
from tensorflow.keras.metrics import sparse_top_k_categorical_accuracy
# Hyper-parameters for the additional training passes on the loaded model.
learning_rate = 0.00001
batch_size = 512
# Adam with the learning rate above; clipvalue=.25 is forwarded to the Keras
# optimizer's gradient clipping option.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, clipvalue=.25)
# Re-compile the loaded model so it trains with the optimizer defined above.
# The metric string uses Keras' default k -- presumably 5; confirm for the
# installed TensorFlow version.
model.compile(optimizer=optimizer,
              loss="sparse_categorical_crossentropy",        # commented-out alternative: custom_loss(model, embedding_size, 1, batch_size)
              metrics=['sparse_top_k_categorical_accuracy'])

In [7]:
# NOTE(review): batch_size is re-bound here to the same value already set in
# the compile cell.
batch_size = 512
# Endless batch generator over the test files; train=True makes getData
# reshuffle the trips on every pass.
test_generator = getData("X_test_trip.npz", "X_test_info.npz", "y_test.npz",
                                        batch_size=batch_size, train=True)


# Endless batch generator over the validation files, visited in file order
# (train=False disables shuffling).
val_generator = getData("X_val_cities.npz", "X_val_info.npz",
                          "y_val.npz", batch_size=batch_size, train=False)

# Sample counts used to derive the number of steps in the fit cells below.
# NOTE(review): despite its name, num_train_samples is paired with
# test_generator in those calls -- presumably it is the test-set size; confirm.
num_train_samples = 10875
num_val_samples = 17913

In [8]:
# Fit the model for one epoch on the validation generator, reporting
# validation metrics on batches drawn from the test generator.
# batch_size is deliberately not passed to fit(): the generators already emit
# complete batches, and Keras documents that batch_size must not be specified
# for generator input (some TensorFlow versions raise ValueError if it is).
model.fit(val_generator,
          steps_per_epoch=num_val_samples // batch_size,
          epochs=1,
          validation_data=test_generator,
          validation_steps=num_train_samples // batch_size,
)



<tensorflow.python.keras.callbacks.History at 0x7fc087d83fa0>

In [9]:
# Fit the model for one epoch on the test generator, reporting validation
# metrics on batches drawn from the validation generator.
# batch_size is deliberately not passed to fit(): the generators already emit
# complete batches, and Keras documents that batch_size must not be specified
# for generator input (some TensorFlow versions raise ValueError if it is).
model.fit(test_generator,
          steps_per_epoch=num_train_samples // batch_size,
          epochs=1,
          validation_data=val_generator,
          validation_steps=num_val_samples // batch_size,
)




<tensorflow.python.keras.callbacks.History at 0x7fc0680fd9a0>

In [10]:
# Persist the model after the two extra training passes above; written to the
# working directory and overwrites any existing final_model.hdf5.
# NOTE(review): overwrite-without-prompt is the Keras default -- confirm.
model.save("final_model.hdf5")

___