Import packages

In [42]:
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
import numpy as np
import os
import time

Mount Google Drive

In [3]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# data files stored there can be read with ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Set data root path

In [7]:
# Directory on the mounted Drive that every .npy file is read from and the
# predictions are written to. The trailing slash matters: file names are
# appended to this string by plain concatenation.
data_root_path = "/content/drive/My Drive/didicomp/"

Load data

In [74]:
# Ground-truth travel time (the regression target)
travel_time = np.load(data_root_path + "travel_time.npy")
# Distance traveled
distances = np.load(data_root_path + "distances.npy")
# Simple average travel time from 10 minutes ago
average_traveled = np.load(data_root_path + "simple_avg_time.npy")

Normalize data

In [25]:
# function to rescale the values to [0, 1]
def sigmoid_normalize(data):
  """Min-max rescale `data` so its smallest value maps to 0 and its largest to 1.

  Despite the name, no sigmoid is applied; the result is
  (data - min(data)) / (max(data) - min(data)), using the global minimum
  and maximum over all elements.

  Args:
    data: Non-empty array-like of numbers.

  Returns:
    An array shaped like `data` with values in [0, 1]. When every element
    of `data` is equal, the range is zero and an all-zero array is returned
    instead of dividing by zero.

  Raises:
    ValueError: If `data` is empty (raised by np.min / np.max).
  """
  # Local names deliberately avoid shadowing the builtins max/min/range.
  low = np.min(data)
  high = np.max(data)
  span = high - low
  if span == 0:
    # Constant input: (data - low) is already all zeros, so dividing by 1
    # yields zeros rather than the NaNs that 0/0 would produce.
    span = 1
  return (data - low) / span

In [75]:
# Capture the raw minimum and maximum of every array BEFORE the arrays are
# normalized: the test inputs are later scaled with these values and the
# predictions are mapped back to real travel times with them. Running this
# cell after the normalization cell would capture the rescaled extremes
# instead of the raw ones.
# NOTE: `max_average_travelede` is misspelled, but the test-normalization
# cell refers to it by this exact name, so it is kept as is.
max_distance, min_distance = np.max(distances), np.min(distances)
max_average_travelede, min_average_traveled = (
    np.max(average_traveled),
    np.min(average_traveled),
)
max_travel_time, min_travel_time = np.max(travel_time), np.min(travel_time)

In [76]:
# Rescale the target and both input features, each by its own min and max.
# The original names are rebound to the normalized arrays.
travel_time, distances, average_traveled = map(
    sigmoid_normalize, (travel_time, distances, average_traveled)
)

Build Model

In [48]:
class FeedForward(keras.Model):
  """Fully connected network: Dense(10) -> Dense(100) -> Dense(10) -> Dense(1).

  The three hidden layers use ReLU and the single output unit uses a
  sigmoid. Every layer is given an L2(0.01) kernel regularizer and an
  L2(0.01) activity regularizer.
  """

  def __init__(self):
    super().__init__()

  def build(self, input_shape):
    """Create the four Dense layers; `input_shape` itself is not used."""

    def regularized_dense(units, activation):
      # Each layer gets its own pair of regularizer objects.
      return layers.Dense(
          units,
          activation=activation,
          kernel_regularizer=tf.keras.regularizers.l2(0.01),
          activity_regularizer=tf.keras.regularizers.l2(0.01),
      )

    self.dense1 = regularized_dense(10, "relu")
    self.dense2 = regularized_dense(100, "relu")
    self.dense3 = regularized_dense(10, "relu")
    self.dense4 = regularized_dense(1, "sigmoid")

  def call(self, X):
    """Pass `X` through dense1..dense4 in order and return the final output."""
    hidden = X
    for layer in (self.dense1, self.dense2, self.dense3, self.dense4):
      hidden = layer(hidden)
    return hidden

In [37]:
# Shuffle data
def shuffle(X, Y):
  """Reorder the rows of X and Y with one shared random permutation.

  The permutation has X.shape[0] entries and is drawn from NumPy's global
  random state, so row i of the returned X still pairs with row i of the
  returned Y. The inputs themselves are not modified.

  Returns:
    A tuple (X_shuffled, Y_shuffled).
  """
  order = np.arange(X.shape[0])
  np.random.shuffle(order)  # permutes `order` in place
  return X[order], Y[order]

Instantiate the model, define the loss function, and create the optimizer

In [86]:
# Single model instance; the training step, the checkpoint object and the
# prediction function all refer to this global.
model = FeedForward()

# Adam optimizer with a learning rate of 1e-4.
optimizer = keras.optimizers.Adam(1e-4)
def train_loss(y_true,y_pred):
    """Mean squared error between targets and predictions.

    Thin wrapper around `tf.keras.losses.MSE`; both arguments are passed
    through unchanged and its result is returned as is.
    """
    mse = tf.keras.losses.MSE(y_true, y_pred)
    return mse

Set checkpoint path

In [39]:
# Checkpoints live in a sub-directory of the data root. `data_root_path`
# already ends with "/", so concatenating '/ff_checkpoint/' produced a
# doubled slash; os.path.join builds the path with exactly one separator.
# The trailing '' keeps the trailing separator the original value had.
checkpoint_dir = os.path.join(data_root_path, 'ff_checkpoint', '')
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
# Track the optimizer together with the model in one checkpoint object.
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                  model=model
                  )

Define train function

In [41]:
def train_step(X,Y):
  """Run one optimization step on a single batch and return its loss.

  Uses the global `model`, `train_loss` and `optimizer`.

  Args:
    X: Batch of input features, passed directly to `model`.
    Y: Batch of targets corresponding row for row to `X`.

  Returns:
    The value returned by `train_loss` for this batch.

  NOTE(review): only the data loss is differentiated here. The kernel and
  activity regularizers declared on the layers are not added to `loss`
  (Keras exposes them through `model.losses`), so they do not influence
  the update. Confirm whether that is intended.
  """
  with tf.GradientTape() as model_tape:
      Y_ = model(X)
      # train_loss is declared as (y_true, y_pred): targets go first.
      loss = train_loss(Y, Y_)
  gradients_of_model = model_tape.gradient(loss, model.trainable_variables)

  optimizer.apply_gradients(zip(gradients_of_model, model.trainable_variables))
  return loss

In [51]:
def train(X, Y, batch_size, epochs):
  """Train the global model with shuffled mini-batches.

  Every epoch reshuffles X and Y together, then calls `train_step` on
  consecutive slices of `batch_size` rows (the last slice may be shorter).
  Progress and the running mean batch loss are printed every 1000 batches,
  and the wall-clock time is printed at the end of each epoch.

  Args:
    X: Feature array; rows are samples.
    Y: Target array with the same number of rows as X.
    batch_size: Number of rows per training batch (positive integer).
    epochs: Number of passes over the data.
  """
  input_size = X.shape[0]
  # Ceiling division. The previous `input_size // batch_size + 1` scheduled
  # an extra, empty batch whenever input_size was an exact multiple of
  # batch_size: a wasted optimizer step, and np.mean of the empty loss is
  # NaN, which turned the running loss total into NaN.
  batch_num = (input_size + batch_size - 1) // batch_size
  for epoch in range(epochs):
    start = time.time()
    X, Y = shuffle(X, Y)
    loss_ep = 0
    length = 0
    for batch_id in range(batch_num):
      # Periodic progress report; batch 0 is skipped because no loss has
      # been accumulated yet (length would be 0).
      if batch_id % 1000 == 0 and batch_id != 0:
        print("epoch:",epoch,",batch:",batch_id,"/",batch_num)
        print("mean_loss:{}".format(loss_ep/length))
      b_start = batch_id * batch_size
      b_end = min(b_start + batch_size, input_size)
      batch_X = X[b_start:b_end]
      batch_Y = Y[b_start:b_end]
      loss = train_step(batch_X, batch_Y)
      loss_ep = loss_ep + np.mean(loss.numpy())
      length = length + 1

    # Checkpointing is currently disabled:
    # if (epoch + 1) % 5 == 0:
    #   checkpoint.save(file_prefix = checkpoint_prefix)

    print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))

In [87]:
# Join the two inputs along axis 1: the columns of `average_traveled` come
# first, followed by the columns of `distances`.
features = np.concatenate((average_traveled,distances), axis=1)
print(features)

[[0.04417959 0.02018482]
 [0.04991487 0.02219041]
 [0.05591899 0.0544721 ]
 ...
 [0.0664038  0.03384314]
 [0.07348329 0.04016492]
 [0.03208173 0.00856228]]


Train

In [None]:
# Training hyperparameters. BATCH_SIZE is also reused when predicting.
EPOCHS = 5
BATCH_SIZE = 128
train(features, travel_time, BATCH_SIZE,EPOCHS)

Load test data

In [91]:
# Test-set inputs, read from the `test_`-prefixed files in the data root.
test_distances = np.load(data_root_path + "test_distances.npy")
test_avg_time = np.load(data_root_path + "test_simple_avg_time.npy")

Generate test results

In [60]:
def normalize_test_data(data, max_value, min_value):
  """Min-max rescale `data` with an externally supplied minimum and maximum.

  Computes (data - min_value) / (max_value - min_value). Values of `data`
  outside [min_value, max_value] land outside [0, 1]; nothing is clipped.

  Args:
    data: Number or array to rescale.
    max_value: Value that should map to 1.
    min_value: Value that should map to 0.

  Returns:
    The rescaled data. Where max_value equals min_value the range is
    treated as 1, so the result is `data - min_value` instead of a
    division by zero.
  """
  # `span` rather than `range`, so the builtin is not shadowed.
  span = max_value - min_value
  if np.ndim(span) == 0:
    if span == 0:
      span = 1
  else:
    # Array-valued bounds: patch only the zero-width entries.
    span = np.where(span == 0, 1, span)
  return (data - min_value) / span

In [62]:
def denormalize_test_data(data, max_value, min_value):
  """Map min-max normalized values back onto the original scale.

  Returns data * (max_value - min_value) + min_value, i.e. the inverse of
  (x - min_value) / (max_value - min_value).
  """
  span = max_value - min_value
  return data * span + min_value

In [92]:
# Scale the test inputs with the min/max values captured from the training
# arrays, so that both sets are on the same scale.
# NOTE: `max_average_travelede` is misspelled, but that is the name the
# min/max cell defines, so it has to be spelled the same way here.
test_avg_time = normalize_test_data(test_avg_time, max_average_travelede, min_average_traveled)
test_distances = normalize_test_data(test_distances, max_distance, min_distance)

In [93]:
# Same column order as the training `features`: average time first, then
# distance.
test_features = np.concatenate((test_avg_time,test_distances), axis=1)

In [68]:
def predict(X, batch_size):
  """Run the global model over X in batches and return denormalized predictions.

  Args:
    X: Feature array; rows are samples.
    batch_size: Number of rows fed to the model per call (positive integer).

  Returns:
    Array of shape (X.shape[0], 1): the model outputs mapped back to the
    original travel-time scale with the global `max_travel_time` and
    `min_travel_time`.
  """
  input_size = X.shape[0]
  # Ceiling division: `input_size // batch_size + 1` scheduled a pointless
  # model call on an empty slice whenever input_size was an exact multiple
  # of batch_size.
  batch_num = (input_size + batch_size - 1) // batch_size
  predictions = np.zeros((input_size,1))
  for batch_id in range(batch_num):
    b_start = batch_id * batch_size
    b_end = min(b_start + batch_size, input_size)
    batch_X = X[b_start:b_end]
    predictions[b_start:b_end] = model(batch_X).numpy()

  # The model emits values on the normalized scale; map them back.
  predictions = denormalize_test_data(predictions, max_travel_time, min_travel_time)
  return predictions

In [94]:
# Predicted travel times for the test set, already mapped back to the
# original scale inside predict().
test_travel_time = predict(test_features, BATCH_SIZE)

In [95]:
# np.save appends ".npy" when the name lacks it, so the predictions end up
# in test_travel_time.npy under the data root.
np.save(data_root_path + "test_travel_time",test_travel_time)