In [1]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras import layers

from sklearn.model_selection import train_test_split

In [2]:
path = [0, 1, 2, 3, 8, 10, 9, 7, 6, 5, 4, 0]
order = [0, 1, 2, 3, 10, 9, 8, 7, 4, 6, 5]
cumTime = [0, 14, 19, 22, 29, 39, 43, 47, 51, 56, 60, 73]
cumDistance = [0, 5.6, 7.4, 8.6, 10.7, 13.2, 14.2, 15.2, 16.1, 17.4, 18.1, 22.5]

In [3]:
def time_estimation(ggmap_time, numStop, numCustomer, timePerStop=5, timePerCustomer=2):

    estimation = ggmap_time * 1.1 + timePerStop * numStop + timePerCustomer * numCustomer
    estimation = estimation + (np.abs(np.random.normal(0, 0.2)) - 0.1) * estimation

    return np.round(estimation, 0)

In [4]:
df = pd.read_csv("./user-data.csv")
df = df.drop(['Unnamed: 0'], axis=1)

df.head()

Unnamed: 0,userID,orderID,addressID,parcelNumber
0,6500000001,1007,6,2
1,6500000002,1014,5,1
2,6500000003,1021,1,1
3,6500000004,1028,3,1
4,6500000005,1035,2,1


In [5]:
cumCustomer = [0] + list(df.groupby("addressID").count().userID.cumsum())

In [6]:
df["timeEstimation"] = df.apply(lambda x: time_estimation(cumTime[order[x.addressID]], order[x.addressID], cumCustomer[x.addressID]), axis=1)

In [7]:
df.head()

Unnamed: 0,userID,orderID,addressID,parcelNumber,timeEstimation
0,6500000001,1007,6,2,247.0
1,6500000002,1014,5,1,217.0
2,6500000003,1021,1,1,47.0
3,6500000004,1028,3,1,98.0
4,6500000005,1035,2,1,84.0


In [8]:
train, test = train_test_split(df, test_size=0.2)

In [9]:
train

Unnamed: 0,userID,orderID,addressID,parcelNumber,timeEstimation
80,6500000081,1567,7,1,254.0
56,6500000057,1399,4,3,261.0
5,6500000006,1042,7,1,239.0
81,6500000082,1574,6,1,197.0
83,6500000084,1588,6,1,259.0
...,...,...,...,...,...
78,6500000079,1553,6,1,226.0
93,6500000094,1658,10,1,257.0
13,6500000014,1098,5,3,280.0
17,6500000018,1126,3,1,126.0


In [10]:
df.head()

Unnamed: 0,userID,orderID,addressID,parcelNumber,timeEstimation
0,6500000001,1007,6,2,247.0
1,6500000002,1014,5,1,217.0
2,6500000003,1021,1,1,47.0
3,6500000004,1028,3,1,98.0
4,6500000005,1035,2,1,84.0


In [11]:
df.apply(lambda x: time_estimation(cumTime[order[int(x.addressID)]], order[int(x.addressID)], cumCustomer[int(x.addressID)]), axis=1)

0     213.0
1     265.0
2      52.0
3     113.0
4      76.0
      ...  
95    182.0
96    124.0
97     87.0
98    231.0
99    255.0
Length: 100, dtype: float64

In [12]:
df_new = pd.DataFrame({"ggmap_time":np.array([0 for _ in range(100)])})
df_new["ggmap_time"] = df.apply(lambda x: cumTime[order[int(x.addressID)]], axis=1)
df_new["numStop"] = df.apply(lambda x: order[int(x.addressID)], axis=1)
df_new["numCustomer"] = df.apply(lambda x: cumCustomer[int(x.addressID)], axis=1)
df_new["timeEstimation"] = df.apply(lambda x: time_estimation(cumTime[order[int(x.addressID)]], order[int(x.addressID)], cumCustomer[int(x.addressID)]), axis=1)
df_new.head()

Unnamed: 0,ggmap_time,numStop,numCustomer,timeEstimation
0,51,8,61,247.0
1,56,9,48,225.0
2,14,1,14,45.0
3,22,3,33,143.0
4,19,2,20,64.0


In [13]:
df_new.to_csv("delivery-training-data.csv")

In [14]:
df_new["timeEstimation"] = 0
# df.apply(lambda x: time_estimation(cumTime[order[x.addressID]], order[x.addressID], cumCustomer[x.addressID]), axis=1)


In [15]:
train_features = train.copy()
train_labels = train_features.pop('timeEstimation')
train_features.pop("userID")
train_features = np.array(train_features)
address = train_features[:, 1]
train_features[:, 0] = np.array(list(map(lambda x: cumTime[order[x]], address)))
train_features[:, 1] = np.array(list(map(lambda x: order[x], address)))
train_features[:, 2] = np.array(list(map(lambda x: cumCustomer[x], address)))


test_features = test.copy()
test_labels = test_features.pop('timeEstimation')
test_features.pop("userID")
test_features = np.array(test_features)
address = test_features[:, 1]
test_features[:, 0] = np.array(list(map(lambda x: cumTime[order[x]], address)))
test_features[:, 1] = np.array(list(map(lambda x: order[x], address)))
test_features[:, 2] = np.array(list(map(lambda x: cumCustomer[x], address)))

In [16]:
normalize = layers.Normalization()
normalize.adapt(train_features)

In [17]:
model = tf.keras.Sequential([
  # normalize,
  layers.Dense(64),
  layers.ReLU(),
  layers.Dense(512),
  layers.ReLU(),
  layers.Dense(512),
  layers.ReLU(),
  layers.Dense(32),
  layers.ReLU(),
  layers.Dense(1)
])

model.compile(loss = tf.keras.losses.MeanSquaredError(),
              optimizer = tf.keras.optimizers.Adam())

model.fit(train_features, train_labels, epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x18b529175b0>

In [18]:
model.trainable = False

In [19]:
model.evaluate(test_features, test_labels)



834.2633056640625

In [20]:
df_test = pd.DataFrame({'estimatedTime': test_labels, 'modelTime': np.array(model(test_features))[:, 0]})
df_test["pctError"] = abs((df_test.modelTime / df_test.estimatedTime) * 100 - 100)

df_test.head()

Unnamed: 0,estimatedTime,modelTime,pctError
1,217.0,215.495178,0.693466
88,111.0,127.319832,14.702551
89,47.0,52.611633,11.939645
53,217.0,215.495178,0.693466
20,261.0,208.087143,20.273125


In [21]:
df_test.replace([np.inf, -np.inf], 0, inplace=True)

In [22]:
df_test.pctError.mean()

10.715227250299534

In [23]:
model.save('./model/model.h5')

In [None]:
import keras



In [24]:
model.save('./testsave/model')



INFO:tensorflow:Assets written to: ./testsave/model\assets


INFO:tensorflow:Assets written to: ./testsave/model\assets


In [25]:
import onnx
import keras2onnx

onnx_model = keras2onnx.convert_keras(model, model.name)
onnx.save_model(onnx_model, "test.onnx")

AttributeError: module 'tensorflow.python.keras' has no attribute 'applications'