In [1]:
import numpy as np
# import pandas as pd

import keras
from keras.preprocessing.text import one_hot,Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Flatten, Embedding, Input, LSTM, Dropout, concatenate, GRU
from keras.models import Model

from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Shape constants shared by the loading, windowing and model cells.
mobility_num = 6    # trailing target columns in every data row
total_columns = 53  # width of one data row (features followed by targets)
feature_dim = total_columns - mobility_num  # input columns left once the targets are split off
input_days = 50     # length of the look-back window fed to the model
city_num = 7        # vocabulary size of the city-id embedding

In [3]:
import csv
def process(filename, all_data, cities, city_type):
    """Load one processed CSV and append its data array and city-id array.

    Every kept row is laid out as: CSV columns 2-12, then a synthetic
    ``(5 + row_index) % 7`` value (a 7-step cycle over the row position),
    then CSV columns 16 onward. The first CSV row (the header) is dropped
    and empty cells are read as 0.

    Args:
        filename: path of the CSV file to read.
        all_data: list that receives the float array of shape (rows, columns).
        cities: list that receives an array of shape (rows, input_days)
            filled with ``city_type``.
        city_type: numeric id written into every cell of the city array.

    Returns:
        None; ``all_data`` and ``cities`` are extended in place.
    """
    # newline='' is what the csv module asks for, so quoted fields that
    # contain line breaks are parsed correctly on every platform.
    with open(filename, 'r', newline='') as dest_f:
        rows = [
            row[2:13] + [(5 + idx) % 7] + row[16:]
            for idx, row in enumerate(csv.reader(dest_f))
        ]

    # rows[0] was built from the header line, so it is discarded here.
    table = np.asarray(rows[1:])
    table[np.where(table == '')] = '0'
    table = table.astype(float)

    # One row of repeated ids per data row, matching the window length.
    city = np.ones((table.shape[0], input_days)) * city_type

    all_data.append(table)
    cities.append(city)

In [4]:
# Accumulators filled by process(): one data array and one city-id array per loaded CSV.
all_data, cities = [], []

In [5]:
# Load the first dataset with city id 1.
# NOTE(review): absolute, user-specific path; this cell only runs where that file exists.
process("/Users/junyang/Downloads/japanprocessed.csv", all_data, cities, 1)

In [6]:
# Load the remaining datasets, each with its own city id.
# Egypt previously reused id 5 (the United States' id), which made the two
# datasets share one embedding row; it now gets the next free id, 6.
# NOTE(review): absolute, user-specific paths; these only resolve where the files exist.
process("/Users/junyang/Downloads/franceprocessed.csv", all_data, cities, 2)
process("/Users/junyang/Downloads/indiaprocessed.csv", all_data, cities, 3)
process("/Users/junyang/Downloads/unitedkingdomprocessed.csv", all_data, cities, 4)
process("/Users/junyang/Downloads/unitedstatesprocessed.csv", all_data, cities, 5)
process("/Users/junyang/Downloads/egyptprocessed.csv", all_data, cities, 6)

In [7]:
def build_model():
    """Build the mobility regressor: daily features plus a city embedding fed to a GRU.

    Inputs of the returned model:
        Features: tensor of shape (input_days, feature_dim) per sample.
        City_ID: ids of shape (input_days,) per sample; the embedding is
            created with ``input_dim=city_num``.

    Returns:
        An uncompiled keras ``Model`` that outputs ``mobility_num`` values
        per sample.
    """
    feature_input = Input(shape=(input_days, feature_dim), name="Features")
    city_input = Input(shape=(input_days,), name="City_ID")

    # Each day's city id becomes a 4-d vector that is appended to that day's features.
    city_vec = Embedding(input_dim=city_num, output_dim=4, input_length=input_days)(city_input)
    x = concatenate([feature_input, city_vec])

    # No input_shape here: the layer is called on a tensor, so its input shape
    # comes from that tensor (feature_dim + 4 channels). The old
    # (input_days, feature_dim) hint did not describe the real input.
    x = GRU(units=256, return_sequences=False)(x)
    x = Dense(units=256, activation="selu")(x)
    x = Dropout(0.2)(x)
    x = Dense(units=128, activation="selu")(x)
    x = Dense(units=mobility_num, name="Predicted_Mobility")(x)

    return Model(inputs=[feature_input, city_input], outputs=x)

In [8]:
# Instantiate the network and configure it for mean-squared-error regression with Adam.
model = build_model()
model.compile(optimizer = 'adam', loss = 'mean_squared_error')

In [9]:
# Pool every dataset's rows so both scalers are fitted on one common value range.
# np.concatenate replaces np.stack(...).reshape((-11, 53)): the -11 was a typo
# for -1, and concatenating also works when datasets have different row counts.
overalldata = np.concatenate(all_data, axis=0)
overallcities = np.concatenate(cities, axis=0)

Original_X_data = overalldata[:, :-mobility_num]
Original_mobility = overalldata[:, -mobility_num:]

# NOTE(review): both scalers are fitted on all rows, including the trailing
# rows of each dataset that are held out below.
X_sc = MinMaxScaler()
X_sc.fit(Original_X_data)

y_sc = MinMaxScaler()
y_sc.fit(Original_mobility)

test_days = 40  # trailing rows of each dataset reserved for the held-out windows

train_X, train_y, train_cs = [], [], []
test_X_t, test_y_t, test_cs_t = [], [], []
for da, cc in zip(all_data, cities):
    # X_data / mobility keep the last dataset's scaled arrays after the loop;
    # later cells read them.
    X_data = X_sc.transform(da[:, :-mobility_num])
    mobility = y_sc.transform(da[:, -mobility_num:])

    split = da.shape[0] - test_days

    # Sample i pairs the input_days rows before row i with row i's targets.
    X = [X_data[i - input_days: i, :] for i in range(input_days, split)]
    y = [mobility[i] for i in range(input_days, split)]
    cs = [cc[i] for i in range(input_days, split)]

    X_t = [X_data[i - input_days: i, :] for i in range(split, da.shape[0])]
    y_t = [mobility[i] for i in range(split, da.shape[0])]
    cs_t = [cc[i] for i in range(split, da.shape[0])]

    train_X.append(np.stack(X))
    train_y.append(np.stack(y))
    train_cs.append(np.stack(cs))

    test_X_t.append(np.stack(X_t))
    test_y_t.append(np.stack(y_t))
    test_cs_t.append(np.stack(cs_t))
    

# X = np.stack(X)
# y = np.stack(y)
# city = np.ones((X.shape[0], input_days))



# def split()

In [10]:
# Collapse the per-dataset training lists into single arrays along the sample axis.
train_X, train_y, train_cs = [
    np.concatenate(parts) for parts in (train_X, train_y, train_cs)
]

# The held-out lists stay as lists; their pooled versions get val_* names.
val_X_t, val_y_t, val_cs_t = [
    np.concatenate(parts) for parts in (test_X_t, test_y_t, test_cs_t)
]




In [11]:
#     # data = np.random.random((200, feature_dim))
# Original_X_data = data[:, :-mobility_num]
# Original_mobility = data[:, -mobility_num:]

# X_sc = MinMaxScaler()
# X_sc.fit(Original_X_data)
# X_data = X_sc.transform(Original_X_data)

# y_sc = MinMaxScaler()
# y_sc.fit(Original_mobility)
# mobility = y_sc.transform(Original_mobility)

# # X_data = Original_X_data
# # mobility = Original_mobility

# # print(y_sc.data_max_)

# X, y = [], []
# for i in range(input_days, data.shape[0]):
#     X.append(X_data[i - input_days: i, :])
#     y.append(mobility[i])

# X = np.stack(X)
# y = np.stack(y)
# city = np.ones((X.shape[0], input_days))

# # model.fit([X, city], y, epochs = 100, batch_size = 100)

In [12]:
# train_x, train_y = X[:-30], y[:-30]
# test_x, test_y = X[-30:], y[-30:]
# train_city, test_city = city[:-30], city[-30:]

In [13]:
# Sanity check of the pooled training tensor: (samples, input_days, feature columns).
train_X.shape

(714, 50, 47)

In [15]:
# Train on the pooled windows; validation skips the first 60 pooled held-out samples.
model.fit(
    [train_X, train_cs],
    train_y,
    epochs=300,
    batch_size=50,
    validation_data=([val_X_t[60:], val_cs_t[60:]], val_y_t[60:]),
)

Train on 714 samples, validate on 180 samples
Epoch 1/300
Epoch 2/300
100/714 [===>..........................] - ETA: 1s - loss: 0.0012

KeyboardInterrupt: 

In [17]:
# Predict the held-out windows of the dataset at index 4 of the per-dataset lists.
pred = model.predict([test_X_t[4], test_cs_t[4]])

In [18]:
# Scaled predictions, one row per held-out sample and one column per target.
pred

array([[0.30232352, 0.26760295, 0.44396234, 0.261531  , 0.4805976 ,
        0.23798555],
       [0.3189156 , 0.27866533, 0.47287503, 0.29760572, 0.4581284 ,
        0.27903643],
       [0.32161447, 0.27014026, 0.46639556, 0.28964224, 0.46130732,
        0.28888065],
       [0.29257563, 0.24562782, 0.44551468, 0.2531548 , 0.50946367,
        0.24999347],
       [0.31235397, 0.2437102 , 0.44443914, 0.22748122, 0.50724137,
        0.28005415],
       [0.3522287 , 0.2439721 , 0.45511386, 0.27505797, 0.26671076,
        0.57771575],
       [0.35608748, 0.23756261, 0.4414622 , 0.28245464, 0.23262325,
        0.59253705],
       [0.29475608, 0.2650123 , 0.44167787, 0.26924255, 0.48752964,
        0.22723003],
       [0.31812957, 0.27619028, 0.47819   , 0.31691927, 0.46234998,
        0.2662389 ],
       [0.3164926 , 0.2661798 , 0.46387374, 0.29790193, 0.46588114,
        0.2803564 ],
       [0.30009446, 0.2605012 , 0.45341986, 0.27837786, 0.50885516,
        0.24349284],
       [0.30804166, 0

In [19]:
# Scaled ground truth for the same held-out samples, for side-by-side comparison.
test_y_t[4]

array([[0.32380952, 0.3       , 0.47154472, 0.27799228, 0.46153846,
        0.26851852],
       [0.26666667, 0.2       , 0.3495935 , 0.19305019, 0.59615385,
        0.21296296],
       [0.33333333, 0.3       , 0.50406504, 0.32818533, 0.46153846,
        0.26851852],
       [0.32380952, 0.3       , 0.50406504, 0.30888031, 0.48076923,
        0.25925926],
       [0.32380952, 0.26666667, 0.43902439, 0.22007722, 0.5       ,
        0.27777778],
       [0.38095238, 0.3       , 0.50406504, 0.35907336, 0.23076923,
        0.62037037],
       [0.38095238, 0.29166667, 0.46341463, 0.32818533, 0.21153846,
        0.59259259],
       [0.31428571, 0.29166667, 0.44715447, 0.27799228, 0.46153846,
        0.25925926],
       [0.32380952, 0.29166667, 0.47154472, 0.30501931, 0.48076923,
        0.25925926],
       [0.31428571, 0.26666667, 0.44715447, 0.25868726, 0.5       ,
        0.25925926],
       [0.31428571, 0.26666667, 0.46341463, 0.24710425, 0.5       ,
        0.25925926],
       [0.34285714, 0

In [20]:
# Undo the target scaling so predictions are in the original units.
pred_true = y_sc.inverse_transform(pred)

In [24]:
# Undo the target scaling on the held-out ground truth of dataset index 4.
y_true = y_sc.inverse_transform(test_y_t[4])

In [30]:
# One value per line: target column 5 of the last 30 de-scaled predictions.
for predicted_value in pred_true[-30:, 5]:
    print(predicted_value)

-65.702774
-56.493683
-29.359385
-21.942768
-63.37257
-65.66274
-60.866127
-60.646095
-57.022327
-47.769775
-50.22336
-73.02804
-67.34671
-63.75627
-62.916035
-67.07828
-40.220215
-34.98554
-66.35213
-60.834206
-56.77393
-60.94502
-54.61152
-27.17166
-25.645466
-53.064674
-49.805584
-53.40757
-56.967865
-54.517456


In [29]:
# One value per line: target column 5 of the last 30 de-scaled ground-truth rows.
for actual_value in y_true[-30:, 5]:
    print(actual_value)

-64.0
-62.0
-27.0
-35.0
-64.0
-62.99999999999999
-64.0
-62.99999999999999
-61.0
-25.999999999999996
-30.000000000000004
-64.0
-64.0
-62.0
-62.99999999999999
-61.0
-29.0
-28.000000000000007
-62.99999999999999
-62.99999999999999
-64.0
-62.99999999999999
-61.0
-25.999999999999996
-29.0
-77.0
-62.99999999999999
-62.0
-62.99999999999999
-59.0


In [27]:
# Drop the first entry of `date`.
# NOTE(review): `date` is not defined in any visible cell (the recorded output is a
# NameError), and rebinding it in place makes this cell non-idempotent on re-run.
date = date[1:]

NameError: name 'date' is not defined

In [37]:
# One entry per line: the last 30 items of `date`.
# NOTE(review): `date` is not defined in any visible cell — confirm where it comes from.
for day in date[-30:]:
    print(day)

2020-08-12
2020-08-13
2020-08-14
2020-08-15
2020-08-16
2020-08-17
2020-08-18
2020-08-19
2020-08-20
2020-08-21
2020-08-22
2020-08-23
2020-08-24
2020-08-25
2020-08-26
2020-08-27
2020-08-28
2020-08-29
2020-08-30
2020-08-31
2020-09-01
2020-09-02
2020-09-03
2020-09-04
2020-09-05
2020-09-06
2020-09-07
2020-09-08
2020-09-09
2020-09-10


In [50]:
# One value per line: target column 0 for every row except the last 30.
for observed_value in Original_mobility[:-30, 0]:
    print(observed_value)

-13.0
-4.0
-2.0
-4.0
-4.0
-4.0
-9.0
-3.0
-5.0
-6.0
-11.0
-8.0
-9.0
-13.0
-15.0
-21.0
-8.0
-20.0
-14.0
-14.0
-18.0
-26.0
-13.0
-20.0
-9.0
-10.0
-10.0
-29.0
-10.0
-13.0
-9.0
-9.0
-3.0
-15.0
-13.0
-13.0
-13.0
-8.0
-6.0
-10.0
-20.0
-46.0
-63.0
-24.0
-23.0
-38.0
-28.0
-30.0
-42.0
-50.0
-29.0
-32.0
-45.0
-47.0
-48.0
-52.0
-59.0
-65.0
-47.0
-49.0
-52.0
-53.0
-72.0
-54.0
-63.0
-53.0
-57.0
-56.0
-57.0
-59.0
-61.0
-59.0
-53.0
-57.0
-54.0
-57.0
-61.0
-60.0
-60.0
-57.0
-68.0
-53.0
-55.0
-55.0
-55.0
-54.0
-53.0
-53.0
-53.0
-54.0
-64.0
-48.0
-55.0
-57.0
-52.0
-54.0
-53.0
-49.0
-46.0
-46.0
-47.0
-43.0
-44.0
-42.0
-38.0
-39.0
-41.0
-35.0
-37.0
-37.0
-37.0
-35.0
-30.0
-34.0
-33.0
-33.0
-38.0
-33.0
-40.0
-31.0
-29.0
-27.0
-27.0
-29.0
-36.0
-20.0
-23.0
-36.0
-23.0
-26.0
-27.0
-25.0
-20.0
-28.0
-22.0
-28.0
-30.0
-22.0
-29.0
-26.0
-23.0
-33.0
-25.0
-27.0
-30.0
-27.0
-25.0
-22.0
-28.0
-28.0
-29.0
-24.0
-33.0
-29.0
-24.0
-26.0
-23.0
-22.0
-34.0
-30.0
-34.0
-30.0
-28.0
-27.0
-27.0
-25.0
-28.0
-26.0
-25.0
-26.

In [52]:
# One entry per line: every item of `date` except the last 30.
# NOTE(review): `date` is not defined in any visible cell — confirm where it comes from.
for day in date[:-30]:
    print(day)

2020-02-15
2020-02-16
2020-02-17
2020-02-18
2020-02-19
2020-02-20
2020-02-21
2020-02-22
2020-02-23
2020-02-24
2020-02-25
2020-02-26
2020-02-27
2020-02-28
2020-02-29
2020-03-01
2020-03-02
2020-03-03
2020-03-04
2020-03-05
2020-03-06
2020-03-07
2020-03-08
2020-03-09
2020-03-10
2020-03-11
2020-03-12
2020-03-13
2020-03-14
2020-03-15
2020-03-16
2020-03-17
2020-03-18
2020-03-19
2020-03-20
2020-03-21
2020-03-22
2020-03-23
2020-03-24
2020-03-25
2020-03-26
2020-03-27
2020-03-28
2020-03-29
2020-03-30
2020-03-31
2020-04-01
2020-04-02
2020-04-03
2020-04-04
2020-04-05
2020-04-06
2020-04-07
2020-04-08
2020-04-09
2020-04-10
2020-04-11
2020-04-12
2020-04-13
2020-04-14
2020-04-15
2020-04-16
2020-04-17
2020-04-18
2020-04-19
2020-04-20
2020-04-21
2020-04-22
2020-04-23
2020-04-24
2020-04-25
2020-04-26
2020-04-27
2020-04-28
2020-04-29
2020-04-30
2020-05-01
2020-05-02
2020-05-03
2020-05-04
2020-05-05
2020-05-06
2020-05-07
2020-05-08
2020-05-09
2020-05-10
2020-05-11
2020-05-12
2020-05-13
2020-05-14
2020-05-15

In [16]:
# Shape of the scaled feature matrix currently bound to X_data: (rows, feature columns).
X_data.shape

(209, 47)

In [18]:
from sklearn import datasets, ensemble
# from sklearn.inspection import permutation_importance
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Hyper-parameters passed to the GradientBoostingRegressor baseline.
# NOTE(review): 'ls' is the least-squares loss name accepted by the scikit-learn release
# that produced the recorded output; newer releases appear to spell it 'squared_error' —
# confirm before upgrading.
params = {'n_estimators': 500,
          'max_depth': 4,
          'min_samples_split': 5,
          'learning_rate': 0.01,
          'loss': 'ls'}

# reg = ensemble.GradientBoostingRegressor(**params)
# reg.fit(X_data, y_train)

In [20]:
# Baseline: fit a gradient-boosting regressor on all but the last 30 rows of X_data,
# predicting target column 0 only.
reg = ensemble.GradientBoostingRegressor(**params)
reg.fit(X_data[:-30], mobility[:-30, 0])

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.01, loss='ls', max_depth=4, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=5, min_weight_fraction_leaf=0.0,
             n_estimators=500, n_iter_no_change=None, presort='auto',
             random_state=None, subsample=1.0, tol=0.0001,
             validation_fraction=0.1, verbose=0, warm_start=False)

In [21]:
# Predict the 30 rows that were left out of the fit. This rebinds `pred`.
pred = reg.predict(X_data[-30:])

In [33]:
# y_sc was fitted on mobility_num target columns, so the single-target prediction
# is zero-padded to that width before inverting; only column 0 is meaningful.
# The padded array gets its own name so `pred` is not rebound and the cell can
# be re-run without a shape error.
pred_padded = np.concatenate(
    [pred.reshape(-1, 1), np.zeros((pred.shape[0], mobility_num - 1))], axis=1
)
pred_true = y_sc.inverse_transform(pred_padded)
for predicted_value in pred_true[:, 0]:
    print(predicted_value)

-29.684424369901482
-29.47506776132603
-29.233705375329308
-29.374935214761646
-26.795182336679517
-26.37970409190207
-27.20903436031242
-26.94649261470788
-26.458994044326392
-26.19645229872185
-24.64231673107128
-26.99325338713552
-26.59145836525616
-26.84726558503216
-26.84726558503216
-25.701285968649
-26.113743358923603
-25.55469342652943
-27.00487802934404
-26.985682704144185
-29.228884773163486
-26.97318101354288
-26.82527240454152
-26.961683526737467
-24.984314376049735
-26.724222408835757
-25.898037644443413
-29.200696860827456
-21.526336732429062
-21.80683135614225


In [26]:
# Per-target minima recorded by the target scaler when it was fitted.
y_sc.data_min_

array([-72., -34., -66.,   0., -81.,   0.])

In [32]:
# Show the zero-padded prediction matrix (the stray leading backtick made this cell a syntax error).
np.concatenate([pred.reshape(-1, 1), np.zeros((30, 5))], axis=1)

array([[0.60450822, 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.60749903, 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.61094707, 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.6089295 , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.64578311, 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.65171851, 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.63987094, 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.64362153, 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.6505858 , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.6543364 , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.67653833, 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.64295352, 0

In [35]:
# Inspect the unscaled array of the dataset at index 4.
all_data[4]

array([[  2.,   0.,   0., ...,   0.,  -2.,   6.],
       [  2.,   0.,   0., ...,  10., -51.,   0.],
       [  2.,   0.,   0., ...,   2.,  -6.,   1.],
       ...,
       [  2.,  55.,   0., ...,  17., -62.,   2.],
       [  2.,  55.,   0., ...,  20., -63.,   3.],
       [  2.,  50.,   0., ...,  16., -59.,   4.]])

In [40]:
# Persist the trained network to model3.h5 in the current working directory.
model.save("model3.h5")

In [3]:
from keras.utils.vis_utils import plot_model

# Write a diagram of the layer graph to model.png.
# NOTE(review): the recorded execution count is In [3], out of order with the cells above;
# `model` must already exist when this runs.
plot_model(model, to_file="model.png")