In [4]:
import numpy as np
import pandas as pd
import math
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow_addons as tfa
from sklearn.preprocessing import MinMaxScaler

In [5]:
df = pd.read_csv("data2.csv")

In [6]:
cols = ["class", "tsa", "visits_hotels", "visits_restaurants", "visits_retail", "visits_personal_care", "visits_gyms", "visits_health", "temperature", "flu", "tests", "hospitalized"]
state_cols = ["density", "svi"]

In [7]:
y_series = df["class"]
X_series = df.copy()
wind = 14
num_classes = 20
min_max_scaler = MinMaxScaler()
for col in cols + state_cols:
    X_series[[col]] = min_max_scaler.fit_transform(X_series[[col]])
X = np.empty((len(df) - wind, wind, len(cols)))
include_set = []
for i in range(len(df) - wind):
    X[i] = np.array(X_series[cols][i:i+wind])
    include_set.append(len(X_series["state"][i:i+wind].unique()) == 1)
X = X[include_set]
    
X_state = np.array(df[state_cols][(wind-1):-1])
X_state = X_state[include_set]

y = np.array(y_series[wind:])
y_hot = np.zeros((len(y), num_classes))
y_hot[np.arange(y.size),y] = 1
y_hot = y_hot[include_set]

In [8]:
X_train, X_test, X_state_train, X_state_test, y_train, y_test = train_test_split(X, X_state, y_hot, test_size=0.25)

In [9]:
len(cols)

12

In [10]:
def construct_model():
    feat_inputs = tf.keras.Input(shape=(wind, 12))
    state_inputs = tf.keras.Input(shape=(2))
    lstm = tf.keras.layers.LSTM(120, return_sequences=True, input_shape=(wind, 12))
    lstm_out = lstm(feat_inputs)
    dropout2 = tf.keras.layers.Dropout(0.2)
    dropout2_out = dropout2(lstm_out)
    lstm2 = tf.keras.layers.LSTM(120, return_sequences=False)
    lstm2_out = lstm2(dropout2_out)
    dense1 = tf.keras.layers.Dense(3)
    dense1_out = dense1(state_inputs)
    dense1_out.shape
    concat = tf.keras.layers.concatenate((tf.keras.layers.Flatten()(lstm2_out), dense1_out), axis=1)
    dropout = tf.keras.layers.Dropout(0.2)
    dropout_out = dropout(concat)
    dense2 = tf.keras.layers.Dense(num_classes, activation="softmax")
    dense2_out = dense2(dropout_out)
    model = tf.keras.Model(inputs=(feat_inputs, state_inputs), outputs=dense2_out, name="model")
    model.compile(loss=tf.losses.CategoricalCrossentropy(),
                    optimizer=tf.optimizers.Adam(),
                    metrics=[tfa.metrics.F1Score(num_classes=num_classes,average="micro")])
    return model

In [11]:
models = []
scores = []
for i in range(10):
    print("Training %d" % i)
    model = construct_model()
    model.fit((X_train, X_state_train), y_train, epochs=40, batch_size=16, validation_data=((X_test, X_state_test), y_test))
    models.append(model)
    scores.append(model.evaluate((X_test, X_state_test), y_test))

Training 0
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Training 1
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Training 2
Epoch 1/40
Epoch 2/40
Epo

In [None]:
start_idx = wind - 5
unequal = list(filter(lambda x: np.where(x[1]==1)[0] != x[0][start_idx][0] * 4 or np.where(x[1]==1)[0] != x[0][start_idx + 1][0] * 4 or np.where(x[1]==1)[0] != x[0][start_idx + 2][0] * 4 or np.where(x[1]==1)[0] != x[0][start_idx + 3][0] * 4 or np.where(x[1]==1)[0] != x[0][start_idx + 4][0] * 4, zip(X, y_hot, X_state)))k'jhrl w

In [None]:
model.evaluate((np.array([x[0] for x in unequal]), np.array([x[2] for x in unequal])), np.array([x[1] for x in unequal]))

In [243]:
model.metrics_names

['loss', 'f1_score']

In [140]:
X

array([[[0.000000e+00, 2.280522e+06, 9.757000e+03, ..., 4.333100e+00,
         7.500000e+01, 0.000000e+00],
        [0.000000e+00, 2.089641e+06, 9.475000e+03, ..., 6.582710e+00,
         1.880000e+02, 0.000000e+00],
        [0.000000e+00, 1.736393e+06, 9.707000e+03, ..., 6.582710e+00,
         2.260000e+02, 0.000000e+00],
        [0.000000e+00, 1.877401e+06, 1.047300e+04, ..., 6.582710e+00,
         2.510000e+02, 0.000000e+00],
        [0.000000e+00, 2.130015e+06, 1.144900e+04, ..., 6.582710e+00,
         3.740000e+02, 0.000000e+00]],

       [[0.000000e+00, 2.089641e+06, 9.475000e+03, ..., 6.582710e+00,
         1.880000e+02, 0.000000e+00],
        [0.000000e+00, 1.736393e+06, 9.707000e+03, ..., 6.582710e+00,
         2.260000e+02, 0.000000e+00],
        [0.000000e+00, 1.877401e+06, 1.047300e+04, ..., 6.582710e+00,
         2.510000e+02, 0.000000e+00],
        [0.000000e+00, 2.130015e+06, 1.144900e+04, ..., 6.582710e+00,
         3.740000e+02, 0.000000e+00],
        [0.000000e+00, 2.1

In [141]:
y

array([0, 0, 0, ..., 3, 3, 3])

In [12]:
print("\n".join([str(x[1]) for x in scores]))

0.8248299360275269
0.8200112581253052
0.8279478549957275
0.8083900213241577
0.8137755393981934
0.8222789168357849
0.8001700639724731
0.8123582601547241
0.8146258592605591
0.8197278380393982


In [13]:
print(np.mean(np.array([x[1] for x in scores])))

0.816411554813385
