In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import pandas as pd
import numpy as np
from functools import reduce
from statsmodels.tsa.stattools import adfuller
from sklearn.model_selection import train_test_split

from keras.layers import (
    Conv1D,
    Dense,
    Dropout,
    Input,
    Concatenate,
    GlobalMaxPooling1D,
    MaxPooling1D,
    Flatten,
)
from keras.models import Model, Sequential

<IPython.core.display.Javascript object>

## Steps to wrap inside a function

In [3]:
# Load the per-county feature table and discard the "Unnamed: 0" column
# (presumably a leftover index column from an earlier to_csv — verify).
X_countyname = pd.read_csv("X_countyname.csv").drop(columns=["Unnamed: 0"])

<IPython.core.display.Javascript object>

In [4]:
# County that is excluded from the train/test split below and later appended
# to the test set as its last sample.
# NOTE(review): the name `input` shadows Python's built-in input() for the
# rest of the session; other cells reference it, so it is left unchanged here.
input = "Lauderdale County, Alabama"

<IPython.core.display.Javascript object>

In [5]:
# Feature rows of every county except the held-out one.
X_minus_selected = X_countyname.loc[X_countyname["countyname"].ne(input)]

<IPython.core.display.Javascript object>

In [6]:
# Keep only counties whose time series has exactly EXPECTED_DAYS rows, so that
# every retained sample has the same (timesteps, features) shape.
FEATURE_COLUMNS = ["new_case_rate", "gps_away_from_home", "spend_all"]
EXPECTED_DAYS = 86

check_len_timeseries = []  # one (EXPECTED_DAYS, 3) array per retained county
counties = []  # county names, index-aligned with check_len_timeseries

# A single groupby pass replaces re-filtering the whole frame once per county.
# sort=False yields the groups in order of first appearance, which is the same
# order `.unique()` produced, so downstream alignment with `counties` holds.
for county, subset in X_minus_selected.groupby("countyname", sort=False):
    if len(subset) != EXPECTED_DAYS:
        continue
    # Convert once (the previous version built this array twice per county).
    check_len_timeseries.append(subset[FEATURE_COLUMNS].to_numpy())
    counties.append(county)

<IPython.core.display.Javascript object>

In [8]:
# Number of counties that passed the length check above.
len(counties)

1308

<IPython.core.display.Javascript object>

In [9]:
# create X: stack the per-county (timesteps, features) arrays along a new
# leading axis -> (samples, timesteps, features), stored as float32.
# np.stack takes the sample count from the data itself, so this cell keeps
# working when the input file or the held-out county changes (the previous
# reshape hard-coded 1308 samples).
X = np.stack(check_len_timeseries, axis=0).astype(np.float32)

<IPython.core.display.Javascript object>

In [10]:
# Load the per-county target table and discard the "Unnamed: 0" column
# (presumably a leftover index column from an earlier to_csv — verify).
y_countyname = pd.read_csv("y_countyname.csv").drop(columns=["Unnamed: 0"])

<IPython.core.display.Javascript object>

In [11]:
# Target rows of every county except the held-out one.
y_minus_selected = y_countyname.loc[y_countyname["countyname"].ne(input)]

<IPython.core.display.Javascript object>

In [12]:
# create y: one scalar per county, in the same order as `counties` (and
# therefore as the samples of X).
# Each step of the fold, acc + change + acc * change, equals
# (1 + acc) * (1 + change) - 1.
# NOTE(review): the fold is seeded with 1, so the result is
# 2 * prod(1 + change) - 1 rather than prod(1 + change) - 1 — confirm that the
# seed is intentional (the held-out county's target uses the same seed).
total_perc_change = [
    reduce(
        lambda acc, change: acc + change + acc * change,
        y_minus_selected.loc[
            y_minus_selected["countyname"] == county, "emp_incbelowmed"
        ],
        1,
    )
    for county in counties
]

y = np.array(total_perc_change, dtype=np.float32)

<IPython.core.display.Javascript object>

In [13]:
# Split into train and test sets: 20% of the samples go to the test set, and
# random_state=0 makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

<IPython.core.display.Javascript object>

# Add the selected county back to the test data

In [14]:
# Feature matrix (rows x 3 feature columns) of the held-out county only.
selected_rows = X_countyname["countyname"] == input
X_selected = X_countyname.loc[
    selected_rows, ["new_case_rate", "gps_away_from_home", "spend_all"]
].to_numpy()

<IPython.core.display.Javascript object>

In [15]:
# Shape of the test features before the held-out county is appended.
X_test.shape

(262, 86, 3)

<IPython.core.display.Javascript object>

In [19]:
# Shape of the held-out county's feature matrix; it has to match a single
# sample of X_test (timesteps, features) for the stacking step that follows.
X_selected.shape

(86, 3)

<IPython.core.display.Javascript object>

In [20]:
# add selected back to test: give the held-out county's (timesteps, features)
# array a leading sample axis and append it as the LAST test sample.
# Casting to X_test.dtype keeps the test set in float32; stacking a float64
# array (the usual pandas float dtype) directly would upcast all of X_test.
# np.newaxis avoids hard-coding the (1, 86, 3) shape; np.vstack still raises
# if the county's shape does not match the other test samples.
# NOTE: this cell is not idempotent — re-running it appends the county again.
X_test = np.vstack((X_test, X_selected[np.newaxis, ...].astype(X_test.dtype)))

<IPython.core.display.Javascript object>

In [23]:
# Target for the held-out county, folded the same way as the other counties:
# each step acc + change + acc * change equals (1 + acc) * (1 + change) - 1.
# NOTE(review): the fold is seeded with 1, giving 2 * prod(1 + change) - 1 —
# confirm that the seed is intentional.
y_subset = y_countyname.loc[y_countyname["countyname"] == input]
y_selected = reduce(
    lambda acc, change: acc + change + acc * change,
    y_subset["emp_incbelowmed"],
    1,
)

<IPython.core.display.Javascript object>

In [28]:
# Store the held-out target as a float32 0-d array, matching y's dtype.
y_selected = np.asarray(y_selected, dtype=np.float32)

<IPython.core.display.Javascript object>

In [33]:
# Append the held-out county's target as the last element of y_test, mirroring
# its position at the end of X_test.
# NOTE: not idempotent — re-running this cell appends the value again.
y_test = np.concatenate((np.ravel(y_test), np.ravel(y_selected)))

<IPython.core.display.Javascript object>

In [34]:
# Model dimensions: timesteps and features come from axes 1 and 2 of X_train;
# the network emits a single regression value.
n_timesteps, n_features = X_train.shape[1:3]
n_outputs = 1

<IPython.core.display.Javascript object>

In [35]:
def create_model(verbose=0, epochs=10, batch_size=32):
    """Build the uncompiled 1D-convolutional regression network.

    The input shape ``(n_timesteps, n_features)`` and the output width
    ``n_outputs`` are read from the notebook's global scope, so those names
    must be defined before this function is called.

    Parameters
    ----------
    verbose, epochs, batch_size :
        Accepted for call-site compatibility but not used anywhere in this
        function; they have no effect on the returned model.

    Returns
    -------
    A ``Sequential`` model that still has to be compiled by the caller.
    """
    layers = [
        # ReLU activations are used to mitigate the vanishing-gradient problem,
        # where gradient information fails to propagate from the output layer
        # back to the input layer.
        Conv1D(
            filters=10,
            kernel_size=1,
            activation="relu",
            input_shape=(n_timesteps, n_features),
        ),
        Conv1D(filters=10, kernel_size=1, activation="relu"),
        Dropout(0.5),
        # (a MaxPooling1D(pool_size=2) stage was tried here and is disabled)
        Flatten(),
        Dense(50, activation="relu"),  # open question: try activation="elu"?
        # kernel_initializer sets how the layer's initial random weights are drawn.
        Dense(n_outputs, kernel_initializer="normal", activation="linear"),
    ]
    return Sequential(layers)

<IPython.core.display.Javascript object>

In [36]:
# Build, compile and train the model, then keep its weights for reuse.
# Mean absolute error is both the training loss and the reported metric.
model = create_model()
model.compile(
    optimizer="adam",
    loss="mean_absolute_error",
    metrics=["mean_absolute_error"],
)
# NOTE(review): fit() is called without epochs/batch_size, so the Keras
# defaults apply; create_model's `epochs=10` argument is not forwarded here.
# Confirm that training with the default epoch count is intended.
model.fit(x=X_train, y=y_train)
weights = model.get_weights()



<IPython.core.display.Javascript object>

In [37]:
## Second model instance that reuses the trained weights for single-sample
## prediction.
# NOTE(review): create_model does not use its batch_size argument, so this
# network has the same architecture as `model`.
single_item_model = create_model(batch_size=1)
single_item_model.compile(
    optimizer="adam",
    loss="mean_absolute_error",
    metrics=["mean_absolute_error"],
)
single_item_model.set_weights(weights)

<IPython.core.display.Javascript object>

In [40]:
# Predict for the last test sample, which is the held-out county appended
# earlier; a leading batch axis is added so the input is (1, timesteps, features).
# (To predict another county, pass its sample index instead of -1.)
single_item_model.predict(np.expand_dims(X_test[-1], axis=0))

array([[-0.271874]], dtype=float32)

<IPython.core.display.Javascript object>