In [None]:
import pandas as pd
import numpy as np
import csv

from datetime import datetime, date
import matplotlib.pyplot as plt

# Base names of the survey signals used as model inputs. Each one is looked up
# further down as the column "smoothed_<name>_weighted".
input_features = [
    "pct_cmnty_cli", "pct_cli",
    "pct_cli_anosmia_ageusia", "pct_hh_cli",
]

In [None]:
# Read the confirmed-case table and discard its first column.
newcases = pd.read_csv(
    "../mcmc_model_data/time_series_Confirmed_CMU_fips.csv"
).iloc[:, 1:]

# Replace every column from position 2 onwards by its difference with the
# column to its left (day-over-day change). Position 1 keeps its raw value.
# NOTE(review): assumes position 0 is the FIPS column and the rest are
# cumulative daily totals -- confirm against the CSV.
newcases.iloc[:, 2:] = (
    newcases.iloc[:, 2:].to_numpy() - newcases.iloc[:, 1:-1].to_numpy()
)

# Move FIPS out of the columns and use it as the row labels; rewrite the
# m/d/yy column labels as ISO dates; then put dates on the rows.
indnames = newcases.pop("FIPS")
newcases = newcases.rename(
    index=indnames,
    columns=lambda label: datetime.strptime(label, "%m/%d/%y").strftime("%Y-%m-%d"),
)
newcases = newcases.T

# Trailing 7-row mean along the date axis. The first six rows have incomplete
# windows (NaN) and are dropped before flipping back to counties-on-rows.
newcases = newcases.rolling(7).mean()
newcases = newcases.iloc[6:, :].T

# R estimates, one row per county, keyed by the 'fips' column.
R = pd.read_csv("../mcmc_model_data/RValuesCMU_fips.csv", index_col='fips')
del R["Jurisdiction"]
R = R.loc[R.index != 44003, :]

# FIPS codes of the counties kept for the analysis (order here is arbitrary;
# every table is sorted by FIPS after filtering).
fips = np.array([
    12086, 4013, 6037, 12011, 48201, 48113,
    6059, 32003, 12057, 12095, 6065, 48029,
    6071, 12099, 12031, 48439, 6073, 48215,
])

# Keep only the selected counties and order the rows by FIPS.
idx = R.index.isin(fips)
R = R.loc[idx, :].sort_index()

# newcases is indexed by FIPS code, so the same county selection applies.
idx = newcases.index.isin(fips)
newcases = newcases.loc[idx, :].sort_index()

# Column labels parsed into datetime.date objects, in column order.
newcases_dates = [
    datetime.strptime(label, "%Y-%m-%d").date() for label in newcases.columns
]

# Survey table, restricted to the rows where both gender and age_bucket are
# the 'overall' aggregate.
cmudf = pd.read_csv("../overall-county-smoothed.csv")
cmudf = cmudf.query("gender=='overall' & age_bucket=='overall'")

# Set the index as the identifier for the extracted features
def extract_feature(col_name, source=None, keep_fips=None):
    """Pivot one column of the survey table into a county-by-date frame.

    Parameters
    ----------
    col_name : str
        Name of the value column to extract.
    source : pandas.DataFrame, optional
        Long-format table with at least 'fips', 'date' and ``col_name``
        columns. Defaults to the notebook-level ``cmudf``.
    keep_fips : array-like, optional
        County codes to keep. Defaults to the notebook-level ``fips``.

    Returns
    -------
    pandas.DataFrame
        Rows are the kept FIPS codes in ascending order, columns are the
        distinct 'date' values. Duplicate (fips, date) pairs are averaged and
        missing pairs are filled with 0.
    """
    if source is None:
        source = cmudf
    if keep_fips is None:
        keep_fips = fips

    # The string "mean" selects pandas' own aggregation; passing np.mean emits
    # a FutureWarning on pandas >= 2.1 and is slated to change behaviour.
    wide = pd.pivot_table(source, values=col_name, index='fips', columns='date',
                          fill_value=0, aggfunc="mean")
    wide = wide.loc[wide.index.isin(keep_fips), :]
    return wide.sort_index()

# Reference survey signal; its date span defines what the other tables may keep.
cli = extract_feature("smoothed_pct_cli")
cli_dates = [datetime.strptime(x, "%Y-%m-%d").date() for x in cli.columns]
# Bounds are computed once instead of once per column inside the masks below.
cli_first, cli_last = min(cli_dates), max(cli_dates)

# R: the first column is always dropped (the leading False); every other
# column is kept only when its date lies inside the survey span.
R_dates = [datetime.strptime(x, "%Y-%m-%d").date() for x in R.columns[1:]]
R = R.loc[:, [False, *[cli_first <= day <= cli_last for day in R_dates]]]
R_dates = [datetime.strptime(x, "%Y-%m-%d").date() for x in R.columns]
R_first, R_last = min(R_dates), max(R_dates)

# newcases: keep the dates that lie inside both the survey span and the
# (already trimmed) R span.
newcases = newcases.loc[:, [cli_first <= day <= cli_last and R_first <= day <= R_last
                            for day in newcases_dates]]
# Refresh the parsed dates so they describe the final set of columns.
newcases_dates = [datetime.strptime(x, "%Y-%m-%d").date() for x in newcases.columns]

# Floor the smoothed case counts at zero and append a trailing length-1 axis
# so the array can be concatenated with the other signals along axis 2.
np_newcases = np.clip(newcases.to_numpy(), 0, None)[:, :, np.newaxis]

# Build the model input: one (county, day, 1) slab per survey signal, followed
# by the smoothed case counts, joined along the last axis in a single call.
first_R_day, last_R_day = min(R_dates), max(R_dates)  # hoisted out of the loop
signal_slabs = []
for name in input_features:
    feature = extract_feature(f"smoothed_{name}_weighted")
    feature_dates = [datetime.strptime(x, "%Y-%m-%d").date() for x in feature.columns]
    feature = feature.loc[:, [first_R_day <= day <= last_R_day for day in feature_dates]]
    # Every signal must sit on the same county-by-day grid as R; fail with a
    # readable message rather than a generic concatenate error.
    if feature.shape != R.shape:
        raise ValueError(
            f"{name}: feature grid {feature.shape} does not match R grid {R.shape}"
        )
    signal_slabs.append(feature.to_numpy()[:, :, np.newaxis])
np_inp = np.concatenate([*signal_slabs, np_newcases], axis=2)
np_R = R.to_numpy()

In [None]:
# Column index of the first R date on or after the forecast origin.
d = next((i for i, day in enumerate(R_dates) if day >= date(2020, 6, 17)), None)
if d is None:
    raise ValueError("no R date on or after 2020-06-17")
# The look-back window covers columns d-20 .. d; a negative start would
# silently wrap around to the end of the array.
if d < 20:
    raise ValueError(f"need 20 days of history before the origin, found {d}")

# Copy so that the rescaling below never writes through to np_inp.
pred_inp = np_inp[:, d - 20:d + 1, :].copy()

# Rescale the last channel (case counts) per county to a 0-100 range based on
# its maximum inside the window. A county whose window is all zeros keeps
# zeros instead of turning into 0/0 = NaN.
window_peak = pred_inp[:, :, -1].max(axis=1, keepdims=True)
window_peak = np.where(window_peak == 0, 1.0, window_peak)
pred_inp[:, :, -1] = pred_inp[:, :, -1] / window_peak * 100

# All channels are then divided by 100.
pred_inp /= 100
pred_inp.shape

In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Switch every visible GPU to on-demand memory allocation.
gpus = tf.config.experimental.list_physical_devices('GPU')
if len(gpus) > 0:
    try:
        # The growth setting has to be identical on all GPUs, so set each one.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Raised when the setting is changed after the GPUs were initialized.
        print(err)

In [None]:
# Factor applied to the raw network outputs of both models.
# NOTE(review): presumably undoes a target normalisation applied at training
# time -- confirm where 3.9287667 comes from.
R_SCALE = 3.9287667

# Model that consumes every input channel (survey signals + cases).
cli_model = keras.models.load_model("cli_model")
cli_R = R_SCALE * cli_model.predict(pred_inp)

# Model that consumes only the last channel (cases). The trailing length-1
# axis is rebuilt from pred_inp's own shape rather than a hard-coded
# (18, 21, 1), so a different county list or window length still works.
cases_model = keras.models.load_model("cases_model")
cases_only = pred_inp[:, :, -1].reshape((*pred_inp.shape[:2], 1))
cases_R = R_SCALE * cases_model.predict(cases_only)

In [None]:
# R values for the 21 columns that follow the forecast origin (d+1 .. d+21).
actual_R = np_R[:, np.arange(d + 1, d + 22)]

# Side-by-side view at column 6 of the horizon; "Constant R" repeats the value
# from column 0 of the same window.
comparison = {}
comparison["Prediction Cases"] = cases_R[:, 6]
comparison["Prediction CLI"] = cli_R[:, 6]
comparison["Measured"] = actual_R[:, 6]
comparison["Constant R"] = actual_R[:, 0]
df = pd.DataFrame(comparison)
df

In [None]:
from datetime import timedelta


def _forecast_frame(pred, first_day, row_labels):
    """Wrap a (n_rows, n_days) forecast array in a DataFrame.

    Column ``i`` of ``pred`` is labelled ``first_day + i days``; rows are
    labelled with ``row_labels``.
    """
    by_day = {first_day + timedelta(days=i): pred[:, i] for i in range(pred.shape[1])}
    return pd.DataFrame(by_day, index=row_labels)


# The rows of cases_R / cli_R follow the FIPS-sorted order of the input tables
# (each one goes through sort_index()), not the hand-written order of `fips`.
# Labelling with R.index keeps every forecast attached to the right county.
# NOTE(review): the first forecast day is hard-coded as 2020-06-18, which
# matches column d+1 only if R_dates[d] is exactly 2020-06-17 -- confirm.
tmp = _forecast_frame(cases_R, date(2020, 6, 18), R.index)
tmp.to_csv("cases_Rs_0617.csv", index_label = "fips")
tmp = _forecast_frame(cli_R, date(2020, 6, 18), R.index)
tmp.to_csv("cli_Rs_0617.csv", index_label = "fips")
tmp