In [None]:
import plotly.express as px
import numpy as np
import pandas as pd

import pymc3 as pm
import theano
import theano.tensor as tt
import theano.tensor.slinalg

import matplotlib.pyplot as plt 

from urllib.request import urlopen
import json

import geopandas
import ast

import os

import sys
sys.path.append('../src/')

from utils.ckm_plotting import plot_rt, gen_dropdown
from utils.state_abbreviations import state_abbr_map, state_abbr_map_r

from generate_rt import create_case_pop_df
from generate_rt import calc_p_delay, confirmed_to_onset, adjust_onset_for_right_censorship
from generate_rt import MCMCModel

import gc

from generate_rt import df_from_model, create_and_run_model, regional_rt_model

%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from src.utils.p_delay_default import P_DELAY

In [None]:
from src.utils.load_data import load_uk_population_df
from src.utils.load_data import load_uk_confirmed_cases_df

In [None]:
import geopandas as gp

In [None]:
# Load the UK confirmed-case table and expose the area name under the
# 'county' column name that the rest of this notebook uses.
uk_cases_df = load_uk_confirmed_cases_df().rename(columns={'areaName': 'county'})
uk_cases_df.head()

In [None]:
mapping_df = pd.read_csv('../data/uk/ulta_region_mapping.csv')

# Lookup dict keyed by the LAD18CD column with the RGN18NM column as value.
ltla_region_map = (
    mapping_df[['LAD18CD', 'RGN18NM']]
    .drop_duplicates()
    .set_index('LAD18CD')['RGN18NM']
    .to_dict()
)
mapping_df.head()

In [None]:
# Tag every row with the region looked up from its area code, then expose the
# specimen date under the generic 'date' name and parse it to datetimes.
uk_cases_df = uk_cases_df.assign(
    State=uk_cases_df['areaCode'].map(ltla_region_map)
).rename(columns={'specimenDate': 'date'})
uk_cases_df['date'] = pd.to_datetime(uk_cases_df['date'])

In [None]:
# Population table used below to enrich the case data.
uk_pop_df = load_uk_population_df(
    population_df_path='../data/uk/population.xls',
)
uk_pop_df.head()

In [None]:
# Column-name configuration for the merge and the per-area feature step.
pop_fips_col = 'areaCode'
case_fips_col = 'areaCode'

case_county_col = 'county'
case_state_col = 'State'

cum_cases_col = 'totalLabConfirmedCases'
date_col = 'date'

# Attach population data to every case row. A left join keeps case rows for
# areas that have no matching population record.
cases_pop_df = pd.merge(
    left=uk_cases_df,
    right=uk_pop_df.rename(columns={pop_fips_col: case_fips_col}),
    on=case_fips_col,
    how='left',
).drop_duplicates()

# Region key used for grouping: "<Title-cased county> <UPPER-CASED STATE>".
# The key is NaN when either part is NaN; pandas' groupby skips NaN keys by
# default, so such rows do not appear in the result built below.
cases_pop_df['County_State'] = (
    cases_pop_df[case_county_col].str.title()
    + ' '
    + cases_pop_df[case_state_col].str.upper()
)

# Per-area features, computed on an explicit sorted copy of each group.
# Mutating the groupby sub-frame in place (inplace sort / column assignment on
# a slice) raises SettingWithCopyWarning and is unreliable under copy-on-write.
append_list = []
for _, area_df in cases_pop_df.groupby('County_State'):
    area_df = area_df.sort_values(date_col).copy()

    # Daily new cases: difference of the cumulative count, smoothed with a
    # centered 7-day gaussian window (std=2) and rounded.
    area_df['new_cases'] = (
        area_df[cum_cases_col]
        .diff()
        .rolling(7, win_type='gaussian', min_periods=1, center=True)
        .mean(std=2)
        .round()
    )

    # Active cases: cumulative count minus the cumulative count 14 rows
    # earlier (treated as 0 for the first 14 rows of each area).
    area_df['active_cases'] = (
        area_df[cum_cases_col] - area_df[cum_cases_col].shift(14).fillna(0)
    )
    append_list.append(area_df)

cases_pop_df = pd.concat(append_list)


In [None]:
# Normalise the state column to upper case.
cases_pop_df = cases_pop_df.assign(
    **{case_state_col: cases_pop_df[case_state_col].str.upper()}
)

In [None]:
# Preview the first rows of the merged case/population frame.
cases_pop_df.head(5)

In [None]:
# Fit the regional Rt model state by state and write one CSV per state.
FILTERED_STATES = cases_pop_df['State'].unique().tolist()[:10]

agg_level = 'County_State'

OUTPUT_PATH = '../../DATA/UK/'

label = 'county'

# makedirs creates missing parent directories too and does not fail when the
# directory already exists (os.mkdir does neither).
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Collects states with no rows plus whatever regional_rt_model reports in its
# own error list.
err_list_overall = []
for i, STATE in enumerate(FILTERED_STATES):

    print (f'{STATE} : {i+1} of {len(FILTERED_STATES)} states...')
    subset_df = cases_pop_df[cases_pop_df['State'] == STATE]

    print(f'DEBUGGING: {subset_df.shape}')
    if subset_df.shape[0] == 0:
        # Nothing to fit: record the state and move on instead of handing an
        # empty frame to the model (e.g. a NaN state never equals itself, so
        # its subset is always empty).
        err_list_overall.append(STATE)
        print (f'{STATE} appears to be missing from the data set.')
        continue

    results, err_list = regional_rt_model(
        subset_df,
        case_col='new_cases',
        region_col=agg_level,
        output_path=OUTPUT_PATH + f'rt_{label}_{STATE}.csv',
    )

    err_list_overall = err_list_overall + err_list


In [None]:
# Small-multiples grid: one panel per region of the most recent `results`
# frame, four panels per row.
ncols = 4
nrows = int(np.ceil(len(results.index.levels[0]) / ncols))

fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(14, nrows*3), sharey='row')

region_panels = zip(axes.ravel(), results.groupby('region'))
for ax, (county_state, result) in region_panels:
    # Drop the outer index level so plot_rt receives the per-region frame.
    plot_rt(county_state, result.droplevel(0), ax)

fig.tight_layout()
fig.set_facecolor('w')

In [None]:
# Stitch every per-state result file in OUTPUT_PATH into a single frame.
rt_county_df = pd.concat(
    [
        pd.read_csv(OUTPUT_PATH + fname)
        for fname in os.listdir(OUTPUT_PATH)
        if 'rt_county_' in fname
    ],
    ignore_index=True,
)

# Bring the area code and state back in by looking up each region label.
for col in ['areaCode', 'State']:
    region_code_map = cases_pop_df.set_index('County_State')[col].to_dict()
    rt_county_df[col] = rt_county_df['region'].map(region_code_map)

# Whitespace-free variant of the state name (words joined with underscores).
rt_county_df['state'] = rt_county_df['State'].apply(
    lambda state_name: '_'.join(state_name.split())
)
rt_county_df = rt_county_df.rename(columns={'areaCode': 'countyFIPS'})
rt_county_df.to_csv('../../DATA/UK/rt_county.csv')

rt_county_df.head()

# Rt Plots

In [None]:
SAMPLE_FREQUENCY = 7  # in days
# Every SAMPLE_FREQUENCY-th entry of the sorted unique dates, starting at the first.
DATE_SUBSET = list(np.sort(rt_county_df.date.unique().tolist())[::SAMPLE_FREQUENCY])

In [None]:
import ast

# Boundary geometries; the area-code column is renamed to 'id' so the
# choropleth can reference it through 'properties.id'.
fips_col = 'lad19cd'
ltlas_df = gp.read_file(
    'https://c19pub.azureedge.net/assets/geo/ltlas_v1.geojson'
).rename(columns={fips_col: 'id'})
ltlas_df.head()

In [None]:
# Eleven-colour palette used for the maps below.
ckm_color_palette = [
    'rgb(208,209,230)',
    'rgb(232,189,233)',
    'rgb(222,159,223)',
    'rgb(202,100,204)',
    'rgb(168,56,170)',
    'rgb(138,46,140)',
    'rgb(124,41,125)',
    'rgb(109,36,111)',
    'rgb(95,32,96)',
    'rgb(66,22,66)',
    'rgb(51,17,52)',
]

color_palette = ckm_color_palette

# Discrete (stepped) colour scale: each colour is listed twice, once at the
# lower and once at the upper edge of its equal-width band on [0, 1].
custom_color_scale = [
    [edge / len(color_palette), rgb]
    for step, rgb in enumerate(color_palette)
    for edge in (step, step + 1)
]

In [None]:
# Animated choropleth of the 'mean' column, one frame per date.
fig = px.choropleth(
    data_frame=rt_county_df,
    locations='countyFIPS',
    # GeoDataFrame.to_json() returns JSON text, so it has to be parsed with a
    # JSON parser. ast.literal_eval only understands Python literals and fails
    # as soon as the document contains null / true / false.
    geojson=json.loads(ltlas_df.to_json()),
    hover_name='region',
    animation_frame='date',
    # Matches each 'countyFIPS' value against the 'id' property of a feature.
    featureidkey='properties.id',
    color='mean',
    color_continuous_scale=custom_color_scale,
)

fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [None]:
# Peek at the raw date values.
rt_county_df['date'].head()

In [None]:
# Number of distinct date values (one animation frame per value).
len(rt_county_df['date'].unique())

In [None]:
# Share of Python types found in the geojson id column.
# NOTE(review): the original line read `ltlas_df..apply(...)`, which is a
# SyntaxError; 'id' is assumed to be the intended column (it is the key the
# choropleth joins on) -- confirm.
ltlas_df['id'].apply(type).value_counts(normalize=True, dropna=False)

In [None]:
# Share of Python types found in the region column.
rt_county_df['region'].apply(type).value_counts(normalize=True, dropna=False)

In [None]:
# All distinct county names present in the case data.
uk_cases_df['county'].unique().tolist()

In [None]:
import requests

# Download the latest cases JSON. A timeout keeps the cell from hanging
# forever on a stalled connection, and raise_for_status surfaces HTTP errors
# here instead of as a confusing JSON decoding failure.
cases_response = requests.get(
    'https://coronavirus.data.gov.uk/downloads/json/coronavirus-cases_latest.json',
    timeout=30,
)
cases_response.raise_for_status()
cdf = cases_response.json()

In [None]:
# Preview the combined Rt results.
rt_county_df.head(5)

In [None]:
# Relative frequency of each areaName in the 'countries' section of the download.
pd.DataFrame(cdf['countries'])['areaName'].value_counts(normalize=True)

In [None]:
# Read the countries file with the top-level JSON keys as the row index.
test_df = pd.read_json(
    'https://c19downloads.azureedge.net/downloads/data/countries_latest.json',
    orient='index',
)
test_df.head()

In [None]:
# Fetch the countries file directly. The timeout avoids an indefinite hang and
# raise_for_status reports HTTP errors before any JSON parsing is attempted.
q = requests.get(
    'https://c19downloads.azureedge.net/downloads/data/countries_latest.json',
    timeout=30,
)
q.raise_for_status()
# NOTE(review): looks up the empty-string key; this raises KeyError unless the
# payload really has such a key -- confirm the intended key.
q.json()['']

In [None]:
# Evaluate the entry stored under 'S92000003' as a Python literal.
ast.literal_eval(q.json()['S92000003'])

In [None]:
# Try reading the same countries file through geopandas.
gp.read_file(
    'https://c19downloads.azureedge.net/downloads/data/countries_latest.json'
)

In [None]:
# Reference note kept as a bare string (the cell just displays it): a list of
# JSON `orient` layouts ('split', 'records', 'index', 'columns', 'values').
# NOTE(review): presumably pasted from the pandas read_json/to_json docs -- confirm.
'''
- ``'split'`` : dict like
      ``{index -> [index], columns -> [columns], data -> [values]}``
    - ``'records'`` : list like
      ``[{column -> value}, ... , {column -> value}]``
    - ``'index'`` : dict like ``{index -> {column -> value}}``
    - ``'columns'`` : dict like ``{column -> {index -> value}}``
    - ``'values'`` : just the values array'''

In [None]:
from utils.rt_plotting import map_rt
from tqdm import tqdm

In [None]:
from utils.rt_plotting import animate_state, animate_country

In [None]:
SAMPLE_FREQUENCY = 7  # in days
# Every SAMPLE_FREQUENCY-th entry of the sorted unique dates.
DATE_SUBSET = [date for i, date in \
    enumerate(np.sort(rt_county_df.date.unique().tolist())) if i%SAMPLE_FREQUENCY==0]
OUTPUT_DIR = '../output/'
COUNTRY_NAME = 'UK'
print ("Creating Country Plots....")
## Country Map
COUNTRY_DIR = OUTPUT_DIR+f'{COUNTRY_NAME}/country/'
# COUNTRY_DIR is nested ('../output/UK/country/'). os.mkdir only creates the
# last component and raises when a parent is missing; makedirs creates the
# whole chain and tolerates an already existing directory.
os.makedirs(COUNTRY_DIR, exist_ok=True)

# One static HTML file per level name. The arguments passed to animate_country
# are the same for both levels; only the save path differs.
for level in ['state','county']:
    fig = animate_country(
        data_df=rt_county_df,
        geojson_df=ltlas_df,
        disp_col='mean',
        date_list=DATE_SUBSET,
        animate=False,
        save_path=COUNTRY_DIR+f'country_{level}_static.html',
        scope=None,
    )

# Only the figure from the last loop iteration is adjusted and shown inline.
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [None]:
# Distinct underscore-joined state names present in the results.
rt_county_df.loc[:, 'state'].unique().tolist()

In [None]:
# Read the exported CSV back in to inspect what was written to disk.
rt_test = pd.read_csv('../../DATA/UK/rt_county.csv')
rt_test.head(5)

In [None]:
# Distinct lengths of the values in the date column.
rt_test['date'].apply(len).unique().tolist()

In [None]:
# States that have at least one row, in groupby (sorted) order.
cases_pop_df.groupby(['State'])['County_State'].nunique().index.tolist()