## Imports, data, checks

In [294]:
import numpy as np
import requests
import pandas as pd
import bokeh
import datetime
import colorcet as cc
import json
import itertools
import time
from datetime import datetime as dt, date
from jinja2 import Environment, FileSystemLoader
from urllib.request import urlopen
from bokeh.models import CategoricalColorMapper, NumeralTickFormatter, HoverTool
from bokeh.models import ColumnDataSource, Grid, LinearAxis, Plot, VBar, Range1d, Span, BoxAnnotation, Label, LabelSet
from bokeh.plotting import output_notebook, figure
from bokeh.io import reset_output, show, output_file, save, curdoc
from bokeh.layouts import column, row, gridplot
from bokeh.palettes import *
from bokeh.transform import linear_cmap
from bokeh.embed import autoload_static, components, json_item
from bokeh.resources import CDN, INLINE


The vaccine, cases, and deaths source data were relatively easy to grab diretly from the [Larimer county dashboard](https://www.larimer.org/health/communicable-disease/coronavirus-covid-19/larimer-county-positive-covid-19-numbers#/app?tab=risk) as the CSVs download through urls.

In [295]:
larimer_vac_source = pd.read_csv(
    'https://speedtest.larimer.org/covid/index.php?file=vaccinations&csv')

larimer_cases_source = pd.read_csv(
    'https://speedtest.larimer.org/covid/cases.csv', parse_dates=['ReportedDate'])

larimer_deaths_source = pd.read_csv(
    'https://larimer-county-data-lake.s3-us-west-2.amazonaws.com/Public/covid/covid_deaths.csv?t=1631890252549')


The hospitalization data was much more tricky (at least finding a simple solution was tricky) I spent several hours in webscraping research and attempts purgatory. I checked BeautifulSoup, html5lib, lxml, etc. in multiple combinations and none of them had straightforward solutions because the table for hospitalizations is actually rendered through javascript so there is nothing to scrape without actually clicking the buttons. I started down the Selenium and phantomjs path but it seemed like a nightmare. I found this lifesaving article at [Towards Data Science](https://towardsdatascience.com/data-science-skills-web-scraping-javascript-using-python-97a29738353f) which shows how to find specific XHR request urls in the browser developer tools. The requested URL for the rendered table is a pretty vanilla json and not behind any authorization so there is a pretty clean way to get to it. Praise Satan I didn't have to use Selenium.  

In [296]:
url = 'https://larimer-county-data-lake.s3-us-west-2.amazonaws.com/Public/covid/covid_patient_trend.json?t=1632506827395'

response = urlopen(url)
json_data = response.read().decode('utf-8', 'replace')

d = json.loads(json_data)
larimer_hosp_source = pd.json_normalize(d['data'])


In [297]:
# make .csv backups of source data

larimer_vac_source.to_csv('larimer_vac_backup.csv')

larimer_cases_source.to_csv('larimer_cases_backup.csv')

larimer_deaths_source.to_csv('larimer_deaths_backup.csv')

larimer_hosp_source.to_csv('larimer_hosp_backup.csv')


Re-read the backup CSVs so that the notebook runs locally from this point forward.

In [298]:
larimer_vac = pd.read_csv('larimer_vac_backup.csv')

larimer_cases = pd.read_csv('larimer_cases_backup.csv')

larimer_deaths = pd.read_csv('larimer_deaths_backup.csv')

larimer_hosp = pd.read_csv('larimer_hosp_backup.csv')


The source date introduced a random single row from 1970 which is propagating to all the resulting DFs and visualizations so I have to add this dumb piece of code to remove it.

So now we have all of our dataframes

In [299]:
display(larimer_vac)

display(larimer_cases)

display(larimer_deaths)

display(larimer_hosp)


Unnamed: 0.1,Unnamed: 0,Date,daily number of doses received by Larimer County residents,total number of doses recevied by residents,daily number of residents receiving first dose,total number of residents receiving first dose,daily number of residents vaccinated,total number of residents vaccinated,daily number of 70+ vaccinated,total number of 70+ vaccinated,...,daily number of Latinx residents vaccinated,total of Latinx residents vaccinated,daily number of White non-Latinx residents vaccinated,total of White non-Latinx residents vaccinated,daily number of non-White non-Latinx residents vaccinated,total of non-White non-Latinx residents vaccinated,dailyUnknown,totalUnknown,daily_additional_doses,total_additional_doses
0,0,12/14/2020,32,32,32,32,1,1,0.0,0,...,0.0,0,1,1,0.0,0,,0,0,0
1,1,12/15/2020,13,45,13,45,0,1,,0,...,,0,0,1,0.0,0,,0,0,0
2,2,12/16/2020,303,348,303,348,0,1,0.0,0,...,0.0,0,0,1,0.0,0,0.0,0,0,0
3,3,12/17/2020,990,1338,990,1338,0,1,0.0,0,...,0.0,0,0,1,0.0,0,0.0,0,0,0
4,4,12/18/2020,1056,2394,1056,2394,2,3,0.0,0,...,0.0,0,2,3,0.0,0,0.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
430,430,2/17/2022,163,613701,30,256316,43,235740,1.0,34737,...,8.0,13995,31,198755,3.0,14310,1.0,8680,92,138221
431,431,2/18/2022,487,614188,106,256422,110,235850,3.0,34740,...,15.0,14010,64,198819,20.0,14330,11.0,8691,271,138492
432,432,2/19/2022,212,614400,26,256448,61,235911,0.0,34740,...,12.0,14022,41,198860,2.0,14332,6.0,8697,127,138619
433,433,2/20/2022,67,614467,16,256464,12,235923,0.0,34740,...,0.0,14022,10,198870,0.0,14332,2.0,8699,40,138659


Unnamed: 0.1,Unnamed: 0,CaseCount,ReportedDate,Sex,Age,Type,City
0,0,1,2020-03-09,Female,52.0,Confirmed,Johnstown
1,1,2,2020-03-15,Male,49.0,Confirmed,Fort Collins
2,2,3,2020-03-17,Female,53.0,Confirmed,Fort Collins
3,3,4,2020-03-17,Female,94.0,Confirmed,Loveland
4,4,5,2020-03-18,Male,49.0,Confirmed,Fort Collins
...,...,...,...,...,...,...,...
75121,75121,77422,2022-02-24,Male,21.0,Confirmed,Fort Collins
75122,75122,77423,2022-02-24,Female,36.0,Confirmed,Fort Collins
75123,75123,77424,2022-02-24,Female,44.0,Confirmed,Berthoud
75124,75124,77425,2022-02-24,Female,45.0,Confirmed,Berthoud


Unnamed: 0.1,Unnamed: 0,death_id,death_date,age,gender,city,case_status,count
0,0,a0U5w00000edbfjEAA,2020-03-09,91,Female,Loveland,Probable,1
1,1,a0U5w00000edbfiEAA,2020-03-13,95,Female,Loveland,Probable,2
2,2,a0U5w00000edbfOEAQ,2020-03-15,90,Female,Loveland,Probable,3
3,3,a0U5w00000edbfJEAQ,2020-03-25,87,Female,Fort Collins,Confirmed,4
4,4,a0U5w00000edbfMEAQ,2020-03-25,74,Female,Loveland,Confirmed,5
...,...,...,...,...,...,...,...,...
462,462,a0U5w00000iX3ehEAC,2022-02-03,59,Male,Loveland,Confirmed,463
463,463,a0U5w00000fpAoDEAU,2022-02-03,90,Female,Estes Park,Confirmed,464
464,464,a0U5w00000fpAoOEAU,2022-02-03,94,Female,Berthoud,Confirmed,465
465,465,a0U5w00000fpBnzEAE,2022-02-05,89,Male,Fort Collins,Confirmed,466


Unnamed: 0.1,Unnamed: 0,Date,admission_count,kpi_admits_indicator,inpatient_count,kpi_patient_indicator,inpatient_count_pct_change
0,0,2020-03-31T00:00:00.000Z,,,47,0,
1,1,2020-04-01T00:00:00.000Z,,,46,0,
2,2,2020-04-02T00:00:00.000Z,,,46,0,
3,3,2020-04-03T00:00:00.000Z,2.0,0.0,46,0,
4,4,2020-04-04T00:00:00.000Z,1.0,0.0,42,0,
...,...,...,...,...,...,...,...
471,471,2022-02-16T00:00:00.000Z,5.0,0.0,40,0,-32.203390
472,472,2022-02-17T00:00:00.000Z,8.0,0.0,40,0,-32.203390
473,473,2022-02-18T00:00:00.000Z,1.0,0.0,34,0,-38.181818
474,474,2022-02-22T00:00:00.000Z,4.0,0.0,32,0,-25.581395


This looks like pretty good start. We'll have to make all the datetimes match and the **hospitalization** and **vaccine** data are daily totals while the **death** and **case counts** data is a case log (a row for each case) so we'll have to do some grouping to get that to match, that will come later.

## Explore, clean, manipulate

In [300]:
dfs = [larimer_vac, larimer_deaths, larimer_cases, larimer_hosp]


def get_obj_col():
    for df in dfs:
        print(list(df.select_dtypes(['object']).columns))


get_obj_col()


['Date']
['death_id', 'death_date', 'gender', 'city', 'case_status']
['ReportedDate', 'Sex', 'Type', 'City']
['Date']


---
I did this and don't like it
```python

dfs = [larimer_vac, larimer_deaths, larimer_cases, larimer_hosp]
df_names = ['larimer_vac', 'larimer_deaths', 'larimer_cases', 'larimer_hosp']


def get_obj_col():
    for df in dfs:
        obj_cols.append(list(df.select_dtypes(['object']).columns))
    zip(df_names, dfs)
    
obj_cols = []
get_obj_col()
zipped_list = zip(df_names, obj_cols)
print(tuple(zipped_list)
```
---

In [301]:
print(larimer_cases.dtypes)
print(larimer_hosp.dtypes)


Unnamed: 0        int64
CaseCount         int64
ReportedDate     object
Sex              object
Age             float64
Type             object
City             object
dtype: object
Unnamed: 0                      int64
Date                           object
admission_count               float64
kpi_admits_indicator          float64
inpatient_count                 int64
kpi_patient_indicator           int64
inpatient_count_pct_change    float64
dtype: object


Convert date columns from each df to datetimes

In [302]:
larimer_vac['Date'] = pd.to_datetime(larimer_vac['Date']).dt.tz_localize(None)
larimer_deaths['Date'] = pd.to_datetime(
    larimer_deaths['death_date']).dt.tz_localize(None)
larimer_cases['Date'] = pd.to_datetime(
    larimer_cases['ReportedDate']).dt.tz_localize(None)
larimer_hosp['Date'] = pd.to_datetime(
    larimer_hosp['Date']).dt.tz_localize(None)


```pd.to_datetime``` was sufficient for most of the dfs but the hospital data was TZ aware and I wanted all of them to match so had to add the ```.dt.tz_localize(None)``` 

In [303]:
def check_date_type():
    for df in dfs:
        print(list(df.select_dtypes(['datetime64']).columns))


check_date_type()


['Date']
['Date']
['Date']
['Date']


The source randomly introduced a dumbass entry from 1970 in the first row which was propogating to all the following DFs and visualizations so I had to remove it here.

In [304]:
larimer_hosp = larimer_hosp[larimer_hosp['Date']
                            > pd.to_datetime('2020-03-30')]
larimer_hosp.head()


Unnamed: 0.1,Unnamed: 0,Date,admission_count,kpi_admits_indicator,inpatient_count,kpi_patient_indicator,inpatient_count_pct_change
0,0,2020-03-31,,,47,0,
1,1,2020-04-01,,,46,0,
2,2,2020-04-02,,,46,0,
3,3,2020-04-03,2.0,0.0,46,0,
4,4,2020-04-04,1.0,0.0,42,0,


In [305]:
# create daily cases from case log
daily_cases = larimer_cases.groupby(['Date']).count().reset_index()

display(daily_cases)
display(daily_cases.dtypes)
print(f"Total case check {daily_cases['CaseCount'].sum()}")
display(daily_cases.describe())


Unnamed: 0.1,Date,Unnamed: 0,CaseCount,ReportedDate,Sex,Age,Type,City
0,2020-03-09,1,1,1,1,1,1,1
1,2020-03-15,1,1,1,1,1,1,1
2,2020-03-17,2,2,2,2,2,2,2
3,2020-03-18,1,1,1,1,1,1,1
4,2020-03-19,2,2,2,2,2,2,2
...,...,...,...,...,...,...,...,...
706,2022-02-20,48,48,48,48,48,48,48
707,2022-02-21,54,54,54,54,54,54,54
708,2022-02-22,91,91,91,91,90,91,91
709,2022-02-23,70,70,70,70,70,70,70


Date            datetime64[ns]
Unnamed: 0               int64
CaseCount                int64
ReportedDate             int64
Sex                      int64
Age                      int64
Type                     int64
City                     int64
dtype: object

Total case check 75126


Unnamed: 0.1,Unnamed: 0,CaseCount,ReportedDate,Sex,Age,Type,City
count,711.0,711.0,711.0,711.0,711.0,711.0,711.0
mean,105.662447,105.662447,105.662447,105.662447,105.509142,105.662447,105.662447
std,144.263652,144.263652,144.263652,144.263652,144.140609,144.263652,144.263652
min,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,19.0,19.0,19.0,19.0,19.0,19.0,19.0
50%,67.0,67.0,67.0,67.0,67.0,67.0,67.0
75%,128.5,128.5,128.5,128.5,128.5,128.5,128.5
max,1099.0,1099.0,1099.0,1099.0,1097.0,1099.0,1099.0


In [306]:
# create daily deaths from death log
daily_deaths = larimer_deaths.groupby(['Date']).count().reset_index()

display(daily_deaths)
display(daily_deaths.dtypes)
print(f"Total death check {daily_deaths['count'].sum()}")
display(daily_deaths.describe())


Unnamed: 0.1,Date,Unnamed: 0,death_id,death_date,age,gender,city,case_status,count
0,2020-03-09,1,1,1,1,1,1,1,1
1,2020-03-13,1,1,1,1,1,1,1,1
2,2020-03-15,1,1,1,1,1,1,1,1
3,2020-03-25,2,2,2,2,2,2,2,2
4,2020-03-29,2,2,2,2,2,2,2,2
...,...,...,...,...,...,...,...,...,...
274,2022-01-31,2,2,2,2,2,2,2,2
275,2022-02-02,1,1,1,1,1,1,1,1
276,2022-02-03,3,3,3,3,3,3,3,3
277,2022-02-05,1,1,1,1,1,1,1,1


Date           datetime64[ns]
Unnamed: 0              int64
death_id                int64
death_date              int64
age                     int64
gender                  int64
city                    int64
case_status             int64
count                   int64
dtype: object

Total death check 467


Unnamed: 0.1,Unnamed: 0,death_id,death_date,age,gender,city,case_status,count
count,279.0,279.0,279.0,279.0,279.0,279.0,279.0,279.0
mean,1.673835,1.673835,1.673835,1.673835,1.673835,1.673835,1.673835,1.673835
std,1.078433,1.078433,1.078433,1.078433,1.078433,1.078433,1.078433,1.078433
min,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
50%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
75%,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
max,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0


In [307]:
daily_cases.set_index('Date', inplace=True)

daily_deaths.set_index('Date', inplace=True)

larimer_vac.set_index('Date', inplace=True)

larimer_hosp.set_index('Date', inplace=True)


In [308]:
# daily_cases.index = pd.to_datetime(daily_cases.index)
# daily_cases = daily_cases.resample("1D").mean()
# daily_cases


**Try this**

```python
x.dt = pd.to_datetime(x.dt)
```
One-liner using mostly @ayhan's ideas while incorporating stack/unstack and fill_value

```python
x.set_index(
    ['dt', 'user']
).unstack(
    fill_value=0
).asfreq(
    'D', fill_value=0
).stack().sort_index(level=1).reset_index()
```
**or this might be better**
```python
s.asfreq('D'))
```


In [309]:
larimer_hosp['admission_count'] = larimer_hosp['admission_count'].astype(
    "Int64")
larimer_hosp


Unnamed: 0_level_0,Unnamed: 0,admission_count,kpi_admits_indicator,inpatient_count,kpi_patient_indicator,inpatient_count_pct_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-03-31,0,,,47,0,
2020-04-01,1,,,46,0,
2020-04-02,2,,,46,0,
2020-04-03,3,2,0.0,46,0,
2020-04-04,4,1,0.0,42,0,
...,...,...,...,...,...,...
2022-02-16,471,5,0.0,40,0,-32.203390
2022-02-17,472,8,0.0,40,0,-32.203390
2022-02-18,473,1,0.0,34,0,-38.181818
2022-02-22,474,4,0.0,32,0,-25.581395


In [310]:
larimer_hosp[larimer_hosp.index.duplicated()]


Unnamed: 0_level_0,Unnamed: 0,admission_count,kpi_admits_indicator,inpatient_count,kpi_patient_indicator,inpatient_count_pct_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-12-15,428,6,0.0,81,1,5.194805
2021-12-15,429,6,0.0,81,1,8.0
2021-12-15,430,6,0.0,81,1,5.194805


This weird 'Unamed:0" column appeared when I switched to using dfs from the backup CSVs so I had to drop it in place to make the following duplicate drops work.


In [311]:
larimer_hosp.drop(['Unnamed: 0'], axis=1, inplace=True)


In [312]:
larimer_hosp.drop_duplicates(keep=False, inplace=True)


In [313]:
larimer_hosp[larimer_hosp.index.duplicated()]


Unnamed: 0_level_0,admission_count,kpi_admits_indicator,inpatient_count,kpi_patient_indicator,inpatient_count_pct_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


In [314]:
daily_cases_filled = daily_cases.asfreq('D', fill_value=0)
daily_deaths_filled = daily_deaths.asfreq('D', fill_value=0)
larimer_vac_filled = larimer_vac.asfreq('D', fill_value=0)
larimer_hosp_filled = larimer_hosp.asfreq('D', fill_value=0)


## Quantify missing data

In [315]:
print(daily_cases_filled.isna().sum().sum())
print(daily_deaths_filled .isna().sum().sum())
print(larimer_vac_filled .isna().sum().sum())
print(larimer_hosp_filled.isna().sum().sum())


0
0
20
38


In [316]:
larimer_hosp_filled = larimer_hosp_filled.fillna(0)
larimer_vac_filled = larimer_vac_filled.fillna(0)


In [317]:
print(daily_cases_filled.isna().sum().sum())
print(daily_deaths_filled .isna().sum().sum())
print(larimer_vac_filled .isna().sum().sum())
print(larimer_hosp_filled.isna().sum().sum())


0
0
0
0


In [318]:
display(daily_cases_filled)
display(daily_deaths_filled)
display(larimer_vac_filled)
display(larimer_hosp_filled)


Unnamed: 0_level_0,Unnamed: 0,CaseCount,ReportedDate,Sex,Age,Type,City
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-03-09,1,1,1,1,1,1,1
2020-03-10,0,0,0,0,0,0,0
2020-03-11,0,0,0,0,0,0,0
2020-03-12,0,0,0,0,0,0,0
2020-03-13,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...
2022-02-20,48,48,48,48,48,48,48
2022-02-21,54,54,54,54,54,54,54
2022-02-22,91,91,91,91,90,91,91
2022-02-23,70,70,70,70,70,70,70


Unnamed: 0_level_0,Unnamed: 0,death_id,death_date,age,gender,city,case_status,count
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-03-09,1,1,1,1,1,1,1,1
2020-03-10,0,0,0,0,0,0,0,0
2020-03-11,0,0,0,0,0,0,0,0
2020-03-12,0,0,0,0,0,0,0,0
2020-03-13,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...
2022-02-02,1,1,1,1,1,1,1,1
2022-02-03,3,3,3,3,3,3,3,3
2022-02-04,0,0,0,0,0,0,0,0
2022-02-05,1,1,1,1,1,1,1,1


Unnamed: 0_level_0,Unnamed: 0,daily number of doses received by Larimer County residents,total number of doses recevied by residents,daily number of residents receiving first dose,total number of residents receiving first dose,daily number of residents vaccinated,total number of residents vaccinated,daily number of 70+ vaccinated,total number of 70+ vaccinated,daily number of 70+ at least one dose,...,daily number of Latinx residents vaccinated,total of Latinx residents vaccinated,daily number of White non-Latinx residents vaccinated,total of White non-Latinx residents vaccinated,daily number of non-White non-Latinx residents vaccinated,total of non-White non-Latinx residents vaccinated,dailyUnknown,totalUnknown,daily_additional_doses,total_additional_doses
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-12-14,0,32,32,32,32,1,1,0.0,0,1.0,...,0.0,0,1,1,0.0,0,0.0,0,0,0
2020-12-15,1,13,45,13,45,0,1,0.0,0,0.0,...,0.0,0,0,1,0.0,0,0.0,0,0,0
2020-12-16,2,303,348,303,348,0,1,0.0,0,2.0,...,0.0,0,0,1,0.0,0,0.0,0,0,0
2020-12-17,3,990,1338,990,1338,0,1,0.0,0,11.0,...,0.0,0,0,1,0.0,0,0.0,0,0,0
2020-12-18,4,1056,2394,1056,2394,2,3,0.0,0,15.0,...,0.0,0,2,3,0.0,0,0.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-02-17,430,163,613701,30,256316,43,235740,1.0,34737,1.0,...,8.0,13995,31,198755,3.0,14310,1.0,8680,92,138221
2022-02-18,431,487,614188,106,256422,110,235850,3.0,34740,2.0,...,15.0,14010,64,198819,20.0,14330,11.0,8691,271,138492
2022-02-19,432,212,614400,26,256448,61,235911,0.0,34740,0.0,...,12.0,14022,41,198860,2.0,14332,6.0,8697,127,138619
2022-02-20,433,67,614467,16,256464,12,235923,0.0,34740,0.0,...,0.0,14022,10,198870,0.0,14332,2.0,8699,40,138659


Unnamed: 0_level_0,admission_count,kpi_admits_indicator,inpatient_count,kpi_patient_indicator,inpatient_count_pct_change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-03-31,0,0.0,47,0,0.000000
2020-04-01,0,0.0,0,0,0.000000
2020-04-02,0,0.0,0,0,0.000000
2020-04-03,2,0.0,46,0,0.000000
2020-04-04,1,0.0,42,0,0.000000
...,...,...,...,...,...
2022-02-19,0,0.0,0,0,0.000000
2022-02-20,0,0.0,0,0,0.000000
2022-02-21,0,0.0,0,0,0.000000
2022-02-22,4,0.0,32,0,-25.581395


In [319]:
display(len(larimer_vac_filled))
display(len(larimer_hosp_filled))
display(len(daily_cases_filled))
display(len(daily_deaths_filled))


435

695

718

700

In [320]:
# valid_entries = larimer_vac.count()
# total_rows = len(larimer_vac.index)
# missing_data = total_rows - valid_entries
# missing_data


```python
merge_ordered(df1,
              df2,
              fill_method="ffill",
              on='column',
              how='outer'
```

- [x] Experimenting with merging on 'Date' column but it's been put back as an int instead of a datetime so may need to re-type that in all the DFs
- [x] Need to rename the date column in one of the frames so they can all be merged

In [321]:
# daily_cases_filled['Date'] = pd.to_datetime(daily_cases_filled['Date']).dt.tz_localize(None)
# daily_deaths_filled['Date'] = pd.to_datetime(daily_deaths_filled['Date']).dt.tz_localize(None)
# larimer_hosp_filled['Date'] = pd.to_datetime(larimer_hosp_filled['Date']).dt.tz_localize(None)
# larimer_vac_filled['Date'] = pd.to_datetime(larimer_vac_filled['Date']).dt.tz_localize(None)


In [322]:
death_case = pd.merge_ordered(
    daily_deaths_filled,
    daily_cases_filled,
    fill_method=None,
    on='Date',
    how='outer')

death_case


Unnamed: 0,Date,Unnamed: 0_x,death_id,death_date,age,gender,city,case_status,count,Unnamed: 0_y,CaseCount,ReportedDate,Sex,Age,Type,City
0,2020-03-09,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1,1,1,1,1,1,1
1,2020-03-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0
2,2020-03-11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0
3,2020-03-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0
4,2020-03-13,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
713,2022-02-20,,,,,,,,,48,48,48,48,48,48,48
714,2022-02-21,,,,,,,,,54,54,54,54,54,54,54
715,2022-02-22,,,,,,,,,91,91,91,91,90,91,91
716,2022-02-23,,,,,,,,,70,70,70,70,70,70,70


In [323]:
death_case_hosp = pd.merge_ordered(
    death_case,
    larimer_hosp_filled,
    fill_method=None,
    on='Date',
    how='outer')

death_case_hosp


Unnamed: 0,Date,Unnamed: 0_x,death_id,death_date,age,gender,city,case_status,count,Unnamed: 0_y,...,ReportedDate,Sex,Age,Type,City,admission_count,kpi_admits_indicator,inpatient_count,kpi_patient_indicator,inpatient_count_pct_change
0,2020-03-09,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1,...,1,1,1,1,1,,,,,
1,2020-03-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0,0,0,0,0,,,,,
2,2020-03-11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0,0,0,0,0,,,,,
3,2020-03-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0,0,0,0,0,,,,,
4,2020-03-13,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0,...,0,0,0,0,0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
713,2022-02-20,,,,,,,,,48,...,48,48,48,48,48,0,0.0,0.0,0.0,0.000000
714,2022-02-21,,,,,,,,,54,...,54,54,54,54,54,0,0.0,0.0,0.0,0.000000
715,2022-02-22,,,,,,,,,91,...,91,91,90,91,91,4,0.0,32.0,0.0,-25.581395
716,2022-02-23,,,,,,,,,70,...,70,70,70,70,70,2,0.0,26.0,0.0,-33.333333


In [324]:
combo_df = pd.merge_ordered(
    death_case_hosp,
    larimer_vac_filled,
    fill_method=None,
    on='Date',
    how='outer')

combo_df


Unnamed: 0,Date,Unnamed: 0_x,death_id,death_date,age,gender,city,case_status,count,Unnamed: 0_y,...,daily number of Latinx residents vaccinated,total of Latinx residents vaccinated,daily number of White non-Latinx residents vaccinated,total of White non-Latinx residents vaccinated,daily number of non-White non-Latinx residents vaccinated,total of non-White non-Latinx residents vaccinated,dailyUnknown,totalUnknown,daily_additional_doses,total_additional_doses
0,2020-03-09,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1,...,,,,,,,,,,
1,2020-03-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,,,,,,,,,,
2,2020-03-11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,,,,,,,,,,
3,2020-03-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,,,,,,,,,,
4,2020-03-13,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
713,2022-02-20,,,,,,,,,48,...,0.0,14022.0,10.0,198870.0,0.0,14332.0,2.0,8699.0,40.0,138659.0
714,2022-02-21,,,,,,,,,54,...,9.0,14031.0,28.0,198898.0,4.0,14336.0,2.0,8701.0,67.0,138726.0
715,2022-02-22,,,,,,,,,91,...,,,,,,,,,,
716,2022-02-23,,,,,,,,,70,...,,,,,,,,,,


The source data has now accidentally added a few data in 2023 instead of 2022 so now I have to do this to truncate the data frame to "today", which is probably a good idea anyway.
Well they finally fixed this so then this broke the next step if the data for today hadn't been updated yet. 

In [325]:
# today = pd.Timestamp.today().normalize()
# today_index = combo_df.index[combo_df['Date']==today].to_list()
# today_index = today_index[1] 
# print(today_index)
# combo_df = combo_df.truncate(before=None, after=today_index)

In [326]:
for col in combo_df.columns:
    print(col)


Date
Unnamed: 0_x
death_id
death_date
age
gender
city
case_status
count
Unnamed: 0_y
CaseCount
ReportedDate
Sex
Age
Type
City
admission_count
kpi_admits_indicator
inpatient_count
kpi_patient_indicator
inpatient_count_pct_change
Unnamed: 0
daily number of doses received by Larimer County residents
total number of doses recevied by residents
daily number of residents receiving first dose
total number of residents receiving first dose
daily number of residents vaccinated
total number of residents vaccinated
daily number of 70+ vaccinated
total number of 70+ vaccinated
daily number of 70+ at least one dose
total number of 70+ at least one dose
daily number of Latinx residents vaccinated
total of Latinx residents vaccinated
daily number of White non-Latinx residents vaccinated
total of White non-Latinx residents vaccinated
daily number of non-White non-Latinx residents vaccinated
total of non-White non-Latinx residents vaccinated
dailyUnknown
totalUnknown
daily_additional_doses
total_additi

In [327]:
combo_df.rename(columns={'count': 'Daily Death Count',
                         'daily number of doses received by Larimer County residents': 'Daily doses',
                         'CaseCount': 'Daily Cases',
                         'admission_count': 'Daily Hospitalizations',
                         'inpatient_count': 'Total Patient Count'
                         }, inplace=True)


In [328]:
combo_df[['Date', 'Daily doses', 'Daily Cases',
          'Daily Hospitalizations', 'Total Patient Count', 'Daily Death Count']]


Unnamed: 0,Date,Daily doses,Daily Cases,Daily Hospitalizations,Total Patient Count,Daily Death Count
0,2020-03-09,,1,,,1.0
1,2020-03-10,,0,,,0.0
2,2020-03-11,,0,,,0.0
3,2020-03-12,,0,,,0.0
4,2020-03-13,,0,,,1.0
...,...,...,...,...,...,...
713,2022-02-20,67.0,48,0,0.0,
714,2022-02-21,134.0,54,0,0.0,
715,2022-02-22,,91,4,32.0,
716,2022-02-23,,70,2,26.0,


In [329]:
print(combo_df.isna().sum().sum())


6202


In [330]:
combo_df = combo_df.fillna(0)
print(combo_df.isna().sum().sum())


0


In [331]:
combo_df[['Date', 'Daily doses', 'Daily Cases',
          'Daily Hospitalizations', 'Daily Death Count']]


Unnamed: 0,Date,Daily doses,Daily Cases,Daily Hospitalizations,Daily Death Count
0,2020-03-09,0.0,1,0,1.0
1,2020-03-10,0.0,0,0,0.0
2,2020-03-11,0.0,0,0,0.0
3,2020-03-12,0.0,0,0,0.0
4,2020-03-13,0.0,0,0,1.0
...,...,...,...,...,...
713,2022-02-20,67.0,48,0,0.0
714,2022-02-21,134.0,54,0,0.0
715,2022-02-22,0.0,91,4,0.0
716,2022-02-23,0.0,70,2,0.0


In [332]:
combo_df[['7-day avg doses',
          '7-day avg Cases',
          '7-day avg Hospitalizations']] = combo_df[['Daily doses',
                                                     'Daily Cases',
                                                     'Daily Hospitalizations']].rolling(7).mean().round(0)

combo_df[['7-Day avg Deaths']] = combo_df[['Daily Death Count']
                                          ].rolling(7).mean().round(3)

combo_df[['7-Day avg Total Patient Count']] = combo_df[['Total Patient Count']
                                          ].rolling(7).mean().round(0)                                          


combo_df[['Date',
          'Daily doses',
          '7-day avg doses',
          'Daily Cases',
          '7-day avg Cases',
          'Daily Hospitalizations',
          '7-day avg Hospitalizations',
          'Daily Death Count',
          '7-Day avg Deaths',
          'Total Patient Count',
          '7-Day avg Total Patient Count']]


Unnamed: 0,Date,Daily doses,7-day avg doses,Daily Cases,7-day avg Cases,Daily Hospitalizations,7-day avg Hospitalizations,Daily Death Count,7-Day avg Deaths,Total Patient Count,7-Day avg Total Patient Count
0,2020-03-09,0.0,,1,,0,,1.0,,0.0,
1,2020-03-10,0.0,,0,,0,,0.0,,0.0,
2,2020-03-11,0.0,,0,,0,,0.0,,0.0,
3,2020-03-12,0.0,,0,,0,,0.0,,0.0,
4,2020-03-13,0.0,,0,,0,,1.0,,0.0,
...,...,...,...,...,...,...,...,...,...,...,...
713,2022-02-20,67.0,242.0,48,103.0,0,3.0,0.0,0.0,0.0,28.0
714,2022-02-21,134.0,227.0,54,95.0,0,3.0,0.0,0.0,0.0,22.0
715,2022-02-22,0.0,184.0,91,82.0,4,3.0,0.0,0.0,32.0,21.0
716,2022-02-23,0.0,152.0,70,74.0,2,2.0,0.0,0.0,26.0,19.0


In [333]:
print(combo_df.isna().sum().sum())


30


In [334]:
combo_df = combo_df.fillna(0)
print(combo_df.isna().sum().sum())


0


In [335]:
# Make backup of final combo df
combo_df.to_csv('combo_df_backup.csv')
combo_df


Unnamed: 0,Date,Unnamed: 0_x,death_id,death_date,age,gender,city,case_status,Daily Death Count,Unnamed: 0_y,...,total of non-White non-Latinx residents vaccinated,dailyUnknown,totalUnknown,daily_additional_doses,total_additional_doses,7-day avg doses,7-day avg Cases,7-day avg Hospitalizations,7-Day avg Deaths,7-Day avg Total Patient Count
0,2020-03-09,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-03-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2020-03-11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020-03-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020-03-13,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
713,2022-02-20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,48,...,14332.0,2.0,8699.0,40.0,138659.0,242.0,103.0,3.0,0.0,28.0
714,2022-02-21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,54,...,14336.0,2.0,8701.0,67.0,138726.0,227.0,95.0,3.0,0.0,22.0
715,2022-02-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,91,...,0.0,0.0,0.0,0.0,0.0,184.0,82.0,3.0,0.0,21.0
716,2022-02-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,70,...,0.0,0.0,0.0,0.0,0.0,152.0,74.0,2.0,0.0,19.0


In [336]:
combo_df[['Date',
          'Daily doses',
          '7-day avg doses',
          'Daily Cases',
          '7-day avg Cases',
          'Daily Hospitalizations',
          '7-day avg Hospitalizations',
          'Daily Death Count',
          '7-Day avg Deaths',
          'Total Patient Count']]


Unnamed: 0,Date,Daily doses,7-day avg doses,Daily Cases,7-day avg Cases,Daily Hospitalizations,7-day avg Hospitalizations,Daily Death Count,7-Day avg Deaths,Total Patient Count
0,2020-03-09,0.0,0.0,1,0.0,0,0.0,1.0,0.0,0.0
1,2020-03-10,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0
2,2020-03-11,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0
3,2020-03-12,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0
4,2020-03-13,0.0,0.0,0,0.0,0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
713,2022-02-20,67.0,242.0,48,103.0,0,3.0,0.0,0.0,0.0
714,2022-02-21,134.0,227.0,54,95.0,0,3.0,0.0,0.0,0.0
715,2022-02-22,0.0,184.0,91,82.0,4,3.0,0.0,0.0,32.0
716,2022-02-23,0.0,152.0,70,74.0,2,2.0,0.0,0.0,26.0


## Visualize

### Hover example

```python
hover = HoverTool()
hover.tooltips = [
    ("Totals", "@TONS_HE High Explosive / @TONS_IC Incendiary / @TONS_FRAG Fragmentation")]

hover.mode = 'vline'

p.add_tools(hover)

show(p)
```

### Color mapper

```python
#Use the field name of the column source
mapper = linear_cmap(field_name='y', palette=Spectral6 ,low=min(y) ,high=max(y))

source = ColumnDataSource(dict(x=x,y=y))

p = figure(width=300, height=300, title="Linear Color Map Based on Y")

p.circle(x='x', y='y', line_color=mapper,color=mapper, fill_alpha=1, size=12, source=source)

color_bar = ColorBar(color_mapper=mapper['transform'], width=8)

p.add_layout(color_bar, 'right')

```


### Saving Pattern

```python
output_notebook()

plot = figure(plot_width=600, plot_height=600, tools='pan,box_zoom,reset')

plot.square(x=[1, 2, 4, 8, 10], y=[6, 2, 18, 4, 9], size=20)


show(plot)
output_file('bokeh_save.html')
save(plot)
reset_output()
```

In [337]:
# Print the color palette we're using then flip it over turn it and reverse it and make sure it worked
print(RdBu[11])
BuRd = RdBu[11][::-1]
print(BuRd)


('#053061', '#2166ac', '#4393c3', '#92c5de', '#d1e5f0', '#f7f7f7', '#fddbc7', '#f4a582', '#d6604d', '#b2182b', '#67001f')
('#67001f', '#b2182b', '#d6604d', '#f4a582', '#fddbc7', '#f7f7f7', '#d1e5f0', '#92c5de', '#4393c3', '#2166ac', '#053061')


In [338]:
# Get and initialize the html template we are using to save our figures
env = Environment(loader=FileSystemLoader('templates'))
fig_template = env.get_template('fig_template.html')


In [339]:
# Set some annotations variables that can be used in all plots

def add_annos(p):
    vac_start = datetime.date(2020, 12, 14)
    vac_line = Span(location=vac_start,
            dimension='height',
            line_dash='dashed')
    vac_label = Label(x=vac_start, 
                text_font_size='11px',
                y=450, 
                x_offset=5, 
                y_offset=5, 
                y_units='screen', 
                text='Vaccines\nAvailable',
                name='vac_label')
    delta_start = datetime.date(2021, 6, 1)               
    delta_end = datetime.date(2021, 12, 1)
    delta_box = BoxAnnotation(left=delta_start,
                        right=delta_end,
                        fill_color="red",
                        fill_alpha=0.2,
                        bottom=0,
                        bottom_units='data')
    delta_label = Label(x=delta_start,
                y=420,
                text_font_size='11px',
                x_offset=5,                         
                y_offset=5, 
                y_units='screen', 
                text='Delta',
                name='delta_label')                     
    omi_start = datetime.date(2021, 12, 1)               
    omi_end = date.today()
    omi_box = BoxAnnotation(left=omi_start,
                    right=omi_end,
                    bottom=0,
                    fill_color='#0072B2',
                    bottom_units='data')
    omi_label = Label(x=omi_start,
                y=400,
                text_font_size='11px',
                x_offset=5,                         
                y_offset=5, 
                y_units='screen', 
                text='Omicron',
                name='omi_label')    
    layouts = [vac_line, vac_label, delta_box, delta_label, omi_box, omi_label]
    
    for l in layouts:
      p.add_layout(l)                              

In [365]:
output_notebook()

lar_vac_data = ColumnDataSource(combo_df)


TOOLS = 'pan, wheel_zoom, box_zoom, reset, save'

TOOLTIPS = [('Date', '@Date{ %F}'),
            ('Daily Doses', '@{Daily doses}'),
            ('7-day avg doses', '@{7-day avg doses}')]


mapper = linear_cmap(field_name='Daily doses',
                     palette=BuRd, low=(None), high=(None))

daily_vac_figure_combo = figure(title='Daily Vaccinations',
                                x_axis_type="datetime",
                                height=550,
                                sizing_mode='stretch_width',
                                tools=TOOLS)


daily_vac_figure_combo.vbar(x='Date',
                            top='Daily doses',
                            width=datetime.timedelta(days=0.5),
                            alpha=0.6,
                            color=mapper,
                            legend_label='Daily Doses',
                            source=lar_vac_data)

line = daily_vac_figure_combo.line(x='Date',
                                   y='7-day avg doses',
                                   line_width=2,
                                   color='#000000',
                                   legend_label='7-Day avg Doses',
                                   source=lar_vac_data)

add_annos(daily_vac_figure_combo)

daily_vac_figure_combo.legend.location = 'top_left'
# daily_vac_figure_combo.add_layout(vac_line)
# daily_vac_figure_combo.add_layout(delta_box)
# daily_vac_figure_combo.add_layout(omi_box)
# daily_vac_figure_combo.add_layout(vac_label)
# daily_vac_figure_combo.add_layout(delta_label)

vac_hover = HoverTool()
vac_hover.tooltips = TOOLTIPS
vac_hover.mode = 'vline'
vac_hover.formatters = {'@Date': 'datetime'}
vac_hover.renderers = [line]

daily_vac_figure_combo.add_tools(vac_hover)

#daily_vac_figure_combo.add_tools(HoverTool(tooltips=[("Date", "@x"),('Doses','@top')]))


In [366]:
show(daily_vac_figure_combo)
save(daily_vac_figure_combo, title='Vaccination Plot',
     filename='larimer_fig_docs/vac_mod.html', template=fig_template)
reset_output()


This was my attempt at a js soclution

```python
js, vac_tag = autoload_static(daily_vac_figure_combo, CDN, "fig_embeds/vac.js") # path doesn't matter; won't be using
    
file = open('fig_embeds/vac.js', 'w') # filename doesn't matter either, use whatever you like
file.write(js)
file.close()
print(vac_tag)
```

In [342]:

output_notebook()

lar_vac_data = ColumnDataSource(combo_df)


TOOLS = 'pan, wheel_zoom, box_zoom, reset, save'

TOOLTIPS = [('Date', '@Date{ %F}'),
            ('Daily Cases', '@{Daily Cases}'),
            ('7-day avg Cases', '@{7-day avg Cases}')]


mapper = linear_cmap(field_name='Daily Cases', palette=list(
    reversed(cc.fire)), low=(None), high=(None))

daily_case_figure_combo = figure(title='Daily Cases',
                                 x_axis_type="datetime",
                                 height=550,
                                 sizing_mode='stretch_width',
                                 tools=TOOLS)


daily_case_figure_combo.vbar(x='Date',
                             top='Daily Cases',
                             width=datetime.timedelta(days=0.5),
                             alpha=0.8,
                             color=mapper,
                             source=lar_vac_data)

                            
line = daily_case_figure_combo.line(x='Date',
                                    y='7-day avg Cases',
                                    line_width=2,
                                    color='#000000',
                                    source=lar_vac_data)


add_annos(daily_case_figure_combo)

case_hover = HoverTool()
case_hover.tooltips = TOOLTIPS
case_hover.mode = 'vline'
case_hover.formatters = {'@Date': 'datetime'}
case_hover.renderers = [line]

daily_case_figure_combo.add_tools(case_hover)


In [343]:
show(daily_case_figure_combo)
save(daily_case_figure_combo, title='Daily Case Plot',
     filename='larimer_fig_docs/case_mod.html', template=fig_template)
reset_output()


In [344]:
output_notebook()

lar_vac_data = ColumnDataSource(combo_df)

TOOLS = 'pan, wheel_zoom, box_zoom, reset, save'

TOOLTIPS = [('Date', '@Date{ %F}'),
            ('Daily Hospitalizations', '@{Daily Hospitalizations}'),
            ('7-day avg Hospitalizations', '@{7-day avg Hospitalizations}')]


mapper = linear_cmap(field_name='Daily Hospitalizations',
                     palette=list(reversed(cc.fire)), low=(None), high=(None))

daily_hosp_figure_combo = figure(title='Daily Hospitalizations',
                                 x_axis_type="datetime",
                                 height=550,
                                 sizing_mode='stretch_width',
                                 tools=TOOLS)


daily_hosp_figure_combo.vbar(x='Date',
                             top='Daily Hospitalizations',
                             width=datetime.timedelta(days=0.5),
                             alpha=0.5,
                             color=mapper,
                             source=lar_vac_data)


line = daily_hosp_figure_combo.line(x='Date',
                                    y='7-day avg Hospitalizations',
                                    line_width=2,
                                    color='#000000',
                                    source=lar_vac_data)

add_annos(daily_hosp_figure_combo)


hosp_hover = HoverTool()

hosp_hover.tooltips = TOOLTIPS

hosp_hover.mode = 'vline'

hosp_hover.formatters = {'@Date': 'datetime'}

hosp_hover.renderers = [line]

daily_hosp_figure_combo.add_tools(hosp_hover)


In [345]:
show(daily_hosp_figure_combo)
save(daily_hosp_figure_combo, title='Hospitalization Plot',
     filename='larimer_fig_docs/hosp_mod.html', template=fig_template)
reset_output()


In [346]:
output_notebook()

lar_vac_data = ColumnDataSource(combo_df)


TOOLS = 'pan, wheel_zoom, box_zoom, reset, save'

TOOLTIPS = [('Date', '@Date{ %F}'),
            ('7-Day avg Total Patient Count', '@{7-Day avg Total Patient Count}')]


mapper = linear_cmap(field_name='7-Day avg Total Patient Count', palette=list(
    reversed(cc.fire)), low=(None), high=(None))

total_patient_figure = figure(title='7-Day avg Total Patient Count',
                              x_axis_type="datetime",
                              height=550,
                              sizing_mode='stretch_width',
                              tools=TOOLS)


total_patient_figure.vbar(x='Date',
                          top='Total Patient Count',
                          width=datetime.timedelta(days=0.5),
                          alpha=0.5,
                          color=mapper,
                          source=lar_vac_data)

pat_line = total_patient_figure.line(x='Date',
                                     y='7-Day avg Total Patient Count',
                                     alpha=0.9,
                                     line_width=2,
                                     color='#000000',
                                     source=lar_vac_data)

add_annos(total_patient_figure)

patient_hover = HoverTool()

patient_hover.tooltips = TOOLTIPS

patient_hover.mode = 'vline'

patient_hover.formatters = {'@Date': 'datetime'}

patient_hover.renderers = [pat_line]

total_patient_figure.add_tools(patient_hover)


In [347]:
show(total_patient_figure)
save(total_patient_figure, title='Patient Plot',
     filename='larimer_fig_docs/patient_mod.html', template=fig_template)
reset_output()


In [348]:
output_notebook()

lar_vac_data = ColumnDataSource(combo_df)

reset_output()
output_notebook()

TOOLS = 'pan, wheel_zoom, box_zoom, reset, save'

TOOLTIPS = [('Date', '@Date{ %F}'),
            ('Daily Deaths', '@{Daily Death Count}'),
            ('7-day avg Deaths', '@{7-Day avg Deaths}')]


mapper = linear_cmap(field_name='Daily Death Count', palette=list(
    reversed(cc.fire)), low=(None), high=(None))

daily_death_figure_combo = figure(title='Daily Deaths',
                                  x_axis_type="datetime",
                                  height=550,
                                  sizing_mode='stretch_width',
                                  tools=TOOLS)


daily_death_figure_combo.vbar(x='Date',
                              top='Daily Death Count',
                              width=datetime.timedelta(days=0.5),
                              alpha=0.8,
                              color=mapper,
                              source=lar_vac_data)


line = daily_death_figure_combo.line(x='Date',
                                     y='7-Day avg Deaths',
                                     line_width=2,
                                     color='#000000',
                                     source=lar_vac_data)

add_annos(daily_death_figure_combo)

death_hover = HoverTool()

death_hover.tooltips = TOOLTIPS

death_hover.mode = 'vline'

death_hover.formatters = {'@Date': 'datetime'}

death_hover.renderers = [line]

daily_death_figure_combo.add_tools(death_hover)


In [349]:
show(daily_death_figure_combo)
save(daily_death_figure_combo, title='Death Plot',
     filename='larimer_fig_docs/death_mod.html', template=fig_template)
reset_output()


- [ ] Get overall plot layout


## Combo Figure layout

Trying ```gridplot```

### Layout example

```python
show(row(column(fig1, fig2), column(fig3)))
```

In [371]:
output_notebook()

lar_vac_data_g = ColumnDataSource(combo_df)


TOOLS = 'pan, wheel_zoom, box_zoom, reset, hover, save'

TOOLTIPS = [('Date', '@Date{ %F}'),
            ('Daily Doses', '@{Daily doses}'),
            ('7-day avg doses', '@{7-day avg doses}'),
            ('Daily Cases', '@{Daily Cases}'),
            ('7-day avg Cases', '@{7-day avg Cases}'),
            ('Daily Hospitalizations', '@{Daily Hospitalizations}'),
            ('7-day avg Hospitalizations', '@{7-day avg Hospitalizations}'),
            ('Total Patient Count', '@{Total Patient Count}'),
            ('Daily Deaths', '@{Daily Death Count}'),
            ('7-day avg Deaths', '@{7-Day avg Deaths}')]


blue_palette= BuRd
red_palette = list(reversed(cc.fire))

daily_vac_figure_grid = figure(title='Daily Vaccinations',
                               x_axis_type="datetime",
                               tools=TOOLS)

daily_vac_figure_grid.vbar(x='Date',
                           top='Daily doses',
                           width=datetime.timedelta(days=0.5),
                           alpha=0.3,
                           color=linear_cmap(field_name='Daily doses', 
                                             palette=blue_palette, 
                                             low=(None), 
                                             high=(None)),
                           source=lar_vac_data_g)

v_line = daily_vac_figure_grid.line(x='Date',
                                  y='7-day avg doses',
                                  line_width=2,
                                  color='#000000',
                                  source=lar_vac_data_g)

daily_case_figure_grid = figure(title='Daily Cases',
                                x_axis_type="datetime",
                                tools=TOOLS)

daily_case_figure_grid.vbar(x='Date',
                            top='Daily Cases',
                            width=datetime.timedelta(days=0.5),
                            alpha=0.8,
                            color=linear_cmap(field_name='Daily Cases', 
                                              palette=red_palette, 
                                              low=(None), 
                                              high=(None)),
                            source=lar_vac_data_g)

c_line = daily_case_figure_grid.line(x='Date',
                                   y='7-day avg Cases',
                                   line_width=2,
                                   color='#000000',
                                   source=lar_vac_data_g)

daily_hosp_figure_grid = figure(title='Daily Hospitalizations',
                                x_axis_type="datetime",
                                tools=TOOLS)


daily_hosp_figure_grid.vbar(x='Date',
                            top='Daily Hospitalizations',
                            width=datetime.timedelta(days=0.5),
                            alpha=0.5,
                            color=linear_cmap(field_name='Daily Hospitalizations', 
                                              palette=red_palette,
                                              low=(None), 
                                              high=(None)),
                            source=lar_vac_data_g)

h_line = daily_hosp_figure_grid.line(x='Date',
                                   y='7-day avg Hospitalizations',
                                   line_width=2,
                                   color='#000000',
                                   source=lar_vac_data_g)

total_patient_figure_grid = figure(title='Total Patient Count',
                                   x_axis_type="datetime",
                                   tools=TOOLS)

total_patient_figure_grid.vbar(x='Date',
                               top='Total Patient Count',
                               width=datetime.timedelta(days=0.5),
                               alpha=0.5,
                               color=linear_cmap(field_name='Total Patient Count', 
                                                 palette=red_palette, 
                                                 low=(None), 
                                                 high=(None)),
                               source=lar_vac_data_g)

pat_line = total_patient_figure_grid.line(x='Date',
                                          y='7-Day avg Total Patient Count',
                                          line_width=2,
                                          color='#000000',
                                          source=lar_vac_data_g)

daily_death_figure_grid = figure(title='Daily Deaths',
                                 x_axis_type="datetime",
                                 tools=TOOLS)

daily_death_figure_grid.vbar(x='Date',
                             top='Daily Death Count',
                             width=datetime.timedelta(days=0.5),
                             alpha=0.8,
                             color=linear_cmap(field_name='Daily Death Count', 
                                               palette=red_palette, 
                                               low=(None), 
                                               high=(None)),
                             source=lar_vac_data_g)

d_line = daily_death_figure_grid.line(x='Date',
                                    y='7-Day avg Deaths',
                                    line_width=2,
                                    color='#000000',
                                    source=lar_vac_data_g)

for figs in [daily_vac_figure_grid, daily_case_figure_grid, daily_death_figure_grid, daily_hosp_figure_grid, total_patient_figure_grid]:
    add_annos(figs)
    figs.select(name='vac_label').text = ''
    figs.select(name='delta_label').text = ''
    figs.select(name='omi_label').text = ''

    



hover_vac = daily_vac_figure_grid.select_one(HoverTool)
hover_case = daily_case_figure_grid.select_one(HoverTool)
hover_hosp = daily_hosp_figure_grid.select_one(HoverTool)
hover_pat = total_patient_figure_grid.select_one(HoverTool)
hover_death = daily_death_figure_grid.select_one(HoverTool)

hover_vac.tooltips = TOOLTIPS
hover_case.tooltips = TOOLTIPS
hover_hosp.tooltips = TOOLTIPS
hover_pat.tooltips = TOOLTIPS
hover_death.tooltips = TOOLTIPS

hover_vac.formatters = {'@Date': 'datetime'}
hover_case.formatters = {'@Date': 'datetime'} 
hover_hosp.formatters = {'@Date': 'datetime'} 
hover_pat.formatters = {'@Date': 'datetime'} 
hover_death.formatters = {'@Date': 'datetime'}

hover_vac.renderers = [v_line]
hover_case.renderers = [c_line] 
hover_hosp.renderers = [h_line] 
hover_pat.renderers = [pat_line] 
hover_death.renderers = [d_line] 

total_patient_figure_grid.x_range = daily_case_figure_grid.x_range 
daily_hosp_figure_grid.x_range = daily_case_figure_grid.x_range
daily_vac_figure_grid.x_range = daily_case_figure_grid.x_range 
daily_death_figure_grid.x_range = daily_case_figure_grid.x_range


grid_fig = gridplot(
    children=[daily_case_figure_grid, total_patient_figure_grid, daily_hosp_figure_grid,
              daily_vac_figure_grid, daily_death_figure_grid],
    ncols=2,
    height=400,
    sizing_mode='scale_width',
)

show(grid_fig)
save(grid_fig, title='Death Plot',
     filename='larimer_fig_docs/grid_fig.html', template=fig_template)
reset_output()

## Overlay Viz with Muted Legend Lines

<details>
    <summary>This is what I was doing first before I realized I could iterate to create plots</summary>
    
```python
v_line = muted_overlay_fig.line(x='Date',
                                y='7-day avg doses',
                                line_width=2,
                                source=lar_vac_data_g)


c_line = muted_overlay_fig.line(x='Date',
                                y='7-day avg Cases',
                                line_width=2,
                                source=lar_vac_data_g)


h_line = muted_overlay_fig.line(x='Date',
                                y='7-day avg Hospitalizations',
                                line_width=2,
                                source=lar_vac_data_g)


pat_line = muted_overlay_fig.line(x='Date',
                                  y='7-Day avg Total Patient Count',
                                  line_width=2,
                                  source=lar_vac_data_g)


d_line = muted_overlay_fig.line(x='Date',
                                y='7-Day avg Deaths',
                                line_width=1,
                                color='#000000',
                                source=lar_vac_data_g,
                                y_range_name="low")
```
</details>

In [373]:
output_notebook()

palette = Spectral4
colors = itertools.cycle(palette)

lar_vac_data_g = ColumnDataSource(combo_df)


TOOLS = 'pan, wheel_zoom, box_zoom, reset, hover, save'


TOOLTIPS = [
    ('Date', '@Date{ %F}'),
    # ('7-day avg doses', '@{7-day avg doses}'),
    ('7-day avg Cases', '@{7-day avg Cases}'),
    ('7-day avg Hospitalizations', '@{7-day avg Hospitalizations}'),
    ('7-Day avg Total Patient Count', '@{7-Day avg Total Patient Count}'),
    ('7-day avg Deaths', '@{7-Day avg Deaths}')
]


muted_overlay_fig = figure(title='All 7-Day Averages with interactive legend',
                           x_axis_type="datetime",
                           height=550,
                           sizing_mode='stretch_width',
                           tools=TOOLS)

muted_overlay_fig.yaxis.visible = False

for y, color, y_range in zip(['7-day avg Cases', '7-day avg Hospitalizations', '7-Day avg Total Patient Count', '7-Day avg Deaths'], Spectral4, ['high', 'low', 'high', 'low']):
    muted_overlay_fig.line(x='Date', y=y, line_width=2, color=color,
                           legend_label=y, y_range_name=y_range, source=lar_vac_data_g)

muted_overlay_fig.extra_y_ranges = {"low": Range1d(start=0, end=15),
                                    "high": Range1d(start=0, end=800)}
muted_overlay_fig.add_layout(LinearAxis(
    y_range_name="low", axis_label='Deaths/Hospitalizations'), 'left')
muted_overlay_fig.add_layout(LinearAxis(
    y_range_name="high", axis_label='Cases/Patients'), 'left')


add_annos(muted_overlay_fig)


muted_overlay_fig.legend.location = "top_left"
muted_overlay_fig.legend.click_policy = "mute"
muted_overlay_fig.select(type=BoxAnnotation).y_range_name = 'high'
muted_overlay_fig.select(type=BoxAnnotation).y_range_name = 'high'

hover_overlay = muted_overlay_fig.select_one(HoverTool)
hover_overlay.tooltips = TOOLTIPS
hover_overlay.formatters = {'@Date': 'datetime'}
hover_overlay.mode = 'mouse'


In [374]:

show(muted_overlay_fig)
save(muted_overlay_fig, title='Overlay',
     filename='larimer_fig_docs/all_avgs.html', template=fig_template)
reset_output()


  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")


## **Bookmark, TODO, Notes**

### TODO

- [x] Make dataframes
- [x] Convert datetimes
- [x] rename date columns
- [x] Find Nans
- [x] Re-freq and fill blanks
- [x] Turn logs into daily totals
- [x] Save backup CSVs
- [x] Combine dataframe
- [x] Make plots
- [x] Figure out hovertips
- [x] Refactor saving pattern
 
 ### **Notes**
Sample for iterating through different offsets 
```python
df["Input"].corr(df["Output"].shift(-1), method = 'pearson', min_periods = 1) #1
```
and more iteration 
```python
 xcov_monthly = [crosscorr(datax, datay, lag=i) for i in range(12)]
```
from [here](https://stackoverflow.com/questions/33171413/cross-correlation-time-lag-correlation-with-pandas)


In [None]:
def crosscorr(datax, datay, lag=0):
    """ Lag-N cross correlation. 
    Parameters
    ----------
    lag : int, default 0
    datax, datay : pandas.Series objects of equal length

    Returns
    ----------
    crosscorr : float
    """
    return datax.corr(datay.shift(lag)).round(3)


In [None]:
print(combo_df['Daily Cases'].dtypes)
print(combo_df['Daily Death Count'].dtypes)
print(combo_df['Daily Hospitalizations'].dtypes)
print(combo_df['7-day avg Cases'].dtypes)
print(combo_df['7-Day avg Deaths'].dtypes)
print(combo_df['7-day avg Hospitalizations'].dtypes)

In [372]:
num_days = (15)

daily_case_death_lag = [crosscorr(combo_df['Daily Cases'].astype(float), combo_df['Daily Death Count'], lag=i) for i in range(num_days)]
daily_hosp_death_lag = [crosscorr(combo_df['Daily Hospitalizations'].astype(float), combo_df['Daily Death Count'], lag=i) for i in range(num_days)]

case_hosp_lag = [crosscorr(combo_df['7-day avg Cases'], combo_df['7-day avg Hospitalizations'], lag=i) for i in range(num_days)]
case_death_lag = [crosscorr(combo_df['7-day avg Cases'], combo_df['7-Day avg Deaths'], lag=i) for i in range(num_days)]
hosp_death_lag = [crosscorr(combo_df['7-day avg Hospitalizations'], combo_df['7-Day avg Deaths'], lag=i) for i in range(num_days)]

data = {"Offset Days": [l for l in range(num_days)],
        "Daily cases to  deaths": daily_case_death_lag,
        "Daily Hosp to  deaths" : daily_hosp_death_lag,
        "Rolling cases to  Hosp": case_hosp_lag,
        "Rolling cases to  deaths": case_death_lag,
        "Rolling Hosp to  deaths":  hosp_death_lag
}

lag_corr_df = pd.DataFrame(data)

lag_corr_df


Unnamed: 0,Offset Days,Daily cases to deaths,Daily Hosp to deaths,Rolling cases to Hosp,Rolling cases to deaths,Rolling Hosp to deaths
0,0,0.292,0.268,0.629,0.443,0.639
1,1,0.305,0.299,0.618,0.432,0.634
2,2,0.271,0.292,0.606,0.42,0.626
3,3,0.266,0.237,0.592,0.407,0.621
4,4,0.245,0.211,0.578,0.393,0.613
5,5,0.206,0.188,0.562,0.379,0.602
6,6,0.226,0.277,0.547,0.365,0.594
7,7,0.232,0.261,0.532,0.352,0.586
8,8,0.232,0.269,0.518,0.338,0.575
9,9,0.207,0.286,0.503,0.324,0.56


In [355]:
output_notebook()

palette = Spectral4
colors = itertools.cycle(palette)

lar_vac_data_g = ColumnDataSource(combo_df)


TOOLS = 'pan, wheel_zoom, box_zoom, reset, save'


# TOOLTIPS = [('7-day avg Cases', '@{7-day avg Cases}'),
#             ('7-day avg Hospitalizations', '@{7-day avg Hospitalizations}'),
#             ('7-day avg Deaths', '@{7-Day avg Deaths}')]


scatter_fig = figure(title='Rolling scatters',
                     tools=TOOLS)
                           
for x, y, color, in zip(['7-day avg Cases', '7-day avg Cases','7-day avg Hospitalizations'],
                        ['7-day avg Hospitalizations', '7-Day avg Deaths', '7-Day avg Deaths'],
                        Spectral4):
     scatter_fig.scatter(x=x, y=y, color=color, legend_label=x+":"+y, source=lar_vac_data_g)

scatter_fig2 = figure(title='Hospitalizations v Deaths',
                     tools=TOOLS)

scatter_fig2.scatter(x='7-day avg Hospitalizations', 
                     y='7-Day avg Deaths',
                     color=Spectral4[2], 
                     source=lar_vac_data_g)

scatter_grid = gridplot(
     children=[scatter_fig, scatter_fig2],
     ncols=2,
     height=400,
     sizing_mode='scale_width',
     merge_tools=False
     )

show(scatter_grid)
#save(muted_overlay_fig, title='Overlay',
     #filename='larimer_fig_docs/all_avgs.html', template=fig_template)
reset_output()