In [None]:
import altair as alt
import pandas as pd
from datetime import datetime
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides pandas warnings such as
# SettingWithCopyWarning — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Turn off Altair's maximum-row check on chart data.
# NOTE(review): presumably needed because the case table is larger than
# Altair's default limit — confirm.
alt.data_transformers.disable_max_rows()

def days_between(d1, d2):
    """Return the number of whole days from ``d1`` to ``d2``.

    Both arguments must be strings in ``MM-DD-YYYY`` form. The result is
    negative when ``d2`` is earlier than ``d1``.

    Returns ``None`` when either argument is not a string (for example a
    missing value), so the function can be applied to columns with gaps.

    Raises ``ValueError`` when a string does not match the expected format.
    """
    both_are_text = isinstance(d1, str) and isinstance(d2, str)
    if not both_are_text:
        return None

    date_format = "%m-%d-%Y"
    start = datetime.strptime(d1, date_format)
    end = datetime.strptime(d2, date_format)
    elapsed = end - start
    return elapsed.days

In [None]:
# Load the JHU case table and discard rows in which every count column is zero.
total_cases_df       = pd.read_csv("../91-DIVOC/pages/covid-visualization/jhu-data.csv",index_col=0)
count_columns        = ["Confirmed", "Recovered", "Active", "Deaths"]
all_zero             = (total_cases_df[count_columns] == 0).all(axis=1)
empty_rows           = total_cases_df[all_zero].index
# Filter with the boolean mask instead of dropping by label, so a repeated
# index label can never remove a non-empty row.
total_cases_df       = total_cases_df[~all_zero]

# Tabulate the top K countries with the highest summed Confirmed count.
# NOTE(review): this sums Confirmed over every row of a country; if the
# column is cumulative per date, the ranking is not by latest total — confirm.
K              = 20
topKcountries  = list(total_cases_df.groupby("Country_Region")["Confirmed"].sum().nlargest(K).index)
# .copy() so the column assignments below act on an independent frame
# rather than on a slice of total_cases_df.
cases_df       = total_cases_df[total_cases_df["Country_Region"].isin(topKcountries)].copy()

# Compute days since a country first had more than N = 50 confirmed cases;
# drop countries that never exceeded N.
N              = 50
over_N         = cases_df[cases_df["Confirmed"] > N]
# Dates are "%m-%d-%Y" strings, so comparing them as text orders by month
# before year (and mis-orders unpadded values). Parse them to find the true
# earliest date, but keep the original string because days_between expects it.
over_N         = over_N.assign(_parsed_date=pd.to_datetime(over_N["Date"], format="%m-%d-%Y", errors="coerce"))
days_since_N   = (
    over_N.sort_values("_parsed_date")
          .drop_duplicates("Country_Region")
          .set_index("Country_Region")["Date"]
          .to_dict()
)

# Dictionary lookup per country; countries absent from the mapping become NaN.
cases_df['Date N Confirmed']   = cases_df["Country_Region"].map(days_since_N)
cases_df                       = cases_df.dropna(subset=['Date N Confirmed']).copy()
cases_df['Days_from_N']        = [
    days_between(first_day, day)
    for first_day, day in zip(cases_df["Date N Confirmed"], cases_df["Date"])
]
cases_df

In [None]:
# Attach each country's first full-lockdown date and keep only the countries
# that enacted one.

quarantine_df   = pd.read_csv("quarantine-activity.csv",index_col=0).set_index('Country_Region')

# Earliest 'Full' lockdown per country. 'Date Enacted' holds "%m-%d-%Y"
# strings (the format days_between parses), so the earliest entry is found on
# parsed dates rather than by comparing text; the original string is kept
# because days_between expects it.
full_rows       = quarantine_df[quarantine_df['Lockdown Type'] == 'Full']
full_rows       = full_rows.assign(
    _enacted=pd.to_datetime(full_rows['Date Enacted'], format="%m-%d-%Y", errors="coerce")
).sort_values('_enacted')
# The index is Country_Region; after sorting, the first occurrence of each
# country is its earliest full lockdown.
full_lockdown   = full_rows.loc[~full_rows.index.duplicated(keep='first'), 'Date Enacted'].to_dict()

# Add date of full lockdown (NaN for countries with no full lockdown)
cases_df        = cases_df.assign(**{'Date of Full Lockdown': cases_df['Country_Region'].map(full_lockdown)})

# Drop rows for those without a full lockdown before computing the offset,
# so no work is spent on rows that are discarded anyway.
cases_df        = cases_df.dropna(subset=['Date of Full Lockdown'])

# Day (relative to the N-confirmed date) on which the full lockdown began
cases_df        = cases_df.assign(lockdown_day_from_N=[
    days_between(first_day, lockdown_day)
    for first_day, lockdown_day in zip(cases_df["Date N Confirmed"], cases_df["Date of Full Lockdown"])
])

cases_df

In [None]:
# Logarithmic Axis (Stephen's prettier version)
# Confirmed cases per country on a log scale against days since N confirmed,
# with a dashed rule marking the day each country entered full lockdown.

# Last day (relative to the N-confirmed date) shown on the x-axis.
MAX_DAYS = 32
chart_df = cases_df.loc[cases_df.Days_from_N.between(0, MAX_DAYS)]

# Selection that follows the cursor and snaps to the nearest Days_from_N value.
nearest = alt.selection(type='single', nearest=True, on='mouseover',
                        fields=['Days_from_N'], empty='none')

# Clicking legend entries highlights the chosen countries.
legend_selection = alt.selection_multi(fields=['Country_Region'], bind='legend')


shared_encodings = dict(
    x=alt.X("Days_from_N:Q", scale=alt.Scale(domain=(0, MAX_DAYS)), title=f"Days Since First {N} Confirmed"),
    y=alt.Y("Confirmed:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(100,100000))),
    color=alt.Color("Country_Region"),
    tooltip=["Country_Region"],
    opacity=alt.condition(legend_selection, alt.value(1), alt.value(0.2)),
)

# NOTE: a previous `alt.Chart(lockdown_df, ...)` line was removed here:
# `lockdown_df` is not defined by any earlier cell and the resulting chart
# was never used, so it only raised NameError on a fresh kernel.
chart = alt.Chart(chart_df, width=1000, height=500)
lines = chart.mark_line(size=3).encode(
    **shared_encodings
)
points = chart.mark_point(size=90, filled=True).encode(
    **shared_encodings
)
# Transparent selectors across the chart. This is what tells us
# the x-value of the cursor
selectors = chart.mark_point().encode(
    x='Days_from_N:Q',
    opacity=alt.value(0),
).add_selection(
    nearest
)

# Draw points on the line, and highlight based on selection
tooltip_points = lines.mark_point().encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)
# Draw text labels near the points, and highlight based on selection
tooltip_text = lines.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(nearest, 'tooltip_text:N', alt.value(' '))
).transform_calculate(
    tooltip_text='datum.Country_Region + ": " + datum.Confirmed'
)
# Draw a rule at the location of the selection
tooltip_rules = chart.mark_rule(color='gray').encode(
    x='Days_from_N:Q',
).transform_filter(
    nearest
)
# Dashed vertical rule per country, kept only for the row whose day equals
# that country's lockdown day.
lockdown_rules = chart.mark_rule(strokeDash=[2,1]).encode(
    x=alt.X("Days_from_N:Q"),
    color=alt.Color("Country_Region"),
    opacity=alt.condition(legend_selection, alt.value(1), alt.value(0.2)),
).transform_filter(
    'datum.Days_from_N == datum.lockdown_day_from_N'
)
# Label for the lockdown rule, shown only while the cursor is on that day.
lockdown_tooltip = lockdown_rules.mark_text(align='left', dx=5, dy=-220).encode(
    text=alt.condition(nearest, 'lockdown_tooltip_text:N', alt.value(' '))
).transform_calculate(
    lockdown_tooltip_text='datum.Country_Region + " locked down"'
)
alt.layer(
    lines,
    selectors,
    tooltip_points,
    tooltip_text,
    tooltip_rules,
    lockdown_rules,
    lockdown_tooltip,
    points
).add_selection(legend_selection)