In [None]:
import warnings
from datetime import datetime

# The rest of the notebook refers to these libraries as `alt` and `np`,
# so the aliases must match those names.
import altair as alt
import numpy as np
import pandas as pd

# Silences every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Turn off Altair's row-count check so larger data frames can be charted.
alt.data_transformers.disable_max_rows()

def days_between(d1, d2, date_format="%m-%d-%Y"):
    """Return the signed number of whole days from ``d1`` to ``d2``.

    Args:
        d1: Start date as a string laid out according to ``date_format``.
        d2: End date as a string laid out according to ``date_format``.
        date_format: ``strptime`` pattern used to parse both dates.
            Defaults to month-day-year (``"%m-%d-%Y"``).

    Returns:
        ``d2 - d1`` in days as an ``int`` (negative when ``d2`` precedes
        ``d1``), or ``None`` when either argument is not a string, e.g. a
        missing value coming out of a data frame.

    Raises:
        ValueError: If a string argument does not match ``date_format``.
    """
    # Non-string inputs (None, NaN, ...) mean "no date available".
    if not (isinstance(d1, str) and isinstance(d2, str)):
        return None
    start = datetime.strptime(d1, date_format)
    end = datetime.strptime(d2, date_format)
    return (end - start).days

In [None]:
# Load the case data and discard rows in which every case counter is zero.
total_cases_df = pd.read_csv("../91-DIVOC/pages/covid-visualization/jhu-data.csv", index_col=0)
count_columns = ["Confirmed", "Recovered", "Active", "Deaths"]
empty_rows = total_cases_df[(total_cases_df[count_columns] == 0).all(axis=1)].index
total_cases_df = total_cases_df.drop(empty_rows, axis=0)

# Tabulate the top K countries with the highest total number of Confirmed cases.
# "Confirmed" is selected before summing so the other columns are not aggregated,
# and K (rather than a repeated literal) controls how many countries are kept.
K = 20
topKcountries = list(total_cases_df.groupby("Country_Region")["Confirmed"].sum().nlargest(K).index)
# .copy() so the column assignments below act on an independent frame
# instead of a slice of total_cases_df.
cases_df = total_cases_df[total_cases_df["Country_Region"].isin(topKcountries)].copy()

# Compute days since N = 50 confirmed cases; drop those with no N confirmed cases
N = 50
# NOTE(review): the strict ">" makes day 0 the first date with MORE than N cases;
# confirm whether ">=" was intended.
reached_N = cases_df[cases_df["Confirmed"] > N]
# Dates are month-first strings (the layout days_between parses), so a plain
# string min() would not be chronological once the data spans more than one
# year. Parse first, take the earliest date, then format back to the same layout.
first_date_over_N = (
    pd.to_datetime(reached_N["Date"], format="%m-%d-%Y")
    .groupby(reached_N["Country_Region"])
    .min()
)
days_since_N = first_date_over_N.dt.strftime("%m-%d-%Y").to_dict()
# Countries that never exceeded N get a missing value here and are dropped next.
cases_df['Date N Confirmed'] = cases_df["Country_Region"].map(days_since_N)
cases_df = cases_df.dropna(subset=['Date N Confirmed'])
cases_df['Days_from_N'] = cases_df.apply(lambda x: days_between(x["Date N Confirmed"], x["Date"]), axis=1)

In [None]:
# Adding in information about lockdowns & possibly drop rows without lockdowns

quarantine_df = pd.read_csv("quarantine-activity.csv", index_col=0).set_index('Country_Region')

# Earliest full-lockdown date per country. The dates are month-first strings
# (the layout days_between parses), so they are parsed before taking the
# minimum -- a plain string min() is not chronological across calendar years --
# and then formatted back to the same layout.
full_lockdown_rows = quarantine_df[quarantine_df['Lockdown Type'] == 'Full']
first_full_lockdown = (
    pd.to_datetime(full_lockdown_rows['Date Enacted'], format="%m-%d-%Y")
    .groupby(level='Country_Region')
    .min()
)
full_lockdown = first_full_lockdown.dt.strftime("%m-%d-%Y").to_dict()

# Add date of full lockdown (missing for countries without one)
cases_df['Date of Full Lockdown'] = cases_df['Country_Region'].map(full_lockdown)
# Lockdown day on the same "days since N confirmed" axis as Days_from_N;
# days_between yields None where there is no lockdown date.
cases_df['lockdown_day_from_N'] = cases_df.apply(lambda x: days_between(x["Date N Confirmed"], x["Date of Full Lockdown"]), axis=1)

# Drop rows for those without a full lockdown
cases_df = cases_df.dropna(subset=['Date of Full Lockdown'])

In [None]:
# Per-country state filled in by check_if_lockdown, keyed by country:
#   track_lockdown_x     -> latest day seen that is on or before the lockdown day
#   track_lockdown_value -> confirmed count recorded for that day
#   track_y_intercept    -> confirmed count recorded for day 0
track_lockdown_value = {}
track_y_intercept = {}
track_lockdown_x = {}


def check_if_lockdown(c, lockdown, current, conf):
    """Record one observation for country ``c`` in the tracking dicts.

    Args:
        c: Country key.
        lockdown: Day on which the lockdown started.
        current: Day of this observation, on the same axis as ``lockdown``.
        conf: Confirmed count of this observation.

    Only observations with ``0 <= current <= lockdown`` are recorded. Among
    those, the one with the largest ``current`` (ties: the most recent call)
    ends up in ``track_lockdown_x`` / ``track_lockdown_value``, and the one
    with ``current == 0`` ends up in ``track_y_intercept``.

    Returns:
        Always ``True``; the function is used for its side effects.
    """
    on_or_before_lockdown = lockdown >= current and current >= 0
    if on_or_before_lockdown:
        # Keep the last known value prior to (or at) the lockdown.
        is_latest_so_far = c not in track_lockdown_x or track_lockdown_x[c] <= current
        if is_latest_so_far:
            track_lockdown_value[c] = conf
            track_lockdown_x[c] = current
        if current == 0:
            # Day 0 supplies the y intercept.
            track_y_intercept[c] = conf
    return True
# Compute lockdown value, lockdown x, and y intercept
# This apply is run only for check_if_lockdown's side effect of filling the
# three tracking dicts; its return value is discarded.
cases_df.apply(
    lambda row: check_if_lockdown(
        row['Country_Region'],
        row['lockdown_day_from_N'],
        row['Days_from_N'],
        row['Confirmed'],
    ),
    axis=1,
)

# Copy each country's tracked numbers onto every row of that country.
for column_name, per_country in (
    ('Intercept', track_y_intercept),
    ('Lockdown_x', track_lockdown_x),
    ('Lockdown_value', track_lockdown_value),
):
    cases_df[column_name] = cases_df.apply(
        lambda row, per_country=per_country: per_country.get(row['Country_Region']),
        axis=1,
    )

# Intercept * a^Lockdown_x == Lockdown_value ==> log(a) = log(Lockdown_value/Intercept)/Lockdown_x
growth_ratio = cases_df.Lockdown_value / cases_df.Intercept
cases_df['Lockdown_slope'] = np.exp(np.log(growth_ratio) / cases_df.Lockdown_x)

In [None]:
# Logarithmic Axis (Stephen's prettier version)
# Keep days 0..32 on the "days since N confirmed" axis. The boolean masks are
# combined with `&`, and the slice is copied so new columns can be added safely.
chart_df = cases_df.loc[(cases_df.Days_from_N >= 0) & (cases_df.Days_from_N <= 32)].copy()

# insert some dummy rows w/ Days_from_N == lockdown_day_from_N to get tooltip_rules w/ mouseover to work properly
# The rows are collected first and concatenated once: DataFrame.append was
# removed in pandas 2.0, and appending row by row copies the frame every time.
lockdown_marker_rows = []
for country in full_lockdown:
    if country not in days_since_N:
        continue
    val_to_insert = days_between(days_since_N[country], full_lockdown[country])
    lockdown_marker_rows.append({'Country_Region': country,
                                 'lockdown_day_from_N': val_to_insert,
                                 'Days_from_N': val_to_insert,
                                })
if lockdown_marker_rows:
    # Columns absent from the marker rows (e.g. Confirmed) are left missing.
    chart_df = pd.concat([chart_df, pd.DataFrame(lockdown_marker_rows)], ignore_index=True)

# Generic x / y columns referenced by the chart encodings.
chart_df['x'] = chart_df.Days_from_N
chart_df['y'] = chart_df.Confirmed

# Selection following the data point nearest to the mouse along x.
nearest = alt.selection(type='single', nearest=True, on='mouseover',
                        fields=['x'], empty='none')

# Selection driven by clicking countries in the legend.
legend_selection = alt.selection_multi(fields=['Country_Region'], bind='legend')

# Horizontal interval brush; referenced by name ("brush") in chart expressions.
brush = alt.selection_interval(name="brush", encodings=['x'])

# x / y / color encodings common to the line and point layers.
shared_encodings = dict(
    x=alt.X("x:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 50 Confirmed"),
    y=alt.Y("y:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(100,100000))),
    color=alt.Color("Country_Region"),
)

# Base chart from which every layer is derived.
chart = alt.Chart(chart_df, width=1000, height=500)

def _add_model_transformation_fields(base):
    return base.transform_calculate(
        model_y='datum.Lockdown_value * pow(datum.Lockdown_slope, datum.x - datum.Lockdown_x)'
    ).transform_filter(
        'datum.x >= datum.Lockdown_x'
    ).transform_filter(
        'datum.Confirmed !== null'
    ).transform_filter(
        'datum.model_y <= 100000'
    )

# The line and point layers reuse the x / y / color channels from
# `shared_encodings` instead of repeating identical definitions.

# Observed confirmed cases, one line per country; also hosts the legend and
# brush selections.
lines = chart.mark_line(size=3).encode(
    **shared_encodings,
    opacity=alt.condition(legend_selection, alt.value(1), alt.value(0.1)),
).transform_filter(
    'datum.Confirmed !== null'
).add_selection(legend_selection).add_selection(brush)

# Dashed extrapolation lines; same x and color as above, y is the calculated model_y.
model_lines = _add_model_transformation_fields(
    chart.mark_line(size=3, strokeDash=[1,1]).encode(
        x=shared_encodings['x'],
        y=alt.Y("model_y:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(100,100000))),
        color=shared_encodings['color'],
        opacity=alt.condition(legend_selection, alt.value(1), alt.value(0.1)),
    )
)

# Semi-transparent markers on the observed data points.
points = chart.mark_point(size=90, filled=True).encode(
    **shared_encodings,
    opacity=alt.condition(legend_selection, alt.value(.4), alt.value(0.1)),
).transform_filter(
    'datum.Confirmed !== null'
)

# Transparent selectors across the chart. This is what tells us
# the x-value of the cursor
selectors = chart.mark_point().encode(
    x='x:Q',
    opacity=alt.value(0),
).add_selection(nearest)

# Draw points on the line, and highlight based on selection
tooltip_points = points.mark_point(filled=True).encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)
# Draw text labels near the points, and highlight based on selection
tooltip_text = points.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(nearest, 'tooltip_text:N', alt.value(' ')),
    opacity=alt.condition(legend_selection, alt.value(1), alt.value(0.1)),
).transform_calculate(
    tooltip_text='datum.Country_Region + ": " + datum.y'
)
# Draw a rule at the location of the selection
# (currently not included in the layered chart below)
tooltip_rules = chart.mark_rule(color='gray').encode(
    x='x:Q',
).transform_filter(
    nearest
)

# Red rule at brush_mid (calculated on the layered chart below), shown only
# while a brush interval exists.
brush_rule = chart.mark_rule(color='red').encode(
    x='brush_mid:Q'
).transform_filter(
    'isDefined(brush.x)'
)

# Label for the red rule. It is derived from brush_rule and therefore already
# carries the 'isDefined(brush.x)' filter; no second copy is needed.
brush_text = brush_rule.mark_text(align='center', dx=5, dy=-255).encode(
    text=alt.value('Line of no intervention'),
    color=alt.value('red')
)

# Dashed vertical rule per country on rows where x equals the lockdown day.
lockdown_rules = chart.mark_rule(strokeDash=[7,3]).encode(
    x='x:Q',
    color=alt.Color("Country_Region"),
    opacity=alt.condition(legend_selection, alt.value(1), alt.value(0.1)),
).transform_filter(
    'datum.x == datum.lockdown_day_from_N'
)

# Text shown next to a lockdown rule when it is the nearest selection.
lockdown_tooltip = lockdown_rules.mark_text(align='left', dx=5, dy=-220).encode(
    text=alt.condition(nearest, 'lockdown_tooltip_text:N', alt.value(' '))
).transform_calculate(
    lockdown_tooltip_text='datum.Country_Region + " locked down"'
)

# Compose all layers. brush_mid is the midpoint of the brushed x interval,
# or 100 when there is no brush.
alt.layer(
    lines,
    model_lines,
    selectors,
    tooltip_points,
    tooltip_text,
#     tooltip_rules,
    brush_rule,
    brush_text,
    lockdown_rules,
    lockdown_tooltip,
    points
).transform_calculate(
    brush_mid='isDefined(brush.x) ? 0.5 * (brush.x[0] + brush.x[1]) : 100'
)