In [1]:
import pandas as pd
import os
import altair as alt

In [2]:
# Directory and archive name of the zipped mobility report CSV.
data_path = '~/repos/covid_visuals/mobility/google/'
filename = 'Global_Mobility_Report.zip'

# Full path to the archive, plus the reader options kept together in one place.
report_path = os.path.join(data_path, filename)
read_options = dict(
    compression='zip',      # the CSV is read straight out of the zip archive
    parse_dates=['date'],   # parse the 'date' column as datetimes
    low_memory=False,
)

mob = pd.read_csv(report_path, **read_options)

In [3]:
# Shorten the verbose "*_percent_change_from_baseline" headers to brief names.
column_aliases = {
    'retail_and_recreation_percent_change_from_baseline': 'retail_recreation',
    'grocery_and_pharmacy_percent_change_from_baseline': 'grocery_pharmacy',
    'parks_percent_change_from_baseline': 'parks',
    'transit_stations_percent_change_from_baseline': 'transit_stations',
    'workplaces_percent_change_from_baseline': 'workplaces',
    'residential_percent_change_from_baseline': 'residential',
}
mob = mob.rename(columns=column_aliases)

In [4]:
# Keep only US rows that name a first-level sub-region but no second-level
# one, i.e. the statewide rows.
is_us = mob['country_region_code'] == 'US'
has_sub_region_1 = mob['sub_region_1'].notna()
lacks_sub_region_2 = mob['sub_region_2'].isna()

mob = mob.loc[is_us & has_sub_region_1 & lacks_sub_region_2].reset_index(drop=True)

In [5]:
# Name of the mobility column to analyse. It is the default `activity`
# argument of get_lags() (captured when that function is defined) and is
# also interpolated into the chart title by draw_plot().
activity = 'workplaces'

In [6]:
def get_lags(df, activity=activity):
    """
    Build a long-format frame of an activity's current and lagged values.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'sub_region_1', 'date' and `activity`.
    activity : str
        Name of the column to lag. The default is whatever the module-level
        `activity` variable held when this function was defined.

    Returns
    -------
    pandas.DataFrame
        Columns 'state', 'date', 'lag' and 'change_from_baseline', sorted by
        state and date. 'lag' is one of 'day_of', '30_days_back',
        '100_days_back' or '180_days_back'.

    Notes
    -----
    Lags are taken *within* each 'sub_region_1' group, so the first rows of
    one state never pick up the trailing values of another state.
    The shift is row-based: "n days back" means n rows earlier for the same
    state, which equals n calendar days only if every state has one row per
    consecutive day (assumed here, not checked).
    """
    lag_days = (30, 100, 180)

    # Explicit copy: we only ever work on our own frame, never on a view of
    # the caller's data.
    df = df.loc[:, ['sub_region_1', 'date', activity]].copy()

    # Make sure rows are chronological inside each state before shifting.
    df = df.sort_values(by=['sub_region_1', 'date'])

    # Shift per state; a plain Series.shift would leak values across the
    # boundary between consecutive states.
    by_state = df.groupby('sub_region_1', sort=False)[activity]
    lagged = {f'{n}_days_back': by_state.shift(n) for n in lag_days}
    df = df.assign(**lagged)

    # rename columns
    df = df.rename(columns={
        'sub_region_1': 'state',
        activity: 'day_of'
    })

    # melt all variable columns into one
    df = df.melt(
        id_vars=['state', 'date'],
        value_vars=['day_of', *lagged],
        var_name='lag',
        value_name='change_from_baseline'
    )

    # sort and reset index
    return df.sort_values(by=['state', 'date']).reset_index(drop=True)

In [7]:
def draw_plot(df, days_back='30'):
    """
    Display a per-state chart comparing the latest value with a lagged value.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format frame with the columns 'state', 'date', 'lag' and
        'change_from_baseline' (the shape returned by get_lags()).
    days_back : str or int
        Which lag to compare against 'day_of'; '<days_back>_days_back' must
        occur in df['lag'] (30, 100 or 180 for frames built by get_lags()).

    Returns
    -------
    None
        The chart is rendered with .display() rather than returned.

    Raises
    ------
    ValueError
        If df['lag'] contains no '<days_back>_days_back' rows.

    Notes
    -----
    Only rows for the most recent date in `df` are plotted. The title uses
    the module-level `activity` variable.
    """
    # Accept 30 as well as '30'; the string form is needed for the lag label.
    days_back = str(days_back)
    lag = days_back + '_days_back'

    # Fail loudly instead of silently drawing only the 'day_of' points.
    if not df['lag'].eq(lag).any():
        raise ValueError(
            f"no rows with lag {lag!r} in df; available lags: "
            f"{sorted(df['lag'].unique())}"
        )

    latest_date = df['date'].max()

    # Snapshot of the latest date, restricted to the two series being compared.
    snapshot = df.loc[
        (df['lag'].isin(['day_of', lag])) &
        (df['date'] == latest_date)
    ]

    # Empty layered chart that carries the shared data for both layers below.
    chart = alt.layer(data=snapshot)

    # One line per state joining its two points.
    lines = alt.Chart().mark_line(color='#c51b8a').encode(
        x='change_from_baseline:Q',
        y='state:N',
        detail='state:N'
    )

    # The two points per state, coloured by which series they belong to.
    points = alt.Chart().mark_point(
        size=100,
        opacity=1,
        filled=True
    ).encode(
        x='change_from_baseline:Q',
        y='state:N',
        color=alt.Color(
            'lag:N',
            scale=alt.Scale(
                domain=['day_of', lag],
                range=['#fa9fb5', '#7a0177']
            )
        )
    )

    (chart + lines + points).properties(
        title=[f"{activity} mobility {days_back}-day change by state", f"As of {latest_date}"]
    ).display()

In [8]:
db_change = get_lags(mob)

In [9]:
draw_plot(db_change, days_back='30')