# Normalize virus deaths by mobility changes

In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import sys; sys.path.append('../')
from src.data_loader.data_loader import load_google_mobility, load_deaths, load_interventions, load_counties, load_google_mobility_time_series
from src.utils.dates import get_today, switch_date_format
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
# Base data folder, given relative to the current working directory
data_dir = Path('../data')
# Subfolder that the CSV exports further down are written into
processed_dir = data_dir.joinpath('processed')

In [3]:
# Time series data: the Google mobility table plus the loader's second
# return value, which is used below as a date tag in output file names
(mobility, mobility_date) = load_google_mobility()

# Static data: the counties table plus the loader's second return value,
# which is used below as a date tag in output file names
(counties, counties_date) = load_counties()

In [4]:
def make_mobility_time_series(
    subset=('retail_and_recreation',),
    standardize_dates=True
):
    """
    Build a wide (state, county) x date table of mean mobility change.

    The mobility table is loaded fresh via ``load_google_mobility``. The
    requested categories are averaged row by row, and the result is pivoted
    so that every (state, county) pair becomes one row and every date
    becomes one column.

    Parameters
    ----------
    subset : str or array-like of str
        Mobility categories to average together. Elements are of
        ['retail_and_recreation','grocery_and_pharmacy', 'parks',
        'transit_stations','workplaces','residential']. A single category
        may also be passed as a plain string.
    standardize_dates : bool, default True
        If True, every date column label is replaced by
        ``switch_date_format(label, "%Y-%m-%d")``.

    Returns
    -------
    mobility : pandas.DataFrame
        One row per (state, county) with columns ``state``, ``county`` and
        one column per date holding the mean percent change. When several
        input rows share the same state, county and date, the first one is
        used.
    date
        The second value returned by ``load_google_mobility``, passed
        through unchanged.
    """
    suffix = '_percent_change_from_baseline'
    key_cols = ['state', 'county']
    categories = ['retail_and_recreation','grocery_and_pharmacy', 'parks',
        'transit_stations','workplaces','residential']

    # A bare string would otherwise be iterated character by character and
    # produce meaningless column names.
    if isinstance(subset, str):
        subset = [subset]

    category_cols = [name + suffix for name in categories]
    subset_cols = [name + suffix for name in subset]

    mobility, date = load_google_mobility()

    # Work on derived frames rather than mutating the loader's return value
    # in place, so the object handed back by the loader is never modified.
    # Drop rows with na in column(s) of interest
    mobility = mobility.dropna(axis='rows', subset=subset_cols)
    # Average column(s) of interest and make a new column
    mobility = mobility.assign(
        mean_percent_change=mobility[subset_cols].mean(axis='columns')
    )
    # Remove all old data columns
    mobility = mobility.drop(columns=category_cols)
    # Pivot to form time series
    mobility = mobility.pivot_table(
        index=key_cols,
        columns='date',
        values='mean_percent_change',
        aggfunc='first'
    ).reset_index()

    if standardize_dates:
        # Only the date labels are reformatted; the key columns restored by
        # reset_index are not dates and are left alone.
        mobility = mobility.rename(
            columns={
                label: switch_date_format(label, "%Y-%m-%d")
                for label in mobility.columns
                if label not in key_cols
            }
        )

    return mobility, date

## Processed mobility data

In [5]:
# State FIPS and acronym information
state_info = counties[counties['FIPS'] % 1000 == 0].dropna(axis='columns')
state_info.to_csv(processed_dir / f'state_info_{counties_date}.csv')

In [6]:
# Get mobility in time series format (arguments spelled out; they equal the
# function's defaults)
mobility_ts, mobility_ts_date = make_mobility_time_series(
    subset=['retail_and_recreation'],
    standardize_dates=True,
)

In [7]:
# Use state acronyms: look each value up in an Area_Name -> State mapping
# built from state_info.
state_to_acronym = dict(zip(state_info['Area_Name'], state_info['State']))
# Values without a mapping are kept as they are. That includes acronyms left
# by an earlier run of this cell, so re-running it no longer turns the whole
# column into NaN.
mobility_ts['state'] = (
    mobility_ts['state']
    .map(state_to_acronym)
    .fillna(mobility_ts['state'])
)

In [8]:
# Peek at the first two rows
mobility_ts.head(n=2)

date,state,county,02-15,02-16,02-17,02-18,02-19,02-20,02-21,02-22,...,04-02,04-03,04-04,04-05,04-06,04-07,04-08,04-09,04-10,04-11
0,AL,Autauga County,5.0,0.0,8.0,-2.0,-2.0,-8.0,-3.0,1.0,...,-24.0,-24.0,-39.0,-53.0,-30.0,-35.0,-31.0,-29.0,-30.0,-37.0
1,AL,Baldwin County,17.0,8.0,9.0,7.0,12.0,5.0,15.0,15.0,...,-40.0,-39.0,-48.0,-55.0,-46.0,-47.0,-42.0,-41.0,-41.0,-44.0


In [9]:
# Attach FIPS codes by matching (State, Area_Name) in `counties` against
# (state, county) in the time series. The inner join keeps only rows that
# match on both sides.
# Any FIPS column left over from an earlier run of this cell is dropped first;
# without that, re-running would yield FIPS_x / FIPS_y instead of FIPS.
mobility_ts = (
    counties[['FIPS', 'State', 'Area_Name']]
    .merge(
        mobility_ts.drop(columns='FIPS', errors='ignore'),
        how='inner',
        left_on=['State', 'Area_Name'],
        right_on=['state', 'county'],
    )
    .drop(columns=['State', 'Area_Name'])
)

In [10]:
# Peek at the first two rows after the merge
mobility_ts.head(n=2)

Unnamed: 0,FIPS,state,county,02-15,02-16,02-17,02-18,02-19,02-20,02-21,...,04-02,04-03,04-04,04-05,04-06,04-07,04-08,04-09,04-10,04-11
0,1001,AL,Autauga County,5.0,0.0,8.0,-2.0,-2.0,-8.0,-3.0,...,-24.0,-24.0,-39.0,-53.0,-30.0,-35.0,-31.0,-29.0,-30.0,-37.0
1,1003,AL,Baldwin County,17.0,8.0,9.0,7.0,12.0,5.0,15.0,...,-40.0,-39.0,-48.0,-55.0,-46.0,-47.0,-42.0,-41.0,-41.0,-44.0


In [11]:
# Save to processed data.
# to_csv does not create missing folders, so make sure the target exists.
processed_dir.mkdir(parents=True, exist_ok=True)
# Tag the file with the date that make_mobility_time_series returned together
# with this table, not the one from the separate load near the top.
mobility_ts.to_csv(
    processed_dir / f'mobility_time_series_{mobility_ts_date}.csv',
    index=False,
)