## Libraries and Data Import 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import seaborn as sns

In [2]:
# Deviance table: keep only the district id and the two deviance columns used below.
deviance = pd.read_csv('deviance_df.csv').loc[:, ['districtId', 'lockdown_z', 'summer_z']]

# Remaining inputs are loaded unchanged.
static_data = pd.read_csv('processed_static_data.csv')
mobility = pd.read_csv('mobility_features.csv')
all_data = pd.read_csv('all_data_with_intervals.csv')

### Static explanators? 

In [3]:
# Join deviance with the static district features, then keep only each
# column's correlation with the two deviance measures.
static_corr = (
    deviance
    .merge(static_data, on='districtId', suffixes=('', '_copy'))
    .corr()
    .loc[:, ['lockdown_z', 'summer_z']]
)

In [4]:
# Show only correlations whose magnitude exceeds 0.09; everything else is masked to NaN.
static_corr.where(static_corr.abs() > .09)

Unnamed: 0,lockdown_z,summer_z
districtId,,
lockdown_z,1.0,-0.104732
summer_z,-0.104732,1.0
SES_highschool_dropouts,,
SES_residence_vacancies,,
SES_unemployment,,
RUR_surrounding_pop,,
SES_average_income,,
RUR_distance_to_urban_centers,,
SES_male_life_expectancy,0.128861,


## Mobility feature wrangling, baseline creation 

Re-summing here to avoid the bureaucracy of doing it at the source.

In [5]:
features_to_resum = ['Count_internal', 'Count_incoming']

# For each feature, in both its plain and '_p_pop' variant, rebuild the
# all-days total as workday value + day-off value.
for feature in features_to_resum:
    for suffix in ('', '_p_pop'):
        workday_col = feature + '_workday' + suffix
        day_off_col = feature + '_day_off' + suffix
        mobility[feature + suffix] = mobility[workday_col].add(mobility[day_off_col])

Since we now look at two years (2019 and 2020), the week number is no longer unique. Thus, I add the year to get a week identifier in YYWW format:

In [6]:
# Two-digit year followed by week number, e.g. year 2019, week 27 -> 1927.
mobility['yyww'] = (mobility['year'] - 2000) * 100 + mobility['week_no']

# Weeks for which we did not have a full 7 days of data. They sit at the
# edges of the available time frames; 1927, the start of the 2019 data,
# luckily begins on a Monday and is therefore kept.
bad_weeks = [1940, 2001, 2040]
mobility = mobility.loc[~mobility['yyww'].isin(bad_weeks)].copy()

There are multiple candidates for the end-point of the baseline interval: the first week of February (first reported cases in Germany), the end of 2019 (New Year's), or any point up until the lockdown:

In [7]:
# Candidate baseline windows, each ending strictly before the given YYWW week.
til_february = mobility.loc[mobility['yyww'].lt(2006)].copy()
baseline_2019 = mobility.loc[mobility['yyww'].lt(2000)].copy()
til_lockdown = mobility.loc[mobility['yyww'].lt(2011)].copy()

In [8]:
# Per-district mean and standard deviation of the two count features,
# computed once for each candidate baseline window.
means, SDs = [], []
for window in [til_february, baseline_2019, til_lockdown]:
    per_district = window.groupby('districtId')[['Count_incoming', 'Count_internal']]
    means.append(per_district.mean())
    SDs.append(per_district.std())

# Put the windows side by side on the district index. The first window keeps
# its column names; columns of the i-th window get the suffix str(i).
means_df = means[0]
for i, window_means in enumerate(means[1:], start=1):
    means_df = means_df.merge(window_means, left_index=True, right_index=True,
                              suffixes=('', str(i)))

SDs_df = SDs[0]
for i, window_sds in enumerate(SDs[1:], start=1):
    SDs_df = SDs_df.merge(window_sds, left_index=True, right_index=True,
                          suffixes=('', str(i)))

In [9]:
# How strongly do the three baseline windows agree with each other?
means_corr = means_df.corr()
sds_corr = SDs_df.corr()
print('Means:\n', means_corr, '\n\n\nSDs:\n', sds_corr)

Means:
                  Count_incoming  Count_internal  Count_incoming1  \
Count_incoming         1.000000        0.792243         0.999824   
Count_internal         0.792243        1.000000         0.795857   
Count_incoming1        0.999824        0.795857         1.000000   
Count_internal1        0.792031        0.999945         0.795749   
Count_incoming2        0.999932        0.790446         0.999585   
Count_internal2        0.792361        0.999971         0.795929   

                 Count_internal1  Count_incoming2  Count_internal2  
Count_incoming          0.792031         0.999932         0.792361  
Count_internal          0.999945         0.790446         0.999971  
Count_incoming1         0.795749         0.999585         0.795929  
Count_internal1         1.000000         0.790200         0.999857  
Count_incoming2         0.790200         1.000000         0.790596  
Count_internal2         0.999857         0.790596         1.000000   


SDs:
                  Count_

Whether the baseline uses only 2019 data, also includes January 2020, or runs up until the lockdown does not make a real difference. I'll stick with the 2019 baseline, since New Year's feels like the most natural cut-off.

In [10]:
# Every column whose name contains 'Count' gets a baseline and, later, a z-score.
vars_to_include = baseline_2019.filter(like='Count', axis='columns').columns

In [11]:
# Per-district baseline mean of each count feature, with districtId as a regular column.
base_mean = (
    baseline_2019
    .groupby('districtId')[vars_to_include]
    .agg('mean')
    .reset_index()
)

In [12]:
# Per-district baseline standard deviation of each count feature, with districtId as a regular column.
base_std = (
    baseline_2019
    .groupby('districtId')[vars_to_include]
    .agg('std')
    .reset_index()
)

In [13]:
# One row per district: every count feature appears twice, as <name>_mean and <name>_std.
baseline = base_mean.merge(base_std, on='districtId', suffixes=('_mean', '_std'))

In [14]:
# Attach the per-district baseline statistics and deviance scores to every row.
# NOTE: this rebinds `all_data`, so re-running the cell merges onto the already
# merged frame; reload `all_data` before executing it again.
all_data = pd.merge(
    pd.merge(all_data, baseline, on='districtId'),
    deviance,
    on='districtId',
)

In [15]:
# Standardise each count feature against its district baseline: (value - mean) / std.
for col in vars_to_include:
    centred = all_data[col].sub(all_data[col + '_mean'])
    all_data[col + '_z'] = centred.div(all_data[col + '_std'])

Now we have all mobility features expressed as z-scores relative to each district's 2019 baseline in the data set! Let's see what we find!

## Mobility changes as PD explanators? 

In [16]:
def plot_district_mobi(districtId, count_type='internal'):
    """Plot one district's standardised mobility count against week number.

    Parameters
    ----------
    districtId
        Value matched against the ``districtId`` column of ``all_data``.
    count_type : str, default 'internal'
        Selects the column ``'Count_<count_type>_z'`` for the y-axis.

    Displays a Plotly figure; returns nothing.
    """
    count_var = 'Count_' + count_type + '_z'
    dist_data = all_data.loc[all_data['districtId'] == districtId].copy()

    trace = go.Scatter(x=dist_data['week_no'], y=dist_data[count_var])
    fig = go.Figure()
    fig.add_trace(trace)
    fig.show()

In [17]:
def mobility_change_indices(bool_filter=None):
    """Summarise each district's standardised mobility counts.

    For every district, computes the max, min, mean, median and standard
    deviation of ``Count_internal_z`` and ``Count_incoming_z`` over the
    selected rows of the module-level ``all_data`` frame.

    Parameters
    ----------
    bool_filter : boolean mask usable as ``all_data[bool_filter]``, optional
        Rows to include in the aggregation. ``None`` (default) uses all rows.

    Returns
    -------
    pandas.DataFrame
        One row per district present in the selected rows. The first column
        is ``districtId``, followed by ``<mobi_type>_<aggr>`` columns
        (``internal_max``, ``incoming_max``, ``internal_min``, ...).
    """
    # Aggregate the *filtered* rows; grouping the global frame here would
    # silently ignore `bool_filter`.
    in_df = all_data if bool_filter is None else all_data[bool_filter]
    grouped = in_df.groupby('districtId')

    indices = {}
    for aggr in ['max', 'min', 'mean', 'median', 'std']:
        for mobi_type in ['internal', 'incoming']:
            var_name = 'Count_' + mobi_type + '_z'
            # Dispatch the aggregation by name instead of eval-ing a code string.
            indices[mobi_type + '_' + aggr] = grouped[var_name].agg(aggr)

    # Keep the index name explicit so reset_index always yields a 'districtId' column.
    return pd.DataFrame(indices).rename_axis('districtId').reset_index()
            
            
        
    

In [18]:
# Mobility-change indices over all rows, lockdown rows only, and summer-vacation rows only.
overall_MC = mobility_change_indices()
lockdown_MC = mobility_change_indices(all_data['active_lockdown'])
summer_MC = mobility_change_indices(all_data['summer_vac'])

# The inner merge leaves the overall columns unsuffixed and tags the lockdown
# ones with '_lockdown'. The outer merge then renames the still-unsuffixed
# overall columns to '_overall' and tags the summer ones with '_summer'.
mobi_changes = pd.merge(
    pd.merge(overall_MC, lockdown_MC, on='districtId', suffixes=('', '_lockdown')),
    summer_MC,
    on='districtId',
    suffixes=('_overall', '_summer'),
)

In [19]:
# districtId is the only column the two frames share, so name it as the join key.
deviance_MC = pd.merge(deviance, mobi_changes, on='districtId')

In [20]:
# Correlation of every column with the two deviance measures; show only |r| > 0.09.
cor = deviance_MC.corr().loc[:, ['lockdown_z', 'summer_z']]
cor.where(cor.abs() > .09)

Unnamed: 0,lockdown_z,summer_z
districtId,,
lockdown_z,1.0,-0.104732
summer_z,-0.104732,1.0
internal_max_overall,,
incoming_max_overall,,
internal_min_overall,,
incoming_min_overall,,
internal_mean_overall,,
incoming_mean_overall,,
internal_median_overall,,


As the Germans say: Außer Spesen nix gewesen, no measure of mobility reduction behavior had any correlation w/ the deviances

In [21]:
# Rows at or after the point where a district's cumulative deaths / cases are positive.
since_first_death = all_data[all_data.AnzahlTodesfall_cumul > 0].copy()
since_first_case = all_data[all_data.AnzahlFall_cumul > 0].copy()

# Number of such rows per district, aligned on districtId.
deviance_timing = deviance.set_index('districtId')
deviance_timing['since_first_case'] = since_first_case.groupby('districtId').size()
deviance_timing['since_first_death'] = since_first_death.groupby('districtId').size()

# Districts without any matching row have no group and come out as NaN; count
# them as 0. Assign the result back explicitly: an in-place replace on the
# column accessor is a chained operation that does not update the frame under
# pandas Copy-on-Write.
deviance_timing['since_first_death'] = deviance_timing['since_first_death'].fillna(0)

# Correlation with the two deviance measures; show only |r| > 0.09.
cor = deviance_timing.corr()[['lockdown_z', 'summer_z']]
cor[abs(cor) > .09]

Unnamed: 0,lockdown_z,summer_z
lockdown_z,1.0,-0.104732
summer_z,-0.104732,1.0
since_first_case,,
since_first_death,,-0.135581
