In [22]:
import os
import sys
sys.path.insert(1, '..')
from utils import *

%matplotlib inline

In [2]:
# Load the shared configuration file located two directory levels up.
with open(join('..', '..', 'config.yml')) as config_file:
    config = yaml.safe_load(config_file)

# Pull out the configuration entries used by this notebook.
pwd, dpath = config['pwd'], config['storage']
patterns = config['pattern-data-path']
util_datasets, datasets = config['util_datasets'], config['datasets']
colors = config['colors']

# Output directories, both rooted at `pwd`.
FIG_SAVE_DIR, RESULT_SAVE_DIR = (join(pwd, leaf) for leaf in ('figures', 'results'))

# Load the mobility networks stored for the selected area.
AREA_NAME = 'NYC'
network_dir = join(pwd, dpath, 'mobility-networks', f'{AREA_NAME}-CBG-CBG-Nets-msa')
nets = load_networks(network_dir)

100%|████████████████████████████████████████████████████████████████████████████████| 107/107 [00:51<00:00,  2.10it/s]


In [3]:
# Read the 'fips' column of the area's MSA table and cast it to 64-bit integers.
msa_fips_csv = join(pwd, util_datasets, AREA_NAME, datasets['nyc-msa'])
msa_counties = pd.read_csv(msa_fips_csv)['fips'].astype(np.int64)

In [4]:
# Location of the census table, resolved from the configuration entries.
census_data = join(pwd, util_datasets, datasets['census-data'])

# get_census_attributes returns two objects for the given counties; they are kept
# as `census_quartiles` and `census_raw` (the latter feeds the regressions below).
census_attributes = get_census_attributes(census_data, msa_counties)
census_quartiles, census_raw = census_attributes

In [5]:
# Floor-dividing a block-group id by 10**7 strips its last seven digits; rows are
# kept when the remaining leading part appears in nyc_borough_fips_codes.
leading_fips = census_raw.census_block_group // 10**7
nyc_census = census_raw[leading_fips.isin(nyc_borough_fips_codes)]

In [17]:
# Vertex names of every network, one set per network.
node_sets = [set(net_pair[1].vs['name']) for net_pair in nets]

# Names present in all networks.
shared_nodes = reduce(set.intersection, node_sets)

# Keep only names whose first five characters, read as an integer, are a key of
# nyc_borough_fips_codes.
common_cbgs_across_nets = [
    cbg for cbg in list(shared_nodes)
    if int(cbg[:5]) in nyc_borough_fips_codes
]

In [18]:
# Edge attribute used as the weight, and the cut-off date: only networks dated
# strictly after `tdate` are processed.
wattr = 'visits'
tdate = datetime(2020, 1, 1)

raw_mobility_values = []
recent_nets = nets[nets[:, 0] > tdate]
for net_pair in tqdm(recent_nets):
    date, net = net_pair[0], net_pair[1]

    # Weighted out-strength (self-loops included) of every common CBG, returned in
    # the same order as `common_cbgs_across_nets`.
    total_outflow = net.strength(common_cbgs_across_nets, mode='out', weights=wattr, loops=True)

    # One record per (date, cbg) pair.
    raw_mobility_values.extend(
        {'date': date, 'cbg': cbg, 'mobility': outflow}
        for cbg, outflow in zip(common_cbgs_across_nets, total_outflow)
    )

mobility_df = pd.DataFrame(raw_mobility_values)
mobility_df.head()

100%|██████████████████████████████████████████████████████████████████████████████████| 54/54 [00:01<00:00, 33.26it/s]


Unnamed: 0,date,cbg,mobility
0,2020-01-06,360610114012,319.0
1,2020-01-06,360811059003,497.0
2,2020-01-06,360811333004,541.0
3,2020-01-06,360811085002,510.0
4,2020-01-06,360050016002,367.0


In [57]:
def combine_mobility_with_demographics(mob_df, census_df, how='inner'):
    """Join mobility rows to census rows on the block-group identifier.

    Parameters
    ----------
    mob_df : pandas.DataFrame
        Frame with a ``cbg`` column used as the left join key.
    census_df : pandas.DataFrame
        Frame with a ``census_block_group`` column used as the right join key.
    how : str, default 'inner'
        Join type, forwarded unchanged to the pandas merge.

    Returns
    -------
    pandas.DataFrame
        The merged frame; both key columns are kept.
    """
    return mob_df.merge(
        census_df,
        how=how,
        left_on='cbg',
        right_on='census_block_group',
    )

def calc_pct_change(init_value, final_value):
    """Return the change from ``init_value`` to ``final_value`` as a percentage of ``init_value``.

    Works on anything that supports ``-``, ``/`` and ``*`` (plain numbers as well as
    pandas objects). A zero ``init_value`` is not special-cased here.
    """
    delta = final_value - init_value
    return delta / init_value * 100

# Regression periods, keyed by name; each maps to its 'begin-date' and 'end-date'.
# NOTE(review): 2020-06-08 closes 'first-phase' and also opens 'summer-term', so a
# date filter that is inclusive on both bounds counts data dated exactly that day
# in both periods -- confirm this overlap is intended.
reg_periods = {
    name: {'begin-date': begin, 'end-date': end}
    for name, begin, end in (
        ('before-covid', datetime(2020, 1, 1), datetime(2020, 2, 28)),
        ('first-phase', datetime(2020, 3, 1), datetime(2020, 6, 8)),
        ('summer-term', datetime(2020, 6, 8), datetime(2020, 10, 30)),
    )
}

# (earlier period, later period) pairs to compare against each other.
period_rel = [
    ('before-covid', 'first-phase'),
    ('first-phase', 'summer-term'),
]

# Number of weeks at the start and at the end of a period to take into account.
num_weeks_to_consider = 2

In [20]:
# Directory that receives the regression summaries written by the cells below.
reg_dir = join(RESULT_SAVE_DIR, 'mobility')
# open(..., 'w') does not create missing parent directories, so make sure the
# target exists before anything is written into it (no-op when already present).
os.makedirs(reg_dir, exist_ok=True)

In [55]:
# For every period: regress the z-scored percentage change in mobility between the
# first and the last `num_weeks_to_consider` weeks of the period on the z-scored
# census features, and save the OLS summary as a text file in `reg_dir`.
feature_names = ['income', 'white_population', 'asian_population', 'hispanic_population', 'education', 'mean_time_to_work']
window = timedelta(weeks=num_weeks_to_consider)
for period, bounds in reg_periods.items():
    begin_date, end_date = bounds['begin-date'], bounds['end-date']

    # Rows dated within the opening / closing window of the period (bounds inclusive).
    begin_period_mob = mobility_df[mobility_df.date.between(begin_date, begin_date + window)]
    end_period_mob = mobility_df[mobility_df.date.between(end_date - window, end_date)]

    # Per-CBG median mobility inside each window.
    start_mob = begin_period_mob.groupby('cbg')['mobility'].median()
    end_mob = end_period_mob.groupby('cbg')['mobility'].median()

    # Percentage change per CBG; infinite values become NaN and are dropped together
    # with any other NaN.
    mob_change = calc_pct_change(start_mob, end_mob).replace([np.inf, -np.inf], np.nan).dropna()
    # Cast the cbg index to int64 before joining on census_block_group.
    mob_change.index = mob_change.index.astype(np.int64)
    feature_mat = combine_mobility_with_demographics(mob_change.to_frame().reset_index(), census_raw)

    # Standardise response and predictors; NaNs are ignored when computing the
    # statistics, and rows with missing values are dropped by OLS (missing='drop').
    y = pd.DataFrame(stats.zscore(feature_mat['mobility'], nan_policy='omit'), columns=['mobility'])
    X = pd.DataFrame(stats.zscore(feature_mat[feature_names], nan_policy='omit'), columns=feature_names)
    X = sm.add_constant(X)

    result = sm.OLS(y, X, missing='drop').fit()
    summary_path = join(reg_dir, f'mobility-norm-nyc-{period}.txt')
    with open(summary_path, 'w') as f:
        f.write(result.summary().as_text())

In [60]:
# For every (earlier, later) pair in `period_rel`: regress the z-scored percentage
# change between the two periods' per-CBG median mobility on the z-scored census
# features, and save the OLS summary as a text file in `reg_dir`.
feature_names = ['white_population', 'asian_population', 'hispanic_population', 'education', 'mean_time_to_work']
# Intended meaning of the flag: if False, evaluate the change between the start and
# the end of each period; if True, between the medians of the periods.
# NOTE(review): the flag is assigned but never read in this cell -- the loop below
# always compares whole-period medians, whatever its value.
between_periods = False
for start, end in period_rel:
    earlier, later = reg_periods[start], reg_periods[end]
    start_start_date, start_end_date = earlier['begin-date'], earlier['end-date']
    end_start_date, end_end_date = later['begin-date'], later['end-date']

    # All rows dated within each period (bounds inclusive).
    begin_period_mob = mobility_df[mobility_df.date.between(start_start_date, start_end_date)]
    end_period_mob = mobility_df[mobility_df.date.between(end_start_date, end_end_date)]

    # Per-CBG median mobility over each whole period.
    start_mob = begin_period_mob.groupby('cbg')['mobility'].median()
    end_mob = end_period_mob.groupby('cbg')['mobility'].median()

    # Percentage change per CBG; infinite values become NaN and are dropped together
    # with any other NaN.
    mob_change = calc_pct_change(start_mob, end_mob).replace([np.inf, -np.inf], np.nan).dropna()
    # Cast the cbg index to int64 before joining on census_block_group.
    mob_change.index = mob_change.index.astype(np.int64)
    feature_mat = combine_mobility_with_demographics(mob_change.to_frame().reset_index(), census_raw)

    # Standardise response and predictors; NaNs are ignored when computing the
    # statistics, and rows with missing values are dropped by OLS (missing='drop').
    y = pd.DataFrame(stats.zscore(feature_mat['mobility'], nan_policy='omit'), columns=['mobility'])
    X = pd.DataFrame(stats.zscore(feature_mat[feature_names], nan_policy='omit'), columns=feature_names)
    X = sm.add_constant(X)

    result = sm.OLS(y, X, missing='drop').fit()
    summary_path = join(reg_dir, f'mobility-norm-nyc-between-{start}-{end}.txt')
    with open(summary_path, 'w') as f:
        f.write(result.summary().as_text())