In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import igraph as ig
import os
import yaml
from os.path import join
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import matplotlib.dates as mdates
import matplotlib.patches as mpatches
from scipy.stats import entropy
from scipy import stats
from functools import reduce
import sys
sys.path.insert(1, '..')
from utils import *
from collections import Counter
import geopandas as gpd
import matplotlib.ticker as ticker
import geopandas as gpd
from scipy.spatial import distance
import matplotlib.gridspec as gridspec

import warnings
warnings.filterwarnings("ignore")

%matplotlib inline

In [2]:
# Output locations for figures and result files, two levels above the working directory.
FIG_SAVE_DIR, RESULT_SAVE_DIR = (join('..', '..', leaf) for leaf in ('figures', 'results'))

In [3]:
# Read the YAML settings file that sits two levels above the working directory.
CONFIG_PATH = join('..', '..', 'config.yml')
with open(CONFIG_PATH) as f:
    config = yaml.safe_load(f)

# Entries pulled from the config: `pwd` and `dpath` are joined into filesystem
# paths further down; `patterns` is not used in the visible cells.
pwd = config['pwd']
dpath = config['storage']
patterns = config['pattern-data-path']

AREA_NAME = 'NYC'
NETWORK_DIR = join(pwd, dpath, 'Nets', f'{AREA_NAME}-CBG-CBG-Nets')

# Every entry of NETWORK_DIR must be named exactly YYYY-MM-DD (strptime rejects
# anything else) and is loaded as one pickled igraph graph.
# NOTE(review): Read_Pickle unpickles the file contents -- only point this at trusted data.
nets = []
for net_file in tqdm(os.listdir(NETWORK_DIR)):
    net_date = datetime.strptime(net_file, '%Y-%m-%d')
    iso_week = net_date.isocalendar()[1]  # ISO week number of the network's date
    nets.append((net_date, iso_week, ig.Graph.Read_Pickle(join(NETWORK_DIR, net_file))))

# Chronological order, then an array of (date, ISO week, graph) rows.
nets.sort(key=lambda x: x[0])
nets = np.array(nets)

100%|████████████████████████████████████████████████████████████████████████████████| 107/107 [00:37<00:00,  2.82it/s]


In [4]:
# Load the case table, then parse `Date` to datetimes and store `cbg` as strings
# (the merge further down matches `cbg` against graph vertex names).
cases_df = (
    pd.read_csv(join(pwd, 'util_datasets', 'nyc-cases-by-cbgs.csv'))
    .assign(
        Date=lambda frame: pd.to_datetime(frame['Date']),
        cbg=lambda frame: frame['cbg'].astype(str),
    )
)

In [5]:
# Cut-off date: 14 days (two weeks) before the earliest date in the case table.
tdate = cases_df['Date'].min() - timedelta(days=14)

In [6]:
# Centrality measures computed for every vertex of each network.
# Each entry is (label, metric, params):
#   - callable metric: an igraph.Graph method, invoked with **params;
#   - string metric: a 'visits'-weighted strength measure handled inline below
#     ('in' / 'out' strength without loops, 'self' = self-visit ratio); params is None.
cent_metrics = [
    ('Closeness', ig.Graph.closeness, {'cutoff': 3, 'normalized': True}), 
    ('Betweenness', ig.Graph.betweenness, {'cutoff': 3, 'directed': True}), 
    ('Eigenvector', ig.Graph.eigenvector_centrality, {'directed': True, 'scale': True}),
    ('In-Degree', 'in', None),
    ('Out-Degree', 'out', None),
    ('Self-Visit-Ratio', 'self', None)
]

# Added to the self-visit-ratio denominator so a vertex with no visits at all
# yields 0 instead of a division by zero.
SELF_VISIT_EPS = 0.001

# `nets` is indexed as a 2-D array: column 0 is the network date, the last column the graph.
# Keep only the networks dated on or after `tdate`.
tnets = nets[nets[:, 0] >= tdate]

# Maps network date -> DataFrame with one row per vertex: column 'index' holds the
# vertex name, integer columns (0, 1, ...) hold the metrics in `cent_metrics` order.
date2vec = {} 
for tp in tqdm(tnets):
    date = tp[0]
    tnet = tp[-1]
    cbgs = tnet.vs['name']
    row_entries = []
    for cent_metric_name, metric, params in cent_metrics:
        if not isinstance(metric, str) and params:
            if cent_metric_name != 'Eigenvector':
                values = pd.Series(metric(tnet, vertices=cbgs, **params), index=cbgs)
            else:
                # Eigenvector centrality is requested for the whole graph (no `vertices` argument).
                values = pd.Series(metric(tnet, **params), index=cbgs)
        elif isinstance(metric, str) and not params:
            if metric != 'self':
                values = pd.Series(tnet.strength(cbgs, loops=False, mode=metric, weights='visits'), index=cbgs)
            else:
                # Self-visits = in-strength counting loop edges minus in-strength ignoring them.
                # Both calls use the same vertex list so the subtraction is element-aligned.
                in_with_loops = tnet.strength(cbgs, loops=True, mode='in', weights='visits')
                in_without_loops = tnet.strength(cbgs, loops=False, mode='in', weights='visits')
                self_visits = np.subtract(in_with_loops, in_without_loops)
                out_visits = np.array(tnet.strength(cbgs, loops=False, mode='out', weights='visits'))
                total_visits = self_visits + out_visits + SELF_VISIT_EPS

                values = pd.Series(self_visits/total_visits, index=cbgs)
        else:
            # Without this guard a malformed entry would silently re-append the
            # previous metric's values (or hit a NameError on the first entry).
            raise ValueError(
                f"Unsupported centrality spec for {cent_metric_name!r}: "
                f"metric={metric!r}, params={params!r}"
            )
        row_entries.append(values)
        
    date2vec[date] = pd.concat(row_entries, axis=1).reset_index()

100%|██████████████████████████████████████████████████████████████████████████████████| 44/44 [07:16<00:00,  9.93s/it]


In [7]:
# Row labels of `cases_df` grouped by case date.
mob_dates = cases_df.groupby('Date').groups
data = []

# Pair each date's case values with the centrality vectors of the network dated
# exactly two weeks earlier; dates without such a network are skipped.
for mob_date, ind in tqdm(mob_dates.items()):
    td = mob_date - timedelta(weeks=2)
    if td not in date2vec:
        continue
    vec = date2vec[td]
    day_cases = cases_df.loc[ind, ['Norm_Cases', 'cbg']]
    # Inner join on the CBG identifier ('index' is the vertex-name column of `vec`);
    # the join keys are dropped afterwards so only the case value and metrics remain.
    merged = day_cases.merge(vec, how='inner', left_on='cbg', right_on='index')
    data.append(merged.drop(columns=['index', 'cbg']))

100%|██████████████████████████████████████████████████████████████████████████████████| 48/48 [00:00<00:00, 75.03it/s]


In [8]:
# Stack the per-date frames into one table with a fresh 0..n-1 row index.
cases_vec = pd.concat(data, ignore_index=True)

# Columns are renamed by position: the case value first, then the metrics in the
# order they were concatenated upstream.
cases_vec.columns = [
    'cases',
    'closeness',
    'betweenness',
    'eigenvector',
    'in-degree',
    'out-degree',
    'self-visit-ratio',
]

In [19]:
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler

# Ordinary least squares of `cases` on a subset of the centrality features.
# Features are standardized (zero mean, unit variance) before fitting.
X_features = ['betweenness', 'in-degree', 'out-degree', 'self-visit-ratio']
X = pd.DataFrame(
    StandardScaler().fit_transform(cases_vec[X_features]),
    columns=X_features,
    # Reuse cases_vec's row labels so X stays aligned with y even if
    # cases_vec does not carry a default 0..n-1 index.
    index=cases_vec.index,
)
y = cases_vec['cases']

# Add the intercept column, then fit.
X = sm.add_constant(X)
result = sm.OLS(y,X).fit()

In [20]:
# Show the fitted model's summary tables as this cell's output.
result.summary()

0,1,2,3
Dep. Variable:,cases,R-squared:,0.023
Model:,OLS,Adj. R-squared:,0.023
Method:,Least Squares,F-statistic:,1564.0
Date:,"Tue, 26 Oct 2021",Prob (F-statistic):,0.0
Time:,11:11:07,Log-Likelihood:,-555530.0
No. Observations:,260348,AIC:,1111000.0
Df Residuals:,260343,BIC:,1111000.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.5633,0.004,390.271,0.000,1.555,1.571
betweenness,0.0083,0.005,1.543,0.123,-0.002,0.019
in-degree,-0.0910,0.005,-17.650,0.000,-0.101,-0.081
out-degree,0.3109,0.004,70.380,0.000,0.302,0.320
self-visit-ratio,0.1715,0.004,40.730,0.000,0.163,0.180

0,1,2,3
Omnibus:,229367.171,Durbin-Watson:,0.424
Prob(Omnibus):,0.0,Jarque-Bera (JB):,29731814.673
Skew:,3.713,Prob(JB):,0.0
Kurtosis:,54.823,Cond. No.,2.27
