In [50]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json

In [3]:
import geopandas as gpd 
import folium
from geopy.geocoders import Nominatim
import folium.plugins
import branca
import branca.colormap as cm
from folium.plugins import MarkerCluster

In [11]:
DATA_PATH = Path('../../../data/preprocessed/safegraph/social-distancing/FIPS/450/2020')

In [61]:
county_fips = '45039'
state_fips = '45'

In [128]:
month = '06'
day = '02'

df = pd.read_csv(DATA_PATH / f'{month}/{day}/2020-{month}-{day}-social-distancing.csv')

In [129]:
df['county_fips'] = df['origin_census_block_group'].apply(lambda x: str(x)[:5])
df['state_fips'] = df['origin_census_block_group'].apply(lambda x: str(x)[:2])

In [130]:
df.head()

Unnamed: 0,origin_census_block_group,date_range_start,date_range_end,device_count,distance_traveled_from_home,bucketed_distance_traveled,median_dwell_at_bucketed_distance_traveled,completely_home_device_count,median_home_dwell_time,bucketed_home_dwell_time,...,median_non_home_dwell_time,candidate_device_count,bucketed_away_from_home_time,median_percentage_time_home,bucketed_percentage_time_home,mean_home_dwell_time,mean_non_home_dwell_time,mean_distance_traveled_from_home,county_fips,state_fips
0,450410007002,2020-06-02T00:00:00-04:00,2020-06-03T00:00:00-04:00,66,1176,"{""16001-50000"":4,""0"":21,"">50000"":5,""2001-8000""...","{""16001-50000"":25,"">50000"":159,""<1000"":18,""200...",20,584,"{""721-1080"":12,""361-720"":10,""61-360"":4,""<60"":2...",...,44,154,"{""21-45"":1,""541-600"":1,""46-60"":1,""721-840"":1,""...",93,"{""0-25"":13,""76-100"":40,""51-75"":4,""26-50"":1}",602,276,2934,45041,45
1,450450028053,2020-06-02T00:00:00-04:00,2020-06-03T00:00:00-04:00,136,2046,"{""16001-50000"":9,""0"":32,"">50000"":8,""2001-8000""...","{""16001-50000"":41,"">50000"":171,""<1000"":45,""200...",34,754,"{""721-1080"":25,""361-720"":14,""61-360"":12,""<60"":...",...,218,281,"{""21-45"":8,""481-540"":1,""541-600"":2,""46-60"":3,""...",71,"{""0-25"":41,""76-100"":65,""51-75"":22,""26-50"":8}",693,339,4543,45045,45
2,450630210092,2020-06-02T00:00:00-04:00,2020-06-03T00:00:00-04:00,825,4156,"{""16001-50000"":176,""0"":208,"">50000"":60,""2001-8...","{""16001-50000"":45,"">50000"":53,""<1000"":162,""200...",207,847,"{""721-1080"":198,""361-720"":115,""61-360"":58,""<60...",...,138,1410,"{""21-45"":45,""481-540"":46,""541-600"":27,""46-60"":...",83,"{""0-25"":146,""76-100"":459,""51-75"":176,""26-50"":44}",801,286,11843,45063,45
3,450830231012,2020-06-02T00:00:00-04:00,2020-06-03T00:00:00-04:00,115,4058,"{""16001-50000"":20,""0"":30,"">50000"":7,""2001-8000...","{""16001-50000"":69,"">50000"":283,""<1000"":91,""200...",28,779,"{""721-1080"":29,""361-720"":13,""61-360"":9,""<60"":3...",...,104,261,"{""21-45"":4,""481-540"":3,""541-600"":1,""46-60"":3,""...",74,"{""0-25"":29,""76-100"":56,""51-75"":25,""26-50"":5}",664,265,4574,45083,45
4,450190049012,2020-06-02T00:00:00-04:00,2020-06-03T00:00:00-04:00,63,625,"{""16001-50000"":3,""0"":15,"">50000"":10,""2001-8000...","{""16001-50000"":246,"">50000"":167,""<1000"":81,""20...",18,767,"{""721-1080"":14,""361-720"":3,""61-360"":3,""<60"":23...",...,84,121,"{""21-45"":1,""481-540"":1,""46-60"":2,""721-840"":1,""...",73,"{""0-25"":20,""76-100"":29,""51-75"":8,""26-50"":2}",654,350,4185,45019,45


In [131]:
df2 = df[df['county_fips'] == county_fips]
df3 = df[df['state_fips'] == state_fips]

In [132]:
def transition_matrix(df):

    census_block_group_ids = df['origin_census_block_group'].unique()
    cbg_idx = {cbg_id: idx for idx, cbg_id in enumerate(census_block_group_ids)}

    num_block_groups = len(census_block_group_ids)

    travel_amounts = np.zeros((num_block_groups, num_block_groups))

    for i in range(len(df)):
        row = df.iloc[i]

        origin_id = row['origin_census_block_group']
        if origin_id in cbg_idx:
            origin_idx = cbg_idx[origin_id]
        else:
            continue
        #if i < 1:
        #    print(f'origin_id: {origin_id}')

        dests = json.loads(row['destination_cbgs'])
        #if i < 1:
        #    print(dests)
        for dest_id, num_devices in dests.items():
            dest_id = int(dest_id)
            if dest_id in cbg_idx:
                #if i < 10:
                #    print(f'{origin_id} traveled to {dest_id}: {num_devices} devices')    
                dest_idx = cbg_idx[dest_id]
                travel_amounts[dest_idx][origin_idx] += num_devices

    return travel_amounts

## Travel Within the State

In [133]:
travel_amounts = transition_matrix(df3)
travel_amounts

array([[ 53.,   0.,   0., ...,   0.,   0.,   0.],
       [  0., 105.,   0., ...,   1.,   0.,   0.],
       [  0.,   0., 731., ...,   0.,  19.,   0.],
       ...,
       [  0.,   0.,   0., ..., 105.,   0.,   0.],
       [  0.,   1.,  93., ...,   0., 269.,   0.],
       [  0.,   0.,   0., ...,   0.,   0.,  45.]])

In [134]:
# Number of entries filled:
len(travel_amounts.nonzero()[0])

211365

In [135]:
# Number of block groups:
num_block_groups = travel_amounts.shape[0]
num_block_groups

3042

In [136]:
# Portion of the matrix that's filled:
len(travel_amounts.nonzero()[0]) / (num_block_groups * num_block_groups)

0.02284097584507234

## Travel Within the County

In [137]:
travel_amounts = transition_matrix(df2)
travel_amounts

array([[ 75.,   7.,   1.,   0.,   2.,   1.,   3.,   4.,   0.,   3.,   0.,
          0.,   3.,   0.,   4.,   0.,   5.,   3.],
       [  1.,  80.,   1.,   0.,   0.,   0.,   0.,   2.,   0.,   1.,   0.,
          4.,   3.,   0.,  10.,   0.,   4.,   1.],
       [  5.,   3.,  60.,   0.,   1.,   0.,   0.,   5.,   0.,   2.,   0.,
          3.,   2.,   2.,   2.,   0.,   2.,   3.],
       [  2.,   0.,   1.,  75.,   5.,   0.,   0.,   0.,   0.,   2.,   5.,
          1.,   0.,   0.,   0.,   0.,   1.,   1.],
       [  1.,   0.,   0.,   2.,  68.,   0.,   1.,   0.,   0.,   1.,   2.,
          1.,   0.,   0.,   2.,   0.,   1.,   0.],
       [  1.,   3.,   0.,   1.,   0.,  23.,   0.,   0.,   0.,   1.,   1.,
          2.,   1.,   0.,   2.,   4.,   0.,   1.],
       [  3.,   0.,   2.,   0.,   0.,   1.,  45.,   1.,   4.,   1.,   1.,
          0.,   2.,   2.,   1.,   0.,   0.,   0.],
       [  3.,   2.,   5.,   0.,   0.,   0.,   1.,  45.,   0.,   3.,   0.,
          2.,   5.,   1.,   4.,   0.,   5.,   1.],


In [138]:
# Number of entries filled:
len(travel_amounts.nonzero()[0])

201

In [139]:
# Number of block groups:
num_block_groups = travel_amounts.shape[0]
num_block_groups

18

In [140]:
# Portion of the matrix that's filled:
len(travel_amounts.nonzero()[0]) / (num_block_groups * num_block_groups)

0.6203703703703703

In [141]:
eigenvalues, eigenvectors = np.linalg.eig(travel_amounts)

In [142]:
eigenvalues

array([112.3553429 +0.j        ,  93.95015892+0.j        ,
        22.7023659 +0.j        ,  76.56050878+0.j        ,
        74.31196212+0.j        ,  28.27232842+0.j        ,
        66.76162863+0.48561725j,  66.76162863-0.48561725j,
        36.00901683+0.j        ,  60.45693782+1.26960614j,
        60.45693782-1.26960614j,  40.83896984+0.j        ,
        42.60756229+0.j        ,  46.50382053+0.j        ,
        52.14231728+1.97042413j,  52.14231728-1.97042413j,
        49.92407045+0.j        ,  51.24212554+0.j        ])

In [143]:
largest_cbg_idx = eigenvectors[0].argmax()

In [144]:
df2['origin_census_block_group'].unique()[largest_cbg_idx]

450399605002

In [145]:
len(df2['origin_census_block_group'].unique())

18