In [30]:
import pandas as pd
import xarray as xr
import xsimlab as xs
from episimlab.partition import partition
from episimlab.setup.coords import InitDefaultCoords

# Load `travel.csv`

In [2]:
tr2020 = pd.read_csv('/Users/kpierce/COVID19/safegraph_mobility/2020_travel_for_contact_partitioning.csv')

In [3]:
tr2020.head()

Unnamed: 0.1,Unnamed: 0,source,destination,age,n,date,destination_type
0,0,76511,76511,<5,47.377358,2020-05-30,local
1,1,76511,76511,18-49,638.320755,2020-05-30,local
2,2,76511,76511,5-17,202.754717,2020-05-30,local
3,3,76511,76511,50-64,223.641509,2020-05-30,local
4,4,76511,76511,65+,164.037736,2020-05-30,local


In [4]:
# Restrict the travel table to March 2020. The `date` column holds
# 'YYYY-MM-DD' strings, so plain string comparison orders them chronologically.
on_or_after_mar1 = tr2020['date'] >= '2020-03-01'
before_apr1 = tr2020['date'] < '2020-04-01'
mar2020_tr = tr2020[on_or_after_mar1 & before_apr1]

In [5]:
mar2020_tr.head()

Unnamed: 0.1,Unnamed: 0,source,destination,age,n,date,destination_type
30555,30555,76511,76511,<5,35.053846,2020-03-11,local
30556,30556,76511,76511,18-49,472.284615,2020-03-11,local
30557,30557,76511,76511,5-17,150.015385,2020-03-11,local
30558,30558,76511,76511,50-64,165.469231,2020-03-11,local
30559,30559,76511,76511,65+,121.369231,2020-03-11,local


In [27]:
mar2020_tr.to_csv('/Users/kpierce/episimlab/data/mar2020_travel.csv')

# Load `contact.csv`

In [20]:
# Age-group labels shared by the rows and the columns of the contact matrix.
age_labels = ['<5', '5-17', '18-49', '50-64', '65+']

# Load and label the matrix in ONE cell so that it can be re-run safely: the
# frame is re-read from disk every time, so the in-place labelling below never
# sees an already-modified frame (which would raise a length mismatch).
# The file has no header row, hence header=None.
polymod = pd.read_csv(
    '~/COVID19/SEIR-city/data/Cities_Data/ContactMatrixAll_5AgeGroups.csv',
    header=None,
)
polymod.columns = age_labels
# Row labels are stored in an `index` column so they survive the melt below.
polymod['index'] = age_labels

# Reshape to long form: one row per (age1, age2) pair, where age1 comes from
# the row labels and age2 from the column labels.
polymod_melt = (
    polymod
    .melt(id_vars='index', value_vars=age_labels)
    .rename(columns={'index': 'age1', 'variable': 'age2', 'value': 'daily_per_capita_contacts'})
)

In [26]:
polymod_melt

Unnamed: 0,age1,age2,daily_per_capita_contacts
0,<5,<5,2.160941
1,5-17,<5,0.597341
2,18-49,<5,0.382203
3,50-64,<5,0.352397
4,65+,<5,0.189756
5,<5,5-17,2.164117
6,5-17,5-17,8.14697
7,18-49,5-17,2.431392
8,50-64,5-17,1.8851
9,65+,5-17,0.892909


In [28]:
polymod_melt.to_csv('/Users/kpierce/episimlab/data/polymod_contacts.csv')

# Partition contacts

In [75]:
import multiprocessing as mp
import tempfile
from datetime import datetime

In [111]:
empty_df = pd.DataFrame()

In [112]:
empty_df

In [141]:
# Prototype of the "no travel data" guard: fail with a message that names the
# date whenever the dataframe has no rows.
dummy_date = '2022-01-01'
assert not empty_df.empty, 'No travel data for date {}.'.format(dummy_date)

AssertionError: No travel data for date 2022-01-01.

In [126]:
empty_df.empty

True

In [140]:
# Sanity check: the melted contact table must contain at least one row.
assert not polymod_melt.empty

In [119]:
empty_df.empty == False

False

In [104]:
# Unique March dates in descending order, so that successive `pop()` calls
# hand back the dates from earliest to latest.
date_list = list(mar2020_tr['date'].unique())
date_list.sort(reverse=True)

In [105]:
current_date = date_list.pop()

In [106]:
current_date

'2020-03-01'

In [107]:
date_list.pop()

'2020-03-02'

In [108]:
date_list.pop()

'2020-03-03'

In [109]:
empty_list = []

In [110]:
empty_list.pop()

IndexError: pop from empty list

In [99]:
travel_20200311 = mar2020_tr[mar2020_tr['date'] == '2020-03-11']

In [100]:
travel_20200311.head()

Unnamed: 0.1,Unnamed: 0,source,destination,age,n,date,destination_type
30555,30555,76511,76511,<5,35.053846,2020-03-11,local
30556,30556,76511,76511,18-49,472.284615,2020-03-11,local
30557,30557,76511,76511,5-17,150.015385,2020-03-11,local
30558,30558,76511,76511,50-64,165.469231,2020-03-11,local
30559,30559,76511,76511,65+,121.369231,2020-03-11,local


In [101]:
travel_20200311.to_csv('/Users/kpierce/episimlab/data/20200311_travel.csv')

In [79]:
def partition_dates(df, date,
                    contacts_fp='/Users/kpierce/episimlab/data/polymod_contacts.csv',
                    age_group=None, risk_group=None):
    """Partition contacts for a single date of travel data.

    The rows of ``df`` whose ``date`` column equals ``date`` are written to a
    temporary CSV, which is handed to ``partition.Partition`` as its travel
    file. After ``initialize()`` the resulting ``contact_partitions`` table is
    tagged with the date and returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Travel table containing at least a ``date`` column.
    date : str
        Value of the ``date`` column to select (compared with ``==``).
    contacts_fp : str, optional
        Path to the contacts CSV passed to ``Partition``.
    age_group : list of str, optional
        Age-group labels passed to ``Partition``; defaults to
        ``['<5', '5-17', '18-49', '50-64', '65+']``.
    risk_group : list of str, optional
        Risk-group labels passed to ``Partition``; defaults to
        ``['high', 'low']``.

    Returns
    -------
    The ``contact_partitions`` attribute of the initialized ``Partition``,
    with a ``date`` column set to ``date``.

    Raises
    ------
    ValueError
        If ``df`` contains no rows for ``date``.
    """
    # Build the list defaults per call so no mutable default is ever shared.
    if age_group is None:
        age_group = ['<5', '5-17', '18-49', '50-64', '65+']
    if risk_group is None:
        risk_group = ['high', 'low']

    # get a dataframe subset; fail early, naming the date, when it is empty
    date_df = df[df['date'] == date]
    if date_df.empty:
        raise ValueError('No travel data for date {}.'.format(date))

    # The context manager removes the temp file even if partitioning raises.
    with tempfile.NamedTemporaryFile(suffix='.csv') as date_temp:
        # Write by path (not through the open handle) so pandas opens, flushes
        # and closes the file itself before Partition reads it back by name.
        date_df.to_csv(date_temp.name)

        # partition dates
        # NOTE(review): it is not visible here whether Partition reads the
        # travel file in its constructor or in initialize(); both calls stay
        # inside the `with` block so the file exists in either case.
        part = partition.Partition(
            travel_fp=date_temp.name,
            contacts_fp=contacts_fp,
            age_group=age_group,
            risk_group=risk_group
        )
        part.initialize()

    part.contact_partitions['date'] = date

    return part.contact_partitions

In [80]:
mar2020_tr['date'].unique()[0:4]

array(['2020-03-11', '2020-03-02', '2020-03-04', '2020-03-17'],
      dtype=object)

In [81]:
# Time one partitioning run executed in a single worker process.
start = datetime.now()
print('Start time is {}'.format(start))

# Compute the distinct dates once and reuse them for both task lists.
unique_dates = mar2020_tr['date'].unique()
# One (dataframe, date) argument tuple per date in March ...
tasks = [(mar2020_tr, i) for i in unique_dates]
# ... but only the first date is actually submitted below (trial run).
short_tasks = [(mar2020_tr, unique_dates[0])]

# Leaving the `with` block terminates the pool, so the worker is cleaned up
# even when a task raises; every result is therefore collected inside it.
with mp.Pool(1) as pool:
    results = [pool.apply_async(partition_dates, t) for t in short_tasks]
    pool.close()  # no further tasks will be submitted
    partitioned_dfs = [r.get() for r in results]
    pool.join()   # wait for the worker to exit cleanly

stop = datetime.now()
print('Stop time is {}'.format(stop))
print('Total time is {}'.format(stop-start))

Start time is 2021-05-11 08:10:28.609346
Stop time is 2021-05-11 09:02:36.847143
Total time is 0:52:08.237797


In [82]:
partitioned_dfs

[            i      j  age_i  age_j  partitioned_per_capita_contacts  \
 0       76511  76511  18-49  18-49                         0.385829   
 1       76511  76530  18-49  18-49                         0.016355   
 2       76511  76537  18-49  18-49                         0.027251   
 3       76511  76574  18-49  18-49                         0.084098   
 4       76511  76578  18-49  18-49                         0.002905   
 ...       ...    ...    ...    ...                              ...   
 153870  78957  78757     <5     <5                         0.000072   
 153871  78957  78758     <5     <5                         0.000366   
 153872  78957  78759     <5     <5                         0.000692   
 153873  78957  78953     <5     <5                         0.001171   
 153874  78957  78957     <5     <5                         0.133104   
 
               date  
 0       2020-03-11  
 1       2020-03-11  
 2       2020-03-11  
 3       2020-03-11  
 4       2020-03-11  
 .

In [83]:
partitioned_df_final = pd.concat(partitioned_dfs)

In [84]:
partitioned_df_final.head()

Unnamed: 0,i,j,age_i,age_j,partitioned_per_capita_contacts,date
0,76511,76511,18-49,18-49,0.385829,2020-03-11
1,76511,76530,18-49,18-49,0.016355,2020-03-11
2,76511,76537,18-49,18-49,0.027251,2020-03-11
3,76511,76574,18-49,18-49,0.084098,2020-03-11
4,76511,76578,18-49,18-49,0.002905,2020-03-11


In [85]:
# Show only the rows where the two age groups differ.
partitioned_df_final.loc[
    partitioned_df_final['age_i'].ne(partitioned_df_final['age_j'])
]

Unnamed: 0,i,j,age_i,age_j,partitioned_per_capita_contacts,date
6155,76511,76511,18-49,5-17,0.034543,2020-03-11
6156,76511,76530,18-49,5-17,0.003885,2020-03-11
6157,76511,76537,18-49,5-17,0.006473,2020-03-11
6158,76511,76574,18-49,5-17,0.019975,2020-03-11
6159,76511,76578,18-49,5-17,0.000690,2020-03-11
...,...,...,...,...,...,...
147715,78957,78757,<5,65+,0.000009,2020-03-11
147716,78957,78758,<5,65+,0.000048,2020-03-11
147717,78957,78759,<5,65+,0.000090,2020-03-11
147718,78957,78953,<5,65+,0.000152,2020-03-11


In [86]:
partitioned_df_final['date'].unique()

array(['2020-03-11'], dtype=object)

In [87]:
partitioned_df_final.to_csv('/Users/kpierce/episimlab/data/20200311_contact_matrix.csv')

In [89]:
repart = partition.Partition(
    travel_fp='/Users/kpierce/COVID19/safegraph_mobility/2020_travel_for_contact_partitioning.csv',
    contacts_fp='/Users/kpierce/episimlab/data/polymod_contacts.csv',
    age_group=['<5', '5-17', '18-49', '50-64', '65+'],
    risk_group=['high', 'low']
)

In [92]:
# assign some class attributes
repart.contact_partitions = partitioned_df_final
repart.age_group = ['<5', '5-17', '18-49', '50-64', '65+']
repart.age_dims = ['source_age', 'destination_age']

In [93]:
contact_matrix = repart.contact_matrix()

In [97]:
contact_matrix

In [98]:
contact_matrix.to_netcdf('/Users/kpierce/episimlab/data/20200311_contact_matrix.nc')