In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import pickle,math

#### Load discrete-time contacts generated by the mobility model 

In [2]:
# Read the contact list generated by the mobility model into a DataFrame.
# NOTE(review): the commented-out shell command below refers to
# contactsTurin.csv.gz, whereas the file actually read is contactsTubingen.csv
# -- confirm which archive has to be unpacked before running this cell.
#!gunzip contactsTurin.csv.gz
contact_raw2=pd.read_csv("contactsTubingen.csv")

In [3]:
# Show the contacts table as loaded (pandas truncates the middle rows).
contact_raw2

Unnamed: 0.1,Unnamed: 0,indiv_i,indiv_j,t_from,t_to,deltat
0,0,0,597,171.935842,172.105573,0.169731
1,1,0,2430,171.935842,172.142783,0.206941
2,2,0,3083,171.939103,172.142783,0.203680
3,3,0,3495,171.935842,172.142783,0.206941
4,4,0,3974,171.935842,172.029252,0.093410
...,...,...,...,...,...,...
3510145,3510145,9053,5140,149.224441,149.402139,0.177699
3510146,3510146,9053,5190,149.212259,149.266793,0.054534
3510147,3510147,9053,5278,149.212259,149.402139,0.189880
3510148,3510148,9053,6811,149.212259,149.402139,0.189880


Neglect contacts lasting 15 min or less (`deltat` is expressed in hours, so the threshold is 0.25)

In [4]:
# Keep a resolution of 15 min: retain only the contacts whose duration
# `deltat` is strictly greater than 0.25 (i.e. 15 min with `deltat` in hours).
# `reset_index(drop=True)` renumbers the surviving rows from 0 and discards the
# old index directly, instead of first materialising it as an 'index' column
# and then dropping that column in place.
contact_raw = contact_raw2[contact_raw2['deltat'] > 0.25].reset_index(drop=True)
contact_raw

Unnamed: 0.1,Unnamed: 0,indiv_i,indiv_j,t_from,t_to,deltat
0,10,0,17,294.320705,295.562099,1.241394
1,11,0,209,74.228038,75.344297,1.116259
2,12,0,387,132.438909,134.285618,1.846709
3,14,0,388,73.498460,75.344297,1.845838
4,15,0,465,294.422407,295.562099,1.139691
...,...,...,...,...,...,...
2318077,3510137,9053,6916,327.855158,328.580144,0.724987
2318078,3510138,9053,7047,326.623392,327.587698,0.964306
2318079,3510139,9053,7384,327.435902,330.093194,2.657292
2318080,3510140,9053,7416,326.434779,327.217648,0.782869


In [5]:
# Time grid: boundaries spaced `t_unit` apart, from 0 up to and including
# `maxtime` (same time unit as the contact timestamps -- presumably hours,
# so that one `t_unit` is one day).
maxtime = 720
t_unit = 24
period = np.arange(0, maxtime + t_unit, float(t_unit))
print(period)

[  0.  24.  48.  72.  96. 120. 144. 168. 192. 216. 240. 264. 288. 312.
 336. 360. 384. 408. 432. 456. 480. 504. 528. 552. 576. 600. 624. 648.
 672. 696. 720.]


In [6]:
# Extract the columns needed for the aggregation as plain NumPy arrays, so the
# loop further down can index them directly instead of going through pandas.
n_contacts = len(contact_raw)
indiv_i = contact_raw["indiv_i"].to_numpy()
indiv_j = contact_raw["indiv_j"].to_numpy()
t_from = contact_raw["t_from"].to_numpy()
t_to = contact_raw["t_to"].to_numpy()
dt = contact_raw["deltat"].to_numpy()
    

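Contacts are weighted by their duration, measured in units of one day. A contact lasting more than one day is split and counted as multiple contacts of different duration: starting from the day on which it begins, each full day receives weight 1 and the following day receives the remaining fraction of a day. Weights of contacts between the same ordered pair $(i,j)$ on the same day are summed.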

In [7]:
# Aggregate the contacts into per-day weights.
#
# Key:   (indiv_i, indiv_j, day index), where the day index counts from the day
#        in which the contact starts (int(t_from) // t_unit).
# Value: accumulated contact duration in units of `t_unit`.
#
# A contact of duration d contributes weight 1 to each of its int(d // t_unit)
# full units and the leftover fraction d / t_unit - int(d // t_unit) to the
# next one. The split is anchored to the starting day only: a contact shorter
# than `t_unit` is assigned entirely to the day in which it begins, even if it
# runs past the day boundary.
cont_sqzd_dict = {}
for src, dst, start, duration in zip(indiv_i, indiv_j, t_from, dt):
    full_units = int(duration // t_unit)
    first_day = int(start) // t_unit
    for offset in range(full_units + 1):
        if offset < full_units:
            weight = 1
        else:
            weight = duration / t_unit - full_units
        key = (src, dst, first_day + offset)
        cont_sqzd_dict[key] = cont_sqzd_dict.get(key, 0) + weight

The transmission probability in a single contact $(i,j)$ is estimated as $1-e^{-\beta \Delta t_{ij}}$, where $\Delta t_{ij}$ is the duration of the continuous-time contact. The parameter $\beta$ can be straightforwardly generalized to be individual-based and time-dependent. For testing the coarse-graining approximation, values between 0.5 and 1.5 have been used; in practice it should be inferred from data.

In [8]:
# Rate parameter of the transmission probability 1 - exp(-beta * weight).
# NOTE(review): the trailing 1.1383 is kept as found; its origin is not shown
# in this notebook -- presumably an alternative value that was tried.
beta=1.0 #1.1383

In [9]:
# Turn every aggregated (i, j, day) weight into a transmission probability
# 1 - exp(-beta * weight) and emit it in both directions, (i, j) and (j, i),
# so that each contact appears once per orientation in the output list.
cont_sqzd = []
for (src, dst, day), weight in cont_sqzd_dict.items():
    prob = 1 - np.exp(-beta * weight)
    cont_sqzd.append((src, dst, day, prob))
    cont_sqzd.append((dst, src, day, prob))

In [10]:
# Collect the (i, j, t, lambda) tuples into a DataFrame; `t` is the day index
# and `lambda` the transmission probability computed for that contact.
cont2_sqzd = pd.DataFrame(cont_sqzd,columns=['i', 'j', 't', 'lambda'])

In [11]:
# Order the contacts by day and then by the first individual (ascending, which
# is the default for sort_values).
cont_sqzd_sorted = cont2_sqzd.sort_values(by=["t", "i"])

In [12]:
# Write the sorted contacts to CSV without the DataFrame index, so the file
# holds only the columns i, j, t, lambda.
cont_sqzd_sorted.to_csv('cont_miniTubingen.csv', index=False)

In [13]:
# Show the sorted contacts; the leftmost column of the display is the
# DataFrame index carried over from before sorting, not a data column.
cont_sqzd_sorted

Unnamed: 0,i,j,t,lambda
1954,4,753,0,0.045101
1960,4,778,0,0.056322
1962,4,1046,0,0.087391
1966,4,1186,0,0.048164
1976,4,1373,0,0.050473
...,...,...,...,...
3998775,9051,7946,29,0.046694
4550686,9051,3170,29,0.018438
4550748,9051,5307,29,0.031030
4550810,9051,7828,29,0.013198
