In [1]:
import glob
import os
from datetime import datetime, timedelta, date
from os import path

from igraph import *

import seaborn as sns
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Various datasets to visualize
#
# `option` selects which dataset the rest of the notebook works on; each option
# must bind `base_folder`, the folder that later cells join "data" and "output" onto.

option = 1

if option == 1:
    base_folder = "./osc2020"
    # Disabled settings, kept for reference (nothing in this cell reads them):
#     ext = "csv" # Extension of log file, either log or txt
#     ntot = 20   # Total number of participants
#     vstep = 2   # Time step for visualization
#     ostep = 1   # Time step for output to R
#     infer_new_cases_from_inf = True # Time for "new case" is inferred either from infection or removal time
#     asympt = 2  # Asymptomatic period
#     sympt = 2   # Symptomatic period
#     start_sim = datetime.strptime("2020-03-05 13:30", '%Y-%m-%d %H:%M')
#     stop_sim = datetime.strptime("2020-03-05 14:15", '%Y-%m-%d %H:%M')
else:
    # Fail here rather than with a NameError on `base_folder` in a later cell.
    raise ValueError("Unknown dataset option: {!r}".format(option))

In [51]:
# Some config

# Coded outcomes:
# 0 = no infection
# 1 = index case
# 2 = transmission
# Maps each outcome code to a color name.
color_dict = {0: "skyblue", 1: "lime", 2: "crimson"}

# Both folders live directly under the dataset's base folder.
data_folder = path.join(base_folder, "data")
output_folder = path.join(base_folder, "output")
# exist_ok=True makes this safe to re-run; makedirs also creates any missing
# parent folder, which a bare os.mkdir would fail on.
os.makedirs(output_folder, exist_ok=True)

# Disabled: these need vstep/ostep, which are not defined.
# viz_delta = timedelta(minutes=vstep)
# out_delta = timedelta(minutes=ostep)

In [61]:
# Load participants and histories

users = pd.read_csv(path.join(data_folder, "participants.csv"))
histories = pd.read_csv(path.join(data_folder, "histories.csv"))

# p2p_id -> sim_id for every participant, regardless of group.
p2pToSim = pd.Series(users.sim_id.values, index=users.p2p_id).to_dict()

# sim_id of each group and the PDF file name used for its contact graph.
group_outputs = {
    5: "contacts-center.pdf",  # center group
    6: "contacts-south.pdf",   # south group
    7: "contacts-north.pdf",   # north group
}

# Group being analysed: change `gr` to one of the keys above to switch.
gr = 7
fn = group_outputs[gr]

# Participants and history rows of the selected group; `cn` keeps only the
# history rows whose type is "contact".
dfn = users.loc[users["sim_id"] == gr]
hn = histories.loc[histories["sim_id"] == gr]
cn = hn.loc[hn["type"] == "contact"]

# Two-way lookup between a participant's p2p_id and id within the group.
p2pToId = pd.Series(dfn.id.values, index=dfn.p2p_id).to_dict()
idTop2p = pd.Series(dfn.p2p_id.values, index=dfn.id).to_dict()

# Assign each participant id a consecutive 0-based position, in the
# iteration order of idTop2p.
user_index = {kid: pos for pos, kid in enumerate(idTop2p)}

# Sanity check: all four sizes should agree.
for mapping in (dfn, idTop2p, p2pToId, user_index):
    print(len(mapping))

126
126
126
126


In [62]:
# Generate contacts
#
# Counts the "contact" history rows between each pair of participants of the
# selected group. `contacts` maps an index pair (low, high) -- positions taken
# from user_index -- to the number of rows seen for that pair, in either direction.

node0 = cn.user_id.values   # id of the participant that logged the contact
node1 = cn.peer_id.values   # p2p_id of the peer that was contacted

contacts = {}
for id0, p2p1 in zip(node0, node1):
    n0 = user_index[id0]
    if p2p1 not in p2pToId:
        # The peer is not in the selected group. Report which group it belongs
        # to; .get() keeps a peer id that is missing from participants.csv
        # from aborting the whole count with a KeyError.
        print("Possible contact with group", p2pToSim.get(p2p1, "unknown"))
        continue
    n1 = user_index[p2pToId[p2p1]]
    # Order the pair so (a, b) and (b, a) are counted as the same contact.
    p01 = tuple(sorted((n0, n1)))
    contacts[p01] = contacts.get(p01, 0) + 1

print(contacts)

Possible contact with group 5
{(118, 124): 2, (44, 118): 6, (21, 59): 13, (9, 123): 2, (53, 118): 2, (49, 118): 1, (107, 118): 2, (24, 123): 1, (39, 123): 1, (85, 110): 94, (8, 85): 3, (8, 110): 13, (86, 110): 190, (85, 86): 4, (14, 84): 30, (31, 83): 230, (85, 125): 16, (2, 107): 3, (2, 49): 2, (84, 85): 4, (49, 101): 1, (84, 97): 1, (54, 84): 1, (48, 91): 1, (29, 84): 1, (52, 117): 3, (52, 54): 5}


In [63]:
# Generate infections and outcomes
#
# `outcomes` holds one code per participant of the selected group, positioned
# by user_index: it starts at 0 and becomes 1 when the infection source
# contains "CASE0" or 2 when it contains "PEER".

inf = hn[hn["type"] == "infection"]
# user_id -> value of the "inf" column of that user's infection row.
infMap = pd.Series(inf.inf.values, index=inf.user_id).to_dict()
outcomes = [0] * len(dfn)

for kid, src in infMap.items():
    idx = user_index[kid]
    # "PEER" is tested first so that it wins when both markers are present.
    if "PEER" in src:
        outcomes[idx] = 2
    elif "CASE0" in src:
        outcomes[idx] = 1

In [64]:
# Build the contact graph and write it to <output_folder>/<fn>.
#
# One vertex per participant of the selected group (vertex i is the participant
# at position i in user_index), colored by outcome through color_dict; one
# undirected edge per pair present in `contacts`. The per-pair contact counts
# are not used here.
nvert = len(user_index)

g = Graph(directed=False)
g.add_vertices(nvert)

g.vs["outcome"] = outcomes
g.vs["color"] = [color_dict[out] for out in g.vs["outcome"]]

# Add every edge in a single call instead of one add_edges() call per pair.
g.add_edges(list(contacts))

# NOTE(review): no random seed is set, so the "fr" layout presumably differs
# between runs -- confirm whether a reproducible figure is wanted.
layout = g.layout("fr")
p = plot(g, path.join(output_folder, fn),
         layout = layout, vertex_size = 12, edge_arrow_width = 1, edge_arrow_size = 0.5, bbox=(0, 0, 800, 800))