In [24]:
import glob
import os
import random
from datetime import datetime, timedelta, date
from os import path

from igraph import *

import seaborn as sns
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [32]:
# Various datasets to visualize

# Key of the dataset to visualize; must be one of the keys of `datasets`.
option = 2

# Each entry maps an option to (folder holding the dataset, id of the
# simulation to extract from that dataset).
datasets = {
    1: ("./retreat2020", 29),
    2: ("./cmutest", 32),
}

# Fail right here on an unknown option instead of leaving base_folder and
# sim_id undefined and hitting a NameError in a later cell.
if option not in datasets:
    raise ValueError(
        "Unknown dataset option {!r}; expected one of {}".format(option, sorted(datasets))
    )
base_folder, sim_id = datasets[option]

# File name of the rendered contact graph, written inside the output folder.
output_fn = "contacts.pdf"
    
#     ext = "csv" # Extension of log file, either log or txt
#     ntot = 20   # Total number of participants
#     vstep = 2   # Time step for visualization
#     ostep = 1   # Time step for output to R
#     infer_new_cases_from_inf = True # Time for "new case" is inferred either from infection or removal time
#     asympt = 2  # Asymptomatic period
#     sympt = 2   # Symptomatic period
#     start_sim = datetime.strptime("2020-03-05 13:30", '%Y-%m-%d %H:%M')
#     stop_sim = datetime.strptime("2020-03-05 14:15", '%Y-%m-%d %H:%M')

In [33]:
# Some config

# Coded outcomes:
# 0 = no infection
# 1 = index case
# 2 = transmission
# Maps each outcome code to the color used for the matching graph vertex.
color_dict = {0: "skyblue", 1: "lime", 2: "crimson"}

# Input CSV files are read from <base_folder>/data, results go to
# <base_folder>/output.
data_folder = path.join(base_folder, "data")
output_folder = path.join(base_folder, "output")

# exist_ok=True makes this safe to re-run and avoids the check-then-create
# gap of a separate path.exists() test; missing parent folders are created too.
os.makedirs(output_folder, exist_ok=True)
    
# viz_delta = timedelta(minutes=vstep)
# out_delta = timedelta(minutes=ostep)  

In [34]:
# Load participants and histories

all_users = pd.read_csv(path.join(data_folder, "participants.csv"))
all_events = pd.read_csv(path.join(data_folder, "histories.csv"))

# Keep only the rows that belong to the selected simulation.
users = all_users[all_users["sim_id"] == sim_id]
events = all_events[all_events["sim_id"] == sim_id]
contacts = events[events["type"] == "contact"]

# p2pToSim is built from ALL participants, not just this simulation's: it is
# only consulted for peers that are missing from p2pToId, so restricting it to
# `users` (same keys as p2pToId) would make every such lookup a KeyError.
p2pToSim = pd.Series(all_users.sim_id.values, index=all_users.p2p_id).to_dict()
# Lookups between the peer-to-peer id and the participant id, restricted to
# the selected simulation.
p2pToId = pd.Series(users.id.values, index=users.p2p_id).to_dict()
idTop2p = pd.Series(users.p2p_id.values, index=users.id).to_dict()

# Give every participant id a consecutive 0-based index, in the iteration
# order of idTop2p; these indices are used as graph vertex numbers.
user_index = {kid: idx for idx, kid in enumerate(idTop2p)}

print(users)
print(len(users))
print(len(idTop2p))
print(len(p2pToId))
print(len(user_index))

     id                                p2p_id  sim_id
0   551  95a88f98-bca8-4884-90e8-1adfe792e6f1      32
1   553  5925ef2f-780b-4186-85e3-67a6e333ae77      32
2   554  5b0d44d7-8760-4062-8439-c61b925a5604      32
3   555  085c6f6c-c212-42a0-b106-3d0e8343a395      32
4   556  09432a8c-5892-4d69-ada3-9148031abdd8      32
5   557  35e2abf4-13b6-4809-9fb7-5bd41c1622ed      32
6   558  d6d97c0d-afde-4842-bdc5-60af1b1cbdd3      32
7   559  58df9ee7-8729-4206-876e-9d491480b49c      32
8   560  3cd022e9-6b3f-4843-a6b9-6b68b29b3245      32
9   561  2e385ca5-4105-4b8e-b41c-16ea60c9f831      32
10  562  3cb88365-abd8-4e6e-8cb5-86651ccb7d02      32
11  563  c346ea59-71b9-4af3-b96b-43cac31b8e76      32
12  564  b63275cf-0218-4f71-9e2b-20ee271fbcdd      32
13  565  190cc019-6518-4255-92d2-14012d586682      32
14  566  262f5280-3ff4-4e6e-8145-4652bfec81f1      32
15  567  95078318-78c5-4daf-999d-7c41b217e266      32
16  570  5f8c2cda-7cdc-47d7-8a38-cf3a80ebdcc0      32
17
17
17
17


In [35]:
# Generate contacts

# Re-derive the contact rows from `events` instead of reading the `contacts`
# name: this cell rebinds `contacts` to a dict below, so reading it here would
# break as soon as the cell is run a second time.
contact_events = events[events["type"] == "contact"]

# Maps an unordered pair of vertex indices (smaller, larger) to the contact
# time accumulated between the two participants.
contacts = {}
for id0, p2p1, l01 in zip(
    contact_events.user_id.values,
    contact_events.peer_id.values,
    contact_events.contact_length.values,
):
    n0 = user_index[id0]
    if p2p1 not in p2pToId:
        # The peer is not a participant of the selected simulation. Report the
        # simulation it is registered in when known; .get avoids a KeyError
        # for peers that are absent from the lookup table.
        print("Possible contact with group", p2pToSim.get(p2p1, "unknown"))
        continue
    n1 = user_index[p2pToId[p2p1]]

    # Contacts are undirected, so store each pair with the smaller index first
    # to merge A->B and B->A records into a single entry.
    p01 = (min(n0, n1), max(n0, n1))

    # The division by 60 * 1000 presumably converts milliseconds to minutes
    # (TODO confirm the unit of contact_length).
    # NOTE(review): each record is rounded before being summed, so many short
    # contacts can each round down to 0; confirm this is intended.
    contacts[p01] = contacts.get(p01, 0) + round(l01 / (60 * 1000))

print(contacts)

{(7, 13): 33.0, (8, 13): 11.0, (7, 16): 11.0, (1, 13): 6.0, (7, 12): 34.0, (7, 8): 3.0, (5, 13): 7.0, (2, 12): 8.0, (10, 13): 4.0, (3, 13): 5.0, (6, 13): 4.0, (9, 13): 6.0, (12, 13): 2.0, (2, 13): 6.0, (6, 7): 6.0, (5, 7): 6.0, (3, 7): 7.0, (7, 10): 1.0, (1, 6): 11.0, (5, 6): 13.0, (2, 6): 15.0, (7, 11): 10.0, (1, 7): 7.0, (3, 11): 2.0, (6, 9): 4.0, (11, 13): 4.0, (7, 9): 4.0, (2, 7): 2.0}


In [36]:
# Generate infections and outcomes

inf = events[events["type"] == "infection"]
# Maps the id of each infected participant to the recorded infection source.
infMap = pd.Series(inf["inf"].values, index=inf.user_id).to_dict()

# One outcome code per vertex (see color_dict); everyone starts as 0.
outcomes = [0] * len(users)

for kid, src in infMap.items():
    if not isinstance(src, str):
        # An empty field in the CSV is read as NaN (a float); a substring test
        # on it would raise TypeError, so leave that participant's outcome at 0.
        continue
    idx = user_index[kid]
    if "CASE0" in src:
        outcomes[idx] = 1
    # Checked second on purpose: a source mentioning PEER overrides CASE0.
    if "PEER" in src:
        outcomes[idx] = 2

In [37]:
# Build the weighted contact graph and render it to the output folder

nvert = len(user_index)

# Seed for the layout. igraph draws its random numbers from Python's `random`
# module by default, so seeding it makes the saved figure reproducible.
LAYOUT_SEED = 42

# Keep only the pairs with a positive accumulated contact time.
edgesw = [(n0, n1, w) for (n0, n1), w in contacts.items() if 0 < w]
edges = [(n0, n1) for n0, n1, _ in edgesw]
weights = [w for _, _, w in edgesw]

# https://stackoverflow.com/a/50430444
# Vertices are added up front so that participants without any contact still
# appear in the graph as isolated nodes.
g = Graph(directed=False)
g.add_vertices(nvert)
g.add_edges(edges)
g.es['weight'] = weights
# g.es['label'] = weights

# g = Graph.TupleList(edgesw, weights=True)

# Color every vertex according to its coded outcome.
g.vs["outcome"] = outcomes
g.vs["color"] = [color_dict[out] for out in g.vs["outcome"]]

print(g.is_weighted())

style = {}
style["bbox"] = (800, 800)
style["margin"] = 15
style["vertex_size"] = 8
style["vertex_label_size"] = 8
style["edge_curved"] = False
# https://igraph.org/c/doc/igraph-Layout.html#igraph_layout_graphopt
# The layout is randomized; seed right before computing it so that re-running
# this cell always yields the same picture.
random.seed(LAYOUT_SEED)
style["layout"] = g.layout_fruchterman_reingold(weights=g.es["weight"])

p = plot(g, os.path.join(output_folder, output_fn), **style)

True


In [38]:
# Basic statistics of the contact graph, printed one per line as
# "<label> <value>".
graph_stats = [
    ("Number of vertices in the graph:", g.vcount()),
    ("Number of edges in the graph", g.ecount()),
    ("Is the graph directed:", g.is_directed()),
    ("Maximum degree in the graph:", g.maxdegree()),
]
for stat_label, stat_value in graph_stats:
    print(stat_label, stat_value)
# print("Adjacency matrix:\n", g.get_adjacency())

Number of vertices in the graph: 17
Number of edges in the graph 28
Is the graph directed: False
Maximum degree in the graph: 12
