In [1]:
import glob
import os
from datetime import datetime, timedelta, date
from os import path

from igraph import *

import seaborn as sns
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Various datasets to visualize
#
# `option` picks the dataset. Each branch must define:
#   base_folder - root directory of the dataset (its "data" and "output"
#                 subfolders are derived from it in the config cell)
#   sim_id      - value matched against the sim_id column when filtering
#                 participants and histories

option = 1

if option == 1:
    base_folder = "./retreat2020"
    sim_id = 29

#     base_folder = "./cmutest"
#     sim_id = 32
else:
    # Fail here instead of letting later cells hit a NameError, or silently
    # reuse base_folder/sim_id left over from a previous run in this kernel.
    raise ValueError("Unknown dataset option: {!r}".format(option))

# Disabled simulation settings, kept for reference:
#     ext = "csv" # Extension of log file, either log or txt
#     ntot = 20   # Total number of participants
#     vstep = 2   # Time step for visualization
#     ostep = 1   # Time step for output to R
#     infer_new_cases_from_inf = True # Time for "new case" is inferred either from infection or removal time
#     asympt = 2  # Asymptomatic period
#     sympt = 2   # Symptomatic period
#     start_sim = datetime.strptime("2020-03-05 13:30", '%Y-%m-%d %H:%M')
#     stop_sim = datetime.strptime("2020-03-05 14:15", '%Y-%m-%d %H:%M')

In [3]:
# Some config

# Vertex color for each coded outcome:
# 0 = no infection
# 1 = index case
# 2 = transmission
color_dict = {0: "skyblue", 1: "lime", 2: "crimson"}

# Input CSVs are read from <base_folder>/data; the plot is written to
# <base_folder>/output.
data_folder = path.join(base_folder, "data")
output_folder = path.join(base_folder, "output")

# makedirs(exist_ok=True) is safe to re-run, has no check-then-create race,
# and also creates missing parent directories.
os.makedirs(output_folder, exist_ok=True)

# viz_delta = timedelta(minutes=vstep)
# out_delta = timedelta(minutes=ostep)

In [4]:
# Load participants and histories

users = pd.read_csv(path.join(data_folder, "participants.csv"))
histories = pd.read_csv(path.join(data_folder, "histories.csv"))

# p2p_id -> sim_id for every participant in the file (all simulations), so a
# peer from another simulation can still be attributed to its group.
p2pToSim = pd.Series(users.sim_id.values, index=users.p2p_id).to_dict()

# File name of the rendered contact graph (joined with output_folder later).
fn = "contacts.pdf"

# Restrict everything to the selected simulation.
dfn = users[users["sim_id"] == sim_id]
hn = histories[histories["sim_id"] == sim_id]
cn = hn[hn["type"] == "contact"]  # contact events only

# Two-way lookup between a participant's id and p2p_id within this simulation.
p2pToId = pd.Series(dfn.id.values, index=dfn.p2p_id).to_dict()
idTop2p = pd.Series(dfn.p2p_id.values, index=dfn.id).to_dict()

# Dense 0-based vertex index per participant id, in idTop2p iteration order.
user_index = {kid: idx for idx, kid in enumerate(idTop2p)}

# Sanity check: the four counts are equal when id and p2p_id are both unique
# within the simulation (duplicate keys would collapse in the dicts).
print(len(dfn))
print(len(idTop2p))
print(len(p2pToId))
print(len(user_index))

17
17
17
17


In [5]:
# Generate contacts
#
# Aggregates the contact events in `cn` into an undirected, weighted pair map:
#   contacts[(i, j)] = summed, per-event-rounded contact length, with i <= j
# where i and j are vertex indices from `user_index`.

node0 = cn.user_id.values          # participant id of the reporting user
node1 = cn.peer_id.values          # p2p id of the peer that was seen
time = cn.time.values              # event time; not needed for the aggregation
length = cn.contact_length.values  # presumably milliseconds (divided by 60 * 1000 below) - TODO confirm

contacts = {}
for id0, p2p1, l01 in zip(node0, node1, length):
    n0 = user_index[id0]
    if p2p1 not in p2pToId:
        # Peer is not part of this simulation. Report which group it belongs
        # to; .get() avoids a KeyError for a peer missing from participants.
        print("Possible contact with group", p2pToSim.get(p2p1, "unknown"))
        continue
    n1 = user_index[p2pToId[p2p1]]

    # Canonical (low, high) key so A->B and B->A accumulate on the same edge.
    p01 = (min(n0, n1), max(n0, n1))

    # Each event is rounded before summing, as in the original computation.
    contacts[p01] = contacts.get(p01, 0) + round(l01 / (60 * 1000))

print(contacts)

{(7, 13): 33.0, (8, 13): 11.0, (7, 16): 11.0, (1, 13): 6.0, (7, 12): 34.0, (7, 8): 3.0, (5, 13): 7.0, (2, 12): 8.0, (10, 13): 4.0, (3, 13): 5.0, (6, 13): 4.0, (9, 13): 6.0, (12, 13): 2.0, (2, 13): 6.0, (6, 7): 6.0, (5, 7): 6.0, (3, 7): 7.0, (7, 10): 1.0, (1, 6): 11.0, (5, 6): 13.0, (2, 6): 15.0, (7, 11): 10.0, (1, 7): 7.0, (3, 11): 2.0, (6, 9): 4.0, (11, 13): 4.0, (7, 9): 4.0, (2, 7): 2.0}


In [6]:
# Generate infections and outcomes
#
# Builds `outcomes`, one code per vertex (indexed via user_index):
#   0 = no infection, 1 = index case ("CASE0"), 2 = transmission ("PEER")

inf = hn[hn["type"] == "infection"]

# participant id -> infection source string taken from the "inf" column
infMap = pd.Series(inf.inf.values, index=inf.user_id).to_dict()

outcomes = [0 for _ in range(len(dfn))]

for infected_id, source in infMap.items():
    slot = user_index[infected_id]
    # "PEER" is tested first because it takes precedence when both markers
    # appear in the same source string.
    if "PEER" in source:
        outcomes[slot] = 2
    elif "CASE0" in source:
        outcomes[slot] = 1

In [7]:
# Build the weighted contact graph and render it to <output_folder>/<fn>.

nvert = len(user_index)

# Keep only pairs whose accumulated weight is positive. The three lists share
# the same ordering, so weights[i] belongs to edges[i].
edgesw = [(a, b, total) for (a, b), total in contacts.items() if total > 0]
edges = [(a, b) for a, b, _ in edgesw]
weights = [total for _, _, total in edgesw]

# https://stackoverflow.com/a/50430444
g = Graph(directed=False)
g.add_vertices(nvert)  # one vertex per participant, including isolated ones
g.add_edges(edges)
g.es['weight'] = weights
# g.es['label'] = weights

# g = Graph.TupleList(edgesw, weights=True)

# Color each vertex by its coded outcome.
g.vs["outcome"] = outcomes
g.vs["color"] = [color_dict[code] for code in g.vs["outcome"]]

print(g.is_weighted())

# Plot options, passed through to igraph's plot() as keyword arguments.
# https://igraph.org/c/doc/igraph-Layout.html#igraph_layout_graphopt
style = {
    "bbox": (800, 800),
    "margin": 15,
    "vertex_size": 8,
    "vertex_label_size": 8,
    "edge_curved": False,
    "layout": g.layout_fruchterman_reingold(weights=g.es["weight"]),
}

p = plot(g, path.join(output_folder, fn), **style)

True


In [8]:
# Basic structural summary of the contact graph, printed as "label value".
graph_summary = [
    ("Number of vertices in the graph:", g.vcount()),
    ("Number of edges in the graph", g.ecount()),
    ("Is the graph directed:", g.is_directed()),
    ("Maximum degree in the graph:", g.maxdegree()),
]
for label, value in graph_summary:
    print(label, value)
# print("Adjacency matrix:\n", g.get_adjacency())

Number of vertices in the graph: 17
Number of edges in the graph 28
Is the graph directed: False
Maximum degree in the graph: 12
