In [221]:
import os
from os import path
from datetime import datetime, timedelta, date
import pytz

from igraph import *

import seaborn as sns
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [222]:
# Various datasets to visualize.
# Each option maps to (base folder, simulation id, simulation time zone).

option = 2

dataset_options = {
    1: ("./retreat2020", 29, "US/Eastern"),
    2: ("./cmutest", 32, "US/Mountain"),
}

if option not in dataset_options:
    # Fail right here with a clear message; otherwise base_folder, sim_id and
    # sim_tz would stay undefined and a later cell would die with a NameError.
    raise ValueError(
        "Unknown dataset option %r, expected one of %s"
        % (option, sorted(dataset_options))
    )

base_folder, sim_id, sim_tz = dataset_options[option]

In [223]:
# Some config

# Coded outcomes:
# 0 = no infection
# 1 = index case
# 2 = transmission
# Index cases and transmissions are drawn with the same color.
color_dict = {0: "skyblue", 1: "crimson", 2: "crimson"}

data_folder = path.join(base_folder, "data")
output_folder = path.join(base_folder, "output")
# makedirs with exist_ok=True creates missing parent folders and does not fail
# when the folder already exists, so no separate (racy) exists() check is needed.
os.makedirs(output_folder, exist_ok=True)

# https://howchoo.com/g/ywi5m2vkodk/working-with-datetime-objects-and-timezones-in-python
# https://itnext.io/working-with-timezone-and-python-using-pytz-library-4931e61e5152
timezone = pytz.timezone(sim_tz)

# Keyword arguments handed to plot() via **style.
# plot_network() later adds a "layout" entry to this dict.
style = {
    "bbox": (800, 800),
    "margin": 15,
    "vertex_size": 20,
    "vertex_label_size": 8,
    "edge_curved": False,
}

In [228]:
# Some utility functions

def get_contact_list(events):
    """Sum up the contact time between every pair of users.

    Only rows whose ``type`` is ``"contact"`` are used. The function relies on
    the notebook-level lookup tables ``user_index`` (user id -> vertex index),
    ``p2pToId`` (p2p id -> user id) and ``p2pToSim`` (p2p id -> simulation id).

    Parameters
    ----------
    events : pandas.DataFrame
        Event rows; the columns ``type``, ``user_id``, ``peer_id`` and
        ``contact_length`` are read. ``contact_length`` is divided by
        ``60 * 1000``, i.e. it is presumably in milliseconds and converted to
        minutes (TODO confirm the unit against the data export).

    Returns
    -------
    dict
        Maps ``(n0, n1)`` vertex-index pairs with ``n0 <= n1`` to the sum of
        the individually rounded contact lengths of that pair.

    Raises
    ------
    KeyError
        If a ``user_id``, or the user id a peer resolves to, is missing from
        ``user_index``.
    """
    contacts = events[events["type"] == "contact"]

    node0 = contacts["user_id"].values
    node1 = contacts["peer_id"].values
    length = contacts["contact_length"].values

    clist = {}
    for id0, p2p1, l01 in zip(node0, node1, length):
        n0 = user_index[id0]
        if p2p1 not in p2pToId:
            # The peer is not a participant of the selected simulation. It may
            # be missing from p2pToSim as well, so use .get() here: a plain
            # p2pToSim[p2p1] would abort the whole run with a KeyError.
            print("Possible contact with group", p2pToSim.get(p2p1, "unknown"))
            continue
        n1 = user_index[p2pToId[p2p1]]

        # The graph is undirected: store every pair with the smaller index first
        # so that A->B and B->A contacts accumulate under the same key.
        if n1 < n0:
            n0, n1 = n1, n0
        p01 = (n0, n1)

        # Each contact is rounded on its own before being added to the total.
        clist[p01] = clist.get(p01, 0) + round(l01 / (60 * 1000))

    return clist

def get_outcomes(events, outcomes0 = None):
    """Return the coded infection outcome of every user.

    Outcome codes: 0 = no infection, 1 = index case, 2 = transmission.
    Relies on the notebook-level ``user_index`` (user id -> vertex index).

    Parameters
    ----------
    events : pandas.DataFrame
        Event rows; rows whose ``type`` is ``"infection"`` are used and their
        ``user_id`` and ``inf`` columns are read.
    outcomes0 : list, optional
        Outcomes from an earlier call to start from. The list is copied, the
        caller's object is left untouched. When omitted, every user starts at 0.

    Returns
    -------
    list
        One outcome code per entry of ``user_index``, in vertex-index order.

    Raises
    ------
    KeyError
        If an infected ``user_id`` is missing from ``user_index``.
    """
    inf = events[events["type"] == "infection"]
    # With several infection rows for one user the last one wins.
    infMap = pd.Series(inf["inf"].values, index=inf["user_id"]).to_dict()

    if outcomes0 is None:
        # Sized by user_index because that is what the indices below refer to.
        outcomes = [0] * len(user_index)
    else:
        # Work on a copy so the caller's list is not modified behind its back.
        outcomes = list(outcomes0)

    for kid, src in infMap.items():
        idx = user_index[kid]
        if "CASE0" in src:
            outcomes[idx] = 1
        # Checked second on purpose: a source naming a peer overrides CASE0.
        if "PEER" in src:
            outcomes[idx] = 2

    return outcomes

def get_network(contacts, outcomes):
    """Build the undirected, weighted contact graph.

    The graph gets one vertex per entry of the notebook-level ``user_index``
    and one edge per contact pair whose weight is greater than zero.

    Parameters
    ----------
    contacts : dict
        Maps ``(n0, n1)`` vertex-index pairs to a weight.
    outcomes : list or None
        Outcome code per vertex. When truthy it is stored as the ``outcome``
        vertex attribute and translated into ``color`` through ``color_dict``.

    Returns
    -------
    Graph
        The graph, with a ``weight`` attribute on its edges.
    """
    # Pairs with a zero (or negative) weight do not become edges.
    kept = [(pair, weight) for pair, weight in contacts.items() if 0 < weight]
    edge_list = [(pair[0], pair[1]) for pair, _ in kept]
    edge_weights = [weight for _, weight in kept]

    # https://stackoverflow.com/a/50430444
    graph = Graph(directed=False)
    graph.add_vertices(len(user_index))
    graph.add_edges(edge_list)
    graph.es['weight'] = edge_weights

    if outcomes:
        graph.vs["outcome"] = outcomes
        graph.vs["color"] = [color_dict[code] for code in graph.vs["outcome"]]

    return graph

def plot_network(g, output_fn):
    """Lay out graph ``g`` and plot it into ``output_folder`` as ``output_fn``.

    Side effect: the computed layout is stored in the notebook-level ``style``
    dict under ``"layout"``, so later ``plot(..., **style)`` calls reuse it.
    """
    # Weighted Fruchterman-Reingold layout. The link below (kept from the
    # original notebook) points at the graphopt entry of the igraph layout docs:
    # https://igraph.org/c/doc/igraph-Layout.html#igraph_layout_graphopt
    edge_weights = g.es["weight"]
    style["layout"] = g.layout_fruchterman_reingold(weights=edge_weights)

    target = os.path.join(output_folder, output_fn)
    plot(g, target, **style)

def print_network_properties(g):
    """Print vertex count, edge count, directedness and maximum degree of ``g``."""
    # (label, name of the graph method that supplies the value)
    report = (
        ("Number of vertices in the graph:", "vcount"),
        ("Number of edges in the graph", "ecount"),
        ("Is the graph directed:", "is_directed"),
        ("Maximum degree in the graph:", "maxdegree"),
    )
    for label, method_name in report:
        print(label, getattr(g, method_name)())
    # Disabled: also dump the adjacency matrix.
#     print("Adjacency matrix:\n", g.get_adjacency())

In [229]:
# Load participants and histories

all_users = pd.read_csv(path.join(data_folder, "participants.csv"))
all_events = pd.read_csv(path.join(data_folder, "histories.csv"))

# Keep only the rows that belong to the selected simulation.
users = all_users[all_users["sim_id"] == sim_id]
events = all_events[all_events["sim_id"] == sim_id]

# p2p id -> simulation id, built from ALL participants. get_contact_list() only
# consults this map for peers that are not part of the selected simulation, so a
# map restricted to `users` could never contain the key it is asked for.
p2pToSim = pd.Series(all_users.sim_id.values, index=all_users.p2p_id).to_dict()
# Lookups between p2p id and user id, restricted to the selected simulation.
p2pToId = pd.Series(users.id.values, index=users.p2p_id).to_dict()
idTop2p = pd.Series(users.p2p_id.values, index=users.id).to_dict()

# user id -> consecutive vertex index (0, 1, 2, ...) used by the graph code.
user_index = {kid: idx for idx, kid in enumerate(idTop2p)}

# These should return the same value
print(len(users))
print(len(idTop2p))
print(len(p2pToId))
print(len(user_index))

32
32
32
32


In [230]:
# Build, summarize and plot the contact network of the whole simulation.
contacts = get_contact_list(events)
outcomes = get_outcomes(events)
g = get_network(contacts, outcomes)
print_network_properties(g)
    
# plot_network() also stores the computed layout in style["layout"]; any later
# plot(..., **style) call therefore reuses the layout computed here.
output_fn = "all.pdf"
plot_network(g, output_fn)

Number of vertices in the graph: 32
Number of edges in the graph 115
Is the graph directed: False
Maximum degree in the graph: 24


In [219]:
# Time span covered by the selected events.
# Series.min()/max() are vectorized and skip missing values, whereas the
# builtin min()/max() loop in Python and give order-dependent results when the
# column contains NaN.
min_time = events['time'].min()
max_time = events['time'].max()

# The timestamps are interpreted as seconds since the epoch and shown in the
# simulation's own time zone.
first_date = datetime.fromtimestamp(min_time, tz=timezone)
last_date = datetime.fromtimestamp(max_time, tz=timezone)

print("First event:", first_date)
print("Last event :", last_date)
print(first_date.tzinfo)

First event: 2020-09-25 11:06:56-04:00
Last event : 2020-09-25 16:59:38-04:00
US/Eastern


In [231]:
# Contacts over time
#
# Walks the event time range in consecutive windows of `tdelta` seconds and
# writes one network plot per window. Contacts are per window, outcomes
# accumulate from window to window.

toutcomes = None

tdelta = 10 * 60

t = min_time
frame = 0
while t <= max_time:
    t0 = t
    t += tdelta
    td0 = datetime.fromtimestamp(t0, tz=timezone)
    td = datetime.fromtimestamp(t, tz=timezone)
    print("FRAME", frame, ":", td0, "-", td)
    # Half-open window [t0, t). With the former (t0, t] window the events
    # stamped exactly at min_time fell outside frame 0 and so were in no frame.
    # Together with `t <= max_time` above, every event from min_time to
    # max_time now lands in exactly one frame.
    tevents = events[(t0 <= events['time']) & (events['time'] < t)]
    toutcomes = get_outcomes(tevents, toutcomes)
    tcontacts = get_contact_list(tevents)
    g = get_network(tcontacts, toutcomes)
    print_network_properties(g)

    output_fn = "frame-" + str(frame) + td.strftime('-%H-%M') + ".pdf"
    # **style carries style["layout"] if plot_network() ran before this cell,
    # in which case every frame is drawn with that same layout.
    p = plot(g, os.path.join(output_folder, output_fn), **style)

    frame += 1

FRAME 0 : 2020-09-25 11:06:56-04:00 - 2020-09-25 11:16:56-04:00
Number of vertices in the graph: 32
Number of edges in the graph 26
Is the graph directed: False
Maximum degree in the graph: 16
FRAME 1 : 2020-09-25 11:16:56-04:00 - 2020-09-25 11:26:56-04:00
Number of vertices in the graph: 32
Number of edges in the graph 8
Is the graph directed: False
Maximum degree in the graph: 5
FRAME 2 : 2020-09-25 11:26:56-04:00 - 2020-09-25 11:36:56-04:00
Number of vertices in the graph: 32
Number of edges in the graph 3
Is the graph directed: False
Maximum degree in the graph: 2
FRAME 3 : 2020-09-25 11:36:56-04:00 - 2020-09-25 11:46:56-04:00
Number of vertices in the graph: 32
Number of edges in the graph 8
Is the graph directed: False
Maximum degree in the graph: 6
FRAME 4 : 2020-09-25 11:46:56-04:00 - 2020-09-25 11:56:56-04:00
Number of vertices in the graph: 32
Number of edges in the graph 3
Is the graph directed: False
Maximum degree in the graph: 2
FRAME 5 : 2020-09-25 11:56:56-04:00 - 2020-