In [1]:
import os
from os import path
from datetime import datetime, timedelta, date
import pytz

from igraph import *
from PIL import Image, ImageDraw, ImageFont
import cv2

import seaborn as sns
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Various datasets to visualize

option = 6

# Each option maps to (base folder, simulation id, simulation timezone name).
datasets = {
    1: ("./retreat2020", 29, "US/Eastern"),
    2: ("./cmutest", 32, "US/Mountain"),
    3: ("./cmu", 34, "US/Mountain"),
    4: ("./fgcu", 33, "US/Eastern"),
    5: ("./csw1", 36, "US/Eastern"),
    6: ("./csw2", 37, "US/Eastern"),
}

# Fail here on an unknown option: otherwise the three names below would be
# left undefined, or would silently keep values from an earlier run of this
# cell in the same kernel.
if option not in datasets:
    raise ValueError(
        "Unknown dataset option {!r}; expected one of {}".format(option, sorted(datasets))
    )

base_folder, sim_id, sim_tz = datasets[option]

In [3]:
# Some config

# Vertex colour per coded outcome:
#   0 = no infection
#   1 = index case
#   2 = transmission
color_dict = {
    0: "skyblue",
    1: "crimson",
    2: "orange",
}

# Inputs are read from <base_folder>/data, generated images and the movie
# are written to <base_folder>/output/contacts (created on first use).
data_folder = path.join(base_folder, "data")
output_folder = path.join(base_folder, "output", "contacts")
if not path.exists(output_folder):
    os.makedirs(output_folder)

# File extension used for every rendered image
frame_format = "png"

# Whether the per-window frames are also assembled into a movie, and its frame rate
create_movie = True
movie_fps = 2

# Width of one time window, in seconds
time_delta = 15 * 60

# Timezone used when turning event timestamps into datetimes
# https://howchoo.com/g/ywi5m2vkodk/working-with-datetime-objects-and-timezones-in-python
# https://itnext.io/working-with-timezone-and-python-using-pytz-library-4931e61e5152
timezone = pytz.timezone(sim_tz)

# Font used to stamp a title onto PNG images
# https://github.com/google/fonts/tree/master/apache
font = ImageFont.truetype("Roboto-Regular.ttf", size=24)

# Keyword arguments forwarded to igraph's plot()
style = {
    "bbox": (800, 800),
    "margin": 15,
    "vertex_size": 7,
    "vertex_label_size": 5,
    "edge_curved": False,
}

In [4]:
# Some utility functions

def get_contact_list(events):
    """Aggregate contact time per pair of users from a table of events.

    Relies on the notebook-level lookups ``user_index`` (user id -> vertex
    index), ``p2pToId`` (p2p id -> user id) and ``p2pToSim`` (p2p id -> sim id).

    Parameters
    ----------
    events : pandas.DataFrame
        Event rows. Rows with ``type == "contact"`` are read through their
        ``user_id``, ``peer_id`` and ``contact_length`` columns; rows with
        ``type == "infection"`` through ``user_id`` and ``inf``.
        ``contact_length`` is treated as milliseconds (it is divided by
        ``60 * 1000`` to accumulate minutes).

    Returns
    -------
    dict
        ``{(n0, n1): minutes}`` keyed by vertex-index pairs with ``n0 <= n1``.
        A peer-to-peer infection between two users with no recorded contact
        contributes a placeholder entry of 10.

    Notes
    -----
    Peers that are not in ``p2pToId`` are reported with a printed message and
    skipped, both for contacts and for infections.
    """
    ms_per_minute = 60 * 1000
    # Weight given to a transmission between users that have no contact on record
    transmission_only_weight = 10

    def ordered_pair(a, b):
        # Undirected edge: always store the smaller vertex index first
        return (a, b) if a <= b else (b, a)

    def report_unknown_peer(p2p):
        # p2pToSim is built with the same keys as p2pToId in this notebook, so
        # indexing it directly here would raise KeyError; fall back to the raw id.
        print("Possible contact with group", p2pToSim.get(p2p, p2p))

    contacts = events[events["type"] == "contact"]
    infections = events[events["type"] == "infection"]

    clist = {}
    contact_rows = zip(contacts.user_id.values,
                       contacts.peer_id.values,
                       contacts.contact_length.values)
    for id0, p2p1, l01 in contact_rows:
        if p2p1 not in p2pToId:
            report_unknown_peer(p2p1)
            continue
        pair = ordered_pair(user_index[id0], user_index[p2pToId[p2p1]])
        clist[pair] = clist.get(pair, 0) + round(l01 / ms_per_minute)

    # Adding contacts from transmissions if they are not registered as contacts already
    for id0, peer1 in zip(infections.user_id.values, infections.inf.values):
        # A missing source (NaN/None) is not a string and cannot name a peer
        if not isinstance(peer1, str) or "PEER" not in peer1:
            continue
        # The p2p id is the text between "[" and ":" in the source description
        p2p1 = peer1[peer1.index("[") + 1:peer1.index(":")]
        if p2p1 not in p2pToId:
            report_unknown_peer(p2p1)
            continue
        pair = ordered_pair(user_index[id0], user_index[p2pToId[p2p1]])
        clist.setdefault(pair, transmission_only_weight)

    return clist

def get_outcomes(events, outcomes0 = None):
    """Derive the coded outcome of every user from the infection events.

    Outcome codes: 0 = no infection, 1 = index case, 2 = transmission.
    Uses the notebook-level ``user_index`` (user id -> vertex index) to place
    each user in the result.

    Parameters
    ----------
    events : pandas.DataFrame
        Event rows; only those with ``type == "infection"`` are used, through
        their ``user_id`` and ``inf`` columns.
    outcomes0 : list, optional
        Outcomes carried over from earlier events. When given, this list is
        updated in place and returned, so outcomes accumulate across calls.
        When omitted, a fresh all-zero list with one slot per entry of
        ``user_index`` is created.

    Returns
    -------
    list
        Outcome code per vertex index.
    """
    infections = events[events["type"] == "infection"]
    # One source per user; if a user has several infection rows the last one wins
    sources = pd.Series(infections.inf.values, index=infections.user_id).to_dict()

    if outcomes0 is None:
        # Sized from user_index, the same mapping that produces the indices
        # written below (and that get_network uses for the vertex count).
        outcomes = [0] * len(user_index)
    else:
        outcomes = outcomes0

    for kid, src in sources.items():
        # A missing source (NaN/None) carries no information about the outcome
        if not isinstance(src, str):
            continue
        idx = user_index[kid]
        if "CASE0" in src:
            outcomes[idx] = 1
        if "PEER" in src:
            outcomes[idx] = 2

    return outcomes

def get_network(contacts, outcomes):
    """Build an undirected igraph ``Graph`` from a contact dictionary.

    The graph gets one vertex per entry of the notebook-level ``user_index``
    and one edge per contact pair whose weight is strictly positive; the
    weights are stored in the ``weight`` edge attribute.

    Parameters
    ----------
    contacts : dict
        ``{(n0, n1): weight}`` keyed by vertex-index pairs.
    outcomes : list or None
        Outcome code per vertex. When non-empty it is stored in the
        ``outcome`` vertex attribute and translated through ``color_dict``
        into the ``color`` vertex attribute.

    Returns
    -------
    Graph
        The assembled graph.
    """
    vertex_count = len(user_index)

    # Pairs with a positive weight only, in the dictionary's iteration order
    weighted_pairs = [((pair[0], pair[1]), contacts[pair])
                      for pair in contacts
                      if 0 < contacts[pair]]

    # https://stackoverflow.com/a/50430444
    g = Graph(directed=False)
    g.add_vertices(vertex_count)
    g.add_edges([pair for pair, _ in weighted_pairs])
    g.es['weight'] = [weight for _, weight in weighted_pairs]

    if not outcomes:
        return g

    g.vs["outcome"] = outcomes
    g.vs["color"] = [color_dict[code] for code in g.vs["outcome"]]
    return g

def gen_layout(g):
    """Return a Fruchterman-Reingold layout of ``g`` weighted by its ``weight`` edge attribute."""
    # Layout reference (the anchor points at graphopt, the call below uses
    # Fruchterman-Reingold):
    # https://igraph.org/c/doc/igraph-Layout.html#igraph_layout_graphopt
    edge_weights = g.es["weight"]
    return g.layout_fruchterman_reingold(weights=edge_weights)

def plot_network(g, layout, title, fn):
    """Render ``g`` to a file in ``output_folder`` and optionally stamp a title on it.

    Parameters
    ----------
    g : Graph
        Graph to draw.
    layout
        Layout passed to igraph's ``plot`` together with the shared ``style``
        options.
    title : str or None
        Text drawn near the bottom-left corner of the image. Only applied
        when the file name ends in ``.png``; falsy values disable it.
    fn : str
        File name (not a path) of the image inside ``output_folder``.
    """
    img_fn = os.path.join(output_folder, fn)

    # Combine the shared options with this call's layout in a new dict, so the
    # notebook-level `style` is not left holding a stale layout afterwards.
    plot_style = dict(style, layout=layout)
    plot(g, img_fn, **plot_style)

    # Only PNG output is post-processed; test the extension rather than any
    # occurrence of ".png" inside the name.
    if title and fn.lower().endswith(".png"):
        image = Image.open(img_fn)
        draw = ImageDraw.Draw(image)
        # 40px above the bottom edge: y=760 on the default 800px-high canvas,
        # and still inside the image for other bbox sizes.
        draw.text((10, image.height - 40), title, fill='rgb(0, 0, 0)', font=font)
        image.save(img_fn)

def print_network_properties(g):
    """Print vertex count, edge count, directedness and maximum degree of ``g``."""
    # Each property is queried right before its line is printed
    report = (
        ("Number of vertices in the graph:", g.vcount),
        ("Number of edges in the graph", g.ecount),
        ("Is the graph directed:", g.is_directed),
        ("Maximum degree in the graph:", g.maxdegree),
    )
    for label, query in report:
        print(label, query())
    # Optional extra, disabled: the full adjacency matrix
#     print("Adjacency matrix:\n", g.get_adjacency())

In [5]:
# Load participants and histories

all_users = pd.read_csv(path.join(data_folder, "participants.csv"))
all_events = pd.read_csv(path.join(data_folder, "histories.csv"))

users = all_users[all_users["sim_id"] == sim_id]

# Work on an explicit copy: adding a column to a filtered slice of all_events
# triggers pandas' SettingWithCopyWarning and is not guaranteed to stick.
events = all_events[all_events["sim_id"] == sim_id].copy()

# `time` is used as a timestamp in seconds and `contact_length` is divided by
# 1000 to match it, so event_start is when the event began. Rows without a
# contact length start at their own timestamp (NaN cannot be cast to int).
events["event_start"] = (
    events["time"] - events["contact_length"].fillna(0) / 1000
).astype(int)

# Lookups between the p2p id of a participant, its user id and its simulation
p2pToSim = pd.Series(users.sim_id.values, index=users.p2p_id).to_dict()
p2pToId = pd.Series(users.id.values, index=users.p2p_id).to_dict()
idTop2p = pd.Series(users.p2p_id.values, index=users.id).to_dict()

# Consecutive vertex index for every user id, in the order of idTop2p
user_index = {kid: idx for idx, kid in enumerate(idTop2p)}

# These should return the same value. A smaller count for p2pToId means that
# several participants share one p2p_id key (repeated or missing values
# collapse into a single dictionary entry), so some peers cannot be resolved
# back to a unique user.
print(len(users))
print(len(idTop2p))
print(len(p2pToId))
print(len(user_index))

70
70
69
70


In [8]:
# Network for the entire simulation: every contact and infection event of the
# selected sim_id goes into a single graph.
contacts = get_contact_list(events)
outcomes = get_outcomes(events)
g = get_network(contacts, outcomes)

# Lay the graph out and write it to all.<frame_format> in the output folder
layout = gen_layout(g)
plot_network(g, layout, "Entire simulation", "all." + frame_format)

print_network_properties(g)

Number of vertices in the graph: 70
Number of edges in the graph 513
Is the graph directed: False
Maximum degree in the graph: 46


In [9]:
# Time span covered by the events. The `time` values are handed to
# datetime.fromtimestamp below, i.e. they are treated as POSIX timestamps.
min_time = min(events['time'])
max_time = max(events['time'])

# Timezone-aware datetimes in the simulation's timezone
first_date = datetime.fromtimestamp(min_time, tz=timezone)
last_date = datetime.fromtimestamp(max_time, tz=timezone)

print("First event:", first_date)
print("Last event :", last_date)
print(first_date.tzinfo)

First event: 2020-11-19 06:00:36-05:00
Last event : 2020-11-20 12:00:15-05:00
US/Eastern


In [10]:
# Contacts/transmissions over time
#
# The simulation is cut into consecutive windows of `time_delta` seconds. For
# every window a network is built from the events that start or end inside
# it, rendered to frame-<n>.<frame_format>, and optionally added to a movie.

# Outcomes accumulate from window to window (get_outcomes updates this list)
toutcomes = None

t = min_time
frame = 0
img_array = []
size = None
while t <= max_time:
    t0 = t
    t += time_delta
    td = datetime.fromtimestamp(t, tz=timezone)
    print("FRAME", frame, ":", datetime.fromtimestamp(t0, tz=timezone), "-", td)

    # Half-open window [t0, t): every timestamp falls in exactly one window.
    # With (t0, t] the events stamped exactly at min_time were never part of
    # any frame.
    started = (t0 <= events['event_start']) & (events['event_start'] < t)
    ended = (t0 <= events['time']) & (events['time'] < t)
    tevents = events[started | ended]
    toutcomes = get_outcomes(tevents, toutcomes)
    tcontacts = get_contact_list(tevents)
    g = get_network(tcontacts, toutcomes)
#     print_network_properties(g)

    # The frame is titled with the end of its window
    layout = gen_layout(g)
    img_title = td.strftime('%B %d, %I:%M %p')
    img_fn = "frame-" + str(frame) + "." + frame_format
    plot_network(g, layout, img_title, img_fn)

    if create_movie:
        # cv2.imread returns None instead of raising when it cannot decode the file
        img = cv2.imread(os.path.join(output_folder, img_fn))
        if img is None:
            print("Could not read", img_fn, "- leaving it out of the movie")
        else:
            height, width = img.shape[:2]
            size = (width, height)
            img_array.append(img)

    frame += 1

if create_movie and img_array:
    print("Saving movie...")
    movie_fn = os.path.join(output_folder, 'movie.mp4')
    # 'mp4v' is the codec tag that goes with an .mp4 container; 'MJPG' is not
    # a valid tag for that container.
    out = cv2.VideoWriter(movie_fn, cv2.VideoWriter_fourcc(*'mp4v'), movie_fps, size)
    try:
        for img in img_array:
            out.write(img)
    finally:
        # Always finalize the file, even if writing a frame fails
        out.release()
    print("Done.")

FRAME 0 : 2020-11-19 06:00:36-05:00 - 2020-11-19 06:15:36-05:00
FRAME 1 : 2020-11-19 06:15:36-05:00 - 2020-11-19 06:30:36-05:00
FRAME 2 : 2020-11-19 06:30:36-05:00 - 2020-11-19 06:45:36-05:00
FRAME 3 : 2020-11-19 06:45:36-05:00 - 2020-11-19 07:00:36-05:00
FRAME 4 : 2020-11-19 07:00:36-05:00 - 2020-11-19 07:15:36-05:00
FRAME 5 : 2020-11-19 07:15:36-05:00 - 2020-11-19 07:30:36-05:00
FRAME 6 : 2020-11-19 07:30:36-05:00 - 2020-11-19 07:45:36-05:00
FRAME 7 : 2020-11-19 07:45:36-05:00 - 2020-11-19 08:00:36-05:00
FRAME 8 : 2020-11-19 08:00:36-05:00 - 2020-11-19 08:15:36-05:00
FRAME 9 : 2020-11-19 08:15:36-05:00 - 2020-11-19 08:30:36-05:00
FRAME 10 : 2020-11-19 08:30:36-05:00 - 2020-11-19 08:45:36-05:00
FRAME 11 : 2020-11-19 08:45:36-05:00 - 2020-11-19 09:00:36-05:00
FRAME 12 : 2020-11-19 09:00:36-05:00 - 2020-11-19 09:15:36-05:00
FRAME 13 : 2020-11-19 09:15:36-05:00 - 2020-11-19 09:30:36-05:00
FRAME 14 : 2020-11-19 09:30:36-05:00 - 2020-11-19 09:45:36-05:00
FRAME 15 : 2020-11-19 09:45:36-05:0