In [50]:
import os
from os import path
from datetime import datetime, timedelta, date
import pytz

from igraph import *
from PIL import Image, ImageDraw, ImageFont
import cv2

import seaborn as sns
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [51]:
# Various datasets to visualize

option = 1

# Each option selects one dataset: its base folder, the simulation id used to
# filter participants/histories, and the time zone used to display timestamps.
if option == 1:
    base_folder = "./retreat2020"
    sim_id = 29
    sim_tz = "US/Eastern"
elif option == 2:
    base_folder = "./cmutest"
    sim_id = 32
    sim_tz = "US/Mountain"
else:
    # Fail loudly: otherwise the three settings would be left undefined, or
    # silently keep the values from a previous run of this cell.
    raise ValueError("Unknown dataset option: " + str(option))

In [52]:
# Some config

# Vertex colors keyed by the outcome codes assigned in get_outcomes():
# 0 = no infection
# 1 = index case
# 2 = transmission
color_dict = {0: "skyblue", 1: "crimson", 2: "orange"}

# Alternative outcome coding (currently disabled):
# 0 = no information
# 1 = became infected, but no outcome (yet) known
# 2 = recovered
# 3 = dead
# 4 = escaped
# 10 = infected others, but has no outcome, so it is considered an initial node
# color_dict = {0: "Lime Green", 1: "Dark Orange", 2: "Dodger Blue", 3: "Black", 4:"Orchid", 10:"White"}

data_folder = path.join(base_folder, "data")
output_folder = path.join(base_folder, "output", "transmissions")
# exist_ok=True makes this cell safe to re-run and avoids the
# check-then-create race of a separate path.exists() test.
os.makedirs(output_folder, exist_ok=True)

# File extension of the rendered frames. plot_network() only draws the title
# onto ".png" frames.
frame_format = "pdf"
# When True, the frame loop reads every frame back with OpenCV and writes movie.mp4.
create_movie = False
# Frames per second passed to cv2.VideoWriter.
movie_fps = 2

# https://howchoo.com/g/ywi5m2vkodk/working-with-datetime-objects-and-timezones-in-python
# https://itnext.io/working-with-timezone-and-python-using-pytz-library-4931e61e5152
timezone = pytz.timezone(sim_tz)

# Font used for the frame titles.
# https://github.com/google/fonts/tree/master/apache
# NOTE(review): presumably Roboto-Regular.ttf must sit in the working directory
# or a system font folder for PIL to find it - confirm.
font = ImageFont.truetype("Roboto-Regular.ttf", size=24)

# Keyword arguments forwarded to igraph's plot() by plot_network().
style = {}
style["bbox"] = (800, 800)
style["margin"] = 15
style["vertex_size"] = 20
style["vertex_label_size"] = 8
style["edge_curved"] = False

In [57]:
# Some utility functions

def get_transmissions(events):
    """Collect peer-to-peer transmissions as (source, target) vertex-index pairs.

    Only rows whose "type" is "infection" are considered. A row counts as a
    transmission when its "inf" string contains "PEER"; the source's p2p id is
    the text between the first "[" and the first ":" of that string.

    Relies on the notebook-level lookups `user_index` (user id -> vertex index)
    and `p2pToId` (p2p id -> user id).

    Returns:
        list of (source_index, target_index) tuples, in row order.
    """
    infection_rows = events[events["type"] == "infection"]

    edges = []
    for target_id, source in zip(infection_rows.user_id.values,
                                 infection_rows.inf.values):
        # Looked up for every infection row, so an unknown user id fails here
        # even when the row is not a peer transmission.
        target_idx = user_index[target_id]

        if "PEER" not in source:
            continue

        start = source.index("[") + 1
        stop = source.index(":")
        source_idx = user_index[p2pToId[source[start:stop]]]
        edges.append((source_idx, target_idx))

    return edges

def get_outcomes(events, outcomes0=None):
    """Assign an outcome code to every user from the infection events.

    Codes (see color_dict): 0 = no infection, 1 = index case ("CASE0" in the
    row's "inf" string), 2 = transmission ("PEER" in the "inf" string). If a
    string contains both markers, the transmission code wins.

    Relies on the notebook-level `users` (for the number of users) and
    `user_index` (user id -> position in the outcome list).

    Args:
        events: DataFrame with "type", "user_id" and "inf" columns.
        outcomes0: optional list of previous outcome codes to carry forward.
            It is updated IN PLACE and returned; pass None to start from
            all zeros.

    Returns:
        list of outcome codes, indexed by vertex index.
    """
    inf = events[events["type"] == "infection"]
    # If a user has several infection rows, the last one wins.
    infMap = pd.Series(inf.inf.values, index=inf.user_id).to_dict()

    # Identity test rather than `== None`: equality can be overloaded
    # (e.g. element-wise on arrays) and is not a reliable None check.
    if outcomes0 is None:
        outcomes = [0] * len(users)
    else:
        outcomes = outcomes0

    for kid, src in infMap.items():
        idx = user_index[kid]
        if "CASE0" in src:
            outcomes[idx] = 1
        if "PEER" in src:
            outcomes[idx] = 2

    return outcomes

def get_network(transmissions, outcomes):
    """Build the directed transmission graph.

    The graph gets one vertex per entry of the notebook-level `user_index`
    and one edge per (source, target) pair in `transmissions`. The pairs are
    used as vertex indices directly, so no id -> index mapping happens here.

    When `outcomes` is truthy, it is stored as the "outcome" vertex attribute
    and each vertex's "color" is looked up in `color_dict`.

    Returns:
        the igraph Graph.
    """
    graph = Graph(directed=True)
    graph.add_vertices(len(user_index))
    graph.add_edges(transmissions)

    # Nothing to annotate: return the bare graph.
    if not outcomes:
        return graph

    graph.vs["outcome"] = outcomes
    graph.vs["color"] = [color_dict[code] for code in graph.vs["outcome"]]
    return graph

def gen_layout(g):
    """Return the layout that `g.layout` produces for the "fr" algorithm name."""
    layout_name = "fr"
    return g.layout(layout_name)

def plot_network(g, layout, title, fn):
    """Render graph `g` to `fn` inside `output_folder`, optionally titled.

    Args:
        g: igraph Graph to draw.
        layout: vertex layout, passed to plot() as the "layout" option.
        title: text drawn onto the image; skipped when empty/None or when
            the frame is not a PNG.
        fn: file name (not a path) of the frame; its extension selects the
            output format.
    """
    img_fn = os.path.join(output_folder, fn)

    # Build the options for this call instead of writing the layout into the
    # shared notebook-level `style` dict, which would leak state across calls.
    plot_style = dict(style, layout=layout)
    plot(g, img_fn, **plot_style)

    # Only PNG frames are annotated. Test the extension, not a substring, so
    # that a name such as "x.png.pdf" is not mistaken for a PNG.
    if title and fn.endswith(".png"):
        with Image.open(img_fn) as image:
            draw = ImageDraw.Draw(image)
            # The position assumes the 800-pixel-high canvas configured in style["bbox"].
            draw.text((10, 760), title, fill='rgb(0, 0, 0)', font=font)
            image.save(img_fn)

def print_network_properties(g):
    """Print the vertex count, edge count, directedness and maximum degree of `g`."""
    # Each getter is called just before its line is printed.
    report = (
        ("Number of vertices in the graph:", g.vcount),
        ("Number of edges in the graph", g.ecount),
        ("Is the graph directed:", g.is_directed),
        ("Maximum degree in the graph:", g.maxdegree),
    )
    for label, getter in report:
        print(label, getter())
#     print("Adjacency matrix:\n", g.get_adjacency())    

In [58]:
# Load participants and histories

all_users = pd.read_csv(path.join(data_folder, "participants.csv")) 
all_events = pd.read_csv(path.join(data_folder, "histories.csv"))

# Keep only the rows of the selected simulation.
users = all_users[all_users["sim_id"] == sim_id]
# .copy() gives an independent frame, so adding the event_start column below
# neither triggers SettingWithCopyWarning nor depends on all_events.
events = all_events[all_events["sim_id"] == sim_id].copy()

# NOTE(review): contact_length is presumably in milliseconds and time in
# seconds (hence the /1000) - confirm against the data export.
events["event_start"] = (events["time"] - events["contact_length"] / 1000).astype(int)

# Lookups between the participants' p2p ids, user ids and simulation ids.
p2pToSim = pd.Series(users.sim_id.values, index=users.p2p_id).to_dict()
p2pToId = pd.Series(users.id.values, index=users.p2p_id).to_dict()
idTop2p = pd.Series(users.p2p_id.values, index=users.id).to_dict()

# user id -> vertex index, numbered in the iteration order of idTop2p.
user_index = {kid: idx for idx, kid in enumerate(idTop2p)}

# These should return the same value
print(len(users))
print(len(idTop2p))    
print(len(p2pToId))
print(len(user_index))

32
32
32
32


In [59]:
# Time span covered by the selected simulation's events.
# Series.min()/.max() are vectorized and skip NaN, unlike the builtins, which
# iterate element by element and give order-dependent results with NaN.
min_time = events['time'].min()
max_time = events['time'].max()

# The event times are passed to datetime.fromtimestamp(), i.e. treated as
# POSIX timestamps, and shown in the simulation's time zone.
first_date = datetime.fromtimestamp(min_time, tz=timezone)
last_date = datetime.fromtimestamp(max_time, tz=timezone)

print("First event:", first_date)
print("Last event :", last_date)
print(first_date.tzinfo)

First event: 2020-09-25 11:06:56-04:00
Last event : 2020-09-25 16:59:38-04:00
US/Eastern


In [60]:
# Contacts/transmissions over time

# Outcome list carried from frame to frame (get_outcomes updates it in place).
toutcomes = None

# Frame length in seconds (10 minutes).
tdelta = 10 * 60

t = min_time
frame = 0
img_array = []
while t <= max_time:
    t0 = t
    t += tdelta
    # End of the current frame; used for the log line, the title and the file name.
    td = datetime.fromtimestamp(t, tz=timezone)
    print("FRAME", frame, ":", datetime.fromtimestamp(t0, tz=timezone), "-", td)

    # Cumulative view: every event up to the end of this frame.
    tevents = events[events['time'] <= t]
    toutcomes = get_outcomes(tevents, toutcomes)
    transmissions = get_transmissions(tevents)
    g = get_network(transmissions, toutcomes)

    # The layout is recomputed for every frame.
    layout = gen_layout(g)
    img_title = "Time = " + td.strftime('%H:%M')
    img_fn =  "frame-" + str(frame) + td.strftime('-%H-%M') + "." + frame_format
    plot_network(g, layout, img_title, img_fn)

    if create_movie:
        img = cv2.imread(os.path.join(output_folder, img_fn))
        # cv2.imread returns None instead of raising when it cannot decode a
        # file (e.g. a PDF frame); fail with an actionable message.
        if img is None:
            raise ValueError(
                "OpenCV could not read frame '" + img_fn + "'; set frame_format "
                "to a raster format such as \"png\" when create_movie is True"
            )
        height, width = img.shape[:2]
        size = (width, height)
        img_array.append(img)
    
    frame += 1
    
if create_movie:
    print("Saving movie...")
    movie_fn = os.path.join(output_folder, 'movie.mp4')
    # NOTE(review): 'MJPG' is not a native codec tag for the .mp4 container;
    # OpenCV may warn or substitute another codec - confirm the output plays.
    out = cv2.VideoWriter(movie_fn, cv2.VideoWriter_fourcc(*'MJPG'), movie_fps, size)    
    for img in img_array:
        out.write(img)
    out.release()
    print("Done.")    

FRAME 0 : 2020-09-25 11:06:56-04:00 - 2020-09-25 11:16:56-04:00
FRAME 1 : 2020-09-25 11:16:56-04:00 - 2020-09-25 11:26:56-04:00
FRAME 2 : 2020-09-25 11:26:56-04:00 - 2020-09-25 11:36:56-04:00
FRAME 3 : 2020-09-25 11:36:56-04:00 - 2020-09-25 11:46:56-04:00
FRAME 4 : 2020-09-25 11:46:56-04:00 - 2020-09-25 11:56:56-04:00
FRAME 5 : 2020-09-25 11:56:56-04:00 - 2020-09-25 12:06:56-04:00
FRAME 6 : 2020-09-25 12:06:56-04:00 - 2020-09-25 12:16:56-04:00
FRAME 7 : 2020-09-25 12:16:56-04:00 - 2020-09-25 12:26:56-04:00
FRAME 8 : 2020-09-25 12:26:56-04:00 - 2020-09-25 12:36:56-04:00
FRAME 9 : 2020-09-25 12:36:56-04:00 - 2020-09-25 12:46:56-04:00
FRAME 10 : 2020-09-25 12:46:56-04:00 - 2020-09-25 12:56:56-04:00
FRAME 11 : 2020-09-25 12:56:56-04:00 - 2020-09-25 13:06:56-04:00
FRAME 12 : 2020-09-25 13:06:56-04:00 - 2020-09-25 13:16:56-04:00
FRAME 13 : 2020-09-25 13:16:56-04:00 - 2020-09-25 13:26:56-04:00
FRAME 14 : 2020-09-25 13:26:56-04:00 - 2020-09-25 13:36:56-04:00
FRAME 15 : 2020-09-25 13:36:56-04:0