In [1]:
import os
from os import path
from datetime import datetime, timedelta, date
import pytz

# https://igraph.discourse.group/t/igraph-for-python3-plotting-using-pycairo-on-mac-oscatalina-10-15-4/162
# Install with:
# conda install -c conda-forge python-igraph
from igraph import *

from PIL import Image, ImageDraw, ImageFont
import cv2

import seaborn as sns
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.colors as clr
%matplotlib inline

In [2]:
# Various datasets to visualize

option = 7

# option number -> (base folder, simulation id, timezone name)
simulation_options = {
    1: ("./simulations/retreat2020", 29, "US/Eastern"),
    2: ("./simulations/cmutest", 32, "US/Mountain"),
    3: ("./simulations/cmu", 34, "US/Mountain"),
    4: ("./simulations/fgcu", 33, "US/Eastern"),
    5: ("./simulations/csw1", 36, "US/Eastern"),
    6: ("./simulations/csw2", 37, "US/Eastern"),
    7: ("./simulations/byu21", 39, "US/Mountain"),
}

# Fail in this cell for an unknown option. Otherwise the three settings would be
# left unassigned, and in a live kernel any values from a previous run would be
# silently reused by the cells below.
if option not in simulation_options:
    raise ValueError(
        "Unknown option %r; expected one of %s" % (option, sorted(simulation_options))
    )

base_folder, sim_id, sim_tz = simulation_options[option]

In [3]:
# Some config

# Vertex colors keyed by coded infection outcome:
# 0 = no infection
# 1 = index case
# 2 = transmission
# https://matplotlib.org/3.1.0/gallery/color/named_colors.html
color_dict = {0: clr.to_hex("skyblue"), 1: clr.to_hex("crimson"), 2: clr.to_hex("orange")}

# Alternative outcome coding (currently disabled):
# 0 = no information
# 1 = became infected, but no outcome (yet) known
# 2 = recovered
# 3 = dead
# 4 = escaped
# 10 = infected others, but has no outcome, so it is considered an initial node
# color_dict = {0: "Lime Green", 1: "Dark Orange", 2: "Dodger Blue", 3: "Black", 4:"Orchid", 10:"White"}

data_folder = path.join(base_folder, "data")
output_folder = path.join(base_folder, "output", "transmissions")
# exist_ok=True replaces the separate existence check, so re-running the cell is
# safe and there is no window between checking and creating the folder.
os.makedirs(output_folder, exist_ok=True)

frame_format = "png"
create_movie = True
movie_fps = 2

# Time delta for plots in seconds (min_step is the frame length in minutes)
min_step = 30
time_delta = 60 * min_step

# https://howchoo.com/g/ywi5m2vkodk/working-with-datetime-objects-and-timezones-in-python
# https://itnext.io/working-with-timezone-and-python-using-pytz-library-4931e61e5152
timezone = pytz.timezone(sim_tz)

# Font used for the caption drawn onto each frame.
# https://github.com/google/fonts/tree/master/apache
font = ImageFont.truetype("Roboto-Regular.ttf", size=24)

# Keyword arguments forwarded to the plot() call in plot_network
style = {}
style["bbox"] = (800, 800)
style["margin"] = 15
style["vertex_size"] = 7
style["vertex_label_size"] = 5
style["edge_arrow_size"] = 0.7
style["edge_arrow_width"] = 0.7
style["edge_curved"] = False

In [4]:
# Some utility functions

def get_transmissions(events):
    """Build the list of directed transmission edges found in `events`.

    Only rows whose "type" is "infection" are considered. For each such row
    whose "inf" text contains "PEER", the infecting peer's p2p id is taken as
    the text between the first "[" and the first ":" of that string.

    Relies on the module-level `user_index` (user id -> vertex index) and
    `p2pToId` (p2p id -> user id) mappings.

    Returns a list of (source_vertex, target_vertex) tuples.
    """
    infection_rows = events[events["type"] == "infection"]
    target_ids = infection_rows.user_id.values
    sources = infection_rows.inf.values

    edges = []
    for target_id, source in zip(target_ids, sources):
        # Looked up for every infection row, peer-sourced or not.
        target_vertex = user_index[target_id]

        if "PEER" not in source:
            continue

        start = source.index("[") + 1
        stop = source.index(":")
        source_vertex = user_index[p2pToId[source[start:stop]]]
        edges.append((source_vertex, target_vertex))

    return edges

def get_outcomes(events, outcomes0 = None):
    """Return the coded infection outcome of every user, indexed by vertex.

    Codes written by this function: 1 when the infection source text contains
    "CASE0", 2 when it contains "PEER" (if both occur, 2 wins). Entries for
    users without an infection row are left untouched.

    Parameters:
        events: DataFrame with "type", "user_id" and "inf" columns; only rows
            whose "type" is "infection" are used.
        outcomes0: optional previously computed outcomes to continue from.
            When given, it is updated IN PLACE and the same object is
            returned; when None, a fresh list of zeros with one entry per row
            of the module-level `users` is created.

    Relies on the module-level `users` and `user_index` (user id -> vertex
    index).
    """
    inf = events[events["type"] == "infection"]
    # If a user has several infection rows, the last one wins (dict semantics).
    infMap = pd.Series(inf.inf.values, index=inf.user_id).to_dict()

    # Identity test on purpose: `== None` is evaluated element-wise by
    # array-like containers, which then cannot be used as an `if` condition.
    if outcomes0 is None:
        outcomes = [0] * len(users)
    else:
        outcomes = outcomes0

    for kid, src in infMap.items():
        idx = user_index[kid]
        if "CASE0" in src:
            outcomes[idx] = 1
        if "PEER" in src:
            outcomes[idx] = 2

    return outcomes

def get_network(transmissions, outcomes):
    """Create the directed transmission graph.

    The graph gets one vertex per entry of the module-level `user_index` and
    one edge per (source, target) pair in `transmissions`. When `outcomes` is
    truthy it is stored as the "outcome" vertex attribute, and the matching
    `color_dict` entries become the "color" vertex attribute.
    """
    g = Graph(directed=True)
    g.add_vertices(len(user_index))
    g.add_edges(transmissions)

    # Nothing to annotate: return the bare graph.
    if not outcomes:
        return g

    g.vs["outcome"] = outcomes
    g.vs["color"] = [color_dict[code] for code in g.vs["outcome"]]
    return g

def gen_layout(g):
    """Return the result of `g.layout("fr")` for graph `g`.

    "fr" is the name igraph registers for its Fruchterman-Reingold layout.
    """
    algorithm = "fr"
    return g.layout(algorithm)

def plot_network(g, layout, title, fn):
    """Render graph `g` to `fn` inside `output_folder`, optionally captioned.

    Parameters:
        g: graph passed to plot().
        layout: layout passed to plot() as the "layout" keyword.
        title: caption text; when empty/None no caption is drawn.
        fn: output file name (relative to the module-level `output_folder`).
            A caption is only drawn when the name ends in ".png".

    Uses the module-level `style` dict for the remaining plot options and the
    module-level `font` for the caption.
    """
    img_fn = os.path.join(output_folder, fn)

    # Merge the layout into a per-call copy so the shared `style` dict is not
    # modified as a side effect of plotting.
    plot(g, img_fn, **{**style, "layout": layout})

    if title and fn.lower().endswith(".png"):
        # The context manager releases the image file handle when done.
        with Image.open(img_fn) as image:
            draw = ImageDraw.Draw(image)
            # Anchor the caption 40px above the bottom edge (y=760 on the
            # default 800px-high canvas) so it follows the actual image size.
            draw.text((10, image.height - 40), title, fill='rgb(0, 0, 0)', font=font)
            image.save(img_fn)

def print_network_properties(g):
    """Print vertex count, edge count, directedness and maximum degree of `g`."""
    # (label, name of the zero-argument graph method supplying the value);
    # each method is called right before its line is printed.
    report = (
        ("Number of vertices in the graph:", "vcount"),
        ("Number of edges in the graph", "ecount"),
        ("Is the graph directed:", "is_directed"),
        ("Maximum degree in the graph:", "maxdegree"),
    )
    for label, method_name in report:
        print(label, getattr(g, method_name)())
#     print("Adjacency matrix:\n", g.get_adjacency())

In [5]:
# Load participants and histories

all_users = pd.read_csv(path.join(data_folder, "participants.csv"))
all_events = pd.read_csv(path.join(data_folder, "histories.csv"))

# Keep only the rows of the selected simulation.
users = all_users[all_users["sim_id"] == sim_id]
# .copy() makes `events` an independent frame, so adding a column below does not
# write into a slice of all_events (which triggers SettingWithCopyWarning).
events = all_events[all_events["sim_id"] == sim_id].copy()

# contact_length is divided by 1000 before being subtracted from time
# (presumably milliseconds vs. seconds -- TODO confirm against the data export).
events["event_start"] = (events["time"] - events["contact_length"]/1000).astype(int)

# Lookup tables between p2p ids, user ids and simulation ids.
p2pToSim = pd.Series(users.sim_id.values, index=users.p2p_id).to_dict()
p2pToId = pd.Series(users.id.values, index=users.p2p_id).to_dict()
idTop2p = pd.Series(users.p2p_id.values, index=users.id).to_dict()

# Consecutive 0-based index for every user id, in idTop2p iteration order.
user_index = {kid: idx for idx, kid in enumerate(idTop2p)}

# These should return the same value.
# A smaller p2pToId count means several users share a p2p_id value (or it is
# missing), because repeated keys collapse into one dictionary entry.
print(len(users))
print(len(idTop2p))
print(len(p2pToId))
print(len(user_index))

432
432
386
432


In [6]:
# Time range covered by the events. The values are handed to
# datetime.fromtimestamp below, i.e. they are treated as epoch seconds.
# Series.min()/max() are vectorized and skip missing values, unlike the builtin
# min()/max(), which iterate in Python and give order-dependent results on NaN.
min_time = events['time'].min()
max_time = events['time'].max()

first_date = datetime.fromtimestamp(min_time, tz=timezone)
last_date = datetime.fromtimestamp(max_time, tz=timezone)

print("First event:", first_date)
print("Last event :", last_date)
print(first_date.tzinfo)

First event: 2021-02-19 08:18:12-07:00
Last event : 2021-02-20 06:28:48-07:00
US/Mountain


In [7]:
# Transmissions over time
#
# Renders one frame per `time_delta` window. Each frame shows every transmission
# recorded up to the end of its window; the frames are optionally joined into a
# movie afterwards.

toutcomes = None

t = min_time
frame = 0
img_array = []
while t <= max_time:
    t0 = t
    t += time_delta
    td = datetime.fromtimestamp(t, tz=timezone)
    print("FRAME", frame, ":", datetime.fromtimestamp(t0, tz=timezone), "-", td)

    # Cumulative selection: all events up to the end of the current window.
    condition = events['time'] <= t
    tevents = events[condition]
    # get_outcomes continues from the outcomes of the previous frame.
    toutcomes = get_outcomes(tevents, toutcomes)
    transmissions = get_transmissions(tevents)
    g = get_network(transmissions, toutcomes)

    # The layout is recomputed for every frame.
    layout = gen_layout(g)
    img_title = td.strftime('%B %d, %I:%M %p')
    img_fn =  "frame-" + str(frame) + "." + frame_format
    plot_network(g, layout, img_title, img_fn)

    if create_movie:
        frame_path = os.path.join(output_folder, img_fn)
        img = cv2.imread(frame_path)
        # cv2.imread returns None instead of raising when it cannot decode the
        # file (e.g. a non-raster frame_format), so fail with a clear message.
        if img is None:
            raise IOError("Could not read frame image for the movie: " + frame_path)
        img_array.append(img)

    frame += 1

if create_movie:
    print("Saving movie...")
    movie_fn = os.path.join(output_folder, 'movie.mp4')
    # Frame size as (width, height), taken from the first rendered frame.
    height, width = img_array[0].shape[:2]
    size = (width, height)
    # 'mp4v' is the FourCC that matches the .mp4 container; 'MJPG' is not a
    # valid tag for MP4 in OpenCV's FFmpeg backend.
    out = cv2.VideoWriter(movie_fn, cv2.VideoWriter_fourcc(*'mp4v'), movie_fps, size)
    if not out.isOpened():
        raise RuntimeError("Could not open video writer for " + movie_fn)
    try:
        for img in img_array:
            out.write(img)
    finally:
        # Always finalize the file, even if writing a frame fails.
        out.release()
    print("Done.")

FRAME 0 : 2021-02-19 08:18:12-07:00 - 2021-02-19 08:48:12-07:00
FRAME 1 : 2021-02-19 08:48:12-07:00 - 2021-02-19 09:18:12-07:00
FRAME 2 : 2021-02-19 09:18:12-07:00 - 2021-02-19 09:48:12-07:00
FRAME 3 : 2021-02-19 09:48:12-07:00 - 2021-02-19 10:18:12-07:00
FRAME 4 : 2021-02-19 10:18:12-07:00 - 2021-02-19 10:48:12-07:00
FRAME 5 : 2021-02-19 10:48:12-07:00 - 2021-02-19 11:18:12-07:00
FRAME 6 : 2021-02-19 11:18:12-07:00 - 2021-02-19 11:48:12-07:00
FRAME 7 : 2021-02-19 11:48:12-07:00 - 2021-02-19 12:18:12-07:00
FRAME 8 : 2021-02-19 12:18:12-07:00 - 2021-02-19 12:48:12-07:00
FRAME 9 : 2021-02-19 12:48:12-07:00 - 2021-02-19 13:18:12-07:00
FRAME 10 : 2021-02-19 13:18:12-07:00 - 2021-02-19 13:48:12-07:00
FRAME 11 : 2021-02-19 13:48:12-07:00 - 2021-02-19 14:18:12-07:00
FRAME 12 : 2021-02-19 14:18:12-07:00 - 2021-02-19 14:48:12-07:00
FRAME 13 : 2021-02-19 14:48:12-07:00 - 2021-02-19 15:18:12-07:00
FRAME 14 : 2021-02-19 15:18:12-07:00 - 2021-02-19 15:48:12-07:00
FRAME 15 : 2021-02-19 15:48:12-07:0