In [1]:
import os
from os import path
from datetime import datetime, timedelta, date
import pytz

# https://igraph.discourse.group/t/igraph-for-python3-plotting-using-pycairo-on-mac-oscatalina-10-15-4/162
# Install with:
# conda install -c conda-forge python-igraph
from igraph import *

from PIL import Image, ImageDraw, ImageFont
import cv2

import seaborn as sns
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.colors as clr
%matplotlib inline

In [3]:
# Various datasets to visualize

# Change this number to pick one of the datasets listed in `sim_options`.
option = 10

# Settings of every known dataset, keyed by option number.
# Required keys: base_folder, sim_id, sim_tz, time_step_min (minutes per frame).
# Optional keys: title, time0/time1 (observation window, '%b %d %Y %I:%M%p'),
# use_new_id_schema (how the infecting peer is encoded in the histories).
sim_options = {
    1: {"base_folder": "./simulations/retreat2020", "sim_id": 29,
        "sim_tz": "US/Eastern", "time_step_min": 30},
    2: {"base_folder": "./simulations/cmutest", "sim_id": 32,
        "sim_tz": "US/Mountain", "time_step_min": 30},
    3: {"base_folder": "./simulations/cmu", "sim_id": 34,
        "sim_tz": "US/Mountain", "time_step_min": 60},
    4: {"base_folder": "./simulations/fgcu", "sim_id": 33,
        "sim_tz": "US/Eastern", "time_step_min": 60},
    5: {"base_folder": "./simulations/csw1", "sim_id": 36,
        "sim_tz": "US/Eastern", "time_step_min": 60},
    6: {"base_folder": "./simulations/csw2", "sim_id": 37,
        "sim_tz": "US/Eastern", "time_step_min": 60},
    7: {"base_folder": "./simulations/byu21", "sim_id": 39,
        "sim_tz": "US/Mountain", "time_step_min": 60},
    8: {"title": "UCAS21 OO SIMULATION",
        "base_folder": "./simulations/ucas21", "sim_id": 40,
        "sim_tz": "US/Mountain", "time_step_min": 60},
    10: {"title": "WALTER JOHNSON HIGH SCHOOL",
         "base_folder": "./simulations/wjhs22", "sim_id": 76,
         "sim_tz": "US/Eastern", "time_step_min": 5,
         "time0": 'Oct 14 2022 7:30AM', "time1": 'Oct 14 2022 11:55AM',
         "use_new_id_schema": True},
}

# Fail here, with a clear message, instead of leaving the variables below
# undefined (or stale from a previous run) when the option is not known.
if option not in sim_options:
    raise ValueError("Unknown dataset option %r; choose one of %s"
                     % (option, sorted(sim_options)))

selected = sim_options[option]

# Settings every dataset must provide
base_folder = selected["base_folder"]
sim_id = selected["sim_id"]
sim_tz = selected["sim_tz"]
time_step_min = selected["time_step_min"]

# Optional settings; empty time0/time1 means "no observation window"
title = selected.get("title", '')
time0 = selected.get("time0", '')
time1 = selected.get("time1", '')
use_new_id_schema = selected.get("use_new_id_schema", False)

In [4]:
# Some config

# Coded outcomes:
# 0 = no infection
# 1 = index case
# 2 = transmission
# https://matplotlib.org/3.1.0/gallery/color/named_colors.html
color_dict = {0: clr.to_hex("skyblue"), 1: clr.to_hex("crimson"), 2: clr.to_hex("orange")}

# Alternative outcome coding (kept for reference, currently disabled):
# 0 = no information
# 1 = became infected, but no outcome (yet) known
# 2 = recovered
# 3 = dead
# 4 = escaped
# 10 = infected others, but has no outcome, so it is considered an initial node
# color_dict = {0: "Lime Green", 1: "Dark Orange", 2: "Dodger Blue", 3: "Black", 4:"Orchid", 10:"White"}

# Font used to stamp the frame title; the .ttf file must be resolvable by PIL
# https://github.com/google/fonts/tree/master/apache
font = ImageFont.truetype("Roboto-Regular.ttf", size=24)

# Keyword arguments forwarded to igraph's plot() for every frame
style = {
    "bbox": (800, 800),
    "margin": 15,
    "vertex_size": 7,
    "vertex_label_size": 5,
    "edge_arrow_size": 0.7,
    "edge_arrow_width": 0.7,
    "edge_curved": False,
}

data_folder = path.join(base_folder, "data")
output_folder = path.join(base_folder, "output", "transmissions")
# exist_ok makes this a no-op when the folder is already there and avoids the
# check-then-create race of testing path.exists() first.
os.makedirs(output_folder, exist_ok=True)

frame_format = "png"
create_movie = True
movie_fps = 2

# Time delta for plots in seconds
time_delta_sec = 60 * time_step_min

# https://howchoo.com/g/ywi5m2vkodk/working-with-datetime-objects-and-timezones-in-python
# https://itnext.io/working-with-timezone-and-python-using-pytz-library-4931e61e5152
timezone = pytz.timezone(sim_tz)

# Format of the time0/time1 strings, e.g. 'Oct 14 2022 7:30AM'
time_format = '%b %d %Y %I:%M%p'

# Observation window as timezone-aware datetimes, or None/None when no
# window was configured (either time0 or time1 is empty).
if time0 and time1:
    obs_date0 = timezone.localize(datetime.strptime(time0, time_format))
    obs_date1 = timezone.localize(datetime.strptime(time1, time_format))
else:
    obs_date0 = None
    obs_date1 = None

In [5]:
# Some utility functions

def get_transmissions(events):
    """Build the list of transmission edges found in `events`.

    Only rows whose "type" is "infection" are considered. For each of them the
    infected user is mapped to a vertex number through the global `user_index`.
    When the row's `inf` value contains "PEER", the text between "[" and the
    first ":" identifies the infecting peer:

    * new schema (`use_new_id_schema` is true): that text is the user id;
    * old schema: that text is a p2p id, translated through `p2pToId`.

    Peers that cannot be resolved to a known user are skipped.

    Returns a list of (source_vertex, target_vertex) tuples.
    """
    infection_rows = events[events["type"] == "infection"]

    edges = []
    targets = infection_rows.user_id.values
    sources = infection_rows.inf.values
    for target_id, source_tag in zip(targets, sources):
        # Looked up for every infection, so an unknown infected user raises KeyError
        target_vertex = user_index[target_id]

        if "PEER" not in source_tag:
            continue

        raw_source = source_tag[source_tag.index("[") + 1:source_tag.index(":")]

        if use_new_id_schema:
            # New schema: the bracketed value is the numeric user id
            source_id = int(raw_source)
        else:
            # Old schema (sims before 2022): the bracketed value is a p2p id
            if raw_source not in p2pToId:
                continue
            source_id = p2pToId[raw_source]

        if source_id in user_index:
            edges.append((user_index[source_id], target_vertex))

    return edges

def get_outcomes(events, outcomes0=None):
    """Return the per-vertex outcome codes implied by the infections in `events`.

    Parameters
    ----------
    events : DataFrame
        Event rows; only those whose "type" is "infection" are used.
    outcomes0 : list, optional
        Outcome codes from a previous call. When given, this same list is
        updated in place and returned, so codes accumulate across calls.
        When omitted, a fresh list of zeros (one per row of the global
        `users`) is created.

    Returns
    -------
    list
        Outcome code per vertex (positions come from the global `user_index`):
        0 = no infection, 1 = index case ("CASE0" in the `inf` value),
        2 = transmission ("PEER" in the `inf` value).

    Raises
    ------
    KeyError
        If an infected user id is missing from `user_index`.
    """
    inf = events[events["type"] == "infection"]
    # One entry per user id; if a user has several infection rows the last one wins
    infMap = pd.Series(inf.inf.values, index=inf.user_id).to_dict()

    # Identity test: `== None` is evaluated element-wise for array-like input
    # and would then fail inside `if`.
    if outcomes0 is None:
        outcomes = [0] * len(users)
    else:
        outcomes = outcomes0

    for kid, src in infMap.items():
        idx = user_index[kid]
        if "CASE0" in src:
            outcomes[idx] = 1
        # Checked second on purpose: "PEER" overrides "CASE0" if both appear
        if "PEER" in src:
            outcomes[idx] = 2

    return outcomes

def get_network(transmissions, outcomes):
    """Create the directed transmission graph.

    The graph gets one vertex per entry of the global `user_index` and one
    edge per (source, target) pair in `transmissions`. When `outcomes` is
    non-empty it is stored as the "outcome" vertex attribute and translated
    through `color_dict` into the "color" vertex attribute.
    """
    vertex_count = len(user_index)

    graph = Graph(directed=True)
    graph.add_vertices(vertex_count)
    graph.add_edges(transmissions)

    # Nothing to colour when no outcomes were supplied
    if not outcomes:
        return graph

    graph.vs["outcome"] = outcomes
    graph.vs["color"] = [color_dict[code] for code in graph.vs["outcome"]]
    return graph

def gen_layout(g):
    """Return the layout of `g` computed by its "fr" layout algorithm."""
    layout_name = "fr"  # igraph's short name for Fruchterman-Reingold
    return g.layout(layout_name)

def plot_network(g, layout, title, fn):
    """Draw graph `g` into the output folder and caption PNG frames.

    Parameters
    ----------
    g : graph to draw with igraph's plot().
    layout : vertex layout to use for this drawing.
    title : caption written near the bottom-left corner; skipped when empty.
    fn : file name (relative to the global `output_folder`); the caption is
        only added when the name ends in ".png".

    The drawing options come from the global `style` dict, which is read but
    not modified.
    """
    img_fn = os.path.join(output_folder, fn)

    # Merge the layout into a per-call copy of the options so that one
    # frame's layout never leaks into the shared `style` dict.
    plot(g, img_fn, **{**style, "layout": layout})

    if fn.endswith(".png") and title:
        # Context manager releases the file handle PIL opens for reading
        with Image.open(img_fn) as image:
            draw = ImageDraw.Draw(image)
            # 40 px above the bottom edge (y = 760 on the default 800 px canvas)
            draw.text((10, image.height - 40), title, fill='rgb(0, 0, 0)', font=font)
            image.save(img_fn)

def print_network_properties(g):
    """Print vertex count, edge count, directedness and maximum degree of `g`."""
    # (label, name of the graph method that supplies the value), in print order
    report = (
        ("Number of vertices in the graph:", "vcount"),
        ("Number of edges in the graph", "ecount"),
        ("Is the graph directed:", "is_directed"),
        ("Maximum degree in the graph:", "maxdegree"),
    )
    for label, method_name in report:
        print(label, getattr(g, method_name)())
    # Disabled: also dump the adjacency matrix
    # print("Adjacency matrix:\n", g.get_adjacency())

# https://stackoverflow.com/a/48938464
def hour_rounder(t):
    """Round datetime `t` to the nearest full hour (minute 30 and above rounds up)."""
    top_of_hour = t.replace(minute=0, second=0, microsecond=0)
    hours_to_add = 1 if t.minute >= 30 else 0
    return top_of_hour + timedelta(hours=hours_to_add)

In [6]:
# Load participants and histories

all_users = pd.read_csv(path.join(data_folder, "participants.csv"))
all_events = pd.read_csv(path.join(data_folder, "histories.csv"))

users = all_users[all_users["sim_id"] == sim_id]

# Take an explicit copy of the filtered rows: filling values and adding a
# column on a slice of all_events is what triggers pandas'
# SettingWithCopyWarning.
events = all_events[all_events["sim_id"] == sim_id].copy()
events = events.fillna({'contact_length': 0, 'peer_id': -1})
# time minus contact_length / 1000, truncated to an integer
# NOTE(review): presumably time is in seconds and contact_length in milliseconds - confirm
events["event_start"] = (events["time"] - events["contact_length"] / 1000).astype(int)

# Lookup tables between p2p ids, user ids and simulation ids
p2pToSim = pd.Series(users.sim_id.values, index=users.p2p_id).to_dict()
p2pToId = pd.Series(users.id.values, index=users.p2p_id).to_dict()
idTop2p = pd.Series(users.p2p_id.values, index=users.id).to_dict()

# Consecutive vertex number for every user id, in the order of idTop2p
user_index = {kid: idx for idx, kid in enumerate(idTop2p)}

# These should return the same value
print(len(users))
print(len(idTop2p))
print(len(p2pToId))
print(len(user_index))

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


446
446
446
446


In [7]:
# Round min and max times to the hour
# Earliest and latest event times, rounded to the nearest hour in the simulation timezone
first_date, last_date = (
    hour_rounder(datetime.fromtimestamp(stamp, tz=timezone))
    for stamp in (min(events['time']), max(events['time']))
)
# Same instants as POSIX timestamps; these bound the frame loop
min_time, max_time = first_date.timestamp(), last_date.timestamp()

print("First event:", first_date)
print("Last event :", last_date)

# Echo the configured observation window (parsed, not localized)
if time0 and time1:
    for label, text in (("Start time:", time0), ("End time:", time1)):
        print(label, datetime.strptime(text, '%b %d %Y %I:%M%p'))

print(first_date.tzinfo)

First event: 2022-10-05 16:00:00-04:00
Last event : 2022-10-15 13:00:00-04:00
Start time: 2022-10-14 07:30:00
End time: 2022-10-14 11:55:00
US/Eastern


In [9]:
# Transmissions over time
#
# Steps through the simulation in increments of time_delta_sec. For every step
# that ends inside the observation window (or for every step when no window
# is set), the cumulative transmission network is drawn to one frame file.
# When create_movie is set, the frames are also assembled into movie.mp4.

toutcomes = None

t = min_time
frame = 0
img_array = []
while t <= max_time:
    t0 = t
    t += time_delta_sec

    # End of the current step: drives the window filter and the frame caption
    td = datetime.fromtimestamp(t, tz=timezone)
    in_window = not obs_date0 or not obs_date1 or (obs_date0 <= td <= obs_date1)
    if not in_window:
        continue

    print("FRAME", frame, ":", datetime.fromtimestamp(t0, tz=timezone), "-", td)

    # Cumulative view: every event up to the end of this step
    tevents = events[events['time'] <= t]
    toutcomes = get_outcomes(tevents, toutcomes)
    transmissions = get_transmissions(tevents)
    g = get_network(transmissions, toutcomes)

    layout = gen_layout(g)
    img_title = td.strftime('%B %d, %I:%M %p')
    img_fn = "frame-" + str(frame) + "." + frame_format
    plot_network(g, layout, img_title, img_fn)

    if create_movie:
        img = cv2.imread(os.path.join(output_folder, img_fn))
        if img is None:
            # cv2.imread returns None (it does not raise) when the file
            # cannot be decoded, e.g. for a non-raster frame_format.
            print("Skipping unreadable frame:", img_fn)
        else:
            img_array.append(img)

    frame += 1

if create_movie:
    if not img_array:
        # Without frames there is no frame size to give to the video writer
        print("No frames were rendered; skipping movie.")
    else:
        print("Saving movie...")
        movie_fn = os.path.join(output_folder, 'movie.mp4')
        height, width = img_array[0].shape[:2]
        size = (width, height)
        # 'mp4v' matches the .mp4 container; 'MJPG' is meant for .avi files
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(movie_fn, fourcc, movie_fps, size)
        try:
            for img in img_array:
                out.write(img)
        finally:
            # Always finalize the file, even if writing a frame fails
            out.release()
        print("Done.")

FRAME 0 : 2022-10-14 07:25:00-04:00 - 2022-10-14 07:30:00-04:00
FRAME 1 : 2022-10-14 07:30:00-04:00 - 2022-10-14 07:35:00-04:00
FRAME 2 : 2022-10-14 07:35:00-04:00 - 2022-10-14 07:40:00-04:00
FRAME 3 : 2022-10-14 07:40:00-04:00 - 2022-10-14 07:45:00-04:00
FRAME 4 : 2022-10-14 07:45:00-04:00 - 2022-10-14 07:50:00-04:00
FRAME 5 : 2022-10-14 07:50:00-04:00 - 2022-10-14 07:55:00-04:00
FRAME 6 : 2022-10-14 07:55:00-04:00 - 2022-10-14 08:00:00-04:00
FRAME 7 : 2022-10-14 08:00:00-04:00 - 2022-10-14 08:05:00-04:00
FRAME 8 : 2022-10-14 08:05:00-04:00 - 2022-10-14 08:10:00-04:00
FRAME 9 : 2022-10-14 08:10:00-04:00 - 2022-10-14 08:15:00-04:00
FRAME 10 : 2022-10-14 08:15:00-04:00 - 2022-10-14 08:20:00-04:00
FRAME 11 : 2022-10-14 08:20:00-04:00 - 2022-10-14 08:25:00-04:00
FRAME 12 : 2022-10-14 08:25:00-04:00 - 2022-10-14 08:30:00-04:00
FRAME 13 : 2022-10-14 08:30:00-04:00 - 2022-10-14 08:35:00-04:00
FRAME 14 : 2022-10-14 08:35:00-04:00 - 2022-10-14 08:40:00-04:00
FRAME 15 : 2022-10-14 08:40:00-04:0