In [329]:
import glob
import os
from datetime import datetime, timedelta, date
from os import path

In [330]:
# Dataset configuration. Exactly one (folder, ext, ntot, tstep) group should be active.
#   folder : directory holding one log file per user
#   ext    : extension of the log files (without the dot)
#   ntot   : number of vertices allocated for the graphs / outcome list
#   tstep  : length of one animation frame, in minutes

# folder = "./broad15"
# ext = "log"
# ntot = 50
# tstep = 15

folder = "./sma125"
ext = "txt"
ntot = 200
tstep = 5

# folder = "./shanghai"
# ext = "txt"
# ntot = 60
# tstep = 5

# Directory where the rendered frames are written.
output = "./output"
# makedirs(exist_ok=True) is safe to re-run, has no exists()/mkdir() race,
# and also creates missing parent directories.
os.makedirs(output, exist_ok=True)

In [331]:
# Load all log files

# Only files directly inside `folder` are wanted. The previous pattern
# (folder + "**/*." + ext with recursive=False) also matched files in sibling
# directories whose name merely starts with `folder`. Sorting makes the
# user -> vertex index assignment reproducible across runs and machines.
files = sorted(glob.glob(path.join(folder, "*." + ext)))

min_time = None
max_time = None
users = {}       # case id -> list of events in chronological order
count = 0
user_index = {}  # case id -> vertex index
for fn in files:
    bname = path.basename(fn)
    # The case id is everything before the last '-' of the file name.
    case_id = bname[0:bname.rfind('-')]
    events = []
    with open(fn, 'r') as f:
        lines = f.readlines()

    # The first and last lines of each file are not events and are skipped.
    for line in lines[1:-1]:
        # Drop the leading character and the two trailing characters that wrap each record.
        line = line.strip()[1:-2]
        parts = line.split(',')
        tstamp = int(parts[0].split(':')[1])

        # Split "TYPE:DATA" at the first ':'. When there is no ':' the whole
        # string is the type and the data is empty (kept as "" rather than None
        # so that substring checks on the data never fail).
        evtyp, _, evdat = parts[1].partition(':')

        events.append({"time": tstamp, "type": evtyp, "data": evdat})

        min_time = tstamp if min_time is None else min(min_time, tstamp)
        max_time = tstamp if max_time is None else max(max_time, tstamp)

    # Events are stored last to first in log files, reverting the order
    events.reverse()
    users[case_id] = events
    user_index[case_id] = count
    count += 1

if min_time is None:
    # Without this check the time range would be built from placeholder values.
    raise RuntimeError(
        "No events found in {} log file(s) matching {!r}; check 'folder' and 'ext'".format(
            len(files), path.join(folder, "*." + ext)))

min_delta = timedelta(minutes=tstep)
start_data = datetime.fromtimestamp(min_time)
end_data = datetime.fromtimestamp(max_time) + min_delta
print(min_time)
print(max_time)
print(start_data)
print(end_data)

1575565444
1575577153
2019-12-05 12:04:04
2019-12-05 15:24:13


In [332]:
# The function in this cell constructs the infection network for a given time point

def construct_network(td, fr = -1):
    """Build the infection network from all events that occurred strictly before `td`.

    Reads the module-level `users` (case id -> chronological event list),
    `user_index` (case id -> vertex index) and `ntot` (number of vertices),
    and prints a short summary of the counts.

    Parameters
    ----------
    td : datetime
        Cut-off time; events whose timestamp is at or after `td` are ignored.
    fr : int, optional
        Frame number. Not used by the computation; kept for callers/debugging.

    Returns
    -------
    inf_network : list of dict
        At most one edge per user, with keys "a" (infecting case id, or
        "unk"/"zero"/"src"), "b" (infected case id), "t" (event time as
        HH:MM:SS) and "s" (strain, "-" when unknown).
    user_outcome : list of int
        One code per vertex: 0 = nothing recorded, 1 = infected by a peer,
        2 = recovered, 3 = dead, 10 = initial node (CASE0/SOURCE, or a user
        that infected others without any recorded outcome of its own).
    first_inf : dict
        Vertex index -> datetime for users considered first cases.
    first_orph : dict
        Vertex index -> datetime for users with an outcome but no infection
        event, and that are not already in `first_inf`.
    """
    inf_network = []
    user_outcome = [0] * ntot

    ninf = 0
    nknown = 0
    nmiss = 0
    nsurv = 0
    ndead = 0
    npeer = 0
    parents = {}     # case id of an infecting peer -> earliest time it infected somebody
    first_inf = {}
    first_orph = {}
    infected = {}    # vertex index -> True when an infection event with a known source was seen
    orphaned = {}    # case id -> time of an outcome that had no preceding infection edge
    for key, events in users.items():
        if len(events) == 0:
            continue
        idx = user_index[key]
        pkey = ""
        infect = None
        has_inf_event = False
        for ev in events:
            when = datetime.fromtimestamp(ev["time"])
            if td <= when:
                continue
            str_date = when.strftime('%H:%M:%S')
            # Events without a payload may carry None; normalise so the
            # substring checks below never raise.
            data = ev["data"] or ""
            if ev["type"] == "OUT":
                if data == "RECOVERED":
                    user_outcome[idx] = 2
                    nsurv += 1
                elif data == "DEAD":
                    user_outcome[idx] = 3
                    ndead += 1
                if not infect:
                    # Infection edge without origin
                    infect = [{"a":"unk", "b":key, "t":str_date, "s":"-"}]
                    orphaned[key] = when
                ninf += 1
            elif ev["type"] == "INF":
                if pkey:
                    # Duplicated parent, skipping
                    continue
                if "PEER" in data:
                    user_outcome[idx] = 1
                    # Payload looks like "PEER[<id>]" or "PEER[<id>:<strain>]".
                    pstr = data[5:-1]
                    if ":" in pstr:
                        pieces = pstr.split(":")
                        pkey = pieces[0]
                        strain = pieces[1]
                    else:
                        pkey = pstr
                        strain = "0"
                    if pkey in users:
                        # Remember the earliest time this peer infected somebody.
                        if pkey not in parents or when < parents[pkey]:
                            parents[pkey] = when
                        infect = [{"a":pkey, "b":key, "t":str_date, "s": strain}]
                        npeer += 1
                        has_inf_event = True
                    else:
                        infect = [{"a":"unk", "b":key, "t":str_date, "s": strain}]
                        has_inf_event = False
                elif "CASE0" in data:
                    user_outcome[idx] = 10
                    # Payload looks like "CASE0[<strain>]".
                    strain = data[6:-1]
                    infect = [{"a":"zero", "b":key, "t":str_date, "s": strain}]
                    has_inf_event = True
                    first_inf[idx] = when
                elif "SOURCE" in data:
                    user_outcome[idx] = 10
                    if ":" in data:
                        # Previously this read `pieces`, which is only set while
                        # parsing a PEER payload (possibly of another user).
                        # Assumes the strain is the text after the last ':',
                        # optionally followed by a closing ']' like the PEER
                        # payload - TODO confirm against the log format.
                        strain = data.rsplit(":", 1)[1].rstrip("]")
                    else:
                        strain = "0"
                    infect = [{"a":"src", "b":key, "t":str_date, "s": strain}]
                    has_inf_event = True
                    first_inf[idx] = when

        if has_inf_event:
            nknown += 1
        else:
            nmiss += 1
        infected[idx] = has_inf_event

        if infect:
            inf_network += infect

    print("  Total number of cases:", ninf)
    print("  Total number of deaths:", ndead)
    print("  Total number of survivors:", nsurv)
    print("  Number of infections with known source:", nknown)
    print("  Number of infections from peer:", npeer)
    print("  Number of infections with missing source:", nmiss)

    # Finalizing outcomes and first infections based on ancillary info

    for p in parents:
        idx = user_index[p]
        if user_outcome[idx] == 0:
            # This user infected others but has no outcome yet, so it is marked as an initial node (10)
            user_outcome[idx] = 10
        # Users with an empty log never get an `infected` entry, hence .get().
        if not infected.get(idx, False):
            # Not infected by anybody else, so it is a first case
            first_inf[idx] = parents[p]

    for o in orphaned:
        # Orphan nodes are those that had an outcome but no infecting parent,
        # and are not already registered as a first case
        idx = user_index[o]
        if idx not in first_inf:
            first_orph[idx] = orphaned[o]

    return inf_network, user_outcome, first_inf, first_orph

In [None]:
# Plot infection network using igraph
# https://igraph.org/python/

# Star import kept because later cells use Graph/plot from it.
from igraph import *

# Outcome code (see construct_network) -> vertex colour. Hoisted out of the
# loop because it does not change between frames.
color_dict = {0: "Lime Green", 1: "Dark Orange", 2: "Deep Sky Blue", 3: "Dark Gray", 10:"White"}

td = start_data
frame = 0
while td <= end_data:
    # Advance first: each frame shows every event strictly before the end of its interval.
    td += min_delta
    print("-> Frame:", frame, "- Time:", td)
    network, outcomes, _, _ = construct_network(td, frame)

    g = Graph(directed=True)
    g.add_vertices(ntot)

    g.vs["outcome"] = outcomes
    g.vs["color"] = [color_dict[out] for out in g.vs["outcome"]]

    # Keep only edges whose two endpoints are known users (this drops the
    # "unk"/"zero"/"src" pseudo-sources) and add them in a single call.
    edge_list = [(user_index[edge["a"]], user_index[edge["b"]])
                 for edge in network
                 if edge["a"] in user_index and edge["b"] in user_index]
    g.add_edges(edge_list)

    layout = g.layout("fr", maxiter=500)
    p = plot(g, layout = layout, vertex_size = 9, edge_arrow_width = 1, edge_arrow_size = 0.5, 
             bbox=(0, 0, 800, 800))

    # `path` is os.path, imported at the top of the notebook.
    imgfn = path.join(output, "frame-" + str(frame) + td.strftime('-%H-%M') + ".png")
    p.save(imgfn)
    frame += 1

-> Frame: 0 - Time: 2019-12-05 12:09:04
  Total number of cases: 0
  Total number of deaths: 0
  Total number of survivors: 0
  Number of infections with known source: 0
  Number of infections from peer: 0
  Number of infections with missing source: 37
-> Frame: 1 - Time: 2019-12-05 12:14:04
  Total number of cases: 0
  Total number of deaths: 0
  Total number of survivors: 0
  Number of infections with known source: 0
  Number of infections from peer: 0
  Number of infections with missing source: 37
-> Frame: 2 - Time: 2019-12-05 12:19:04
  Total number of cases: 0
  Total number of deaths: 0
  Total number of survivors: 0
  Number of infections with known source: 0
  Number of infections from peer: 0
  Number of infections with missing source: 37
-> Frame: 3 - Time: 2019-12-05 12:24:04
  Total number of cases: 0
  Total number of deaths: 0
  Total number of survivors: 0
  Number of infections with known source: 0
  Number of infections from peer: 0
  Number of infections with missing

In [None]:
# Plot tree-oriented network

network, outcomes, initial, orphan = construct_network(end_data)

# Tree roots: first cases plus orphan nodes (original vertex indices).
seeds = list(initial) + list(orphan)

print(seeds)    

g = Graph(directed=True)
g.add_vertices(ntot)

# Attributes and edges are set while vertex ids still equal the original user
# indices. igraph renumbers vertices after a deletion, so doing this after
# delete_vertices() would attach outcomes, edges and roots to the wrong vertices.
g.vs["outcome"] = outcomes
g.vs["orig_index"] = list(range(ntot))
color_dict = {0: "Lime Green", 1: "Dark Orange", 2: "Deep Sky Blue", 3: "Dark Gray", 10:"White"}
g.vs["color"] = [color_dict[out] for out in g.vs["outcome"]]

# Keep only edges between known users (drops the "unk"/"zero"/"src" pseudo-sources).
edge_list = [(user_index[edge["a"]], user_index[edge["b"]])
             for edge in network
             if edge["a"] in user_index and edge["b"] in user_index]
g.add_edges(edge_list)

# Remove users with outcome 0 (nothing recorded) so only the infection tree remains.
susceptibles = [idx for idx in range(ntot) if outcomes[idx] == 0]
if susceptibles:
    g.delete_vertices(susceptibles)

# Translate the seed indices to the renumbered vertex ids; seeds that were
# removed together with the susceptible vertices are dropped.
new_index = {orig: new for new, orig in enumerate(g.vs["orig_index"])}
roots = [new_index[s] for s in seeds if s in new_index]

# With no usable root, pass None so igraph picks the roots itself.
layout = g.layout("rt", mode="out", root = roots or None,
                  rootlevel = [1] * len(roots) if roots else None)
p = plot(g, layout = layout, vertex_size = 9, edge_arrow_width = 1, edge_arrow_size = 0.5, 
         bbox=(0, 0, 800, 800))

# `path` is os.path, imported at the top of the notebook.
imgfn = path.join(output, "infection-tree.png")
p.save(imgfn)