In [68]:
import glob
import os
from datetime import datetime, timedelta, date
from os import path

In [69]:
folder = "./sma125"
ext = "txt"
ntot = 200
tstep = 5

# folder = "./shanghai"
# ext = "txt"
# ntot = 60
# tstep = 5

# folder = "./broad15"
# ext = "log"
# ntot = 50
# tstep = 15

# folder = "./sma19"
# ext = "txt"
# ntot = 170
# tstep = 5

output = "./output"
if not path.exists(output):
    os.mkdir(output)

In [70]:
# Load all log files

files = [f for f in glob.glob(folder + "**/*." + ext, recursive=False)]

min_time = 1E10
max_time = 0
users = {}
inf_cases = []
noninf_cases = []
for fn in files:
    bname = path.basename(fn)
    idx = bname.rfind('-')
    case_id = bname[0:idx]
    events = []
    infected = False
    with open(fn, 'r') as f:
        lst = list(enumerate(f))
        n = len(lst)

        for i, line in lst:
            if i == 0 or i == n - 1:
                continue
            line = line.strip()
            line = line[1:-2]
            parts = line.split(',')
            time = int(parts[0].split(':')[1])

            evstr = parts[1]
            idx = evstr.find(':')
            evtyp = evstr[0:idx]
            if idx < len(evstr):
                evdat = evstr[idx+1:]
            else:
                evdat = None

            # print(time, evtyp, evdat)
            events += [{"time": time, "type": evtyp, "data": evdat}]
            
            if evtyp == "OUT" and evdat in ["DEAD", "RECOVERED"]:
                infected = True

            min_time = min(min_time, time)
            max_time = max(max_time, time)            
    
    # Events are stored last to first in log files, reverting the order
    events.reverse()
    users[case_id] = events
    
    if infected:
        inf_cases += [case_id]
    else:
        noninf_cases += [case_id]

# The infected cases are indexed first, to facilitate removal of non-infected cases
# when plotting the tree-oriented network
count = 0
user_index = {}
for cid in inf_cases:
    user_index[cid] = count
    count += 1
for cid in noninf_cases:
    user_index[cid] = count
    count += 1
    
print(len(inf_cases))
print(len(noninf_cases))
print(user_index)
    
min_delta = timedelta(minutes=tstep)
start_data = datetime.fromtimestamp(min_time)
end_data = datetime.fromtimestamp(max_time) + min_delta
print(min_time)
print(max_time)
print(start_data)
print(end_data)  

12
8
{'1ff3ff57-bfe3-472f-b839-9e409e13b8aa': 0, 'a367c9b6-1a65-4991-b2b1-7e910f6abc02': 1, '903ae526-949d-453a-a8ba-50da170f4e99': 2, '31eb700c-8a84-4089-b428-647833b87f77': 3, '1d0d2be8-eb19-41e1-9e05-8b7ca1cf1c12': 4, '9f3f698b-3dbe-4529-8c60-045d2f33907b': 5, '187a2da7-d489-496e-b3f6-47f2e5d11515': 6, '5fe70a9a-bac1-4d6e-af0d-34d2b2d59e77': 7, '8fe85d97-f6ad-41bf-8937-66a0520fab64': 8, '98836bb2-0b73-4368-a93c-d1c35ff67889': 9, '543e16b3-62ab-4fed-92b2-9cf947588ec0': 10, '7b85cd69-8e0e-453c-9356-a766fbfad3f2': 11, '0230570c-4fd6-4500-8d70-101464215635': 12, '23944a78-0284-43f6-9a65-fefe5fb2f78d': 13, 'b0fbfeac-880b-4028-ade8-1736e7d9afac': 14, 'e3107346-d306-4ca3-929c-b41671987d9c': 15, 'd2f80b20-da58-4998-b09c-e03af5337485': 16, 'ca7fd1ff-9621-4216-9bc2-847a1b7027ef': 17, 'bf0790e1-6176-43aa-b95b-44c9bca6f22a': 18, 'c1ec9397-47fb-4152-a299-87fdeb05e3fa': 19}
1568160457
1568177117
2019-09-10 20:07:37
2019-09-11 00:50:17


In [71]:
# The function in this cell constructs the infection network for a given time point

# For debugging
# frange = range(20, 24)

def construct_network(td, nt, fr = -1):
    inf_network = []
    user_outcome = [0] * nt
    
#     if fr in frange:
#         print("*****", fr, "******")
    
    ninf = 0
    nknown = 0
    nmiss = 0
    nsurv = 0
    ndead = 0
    npeer = 0
    parents = {}
    first_inf = {}
    first_orph = {}
    infected = {}
    orphaned = {}
    for key in users:
        events = users[key]
        idx = user_index[key]
        if len(events) == 0: continue
        pkey = ""
        infect = None
        has_inf_event = False
        has_vax_event = False
        for ev in events:
            date = datetime.fromtimestamp(ev["time"])
            if td <= date: continue
            str_date = date.strftime('%H:%M:%S')
            data = ev["data"]
            if ev["type"] == "OUT":
                if data == "VACCINATED":
                    has_vax_event = True
                else:    
                    if data == "RECOVERED":
                        user_outcome[user_index[key]] = 2
                        nsurv += 1
                    elif data == "DEAD":
                        user_outcome[user_index[key]] = 3
                        ndead += 1
                    if not infect:
                        # Infection edge without origin
                        infect = [{"a":"unk", "b":key, "t":str_date, "s":"-"}]
                        orphaned[key] = date                    
                    ninf += 1
            elif ev["type"] == "INF":
                if pkey:
                    # Duplicated parent, skipping
                    continue
                if "PEER" in data:
                    user_outcome[idx] = 1
                    pstr = data[5:-1]
                    if ":" in pstr:
                        pieces = pstr.split(":")
                        pkey = pieces[0]
                        strain = pieces[1]
                    else:
                        pkey = pstr
                        strain = "0"
                    if pkey in users:
                        # print(strain, pkey, "->", key)
#                         if fr in frange:
                        if pkey in parents:
                            if date < parents[pkey]:
                                parents[pkey] = date
                        else:
                            parents[pkey] = date
                        infect = [{"a":pkey, "b":key, "t":str_date, "s": strain}]
                        npeer += 1
                        has_inf_event = True
                    else:
                        infect = [{"a":"unk", "b":key, "t":str_date, "s": strain}]
                        has_inf_event = False
                elif "CASE0" in data:
                    user_outcome[idx] = 10
                    strain = data[6:-1]
                    # print(strain, "0", "->", key)
                    infect = [{"a":"zero", "b":key, "t":str_date, "s": strain}]
                    has_inf_event = True
                    first_inf[idx] = date
                elif "SOURCE" in data:
                    user_outcome[idx] = 10
                    if ":" in data:
                        strain = pieces[1]
                    else:
                        strain = "0"
                    infect = [{"a":"src", "b":key, "t":str_date, "s": strain}]
                    has_inf_event = True
                    first_inf[idx] = date

        if has_vax_event: 
            continue
            
        if has_inf_event:
            nknown += 1
        else:
            nmiss += 1
        infected[idx] = has_inf_event

        if infect:
            inf_network += infect

    print("  Total number of cases:", ninf)
    print("  Total number of deaths:", ndead)
    print("  Total number of survivors:", nsurv)
    print("  Number of infections with known source:", nknown)
    print("  Number of infections from peer:", npeer)
    print("  Number of infections with missing source:", nmiss)

    # Finalizing outcomes and first infections based on ancilliary info
    
    for p in parents:
        idx = user_index[p]
        if user_outcome[idx] == 0:
            # This user infected others but not yet had an outcome, so marked as initial note (10)
            user_outcome[idx] = 10
        if not idx in infected or not infected[idx]:
            # Not infected by anybody else, so it is a first case
            first_inf[idx] = parents[p]

    for o in orphaned:
        # Orphan nodes are those who had an outcome but not infecting parent, 
        # and did not infected anybody else
        idx = user_index[o]
        if not idx in first_inf:
            first_orph[idx] = orphaned[o]

    return inf_network, user_outcome, first_inf, first_orph

In [72]:
# Plot infection network using igraph
# https://igraph.org/python/

from igraph import *

td = start_data
frame = 0
while td <= end_data:
    td += min_delta
    print("-> Frame:", frame, "- Time:", td)
    network, outcomes, _, _ = construct_network(td, ntot, frame)

    g = Graph(directed=True)
    g.add_vertices(ntot)

    g.vs["outcome"] = outcomes
    color_dict = {0: "Lime Green", 1: "Dark Orange", 2: "Deep Sky Blue", 3: "Dark Gray", 10:"White"}
    g.vs["color"] = [color_dict[out] for out in g.vs["outcome"]]

    for edge in network:
        n0 = edge["a"]
        n1 = edge["b"]
        if n0 in user_index and n1 in user_index:
#             print(user_index[n0], "->", user_index[n1])
            g.add_edges([(user_index[n0], user_index[n1])])

#     print(g)

    layout = g.layout("fr", maxiter=500)
    p = plot(g, layout = layout, vertex_size = 9, edge_arrow_width = 1, edge_arrow_size = 0.5, 
             bbox=(0, 0, 800, 800))
    
#     if frame in frange:
    imgfn = os.path.join(output, "frame-" + str(frame) + td.strftime('-%H-%M') + ".png")    
    p.save(imgfn)
    frame += 1

-> Frame: 0 - Time: 2019-09-10 20:12:37
  Total number of cases: 0
  Total number of deaths: 0
  Total number of survivors: 0
  Number of infections with known source: 0
  Number of infections from peer: 0
  Number of infections with missing source: 20
-> Frame: 1 - Time: 2019-09-10 20:17:37
  Total number of cases: 0
  Total number of deaths: 0
  Total number of survivors: 0
  Number of infections with known source: 0
  Number of infections from peer: 0
  Number of infections with missing source: 20
-> Frame: 2 - Time: 2019-09-10 20:22:37
  Total number of cases: 0
  Total number of deaths: 0
  Total number of survivors: 0
  Number of infections with known source: 0
  Number of infections from peer: 0
  Number of infections with missing source: 20
-> Frame: 3 - Time: 2019-09-10 20:27:37
  Total number of cases: 0
  Total number of deaths: 0
  Total number of survivors: 0
  Number of infections with known source: 0
  Number of infections from peer: 0
  Number of infections with missing

-> Frame: 39 - Time: 2019-09-10 23:27:37
  Total number of cases: 3
  Total number of deaths: 1
  Total number of survivors: 2
  Number of infections with known source: 4
  Number of infections from peer: 2
  Number of infections with missing source: 15
-> Frame: 40 - Time: 2019-09-10 23:32:37
  Total number of cases: 4
  Total number of deaths: 2
  Total number of survivors: 2
  Number of infections with known source: 7
  Number of infections from peer: 5
  Number of infections with missing source: 12
-> Frame: 41 - Time: 2019-09-10 23:37:37
  Total number of cases: 4
  Total number of deaths: 2
  Total number of survivors: 2
  Number of infections with known source: 7
  Number of infections from peer: 5
  Number of infections with missing source: 12
-> Frame: 42 - Time: 2019-09-10 23:42:37
  Total number of cases: 4
  Total number of deaths: 2
  Total number of survivors: 2
  Number of infections with known source: 7
  Number of infections from peer: 5
  Number of infections with mis

In [73]:
# Plot tree-orientated network

network, outcomes, initial, orphan = construct_network(end_data, ntot)

seeds = []
for s in initial:
    seeds += [s]

for o in orphan:
    seeds += [o]

succeptibles = []
for idx in range(ntot):
    if outcomes[idx] == 0:
        succeptibles += [idx]
        
# print(outcomes)
# print(seeds)
# print(succeptibles)

g = Graph(directed=True)
g.add_vertices(ntot)

g.vs["outcome"] = outcomes
color_dict = {0: "Lime Green", 1: "Dark Orange", 2: "Deep Sky Blue", 3: "Dark Gray", 10:"White"}
g.vs["color"] = [color_dict[out] for out in g.vs["outcome"]]

for edge in network:
    n0 = edge["a"]
    n1 = edge["b"]
    if n0 in user_index and n1 in user_index:
        g.add_edges([(user_index[n0], user_index[n1])])

# Removing non-infected, assuming they are all after the infected
g.delete_vertices(succeptibles)
        
layout = g.layout("rt", mode="out", root = seeds, rootlevel = [1] * len(seeds))
p = plot(g, layout = layout, vertex_size = 16, edge_arrow_width = 1, edge_arrow_size = 0.9, 
         bbox=(0, 0, 800, 800))

imgfn = os.path.join(output, "infection-tree.png")    
p.save(imgfn)

  Total number of cases: 12
  Total number of deaths: 7
  Total number of survivors: 5
  Number of infections with known source: 12
  Number of infections from peer: 10
  Number of infections with missing source: 0
