In [1]:
import pandas as pd
import numpy as np

### Overview

Compare `output.txt` with the printed compartment numbers.


1. number of infected equal for each timepoint -> done (failing)
2. Nobody is in schools/... at unreasonable time points (e.g. 3 o'clock in the morning) -> done (not failing, but sometimes unreasonable)

3. Infected severe are in hospital (ID 05) -> done (passed)
4. Infected critical are at ICU (ID 06) -> done (passed)
5. Dead agents are at the graveyard (ID 10) -> done (passed)

6. Time since transmission >= 0 for all infected/exposed -> is true if 1. is not failing
7. Search for maximal transmission value -> done
8. Time since transmission is either increasing over time or set to 0 if the person has recovered -> not systematically checked, but some individual agents passed

### Import data

In [20]:
# Simulation output file to validate; the path is relative to the notebook's working directory.
output_path = '../../../output/output.txt'

In [21]:
# Rows are ragged: count the space-separated fields on every line so the widest
# line can later define how many columns the table needs.
with open(output_path, 'r') as temp_f:
    col_count = [line.count(" ") + 1 for line in temp_f]

In [22]:
# Widest row: the largest number of space-separated fields found on any line.
max(col_count)

12918

In [23]:
# Placeholder integer column labels 0 .. max(col_count) - 1, so that shorter
# rows are padded when the file is parsed.
column_names = list(range(max(col_count)))

In [24]:
# Column 0 (the location ID) is kept as text so that leading zeros survive.
output_df = pd.read_csv(
    output_path,
    sep=" ",
    header=None,
    names=column_names,
    dtype={0: "str"},
)

In [25]:
# Preview the first rows of the parsed table.
output_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12908,12909,12910,12911,12912,12913,12914,12915,12916,12917
0,1000,337,0.0,0,1.0,0.0,2.0,0.0,3.0,0.0,...,,,,,,,,,,
1,301,337,0.0,0,1.0,0.0,2.0,0.0,3.0,0.0,...,,,,,,,,,,
2,302,337,0.0,0,1.0,0.0,2.0,0.0,3.0,0.0,...,,,,,,,,,,
3,303,337,0.0,0,1.0,0.0,2.0,0.0,3.0,0.0,...,,,,,,,,,,
4,304,337,0.0,0,1.0,0.0,2.0,0.0,3.0,0.0,...,,,,,,,,,,


In [26]:
# Overall maximum of the per-column maxima, skipping column 0 (the text location ID).
output_df.max().iloc[1:].max()

883.0

### Helper function

In [27]:
# Example row for ad-hoc inspection.
# NOTE(review): this global is not read by any later cell shown here (the helpers
# below take their own `location_row` argument) - consider removing it.
location_row = output_df.iloc[34]

In [28]:
def get_number_of_infections_per_timepoint_one_location(location_row):
    """Summarise one location row as agent and infection counts per timepoint.

    The row is read positionally with this layout:
    ``location_id, n_timesteps, [timepoint, n_agents, (agent_id,
    transmission_time) * n_agents] * n_timesteps``.
    An agent is counted as infected when its transmission time is ``>= 0``.

    Parsing stops at the first missing (NaN) timepoint or when the row ends,
    so rows shorter than the declared number of timesteps do not raise.

    Parameters
    ----------
    location_row : pandas.Series
        One row of the parsed output table.

    Returns
    -------
    pandas.DataFrame
        Columns ``LocationID``, ``timepoint``, ``n_agents``, ``n_infected``;
        one row per parsed timepoint, indexed 0..k-1.
    """
    columns = ["LocationID", "timepoint", "n_agents", "n_infected"]

    values = location_row.to_numpy()
    n_values = len(values)
    location_id = values[0]
    output_timesteps = int(values[1])

    # Collect plain records and build the frame once; enlarging a DataFrame
    # row by row copies it on every insert.
    records = []
    timestep_col_id = 2  # position of the current timepoint entry

    for _ in range(output_timesteps):
        # Both the timepoint and its agent count must still be inside the row.
        if timestep_col_id + 1 >= n_values:
            break

        timepoint = values[timestep_col_id]
        if pd.isna(timepoint):
            break

        # The number of agents is stored right after the timepoint.
        n_agents = int(values[timestep_col_id + 1])

        # Transmission times sit at every second position after the first agent id.
        first_time_col = timestep_col_id + 3
        transmission_times = values[first_time_col:first_time_col + 2 * n_agents:2]
        n_infected = sum(1 for time in transmission_times if time >= 0)

        records.append([location_id, timepoint, n_agents, n_infected])

        timestep_col_id += 2 + 2 * n_agents

    return pd.DataFrame(records, columns=columns)

In [29]:
# Try the helper on a single location (the second row of the table).
get_number_of_infections_per_timepoint_one_location(output_df.iloc[1])

Unnamed: 0,LocationID,timepoint,n_agents,n_infected
0,0301,0.0,0,0
1,0301,1.0,0,0
2,0301,2.0,0,0
3,0301,3.0,0,0
4,0301,4.0,0,0
...,...,...,...,...
332,0301,332.0,30,0
333,0301,333.0,0,0
334,0301,334.0,3,0
335,0301,335.0,7,0


In [30]:
def get_number_of_infection_per_timepoint(output_df):
    """Build the per-location, per-timepoint count table for the whole output.

    Each row of ``output_df`` is summarised with
    ``get_number_of_infections_per_timepoint_one_location`` and the results are
    stacked. The per-location row index (0..k-1) is kept, as before.

    Parameters
    ----------
    output_df : pandas.DataFrame
        Parsed output table, one row per location.

    Returns
    -------
    pandas.DataFrame
        Columns ``LocationID``, ``timepoint``, ``n_agents``, ``n_infected``.
        An empty frame with these columns is returned for an empty input.
    """
    columns = ["LocationID", "timepoint", "n_agents", "n_infected"]

    # Gather all per-location frames first and concatenate once; calling
    # pd.concat inside the loop re-copies the accumulated result every time.
    frames = [
        get_number_of_infections_per_timepoint_one_location(output_df.iloc[row_pos])
        for row_pos in range(len(output_df))
    ]

    if not frames:
        return pd.DataFrame(columns=columns)

    return pd.concat(frames)

In [69]:
# Agent and infection counts per location and timepoint for the whole table.
df_transformed = get_number_of_infection_per_timepoint(output_df)

In [70]:
# Make the two count columns integer-typed so they aggregate as numbers.
df_transformed = df_transformed.astype(
    {"n_agents": "int", "n_infected": "int"}
)

# Evaluation

### Number of Infections in graveyard

In [72]:
# All timepoints of the location with ID "1000".
df_transformed.loc[df_transformed["LocationID"] == "1000"]

Unnamed: 0,LocationID,timepoint,n_agents,n_infected
0,1000,0.0,0,0
1,1000,1.0,0,0
2,1000,2.0,0,0
3,1000,3.0,0,0
4,1000,4.0,0,0
...,...,...,...,...
332,1000,332.0,1,0
333,1000,333.0,1,0
334,1000,334.0,1,0
335,1000,335.0,1,0


### Number of Infections per timepoint

In [73]:
# Number of samples taken at every 24th timepoint in the tables below.
n_days = 14

In [74]:
print("Number of infected people over all locations per hour:")
# Select the two count columns before aggregating: summing the whole group
# would also add up the remaining columns (e.g. the string LocationID).
# The result is then sampled at every 24th timepoint.
(
    df_transformed
    .groupby("timepoint")[["n_agents", "n_infected"]]
    .sum()
    .loc[[day * 24 for day in range(n_days)], :]
)

Number of infected people over all locations per hour:


Unnamed: 0_level_0,n_agents,n_infected
timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,884,8
24.0,884,5
48.0,884,5
72.0,884,5
96.0,884,6
120.0,884,2
144.0,884,1
168.0,884,2
192.0,884,1
216.0,884,1


### Nobody is in schools/... during unreasonable timepoints

In [75]:
# Location type codes (first two characters of the LocationID) used below:
# 01 -> School
# 02 -> Work
# 03 -> Social Event
# 04 -> Basic Shop

In [76]:
# Hour within the day: the integer part of the timepoint modulo 24.
df_transformed["hour_of_day"] = df_transformed["timepoint"].astype(int) % 24

In [77]:
# The location type is the first two characters of the LocationID.
df_transformed["Location_Type"] = df_transformed["LocationID"].str[:2]

In [78]:
# Schools: location type "01"
df_sub = df_transformed[df_transformed["Location_Type"] == "01"].copy()

In [79]:
# Number of distinct timepoints present in the selection.
df_sub["timepoint"].unique().size

337

In [80]:
# Aggregate only the two count columns per hour of day; summing the whole group
# would also add up the remaining columns (e.g. the string LocationID).
df_sub.groupby("hour_of_day")[["n_agents", "n_infected"]].sum()

Unnamed: 0_level_0,n_agents,n_infected
hour_of_day,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
5,0,0
6,0,0
7,302,0
8,527,0
9,756,0


In [81]:
# Work places: location type "02"
df_sub = df_transformed[df_transformed["Location_Type"] == "02"].copy()

df_sub["timepoint"].unique().size

337

In [82]:
# Aggregate only the two count columns per hour of day; summing the whole group
# would also add up the remaining columns (e.g. the string LocationID).
df_sub.groupby("hour_of_day")[["n_agents", "n_infected"]].sum()

Unnamed: 0_level_0,n_agents,n_infected
hour_of_day,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
5,0,0
6,0,0
7,1748,11
8,3355,20
9,4993,20


In [83]:
# Social events: location type "03"
df_sub = df_transformed[df_transformed["Location_Type"] == "03"].copy()

df_sub["timepoint"].unique().size

337

In [84]:
# Aggregate only the two count columns per hour of day; summing the whole group
# would also add up the remaining columns (e.g. the string LocationID).
df_sub.groupby("hour_of_day")[["n_agents", "n_infected"]].sum()

Unnamed: 0_level_0,n_agents,n_infected
hour_of_day,Unnamed: 1_level_1,Unnamed: 2_level_1
0,919,1
1,859,1
2,859,1
3,859,1
4,859,1
5,859,1
6,859,1
7,859,1
8,859,1
9,859,1


In [85]:
# Basic shops: location type "04"
df_sub = df_transformed[df_transformed["Location_Type"] == "04"].copy()

df_sub["timepoint"].unique().size

337

In [86]:
# Aggregate only the two count columns per hour of day; summing the whole group
# would also add up the remaining columns (e.g. the string LocationID).
df_sub.groupby("hour_of_day")[["n_agents", "n_infected"]].sum()

Unnamed: 0_level_0,n_agents,n_infected
hour_of_day,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
5,0,0
6,0,0
7,0,0
8,0,0
9,156,1


### Infections in hospital == infected severe?

In [48]:
# Location type 05 -> Hospital

In [49]:
# Hospitals: location type "05"
df_sub = df_transformed[df_transformed["Location_Type"] == "05"].copy()

In [50]:
# Number of agents and infections in hospitals per timepoint. The count columns
# are selected before aggregating so the string columns are not summed as well.
n_inf_hospital = df_sub.groupby("timepoint")[["n_agents", "n_infected"]].sum()

In [51]:
# Keep one sample every 24 timepoints.
n_inf_hospital.loc[[24 * day for day in range(n_days)]]

Unnamed: 0_level_0,n_agents,n_infected
timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0,0
24.0,0,0
48.0,0,0
72.0,0,0
96.0,0,0
120.0,0,0
144.0,0,0
168.0,0,0
192.0,0,0
216.0,0,0


### Infections at ICU == infected critical?

In [52]:
# ICU: location type "06"
df_sub = df_transformed[df_transformed["Location_Type"] == "06"].copy()

In [53]:
# Number of agents and infections at the ICU per timepoint. The count columns
# are selected before aggregating so the string columns are not summed as well.
n_inf_ICU = df_sub.groupby("timepoint")[["n_agents", "n_infected"]].sum()

In [54]:
# Keep one sample every 24 timepoints.
n_inf_ICU.loc[[24 * day for day in range(n_days)]]

Unnamed: 0_level_0,n_agents,n_infected
timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,0,0
24.0,0,0
48.0,0,0
72.0,0,0
96.0,0,0
120.0,0,0
144.0,0,0
168.0,0,0
192.0,0,0
216.0,0,0


### Persons at graveyard == dead?

In [55]:
# Location type = first two characters of the LocationID (derived again here
# so that this section can be run on its own).
df_transformed["Location_Type"] = df_transformed["LocationID"].str[:2]

In [56]:
# Location type 10 -> graveyard
df_sub = df_transformed.loc[df_transformed["Location_Type"].isin(["10"])].copy()

# Number of agents at the graveyard per timepoint. The column is selected
# before aggregating so the string columns are not summed as well.
n_graveyard = df_sub.groupby("timepoint")[["n_agents"]].sum()

In [57]:
# Keep one sample every 24 timepoints.
n_graveyard.loc[[24 * day for day in range(n_days)]]

Unnamed: 0_level_0,n_agents
timepoint,Unnamed: 1_level_1
0.0,0
24.0,1
48.0,1
72.0,1
96.0,1
120.0,1
144.0,1
168.0,1
192.0,1
216.0,1


## Time since transmission

In [58]:
def tidy_row(location_row):
    """Unpack one location row into long format, one record per agent and timepoint.

    The row is read positionally with this layout:
    ``location_id, n_timesteps, [timepoint, n_agents, (agent_id,
    transmission_time) * n_agents] * n_timesteps``.

    Parsing stops at the first missing (NaN) timepoint or when the row ends,
    so rows shorter than the declared number of timesteps do not raise.

    Parameters
    ----------
    location_row : pandas.Series
        One row of the parsed output table.

    Returns
    -------
    pandas.DataFrame
        Columns ``LocationID``, ``timepoint``, ``n_agents``, ``agent_id``,
        ``time_since_transmission``; indexed 0..k-1.
    """
    columns = ["LocationID", "timepoint", "n_agents", "agent_id", "time_since_transmission"]

    values = location_row.to_numpy()
    n_values = len(values)
    location_id = values[0]
    # Cast explicitly: the count may come back as a float, which range() rejects.
    output_timesteps = int(values[1])

    # Collect plain records and build the frame once; enlarging a DataFrame
    # row by row copies it on every insert.
    records = []
    timestep_col_id = 2  # position of the current timepoint entry

    for _ in range(output_timesteps):
        # Both the timepoint and its agent count must still be inside the row.
        if timestep_col_id + 1 >= n_values:
            break

        timepoint = values[timestep_col_id]
        if pd.isna(timepoint):
            break

        # The number of agents is stored right after the timepoint.
        n_agents = int(values[timestep_col_id + 1])

        # (agent_id, transmission_time) pairs follow; only complete pairs that
        # lie inside the row are read.
        first_agent_col = timestep_col_id + 2
        stop_col = min(first_agent_col + 2 * n_agents, n_values - 1)
        for agent_col in range(first_agent_col, stop_col, 2):
            records.append(
                [location_id, timepoint, n_agents, values[agent_col], values[agent_col + 1]]
            )

        timestep_col_id += 2 + 2 * n_agents

    return pd.DataFrame(records, columns=columns)

In [59]:
def tidy_output_df(output_df):
    """Convert the whole output table to long format.

    Each row of ``output_df`` is unpacked with ``tidy_row`` and the results are
    stacked. The per-location row index (0..k-1) is kept, as before.

    Parameters
    ----------
    output_df : pandas.DataFrame
        Parsed output table, one row per location.

    Returns
    -------
    pandas.DataFrame
        Columns ``LocationID``, ``timepoint``, ``n_agents``, ``agent_id``,
        ``time_since_transmission``. An empty frame with these columns is
        returned for an empty input.
    """
    columns = ["LocationID", "timepoint", "n_agents", "agent_id", "time_since_transmission"]

    # Gather all per-location frames first and concatenate once; calling
    # pd.concat inside the loop re-copies the accumulated result every time.
    frames = [tidy_row(output_df.iloc[row_pos]) for row_pos in range(len(output_df))]

    if not frames:
        return pd.DataFrame(columns=columns)

    return pd.concat(frames)

In [60]:
# Long-format table with one row per (location, timepoint, agent); takes ~3 min.
tidy_output = tidy_output_df(output_df)

### maximum timecourse value

In [61]:
# Largest time since transmission recorded across all agents and timepoints.
tidy_output["time_since_transmission"].max()

107.0

In [62]:
# Same maximum divided by 24.
# NOTE(review): presumably converts hours to days - confirm the timepoint unit.
tidy_output["time_since_transmission"].max() / 24

4.458333333333333

In [63]:
# Order by agent, then by time since transmission, to inspect individual agents.
tidy_output.sort_values(['agent_id','time_since_transmission'])

Unnamed: 0,LocationID,timepoint,n_agents,agent_id,time_since_transmission
90,0419,141.0,2,0.0,-1.0
119,0419,213.0,3,0.0,-1.0
142,0419,262.0,3,0.0,-1.0
171,0419,305.0,4,0.0,-1.0
51,0034,114.0,1,0.0,-1.0
...,...,...,...,...,...
1401,00498,332.0,2,883.0,-1.0
1406,00498,333.0,5,883.0,-1.0
1411,00498,334.0,5,883.0,-1.0
1416,00498,335.0,5,883.0,-1.0


In [64]:
# First records with a strictly positive time since transmission.
tidy_output[tidy_output["time_since_transmission"] > 0].head()

Unnamed: 0,LocationID,timepoint,n_agents,agent_id,time_since_transmission
422,302,94.0,9,550.0,94.0
430,302,95.0,6,550.0,95.0
190,308,70.0,4,820.0,70.0
194,308,71.0,3,820.0,71.0
0,510,2.0,2,141.0,2.0


In [65]:
# Follow a single agent through time.
agent_id = 841.0
df_sub = tidy_output.loc[tidy_output["agent_id"] == agent_id].sort_values(by="timepoint")
max_time = df_sub["time_since_transmission"].max()

In [66]:
# Longest time since transmission recorded for the selected agent.
max_time

107.0

In [67]:
# Rows 90-104 (by position) of the agent's records, which are sorted by timepoint.
df_sub.iloc[90:105]

Unnamed: 0,LocationID,timepoint,n_agents,agent_id,time_since_transmission
276,482,90.0,4,841.0,90.0
280,482,91.0,4,841.0,91.0
284,482,92.0,4,841.0,92.0
288,482,93.0,4,841.0,93.0
292,482,94.0,4,841.0,94.0
296,482,95.0,4,841.0,95.0
300,482,96.0,4,841.0,96.0
304,482,97.0,4,841.0,97.0
308,482,98.0,4,841.0,98.0
312,482,99.0,4,841.0,99.0
