#### load in modules

In [2]:
import os
import json
import numpy as np
import pandas as pd

#### read in metadata

In [70]:
# path to folder containing metadata from a given run
metadata_dir = './example_data/metadata'
# Map each seed (corresponding to a given simulated outbreak) to the metadata
# of every agent (corresponding to a given policy and initial node):
# seed_agent_metadata = {
#     seed: {
#         agent_id: agent_metadata
#     }
# }
seed_agent_metadata = {}
for metadata_file in os.listdir(metadata_dir):
    # the seed is the integer that follows 'seed' in the part of the filename
    # before the first '.'
    seed = int(metadata_file.split('.')[0].split('seed')[1])
    metadata_path = os.path.join(metadata_dir, metadata_file)
    # use a context manager so the file handle is closed as soon as the JSON
    # has been parsed, instead of being left open for every file in the folder
    with open(metadata_path) as metadata_fh:
        tmp_metadata = json.load(metadata_fh)
    # index the agents listed under the 'agents' key by their 'id' field
    seed_agent_metadata[seed] = {agent['id']: agent for agent in tmp_metadata['agents']}

#### read in true underlying infection states

In [71]:
# folder holding one sub-directory per simulated outbreak
simulated_outbreaks_dir = './example_data/outbreaks'
# seed_infection_states = {
#     seed: 1-D integer array parsed from that outbreak's infection_states.csv
# }
seed_infection_states = {}
for seed_dir in os.listdir(simulated_outbreaks_dir):
    # the seed is the integer that follows 'seed' in the directory name
    seed = int(seed_dir.split('seed')[1])
    states_path = os.path.join(simulated_outbreaks_dir, seed_dir, 'infection_states.csv')
    with open(states_path, 'r') as f:
        # the whole file is read as one comma-separated list of integers
        state_tokens = f.read().split(',')
    seed_infection_states[seed] = np.array(list(map(int, state_tokens)))

#### read in observed infection states (this takes a while to run)

In [72]:
# path to folder containing observations by agents
observations_dir = './example_data/observations'
# extract observations for each seed (corresponding to a given simulated outbreak)
# and agent (corresponding to a given policy and initial node)
seed_agent_observed_nodes = {}
# data looks like this:
# seed_agent_observed_nodes = {
#     seed: {
#         agent_id: {
#             time: [observed_nodes]
#         }
#     }
# }
for obs_file in os.listdir(observations_dir):
    # filename pieces are separated by '.': the first piece ends with the seed,
    # the second piece is the agent id
    name_parts = obs_file.split('.')
    seed = int(name_parts[0].split('seed')[1])
    agent_id = name_parts[1]
    obs_path = os.path.join(observations_dir, obs_file)
    # start from an empty {time: observed_nodes} mapping for this agent,
    # creating the per-seed dict the first time the seed is seen
    observed_by_time = {}
    seed_agent_observed_nodes.setdefault(seed, {})[agent_id] = observed_by_time
    with open(obs_path, 'r') as f:
        # the first line of the file is not data, so skip it
        next(f, None)
        for line in f:
            # tolerate blank lines (e.g. a trailing newline at end of file)
            if not line.strip():
                continue
            # split once: first field is the time, the rest are per-node values
            fields = line.split(',')
            # positions whose value is not -1 are kept as the observed node indices
            observed_by_time[int(fields[0])] = np.where(
                np.array(fields[1:]).astype(int) != -1)[0]

#### read in evaluation metrics (AUC)

In [73]:
# folder holding one evaluation-metrics csv per seed (simulated outbreak)
# and agent (policy and initial node)
metrics_dir = './example_data/metric_aucs/'
# data looks like this: a list with one DataFrame per metrics file, each keeping
# the csv's own columns plus 'seed', 'agent_id', 'initial_node' and 'active_policy'
seed_agent_metrics = []
for metrics_file in os.listdir(metrics_dir):
    # filename pieces are separated by '.': the first piece ends with the seed,
    # the second piece is the agent id
    name_parts = metrics_file.split('.')
    seed = int(name_parts[0].split('seed')[1])
    agent_id = name_parts[1]
    metrics_path = os.path.join(metrics_dir, metrics_file)
    # attach the identifying metadata as constant columns; the initial node is
    # the integer inside 'initial(...)' of the agent's passive allocation policy
    tmp_metrics_df = pd.read_csv(metrics_path, sep=',').assign(
        seed=seed,
        agent_id=agent_id,
        initial_node=int(
            seed_agent_metadata[seed][agent_id]['passive_allocation_policy']
            .split('initial(')[1]
            .split(')')[0]
        ),
        active_policy=seed_agent_metadata[seed][agent_id]['allocation_policy'],
    )
    seed_agent_metrics.append(tmp_metrics_df)

# stack the per-file frames into a single table
seed_agent_metrics_df = pd.concat(seed_agent_metrics)

#### calculate max. test budget (threshold)

In [74]:
# For every seed, agent and time point, store the share of truly infected nodes
# (infection state == 1) that are among the nodes the agent observed at that time:
# seed_agent_observed_infected_prop = {
#     seed: {
#         agent_id: {
#             time: proportion
#         }
#     }
# }
seed_agent_observed_infected_prop = {}
for seed, agent_observed_nodes in seed_agent_observed_nodes.items():
    # boolean mask over all nodes of this outbreak: True where the node is infected
    infected_mask = seed_infection_states[seed] == 1
    num_infected = np.sum(infected_mask)
    seed_agent_observed_infected_prop[seed] = {
        agent_id: {
            # infected nodes among those observed at t, relative to all infected nodes
            t: np.sum(infected_mask[observed_nodes_t]) / num_infected
            for t, observed_nodes_t in observed_nodes.items()
        }
        for agent_id, observed_nodes in agent_observed_nodes.items()
    }

In [75]:
# proportion of infected nodes an agent has to have observed (1 = all of them)
threshold = 1
# seed_agent_tests_to_threshold = {
#     seed: {
#         agent_id: smallest key of the agent's proportion dict whose value is >= threshold
#     }
# }
# agents that never reach the threshold get no entry
seed_agent_tests_to_threshold = {}
for seed, agent_observed_infected_prop in seed_agent_observed_infected_prop.items():
    reached_at = {}
    seed_agent_tests_to_threshold[seed] = reached_at
    for agent_id, observed_infected_prop in agent_observed_infected_prop.items():
        # walk the keys in ascending order and stop at the first one that qualifies
        first_hit = next(
            (t for t in sorted(observed_infected_prop)
             if observed_infected_prop[t] >= threshold),
            None,
        )
        if first_hit is not None:
            reached_at[agent_id] = first_hit

#### export processed data

In [76]:
# write the combined per-seed, per-agent metadata as a single JSON document
# (the integer seed keys are written as strings, since JSON object keys are strings)
with open('./example_data/seed_agent_metadata.json', 'w+') as outfile:
    outfile.write(json.dumps(seed_agent_metadata))

In [77]:
# write the concatenated evaluation metrics (AUC) table to csv,
# leaving the DataFrame index out of the file
seed_agent_metrics_df.to_csv(
    path_or_buf='./example_data/seed_agent_metrics.csv',
    index=False,
)

In [78]:
# write one csv row per (seed, agent) that has an entry in
# seed_agent_tests_to_threshold, below a fixed header line
with open('./example_data/tests_to_threshold.csv', 'w') as f:
    f.write('seed,agent_id,tests_to_threshold\n')
    f.writelines(
        f'{seed},{agent_id},{tests_to_threshold}\n'
        for seed, agent_tests_to_threshold in seed_agent_tests_to_threshold.items()
        for agent_id, tests_to_threshold in agent_tests_to_threshold.items()
    )