In [1]:
#import packages

In [6]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# from analysis_scripts import analysis_functions
from matplotlib import pyplot as plt
import ast
from matplotlib.sankey import Sankey
#
# functions used
import pandas as pd
import numpy as np

In [7]:
# create functions to be used 

In [8]:

def calculate_number_of_dalys(filename):
    """A function that takes in a filepath and returns the DALYs in the simulation"""
    # Import data from output file as dataframe
    df = pd.read_csv(filename, sep='\t')
    # Check the relevant column is in the dataframe columns
    assert 'DALYs' in df.columns, 'This file did not record the DALYs caused by COVID in the population'
    # Only look at those who have had some health burden caused by COVID (no asymptomatic cases)
    df = df.loc[df['DALYs'] != '-']
    # Calculate the DALYs caused by COVID
    DALYs = df.DALYs.astype(float).sum()
    # return the result
    return DALYs


def calculate_number_of_yll_and_yld(filename):
    """A function that takes in a filepath and returns the DALYs in the simulation"""
    # Import data from output file as dataframe
    df = pd.read_csv(filename, sep='\t')
    # Check the relevant column is in the dataframe columns
    assert 'YLL' in df.columns, 'This file did not record the YLL caused by COVID in the population'
    assert 'YLD' in df.columns, 'This file did not record the YLD caused by COVID in the population'

    # Only look at those who have had some health burden caused by COVID (no asymptomatic cases)
    yll_df = df.loc[df['YLL'] != '-']
    yld_df = df.loc[df['YLD'] != '-']
    # Calculate the DALYs caused by COVID
    yll = yll_df.YLL.astype(float).sum()
    yld = yld_df.YLD.astype(float).sum()

    # return the result
    return yll, yld


def get_distributions_of_number_of_infections_per_source(filename, return_mean_std=False):
    """A function that takes in a filepath and distribution in the number of infections caused per person, if you want
    the average and std of a run, set return_mean_std to be true"""
    # Import data from output file as dataframe
    df = pd.read_csv(filename, sep='\t')
    # Add a counter column, which will be used to determine the number of infections caused by each host/infection
    df['counter'] = [1] * len(df)
    # Check that who gave the infection is recorded in the dataframe
    assert 'Source' in df.columns, 'This file did not record the Source of each infection'
    # Group by who gave out the infection and sum the results
    df = df.groupby(by='Source').sum()
    # Sort the number of infections caused by each host, from smallest to largest
    df = df.sort_values('counter')
    # Find the number of infections caused by each host and how many hosts caused each number of infections
    number_of_infections, counts = np.unique(df['counter'], return_counts=True)
    # Return the range of the number of infections caused by each host, and the corresponding number of hosts who caused
    # that number of infections
    mean = ''
    std = ''
    if return_mean_std:
        mean = df.counter.mean()
        std = df.counter.std()
    return (number_of_infections, counts) if not return_mean_std else number_of_infections, counts, mean, std


def get_final_infection_status(filename):
    """A function that takes in a filepath and distribution in the infection status of those with covid"""
    # Import data from output file as dataframe
    df = pd.read_csv(filename, sep='\t')
    # copy the output file so we can drop rows from it
    infectious_pop = df.copy()
    # Calculate the number of people who died from their infection
    who_died_from_covid = infectious_pop.loc[df['DiedAt'] != '-'].index
    n_died = len(who_died_from_covid)
    # remove those who died from covid from the infectious_pop df
    infectious_pop = infectious_pop.drop(index=who_died_from_covid)
    # Calculate the number of people who reached a critical condition
    who_reached_critical = infectious_pop.loc[df['CriticalAt'] != '-'].index
    n_critical = len(who_reached_critical)
    # remove those who reached critical covid from the infectious_pop df
    infectious_pop = infectious_pop.drop(index=who_reached_critical)
    # Calculate the number of people who had severe covid
    who_reached_severe = infectious_pop.loc[df['SevereAt'] != '-'].index
    n_severe = len(who_reached_severe)
    # remove those who reached severe from infectious_pop
    infectious_pop = infectious_pop.drop(index=who_reached_severe)
    # Calculate the number of people who had mild covid
    who_reached_mild = infectious_pop.loc[df['SymptomaticAt'] != '-'].index
    n_mild = len(who_reached_mild)
    #  remove those who had mild infection from infectious_pop
    infectious_pop = infectious_pop.drop(index=who_reached_mild)
    who_was_asymptomatic = infectious_pop.loc[df['SymptomaticAt'] == '-']
    # Calculate the proportion of infections that are asymptomatic
    n_asymptomatic = len(who_was_asymptomatic)
    # Create reference to COVID infection status
    infections_by_status = [n_asymptomatic, n_mild, n_severe, n_critical, n_died]
    distribution_of_status = list(np.divide(infections_by_status, sum(infections_by_status)))
    return distribution_of_status


def extract_time_in_each_stage(filename):
    """A function to get the average time spent in each infection state from the simulation"""
    # Import data from output file as dataframe
    df = pd.read_csv(filename, sep='\t')
    # Get those who are not asymptomatic
    filtered_pop = df.loc[df['SymptomaticAt'] != '-'].copy()
    # Drop initial infections from calculations
    filtered_pop = filtered_pop.loc[filtered_pop['Time'] != - 1]
    # Change data types of columns to int
    filtered_pop[['ContagiousAt', 'SymptomaticAt']] = filtered_pop[['ContagiousAt', 'SymptomaticAt']].astype(int)
    # Calculate incubation period
    filtered_pop['IncubationPeriod'] = filtered_pop['SymptomaticAt'] - filtered_pop['ContagiousAt']
    #  Check calculations haven't been messed up
    assert (filtered_pop['IncubationPeriod'] > 0).all()
    mean_incubation_period = filtered_pop['IncubationPeriod'].mean()
    std_incubation_period = filtered_pop['IncubationPeriod'].std()
    # get those who are severely ill
    filtered_pop = df.loc[df['SevereAt'] != '-'].copy()
    # Drop initial infections if any
    filtered_pop = filtered_pop.loc[filtered_pop['Time'] != - 1]
    # change data type to int
    filtered_pop[['SymptomaticAt', 'SevereAt']] = filtered_pop[['SymptomaticAt', 'SevereAt']].astype(int)
    # Calculate the interval between being symptomatic and severe
    filtered_pop['time_until_severe'] = filtered_pop['SevereAt'] - filtered_pop['SymptomaticAt']
    # Calculate mean and std until severe
    mean_time_until_severe = filtered_pop['time_until_severe'].mean()
    std_time_until_severe = filtered_pop['time_until_severe'].std()
    # Get those who are critically ill
    filtered_pop = df.loc[df['CriticalAt'] != '-'].copy()
    # Drop initial infections if any
    filtered_pop = filtered_pop.loc[filtered_pop['Time'] != - 1]
    # change data type to int
    filtered_pop[['SevereAt', 'CriticalAt']] = filtered_pop[['SevereAt', 'CriticalAt']].astype(int)
    # Calculate duration between being severely ill and critically ill
    filtered_pop['time_until_critical'] = filtered_pop['CriticalAt'] - filtered_pop['SevereAt']
    # Calculate mean and STD
    mean_time_until_critical = filtered_pop['time_until_critical'].mean()
    std_time_until_critical = filtered_pop['time_until_critical'].std()
    # Get those who died
    filtered_pop = df.loc[df['DiedAt'] != '-'].copy()
    # Drop initial infections if any
    filtered_pop = filtered_pop.loc[filtered_pop['Time'] != - 1]
    # change data type to int
    filtered_pop[['DiedAt', 'CriticalAt']] = filtered_pop[['DiedAt', 'CriticalAt']].astype(int)
    # Calculate time between being severly ill and dying
    filtered_pop['time_until_death'] = filtered_pop['DiedAt'] - filtered_pop['CriticalAt']
    # Calculate mean and std
    mean_time_until_death = filtered_pop['time_until_death'].mean()
    std_time_until_death = filtered_pop['time_until_death'].std()
    # mean transition periods
    mean_time_in_each_category = [mean_incubation_period, mean_time_until_severe, mean_time_until_critical,
                                  mean_time_until_death]
    std_time_in_each_category = [std_incubation_period, std_time_until_severe, std_time_until_critical,
                                 std_time_until_death]

    # return results
    return mean_time_in_each_category, std_time_in_each_category


def find_number_who_reached_each_stage_of_disease(filename):
    """A function which looks at the output of the model and works out which stage of the disease was reached"""
    df = pd.read_csv(filename, sep='\t')
    # Find who is contagious
    contagious_index = df.loc[df['ContagiousAt'] != '-'].index
    # Found how many people were contagious
    contagious_count = len(contagious_index)
    # Find who recovered from COVID at the contagious stage and how many
    recovered_at_contagious_index = df.loc[
        contagious_index.intersection(
            df.loc[df['SymptomaticAt'] == '-'].index
        )
    ].index
    recovered_at_contagious_count = len(recovered_at_contagious_index)
    # Find who developed symptomatic COVID and how many
    symptomatic_index = df.loc[df['SymptomaticAt'] != '-'].index
    symptomatic_count = len(symptomatic_index)
    # Find who recovered from COVID at the symptomatic stage and how many
    recovered_at_symptomatic_index = df.loc[
        symptomatic_index.intersection(
            df.loc[df['SevereAt'] == '-'].index
        )
    ].index
    recovered_at_symptomatic_count = len(recovered_at_symptomatic_index)
    # Find who developed severe COVID and how many recovered at this point
    severe_index = df.loc[df['SevereAt'] != '-'].index
    severe_count = len(severe_index)
    # Find who recovered from COVID at the symptomatic stage and how many
    recovered_at_severe_index = df.loc[
        severe_index.intersection(
            df.loc[df['CriticalAt'] == '-'].index
        )
    ].index
    recovered_at_severe_count = len(recovered_at_severe_index)
    # Find who developed critical COVID and how many recovered at this point
    critical_index = df.loc[df['CriticalAt'] != '-'].index
    critical_count = len(critical_index)
    # Find who recovered from COVID at the symptomatic stage and how many
    recovered_at_critical_index = df.loc[
        critical_index.intersection(
            df.loc[df['DiedAt'] == '-'].index
        )
    ].index
    recovered_at_critical_count = len(recovered_at_critical_index)
    n_died = critical_count - recovered_at_critical_count

    counts_at_each_stage = pd.Series(data=[contagious_count, recovered_at_contagious_count, symptomatic_count,
                                           recovered_at_symptomatic_count, severe_count, recovered_at_severe_count,
                                           critical_count, recovered_at_critical_count, n_died],
                                     index=['n_contagious', 'n_recovered_at_contagious', 'n_symptomatic',
                                            'n_recovered_at_symptomatic', 'n_severe', 'n_recovered_at_severe',
                                            'n_critical', 'n_recovered_at_critical', 'n_died'])
    return counts_at_each_stage


def get_cum_sum_cases_deaths(filename):
    simulation_data_over_time = pd.read_csv(filename, sep="\t")
    # Group the district level information by time
    simulation_data_over_time = simulation_data_over_time.groupby('time').sum()
    # Total the number of both asymptomatic and symptomatic cases
    simulation_data_over_time['new_cases'] = simulation_data_over_time['metric_new_cases_asympt'] + \
                                             simulation_data_over_time['metric_new_cases_sympt']
    # Calculate the cumulative sum of cases
    simulation_data_over_time['new_cases_cumulative'] = simulation_data_over_time['new_cases'].cumsum()
    # return the cumulative sum of cases and deaths
    return simulation_data_over_time['new_cases_cumulative'].tolist(), \
           simulation_data_over_time['metric_died_count'].tolist(), simulation_data_over_time['new_cases'].tolist()


def get_cases_and_deaths_over_time(filename):
    # read in data from file
    simulation_data_over_time = pd.read_csv(filename, sep="\t")
    # Calculate total new cases
    simulation_data_over_time['new_cases'] = simulation_data_over_time['metric_new_cases_asympt'] + \
                                             simulation_data_over_time['metric_new_cases_sympt']
    simulation_data_over_time = simulation_data_over_time.groupby('time').sum()
    # Return cases and deaths
    return simulation_data_over_time['new_cases'].tolist(), simulation_data_over_time['metric_new_deaths'].tolist()


def extract_number_of_districts_with_cases_over_time(filename):
    data = pd.read_csv(filename, sep='\t')
    # calculate number of cases at each time point
    sim_time = data.time.unique()
    results = pd.DataFrame(index=sim_time)
    n_districts_with_covid = []
    districts_with_covid = []
    for t in data.time.unique():
        df = data.loc[data['time'] == t]
        districts_with_asymptomatic_cases = list(df.loc[df['metric_new_cases_asympt'] > 0, 'myId'].unique())
        districts_with_symptomatic_cases = list(df.loc[df['metric_new_cases_sympt'] > 0, 'myId'].unique())
        districts_with_covid_at_t = districts_with_symptomatic_cases + districts_with_asymptomatic_cases
        districts_with_covid_at_t = list(np.unique(districts_with_covid_at_t))
        n_districts_with_covid_at_t = len(districts_with_covid_at_t)
        n_districts_with_covid.append(n_districts_with_covid_at_t)
        districts_with_covid.append(districts_with_covid_at_t)
    results['n_districts_with_covid'] = n_districts_with_covid
    results['districts_with_covid'] = districts_with_covid
    results['cumulative_districts'] = results['districts_with_covid'].cumsum()
    cumulative_districts_no_duplicates = []
    for district_list in results['cumulative_districts']:
        cumulative_districts_no_duplicates.append(list(np.unique(district_list)))
    results['cumulative_districts'] = cumulative_districts_no_duplicates
    return results



In [9]:

mid_district_start_points = ['_d29_', '_d39_', '_d41_', '_d42_', '_d51_']

mid_directory = os.fsencode(
    '/Users/sophieayling/Documents/GitHub/Disease-Modelling-SSA/data/map_input/model_output/mobility_scenarios/midRangeCities'
)

for start_point in mid_district_start_points:
    data = pd.DataFrame()
    run_count = 0
    for idx, file in enumerate(os.listdir(mid_directory)):
        filename = os.fsdecode(file)
        filepath = mid_directory.decode() + filename
        if ("infect" not in filename) and (start_point in filename):
            data_intro = pd.read_csv(filepath, delimiter='\t')
            if run_count == 0:
                orig_index = pd.DataFrame()
                orig_index['time'] = data_intro['time']
                orig_index['myId'] = data_intro['myId']
            else:
                assert list(orig_index.time) == list(data_intro.time)
                assert list(orig_index.myId) == list(data_intro.myId)

            run_count += 1

            data = pd.concat([data, data_intro]).groupby(level=0).mean()
            if run_count == 30:
                data['myId'] = orig_index['myId']
            data.to_csv(f"/Users/sophieayling/Documents/GitHub/Disease-Modelling-SSA/data/map_input/model_output/mobility_scenarios/midRangeCities"
                        f"start_at{start_point}output.csv")


mid_directory = os.fsencode(
    '/Users/sophieayling/Documents/GitHub/Disease-Modelling-SSA/data/map_input/model_output/mobility_scenarios/midRangeCities'
)

mid_results_df = pd.DataFrame()
mid_results_df.index = ['DALYs', 'YLL', 'YLD', 'mean_infected_per_host', 'dist_inf_status', 'cumulative_cases',
                        'n_cases', 'up', 'low', 'cumulative_deaths', 'n_deaths']

for start_point in mid_district_start_points:
    dalys_per_run = []
    yld_per_run = []
    yll_per_run = []
    n_infected_per_host = []
    countinfected_per_host = []
    mean_n_infected_per_host = []
    distribution_of_infection_status = []
    mean_time_spent_in_each_status = []
    std_time_spent_in_each_status = []
    disease_progression_df = pd.DataFrame()
    cumsum_cases = []
    cumsum_deaths = []
    daily_cases = []
    daily_deaths = []
    n_cases = []
    for idx, file in enumerate(os.listdir(mid_directory)):
        filename = os.fsdecode(file)
        filepath = mid_directory.decode() + filename

        if start_point in filename:
            if 'infect' in filename:
                dalys_per_run.append(calculate_number_of_dalys(filepath))
                yll_per_run.append(calculate_number_of_yll_and_yld(filepath)[0])
                yld_per_run.append(calculate_number_of_yll_and_yld(filepath)[1])

                n_infected_per_host.append(
                    get_distributions_of_number_of_infections_per_source(filepath,
                                                                                            return_mean_std=True)[0]
                )
                countinfected_per_host.append(
                    get_distributions_of_number_of_infections_per_source(filepath,
                                                                                            return_mean_std=True)[1]
                )
                mean_n_infected_per_host.append(
                    get_distributions_of_number_of_infections_per_source(filepath,
                                                                                            return_mean_std=True)[2]
                )
                distribution_of_infection_status.append(get_final_infection_status(filepath))
                mean_time_spent_in_each_status.append(extract_time_in_each_stage(filepath)[0])
                std_time_spent_in_each_status.append(extract_time_in_each_stage(filepath)[1])
                disease_progression_df[idx] = find_number_who_reached_each_stage_of_disease(filepath)
            else:
                cumsum_cases.append(get_cum_sum_cases_deaths(filepath)[0])
                cumsum_deaths.append(get_cum_sum_cases_deaths(filepath)[1])
                n_cases.append(get_cum_sum_cases_deaths(filepath)[2])
                daily_cases.append(get_cases_and_deaths_over_time(filepath)[0])
                daily_deaths.append(get_cases_and_deaths_over_time(filepath)[1])
    mid_results_df[start_point] = [np.mean(dalys_per_run), np.mean(yll_per_run), np.mean(yld_per_run),
                                   np.mean(mean_n_infected_per_host),
                                   [np.mean(i) for i in zip(*distribution_of_infection_status)],
                                   [np.mean(j) for i in zip(*cumsum_cases) for j in i],
                                   [np.mean(i) for i in zip(*n_cases)],
                                   [np.mean(i) + 1.96 * np.std(i) for i in zip(*n_cases)],
                                   [np.mean(i) - 1.96 * np.std(i) for i in zip(*n_cases)],
                                   [np.mean(j) for i in zip(*cumsum_deaths)for j in i],
                                   [np.mean(i) for i in zip(*daily_deaths)],
                                   ]

mid_results_df = pd.DataFrame()
mid_results_df.index = ['DALYs', 'YLL', 'YLD', 'mean_infected_per_host', 'dist_inf_status', 'cumulative_cases',
                        'n_cases', 'up', 'low', 'cumulative_deaths', 'n_deaths']
mid_district_start_points = ['_d2_', '_d18_', '_d23_', '_d26_', '_d31_']
for start_point in mid_district_start_points:
    dalys_per_run = []
    yll_per_run = []
    yld_per_run = []
    n_infected_per_host = []
    countinfected_per_host = []
    mean_n_infected_per_host = []
    distribution_of_infection_status = []
    mean_time_spent_in_each_status = []
    std_time_spent_in_each_status = []
    disease_progression_df = pd.DataFrame()
    cumsum_cases = []
    cumsum_deaths = []
    daily_cases = []
    daily_deaths = []
    n_cases = []
    for idx, file in enumerate(os.listdir(mid_directory)):
        filename = os.fsdecode(file)
        filepath = mid_directory.decode() + filename

        if start_point in filename:
            if filename.startswith("infections"):
                dalys_per_run.append(calculate_number_of_dalys(filepath))
                yll_per_run.append(calculate_number_of_yll_and_yld(filepath)[0])
                yld_per_run.append(calculate_number_of_yll_and_yld(filepath)[1])
                n_infected_per_host.append(
                    get_distributions_of_number_of_infections_per_source(filepath,
                                                                                            return_mean_std=True)[0]
                )
                countinfected_per_host.append(
                    get_distributions_of_number_of_infections_per_source(filepath,
                                                                                            return_mean_std=True)[1]
                )
                mean_n_infected_per_host.append(
                    get_distributions_of_number_of_infections_per_source(filepath,
                                                                                            return_mean_std=True)[2]
                )
                distribution_of_infection_status.append(get_final_infection_status(filepath))
                mean_time_spent_in_each_status.append(extract_time_in_each_stage(filepath)[0])
                std_time_spent_in_each_status.append(extract_time_in_each_stage(filepath)[1])
                disease_progression_df[idx] = find_number_who_reached_each_stage_of_disease(filepath)
            if filename.startswith("district_start_point"):
                cumsum_cases.append(get_cum_sum_cases_deaths(filepath)[0])
                cumsum_deaths.append(get_cum_sum_cases_deaths(filepath)[1])
                n_cases.append(get_cum_sum_cases_deaths(filepath)[2])
                daily_cases.append(get_cases_and_deaths_over_time(filepath)[0])
                daily_deaths.append(get_cases_and_deaths_over_time(filepath)[1])
    mid_results_df[start_point] = [np.mean(dalys_per_run), np.mean(yll_per_run), np.mean(yld_per_run),
                                   np.mean(mean_n_infected_per_host),
                                   [np.mean(i) for i in zip(*distribution_of_infection_status)],
                                   [np.mean(j) for i in zip(*cumsum_cases) for j in i],
                                   [np.mean(i) for i in zip(*n_cases)],
                                   [np.mean(i) + 1.96 * np.std(i) for i in zip(*n_cases)],
                                   [np.mean(i) - 1.96 * np.std(i) for i in zip(*n_cases)],
                                   [np.mean(j) for i in zip(*cumsum_deaths)for j in i],
                                   [np.mean(i) for i in zip(*daily_deaths)],
                                   ]
mid_results_df.to_csv("/Users/sophieayling/Documents/GitHub/Disease-Modelling-SSA/data/map_input/model_output/mobility_scenarios/midRangeCities/mid_run_results.csv")
outbound_rankings = pd.read_csv("/Users/sophieayling/Documents/GitHub/Disease-Modelling-SSA/data/preprocessed/mobility/New Files/"
                                "Most mobile districts i5.csv")
matched_names = ['d_2', 'd_18', 'd_23', 'd_26', 'd_31']
ranking_mid = [int(outbound_rankings.loc[outbound_rankings['outbound_district'] == name, 'order'].values) + 1 for
               name in matched_names]


flip_rank = [61 - i for i in ranking_mid]




ValueError: Length mismatch: Expected axis has 0 elements, new values have 11 elements

In [None]:
# create plots 

In [None]:

mid_results_df = pd.read_csv("/Users/sophieayling/Documents/GitHub/Disease-Modelling-SSA/data/map_input/model_output/mobility_scenarios/midRangeCities/mid_run_results.csv")
mid_results_df.index = mid_results_df['Unnamed: 0']
mid_results_df = mid_results_df.drop('Unnamed: 0', axis=1)
mid_color = {'1': 'darkred',
             '2': 'maroon',
             '3': 'firebrick',
             '4': 'brown',
             '5': 'indianred'}
mid_starting_districts = ''

for column in mid_results_df.columns:
    plt.plot(np.arange(len(ast.literal_eval(mid_results_df.loc['n_cases', column]))),
             np.cumsum(ast.literal_eval(mid_results_df.loc['n_cases', column])),
             label=column[1:-1], color=mid_color[str(mid_results_df.loc['ranking', column])])
    mid_starting_districts += column

plt.legend()
plt.xlabel('Days')
plt.ylabel('Cumulative number of cases')
plt.yscale('log')

plt.title('The cumulative number of cases for epidemics starting in different districts')
plt.savefig(f"/Users/sophieayling/Documents/GitHub/Disease-Modelling-SSA/data/map_output/mobility/mid_range/"
            f"cumulative_cases_per_district_start_{mid_starting_districts}_batch_"
            f"{30}_runs_log.png",
            bbox_inches='tight')
plt.clf()

for column in mid_results_df.columns:
    plt.plot(np.arange(len(ast.literal_eval(mid_results_df.loc['n_cases', column]))),
             ast.literal_eval(mid_results_df.loc['n_cases', column]),
             label=column[1:-1], color=mid_color[str(mid_results_df.loc['ranking', column])])
plt.legend()
plt.xlabel('Days')
plt.ylabel('Number of cases')
plt.yscale('log')

plt.title('The number of cases for epidemics starting in different districts')

plt.savefig(f"/Users/sophieayling/Documents/GitHub/Disease-Modelling-SSA/data/map_output/mobility/mid_range/"
            f"av_num_cases_per_district_start{mid_starting_districts}_batch_"
            f"{30}_runs_log.png",
            bbox_inches='tight')

plt.clf()
dalys = list(mid_results_df.loc['DALYs'].values.astype(float)) 
district_names = list(mid_results_df.columns)
colors = list(mid_color.values())
plt.bar(np.arange(len(dalys)), dalys, color=colors)
plt.xticks(np.arange(len(dalys)), district_names)
plt.ylabel('DALYs (logscale)')
plt.yscale('log')
plt.title('DALYs per start region')
plt.savefig(f"/Users/sophieayling/Documents/GitHub/Disease-Modelling-SSA/data/map_output/mobility/mid_range/"
            f"av_DALYs_cases_per_district_start{mid_starting_districts}_batch_"
            f"{30}_runs_logscale.png",
            bbox_inches='tight')
plt.clf()


In [None]:
# not yet edited for mid 

In [None]:
red_max_to_mid = np.linspace(256, 256 / 2 + 10, len(max_color.keys()))
red_upper_scale = [[i / 256, 0, 0] for i in red_max_to_mid]
red_mid_to_min = np.linspace(256 / 2, 100, len(max_color.keys()))
red_lower_scale = [[i / 256, 0, 0] for i in red_mid_to_min]
blue_max_to_mid = np.linspace(256, 256 / 2 + 10, len(max_color.keys()))
blue_upper_scale = [[0, 0, i / 256] for i in blue_max_to_mid]
blue_mid_to_min = np.linspace(256 / 2, 100, len(max_color.keys()))
blue_lower_scale = [[0, 0, i / 256] for i in blue_mid_to_min]

yll_data = list(max_results_df.loc['YLL'].values.astype(float)) + list(min_results_df.loc['YLL'].values.astype(float))
yll_colors = red_upper_scale + blue_upper_scale
yld_data = list(max_results_df.loc['YLD'].values.astype(float)) + list(min_results_df.loc['YLD'].values.astype(float))
yld_colors = red_lower_scale + blue_lower_scale
plt.subplot(2, 1, 1)
plt.bar(np.arange(len(max_results_df.loc['YLL'].values.astype(float))),
        list(max_results_df.loc['YLL'].values.astype(float)),
        color=red_lower_scale[0], label='YLL')
plt.bar(np.arange(len(max_results_df.loc['YLD'].values.astype(float))),
        list(max_results_df.loc['YLD'].values.astype(float)),
        color=red_upper_scale[1], label='YLD', bottom=list(max_results_df.loc['YLL'].values.astype(float)))
plt.legend()
plt.xticks(np.arange(len(max_district_start_points)), max_district_start_points)
plt.ylabel('DALYs')
plt.title('Most mobile')
plt.subplot(2, 1, 2)
plt.bar(np.arange(len(min_results_df.loc['YLL'].values.astype(float))),
        list(min_results_df.loc['YLL'].values.astype(float)),
        color=blue_lower_scale[1], label='YLL')
plt.bar(np.arange(len(min_results_df.loc['YLD'].values.astype(float))),
        list(min_results_df.loc['YLD'].values.astype(float)),
        color=blue_upper_scale[1], label='YLD', bottom=list(min_results_df.loc['YLL'].values.astype(float)))
plt.legend()
plt.xticks(np.arange(len(min_district_start_points)), min_district_start_points)
plt.ylabel('DALYs')
plt.title('Least mobile')
plt.show()
plt.bar(np.arange(len(yld_data)), yld_data, color=yld_colors, bottom=yll_data, label='YLD')
plt.yscale('log')
plt.show()
for column in max_results_df.columns:
    plt.plot(np.arange(len(ast.literal_eval(max_results_df.loc['n_deaths', column]))),
             ast.literal_eval(max_results_df.loc['n_deaths', column]),
             label=column[1:-1], color=max_color[str(max_results_df.loc['ranking', column])])
for column in min_results_df.columns:
    plt.plot(np.arange(len(ast.literal_eval(min_results_df.loc['n_deaths', column]))),
             ast.literal_eval(min_results_df.loc['n_deaths', column]),
             label=column[1:-1], color=min_color[str(min_results_df.loc['ranking', column])])

plt.legend()
plt.xlabel('Days')
plt.ylabel('Number of deaths')
plt.title('The number of deaths for epidemics starting in different districts')
plt.yscale('log')

plt.savefig(f"/Users/robbiework/PycharmProjects/spacialEpidemiologyAnalysis/plots/"
            f"av_num_deaths_per_district_start{max_starting_districts + min_starting_districts}_batch_"
            f"{30}_runs_log.png",
            bbox_inches='tight')
plt.clf()
print('hi')

plt.subplot(2, 1, 1)
for column in max_results_df.columns:
    plt.plot(np.arange(len(ast.literal_eval(max_results_df.loc['n_cases', column]))),
             np.cumsum(ast.literal_eval(max_results_df.loc['n_cases', column])),
             label=column[1:-1], color=max_color[str(max_results_df.loc['ranking', column])])
plt.legend()
plt.xlabel('Days')
plt.ylabel('Cumulative number of cases')
plt.subplot(2, 1, 2)
for column in min_results_df.columns:
    plt.plot(np.arange(len(ast.literal_eval(min_results_df.loc['n_cases', column]))),
             np.cumsum(ast.literal_eval(min_results_df.loc['n_cases', column])),
             label=column[1:-1], color=min_color[str(min_results_df.loc['ranking', column])])

plt.legend()
plt.xlabel('Days')
plt.ylabel('Cumulative number of cases')
plt.suptitle('The cumulative number of cases in the\nmost and least mobile districts respectively')
plt.subplots_adjust(hspace=0.6)
plt.savefig(f"/Users/robbiework/PycharmProjects/spacialEpidemiologyAnalysis/plots/"
            f"cumulative_cases_per_district_start_{max_starting_districts + min_starting_districts}_batch_"
            f"{30}_runs_log.png",
            bbox_inches='tight')

plt.clf()