# Microsim Analysis

Does some analysis of the outputs from the [microsim_model.py](./microsim_model.py).

## Initialisation

In [1]:
# Import libraries
import pandas as pd
import pickle
from typing import List
import os
import glob
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm  # For a progress bar

#sys.path.append("microsim")
#from microsim.microsim_model import Microsim


# Set directories 
base_dir = str(Path(os.getcwd()).parent)  # Get main RAMP-UA directory (should be parent to this file)
data_dir = os.path.join(base_dir, "data") 
results_dir = os.path.join(data_dir, "output")

if base_dir.split("/")[-1] != "RAMP-UA":
    raise Exception(f"The base directory should point to the main 'RAMP-UA' directory,"
                    f"but it points to {base_dir}")

## Read data

Each time the model is run it outputs data into a new sub-directory under [../data/output/](../data/output) numbered incrementally. E.g.:
 - `data/output/0`
 - `data/output/1`
 - `...` 
 
The following variables specify which directories to read the data from

In [2]:
START_DIRECTORY = 0
END_DIRECTORY = 9

Now read the data:

In [3]:
def read_data(results_dir, start_directory=None, end_directory=None):
    """
    Read all the available output files stored in subdirectories of `results_dir`. Results directories
    should be numbered incrementally.
    Optionally provide `start_directory` and `end_directory` to only select results directories within
    those.
    
    :param results_dir: The directory to look in for results
    :param start_directory: Optinal directory to start reading from
    :param end_directory: Optinal directory to start reading to
    :return: a dictionary with the results. Structured as follows:
      dict:
        -> 4  (the name of the model results subdirectory)
          ->  Retail (dataframe showing locations for the activity)
          ->  SecondarySchool
          ->  Individuals
          ->  PrimarySchool
          ->  Work
          ->  Home
        -> 9 
          -> ...
    """
    # Get all the subdirectories in the results directory
    all_dirs = [ d for d in glob.glob(os.path.join(results_dir,"*")) if os.path.isdir(d)]
    # Now get those inbetween start and end (inclusive)
    selected_dirs = []
    if start_directory is None and end_directory is None:
        selected_dirs = all_dirs
    else:
        for d in all_dirs:
            if start_directory <= int(d.split('/')[-1]) <= end_directory:
                selected_dirs.append(d)
    print("Reading results from directories:\n", "\n".join(selected_dirs), flush=True)
                
    # Read the results in each of those directories. Use a dict. Each item will be another dict that 
    # stores the results of one model run (i.e. one subdiectory).
    results_dict = dict()
    for d in tqdm(selected_dirs, desc="Reading directories"):
        model_name = d.split('/')[-1]  # Name of the model run (e.g. '2')
        model_results = dict()  # results for this model run
        for filename in glob.glob(os.path.join(d,"*")):
            #print(filename)
            if filename.split("/")[-1] == "m0.pickle": # Special case: whole model object is stored
                # Not actually reading the full model yet
                pass
            elif filename.endswith(".pickle"):  # In all other cases just dataframes are stored
                with open(filename, 'rb') as f:
                    df_name = filename.split("/")[-1].replace(".pickle","")  # Name of the file (e.g. 'Work')
                    df = pickle.load(f)  # The dataframe stored in the file
                    model_results[df_name] = df
            else: # Other files (e.g. csv files) can be ignored
                pass
        
        # Store this model in the main dictionary
        results_dict[model_name] = model_results
    
    assert len(results_dict) == len(selected_dirs)
    return results_dict     

In [4]:
res = read_data(results_dir, 3, 9)
print(f"Read results from {len(res)} models")

Reading results from directories:
 /Users/nick/research_not_syncd/git_projects/RAMP-UA/data/output/9
/Users/nick/research_not_syncd/git_projects/RAMP-UA/data/output/7
/Users/nick/research_not_syncd/git_projects/RAMP-UA/data/output/6
/Users/nick/research_not_syncd/git_projects/RAMP-UA/data/output/8
/Users/nick/research_not_syncd/git_projects/RAMP-UA/data/output/4
/Users/nick/research_not_syncd/git_projects/RAMP-UA/data/output/3
/Users/nick/research_not_syncd/git_projects/RAMP-UA/data/output/5


Reading directories: 100%|██████████| 7/7 [00:12<00:00,  1.81s/it]

Read results from 7 models





## Disease status

A graph of disease statuses over time ...

_Just working with one model at the moment, in future should work with many results_

In [6]:
# Just get the first result
m = res[next(iter(res.keys()))]

individuals = m['Individuals'] # Dataframe of the individuals
individuals



Unnamed: 0,ID,area,_hid,_pid,hhnssec,hpnssec5,soc4,sic,sex,hhref,...,disease_status031,disease_status032,disease_status033,disease_status034,disease_status035,disease_status036,disease_status037,disease_status038,disease_status039,disease_status040
0,0,E02004129,7,64,4,Small employers and own account workers,0,0,1,0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
1,1,E02004129,7,6004,4,Small employers and own account workers,61,67,0,1,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
2,2,E02004129,46,1408,5,Lower supervisory and technical occupations,92,13,1,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,E02004129,46,4108,5,Lower supervisory and technical occupations,22,58,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,E02004129,51,496,2,Managerial and professional occupations,0,0,1,0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
694058,818209,E02004235,26389,55181,5,Lower supervisory and technical occupations,12,50,0,0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
694059,818210,E02004235,26393,53730,7,Semi-routine occupations,71,43,0,1,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
694060,818211,E02004235,26404,49789,6,Semi-routine occupations,71,41,1,0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
694061,818212,E02004235,26404,51881,6,Semi-routine occupations,52,4,1,1,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0


## Locations

Some analysis/visualisation of the locations ... 

In [7]:
retail = m['Retail']
homes = m['Home']

Where an individual goes *shopping*:

In [8]:
_id = 2
individuals.loc[_id]

shops_they_visit = retail.loc[retail.ID.isin(list(individuals.loc[_id, "Retail_Venues"]))]
shops_they_visit[:,]

Unnamed: 0,ID,Danger0,Danger001,Danger002,Danger003,Danger004,Danger005,Danger006,Danger007,Danger008,...,Danger031,Danger032,Danger033,Danger034,Danger035,Danger036,Danger037,Danger038,Danger039,Danger040
10,11,0,0,0.00125,0.005694,0.005694,0.005778,0.006611,0.023799,1.592487,...,4.27692,0.832784,0.155635,0.024915,0.003841,0.000497,7.1e-05,7e-06,1.1674e-06,9.97e-08
11,12,0,0,0.045622,0.091733,0.093035,0.093899,0.102545,0.252926,26.711506,...,13.461897,2.612263,0.462677,0.073744,0.010995,0.001599,0.00026,2.7e-05,4.6483e-06,3.418e-07
15,16,0,0,0.000472,0.000472,0.005854,0.005854,0.005854,0.017833,0.053771,...,5.306356,1.119799,0.206263,0.040349,0.006383,0.000886,9.3e-05,1.3e-05,1.1283e-06,7.32e-08
17,18,0,0,0.0005,0.0005,0.006056,0.006056,0.006056,0.018035,0.053972,...,5.356015,1.130106,0.208199,0.040686,0.006439,0.000894,9.4e-05,1.3e-05,1.1554e-06,7.57e-08
18,19,0,0,0.014028,0.017917,0.017917,0.01799,0.018719,0.031787,3.44622,...,3.427323,0.670842,0.119354,0.019132,0.003009,0.000497,7.5e-05,8e-06,1.052e-06,1.744e-07
19,20,0,0,0.00191,0.00191,0.00191,0.00191,0.00191,0.00191,0.003162,...,0.559113,0.109435,0.019596,0.003135,0.000528,9.2e-05,1.3e-05,1e-06,1.418e-07,2.92e-08
20,21,0,0,0.012476,0.015476,0.015476,0.015532,0.016094,0.026694,2.61157,...,3.591676,0.705566,0.125732,0.020502,0.003253,0.000538,7.7e-05,9e-06,1.0462e-06,1.669e-07
21,22,0,0,0.011632,0.011632,0.011632,0.011632,0.011632,0.013221,1.558513,...,3.141558,0.619179,0.110852,0.017823,0.002838,0.000475,6.9e-05,8e-06,8.499e-07,1.669e-07
22,23,0,0,0.049375,0.062486,0.068746,0.068992,0.07145,0.116633,11.460939,...,13.900124,2.752883,0.498196,0.08216,0.013019,0.002032,0.000294,3.3e-05,4.3302e-06,6.166e-07
23,24,0,0,0.046858,0.059302,0.065762,0.065995,0.068328,0.111014,10.421245,...,13.671179,2.701605,0.488873,0.080711,0.012823,0.001997,0.000286,3.2e-05,4.2619e-06,6.28e-07


Where an individual lives:

In [77]:
home = homes.loc[homes.ID.isin(list(individuals.loc[_id, "Home_Venues"]))]
home

Unnamed: 0,ID,Danger0,Danger001
1,1,0,0


Who else lives there

In [78]:
home_id = home.ID.values[0]
individuals.loc[individuals.Home_Venues.apply(lambda x: home_id in x ) ]

Unnamed: 0,ID,area,_hid,_pid,hhnssec,hpnssec5,soc4,sic,sex,hhref,...,Work_Flows,Work_Duration,disease_status,current_risk,MSOA_Cases,HID_Cases,presymp_days,symp_days,disease_status000,disease_status001
2,2,E02004129,46,1408,5,Lower supervisory and technical occupations,92,13,1,1,...,[1.0],0.0,0,0,0,0,-1,-1,0,0.0
3,3,E02004129,46,4108,5,Lower supervisory and technical occupations,22,58,0,0,...,[1.0],0.236111,0,0,0,0,-1,-1,0,0.0
