# Microsim Analysis

Does some analysis of the outputs from the [microsim_model.py](./microsim_model.py).

## Initialisation

In [4]:
# Import libraries
import pandas as pd
import pickle
from typing import List
import os
import glob
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm  # For a progress bar

#sys.path.append("microsim")
#from microsim.microsim_model import Microsim


# Set directories 
base_dir = str(Path(os.getcwd()).parent)  # Get main RAMP-UA directory (should be parent to this file)
data_dir = os.path.join(base_dir, "data") 
results_dir = os.path.join(data_dir, "output")

if base_dir.split("/")[-1] != "RAMP-UA":
    raise Exception(f"The base directory should point to the main 'RAMP-UA' directory,"
                    f"but it points to {base_dir}")

## Read data

Each time the model is run it outputs data into a new sub-directory under [../data/output/](../data/output) numbered incrementally. E.g.:
 - `data/output/0`
 - `data/output/1`
 - `...` 
 
The following variables specify which directories to read the data from

In [5]:
START_DIRECTORY = 0
END_DIRECTORY = 9

Now read the data:

In [24]:
def read_data(results_dir, start_directory=None, end_directory=None):
    """
    Read all the available output files stored in subdirectories of `results_dir`. Results directories
    should be numbered incrementally.
    Optionally provide `start_directory` and `end_directory` to only select results directories within
    those.
    
    :param results_dir: The directory to look in for results
    :param start_directory: Optinal directory to start reading from
    :param end_directory: Optinal directory to start reading to
    :return: a dictionary with the results. Structured as follows:
      dict:
        -> 4  (the name of the model results subdirectory)
          ->  Retail (dataframe showing locations for the activity)
          ->  SecondarySchool
          ->  Individuals
          ->  PrimarySchool
          ->  Work
          ->  Home
        -> 9 
          -> ...
    """
    # Get all the subdirectories in the results directory
    all_dirs = [ d for d in glob.glob(os.path.join(results_dir,"*")) if os.path.isdir(d)]
    # Now get those inbetween start and end (inclusive)
    selected_dirs = []
    if start_directory is None and end_directory is None:
        selected_dirs = all_dirs
    else:
        for d in all_dirs:
            if start_directory <= int(d.split('/')[-1]) <= end_directory:
                selected_dirs.append(d)
    print("Reading results from directories:\n", "\n".join(selected_dirs), flush=True)
                
    # Read the results in each of those directories. Use a dict. Each item will be another dict that 
    # stores the results of one model run (i.e. one subdiectory).
    results_dict = dict()
    for d in tqdm(selected_dirs, desc="Reading directories"):
        model_name = d.split('/')[-1]  # Name of the model run (e.g. '2')
        model_results = dict()  # results for this model run
        for filename in glob.glob(os.path.join(d,"*")):
            #print(filename)
            if filename.split("/")[-1] == "m0.pickle": # Special case: whole model object is stored
                # Not actually reading the full model yet
                pass
            elif filename.endswith(".pickle"):  # In all other cases just dataframes are stored
                with open(filename, 'rb') as f:
                    df_name = filename.split("/")[-1].replace(".pickle","")  # Name of the file (e.g. 'Work')
                    df = pickle.load(f)  # The dataframe stored in the file
                    model_results[df_name] = df
            else: # Other files (e.g. csv files) can be ignored
                pass
        
        # Store this model in the main dictionary
        results_dict[model_name] = model_results
    
    assert len(results_dict) == len(selected_dirs)
    return results_dict     

In [26]:
res = read_data(results_dir, 3, 9)
print(f"Read results from {len(res)} models")

Reading results from directories:
 /Users/nick/research_not_syncd/git_projects/RAMP-UA/data/output/9
/Users/nick/research_not_syncd/git_projects/RAMP-UA/data/output/7
/Users/nick/research_not_syncd/git_projects/RAMP-UA/data/output/6
/Users/nick/research_not_syncd/git_projects/RAMP-UA/data/output/8
/Users/nick/research_not_syncd/git_projects/RAMP-UA/data/output/4
/Users/nick/research_not_syncd/git_projects/RAMP-UA/data/output/3
/Users/nick/research_not_syncd/git_projects/RAMP-UA/data/output/5


Reading directories: 100%|██████████| 7/7 [00:08<00:00,  1.25s/it]


Read results from 7 models


## Disease status

A graph of disease statuses over time ...

_Just working with one model at the moment, in future should work with many results_

In [29]:
# Just get the first result
m = res[next(iter(res.keys()))]

individuals = m['Individuals'] # Dataframe of the individuals




## Locations

Some analysis/visualisation of the locations ... 

In [52]:
retail = m['Retail']
homes = m['Home']

Where an individual goes *shopping*:

In [76]:
_id = 2
individuals.loc[_id]

shops_they_visit = retail.loc[retail.ID.isin(list(individuals.loc[_id, "Retail_Venues"]))]
shops_they_visit

Unnamed: 0,ID,Danger0,Danger001
10,11,0,0
11,12,0,0
15,16,0,0
17,18,0,0
18,19,0,0
19,20,0,0
20,21,0,0
21,22,0,0
22,23,0,0
23,24,0,0


Where an individual lives:

In [77]:
home = homes.loc[homes.ID.isin(list(individuals.loc[_id, "Home_Venues"]))]
home

Unnamed: 0,ID,Danger0,Danger001
1,1,0,0


Who else lives there

In [78]:
home_id = home.ID.values[0]
individuals.loc[individuals.Home_Venues.apply(lambda x: home_id in x ) ]

Unnamed: 0,ID,area,_hid,_pid,hhnssec,hpnssec5,soc4,sic,sex,hhref,...,Work_Flows,Work_Duration,disease_status,current_risk,MSOA_Cases,HID_Cases,presymp_days,symp_days,disease_status000,disease_status001
2,2,E02004129,46,1408,5,Lower supervisory and technical occupations,92,13,1,1,...,[1.0],0.0,0,0,0,0,-1,-1,0,0.0
3,3,E02004129,46,4108,5,Lower supervisory and technical occupations,22,58,0,0,...,[1.0],0.236111,0,0,0,0,-1,-1,0,0.0
