In [1]:
# Notebook parameters. Values here are for development only and
# will be overridden when running via snakemake and papermill.

# Path to the configuration file handed to AtlasSetup.
config_file = "../../../config/agam.yaml"

# Finalize cohorts

Here we finalise the list of cohorts to include in the site, removing any cohort that failed H12 window size calibration (i.e., for which no H12 window size was obtained).

In [2]:
import yaml
import pandas as pd
from selection_atlas.setup import AtlasSetup

# Initialise the atlas setup from the configuration file. The resulting
# `setup` object supplies the file locations used in this notebook
# (`cohorts_file`, `h12_calibration_files`, `final_cohorts_file`).
setup = AtlasSetup(config_file)

In [3]:
# Load the cohorts table.
# NOTE(review): the loaded table contains an "Unnamed: 0" column, which
# looks like a row index saved by the step that wrote this file — confirm,
# and consider `index_col=0` if nothing downstream relies on that column.
df_cohorts = pd.read_csv(setup.cohorts_file)
# Preview the first rows.
df_cohorts.head()

Unnamed: 0,cohort_id,cohort_size,country,admin1_iso,admin1_name,admin2_name,taxon,year,quarter,cohort_label,sample_query
0,AO-LUA_Luanda_colu_2009_Q2,77,Angola,AO-LUA,Luanda,Luanda,coluzzii,2009,2,Angola / Luanda / coluzzii / 2009 / Q2,cohort_admin2_quarter == 'AO-LUA_Luanda_colu_2...
1,BF-02_Comoe_colu_2011,18,Burkina Faso,BF-02,Cascades,Comoe,coluzzii,2011,-1,Burkina Faso / Comoe / coluzzii / 2011,cohort_admin2_quarter == 'BF-02_Comoe_colu_201...
2,BF-02_Comoe_colu_2012,63,Burkina Faso,BF-02,Cascades,Comoe,coluzzii,2012,-1,Burkina Faso / Comoe / coluzzii / 2012,cohort_admin2_quarter == 'BF-02_Comoe_colu_201...
3,BF-02_Comoe_colu_2015,33,Burkina Faso,BF-02,Cascades,Comoe,coluzzii,2015,-1,Burkina Faso / Comoe / coluzzii / 2015,cohort_admin2_quarter == 'BF-02_Comoe_colu_201...
4,BF-02_Comoe_colu_2016,53,Burkina Faso,BF-02,Cascades,Comoe,coluzzii,2016,-1,Burkina Faso / Comoe / coluzzii / 2016,cohort_admin2_quarter == 'BF-02_Comoe_colu_201...


In [None]:
def _read_h12_window_size(cohort_id):
    """Return the `h12_window_size` entry stored for one cohort.

    The calibration file path is obtained by formatting
    `setup.h12_calibration_files` (as a POSIX string) with
    `cohort=cohort_id`. The file is parsed as YAML and the value under
    the `h12_window_size` key is returned unchanged.
    """
    calibration_path = setup.h12_calibration_files.as_posix().format(
        cohort=cohort_id
    )
    with open(calibration_path) as calibration_file:
        calibration_params = yaml.safe_load(calibration_file)
    return calibration_params["h12_window_size"]


# One window size per cohort, in the same order as the rows of df_cohorts.
window_size = [
    _read_h12_window_size(cohort_id) for cohort_id in df_cohorts["cohort_id"]
]

# Attach the window sizes to the cohorts table as a new column.
df_cohorts["h12_window_size"] = window_size

In [None]:
# Keep only the cohorts that have an H12 window size; rows where
# `h12_window_size` is missing are dropped.
df_cohorts = df_cohorts.dropna(subset=["h12_window_size"])

# Write the final cohorts table, leaving out the DataFrame index.
df_cohorts.to_csv(setup.final_cohorts_file, index=False)