# Generate book table of contents

Here we generate the Jupyter book table of contents file at `docs/_toc.yml`.

In [1]:
contigs = ["2L"]

In [2]:
import pandas as pd
import geopandas as gpd
from pyprojroot import here
import yaml

In [3]:
gdf_cohorts = gpd.read_file(here() / "build" / "final_cohorts.geojson")
gdf_cohorts.head()

Unnamed: 0,cohort_id,cohort_size,country,admin1_iso,admin1_name,admin2_name,taxon,year,quarter,cohort_label,...,country_alpha2,country_alpha3,shapeName,shapeISO,shapeID,shapeGroup,shapeType,representative_lon,representative_lat,geometry
0,ML-2_Kati_colu_2014_Q3,27,Mali,ML-2,Koulikouro,Kati,coluzzii,2014,3,Mali / Kati / coluzzii / 2014 / Q3,...,ML,MLI,Kati,,8926073B54119460885487,MLI,ADM2,-8.426097,12.598369,"POLYGON ((-9.04476 12.42636, -9.02295 12.41940..."
1,ML-2_Kati_gamb_2014_Q3,24,Mali,ML-2,Koulikouro,Kati,gambiae,2014,3,Mali / Kati / gambiae / 2014 / Q3,...,ML,MLI,Kati,,8926073B54119460885487,MLI,ADM2,-8.426097,12.598369,"POLYGON ((-9.04476 12.42636, -9.02295 12.41940..."


In [4]:
gdf_cohorts.columns

Index(['cohort_id', 'cohort_size', 'country', 'admin1_iso', 'admin1_name',
       'admin2_name', 'taxon', 'year', 'quarter', 'cohort_label',
       'sample_query', 'latitude', 'longitude', 'h12_window_size',
       'country_alpha2', 'country_alpha3', 'shapeName', 'shapeISO', 'shapeID',
       'shapeGroup', 'shapeType', 'representative_lon', 'representative_lat',
       'geometry'],
      dtype='object')

In [5]:
countries = gdf_cohorts["country_alpha2"].unique()
countries

array(['ML'], dtype=object)

In [6]:
# TODO read alerts from some external file
alerts = [
    "SA-1",
    "SA-2",
]

In [7]:
alerts_part = dict(
    caption="Alerts",
    chapters=[
        dict(
            file="alerts",
            sections=[
                dict(file=f"alerts/{a}")
                for a in alerts
            ],
        ),
    ]
)
alerts_part

{'caption': 'Alerts',
 'chapters': [{'file': 'alerts',
   'sections': [{'file': 'alerts/SA-1'}, {'file': 'alerts/SA-2'}]}]}

In [8]:
genome_part = dict(
    caption="By genome",
    chapters=[
        dict(
            file=f"genome/ag-{contig}",
        ) 
        for contig in contigs
    ],
)
genome_part

{'caption': 'By genome', 'chapters': [{'file': 'genome/ag-2L'}]}

In [9]:
def build_country_chapter(country):
    cohorts = sorted(
        gdf_cohorts.query(f"country_alpha2 == '{country}'")["cohort_id"]
    )
    chapter = dict(
        file=f"country/{country}",
        sections=[
            dict(
                file=f"cohort/{cohort}",
            )
            for cohort in cohorts
        ],
    )
    return chapter



In [10]:
countries_part = dict(
    caption="By country",
    chapters=[
        build_country_chapter(country)
        for country in countries
    ]
)
countries_part

{'caption': 'By country',
 'chapters': [{'file': 'country/ML',
   'sections': [{'file': 'cohort/ML-2_Kati_colu_2014_Q3'},
    {'file': 'cohort/ML-2_Kati_gamb_2014_Q3'}]}]}

In [11]:
toc = dict(
    format="jb-book",
    root="home-page",
    parts=[
        alerts_part,
        genome_part,
        countries_part,
    ]
)
toc

{'format': 'jb-book',
 'root': 'home-page',
 'parts': [{'caption': 'Alerts',
   'chapters': [{'file': 'alerts',
     'sections': [{'file': 'alerts/SA-1'}, {'file': 'alerts/SA-2'}]}]},
  {'caption': 'By genome', 'chapters': [{'file': 'genome/ag-2L'}]},
  {'caption': 'By country',
   'chapters': [{'file': 'country/ML',
     'sections': [{'file': 'cohort/ML-2_Kati_colu_2014_Q3'},
      {'file': 'cohort/ML-2_Kati_gamb_2014_Q3'}]}]}]}

In [12]:
with open(here() / "docs" / "_toc.yml", mode="w") as output_file:
    yaml.safe_dump(toc, output_file)