In [None]:
# Notebook parameters. Values here are for development only and
# will be overridden when running via snakemake and papermill.

# Path to the configuration file that is handed to AtlasSetup further down.
# Two development configurations are listed; exactly one assignment should
# be left uncommented at a time.
# config_file = "../../../config/agam.yaml"
config_file = "../../../config/afun.yaml"

In [None]:
from bokeh.io import output_notebook
import malariagen_data
from IPython.display import Markdown
from selection_atlas.setup import AtlasSetup
from selection_atlas.page_utils import AtlasPageUtils

# Initialise the atlas setup from the configuration file chosen in the
# parameters cell, then build the page utilities on top of it. Both
# `setup` and `page_utils` are used by later cells in this notebook.
setup = AtlasSetup(config_file)
page_utils = AtlasPageUtils(setup=setup)

# N.B., do not add the "remove-output" tag to this cell!!! If you do,
# the bokeh javascript libraries will not get loaded in the generated
# HTML page. The call to output_notebook() injects javascript in the
# cell output which triggers the bokeh javascript libraries to be loaded
# in the page.
output_notebook(hide_banner=True)

# Methods

## Data sources

In [None]:
# Sample metadata table provided by the atlas setup. Its length is reported
# as the number of mosquitoes in the summary text below.
df_samples = setup.sample_metadata()

# Distinct countries present in the metadata. Missing values are dropped
# first because Series.unique() returns NaN/None as a value of its own,
# which would otherwise be counted as an extra country in len(countries).
countries = df_samples["country"].dropna().unique()

In [None]:
# Render the introductory paragraph as Markdown so that the atlas id,
# analysis version, sample count and country count are filled in from the
# objects created above. The doubled braces ({{term}}) escape the f-string
# and emit a literal {term} in the generated text.
Markdown(f"""
This report analyses genome variation data from the 
{{term}}`Malaria Vector Genome Observatory`. The current analysis version 
{setup.atlas_id}/{setup.analysis_version} includes data for a total of
{len(df_samples):,} mosquitoes sampled from {len(countries)} countries. 
See Table 1 below for a complete list of the sample sets used in the 
current analysis version, with information about the corresponding 
contributors, data releases and citations.
""")

In [None]:
# Build Table 1 (referred to in the paragraph above) from the sample
# metadata. The call is the last expression in the cell, so whatever it
# returns is shown as the cell output.
page_utils.style_data_sources(
    df_samples=df_samples,
    caption="Table 1. Data sources included in the current analysis version.",
)

In [None]:
# Render the data retrieval paragraph as Markdown. The malariagen_data
# version is read from the imported package at run time, so the text always
# states the version that was actually used. The doubled braces ({{term}})
# escape the f-string and emit a literal {term} in the generated text.
Markdown(f"""
Sample metadata, unphased SNP calls, and phased SNP haplotypes were retrieved from 
the {{term}}`Malaria Vector Genome Observatory` cloud data repository hosted in 
Google Cloud Storage (GCS) via the {{term}}`MalariaGEN Python API` version 
{malariagen_data.__version__}.
""")

TODO: describe the reference genome and the genome annotations obtained from VectorBase.

## Sample inclusion and grouping into cohorts

TODO

## H12 and G123 window size calibration

TODO: describe how the window-size calibration was done.

TODO: explain that, after calibration, some cohorts are removed if a window size cannot be obtained for them.

## H12 genome-wide selection scans

TODO

## G123 genome-wide selection scans

TODO

## IHS genome-wide selection scans

TODO

## Automated detection of selection signals

TODO

## Identification of selection alerts

TODO

## Web report generation

TODO