In [1]:
# Notebook parameters. The values in this cell are development defaults only;
# they will be overridden when running via snakemake and papermill.

# Path to the configuration file that is passed to AtlasSetup below.
config_file = "../../../config/agam.yaml"

In [2]:
from bokeh.io import output_notebook
from IPython.display import Markdown
from selection_atlas.setup import AtlasSetup
from selection_atlas.page_utils import AtlasPageUtils

# Initialise the atlas setup from the configured file, then create the page
# utilities object that is bound to that same setup.
setup = AtlasSetup(config_file)
page_utils = AtlasPageUtils(setup=setup)

# N.B., do not add the "remove-output" tag to this cell!!! If you do,
# the bokeh javascript libraries will not get loaded in the generated
# HTML page. The call to output_notebook() injects javascript in the
# cell output which triggers the bokeh javascript libraries to be loaded
# in the page.
output_notebook(hide_banner=True)

# Data sources

In [3]:
# Load the sample metadata through the atlas setup. The bare expression on
# the last line displays the resulting table as the cell output.
df_samples = setup.sample_metadata()
df_samples

                                     

Unnamed: 0,sample_id,partner_sample_id,contributor,country,location,year,month,latitude,longitude,sex_call,...,admin1_name,admin1_iso,admin2_name,taxon,cohort_admin1_year,cohort_admin1_month,cohort_admin1_quarter,cohort_admin2_year,cohort_admin2_month,cohort_admin2_quarter
0,VBS10116-4954STDY7089644,UG4A2016A1_96,Mara Lawniczak,Uganda,Busia,2013,1,0.466,34.089,F,...,Eastern Region,UG-E,Busia,gambiae,UG-E_gamb_2013,UG-E_gamb_2013_01,UG-E_gamb_2013_Q1,UG-E_Busia_gamb_2013,UG-E_Busia_gamb_2013_01,UG-E_Busia_gamb_2013_Q1
1,VBS10117-4954STDY7089645,UG4A2016B1_95,Mara Lawniczak,Uganda,Busia,2016,6,0.466,34.089,F,...,Eastern Region,UG-E,Busia,gambiae,UG-E_gamb_2016,UG-E_gamb_2016_06,UG-E_gamb_2016_Q2,UG-E_Busia_gamb_2016,UG-E_Busia_gamb_2016_06,UG-E_Busia_gamb_2016_Q2
2,VBS10118-4954STDY7089646,UG4A2016C1_94,Mara Lawniczak,Uganda,Busia,2016,6,0.466,34.089,F,...,Eastern Region,UG-E,Busia,gambiae,UG-E_gamb_2016,UG-E_gamb_2016_06,UG-E_gamb_2016_Q2,UG-E_Busia_gamb_2016,UG-E_Busia_gamb_2016_06,UG-E_Busia_gamb_2016_Q2
3,VBS10119-4954STDY7089647,UG4A2016D1_93,Mara Lawniczak,Uganda,Busia,2016,6,0.466,34.089,F,...,Eastern Region,UG-E,Busia,gambiae,UG-E_gamb_2016,UG-E_gamb_2016_06,UG-E_gamb_2016_Q2,UG-E_Busia_gamb_2016,UG-E_Busia_gamb_2016_06,UG-E_Busia_gamb_2016_Q2
4,VBS10120-4954STDY7089648,UG4A2016E1_92,Mara Lawniczak,Uganda,Busia,2016,6,0.466,34.089,F,...,Eastern Region,UG-E,Busia,gambiae,UG-E_gamb_2016,UG-E_gamb_2016_06,UG-E_gamb_2016_Q2,UG-E_Busia_gamb_2016,UG-E_Busia_gamb_2016_06,UG-E_Busia_gamb_2016_Q2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4873,SAMN15222632,D342,Jacob Tennessen,Burkina Faso,Tengrela,2016,-1,10.700,-4.800,F,...,Cascades,BF-02,Comoe,coluzzii,BF-02_colu_2016,BF-02_colu_2016,BF-02_colu_2016,BF-02_Comoe_colu_2016,BF-02_Comoe_colu_2016,BF-02_Comoe_colu_2016
4874,SAMN15222633,D343,Jacob Tennessen,Burkina Faso,Tengrela,2016,-1,10.700,-4.800,F,...,Cascades,BF-02,Comoe,coluzzii,BF-02_colu_2016,BF-02_colu_2016,BF-02_colu_2016,BF-02_Comoe_colu_2016,BF-02_Comoe_colu_2016,BF-02_Comoe_colu_2016
4875,SAMN15222634,D346,Jacob Tennessen,Burkina Faso,Tengrela,2016,-1,10.700,-4.800,F,...,Cascades,BF-02,Comoe,coluzzii,BF-02_colu_2016,BF-02_colu_2016,BF-02_colu_2016,BF-02_Comoe_colu_2016,BF-02_Comoe_colu_2016,BF-02_Comoe_colu_2016
4876,SAMN15222635,D347,Jacob Tennessen,Burkina Faso,Tengrela,2016,-1,10.700,-4.800,F,...,Cascades,BF-02,Comoe,coluzzii,BF-02_colu_2016,BF-02_colu_2016,BF-02_colu_2016,BF-02_Comoe_colu_2016,BF-02_Comoe_colu_2016,BF-02_Comoe_colu_2016


In [4]:
# Distinct values of the "country" column in the sample metadata. The length
# of this array is used below to report how many countries are covered.
countries = df_samples["country"].unique()
countries

array(['Uganda', 'Kenya', 'Benin', 'Ghana', "Cote d'Ivoire", 'Togo',
       'Angola', 'Burkina Faso', 'Democratic Republic of the Congo',
       'Central African Republic', 'Cameroon', 'Mayotte', 'Gabon',
       'Gambia, The', 'Guinea', 'Mali', 'Equatorial Guinea',
       'Guinea-Bissau', 'Malawi', 'Mozambique', 'Tanzania', 'Zambia',
       'Comoros, The Union of the', 'Sao Tome and Principe', 'Madagascar'],
      dtype=object)

In [8]:
# Introductory paragraph for the page, rendered as Markdown. The doubled
# braces in {{term}} are f-string escapes, so the rendered text contains a
# literal {term} role in front of the back-quoted glossary entry.
Markdown(f"""
This report analyses data from the {{term}}`Malaria Vector Genome Observatory`. The
current analysis version is {setup.atlas_id}/{setup.analysis_version}. This analyses
data for {len(df_samples):,} mosquitoes sampled from {len(countries)} countries. Please see
the table below for more information regarding the sample sets and data releases
from which the data were obtained.
""")


This report analyses data from the {term}`Malaria Vector Genome Observatory`. The 
current analysis version is agam/2025.03.05. This analyses
data for 4,878 mosquitoes sampled from 25. Please see
the table below for more information regarding the sample sets and data releases 
from which the data were obtained.


## Data sources

In [13]:
# Build one row per distinct (sample_set, study_id, contributor, release)
# combination, ordered by release and then by sample set, with a fresh
# 0..n-1 index. The bare expression at the end displays the table.
df_sample_sets = (
    df_samples.loc[:, ["sample_set", "study_id", "contributor", "release"]]
    .sort_values(by=["release", "sample_set"])
    .drop_duplicates()
    .reset_index(drop=True)
)
df_sample_sets

Unnamed: 0,sample_set,study_id,contributor,release
0,AG1000G-AO,AG1000G-AO,Joao Pinto,3.0
1,AG1000G-BF-A,AG1000G-BF-1,Austin Burt,3.0
2,AG1000G-BF-B,AG1000G-BF-1,Austin Burt,3.0
3,AG1000G-BF-C,AG1000G-BF-2,Nora Besansky,3.0
4,AG1000G-CD,AG1000G-CD,David Weetman,3.0
5,AG1000G-CF,AG1000G-CF,Alessandra della Torre,3.0
6,AG1000G-CI,AG1000G-CI,David Weetman,3.0
7,AG1000G-CM-A,AG1000G-CM-1,Nora Besansky,3.0
8,AG1000G-CM-B,AG1000G-CM-2,Nora Besansky,3.0
9,AG1000G-CM-C,AG1000G-CM-3,Brad White,3.0


In [None]:
def make_clickable_study(x):
    """Return an HTML link to the MalariaGEN partner study page for ``x``.

    Parameters
    ----------
    x : str
        Study identifier. It is used both as the final path segment of the
        URL and as the visible link text.

    Returns
    -------
    str
        An ``<a>`` element that opens the study page in a new tab.
    """
    url = f"https://www.malariagen.net/partner_study/{x}/"
    # rel="noopener noreferrer" keeps the newly opened tab from reaching back
    # into this page, and matches the attributes used for release links.
    return f'<a href="{url}" rel="noopener noreferrer" target="_blank">{x}</a>'


def make_clickable_release(x):
    """Return an HTML link to the vector-data release page for ``x``.

    The URL directory is the lower-cased first three characters of ``x`` and
    the page name is the whole of ``x`` lower-cased, so ``"Ag3.0"`` links to
    ``.../vector-data/ag3/ag3.0.html``. The link text is ``x`` unchanged.

    Parameters
    ----------
    x : str
        Release label.

    Returns
    -------
    str
        An ``<a>`` element that opens the release page in a new tab.
    """
    directory = x[:3].lower()
    page = x.lower()
    href = "https://malariagen.github.io/vector-data/{}/{}.html".format(
        directory, page
    )
    return '<a href="{}" rel="noopener noreferrer" target="_blank">{}</a>'.format(
        href, x
    )


# NOTE(review): df_contributors is not defined in the cells visible here;
# confirm it is created earlier in the notebook before this cell runs.
#
# Move the index into regular columns, then replace the "Study" and
# "Data release" values with HTML links.
df_sources = df_contributors.reset_index().assign(
    **{
        "Study": lambda frame: frame["Study"].apply(make_clickable_study),
        "Data release": lambda frame: frame["Data release"].apply(
            make_clickable_release
        ),
    }
)

# Captioned styler restricted to the four columns shown on the page, with the
# index hidden. The bare expression at the end displays it.
df_sources_style = (
    df_sources.loc[:, ["Sample set", "Study", "Contributor", "Data release"]]
    .style.set_caption(
        "MalariaGEN Vector Observatory partners, studies and sample sets contributing data for this cohort."
    )
    .hide(axis="index")
)
df_sources_style