In [1]:
# Development defaults for the notebook parameters. When the notebook is
# executed via snakemake and papermill, these values are overridden.

# --- Supplied by hand ---
alert_id = "SA-AG-01"

# --- Taken from the alert yaml ---
label = "Voltage-gated sodium channel"
region = "2RL:63,000,000-65,000,000"
ir_candidate_genes = [
    {
        "identifier": "AGAP004707",
        "names": ["Vgsc", "Para"],
        "notes": "Blah blah blah.",
        "references": [
            {
                "citation": "Clarkson et al. (2021)",
                "title": "FOo bar",
                "url": "https://doi.org/10.1111/mec.15845",
            },
            {
                "citation": "Martinez-Torres et al. (1998)",
                "title": "Spam eggs",
                "url": "https://pubmed.ncbi.nlm.nih.gov/9535162/",
            },
        ],
    },
]

# --- Taken from the workflow config.yaml ---
contigs = ["2RL", "3RL", "X"]
cohorts_analysis = "20240924"
analysis_version = "2025.02.13"
dask_scheduler = "single-threaded"

In [2]:
# Parameters
# These assignments replace the development defaults from the cell above.

# --- Alert being reported ---
alert_id = "SA-AG-05"
label = "Cyp9k1"
region = "X:14,992,258-15,583,310"
alerts = ["01", "02", "03", "04", "05", "06", "07", "08", "09"]

# --- Analysis and data release settings ---
analysis_version = "2025.02.25"
cohorts_analysis = "20240924"
contigs = ["2RL", "3RL", "X"]
dask_scheduler = "single-threaded"

# --- Sample selection and cohort size limits ---
sample_query = "taxon in ['gambiae', 'coluzzii', 'arabiensis', 'bissau']"
min_cohort_size = 15
max_cohort_size = 100
sample_sets = [
    "3.0",
    "1237-VO-BJ-DJOGBENOU-VMF00050",
    "1237-VO-BJ-DJOGBENOU-VMF00067",
    "1244-VO-GH-YAWSON-VMF00051",
    "1245-VO-CI-CONSTANT-VMF00054",
    "1253-VO-TG-DJOGBENOU-VMF00052",
    "1178-VO-UG-LAWNICZAK-VMF00025",
    "1244-VO-GH-YAWSON-VMF00149",
    "barron-2019",
    "crawford-2016",
    "tennessen-2021",
    "bergey-2019",
    "campos-2021",
    "fontaine-2015-rebuild",
]

# --- H12 calibration and signal detection settings ---
h12_calibration_contig = "3L"
h12_signal_detection_min_delta_aic = 1000
h12_signal_detection_min_stat_max = 0.1
h12_signal_detection_gflanks = [6]

# --- Candidate insecticide resistance genes within the alert region ---
# Each entry carries an identifier, alternative names, free-text notes and
# a list of literature references (citation, url, title).
ir_candidate_genes = [
    {
        "identifier": "AGAP000818",
        "names": ["Cyp9k1", "cytochrome P450"],
        "notes": "This gene encodes a cytochrome P450 enzyme which is associated with pyrethroid resistance through increased expression and where copy number variation has been observed in Anopheles gambiae and Anopheles coluzzii.\n",
        "references": [
            {
                "citation": "Main et al. (2015)",
                "url": "https://pmc.ncbi.nlm.nih.gov/articles/PMC4615556/",
                "title": "Complex genome evolution in Anopheles coluzzii associated with increased insecticide usage in Mali\n",
            },
            {
                "citation": "Main et al. (2018)",
                "url": "https://pmc.ncbi.nlm.nih.gov/articles/PMC5885317/",
                "title": "Genetic variation associated with increased insecticide resistance in the malaria mosquito, Anopheles coluzzii\n",
            },
            {
                "citation": "Vontas et al. (2018)",
                "url": "https://pmc.ncbi.nlm.nih.gov/articles/PMC5939083/",
                "title": "Rapid selection of a pyrethroid metabolic enzyme CYP9K1 by operational malaria control activities\n",
            },
            {
                "citation": "Lucas et al. (2019)",
                "url": "https://pmc.ncbi.nlm.nih.gov/articles/PMC6673711/",
                "title": "Whole-genome sequencing reveals high complexity of copy number variation at insecticide resistance loci in malaria mosquitoes\n",
            },
            {
                "citation": "Tennessen et al. (2020)",
                "url": "https://pmc.ncbi.nlm.nih.gov/articles/PMC7858241/",
                "title": "A population genomic unveiling of a new cryptic mosquito taxon within the malaria-transmitting Anopheles gambiae complex\n",
            },
            {
                "citation": "Lucas et al. (2023)",
                "url": "https://pmc.ncbi.nlm.nih.gov/articles/PMC10432508/",
                "title": "Genome-wide association studies reveal novel loci associated with pyrethroid and organophosphate resistance in Anopheles gambiae and Anopheles coluzzii\n",
            },
        ],
    }
]


In [3]:
# Resolve the project root so the shared setup scripts can be referenced
# by a path that does not depend on the notebook's working directory.
from pyprojroot import here

root = here()
# Run the setup scripts inside this notebook's namespace (-i), so that the
# names they define are available to the cells below.
# NOTE(review): presumably these scripts provide HTML, load_signals,
# plot_signals and output_notebook, none of which are imported in the
# visible cells — confirm against the scripts.
%run -i {root}/workflow/common/scripts/setup.py
%run -i {root}/workflow/site/scripts/page-setup.py

# Split the region string (e.g. "X:14,992,258-15,583,310") into the contig
# name and the coordinate span, then drop the thousands separators from the
# span to obtain the start and stop. Both values remain strings here.
region_contig, region_span = region.split(":")
region_start, region_stop = region_span.replace(",", "").split("-")

# N.B., do not add the "remove-output" tag to this cell!!! If you do,
# the bokeh javascript libraries will not get loaded in the generated
# HTML page. The call to output_notebook() injects javascript in the
# cell output which triggers the bokeh javascript libraries to be loaded
# in the page.
output_notebook(hide_banner=True)

# Alert `SA-AG-05` (Cyp9k1)

In [4]:
# One-sentence summary of where in the genome this alert applies. Both
# operands are strings produced by splitting the region parameter.
html = (
    "This alert reports selection signals on Chromosome "
    + region_contig
    + " within the region "
    + region_span
    + " bp."
)
HTML(html)

HTML(value='This alert reports selection signals on Chromosome X within the region 14,992,258-15,583,310 bp.')

## Selection signals

Selection signals overlapping this genome region are shown in the figure below.

In [5]:
# Fetch the selection signals that overlap the alert region.
df_signals = load_signals(contig=region_contig, start=region_start, stop=region_stop)

# Plot every signal found, with the x-axis running from the smallest
# span2_pstart to the largest span2_pstop, widened by 50,000 on each side.
signal_starts = df_signals["span2_pstart"]
signal_stops = df_signals["span2_pstop"]
plot_signals(
    df=df_signals,
    contig=region_contig,
    x_min=signal_starts.min() - 50_000,
    x_max=signal_stops.max() + 50_000,
)

## Cohorts affected
Overlapping signals of selection are found in the following cohorts. 

In [6]:
# Build a bulleted list of links to the page of each cohort that has a
# selection signal overlapping this region.

# A cohort may have more than one overlapping signal; list each cohort once.
cohorts_affected = (
    df_signals[["cohort_id", "cohort_label"]]
    .drop_duplicates()
    .sort_values("cohort_label")
)

# itertuples avoids building a Series per row, which iterrows does.
cohort_links = [
    f'<a href="../cohort/{cohort_id}.html">{cohort_label}</a>'
    for cohort_id, cohort_label in cohorts_affected.itertuples(index=False, name=None)
]

if cohort_links:
    # <li> elements are only valid inside a list container, so wrap them
    # in <ul> rather than emitting bare list items.
    html_message = "<ul><li>" + "</li><li>".join(cohort_links) + "</li></ul>"
else:
    # Without this guard an empty table would render a single empty bullet.
    html_message = "<p>No cohorts with overlapping selection signals were found.</p>"
HTML(html_message)

HTML(value='<li><a href="../cohort/BJ-OU_Avrankou_colu_2017_Q3.html">Benin / Avrankou / coluzzii / 2017 / Q3</…

## Insecticide resistance genes

The following genes are located within this genome region and have evidence of an association with insecticide resistance, so they may be driving the recent positive selection.

In [7]:
# Render one block per candidate gene: a linked gene identifier, optional
# alternative names and notes, then a bulleted list of literature references.
for gene in ir_candidate_genes:
    gene_id = gene["identifier"]
    # Names, notes and references are treated as optional so that a sparse
    # entry does not abort the whole page with a KeyError.
    gene_names = gene.get("names") or []
    # Strip whitespace, since the notes and titles can end with a newline.
    notes = (gene.get("notes") or "").strip()
    references = gene.get("references") or []

    parts = [
        f'<p><strong><a href="https://vectorbase.org/vectorbase/app/record/gene/{gene_id}">{gene_id}</a></strong>'
    ]
    if gene_names:
        parts.append(" (" + ", ".join(gene_names) + ")")
    if notes:
        parts.append(f" — {notes}")
    if references:
        parts.append(" See also:")
    # Close the paragraph before the list: a <ul> is not allowed inside <p>.
    parts.append("</p>")

    # Only emit the list when there is something to put in it.
    if references:
        parts.append("<ul>")
        for reference in references:
            citation = reference["citation"]
            url = reference["url"]
            title = (reference.get("title") or "").strip()
            parts.append(f'<li><a href="{url}">{citation}</a> {title}</li>')
        parts.append("</ul>")

    html = "".join(parts)
    display(HTML(html))

HTML(value='<p><strong><a href="https://vectorbase.org/vectorbase/app/record/gene/AGAP000818">AGAP000818</a></…