In [1]:
# Development defaults for the notebook parameters. These values are
# only used when working on the notebook directly; a run driven by
# snakemake and papermill overrides them.

# Supplied manually.
alert_id = "SA-AG-01"

# Taken from the alert yaml.
label = "Voltage-gated sodium channel"
region = "2RL:63,000,000-65,000,000"
ir_candidate_genes = [
    {
        "identifier": "AGAP004707",
        "names": ["Vgsc", "Para"],
        "notes": "Blah blah blah.",
        "references": [
            {
                "citation": "Clarkson et al. (2021)",
                "title": "FOo bar",
                "url": "https://doi.org/10.1111/mec.15845",
            },
            {
                "citation": "Martinez-Torres et al. (1998)",
                "title": "Spam eggs",
                "url": "https://pubmed.ncbi.nlm.nih.gov/9535162/",
            },
        ],
    },
]

# Taken from the workflow config.yaml.
contigs = ["2RL", "3RL", "X"]
cohorts_analysis = "20240924"
analysis_version = "2025.02.13"
dask_scheduler = "single-threaded"

In [2]:
# Parameters
# NOTE(review): this cell looks like the per-run parameter cell (presumably
# injected by papermill - confirm). Several names assigned here
# (analysis_version, contigs, cohorts_analysis, dask_scheduler) re-bind the
# development defaults set in the first cell, so these are the values in
# effect for the rest of the notebook.
analysis_version = "2025.02.25"
min_cohort_size = 15
max_cohort_size = 100
sample_sets = [
    "3.0",
    "1237-VO-BJ-DJOGBENOU-VMF00050",
    "1237-VO-BJ-DJOGBENOU-VMF00067",
    "1244-VO-GH-YAWSON-VMF00051",
    "1245-VO-CI-CONSTANT-VMF00054",
    "1253-VO-TG-DJOGBENOU-VMF00052",
    "1178-VO-UG-LAWNICZAK-VMF00025",
    "1244-VO-GH-YAWSON-VMF00149",
    "barron-2019",
    "crawford-2016",
    "tennessen-2021",
    "bergey-2019",
    "campos-2021",
    "fontaine-2015-rebuild",
]
sample_query = "taxon in ['gambiae', 'coluzzii', 'arabiensis', 'bissau']"
contigs = ["2RL", "3RL", "X"]
cohorts_analysis = "20240924"
h12_calibration_contig = "3L"
h12_signal_detection_min_delta_aic = 1000
h12_signal_detection_min_stat_max = 0.1
h12_signal_detection_gflanks = [6]
dask_scheduler = "single-threaded"
alerts = ["01", "02", "03", "04", "05", "06", "07", "08", "09"]
# Alert-specific values for this run. They replace the development
# placeholders for label, region, ir_candidate_genes and alert_id that are
# assigned in the first cell.
# The region string has the form "contig:start-stop" with comma thousands
# separators; it is split into its parts in the setup cell below.
label = "Dgk"
region = "X:9,127,402-9,363,702"
# Each candidate gene is a dict with "identifier", "names", "notes" and
# "references"; each reference has "citation", "url" and "title". These keys
# are read when the gene summary HTML is built at the end of the notebook.
# The "notes" and "title" strings here end with a newline (presumably an
# artefact of YAML block scalars - confirm).
ir_candidate_genes = [
    {
        "identifier": "AGAP000519",
        "names": ["Dgk", "Rdga", "diacylglycerol kinase"],
        "notes": "This gene encodes a diacylglycerol kinase. This gene has not been directly implicated in resistance to pesticides in any insect species to date, but there are two hypotheses regarding a potential link to insecticide resistance in Anopheles based on studies in other species. (1) In several systems, Dgk genes act as negative regulators of synaptic transmission between cholinergenic neurons by limiting the amount of acetylcholine available at synaptic junctions. In C. elegans, loss of function mutations in a Dgk gene cause hyperactivity and hypersensitivity to aldicarb, a carbamate insecticide, presumably because of an increase in acetylcholine levels available at synaptic junctions. If Dgk performs a similar function in Anopheles, then a gain of function mutation might reduce sensitivity to carbamate and/or organophosphate insecticides. (2) In Drosophila melanogaster, the ortholog of this gene Rdga is expressed exclusively in the retina and modulates sensitivity to light, via the same pathway and mechanism described above. This gene is also under circadian control in Anopheles gambiae. If mutations in this gene affected sensitivity to light in Anopheles, and the gene is expressed under a circadian rhythm, then such mutations might also affect circadian behaviours, such as the timing of host-seeking and feeding behaviours.\n",
        "references": [
            {
                "citation": "Miller et al. (1999)",
                "url": "https://pmc.ncbi.nlm.nih.gov/articles/PMC4703424/",
                "title": "Go\u03b1 and Diacylglycerol Kinase Negatively Regulate the Gq\u03b1 Pathway in C. elegans\n",
            },
            {
                "citation": "Rund et al. (2011)",
                "url": "https://pmc.ncbi.nlm.nih.gov/articles/PMC3156198/",
                "title": "Genome-wide profiling of diel and circadian gene expression in the malaria vector Anopheles gambiae\n",
            },
            {
                "citation": "Kientega et al. (2024)",
                "url": "https://pmc.ncbi.nlm.nih.gov/articles/PMC11406867/",
                "title": "Whole-genome sequencing of major malaria vectors reveals the evolution of new insecticide resistance variants in a longitudinal study in Burkina Faso\n",
            },
        ],
    }
]
alert_id = "SA-AG-09"


In [3]:
from pyprojroot import here

# Resolve the project root, then run the shared setup scripts. The -i flag
# makes %run execute each script in this notebook's namespace, so names the
# scripts define become available to later cells.
# NOTE(review): names used below and in later cells (output_notebook, HTML,
# load_signals, plot_signals) are not imported in this notebook, so they
# presumably come from these scripts - confirm.
root = here()
%run -i {root}/workflow/common/scripts/setup.py
%run -i {root}/workflow/site/scripts/page-setup.py

# Split the "contig:start-stop" region parameter into its parts.
# region_span keeps the comma thousands separators (it is shown to the
# reader as-is), while region_start / region_stop have the commas removed.
# Note that region_start and region_stop remain strings here.
region_contig, region_span = region.split(":")
region_start, region_stop = region_span.replace(",", "").split("-")

# N.B., do not add the "remove-output" tag to this cell!!! If you do,
# the bokeh javascript libraries will not get loaded in the generated
# HTML page. The call to output_notebook() injects javascript in the
# cell output which triggers the bokeh javascript libraries to be loaded
# in the page.
output_notebook(hide_banner=True)

# Alert `SA-AG-09` (Dgk)

In [4]:
# One-sentence summary of where the alert lies; region_span is the
# human-readable "start-stop" text with its thousands separators intact.
html = (
    "This alert reports selection signals on Chromosome "
    + region_contig
    + " within the region "
    + region_span
    + " bp."
)
HTML(html)

HTML(value='This alert reports selection signals on Chromosome X within the region 9,127,402-9,363,702 bp.')

## Selection signals

Selection signals overlapping this genome region are shown in the figure below.

In [5]:
# Load the selection signals overlapping the alert region.
df_signals = load_signals(contig=region_contig, start=region_start, stop=region_stop)

# Frame the plot on the extent of the span2_pstart / span2_pstop columns,
# padded by 50,000 on each side (presumably base pairs).
signals_x_min = df_signals["span2_pstart"].min() - 50_000
signals_x_max = df_signals["span2_pstop"].max() + 50_000

plot_signals(
    df=df_signals,
    contig=region_contig,
    x_min=signals_x_min,
    x_max=signals_x_max,
)

## Cohorts affected
Overlapping signals of selection are found in the following cohorts. 

In [6]:
# Build a bulleted list of links to the pages of the affected cohorts.
#
# df_signals may contain more than one row for the same cohort (presumably
# one row per signal - confirm), so reduce it to one row per cohort_id
# before building links; otherwise a cohort would be listed repeatedly.
df_signal_cohorts = (
    df_signals[["cohort_id", "cohort_label"]]
    .drop_duplicates(subset="cohort_id")
    .sort_values("cohort_label")
)

# One anchor per cohort, pointing at that cohort's page.
cohort_links = [
    f'<a href="../cohort/{cohort_id}.html">{cohort_label}</a>'
    for cohort_id, cohort_label in zip(
        df_signal_cohorts["cohort_id"], df_signal_cohorts["cohort_label"]
    )
]

# <li> elements are only valid inside a list container, so wrap them in
# <ul>. Building each item separately also means an empty frame produces an
# empty list rather than a stray empty bullet.
html_message = "<ul>" + "".join(f"<li>{link}</li>" for link in cohort_links) + "</ul>"
HTML(html_message)

HTML(value='<li><a href="../cohort/BF-02_Comoe_colu_2012.html">Burkina Faso / Comoe / coluzzii / 2012</a></li>…

## Insecticide resistance genes

The following genes are found within this genome region and, based on evidence for an association with insecticide resistance, may be driving recent positive selection.

In [7]:
# Render one HTML summary per candidate gene: a linked gene identifier,
# optional alternative names, free-text notes, and a list of references.
for gene in ir_candidate_genes:
    gene_id = gene["identifier"]
    # Only the identifier is required; treat the other fields as optional so
    # that a sparse entry does not abort rendering of the page.
    gene_names = gene.get("names") or []
    notes = (gene.get("notes") or "").strip()
    references = gene.get("references") or []

    html = (
        f'<p><strong><a href="https://vectorbase.org/vectorbase/app/record/gene/{gene_id}">'
        f"{gene_id}</a></strong>"
    )
    if gene_names:
        html += " (" + ", ".join(gene_names) + ")"
    if notes:
        html += " — " + notes

    if references:
        # A <ul> cannot be nested inside a <p>: the paragraph is implicitly
        # closed when the list starts, and a trailing </p> would then create
        # an empty paragraph. Close the paragraph before opening the list.
        html += " See also:</p><ul>"
        for reference in references:
            citation = reference["citation"]
            url = reference["url"]
            title = reference["title"].strip()
            html += f'<li><a href="{url}">{citation}</a> {title}</li>'
        html += "</ul>"
    else:
        # No references: omit the "See also:" lead-in and the empty list.
        html += "</p>"

    display(HTML(html))

HTML(value='<p><strong><a href="https://vectorbase.org/vectorbase/app/record/gene/AGAP000519">AGAP000519</a></…