In [None]:
# Prints a fixed greeting; presumably a kernel smoke test -- TODO confirm it
# is still wanted in the published notebook.
print("hello")

In [None]:
# Display the version of the Python interpreter running this kernel; the bare
# expression on the last line is rendered as the cell output.
import sys
sys.version_info

In [None]:
# Set plotly's default renderer.
# NOTE(review): per plotly's renderer documentation "+" combines several
# renderers (here "notebook_connected" and "plotly_mimetype") -- confirm both
# are required for the front-ends this notebook is published to.
import plotly.io
plotly.io.renderers.default = "notebook_connected+plotly_mimetype"

In [None]:
import malariagen_data

In [None]:
# Create the Ag3 data resource used by every cell below. The relative path
# "results_cache" is passed as the `results_cache` argument.
ag3 = malariagen_data.Ag3(results_cache="results_cache")

# Show the resource object as the cell output.
ag3

In [None]:
# Hide progress output for the blog post (currently disabled; uncomment the
# line below to apply).
# ag3._show_progress = False

## Sample selection

In [None]:
# Load the sample metadata; the result is queried and grouped like a pandas
# DataFrame in the following cell.
df_samples = ag3.sample_metadata()

In [None]:
# Count Burkina Faso samples per (sample_set, contributor, year) combination.
(
    df_samples
    .query("country == 'Burkina Faso'")
    .groupby(["sample_set", "contributor", "year"])
    .size()
)

In [None]:
# Query restricting every analysis below to samples from Burkina Faso.
sample_query = "country == 'Burkina Faso'"

# Sample sets passed to the analyses below: the two AG1000G-BF sets followed
# by the two 1191-VO-MULTI-OLOUGHLIN sets.
sample_sets = [
    "AG1000G-BF-A",
    "AG1000G-BF-B",
] + [
    "1191-VO-MULTI-OLOUGHLIN-VMF00106",
    "1191-VO-MULTI-OLOUGHLIN-VMF00140",
]

In [None]:
# Call count_samples for the selected sample sets and the Burkina Faso query;
# the result is displayed as the cell output.
ag3.count_samples(sample_query=sample_query, sample_sets=sample_sets)

## Changes in pyrethroid target-site resistance

In [None]:
# Transcript identifier reused below as the `transcript`, `region` and
# `snp_transcript` arguments. Presumably the voltage-gated sodium channel
# (Vgsc) gene, given the variable name -- TODO confirm.
vgsc_transcript = "AGAP004707-RD"

In [None]:
# Amino acid allele frequencies for the Vgsc transcript in Burkina Faso
# coluzzii samples, grouped by "admin1_name" area and by year.
ds_colu_vgsc_aa_frq = ag3.aa_allele_frequencies_advanced(
    sample_query="country == 'Burkina Faso' and taxon == 'coluzzii'",
    sample_sets=sample_sets,
    transcript=vgsc_transcript,
    period_by="year",
    area_by="admin1_name",
    # Keep only variants whose max_af is at least 0.05.
    variant_query="max_af >= 0.05",
)

# Display the resulting dataset.
ds_colu_vgsc_aa_frq

In [None]:
# Time-series plot of the coluzzii Vgsc amino acid frequencies (700 x 500).
ag3.plot_frequencies_time_series(ds_colu_vgsc_aa_frq, width=700, height=500)

* Interesting that N1570Y is declining, even though we know it confers much higher resistance.
* But P1874S is not declining, holding its own against V402L+I1527T, suggesting a comparable selection coefficient.

In [None]:
# Diplotype clustering over the Vgsc transcript region for Burkina Faso
# coluzzii samples, colored by collection year.
ag3.plot_diplotype_clustering_advanced(
    # Which samples and sites to include.
    sample_query="country == 'Burkina Faso' and taxon == 'coluzzii'",
    sample_sets=sample_sets,
    site_mask="gamb_colu",
    # Region to cluster on, and the transcript whose SNPs are passed alongside
    # (filtered with a minimum MAF of 0.03).
    region=vgsc_transcript,
    snp_transcript=vgsc_transcript,
    snp_filter_min_maf=0.03,
    # Clustering and display options.
    linkage_method="complete",
    color="year",
)

In [None]:
# Transcript identifier reused below as the `transcript` and `region`
# arguments. Presumably the Ace1 gene, given the variable name -- TODO confirm.
ace1_transcript = "AGAP001356-RA"

In [None]:
# Amino acid allele frequencies for the Ace1 transcript in Burkina Faso
# gambiae samples, grouped by "admin1_name" area and by year.
ds_gamb_ace1_aa_frq = ag3.aa_allele_frequencies_advanced(
    sample_query="country == 'Burkina Faso' and taxon == 'gambiae'",
    sample_sets=sample_sets,
    transcript=ace1_transcript,
    period_by="year",
    area_by="admin1_name",
    # Keep only variants whose max_af is at least 0.05.
    variant_query="max_af >= 0.05",
)

# Display the resulting dataset.
ds_gamb_ace1_aa_frq

In [None]:
# Gene CNV frequencies over the Ace1 transcript region in Burkina Faso
# gambiae samples, grouped by "admin1_name" area and by year.
ds_gamb_ace1_cnv_frq = ag3.gene_cnv_frequencies_advanced(
    sample_query="country == 'Burkina Faso' and taxon == 'gambiae'",
    sample_sets=sample_sets,
    region=ace1_transcript,
    period_by="year",
    area_by="admin1_name",
    # Keep only variants whose max_af is at least 0.05.
    variant_query="max_af >= 0.05",
)

# Display the resulting dataset.
ds_gamb_ace1_cnv_frq

In [None]:
# Time-series plot of the gambiae Ace1 amino acid frequencies (700 x 500).
ag3.plot_frequencies_time_series(ds_gamb_ace1_aa_frq, width=700, height=500)

In [None]:
# Time-series plot of the gambiae Ace1 CNV frequencies (700 x 500).
ag3.plot_frequencies_time_series(ds_gamb_ace1_cnv_frq, width=700, height=500)

In [None]:
import xarray as xr

In [None]:
# Stack the Ace1 amino acid and CNV frequency datasets along the "variants"
# dimension so both can be drawn in a single time-series plot.
ds_gamb_ace1_frq = xr.concat(
    [ds_gamb_ace1_aa_frq, ds_gamb_ace1_cnv_frq],
    dim="variants",
    # Concatenation options, unchanged from the original call.
    data_vars="minimal",
    coords="minimal",
    join="override",
    compat="override",
)

# Display the combined dataset.
ds_gamb_ace1_frq

In [None]:
# Time-series plot of the combined Ace1 amino acid + CNV frequencies (700 x 500).
ag3.plot_frequencies_time_series(ds_gamb_ace1_frq, width=700, height=500)

TODO remove fixed variants.

## Genome-wide scan for allele frequency changes

In [None]:
# Install an "ignore" filter for all warnings, presumably to keep the output
# of the scans below uncluttered.
# NOTE(review): this is a blanket filter with no category or module
# restriction, so it also hides deprecation and numerical warnings -- consider
# narrowing it once the specific warning being silenced is identified.
import warnings
warnings.simplefilter(action="ignore")

In [None]:
# Fst scan along contig 2RL (window_size=500) comparing Burkina Faso gambiae
# sampled in 2012 against those sampled in 2017.
ag3.plot_fst_gwss(
    # The two cohorts being compared.
    cohort1_query="country == 'Burkina Faso' and taxon == 'gambiae' and year == 2012",
    cohort2_query="country == 'Burkina Faso' and taxon == 'gambiae' and year == 2017",
    max_cohort_size=None,
    # Data selection.
    sample_sets=sample_sets,
    site_mask="gamb_colu",
    # Scan settings.
    contig="2RL",
    window_size=500,
)

In [None]:
# Fst scan along contig 2R (window_size=500) comparing Burkina Faso gambiae
# sampled in 2012 against those sampled in 2017.
# NOTE(review): the other scans in this notebook use "2RL"/"3RL"; it looks
# like "2R" overlaps the "2RL" scan -- confirm this separate scan is intended.
ag3.plot_fst_gwss(
    # The two cohorts being compared.
    cohort1_query="country == 'Burkina Faso' and taxon == 'gambiae' and year == 2012",
    cohort2_query="country == 'Burkina Faso' and taxon == 'gambiae' and year == 2017",
    max_cohort_size=None,
    # Data selection.
    sample_sets=sample_sets,
    site_mask="gamb_colu",
    # Scan settings.
    contig="2R",
    window_size=500,
)

In [None]:
# Fst scan along contig 2RL with a larger window (window_size=1_000),
# comparing Burkina Faso gambiae sampled in 2012 against those sampled in 2017.
ag3.plot_fst_gwss(
    # The two cohorts being compared.
    cohort1_query="country == 'Burkina Faso' and taxon == 'gambiae' and year == 2012",
    cohort2_query="country == 'Burkina Faso' and taxon == 'gambiae' and year == 2017",
    max_cohort_size=None,
    # Data selection.
    sample_sets=sample_sets,
    site_mask="gamb_colu",
    # Scan settings.
    contig="2RL",
    window_size=1_000,
)

In [None]:
# Fst scan along contig 3RL (window_size=500) comparing Burkina Faso gambiae
# sampled in 2012 against those sampled in 2017.
ag3.plot_fst_gwss(
    # The two cohorts being compared.
    cohort1_query="country == 'Burkina Faso' and taxon == 'gambiae' and year == 2012",
    cohort2_query="country == 'Burkina Faso' and taxon == 'gambiae' and year == 2017",
    max_cohort_size=None,
    # Data selection.
    sample_sets=sample_sets,
    site_mask="gamb_colu",
    # Scan settings.
    contig="3RL",
    window_size=500,
)

In [None]:
# Fst scan along contig X (window_size=500) comparing Burkina Faso gambiae
# sampled in 2012 against those sampled in 2017.
ag3.plot_fst_gwss(
    # The two cohorts being compared.
    cohort1_query="country == 'Burkina Faso' and taxon == 'gambiae' and year == 2012",
    cohort2_query="country == 'Burkina Faso' and taxon == 'gambiae' and year == 2017",
    max_cohort_size=None,
    # Data selection.
    sample_sets=sample_sets,
    site_mask="gamb_colu",
    # Scan settings.
    contig="X",
    window_size=500,
)

In [None]:
# Show the documentation for plot_fst_gwss. Uses the built-in help() rather
# than the IPython-only `?` suffix, so the cell is valid Python and still works
# when the notebook is exported or run outside IPython.
help(ag3.plot_fst_gwss)