In [6]:
import malariagen_data
import pandas as pd
import numpy as np
from pyprojroot import here

import bokeh.layouts as bklay
import bokeh.plotting as bkplt
import bokeh.models as bkmod

In [7]:
from myst_nb import glue

# Contig shown on this page. It is glued under the key "chrom" so the
# markdown text can reference it with {glue:}`chrom`.
contig = "2L"
glue("chrom", contig)

'2L'

### Ag Chromosome {glue:}`chrom`

The plot below shows selection signals discovered in the major vector species *An. gambiae*, *An. coluzzii* and *An. arabiensis*, all of which are members of the *Anopheles gambiae* species complex. The reference genome used for these analyses is AgamP4; the signals shown here are located on chromosome {glue:}`chrom`.

Hover over a signal for more information about the species, location and date of the cohort in which the signal was found, and the selection statistic used to detect it. Click on a signal to see the underlying selection scan data.

In [8]:
# Handle on the Ag3 data resource.
ag3 = malariagen_data.Ag3()

# Display colour for the signals of each taxon.
color_dict = {
    'gambiae': '#BEC4FF',
    'coluzzii': '#D7B2A6',
    'arabiensis': '#A6D7CA',
}

# Cohort table written by the build; the `cohort_id` and `taxon` columns are
# used below to locate and label each cohort's signal file.
cohorts = pd.read_csv(here() / "build/" / "cohorts.csv")

# Read one H12 signal-detection CSV per cohort for the current contig and tag
# every row with the taxon of the cohort it came from.
signals_dir = here() / "build/h12-signal-detection/"
per_cohort_signals = []
for cohort_id, taxon in zip(cohorts['cohort_id'], cohorts['taxon']):
    cohort_signals = pd.read_csv(signals_dir / f"{cohort_id}_{contig}.csv")
    per_cohort_signals.append(cohort_signals.assign(taxon=taxon))

# Stack the per-cohort tables (each table keeps its own row labels), then add
# the statistic name and the per-taxon colour column.
df_signals = pd.concat(per_cohort_signals, axis=0).assign(statistic="H12")
df_signals['color'] = df_signals['taxon'].map(color_dict)

In [109]:
# Give every signal a unique positional index (0..n-1); that index is used as
# the vertical track on which the signal is drawn.
df = df_signals.reset_index()

# Index of the top-most track. Derived from the row count and clamped at 0 so
# that an empty signal table still produces a valid (empty) figure, where
# `max(df.index)` would raise ValueError.
top_track = max(len(df) - 1, 0)

source = bkmod.ColumnDataSource(data={
    'cohort': df.cohort_id,
    'statistic': df.statistic,
    'chromosome': df.contig,
    'score': df.delta_i.astype(int),
    'peak_start': df.span2_pstart,
    'peak_stop': df.span2_pstop,
    'focus_start': df.focus_pstart,
    'focus_stop': df.focus_pstop,
    'bottom': df.index,
    'top': df.index + .8,
    'color': df.color,
})

hover = bkmod.HoverTool(tooltips=[
    ("Cohort", '@cohort'),
    ("Statistic", '@statistic'),
    ("Score", '@score'),
    ("Focus", "@focus_start{,} - @focus_stop{,}"),
])

# Tool names are listed explicitly; the hover tool instance is appended so the
# tooltips defined above are used.
tools = ["tap", "xpan", "xzoom_in", "xzoom_out", "xwheel_zoom", "reset", hover]

# Signal track figure. The height grows by 10 px per signal row.
# `width`/`height` are used rather than `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
fig1 = bkplt.figure(
    title='Selection signals',
    width=900,
    height=200 + (10 * top_track),
    tools=tools,
    toolbar_location='above',
    active_drag='xpan',
    active_scroll='xwheel_zoom',
)

# Each signal is drawn as three adjacent boxes on its own track:
# left flank (peak start -> focus start) in the taxon colour,
# the focus region in red, and
# right flank (focus stop -> peak stop) in the taxon colour.
fig1.quad(bottom='bottom', top='top', left='peak_start', right='focus_start',
          source=source, color="color", alpha=.7, line_width=2)

fig1.quad(bottom='bottom', top='top', left='focus_start', right='focus_stop',
          source=source, color="red", alpha=.7, line_width=2)

fig1.quad(bottom='bottom', top='top', left='focus_stop', right='peak_stop',
          source=source, color="color", alpha=.7, line_width=2)

# The x axis spans the whole contig; the y axis leaves a small margin around
# the tracks. Axes and the horizontal grid are hidden because the y position
# carries no meaning beyond separating signals.
fig1.x_range = bkmod.Range1d(0, ag3.genome_sequence(contig).shape[0])
fig1.y_range = bkmod.Range1d(-0.5, top_track + 1.3)
fig1.yaxis.visible = False
fig1.xaxis.visible = False
fig1.ygrid.visible = False

# Open a URL when a signal is clicked.
# NOTE(review): the URL is a constant with no `@field` placeholder, so every
# signal opens the same page -- confirm whether a per-signal link is intended.
url = "https://sanjaynagi.github.io/"
taptool = fig1.select(type=bkmod.TapTool)
taptool.callback = bkmod.OpenURL(url=url)

# Gene track underneath, sharing the x range so pan/zoom stay in sync.
fig2 = ag3.plot_genes(
    region=contig,
    sizing_mode="stretch_width",
    x_range=fig1.x_range,
    show=False)

fig = bklay.gridplot(
    [fig1, fig2],
    ncols=1,
    toolbar_location="above",
    merge_tools=True,
    sizing_mode="stretch_width",
)

bkplt.show(fig)

In [116]:
# Tabulate each signal's focus region, statistic and score next to its cohort
# label. No `on=` is passed, so pandas joins on every column name the two
# frames have in common.
summary_columns = ['contig', 'focus_pstart', 'focus_pstop', 'cohort_label', 'statistic', 'delta_i']
df_signals.merge(cohorts).loc[:, summary_columns]

Unnamed: 0,contig,focus_pstart,focus_pstop,cohort_label,statistic,delta_i
0,2L,2515957,2775707,Burkina Faso / Houet / coluzzii / 2012 / Q3,H12,2887
1,2L,25371531,25495793,Burkina Faso / Houet / coluzzii / 2012 / Q3,H12,8472
2,2L,42192119,42217717,Burkina Faso / Houet / coluzzii / 2012 / Q3,H12,1421
3,2L,2438823,2793237,Burkina Faso / Houet / gambiae / 2012 / Q3,H12,1499
4,2L,25417632,25488784,Burkina Faso / Houet / gambiae / 2012 / Q3,H12,2440
5,2L,28430203,28635226,Burkina Faso / Houet / gambiae / 2012 / Q3,H12,4188
6,2L,37315402,37394091,Burkina Faso / Houet / gambiae / 2012 / Q3,H12,697
7,2L,42452450,42497920,Burkina Faso / Houet / gambiae / 2012 / Q3,H12,1330
8,2L,45978049,46010007,Burkina Faso / Houet / gambiae / 2012 / Q3,H12,2370
