# Identifying V<sub>H</sub>Hs for resynthesis

This notebook generates an interactive dashboard for each antigen, with the relevant CDR3s already selected. For the sake of memory and responsiveness, the dashboards are not shown; to display one, uncomment the corresponding `# dash_...` cell. By default, the notebook just extracts the relevant visualizations, then saves the charts and underlying data to files. This still takes a while because the plots are large.

In [1]:
import nbseq
import os

# Move to the project root (`./panning-extended`) so feature tables, configs, etc.
# can be reached with short relative paths.
# `dir_changed` acts as a run-once sentinel: re-running this cell must not climb
# another two directories and leave the kernel somewhere confusing.
try:
    dir_changed
except NameError:
    os.chdir('../../')
    dir_changed = True

In [2]:
# Load the experiment from files. The `aa` feature table and tree are explicitly
# set to None; only the `cdr3` tree, `cdr3` feature data, and sample metadata are given.
# All paths are relative, so this relies on the working directory set in the first cell.
ex = nbseq.Experiment.from_files(
    ft_aa=None,
    tree_aa=None, 
    tree_cdr3='intermediate/cdr3/features/all/alpaca/msa-muscle.nwk', 
    fd_cdr3='results/tables/cdr3/asvs.csv',
    metadata='config/metadata_full.csv'
) 

Loading experiment panning-extended from '/vast/palmer/home.mccleary/cng2/code/phageseq-paper/panning-extended'...
- Reading metadata from config/metadata_full.csv ...
- Reading phenotypes from config/phenotypes.csv ...
- Reading Config from config/config.yaml ...
- Using SQL database at 'sqlite:////vast/palmer/home.mccleary/cng2/code/phageseq-paper/panning-extended/intermediate/aa/asvs.db'
- Reading feature data for table 'cdr3' from results/tables/cdr3/asvs.csv (2.6 MB)...
- Reading cdr3 feature table from results/tables/cdr3/feature_table.biom (8.4 MB)...
- Using mmseqs2 database 'cdr3' at 'intermediate/cdr3/features_db/features'
- Reading enrichment model (conditional ECDF) for space cdr3 from results/tables/cdr3/enrichment/null/ecdf.pickle (307.6 kB)...
Finished in 0.41 seconds


In [3]:
# Bare expression: display the experiment's repr (feature spaces and `obs` columns)
ex

Experiment('panning-extended') with feature spaces ['cdr3']:
  obs: ['plate.x' 'well.x' 'depth' 'expt' 'round' 'sample' 'phage_library'
    'notes' 'r' 'io' 'kind' 'selection' 'replicate' 'name_full' 'name'
    'well_027e' 'sel_plate_027i' 'sel_well_027i' 'selection_027j' 'plate.y'
    'well.y' 'category' 'antigen' 'genotype_pair' 'gene_CS' 'gene_S'
    'genotype_CS' 'background_CS' 'strain_CS' 'loc_CS' 'cond_CS' 'genotype_S'
    'background_S' 'strain_S' 'loc_S' 'cond_S' 'cond_notes' 'bflm' 'swim'
    'twitch' 'swarm' 'PMB-R' 'FEP-R' 'TET-R' 'CIP-R' 'CHL-R' 'GEN-R' 'ERY-R'
    'IPM-R' 'cdiGMP' 'FliC' 'FliCa' 'FliCb' 'FlgEHKL' 'PilQ' 'PilA' 'PilB'
    'LasA' 'LasB' 'Apr' 'XcpQ' 'ToxA' 'EstA' 'LepA' 'PlpD' 'Phz' 'Pcn' 'Pvd'
    'Hcn' 'Rhl' 'T3SS' 'T6SS' 'Pel' 'Psl' 'CdrB' 'SCV' 'Mucoid' 'Alginate'
    'OprM' 'OprJ' 'OprN' 'OprOP' 'OpdH' 'OprD' 'OprL' 'OprF' 'OprG' 'OprH'
    'OprB' 'MexAB' 'MexCD' 'MexEF' 'MexJK' 'MexXY' 'MexGHI' 'PirA' 'Pfu'  'TonB'
    'FptA' 'FpvA' 'PfeA' 'CupB5' 'Cu

In [4]:
from nbseq.viz.utils import extract_encoded_data

In [5]:
import nbseq.viz
import nbseq.viz.syntax
import nbseq.viz.dash

# load styles to view collapsible accordions and color-coded amino acid and nucleic acid strings in the notebook
nbseq.viz.setup_accordion()
nbseq.viz.syntax.aa_highlighter.setup_notebook()
nbseq.viz.syntax.na_highlighter.setup_notebook()
 
# import libraries for interactive visualization
import altair as alt
# use Altair's "default" data transformer and turn off its maximum-row check,
# since (per the notebook introduction) the plots generated here are large
alt.data_transformers.enable("default")
alt.data_transformers.disable_max_rows()

import panel as pn
# load the Panel extensions named 'tabulator' and 'vega'
pn.extension('tabulator','vega')

In [6]:
# NOTE(review): this repeats the `disable_max_rows()` call already made in the
# visualization setup cell; it looks redundant — confirm before removing.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [7]:
def make_selection(features):
    """Wrap each feature identifier in a single-key ``{"feature": ...}`` dict.

    Returns a new list with one dict per element of `features`, in iteration order.
    """
    selection = []
    for feature_id in features:
        selection.append({"feature": feature_id})
    return selection

In [8]:
%%bash
# Start from empty output directories: wipe and recreate the plot directory,
# then the figure-table directory.
for out in "results/plots/picking/" "results/tables/figures/picking/"; do
    rm -rf "$out"
    mkdir -p "$out"
done

In [9]:
def vegalite_to_csv(input, output, subplots=True, show=False, index=False,
                         extra_fields = [],
                         rename={'name':'selection', '-log10(binary p-value)':'-log10(binary enrichment probability)'}, **kwargs):
    """
    Take an altair chart or load a Vega-Lite plot from JSON file and convert to CSV files, one per subplot, using `extract_encoded_data`

    Parameters
    ----------
    input : altair chart, str, or pathlib.Path
        A chart object, or the path of a Vega-Lite JSON file to load with
        `alt.Chart.from_json`.
    output : str or pathlib.Path
        When `subplots` is true, a directory that receives one `<subplot>.csv`
        per subplot; otherwise the path of the single CSV file to write.
    subplots : bool
        Whether to split the extracted data by its 'subplot' column.
    show : bool
        If true, `display` each output path and the table written to it.
    index : bool
        Forwarded to `DataFrame.to_csv(index=...)`.
    extra_fields : list
        Forwarded unchanged to `extract_encoded_data`. The default list is only
        read here, never mutated.
    rename : dict
        Column renames applied to the extracted table before writing. The
        default dict is only read here, never mutated.
    **kwargs
        Forwarded to `DataFrame.to_csv`.
    """
    from pathlib import Path

    if isinstance(input, (str, Path)):
        # JSON text is UTF-8; decode it explicitly instead of relying on the
        # platform's locale-dependent default encoding.
        chart = alt.Chart.from_json(Path(input).read_text(encoding='utf-8'))
    else:
        chart = input

    # NOTE(review): `overwrite` presumably swaps the jittered plotting fields for
    # their un-jittered source columns — confirm against `extract_encoded_data`.
    df = extract_encoded_data(chart, view_name_col=('subplot' if subplots else None), overwrite={'f_samples_pos_sig_jitter': 'f_samples_pos_sig', 'binary_nlogp_jitter': 'binary_nlogp'}, extra_fields=extra_fields)
    df = df.rename(columns=rename)

    if subplots:
        nbseq.utils.mkdirp(output)
        out_dir = Path(output)
        for title, dff in df.groupby('subplot'):
            # drop columns that carry no data for this subplot, then the grouping key itself
            dff = dff.dropna(axis='columns', how='all').drop('subplot', axis=1)
            fn = str(out_dir / f"{title}.csv")
            if show:
                display(fn)
                display(dff)
            dff.to_csv(fn, index=index, **kwargs)
    else:
        nbseq.utils.mkdirp_file(output)
        df.to_csv(output, index=index, **kwargs)
        if show:
            display(output)
            display(df)

In [10]:
def get_chart_from_selection_group_dash(dash):
    """Pull the altair chart out of a selection_group dashboard.

    The chart sits several levels deep in the hierarchy of Holoviz Panel objects.
    Each step of the descent below is annotated with the repr observed at that level.
    """
    # dash.objects =
    #   [Row(...) holding TextInput(name='global_query', ...) and Select(name='space', ...),
    #    ParamFunction(function, _pane=Column, defer_load=False, loading_indicator=True)]
    outer_param_fn = dash.objects[1]

    # Column(sizing_mode='stretch_width')
    #   [0] Row(sizing_mode='stretch_width')
    #   [1] ParamFunction(function, _pane=Column, defer_load=False, loading_indicator=True)
    outer_column = outer_param_fn._pane
    inner_param_fn = outer_column.objects[1]

    # Column(sizing_mode='stretch_width')
    #   [0] Vega(VConcatChart, debounce=1000, selection=Selection, show_actions=True)
    inner_column = inner_param_fn._pane
    vega_pane = inner_column.objects[0]

    # the object wrapped by the Vega pane: alt.VConcatChart(...)
    return vega_pane.object

In [11]:
def save_chart_and_data(chart, name, show=False):
    """Save a picking figure as SVG and Vega-Lite JSON, then export its data with `vegalite_to_csv`.

    `name` completes the `fig-suppl-picking-` filename prefix for all three outputs;
    `show` is forwarded to `vegalite_to_csv`.
    """
    svg_path = f'results/plots/picking/fig-suppl-picking-{name}.svg'
    vegalite_path = f'results/plots/picking/fig-suppl-picking-{name}.vl.json'
    for plot_path in (svg_path, vegalite_path):
        chart.save(plot_path)

    # with the default subplots=True, vegalite_to_csv treats this path as a directory of CSVs
    table_path = f'results/tables/figures/picking/fig-suppl-picking-{name}'
    vegalite_to_csv(chart, table_path, show=show)

## OprM

In [12]:
# clones targeting multiple porins
resynthesized_multi_clones = [
    "23e31ef458dd55c2ca3a401c48199cd7", # A11 : OprM, OprN
    "be7f0a382362bea500a77c549ae4b89f", # B11 : OprM, OprN
    "7234998c215928b0ee8bdafa202ae9e4", # C11 : OprM, OprN, OprJ
    "17b36ed0dee65d5c2f7c9de75279c527", # D11 : OprM, PilQ
    "867e5b900d55bf6c1c47127f72022536", # E11 : OprN, FlgEHKL?
    "b06a62b6aa922d6d6dc34a3d9448c2a4", # F11 : OprM, OprN, OprJ
    "b56082f7d915e96c35d2215dd5c30211", # G11 : OprN, OprM
]

In [13]:
# Feature IDs of the clones resynthesized for OprM. Trailing comments are labels
# (presumably plate wells — TODO confirm).
resynthesized_oprM_clones = [
    "921ca0d3eba15f7dbd62a5263b2a0dc2", # B5
    "a69dc1910533351ae1bdb24dd63e1d55", # C5
    "3e95174072009ab9efad1818533b4e4d", # D5
    "b1d9dd62480caef69065aae29f4c2951", # E5
    "b4d91bce115427b0985072b948c64cf6", # F5
    "79f5779c4414afa8a30076a3b59450f4", # G5
    "73625727e049af5e138bc248d9c77bed", # H5
    "0a687cbe05ea7f80e69401a5279c6d79", # A6
    "858c526bcaa23e41ffeef109638993de", # B6
    "4e4ded3881abed85e346184fb94bc197", # C6
    "321242cb7e04c9afaf814ae4eb3c7bdf", # D6
    "acc9211fb1630c461ef6ef4c3a430c37", # E6
    "aee77492bc619f89c13fee88ed2460ed", # F6
    "f19422704015e52a3537d7c30a85c822", # G6
    "7a08aec063cbd0310a4760ed965832c6", # H6
    
    # the remaining entries repeat the IDs listed in `resynthesized_multi_clones`
    "23e31ef458dd55c2ca3a401c48199cd7", # A11
    "be7f0a382362bea500a77c549ae4b89f", # B11
    "7234998c215928b0ee8bdafa202ae9e4", # C11
    "17b36ed0dee65d5c2f7c9de75279c527", # D11
    "867e5b900d55bf6c1c47127f72022536", # E11
    "b06a62b6aa922d6d6dc34a3d9448c2a4", # F11
    "b56082f7d915e96c35d2215dd5c30211", # G11
]

# Subset of `resynthesized_oprM_clones`; this is the list pre-selected in the OprM dashboard.
# NOTE(review): E11 is annotated "OprN, FlgEHKL?" (not OprM) in `resynthesized_multi_clones`
# yet appears here — confirm this is intended.
validated_oprM_clones = [
    "79f5779c4414afa8a30076a3b59450f4", # G5
    "73625727e049af5e138bc248d9c77bed", # H5
    "0a687cbe05ea7f80e69401a5279c6d79", # A6
    "858c526bcaa23e41ffeef109638993de", # B6
    "4e4ded3881abed85e346184fb94bc197", # C6
    "acc9211fb1630c461ef6ef4c3a430c37", # E6
    "7a08aec063cbd0310a4760ed965832c6", # H6
        
    "867e5b900d55bf6c1c47127f72022536", # E11
    "b06a62b6aa922d6d6dc34a3d9448c2a4", # F11
    "b56082f7d915e96c35d2215dd5c30211", # G11
]

In [14]:
# Samples left out of the OprM dashboard through the `global_query` filter below
bad_samples_oprM = [
    '027j.1.A2.1.R8i',
    '027j.1.C2.1.R8i',
    '027j.1.B6.1.R8i',
    '027j.1.C5.1.R8i',
]
dash_oprM = nbseq.viz.dash.selection_group_dashboard(
    ex,
    starting_phenotype='OprM',
    global_query=(
        "expt == '027j' & io == 'i' & kind == '+' & "
        f"~(name_full in {bad_samples_oprM})"
    ),
    # swap in make_selection(resynthesized_oprM_clones) to pre-select every resynthesized clone
    initial_selection=make_selection(validated_oprM_clones),
    tree=True,
)



In [15]:
# Extract the chart from the (undisplayed) OprM dashboard, then save the figure
# files and per-subplot tables under the name 'oprm'.
chart = get_chart_from_selection_group_dash(dash_oprM)
save_chart_and_data(chart,'oprm')

Uncomment to show interactive dashboard for OprM (slow!)

In [16]:
# dash_oprM

## OprN

In [17]:
# Feature IDs pre-selected in the OprN dashboard; a subset of `resynthesized_oprN_clones` below.
validated_oprN_clones = [
    "b4ef97969571d2f06dc43c777b4e90a7",
    "01d61371247ae93242f8283e89c588d4",
    "ca75d85a2fed07944d2c7fe9d5ac4432",
]
    
# Feature IDs of the clones resynthesized for OprN.
resynthesized_oprN_clones = [
    "87cac08238c99cbd9338bf1674ffcd1a",
    "b4ef97969571d2f06dc43c777b4e90a7",
    "01d61371247ae93242f8283e89c588d4",
    "ca75d85a2fed07944d2c7fe9d5ac4432",
    "d787a9dcebc35c8d26d219cbb99bb95b",
    "8fd74f23665cc507696d2471b990c1a2",
    "fc541d812f6d28ea18344cf14254ece7",
    "e1c440903c6972eb060953753ce08990",
    "18cd450df73f2da2eec32d4613967864",
    "9f057e3628cb8da2934ae41f7db52b38",
    "289b8cea7db722e2b9a665d9670aca21",
    "4fb3cdaf65a4b90af50af05b6f968e9a",
    "6ff22e74c44b253863215af93f91a2dd",
    "13a637a9c2e61315bab5838c4df537c2",
    "9688b758a6d3c9ef9eb5e3834ca25b10",
    "44d9952e17868078778e864a7efbde72",
]

In [18]:
# Samples left out of the OprN dashboard through the `global_query` filter below
bad_samples_oprN = [
    '027j.1.A4.1.R8i',
    '027j.1.C4.1.R8i',
]
dash_oprN = nbseq.viz.dash.selection_group_dashboard(
    ex,
    starting_phenotype='OprN',
    global_query=(
        "expt == '027j' & io == 'i' & kind == '+' & "
        f"~(name_full in {bad_samples_oprN})"
    ),
    # swap in make_selection(resynthesized_oprN_clones) to pre-select every resynthesized clone
    initial_selection=make_selection(validated_oprN_clones),
    tree=True,
)



In [19]:
# Extract the chart from the (undisplayed) OprN dashboard, then save the figure
# files and per-subplot tables under the name 'oprn'.
chart = get_chart_from_selection_group_dash(dash_oprN)
save_chart_and_data(chart,'oprn')

Uncomment to show interactive dashboard for OprN

In [20]:
# dash_oprN

## OprJ

In [21]:
# Feature IDs pre-selected in the OprJ dashboard; a subset of `resynthesized_oprJ_clones` below.
# Trailing comments are labels (presumably plate wells — TODO confirm).
validated_oprJ_clones = [ 

"619d7cb2618b0de2451c810ccad7928d", # A9
"689e3d69f730856b5069b9423618c6dd", # C9
"5d678f62efc6537a063b16b8e76972ed", # D9
"e26b5e5f1c8a8227c70fde375a6fdda9", # E9
"caca2d02fc027bda43a47346cfad040f", # F9
"1f358120b580a12e7177c1f5b8273eeb", # C10
"f26902463d72d7920c57d129d4b443d4", # D10
]

# Feature IDs of the clones resynthesized for OprJ.
resynthesized_oprJ_clones = [

"619d7cb2618b0de2451c810ccad7928d", # A9
"f1002d2194fcd6e82c58eb671be77940", # B9
"689e3d69f730856b5069b9423618c6dd", # C9
"5d678f62efc6537a063b16b8e76972ed", # D9
"e26b5e5f1c8a8227c70fde375a6fdda9", # E9
"caca2d02fc027bda43a47346cfad040f", # F9
"33a95bc00ec64198f288e5762b0bdefd", # G9
"fa63ab1133a93aef6a931f44cb582657", # H9
"216f32d9c2fbed45fb4d8866fc733063", # A10
"57b20da570936db205fa009818d6cbc7", # B10
"1f358120b580a12e7177c1f5b8273eeb", # C10
"f26902463d72d7920c57d129d4b443d4", # D10

]

In [22]:
# Samples left out of the OprJ dashboard through the `global_query` filter below
bad_samples_oprJ = [
    '027j.1.A3.1.R8i',
    '027j.1.B3.1.R8i',
]
dash_oprJ = nbseq.viz.dash.selection_group_dashboard(
    ex,
    starting_phenotype='OprJ',
    global_query=(
        "expt == '027j' & io == 'i' & kind == '+' & "
        f"~(name_full in {bad_samples_oprJ})"
    ),
    # swap in make_selection(resynthesized_oprJ_clones) to pre-select every resynthesized clone
    initial_selection=make_selection(validated_oprJ_clones),
    tree=True,
)



In [23]:
# Extract the chart from the (undisplayed) OprJ dashboard, then save the figure
# files and per-subplot tables under the name 'oprj'.
chart = get_chart_from_selection_group_dash(dash_oprJ)
save_chart_and_data(chart,'oprj')

Uncomment to show interactive dashboard for OprJ

In [24]:
# dash_oprJ

## FliC

In [25]:
# Feature IDs of the clones resynthesized for FliC; this full list is what the
# FliC dashboard pre-selects.
resynthesized_fliC_clones = [
    '8db090ea459fd8bc724385106e33378d',
    '421415088da22b245d06bfdb37eafe5b',
    '19673cf75fcea8493f07e574bad384f3',
    '2f63ba400bdfc97df610bb9c885b7609',
    'b061e268cab631458dc57a9cfe70aafc',
    '0868772ceb6bc48512c483445bf05283',
    '004b743cdbdc940c5bf2ce26f21d99de',
    'f81aea3ea29093b18bdeb356589d3be6',
    '02720d34d9792b1bb2df8c6205d4ab40',
    '276bc61ed1fb56452f1199631544d5c0',
    '75247f0a9fd51fdb22e3b508e8d3b3b6',
    '8381ecbe726053c90a76416c454c62c4',
    'eee52f822e384618a17bc5739989bd88',
    '25f4a559f4d59299f8592e4ba754f6a4',
    'fc779db2cf4d946f132439b385168fd7',
    '9fcffa88165d479a8d6f41f6a7afcb2a',
    'f3846fb815b7691f41b8c05fd13d1eb2',
    '6804aa4cd67c9c49484c58d38e001d78',
    '8c51505c75cc7349ded21bb3239e6eb3']

In [26]:
# Unlike the porin dashboards, no `global_query` is passed for FliC.
dash_fliC = nbseq.viz.dash.selection_group_dashboard(
    ex,
    starting_phenotype='FliC',
    # set tree=False to build the dashboard without the tree
    tree=True,
    initial_selection=make_selection(resynthesized_fliC_clones),
)



In [27]:
# Extract the chart from the (undisplayed) FliC dashboard, then save the figure
# files and per-subplot tables under the name 'flic'.
chart = get_chart_from_selection_group_dash(dash_fliC)
save_chart_and_data(chart,'flic')

Uncomment to show interactive dashboard for FliC

In [28]:
# dash_fliC

## FlgEHKL

In [29]:
# Feature IDs of the clones resynthesized for FlgEHKL.
resynthesized_flgEHKL_clones = [
    '6d72a8720c935bb6bb7cb02e03b5381f',
    '49e0bad9177fcee66f22f56d74511b26',
    '0c97620726c0a010e74c44b1148149ba',
    '989eda0b48b0c47e024b0ccac3f61248',
    '13c8b9c0a4f1eef51383b511c2506509',
    '20f513a3c1ed5d5efff33f11270f6a00',
    '2eca247ea2c113908c3c3b60b795e983',
    '3352df861590099f163fa266f17b3c22',
    '3dc74e1a3e7200816c8f5925e90ea1c5',
    '0c39c8b51ed890b2265ba8d06ed1bd0e',
    '3413be6f5640e8911c34708f9c69503e',
    'c92b07be23bb0cd9360e81f9dc61b2ea',
    '99f9fc1711b1d674af2f073b0975f712']

In [30]:
# Feature IDs pre-selected in the FlgEHKL dashboard; every entry also appears in
# `resynthesized_flgEHKL_clones`.
validated_flgEHKL_clones = [
    '49e0bad9177fcee66f22f56d74511b26',
    '989eda0b48b0c47e024b0ccac3f61248',
    '13c8b9c0a4f1eef51383b511c2506509',
    '20f513a3c1ed5d5efff33f11270f6a00',
    '2eca247ea2c113908c3c3b60b795e983',
    '3352df861590099f163fa266f17b3c22',
    'c92b07be23bb0cd9360e81f9dc61b2ea',
    '99f9fc1711b1d674af2f073b0975f712']

In [31]:
# No samples are excluded for FlgEHKL; commented-out candidates left by the author:
# '027j.1.A3.1.R8i','027j.1.B3.1.R8i'
# NOTE(review): the variable name ends in "J" although the phenotype is FlgEHKL —
# looks like a typo; the name is kept so nothing that references it breaks.
bad_samples_flgEHKJ = []
dash_flgEHKL = nbseq.viz.dash.selection_group_dashboard(
    ex,
    starting_phenotype='FlgEHKL',
    global_query=(
        "expt == '027j' & io == 'i' & kind == '+' & "
        f"~(name_full in {bad_samples_flgEHKJ})"
    ),
    # plain (non-f) string: `{phenotype}` is passed through literally, presumably
    # for the dashboard to substitute — TODO confirm
    pos_query="{phenotype} == 1 & FliC == 0",
    # swap in make_selection(resynthesized_flgEHKL_clones) to pre-select every resynthesized clone
    initial_selection=make_selection(validated_flgEHKL_clones),
    tree=True,
)



In [32]:
# Extract the chart from the (undisplayed) FlgEHKL dashboard, then save the figure
# files and per-subplot tables under the name 'flgehkl'.
chart = get_chart_from_selection_group_dash(dash_flgEHKL)
save_chart_and_data(chart,'flgehkl')

Uncomment to view interactive dashboard for FlgEHKL (flagellar hook-basal body)

In [33]:
# dash_flgEHKL

## Features

In [34]:
# Build a single-feature (VHH) dashboard for one CDR3 feature ID — the entry
# labelled H6 in `validated_oprM_clones` — using the same experiment / kind / io
# filter as the selection dashboards, but without excluding any samples.
dash_features = nbseq.viz.dash.vhh_dashboard(ex, 
                             feature='7a08aec063cbd0310a4760ed965832c6', 
                             global_query="expt == '027j' & kind == '+' & io == 'i'")



Uncomment to view interactive dashboard for feature CDR3ID:`7a08aec063cbd0310a4760ed965832c6`

In [35]:
# dash_features

In [36]:
def get_chart_from_vhh_dashboard(dash):
    """Extract the altair chart from a VHH dashboard by walking down its Holoviz Panel objects.

    Follows ``dash[1]._pane[1].object``: the dashboard's second child, that
    child's rendered pane, the pane's second child, and finally the object it wraps.
    """
    second_child = dash[1]
    rendered_pane = second_child._pane
    chart_holder = rendered_pane[1]
    return chart_holder.object

In [37]:
# Pull the chart out of the (undisplayed) single-feature dashboard
chart = get_chart_from_vhh_dashboard(dash_features)

In [38]:
# Save the single-feature chart as Vega-Lite JSON and SVG, then export its data.
# NOTE(review): the table path uses the prefix 'fig-picking-' whereas
# `save_chart_and_data` uses 'fig-suppl-picking-' — confirm the difference is intended.
chart.save('results/plots/picking/7a08aec063cbd0310a4760ed965832c6.vl.json')
chart.save('results/plots/picking/7a08aec063cbd0310a4760ed965832c6.svg')
vegalite_to_csv(chart, 'results/tables/figures/picking/fig-picking-7a08aec063cbd0310a4760ed965832c6', extra_fields=['OprM','name'], show=False)

