# Phagemid titers from phage display selection campaigns (`fig-suppl-titers`)

In [1]:
import pandas as pd
import altair as alt
from natsort import natsort_keygen

In [2]:
# Base size reused by every chart's configure() call for fonts, paddings and
# legend/point symbol sizes.
FONT_SIZE = 6

## `panning-small`

### `panning-small`, purified protein panning

In [3]:
%%bash
# Recreate the export directory from scratch so files from earlier runs do not linger.
out_dir="../output/fig-suppl-titers"
rm -rf "${out_dir}"
mkdir -p "${out_dir}"

In [4]:
# Load the CG032h titer table; the first CSV column is used as the row index.
CG032h = pd.read_csv(
    "../data/fig-panning-proteins/2020-08-06_CG032h4_titers/CG032h_titers.csv",
    index_col=0,
)

In [5]:
# Display labels for the raw phage-pool codes stored in the `status` column.
status_map = {
    '-': 'Counter-selection eluate',
    '+': 'Selection eluate',
    'in': 'Input phage',
}
CG032h['status'] = CG032h['status'].map(status_map)
# Strip the first character from every round label, keeping the remainder.
CG032h['round'] = CG032h['round'].apply(lambda label: label[1:])

In [6]:
# Titer per panning round for each phage pool, drawn as one small panel per
# selection antigen.
(
    alt.Chart(CG032h)
    .mark_line(point=True)
    .encode(
        x='round',
        y=alt.Y(
            'log(pfu)',
            scale=alt.Scale(domain=[6, 13]),
            title="phage titer, log10(pfu/mL)",
        ),
        # `status` was remapped to display labels in an earlier cell, so the
        # legend order must list those labels (the dict values). The raw codes
        # (the dict keys) no longer occur in the data and would order nothing.
        color=alt.Color('status', sort=list(status_map.values()), title=None),
    )
    .properties(width=50, height=100)
    .facet(
        column=alt.Facet(
            'antigen',
            sort=["PAK flagellin", "PAO1 flagellin", "PAK pilin"],
            title="selection antigen",
        )
    )
    .configure(
        facet=dict(spacing=FONT_SIZE // 2),
        point=dict(strokeWidth=2),
        header=dict(
            labelFontSize=FONT_SIZE,
            titleFontSize=FONT_SIZE,
            labelPadding=FONT_SIZE // 2,
            # Negative padding pulls the facet title towards the panel labels.
            titlePadding=-FONT_SIZE,
        ),
        axis=dict(
            labelFontSize=FONT_SIZE,
            titleFontSize=FONT_SIZE,
        ),
        legend=dict(
            titleFontSize=FONT_SIZE,
            labelFontSize=FONT_SIZE,
            symbolSize=FONT_SIZE * 2,
            padding=-FONT_SIZE,
        ),
    )
)

In [7]:
# Export the plotted protein-panning titers with descriptive column headers.
(
    CG032h[['antigen', 'selection', 'status', 'round', 'log(pfu)']]
    # `columns=` is required here: a bare mapping passed to rename() is applied
    # to the row index, which would leave the CSV headers unchanged.
    .rename(columns={
        'antigen': 'selection antigen',
        'status': 'phage pool',
        'log(pfu)': 'phagemid titer [log10(pfu/mL) on carbenicillin]',
    })
    .to_csv('../output/fig-suppl-titers/panning-small-proteins.csv', index=False)
)

### `panning-small`, cell-based panning

In [8]:
# Load the CG024e cell-panning titer table; the first CSV column is used as the row index.
CG024e = pd.read_csv(
    "../data/fig-panning-hts/2020-10-27_CG024e_cell_panning_titers/CG024e_alpaca.csv",
    index_col=0,
)

In [9]:
# Display labels for the raw phage-fraction codes.
fraction_map = {"-": "antigen- cell eluate", "+": "antigen+ cell eluate", "i": "input phage"}

CG024e = (
    CG024e
    .assign(fraction_name=CG024e["fraction"].map(fraction_map))
    # Keep only selections numbered 6 or lower and drop rows whose fraction is '?'.
    .query("fraction != '?' & selection <= 6")
)

In [10]:
# Selection number -> antigen name. Numbers 9-16 repeat the names of 1-8 in
# the same order, so the eight names are listed once and cycled twice.
selection_name_map = dict(
    enumerate(
        2 * [
            "PAK flagellin",
            "PAO1 flagellin",
            "PA103 pilin",
            "PAK pilin",
            "PAO1 Pel/Psl",
            "PAO1 efflux pumps",
            "Pa mix liquid",
            "Pa mix scrape",
        ],
        start=1,
    )
)

In [11]:
# Titer per panning round for each phage fraction, one small panel per
# selection, with panels ordered as listed in selection_name_map.
(
    alt.Chart(CG024e)
    .mark_line(point=True)
    .encode(
        x=alt.X("r:O", title="round"),
        y=alt.Y(
            "C_log(pfu/mL)",
            title="phage titer, log10(pfu/mL)",
            scale=alt.Scale(domain=[5, 14]),
        ),
        color=alt.Color(
            "fraction_name",
            title=None,
            sort=list(fraction_map.values()),
        ),
    )
    .properties(width=35, height=60)
    .facet(
        column=alt.Column(
            "selection_name",
            title=None,
            sort=list(selection_name_map.values()),
        ),
    )
    .configure(
        facet={"spacing": FONT_SIZE // 2},
        point={"size": FONT_SIZE * 3, "strokeWidth": 2},
        header={
            "labelFontSize": FONT_SIZE,
            "titleFontSize": FONT_SIZE,
            "labelPadding": FONT_SIZE // 2,
            "titlePadding": 0,
        },
        axis={"labelFontSize": FONT_SIZE, "titleFontSize": FONT_SIZE},
        legend={
            "titleFontSize": FONT_SIZE,
            "labelFontSize": FONT_SIZE,
            "symbolSize": FONT_SIZE * 2,
            "padding": -FONT_SIZE,
        },
    )
)

In [12]:
# Export the plotted cell-panning titers with descriptive column headers.
(
    CG024e[['selection_name', 'selection', 'fraction_name', 'r', 'C_log(pfu/mL)']]
    # `columns=` is required here: a bare mapping passed to rename() is applied
    # to the row index, which would leave the CSV headers unchanged.
    .rename(columns={
        'selection_name': 'selection antigen',
        'fraction_name': 'phage pool',
        'r': 'round',
        'C_log(pfu/mL)': 'phagemid titer [log10(pfu/mL) on carbenicillin]',
    })
    .to_csv('../output/fig-suppl-titers/panning-small-cells.csv', index=False)
)

## `panning-massive`

In [13]:
# Load the CG027i arrayed-panning titer table; the first CSV column is used as the row index.
data = pd.read_csv(
    "../data/fig-panning-massive/2021-11-02_CG027i_arrayed_panning/CG027i.csv",
    index_col=0,
)

In [14]:
# Display labels for the raw phage-fraction codes of this dataset.
fraction_map = {"-": "CS cell eluent", "+": "selection cell eluent", "i": "input phage"}
# Codes missing from the mapping become NaN in the new column.
data = data.assign(fraction_name=data["fraction"].map(fraction_map))

In [15]:
# Number of non-missing titer values for each (round, phage fraction) pair.
(
    data
    .groupby(['r', 'fraction_name'])['C_log(pfu/mL)']
    .count()
)

r  fraction_name        
1  CS cell eluent           20
   input phage              14
   selection cell eluent    88
2  CS cell eluent           48
   input phage              48
   selection cell eluent    48
3  CS cell eluent           48
   input phage              48
   selection cell eluent    48
4  CS cell eluent           60
   input phage              60
   selection cell eluent    60
Name: C_log(pfu/mL), dtype: int64

In [16]:
# Box plot of titers per panning round, with one box per phage fraction placed
# side by side within each round.
chart_high_throughput = (
    alt.Chart(data)
    .mark_boxplot()
    .encode(
        x=alt.X("r:O", title="round"),
        y=alt.Y(
            "C_log(pfu/mL)",
            title="phage titer, log10(pfu/mL)",
            scale=alt.Scale(domain=[5, 14]),
        ),
        color=alt.Color(
            "fraction_name",
            title=None,
            sort=list(fraction_map.values()),
        ),
        # Use the same explicit order for the within-round offset as for the
        # colour legend, so box position and legend order agree instead of
        # the offset falling back to its default ordering.
        xOffset=alt.XOffset("fraction_name", sort=list(fraction_map.values())),
    )
    .properties(width=100, height=100)
    .configure(
        point=dict(
            size=FONT_SIZE * 2,
            strokeWidth=2,
        ),
        line=dict(strokeWidth=1),
        header=dict(
            labelFontSize=FONT_SIZE,
            titleFontSize=FONT_SIZE,
            labelPadding=FONT_SIZE // 2,
        ),
        axis=dict(
            labelFontSize=FONT_SIZE,
            titleFontSize=FONT_SIZE,
        ),
        legend=dict(
            titleFontSize=FONT_SIZE,
            labelFontSize=FONT_SIZE,
            symbolSize=FONT_SIZE * 2,
            padding=-FONT_SIZE,
        ),
    )
)

In [17]:
# Bare expression as the last line of the cell, so the notebook renders the chart.
chart_high_throughput

In [18]:
# Export the plotted arrayed-panning titers with descriptive column headers.
(
    data.loc[:, ['r', 'sample_full', 'fraction_name', 'C_log(pfu/mL)']]
    .rename(
        columns={
            'sample_full': 'selection name',
            'fraction_name': 'phage pool',
            'r': 'round',
            'C_log(pfu/mL)': 'phagemid titer [log10(pfu/mL) on carbenicillin]',
        }
    )
    .to_csv('../output/fig-suppl-titers/panning-massive.csv', index=False)
)

### `panning-extended`

In [19]:
# Load the CG027j titer table (default integer row index). This rebinds `data`.
data = pd.read_csv(
    "../data/fig-panning-massive/2022-10-17_CG027j_titers/CG027j.csv"
)

In [20]:
# Samples whose name starts with E, F or G are labelled '1/100x'; every other
# sample is labelled '1x'.
data['Scale'] = (
    data['Sample'].str.slice(0, 1)
    .isin(['E', 'F', 'G'])
    .map({True: '1/100x', False: '1x'})
)

In [21]:
# Display labels for the raw phage-fraction codes of this dataset, which has
# three separate counter-selection (CS) fractions. The dict order also fixes
# the legend, offset and colour order of the chart that follows.
fraction_map = dict(
    [
        ("CS1-", "CS #1 cell eluent"),
        ("CS2-", "CS #2 cell eluent"),
        ("CS3-", "CS #3 cell eluent"),
        ("+", "selection cell eluent"),
        ("i", "input phage"),
    ]
)
# Codes missing from the mapping become NaN in the new column.
data = data.assign(fraction_name=data["fraction"].map(fraction_map))

In [22]:
# Box plot of titers per panning round, one box per phage fraction within each
# round, split into one panel per panning scale. Each panel gets its own x and
# xOffset scales, so it shows only the rounds and fractions it contains.
(
    alt.Chart(data)
    .mark_boxplot(size=5)
    .encode(
        x=alt.X("r:O", title="round"),
        y=alt.Y(
            "C_log(pfu/mL)",
            title="phage titer, log10(pfu/mL)",
            scale=alt.Scale(domain=[5, 14]),
        ),
        color=alt.Color(
            "fraction_name",
            title=None,
            sort=list(fraction_map.values()),
            # One explicit colour per fraction, paired in fraction_map order.
            scale=alt.Scale(
                domain=list(fraction_map.values()),
                range=["#3182bd", "#6baed6", "#9ecae1", "#e6550d", "#d62728"],
            ),
        ),
        xOffset=alt.XOffset("fraction_name", sort=list(fraction_map.values())),
    )
    .properties(width=100, height=100)
    .facet(column=alt.Column('Scale', sort=['1x', '1/100x'], title=None))
    .resolve_scale(x='independent', xOffset='independent')
    .configure(
        point={"size": FONT_SIZE * 2, "strokeWidth": 2},
        line={"strokeWidth": 1},
        header={
            "labelFontSize": FONT_SIZE,
            "titleFontSize": FONT_SIZE,
            "labelPadding": FONT_SIZE // 2,
        },
        axis={"labelFontSize": FONT_SIZE, "titleFontSize": FONT_SIZE},
        legend={
            "titleFontSize": FONT_SIZE,
            "labelFontSize": FONT_SIZE,
            "symbolSize": FONT_SIZE * 2,
            "padding": -FONT_SIZE,
        },
    )
)

In [23]:
from scipy.stats import ttest_ind

# Round-7 rows at 1x scale, with the input-phage rows removed.
dff = data[(data['Scale'] == '1x') & (data['r'] == 7) & (data['fraction'] != 'i')]

# Titers of the selection ('+') eluates and of the 'CS3-' eluates.
titers_S = dff[dff['fraction'] == '+']['C_log(pfu/mL)']
titers_CS = dff[dff['fraction'] == 'CS3-']['C_log(pfu/mL)']

# One-sided two-sample t-test; the alternative is that the selection titers
# have the greater mean.
ttest_ind(a=titers_S.to_numpy(), b=titers_CS.to_numpy(), alternative='greater')

Ttest_indResult(statistic=3.735044712010297, pvalue=0.00021515793406050765)

In [24]:
# Number of non-missing titer values for each (scale, round, phage fraction) group.
(
    data
    .groupby(['Scale', 'r', 'fraction_name'])['C_log(pfu/mL)']
    .count()
)

Scale   r  fraction_name        
1/100x  6  CS #1 cell eluent        12
           CS #2 cell eluent        12
           CS #3 cell eluent        12
           selection cell eluent    36
        7  CS #3 cell eluent        24
           input phage              24
           selection cell eluent    36
1x      5  CS #1 cell eluent        24
           CS #2 cell eluent        24
           CS #3 cell eluent        24
           input phage              24
           selection cell eluent    24
        6  CS #3 cell eluent        12
           input phage              24
           selection cell eluent    24
        7  CS #3 cell eluent        24
           input phage              24
           selection cell eluent    36
Name: C_log(pfu/mL), dtype: int64

In [25]:
# Export table for the extended panning: the plotted columns with descriptive
# headers. `Scale` keeps its name because it is used later to split the table.
df = data.loc[
    :, ['r', 'sample_full', 'fraction_name', 'C_log(pfu/mL)', 'Scale']
].rename(
    columns={
        'sample_full': 'selection name',
        'fraction_name': 'phage pool',
        'r': 'round',
        'C_log(pfu/mL)': 'phagemid titer [log10(pfu/mL) on carbenicillin]',
    }
)

In [26]:
# Write one CSV per panning scale. The '/' in '1/100x' is replaced with '_'
# so the scale label can be used inside a file name.
for scale in ('1x', '1/100x'):
    fn = f"../output/fig-suppl-titers/panning-extended-{scale.replace('/','_')}.csv"
    rows_at_scale = df['Scale'] == scale
    df.loc[rows_at_scale].drop(columns='Scale').to_csv(fn, index=False)
    print(fn)

../output/fig-suppl-titers/panning-extended-1x.csv
../output/fig-suppl-titers/panning-extended-1_100x.csv
