diff --git a/Snakefile b/Snakefile
index 340df54..5e049e8 100644
--- a/Snakefile
+++ b/Snakefile
@@ -86,6 +86,7 @@ rule make_summary:
homolog_escape=nb_markdown('homolog_escape.ipynb'),
make_supp_data=nb_markdown('make_supp_data.ipynb'),
structural_contacts='results/summary/annotate_structural_contacts.md',
+ custom_plots='results/summary/custom-plots_LY555.md',
output:
summary = os.path.join(config['summary_dir'], 'summary.md')
run:
@@ -146,6 +147,8 @@ rule make_summary:
which are [here]({path(config['supp_data_dir'])}). These include
`dms-view` input files.
+ 16. Make custom plots for this project that fall out of the core pipeline, summarzied in [this notebook]({path(input.custom_plots)})
+
"""
).strip())
@@ -158,6 +161,26 @@ rule make_rulegraph:
shell:
"snakemake --forceall --rulegraph | dot -Tsvg > {output}"
+rule custom_plots:
+ input:
+ config['escape_fracs'],
+ config['gisaid_mutation_counts']
+ output:
+ md='results/summary/custom-plots_LY555.md',
+ md_files=directory('results/summary/custom-plots_LY555_files')
+ envmodules:
+ 'R/3.6.2-foss-2019b'
+ params:
+ nb='custom-plots_LY555.Rmd',
+ md='custom-plots_LY555.md',
+ md_files='custom-plots_LY555_files'
+ shell:
+ """
+ R -e \"rmarkdown::render(input=\'{params.nb}\')\";
+ mv {params.md} {output.md};
+ mv {params.md_files} {output.md_files}
+ """
+
rule structural_contacts:
input:
config['structural_contacts_config']
diff --git a/cluster.yaml b/cluster.yaml
index 44537a2..ae0f3ff 100644
--- a/cluster.yaml
+++ b/cluster.yaml
@@ -31,3 +31,9 @@ structural_contacts:
time: 0-1
mem: 50000
constraint: gizmok
+
+custom_plots:
+ cpus: 4
+ time: 0-1
+ mem: 50000
+ constraint: gizmok
diff --git a/config.yaml b/config.yaml
index 1e66fd9..4550310 100644
--- a/config.yaml
+++ b/config.yaml
@@ -179,3 +179,4 @@ structural_contacts_dir: results/structural_contacts
structural_contacts: results/structural_contacts/structural_contacts.csv
antibody_contacts: results/structural_contacts/antibody_contacts.csv
escape_selections_dir: results/escape_selections
+custom_plots_dir: results/custom_plots_LY
\ No newline at end of file
diff --git a/custom_analyses/custom-plots_LY555.Rmd b/custom-plots_LY555.Rmd
similarity index 95%
rename from custom_analyses/custom-plots_LY555.Rmd
rename to custom-plots_LY555.Rmd
index fc64eba..e25f55f 100644
--- a/custom_analyses/custom-plots_LY555.Rmd
+++ b/custom-plots_LY555.Rmd
@@ -28,13 +28,13 @@ if(any(installed_packages == F)){
invisible(lapply(packages, library, character.only=T))
#read in config file
-config <- read_yaml("../config.yaml")
+config <- read_yaml("./config.yaml")
#read in escape profiles file
-profiles_config <- read_yaml(file=paste("../",config$escape_profiles_config,sep=""))
+profiles_config <- read_yaml(file=config$escape_profiles_config)
#make output directory
-output_dir <- "../results/custom_plots_LY"
+output_dir <- config$custom_plots_dir
if(!file.exists(output_dir)){
dir.create(file.path(output_dir))
}
@@ -49,7 +49,7 @@ sessionInfo()
Read in escape fractions, rename some things to make them easier to work with.
```{r data input}
-scores <- data.table(read.csv(file=paste("../",config$escape_fracs,sep=""),stringsAsFactors=F))
+scores <- data.table(read.csv(file=config$escape_fracs,stringsAsFactors=F))
scores <- scores[selection %in% names(profiles_config$`LY_cocktail`$conditions) & library=="average", .(selection,condition,site,protein_site,wildtype,mutation,mut_escape_frac_epistasis_model,site_total_escape_frac_epistasis_model)]
@@ -72,7 +72,7 @@ We read in table reporting circulating variants. We add new columns to our data
```{r mutation_escape_v_freq, echo=T, fig.width=10, fig.height=3.75, fig.align="center", dpi=300,dev="png"}
#read in table giving mutant frequencies on GISAID
-counts <- read.csv(paste("../",config$gisaid_mutation_counts,sep=""),stringsAsFactors=F)
+counts <- read.csv(config$gisaid_mutation_counts,stringsAsFactors=F)
#add to scores table
scores[,count:=0];scores[,n_countries:=0];scores[,frequency:=0]
for(i in 1:nrow(counts)){
@@ -108,7 +108,7 @@ For contextualizing ACE2 and expression deficits of antigenic mutations, what is
```{r GISAID_DFE, echo=T, fig.width=7, fig.height=4, fig.align="center", dpi=300,dev="png"}
#read in DMS scores
-dms <- data.table(read.csv(file=paste("../",config$mut_bind_expr,sep=""),stringsAsFactors = F))
+dms <- data.table(read.csv(file=config$mut_bind_expr,stringsAsFactors = F))
setnames(dms,"site_SARS2","site")
dms <- dms[mutant != "*",]
dms <- dms[as.character(wildtype) != as.character(mutant),]
@@ -160,8 +160,8 @@ We want to try a few plots -- at the per-mut and per-site level, compare distrib
First, load pdbs, and generate list of residues that are <4A or 4-8A from each mAb
```{r define_structural_contacts}
-pdb_LY555 <- read.pdb(file="../data/pdbs/7kmg_single.pdb")
-pdb_LY016 <- read.pdb(file="../data/pdbs/7c01_single.pdb")
+pdb_LY555 <- read.pdb(file="./data/pdbs/7kmg_single.pdb")
+pdb_LY016 <- read.pdb(file="./data/pdbs/7c01_single.pdb")
contacts_LY555_4A <- binding.site(pdb_LY555,
a.inds=atom.select(pdb_LY555, chain="C"),
diff --git a/custom_analyses/README.md b/custom_analyses/README.md
deleted file mode 100755
index 723c995..0000000
--- a/custom_analyses/README.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# Custom analyses
-
-This directory contains custom analyses for specific sets of antibodies or sera.
-
-## Analyses in this directory:
-### For Bjorkman / Barnes antibodies:
-#### Write pymol commmands to make PSEs
-The notebook [write_pymol_commands.ipynb](./write_pymol_commands.ipynb) takes as input the pdb files output by `output_pdbs.ipynb`, the escape sites called by `call_strong_escape_sites.ipynb`, and the contact sites called by `annotate_structural_contacts.Rmd`.
-
-It writes text files that contain commands that can be pasted into the PyMol command line to create a PSE that can be used for further structure-gazing and manual analysis.
-
-This notebook might work for other antibodies, but I haven't tested it.
-
-### `custom-plots_xxx` scripts
-These scripts make customized plots for different antibody sets that vary from the primary computational pipeline. They are currently just ran manually in R, but when each `xxx` antibody set splits off to its own repo, these can be incorporated into the `Snakemake` pipeline.
\ No newline at end of file
diff --git a/custom_analyses/write_pymol_commands.ipynb b/custom_analyses/write_pymol_commands.ipynb
deleted file mode 100755
index 4ea0289..0000000
--- a/custom_analyses/write_pymol_commands.ipynb
+++ /dev/null
@@ -1,445 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Make text files with PyMol commands\n",
- "Make text files with commands that can be run in PyMol to read in PDB files with reassigned b-factors according to antibody escape.\n",
- "\n",
- "This is specifically designed for the Barnes et al. structures of S2P trimer with Fab, but I think I have set up the config such that it could work for RBD-Fab alone."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "from IPython.display import display, HTML\n",
- "import pandas as pd\n",
- "import yaml"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Read in configuration and PSE config:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Reading PSE specs from ../data/pse_config.yaml\n"
- ]
- }
- ],
- "source": [
- "with open('../config.yaml') as f:\n",
- " config = yaml.safe_load(f)\n",
- " \n",
- "print(f\"Reading PSE specs from {config['pse_config']}\")\n",
- "with open(config['pse_config']) as f:\n",
- " pse_config = yaml.safe_load(f)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Make output directory"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "os.makedirs(config['pse_dir'], exist_ok=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Define some global parameters:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "outprefix = 'pymol_commands' # make a separate text file for each\n",
- "\n",
- "# all maps should also be aligned to 6m0j\n",
- "ACE2_pdb = '6m0j'\n",
- "\n",
- "# view1\n",
- "view1 = \"\"\"\\nset_view (\\\n",
- " 0.833226204, -0.528404295, 0.162850752,\\\n",
- " -0.477664202, -0.539514899, 0.693361998,\\\n",
- " -0.278514773, -0.655512929, -0.701937377,\\\n",
- " 0.000000000, -0.000000000, -286.698059082,\\\n",
- " -23.955575943, 16.451217651, 0.807323456,\\\n",
- " -15130.423828125, 15703.833984375, -20.000000000 )\"\"\"\n",
- "\n",
- "# which set of data to show, and how to color surfaces.\n",
- "pymol_specs = {\n",
- " 'metric' : 'max',\n",
- " 'color_min' : 'white',\n",
- " 'color_max' : 'red',\n",
- " 'view' : view1,\n",
- " }"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Write function to perform write the same generic commands to text file for every antibody:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "def initialize_commands():\n",
- " text = \"\"\"\n",
- "#commands to load pdbs\n",
- "\n",
- "reinitialize\n",
- "set seq_view, 0\n",
- "set ray_shadows, 0\n",
- "set spec_reflect, 1\n",
- "set spec_power, 100000\n",
- "set sphere_scale, 0.75\n",
- "\n",
- "load 6m0j_b-factor-mean-bind.pdb\n",
- "load 6m0j_b-factor-mean-expr.pdb\n",
- "\n",
- "# create ACE2, RBD_bind, RBD_expr\n",
- "# hide all\n",
- "create ACE2, 6m0j_b-factor-mean-bind and chain A\n",
- "create RBD_bind, 6m0j_b-factor-mean-bind and chain E; remove RBD_bind and chain A\n",
- "create RBD_expr, 6m0j_b-factor-mean-expr and chain E; remove RBD_expr and chain A\n",
- "delete 6m0j_b-factor-mean-bind\n",
- "delete 6m0j_b-factor-mean-expr\n",
- "\n",
- "# start showing stuff\n",
- "show cartoon, ACE2; color gray20, ACE2; set cartoon_transparency, 0.5, ACE2\n",
- "show cartoon, RBD_bind; spectrum b, red white, RBD_bind, minimum=-2, maximum=0; show sticks, RBD_bind and resn NAG\n",
- "show cartoon, RBD_expr; spectrum b, red white, RBD_expr, minimum=-2, maximum=0; show sticks, RBD_expr and resn NAG\n",
- "\n",
- "\"\"\" \n",
- " f.write(text)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Write function to align trimer-Fab structure to 6m0j and to create Fab, RBD, and spike chains\n",
- "\n",
- "Might be able to use formatting like this [example](https://stackoverflow.com/questions/16162383/how-to-easily-write-a-multi-line-file-with-variables-python-2-6/16162599)."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [],
- "source": [
- "def load_trimer_fab(ab, sln, pdb, metric, RBD_chains, heavy_chains, light_chains, trimer):\n",
- "\n",
- " f.write('# load Fab-trimer PDB and align structures\\n')\n",
- " f.write(f'load data/{ab}_{sln}_{pdb}_{metric}_escape.pdb\\n')\n",
- " f.write(f'set_name {ab}_{sln}_{pdb}_{metric}_escape, {pdb}\\n')\n",
- " f.write(f'align {pdb} and (chain {RBD_chains[0]} and resi 333-526), RBD_bind\\n\\n')\n",
- "\n",
- " f.write('# create Fab chains\\n')\n",
- " f.write(f'show_as cartoon, {pdb}; color deepblue, {pdb};\\n')\n",
- " \n",
- " for hc, lc in zip(heavy_chains, light_chains):\n",
- " f.write(f'create {hc}, {pdb} and chain {hc}; create {lc}, {pdb} and chain {lc}\\n')\n",
- " \n",
- " hc_string = ' or '.join(heavy_chains)\n",
- " lc_string = ' or '.join(light_chains)\n",
- " f.write(f'color deepblue, ({hc_string}); color marine, ({lc_string})\\n\\n')\n",
- "\n",
- " f.write('# create RBD chains\\n')\n",
- " for rbd in RBD_chains:\n",
- " f.write(f'create RBD_{rbd}, {pdb} and chain {rbd} and resi 333-526; show cartoon, RBD_{rbd}; show sticks, RBD_{rbd} and resn NAG; spectrum b, white red, RBD_{rbd}, minimum=0\\n')\n",
- " \n",
- " if trimer:\n",
- " spikes = ['spike_{0}'.format(rbd) for rbd in RBD_chains]\n",
- " spike_string = ' or '.join(spikes)\n",
- " f.write('\\n# create non-RBD spike chains\\n')\n",
- " for rbd in RBD_chains:\n",
- " f.write(f'create spike_{rbd}, {pdb} and chain {rbd} and resi 1-332+527-1253\\n')\n",
- " f.write(f'color slate, ({spike_string})\\n\\n')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Write function to show ACE2 contact sites as mesh"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "def ace2_contact(RBD_chains):\n",
- " rbds = ['RBD_{0}'.format(rbd) for rbd in RBD_chains]\n",
- " rbd_string = ' or '.join(rbds)\n",
- " \n",
- " f.write('# show ACE2 contact sites as mesh\\n')\n",
- " f.write(f'create ACE2_contacts, ({rbd_string}) and resi 417+446+449+453+455+456+475+486+487+489+493+496+498+500+501+502+505\\n')\n",
- " f.write(f'show mesh, ACE2_contacts; color gray40, ACE2_contacts\\n')\n",
- " f.write(f'sele ACE2_cont, ({rbd_string}) and resi 417+446+449+453+455+456+475+486+487+489+493+496+498+500+501+502+505\\n\\n')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Write function to create antibody contact sites"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "def fab_contacts(RBD_chains, rbd_fab, all_ct_sites):\n",
- " \"\"\"\n",
- " rbd_fab is a dictionary keyed by RBD chain (eg A, B, C)\n",
- " values are a list of the contact sites for that chain\n",
- " all_ct_sites is a list of sites that contact any RBD chain \n",
- " \"\"\"\n",
- " rbds = ['RBD_{0}'.format(rbd) for rbd in RBD_chains]\n",
- " rbd_string = ' or '.join(rbds)\n",
- " all_contacts = '+'.join(all_ct_sites)\n",
- " \n",
- " f.write('# create antibody contact sites and show as spheres\\n')\n",
- " for rbd, sites in rbd_fab.items():\n",
- " rbd_contacts = '+'.join(sites)\n",
- " f.write(f'sele RBD_{rbd}_ct, RBD_{rbd} and resi {rbd_contacts}\\n')\n",
- " f.write(f'show spheres, RBD_{rbd}_ct and name ca\\n')\n",
- " f.write(f'sele all_ct, ({rbd_string}) and resi {all_contacts}\\n')\n",
- " f.write(f'show spheres, all_ct and name ca\\n\\n')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Read in contact sites"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " name | \n",
- " pdb | \n",
- " chain | \n",
- " position | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " ACE2 | \n",
- " 6M0J | \n",
- " E | \n",
- " 417 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " ACE2 | \n",
- " 6M0J | \n",
- " E | \n",
- " 446 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " ACE2 | \n",
- " 6M0J | \n",
- " E | \n",
- " 449 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " ACE2 | \n",
- " 6M0J | \n",
- " E | \n",
- " 453 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " ACE2 | \n",
- " 6M0J | \n",
- " E | \n",
- " 455 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " name pdb chain position\n",
- "0 ACE2 6M0J E 417\n",
- "1 ACE2 6M0J E 446\n",
- "2 ACE2 6M0J E 449\n",
- "3 ACE2 6M0J E 453\n",
- "4 ACE2 6M0J E 455"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "contacts = pd.read_csv(os.path.join('../', config['structural_contacts']))\n",
- "contacts['position'] = contacts['position'].astype(str)\n",
- "contacts.head()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Loop through all the PDBs in the config and write output file."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "../results/pymol_commands/pymol_C105_6xcn.txt\n",
- "../results/pymol_commands/pymol_C105_6xcm.txt\n",
- "../results/pymol_commands/pymol_C144_7k90.txt\n",
- "../results/pymol_commands/pymol_C002_7k8s.txt\n",
- "../results/pymol_commands/pymol_C002_7k8t.txt\n",
- "../results/pymol_commands/pymol_C121_7k8x.txt\n",
- "../results/pymol_commands/pymol_C121_7k8y.txt\n",
- "../results/pymol_commands/pymol_C135_7k8z.txt\n",
- "../results/pymol_commands/pymol_C110_7k8v.txt\n"
- ]
- }
- ],
- "source": [
- "for name, specs in pse_config.items():\n",
- " pdb = name\n",
- " ab = specs[\"antibody\"]\n",
- " sln = specs[\"selection\"]\n",
- " RBD_chains = specs[\"RBD_chains\"]\n",
- " heavy_chains = specs[\"heavy_chains\"]\n",
- " light_chains = specs[\"light_chains\"]\n",
- " trimer = specs[\"trimer\"]\n",
- " \n",
- " # rbd_fab is a dict, keyed by RBD chain; values are list of the contact sites\n",
- " rbd_fab = ((contacts\n",
- " .query('name==@ab & pdb==@pdb'))\n",
- " [['chain', 'position']]\n",
- " .reset_index(drop=True)\n",
- " .groupby('chain')['position'].apply(list).to_dict()\n",
- " )\n",
- " \n",
- " # create list of all contact sites for a given antibody\n",
- " all_ct = []\n",
- " for v in rbd_fab.values():\n",
- " all_ct.extend(v)\n",
- " all_ct_sites = sorted(list(set(all_ct))) \n",
- " \n",
- " metric = pymol_specs['metric']\n",
- " \n",
- " outFile = os.path.join(config['pse_dir'], f'pymol_{ab}_{name}.txt')\n",
- " print(outFile)\n",
- " \n",
- " f = open(outFile, \"w\")\n",
- " f.write(f\"# PyMol commands for {ab} {name}\")\n",
- " initialize_commands()\n",
- " load_trimer_fab(ab, sln, pdb, metric, RBD_chains, heavy_chains, light_chains, trimer)\n",
- " ace2_contact(RBD_chains)\n",
- " fab_contacts(RBD_chains, rbd_fab, all_ct_sites)\n",
- " \n",
- " f.write(\"\"\"\n",
- "#view1\n",
- "set_view (\\\n",
- " 0.833226204, -0.528404295, 0.162850752,\\\n",
- " -0.477664202, -0.539514899, 0.693361998,\\\n",
- " -0.278514773, -0.655512929, -0.701937377,\\\n",
- " 0.000000000, -0.000000000, -286.698059082,\\\n",
- " -23.955575943, 16.451217651, 0.807323456,\\\n",
- " -15130.423828125, 15703.833984375, -20.000000000 )\n",
- " \n",
- "\"\"\")\n",
- " f.write(f'save {ab}_{pdb}.pse\\n')\n",
- " f.write(f'# png {ab}_{pdb}_view1.png, ray=1, 600, 600')\n",
- " \n",
- " f.close()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "language_info": {
- "name": "python"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/results/summary/custom-plots_LY555.md b/results/summary/custom-plots_LY555.md
new file mode 100644
index 0000000..866262f
--- /dev/null
+++ b/results/summary/custom-plots_LY555.md
@@ -0,0 +1,290 @@
+Custom analyses LY mAbs
+================
+Tyler Starr
+2/6/2021
+
+- [Data input and formatting](#data-input-and-formatting)
+- [Part 1: Circulating variants at the per-mut
+ level](#part-1-circulating-variants-at-the-per-mut-level)
+- [Part 2: Distribution of functional effects of mutations circulating
+ at different
+ levels](#part-2-distribution-of-functional-effects-of-mutations-circulating-at-different-levels)
+- [Part 3: escape within or without the structural
+ epitope](#part-3-escape-within-or-without-the-structural-epitope)
+
+This notebook does some random analyses on the clinical antibodies set
+that vary from the more constrained global pipeline.
+
+ require("knitr")
+ knitr::opts_chunk$set(echo = T)
+ knitr::opts_chunk$set(dev.args = list(png = list(type = "cairo")))
+
+ #list of packages to install/load
+ packages = c("yaml","data.table","tidyverse","ggrepel","bio3d","gridExtra")
+ #install any packages not already installed
+ installed_packages <- packages %in% rownames(installed.packages())
+ if(any(installed_packages == F)){
+ install.packages(packages[!installed_packages])
+ }
+ #load packages
+ invisible(lapply(packages, library, character.only=T))
+
+ #read in config file
+ config <- read_yaml("./config.yaml")
+
+ #read in escape profiles file
+ profiles_config <- read_yaml(file=config$escape_profiles_config)
+
+ #make output directory
+ output_dir <- config$custom_plots_dir
+ if(!file.exists(output_dir)){
+ dir.create(file.path(output_dir))
+ }
+
+Session info for reproducing environment:
+
+ sessionInfo()
+
+ ## R version 3.6.2 (2019-12-12)
+ ## Platform: x86_64-pc-linux-gnu (64-bit)
+ ## Running under: Ubuntu 18.04.4 LTS
+ ##
+ ## Matrix products: default
+ ## BLAS/LAPACK: /app/software/OpenBLAS/0.3.7-GCC-8.3.0/lib/libopenblas_haswellp-r0.3.7.so
+ ##
+ ## locale:
+ ## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
+ ## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
+ ## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
+ ## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
+ ## [9] LC_ADDRESS=C LC_TELEPHONE=C
+ ## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
+ ##
+ ## attached base packages:
+ ## [1] stats graphics grDevices utils datasets methods base
+ ##
+ ## other attached packages:
+ ## [1] gridExtra_2.3 bio3d_2.4-0 ggrepel_0.8.1 forcats_0.4.0
+ ## [5] stringr_1.4.0 dplyr_0.8.3 purrr_0.3.3 readr_1.3.1
+ ## [9] tidyr_1.0.0 tibble_3.0.2 ggplot2_3.3.0 tidyverse_1.3.0
+ ## [13] data.table_1.12.8 yaml_2.2.0 knitr_1.26
+ ##
+ ## loaded via a namespace (and not attached):
+ ## [1] tidyselect_1.1.0 xfun_0.11 haven_2.2.0 colorspace_1.4-1
+ ## [5] vctrs_0.3.1 generics_0.0.2 htmltools_0.4.0 rlang_0.4.7
+ ## [9] pillar_1.4.5 glue_1.3.1 withr_2.1.2 DBI_1.1.0
+ ## [13] dbplyr_1.4.2 modelr_0.1.5 readxl_1.3.1 lifecycle_0.2.0
+ ## [17] munsell_0.5.0 gtable_0.3.0 cellranger_1.1.0 rvest_0.3.5
+ ## [21] evaluate_0.14 parallel_3.6.2 fansi_0.4.0 broom_0.7.0
+ ## [25] Rcpp_1.0.3 scales_1.1.0 backports_1.1.5 jsonlite_1.6
+ ## [29] fs_1.3.1 hms_0.5.2 digest_0.6.23 stringi_1.4.3
+ ## [33] grid_3.6.2 cli_2.0.0 tools_3.6.2 magrittr_1.5
+ ## [37] crayon_1.3.4 pkgconfig_2.0.3 ellipsis_0.3.0 xml2_1.2.2
+ ## [41] reprex_0.3.0 lubridate_1.7.4 assertthat_0.2.1 rmarkdown_2.0
+ ## [45] httr_1.4.1 rstudioapi_0.10 R6_2.4.1 compiler_3.6.2
+
+Data input and formatting
+-------------------------
+
+Read in escape fractions, rename some things to make them easier to work
+with.
+
+ scores <- data.table(read.csv(file=config$escape_fracs,stringsAsFactors=F))
+
+ scores <- scores[selection %in% names(profiles_config$`LY_cocktail`$conditions) & library=="average", .(selection,condition,site,protein_site,wildtype,mutation,mut_escape_frac_epistasis_model,site_total_escape_frac_epistasis_model)]
+
+ setnames(scores,"mut_escape_frac_epistasis_model","mut_escape_frac");setnames(scores,"site_total_escape_frac_epistasis_model","site_total_escape")
+
+ scores[,antibody:=as.character(profiles_config$`LY_cocktail`$conditions[condition])]
+
+ scores <- scores[,.(antibody,site,protein_site,wildtype,mutation,mut_escape_frac,site_total_escape)]
+
+ scores[,site_max_escape:=max(mut_escape_frac,na.rm=T),by=c("antibody","site")]
+
+Part 1: Circulating variants at the per-mut level
+-------------------------------------------------
+
+Current notebook on circulating mutations considers all mutations at a
+site together, regardless of the escape conferred by the particular
+mutation that is circulating. Make plots at the per-mut level.
+
+We read in table reporting circulating variants. We add new columns to
+our data frame indicating the nobs and frequency on GISAID, and the
+number of countries in which a mutant has been observed. Then, for each
+antibody, we plot per-mutation escape fraction versus frequency (log10),
+with a ‘pseudo-frequency’ of 0.1x the lowest actual frequency, to enable
+log10 plotting)
+
+ #read in table giving mutant frequencies on GISAID
+ counts <- read.csv(config$gisaid_mutation_counts,stringsAsFactors=F)
+ #add to scores table
+ scores[,count:=0];scores[,n_countries:=0];scores[,frequency:=0]
+ for(i in 1:nrow(counts)){
+ scores[protein_site==counts[i,"site"] & mutation==counts[i,"mutant"],count:=counts[i,"count"]]
+ scores[protein_site==counts[i,"site"] & mutation==counts[i,"mutant"],n_countries:=counts[i,"n_countries"]]
+ scores[protein_site==counts[i,"site"] & mutation==counts[i,"mutant"],frequency:=counts[i,"frequency"]]
+ }
+
+ #set factor order for antibodies
+ scores$antibody <- factor(scores$antibody,levels=c("LY-CoV555","LY-CoV016","LY-CoV555+LY-CoV016"))
+
+ scores[,pseudo_frequency:=frequency]
+ scores[frequency==0,pseudo_frequency:=0.1*min(scores[frequency>0,frequency])]
+
+ p1 <- ggplot(scores)+aes(x=pseudo_frequency,y=mut_escape_frac)+
+ geom_point(shape=16, alpha=0.5, size=2.25)+
+ facet_wrap(~antibody,nrow=1)+
+ scale_x_continuous(trans="log10")+
+ scale_y_continuous(limits=c(0,1.05))+
+ theme_classic()+
+ xlab('mutant frequency on GISAID (log10 scale)')+
+ ylab('mutant escape fraction')+
+ geom_text_repel(aes(label=ifelse((mut_escape_frac>0.15 & frequency>1e-6) | (mut_escape_frac>0.05 & frequency>1e-3),as.character(paste(wildtype,protein_site,mutation,sep="")),'')),size=3,color="gray40")
+ p1
+
+
+
+ invisible(dev.print(pdf, paste(output_dir,"/circ-mut-scatter_mAbs_LY.pdf",sep="")))
+
+Part 2: Distribution of functional effects of mutations circulating at different levels
+---------------------------------------------------------------------------------------
+
+For contextualizing ACE2 and expression deficits of antigenic mutations,
+what is the distribution of these phenotypes among sequences on GISAID?
+
+ #read in DMS scores
+ dms <- data.table(read.csv(file=config$mut_bind_expr,stringsAsFactors = F))
+ setnames(dms,"site_SARS2","site")
+ dms <- dms[mutant != "*",]
+ dms <- dms[as.character(wildtype) != as.character(mutant),]
+
+ dms[,count:=0]
+ for(i in 1:nrow(counts)){
+ dms[site==counts[i,"site"] & mutant==counts[i,"mutant"],count:=counts[i,"count"]]
+ }
+
+ #define factor for different nobs cutoffs, and collate into long data table for plotting
+ muts_temp <- dms
+ muts_temp$nobs_indicator<-factor("all muts",levels=c("all muts",">=1", ">=10",">=25"))
+ muts_temp_add0 <- dms[count>=1, ]
+ muts_temp_add0$nobs_indicator<-factor(">=1",levels=c("all muts",">=1", ">=10",">=25"))
+ muts_temp_add10 <- dms[count>=10, ]
+ muts_temp_add10$nobs_indicator<-factor(">=10",levels=c("all muts",">=1", ">=10",">=25"))
+ muts_temp_add25 <- dms[count>=25, ]
+ muts_temp_add25$nobs_indicator<-factor(">=25",levels=c("all muts",">=1", ">=10",">=25"))
+
+ muts_temp <- rbind(muts_temp,muts_temp_add0,muts_temp_add10,muts_temp_add25)
+ muts_temp$nobs_indicator <- factor(muts_temp$nobs_indicator, levels=c("all muts",">=1", ">=10",">=25"))
+
+ set.seed(198)
+ p1 <- ggplot(muts_temp[!is.na(muts_temp$bind_avg),],aes(x=nobs_indicator,y=bind_avg))+
+ geom_boxplot(outlier.shape=16, width=0.4, outlier.alpha=0.5)+
+ #geom_jitter(width=0.2, alpha=0.1, shape=16)+
+ xlab("mutation count in GISAID sequences")+ylab("mutation effect on binding")+
+ theme_classic()+geom_hline(yintercept=-2.35, linetype = 'dotted', col = 'gray50')+
+ geom_hline(yintercept=0,linetype='dashed',col='gray50')
+
+ p2 <- ggplot(muts_temp[!is.na(muts_temp$expr_avg),],aes(x=nobs_indicator,y=expr_avg))+
+ geom_boxplot(outlier.shape=16, width=0.4, outlier.alpha=0.5)+
+ #geom_jitter(width=0.2, alpha=0.1, shape=16)+
+ xlab("mutation count in GISAID sequences")+ylab("mutation effect on expression")+
+ theme_classic()+geom_hline(yintercept=-1, linetype = 'dotted', col = 'gray50')+
+ geom_hline(yintercept=0,linetype='dashed',col='gray50')
+
+ grid.arrange(p1,p2,ncol=2)
+
+
+
+ invisible(dev.print(pdf, paste(output_dir,"/distribution-ACE2-expr-dms_v_nobs-GISAID.pdf",sep="")))
+
+Part 3: escape within or without the structural epitope
+-------------------------------------------------------
+
+We want to try a few plots – at the per-mut and per-site level, compare
+distribution of escape scores between structural contacts and
+non-contacts. We should try the contact/non-contact, but also a “close”
+category corresponding to 4-8A distance? Means we’ll probably want to
+re-do the structural analysis here within this “custom analyses” script.
+
+First, load pdbs, and generate list of residues that are <4A or 4-8A
+from each mAb
+
+ pdb_LY555 <- read.pdb(file="./data/pdbs/7kmg_single.pdb")
+
+ ## PDB has ALT records, taking A only, rm.alt=TRUE
+
+ pdb_LY016 <- read.pdb(file="./data/pdbs/7c01_single.pdb")
+
+ contacts_LY555_4A <- binding.site(pdb_LY555,
+ a.inds=atom.select(pdb_LY555, chain="C"),
+ b.inds=atom.select(pdb_LY555,chain=c("A","B")),
+ cutoff=4,hydrogens=F)$resno
+ contacts_LY555_4to8A <- binding.site(pdb_LY555,
+ a.inds=atom.select(pdb_LY555, chain="C"),
+ b.inds=atom.select(pdb_LY555,chain=c("B","D")),
+ cutoff=8,hydrogens=F)$resno
+ contacts_LY555_4to8A <- contacts_LY555_4to8A[!(contacts_LY555_4to8A %in% contacts_LY555_4A)]
+
+
+ contacts_LY016_4A <- binding.site(pdb_LY016,
+ a.inds=atom.select(pdb_LY016, chain="A"),
+ b.inds=atom.select(pdb_LY016,chain=c("H","L")),
+ cutoff=4,hydrogens=F)$resno
+ contacts_LY016_4to8A <- binding.site(pdb_LY016,
+ a.inds=atom.select(pdb_LY016, chain="A"),
+ b.inds=atom.select(pdb_LY016,chain=c("H","L")),
+ cutoff=8,hydrogens=F)$resno
+ contacts_LY016_4to8A <- contacts_LY016_4to8A[!(contacts_LY016_4to8A %in% contacts_LY016_4A)]
+
+Next, make jitterplots showing site total escape for the three
+categories of structural class.
+
+ temp <- scores[antibody %in% c("LY-CoV016","LY-CoV555"),]
+ #add column whether a site is in direct contact, proximal, or distal to interface
+ temp[,contact:="distal"]
+ temp[antibody=="LY-CoV016" & protein_site %in% contacts_LY016_4A,contact:="contact"]
+ temp[antibody=="LY-CoV016" & protein_site %in% contacts_LY016_4to8A,contact:="proximal"]
+ temp[antibody=="LY-CoV555" & protein_site %in% contacts_LY555_4A,contact:="contact"]
+ temp[antibody=="LY-CoV555" & protein_site %in% contacts_LY555_4to8A,contact:="proximal"]
+ temp$contact <- factor(temp$contact,levels=c("contact","proximal","distal"))
+
+ #add value indicating the heuristic cutoff for "strong" escape for each mAb
+ cutoff_LYCoV016 <- max(5*median(temp[antibody=="LY-CoV016",site_total_escape]),0.05*max(temp[antibody=="LY-CoV016",site_total_escape]))
+ cutoff_LYCoV555 <- max(5*median(temp[antibody=="LY-CoV555",site_total_escape]),0.05*max(temp[antibody=="LY-CoV555",site_total_escape]))
+
+ temp[antibody=="LY-CoV016",cutoff:=cutoff_LYCoV016]
+ temp[antibody=="LY-CoV555",cutoff:=cutoff_LYCoV555]
+
+ #sitewise
+ ggplot(unique(temp[,.(antibody,protein_site,site_total_escape,contact)]),aes(x=contact,y=site_total_escape))+
+ geom_jitter(width=0.15, alpha=0.5,height=0,size=2.25,shape=16)+
+ geom_hline(data=unique(temp[,.(antibody,cutoff)]),aes(yintercept=cutoff),linetype="dashed",color="gray50")+
+ theme_classic()+
+ theme(axis.text.x=element_text(angle=90,hjust=1))+
+ xlab("structural context")+ylab("sitewise total escape")+
+ facet_wrap(~antibody,nrow=1)
+
+
+
+ invisible(dev.print(pdf, paste(output_dir,"/escape_by_contact_type.pdf",sep="")))
+
+Table with number of sites in each structural category, and number of
+sites of “strong” escape
+
+ temp <- unique(temp[,.(antibody,protein_site,site_total_escape,contact,cutoff)])
+
+ dt <- data.frame(antibody=c("LY-CoV555","LY-CoV016"),n_contact=NA,n_contact_escape=NA,n_proximal=NA,n_proximal_escape=NA,n_distal=NA,n_distal_escape=NA)
+ for(i in 1:nrow(dt)){
+ dt$n_contact[i] <- nrow(temp[as.character(antibody)==dt[i,"antibody"] & contact=="contact",])
+ dt$n_contact_escape[i] <- nrow(temp[as.character(antibody)==dt[i,"antibody"] & contact=="contact" & site_total_escape>cutoff,])
+ dt$n_proximal[i] <- nrow(temp[as.character(antibody)==dt[i,"antibody"] & contact=="proximal",])
+ dt$n_proximal_escape[i] <- nrow(temp[as.character(antibody)==dt[i,"antibody"] & contact=="proximal" & site_total_escape>cutoff,])
+ dt$n_distal[i] <- nrow(temp[as.character(antibody)==dt[i,"antibody"] & contact=="distal",])
+ dt$n_distal_escape[i] <- nrow(temp[as.character(antibody)==dt[i,"antibody"] & contact=="distal" & site_total_escape>cutoff,])
+ }
+ kable(dt)
+
+| antibody | n\_contact | n\_contact\_escape | n\_proximal | n\_proximal\_escape | n\_distal | n\_distal\_escape |
+|:----------|-----------:|-------------------:|------------:|--------------------:|----------:|------------------:|
+| LY-CoV555 | 15 | 7 | 3 | 0 | 155 | 2 |
+| LY-CoV016 | 23 | 15 | 23 | 3 | 128 | 1 |
diff --git a/results/summary/custom-plots_LY555_files/figure-gfm/GISAID_DFE-1.png b/results/summary/custom-plots_LY555_files/figure-gfm/GISAID_DFE-1.png
new file mode 100644
index 0000000..4f84806
Binary files /dev/null and b/results/summary/custom-plots_LY555_files/figure-gfm/GISAID_DFE-1.png differ
diff --git a/results/summary/custom-plots_LY555_files/figure-gfm/mutation_escape_v_freq-1.png b/results/summary/custom-plots_LY555_files/figure-gfm/mutation_escape_v_freq-1.png
new file mode 100644
index 0000000..b7d6b08
Binary files /dev/null and b/results/summary/custom-plots_LY555_files/figure-gfm/mutation_escape_v_freq-1.png differ
diff --git a/results/summary/custom-plots_LY555_files/figure-gfm/plot_total_escape_contacts-1.png b/results/summary/custom-plots_LY555_files/figure-gfm/plot_total_escape_contacts-1.png
new file mode 100644
index 0000000..f097096
Binary files /dev/null and b/results/summary/custom-plots_LY555_files/figure-gfm/plot_total_escape_contacts-1.png differ
diff --git a/results/summary/rulegraph.svg b/results/summary/rulegraph.svg
index 6307fba..01de77f 100644
--- a/results/summary/rulegraph.svg
+++ b/results/summary/rulegraph.svg
@@ -4,49 +4,49 @@
-