diff --git a/Snakefile b/Snakefile index 340df54..5e049e8 100644 --- a/Snakefile +++ b/Snakefile @@ -86,6 +86,7 @@ rule make_summary: homolog_escape=nb_markdown('homolog_escape.ipynb'), make_supp_data=nb_markdown('make_supp_data.ipynb'), structural_contacts='results/summary/annotate_structural_contacts.md', + custom_plots='results/summary/custom-plots_LY555.md', output: summary = os.path.join(config['summary_dir'], 'summary.md') run: @@ -146,6 +147,8 @@ rule make_summary: which are [here]({path(config['supp_data_dir'])}). These include `dms-view` input files. + 16. Make custom plots for this project that fall out of the core pipeline, summarzied in [this notebook]({path(input.custom_plots)}) + """ ).strip()) @@ -158,6 +161,26 @@ rule make_rulegraph: shell: "snakemake --forceall --rulegraph | dot -Tsvg > {output}" +rule custom_plots: + input: + config['escape_fracs'], + config['gisaid_mutation_counts'] + output: + md='results/summary/custom-plots_LY555.md', + md_files=directory('results/summary/custom-plots_LY555_files') + envmodules: + 'R/3.6.2-foss-2019b' + params: + nb='custom-plots_LY555.Rmd', + md='custom-plots_LY555.md', + md_files='custom-plots_LY555_files' + shell: + """ + R -e \"rmarkdown::render(input=\'{params.nb}\')\"; + mv {params.md} {output.md}; + mv {params.md_files} {output.md_files} + """ + rule structural_contacts: input: config['structural_contacts_config'] diff --git a/cluster.yaml b/cluster.yaml index 44537a2..ae0f3ff 100644 --- a/cluster.yaml +++ b/cluster.yaml @@ -31,3 +31,9 @@ structural_contacts: time: 0-1 mem: 50000 constraint: gizmok + +custom_plots: + cpus: 4 + time: 0-1 + mem: 50000 + constraint: gizmok diff --git a/config.yaml b/config.yaml index 1e66fd9..4550310 100644 --- a/config.yaml +++ b/config.yaml @@ -179,3 +179,4 @@ structural_contacts_dir: results/structural_contacts structural_contacts: results/structural_contacts/structural_contacts.csv antibody_contacts: results/structural_contacts/antibody_contacts.csv escape_selections_dir: results/escape_selections +custom_plots_dir: results/custom_plots_LY \ No newline at end of file diff --git a/custom_analyses/custom-plots_LY555.Rmd b/custom-plots_LY555.Rmd similarity index 95% rename from custom_analyses/custom-plots_LY555.Rmd rename to custom-plots_LY555.Rmd index fc64eba..e25f55f 100644 --- a/custom_analyses/custom-plots_LY555.Rmd +++ b/custom-plots_LY555.Rmd @@ -28,13 +28,13 @@ if(any(installed_packages == F)){ invisible(lapply(packages, library, character.only=T)) #read in config file -config <- read_yaml("../config.yaml") +config <- read_yaml("./config.yaml") #read in escape profiles file -profiles_config <- read_yaml(file=paste("../",config$escape_profiles_config,sep="")) +profiles_config <- read_yaml(file=config$escape_profiles_config) #make output directory -output_dir <- "../results/custom_plots_LY" +output_dir <- config$custom_plots_dir if(!file.exists(output_dir)){ dir.create(file.path(output_dir)) } @@ -49,7 +49,7 @@ sessionInfo() Read in escape fractions, rename some things to make them easier to work with. ```{r data input} -scores <- data.table(read.csv(file=paste("../",config$escape_fracs,sep=""),stringsAsFactors=F)) +scores <- data.table(read.csv(file=config$escape_fracs,stringsAsFactors=F)) scores <- scores[selection %in% names(profiles_config$`LY_cocktail`$conditions) & library=="average", .(selection,condition,site,protein_site,wildtype,mutation,mut_escape_frac_epistasis_model,site_total_escape_frac_epistasis_model)] @@ -72,7 +72,7 @@ We read in table reporting circulating variants. We add new columns to our data ```{r mutation_escape_v_freq, echo=T, fig.width=10, fig.height=3.75, fig.align="center", dpi=300,dev="png"} #read in table giving mutant frequencies on GISAID -counts <- read.csv(paste("../",config$gisaid_mutation_counts,sep=""),stringsAsFactors=F) +counts <- read.csv(config$gisaid_mutation_counts,stringsAsFactors=F) #add to scores table scores[,count:=0];scores[,n_countries:=0];scores[,frequency:=0] for(i in 1:nrow(counts)){ @@ -108,7 +108,7 @@ For contextualizing ACE2 and expression deficits of antigenic mutations, what is ```{r GISAID_DFE, echo=T, fig.width=7, fig.height=4, fig.align="center", dpi=300,dev="png"} #read in DMS scores -dms <- data.table(read.csv(file=paste("../",config$mut_bind_expr,sep=""),stringsAsFactors = F)) +dms <- data.table(read.csv(file=config$mut_bind_expr,stringsAsFactors = F)) setnames(dms,"site_SARS2","site") dms <- dms[mutant != "*",] dms <- dms[as.character(wildtype) != as.character(mutant),] @@ -160,8 +160,8 @@ We want to try a few plots -- at the per-mut and per-site level, compare distrib First, load pdbs, and generate list of residues that are <4A or 4-8A from each mAb ```{r define_structural_contacts} -pdb_LY555 <- read.pdb(file="../data/pdbs/7kmg_single.pdb") -pdb_LY016 <- read.pdb(file="../data/pdbs/7c01_single.pdb") +pdb_LY555 <- read.pdb(file="./data/pdbs/7kmg_single.pdb") +pdb_LY016 <- read.pdb(file="./data/pdbs/7c01_single.pdb") contacts_LY555_4A <- binding.site(pdb_LY555, a.inds=atom.select(pdb_LY555, chain="C"), diff --git a/custom_analyses/README.md b/custom_analyses/README.md deleted file mode 100755 index 723c995..0000000 --- a/custom_analyses/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# Custom analyses - -This directory contains custom analyses for specific sets of antibodies or sera. - -## Analyses in this directory: -### For Bjorkman / Barnes antibodies: -#### Write pymol commmands to make PSEs -The notebook [write_pymol_commands.ipynb](./write_pymol_commands.ipynb) takes as input the pdb files output by `output_pdbs.ipynb`, the escape sites called by `call_strong_escape_sites.ipynb`, and the contact sites called by `annotate_structural_contacts.Rmd`. - -It writes text files that contain commands that can be pasted into the PyMol command line to create a PSE that can be used for further structure-gazing and manual analysis. - -This notebook might work for other antibodies, but I haven't tested it. - -### `custom-plots_xxx` scripts -These scripts make customized plots for different antibody sets that vary from the primary computational pipeline. They are currently just ran manually in R, but when each `xxx` antibody set splits off to its own repo, these can be incorporated into the `Snakemake` pipeline. \ No newline at end of file diff --git a/custom_analyses/write_pymol_commands.ipynb b/custom_analyses/write_pymol_commands.ipynb deleted file mode 100755 index 4ea0289..0000000 --- a/custom_analyses/write_pymol_commands.ipynb +++ /dev/null @@ -1,445 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Make text files with PyMol commands\n", - "Make text files with commands that can be run in PyMol to read in PDB files with reassigned b-factors according to antibody escape.\n", - "\n", - "This is specifically designed for the Barnes et al. structures of S2P trimer with Fab, but I think I have set up the config such that it could work for RBD-Fab alone." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from IPython.display import display, HTML\n", - "import pandas as pd\n", - "import yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Read in configuration and PSE config:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Reading PSE specs from ../data/pse_config.yaml\n" - ] - } - ], - "source": [ - "with open('../config.yaml') as f:\n", - " config = yaml.safe_load(f)\n", - " \n", - "print(f\"Reading PSE specs from {config['pse_config']}\")\n", - "with open(config['pse_config']) as f:\n", - " pse_config = yaml.safe_load(f)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Make output directory" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "os.makedirs(config['pse_dir'], exist_ok=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Define some global parameters:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "outprefix = 'pymol_commands' # make a separate text file for each\n", - "\n", - "# all maps should also be aligned to 6m0j\n", - "ACE2_pdb = '6m0j'\n", - "\n", - "# view1\n", - "view1 = \"\"\"\\nset_view (\\\n", - " 0.833226204, -0.528404295, 0.162850752,\\\n", - " -0.477664202, -0.539514899, 0.693361998,\\\n", - " -0.278514773, -0.655512929, -0.701937377,\\\n", - " 0.000000000, -0.000000000, -286.698059082,\\\n", - " -23.955575943, 16.451217651, 0.807323456,\\\n", - " -15130.423828125, 15703.833984375, -20.000000000 )\"\"\"\n", - "\n", - "# which set of data to show, and how to color surfaces.\n", - "pymol_specs = {\n", - " 'metric' : 'max',\n", - " 'color_min' : 'white',\n", - " 'color_max' : 'red',\n", - " 'view' : view1,\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Write function to perform write the same generic commands to text file for every antibody:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "def initialize_commands():\n", - " text = \"\"\"\n", - "#commands to load pdbs\n", - "\n", - "reinitialize\n", - "set seq_view, 0\n", - "set ray_shadows, 0\n", - "set spec_reflect, 1\n", - "set spec_power, 100000\n", - "set sphere_scale, 0.75\n", - "\n", - "load 6m0j_b-factor-mean-bind.pdb\n", - "load 6m0j_b-factor-mean-expr.pdb\n", - "\n", - "# create ACE2, RBD_bind, RBD_expr\n", - "# hide all\n", - "create ACE2, 6m0j_b-factor-mean-bind and chain A\n", - "create RBD_bind, 6m0j_b-factor-mean-bind and chain E; remove RBD_bind and chain A\n", - "create RBD_expr, 6m0j_b-factor-mean-expr and chain E; remove RBD_expr and chain A\n", - "delete 6m0j_b-factor-mean-bind\n", - "delete 6m0j_b-factor-mean-expr\n", - "\n", - "# start showing stuff\n", - "show cartoon, ACE2; color gray20, ACE2; set cartoon_transparency, 0.5, ACE2\n", - "show cartoon, RBD_bind; spectrum b, red white, RBD_bind, minimum=-2, maximum=0; show sticks, RBD_bind and resn NAG\n", - "show cartoon, RBD_expr; spectrum b, red white, RBD_expr, minimum=-2, maximum=0; show sticks, RBD_expr and resn NAG\n", - "\n", - "\"\"\" \n", - " f.write(text)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Write function to align trimer-Fab structure to 6m0j and to create Fab, RBD, and spike chains\n", - "\n", - "Might be able to use formatting like this [example](https://stackoverflow.com/questions/16162383/how-to-easily-write-a-multi-line-file-with-variables-python-2-6/16162599)." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "def load_trimer_fab(ab, sln, pdb, metric, RBD_chains, heavy_chains, light_chains, trimer):\n", - "\n", - " f.write('# load Fab-trimer PDB and align structures\\n')\n", - " f.write(f'load data/{ab}_{sln}_{pdb}_{metric}_escape.pdb\\n')\n", - " f.write(f'set_name {ab}_{sln}_{pdb}_{metric}_escape, {pdb}\\n')\n", - " f.write(f'align {pdb} and (chain {RBD_chains[0]} and resi 333-526), RBD_bind\\n\\n')\n", - "\n", - " f.write('# create Fab chains\\n')\n", - " f.write(f'show_as cartoon, {pdb}; color deepblue, {pdb};\\n')\n", - " \n", - " for hc, lc in zip(heavy_chains, light_chains):\n", - " f.write(f'create {hc}, {pdb} and chain {hc}; create {lc}, {pdb} and chain {lc}\\n')\n", - " \n", - " hc_string = ' or '.join(heavy_chains)\n", - " lc_string = ' or '.join(light_chains)\n", - " f.write(f'color deepblue, ({hc_string}); color marine, ({lc_string})\\n\\n')\n", - "\n", - " f.write('# create RBD chains\\n')\n", - " for rbd in RBD_chains:\n", - " f.write(f'create RBD_{rbd}, {pdb} and chain {rbd} and resi 333-526; show cartoon, RBD_{rbd}; show sticks, RBD_{rbd} and resn NAG; spectrum b, white red, RBD_{rbd}, minimum=0\\n')\n", - " \n", - " if trimer:\n", - " spikes = ['spike_{0}'.format(rbd) for rbd in RBD_chains]\n", - " spike_string = ' or '.join(spikes)\n", - " f.write('\\n# create non-RBD spike chains\\n')\n", - " for rbd in RBD_chains:\n", - " f.write(f'create spike_{rbd}, {pdb} and chain {rbd} and resi 1-332+527-1253\\n')\n", - " f.write(f'color slate, ({spike_string})\\n\\n')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Write function to show ACE2 contact sites as mesh" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def ace2_contact(RBD_chains):\n", - " rbds = ['RBD_{0}'.format(rbd) for rbd in RBD_chains]\n", - " rbd_string = ' or '.join(rbds)\n", - " \n", - " f.write('# show ACE2 contact sites as mesh\\n')\n", - " f.write(f'create ACE2_contacts, ({rbd_string}) and resi 417+446+449+453+455+456+475+486+487+489+493+496+498+500+501+502+505\\n')\n", - " f.write(f'show mesh, ACE2_contacts; color gray40, ACE2_contacts\\n')\n", - " f.write(f'sele ACE2_cont, ({rbd_string}) and resi 417+446+449+453+455+456+475+486+487+489+493+496+498+500+501+502+505\\n\\n')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Write function to create antibody contact sites" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "def fab_contacts(RBD_chains, rbd_fab, all_ct_sites):\n", - " \"\"\"\n", - " rbd_fab is a dictionary keyed by RBD chain (eg A, B, C)\n", - " values are a list of the contact sites for that chain\n", - " all_ct_sites is a list of sites that contact any RBD chain \n", - " \"\"\"\n", - " rbds = ['RBD_{0}'.format(rbd) for rbd in RBD_chains]\n", - " rbd_string = ' or '.join(rbds)\n", - " all_contacts = '+'.join(all_ct_sites)\n", - " \n", - " f.write('# create antibody contact sites and show as spheres\\n')\n", - " for rbd, sites in rbd_fab.items():\n", - " rbd_contacts = '+'.join(sites)\n", - " f.write(f'sele RBD_{rbd}_ct, RBD_{rbd} and resi {rbd_contacts}\\n')\n", - " f.write(f'show spheres, RBD_{rbd}_ct and name ca\\n')\n", - " f.write(f'sele all_ct, ({rbd_string}) and resi {all_contacts}\\n')\n", - " f.write(f'show spheres, all_ct and name ca\\n\\n')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Read in contact sites" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
namepdbchainposition
0ACE26M0JE417
1ACE26M0JE446
2ACE26M0JE449
3ACE26M0JE453
4ACE26M0JE455
\n", - "
" - ], - "text/plain": [ - " name pdb chain position\n", - "0 ACE2 6M0J E 417\n", - "1 ACE2 6M0J E 446\n", - "2 ACE2 6M0J E 449\n", - "3 ACE2 6M0J E 453\n", - "4 ACE2 6M0J E 455" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "contacts = pd.read_csv(os.path.join('../', config['structural_contacts']))\n", - "contacts['position'] = contacts['position'].astype(str)\n", - "contacts.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Loop through all the PDBs in the config and write output file." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "../results/pymol_commands/pymol_C105_6xcn.txt\n", - "../results/pymol_commands/pymol_C105_6xcm.txt\n", - "../results/pymol_commands/pymol_C144_7k90.txt\n", - "../results/pymol_commands/pymol_C002_7k8s.txt\n", - "../results/pymol_commands/pymol_C002_7k8t.txt\n", - "../results/pymol_commands/pymol_C121_7k8x.txt\n", - "../results/pymol_commands/pymol_C121_7k8y.txt\n", - "../results/pymol_commands/pymol_C135_7k8z.txt\n", - "../results/pymol_commands/pymol_C110_7k8v.txt\n" - ] - } - ], - "source": [ - "for name, specs in pse_config.items():\n", - " pdb = name\n", - " ab = specs[\"antibody\"]\n", - " sln = specs[\"selection\"]\n", - " RBD_chains = specs[\"RBD_chains\"]\n", - " heavy_chains = specs[\"heavy_chains\"]\n", - " light_chains = specs[\"light_chains\"]\n", - " trimer = specs[\"trimer\"]\n", - " \n", - " # rbd_fab is a dict, keyed by RBD chain; values are list of the contact sites\n", - " rbd_fab = ((contacts\n", - " .query('name==@ab & pdb==@pdb'))\n", - " [['chain', 'position']]\n", - " .reset_index(drop=True)\n", - " .groupby('chain')['position'].apply(list).to_dict()\n", - " )\n", - " \n", - " # create list of all contact sites for a given antibody\n", - " all_ct = []\n", - " for v in rbd_fab.values():\n", - " all_ct.extend(v)\n", - " all_ct_sites = sorted(list(set(all_ct))) \n", - " \n", - " metric = pymol_specs['metric']\n", - " \n", - " outFile = os.path.join(config['pse_dir'], f'pymol_{ab}_{name}.txt')\n", - " print(outFile)\n", - " \n", - " f = open(outFile, \"w\")\n", - " f.write(f\"# PyMol commands for {ab} {name}\")\n", - " initialize_commands()\n", - " load_trimer_fab(ab, sln, pdb, metric, RBD_chains, heavy_chains, light_chains, trimer)\n", - " ace2_contact(RBD_chains)\n", - " fab_contacts(RBD_chains, rbd_fab, all_ct_sites)\n", - " \n", - " f.write(\"\"\"\n", - "#view1\n", - "set_view (\\\n", - " 0.833226204, -0.528404295, 0.162850752,\\\n", - " -0.477664202, -0.539514899, 0.693361998,\\\n", - " -0.278514773, -0.655512929, -0.701937377,\\\n", - " 0.000000000, -0.000000000, -286.698059082,\\\n", - " -23.955575943, 16.451217651, 0.807323456,\\\n", - " -15130.423828125, 15703.833984375, -20.000000000 )\n", - " \n", - "\"\"\")\n", - " f.write(f'save {ab}_{pdb}.pse\\n')\n", - " f.write(f'# png {ab}_{pdb}_view1.png, ray=1, 600, 600')\n", - " \n", - " f.close()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/results/summary/custom-plots_LY555.md b/results/summary/custom-plots_LY555.md new file mode 100644 index 0000000..866262f --- /dev/null +++ b/results/summary/custom-plots_LY555.md @@ -0,0 +1,290 @@ +Custom analyses LY mAbs +================ +Tyler Starr +2/6/2021 + +- [Data input and formatting](#data-input-and-formatting) +- [Part 1: Circulating variants at the per-mut + level](#part-1-circulating-variants-at-the-per-mut-level) +- [Part 2: Distribution of functional effects of mutations circulating + at different + levels](#part-2-distribution-of-functional-effects-of-mutations-circulating-at-different-levels) +- [Part 3: escape within or without the structural + epitope](#part-3-escape-within-or-without-the-structural-epitope) + +This notebook does some random analyses on the clinical antibodies set +that vary from the more constrained global pipeline. + + require("knitr") + knitr::opts_chunk$set(echo = T) + knitr::opts_chunk$set(dev.args = list(png = list(type = "cairo"))) + + #list of packages to install/load + packages = c("yaml","data.table","tidyverse","ggrepel","bio3d","gridExtra") + #install any packages not already installed + installed_packages <- packages %in% rownames(installed.packages()) + if(any(installed_packages == F)){ + install.packages(packages[!installed_packages]) + } + #load packages + invisible(lapply(packages, library, character.only=T)) + + #read in config file + config <- read_yaml("./config.yaml") + + #read in escape profiles file + profiles_config <- read_yaml(file=config$escape_profiles_config) + + #make output directory + output_dir <- config$custom_plots_dir + if(!file.exists(output_dir)){ + dir.create(file.path(output_dir)) + } + +Session info for reproducing environment: + + sessionInfo() + + ## R version 3.6.2 (2019-12-12) + ## Platform: x86_64-pc-linux-gnu (64-bit) + ## Running under: Ubuntu 18.04.4 LTS + ## + ## Matrix products: default + ## BLAS/LAPACK: /app/software/OpenBLAS/0.3.7-GCC-8.3.0/lib/libopenblas_haswellp-r0.3.7.so + ## + ## locale: + ## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C + ## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 + ## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 + ## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C + ## [9] LC_ADDRESS=C LC_TELEPHONE=C + ## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C + ## + ## attached base packages: + ## [1] stats graphics grDevices utils datasets methods base + ## + ## other attached packages: + ## [1] gridExtra_2.3 bio3d_2.4-0 ggrepel_0.8.1 forcats_0.4.0 + ## [5] stringr_1.4.0 dplyr_0.8.3 purrr_0.3.3 readr_1.3.1 + ## [9] tidyr_1.0.0 tibble_3.0.2 ggplot2_3.3.0 tidyverse_1.3.0 + ## [13] data.table_1.12.8 yaml_2.2.0 knitr_1.26 + ## + ## loaded via a namespace (and not attached): + ## [1] tidyselect_1.1.0 xfun_0.11 haven_2.2.0 colorspace_1.4-1 + ## [5] vctrs_0.3.1 generics_0.0.2 htmltools_0.4.0 rlang_0.4.7 + ## [9] pillar_1.4.5 glue_1.3.1 withr_2.1.2 DBI_1.1.0 + ## [13] dbplyr_1.4.2 modelr_0.1.5 readxl_1.3.1 lifecycle_0.2.0 + ## [17] munsell_0.5.0 gtable_0.3.0 cellranger_1.1.0 rvest_0.3.5 + ## [21] evaluate_0.14 parallel_3.6.2 fansi_0.4.0 broom_0.7.0 + ## [25] Rcpp_1.0.3 scales_1.1.0 backports_1.1.5 jsonlite_1.6 + ## [29] fs_1.3.1 hms_0.5.2 digest_0.6.23 stringi_1.4.3 + ## [33] grid_3.6.2 cli_2.0.0 tools_3.6.2 magrittr_1.5 + ## [37] crayon_1.3.4 pkgconfig_2.0.3 ellipsis_0.3.0 xml2_1.2.2 + ## [41] reprex_0.3.0 lubridate_1.7.4 assertthat_0.2.1 rmarkdown_2.0 + ## [45] httr_1.4.1 rstudioapi_0.10 R6_2.4.1 compiler_3.6.2 + +Data input and formatting +------------------------- + +Read in escape fractions, rename some things to make them easier to work +with. + + scores <- data.table(read.csv(file=config$escape_fracs,stringsAsFactors=F)) + + scores <- scores[selection %in% names(profiles_config$`LY_cocktail`$conditions) & library=="average", .(selection,condition,site,protein_site,wildtype,mutation,mut_escape_frac_epistasis_model,site_total_escape_frac_epistasis_model)] + + setnames(scores,"mut_escape_frac_epistasis_model","mut_escape_frac");setnames(scores,"site_total_escape_frac_epistasis_model","site_total_escape") + + scores[,antibody:=as.character(profiles_config$`LY_cocktail`$conditions[condition])] + + scores <- scores[,.(antibody,site,protein_site,wildtype,mutation,mut_escape_frac,site_total_escape)] + + scores[,site_max_escape:=max(mut_escape_frac,na.rm=T),by=c("antibody","site")] + +Part 1: Circulating variants at the per-mut level +------------------------------------------------- + +Current notebook on circulating mutations considers all mutations at a +site together, regardless of the escape conferred by the particular +mutation that is circulating. Make plots at the per-mut level. + +We read in table reporting circulating variants. We add new columns to +our data frame indicating the nobs and frequency on GISAID, and the +number of countries in which a mutant has been observed. Then, for each +antibody, we plot per-mutation escape fraction versus frequency (log10), +with a ‘pseudo-frequency’ of 0.1x the lowest actual frequency, to enable +log10 plotting) + + #read in table giving mutant frequencies on GISAID + counts <- read.csv(config$gisaid_mutation_counts,stringsAsFactors=F) + #add to scores table + scores[,count:=0];scores[,n_countries:=0];scores[,frequency:=0] + for(i in 1:nrow(counts)){ + scores[protein_site==counts[i,"site"] & mutation==counts[i,"mutant"],count:=counts[i,"count"]] + scores[protein_site==counts[i,"site"] & mutation==counts[i,"mutant"],n_countries:=counts[i,"n_countries"]] + scores[protein_site==counts[i,"site"] & mutation==counts[i,"mutant"],frequency:=counts[i,"frequency"]] + } + + #set factor order for antibodies + scores$antibody <- factor(scores$antibody,levels=c("LY-CoV555","LY-CoV016","LY-CoV555+LY-CoV016")) + + scores[,pseudo_frequency:=frequency] + scores[frequency==0,pseudo_frequency:=0.1*min(scores[frequency>0,frequency])] + + p1 <- ggplot(scores)+aes(x=pseudo_frequency,y=mut_escape_frac)+ + geom_point(shape=16, alpha=0.5, size=2.25)+ + facet_wrap(~antibody,nrow=1)+ + scale_x_continuous(trans="log10")+ + scale_y_continuous(limits=c(0,1.05))+ + theme_classic()+ + xlab('mutant frequency on GISAID (log10 scale)')+ + ylab('mutant escape fraction')+ + geom_text_repel(aes(label=ifelse((mut_escape_frac>0.15 & frequency>1e-6) | (mut_escape_frac>0.05 & frequency>1e-3),as.character(paste(wildtype,protein_site,mutation,sep="")),'')),size=3,color="gray40") + p1 + + + + invisible(dev.print(pdf, paste(output_dir,"/circ-mut-scatter_mAbs_LY.pdf",sep=""))) + +Part 2: Distribution of functional effects of mutations circulating at different levels +--------------------------------------------------------------------------------------- + +For contextualizing ACE2 and expression deficits of antigenic mutations, +what is the distribution of these phenotypes among sequences on GISAID? + + #read in DMS scores + dms <- data.table(read.csv(file=config$mut_bind_expr,stringsAsFactors = F)) + setnames(dms,"site_SARS2","site") + dms <- dms[mutant != "*",] + dms <- dms[as.character(wildtype) != as.character(mutant),] + + dms[,count:=0] + for(i in 1:nrow(counts)){ + dms[site==counts[i,"site"] & mutant==counts[i,"mutant"],count:=counts[i,"count"]] + } + + #define factor for different nobs cutoffs, and collate into long data table for plotting + muts_temp <- dms + muts_temp$nobs_indicator<-factor("all muts",levels=c("all muts",">=1", ">=10",">=25")) + muts_temp_add0 <- dms[count>=1, ] + muts_temp_add0$nobs_indicator<-factor(">=1",levels=c("all muts",">=1", ">=10",">=25")) + muts_temp_add10 <- dms[count>=10, ] + muts_temp_add10$nobs_indicator<-factor(">=10",levels=c("all muts",">=1", ">=10",">=25")) + muts_temp_add25 <- dms[count>=25, ] + muts_temp_add25$nobs_indicator<-factor(">=25",levels=c("all muts",">=1", ">=10",">=25")) + + muts_temp <- rbind(muts_temp,muts_temp_add0,muts_temp_add10,muts_temp_add25) + muts_temp$nobs_indicator <- factor(muts_temp$nobs_indicator, levels=c("all muts",">=1", ">=10",">=25")) + + set.seed(198) + p1 <- ggplot(muts_temp[!is.na(muts_temp$bind_avg),],aes(x=nobs_indicator,y=bind_avg))+ + geom_boxplot(outlier.shape=16, width=0.4, outlier.alpha=0.5)+ + #geom_jitter(width=0.2, alpha=0.1, shape=16)+ + xlab("mutation count in GISAID sequences")+ylab("mutation effect on binding")+ + theme_classic()+geom_hline(yintercept=-2.35, linetype = 'dotted', col = 'gray50')+ + geom_hline(yintercept=0,linetype='dashed',col='gray50') + + p2 <- ggplot(muts_temp[!is.na(muts_temp$expr_avg),],aes(x=nobs_indicator,y=expr_avg))+ + geom_boxplot(outlier.shape=16, width=0.4, outlier.alpha=0.5)+ + #geom_jitter(width=0.2, alpha=0.1, shape=16)+ + xlab("mutation count in GISAID sequences")+ylab("mutation effect on expression")+ + theme_classic()+geom_hline(yintercept=-1, linetype = 'dotted', col = 'gray50')+ + geom_hline(yintercept=0,linetype='dashed',col='gray50') + + grid.arrange(p1,p2,ncol=2) + + + + invisible(dev.print(pdf, paste(output_dir,"/distribution-ACE2-expr-dms_v_nobs-GISAID.pdf",sep=""))) + +Part 3: escape within or without the structural epitope +------------------------------------------------------- + +We want to try a few plots – at the per-mut and per-site level, compare +distribution of escape scores between structural contacts and +non-contacts. We should try the contact/non-contact, but also a “close” +category corresponding to 4-8A distance? Means we’ll probably want to +re-do the structural analysis here within this “custom analyses” script. + +First, load pdbs, and generate list of residues that are <4A or 4-8A +from each mAb + + pdb_LY555 <- read.pdb(file="./data/pdbs/7kmg_single.pdb") + + ## PDB has ALT records, taking A only, rm.alt=TRUE + + pdb_LY016 <- read.pdb(file="./data/pdbs/7c01_single.pdb") + + contacts_LY555_4A <- binding.site(pdb_LY555, + a.inds=atom.select(pdb_LY555, chain="C"), + b.inds=atom.select(pdb_LY555,chain=c("A","B")), + cutoff=4,hydrogens=F)$resno + contacts_LY555_4to8A <- binding.site(pdb_LY555, + a.inds=atom.select(pdb_LY555, chain="C"), + b.inds=atom.select(pdb_LY555,chain=c("B","D")), + cutoff=8,hydrogens=F)$resno + contacts_LY555_4to8A <- contacts_LY555_4to8A[!(contacts_LY555_4to8A %in% contacts_LY555_4A)] + + + contacts_LY016_4A <- binding.site(pdb_LY016, + a.inds=atom.select(pdb_LY016, chain="A"), + b.inds=atom.select(pdb_LY016,chain=c("H","L")), + cutoff=4,hydrogens=F)$resno + contacts_LY016_4to8A <- binding.site(pdb_LY016, + a.inds=atom.select(pdb_LY016, chain="A"), + b.inds=atom.select(pdb_LY016,chain=c("H","L")), + cutoff=8,hydrogens=F)$resno + contacts_LY016_4to8A <- contacts_LY016_4to8A[!(contacts_LY016_4to8A %in% contacts_LY016_4A)] + +Next, make jitterplots showing site total escape for the three +categories of structural class. + + temp <- scores[antibody %in% c("LY-CoV016","LY-CoV555"),] + #add column whether a site is in direct contact, proximal, or distal to interface + temp[,contact:="distal"] + temp[antibody=="LY-CoV016" & protein_site %in% contacts_LY016_4A,contact:="contact"] + temp[antibody=="LY-CoV016" & protein_site %in% contacts_LY016_4to8A,contact:="proximal"] + temp[antibody=="LY-CoV555" & protein_site %in% contacts_LY555_4A,contact:="contact"] + temp[antibody=="LY-CoV555" & protein_site %in% contacts_LY555_4to8A,contact:="proximal"] + temp$contact <- factor(temp$contact,levels=c("contact","proximal","distal")) + + #add value indicating the heuristic cutoff for "strong" escape for each mAb + cutoff_LYCoV016 <- max(5*median(temp[antibody=="LY-CoV016",site_total_escape]),0.05*max(temp[antibody=="LY-CoV016",site_total_escape])) + cutoff_LYCoV555 <- max(5*median(temp[antibody=="LY-CoV555",site_total_escape]),0.05*max(temp[antibody=="LY-CoV555",site_total_escape])) + + temp[antibody=="LY-CoV016",cutoff:=cutoff_LYCoV016] + temp[antibody=="LY-CoV555",cutoff:=cutoff_LYCoV555] + + #sitewise + ggplot(unique(temp[,.(antibody,protein_site,site_total_escape,contact)]),aes(x=contact,y=site_total_escape))+ + geom_jitter(width=0.15, alpha=0.5,height=0,size=2.25,shape=16)+ + geom_hline(data=unique(temp[,.(antibody,cutoff)]),aes(yintercept=cutoff),linetype="dashed",color="gray50")+ + theme_classic()+ + theme(axis.text.x=element_text(angle=90,hjust=1))+ + xlab("structural context")+ylab("sitewise total escape")+ + facet_wrap(~antibody,nrow=1) + + + + invisible(dev.print(pdf, paste(output_dir,"/escape_by_contact_type.pdf",sep=""))) + +Table with number of sites in each structural category, and number of +sites of “strong” escape + + temp <- unique(temp[,.(antibody,protein_site,site_total_escape,contact,cutoff)]) + + dt <- data.frame(antibody=c("LY-CoV555","LY-CoV016"),n_contact=NA,n_contact_escape=NA,n_proximal=NA,n_proximal_escape=NA,n_distal=NA,n_distal_escape=NA) + for(i in 1:nrow(dt)){ + dt$n_contact[i] <- nrow(temp[as.character(antibody)==dt[i,"antibody"] & contact=="contact",]) + dt$n_contact_escape[i] <- nrow(temp[as.character(antibody)==dt[i,"antibody"] & contact=="contact" & site_total_escape>cutoff,]) + dt$n_proximal[i] <- nrow(temp[as.character(antibody)==dt[i,"antibody"] & contact=="proximal",]) + dt$n_proximal_escape[i] <- nrow(temp[as.character(antibody)==dt[i,"antibody"] & contact=="proximal" & site_total_escape>cutoff,]) + dt$n_distal[i] <- nrow(temp[as.character(antibody)==dt[i,"antibody"] & contact=="distal",]) + dt$n_distal_escape[i] <- nrow(temp[as.character(antibody)==dt[i,"antibody"] & contact=="distal" & site_total_escape>cutoff,]) + } + kable(dt) + +| antibody | n\_contact | n\_contact\_escape | n\_proximal | n\_proximal\_escape | n\_distal | n\_distal\_escape | +|:----------|-----------:|-------------------:|------------:|--------------------:|----------:|------------------:| +| LY-CoV555 | 15 | 7 | 3 | 0 | 155 | 2 | +| LY-CoV016 | 23 | 15 | 23 | 3 | 128 | 1 | diff --git a/results/summary/custom-plots_LY555_files/figure-gfm/GISAID_DFE-1.png b/results/summary/custom-plots_LY555_files/figure-gfm/GISAID_DFE-1.png new file mode 100644 index 0000000..4f84806 Binary files /dev/null and b/results/summary/custom-plots_LY555_files/figure-gfm/GISAID_DFE-1.png differ diff --git a/results/summary/custom-plots_LY555_files/figure-gfm/mutation_escape_v_freq-1.png b/results/summary/custom-plots_LY555_files/figure-gfm/mutation_escape_v_freq-1.png new file mode 100644 index 0000000..b7d6b08 Binary files /dev/null and b/results/summary/custom-plots_LY555_files/figure-gfm/mutation_escape_v_freq-1.png differ diff --git a/results/summary/custom-plots_LY555_files/figure-gfm/plot_total_escape_contacts-1.png b/results/summary/custom-plots_LY555_files/figure-gfm/plot_total_escape_contacts-1.png new file mode 100644 index 0000000..f097096 Binary files /dev/null and b/results/summary/custom-plots_LY555_files/figure-gfm/plot_total_escape_contacts-1.png differ diff --git a/results/summary/rulegraph.svg b/results/summary/rulegraph.svg index 6307fba..01de77f 100644 --- a/results/summary/rulegraph.svg +++ b/results/summary/rulegraph.svg @@ -4,49 +4,49 @@ - + snakemake_dag - + 0 - -make_summary + +make_summary 1 - + make_rulegraph -1->0 - - +1->0 + + 2 - + get_codon_variant_table -2->0 - - +2->0 + + 3 - + aggregate_variant_counts -2->3 +2->3 16 - + count_variants @@ -55,13 +55,13 @@ -3->0 - - +3->0 + + 4 - + counts_to_cells_ratio @@ -71,223 +71,223 @@ 5 - -counts_to_scores + +counts_to_scores -3->5 - - +3->5 + + -4->0 - - +4->0 + + -5->0 - - +5->0 + + 6 - -scores_to_frac_escape + +scores_to_frac_escape 5->6 - - + + -6->0 - - +6->0 + + 7 - -call_strong_escape_sites + +call_strong_escape_sites 6->7 - - + + 8 - -escape_profiles + +escape_profiles -6->8 - - +6->8 + + 9 - -mds_escape_profiles + +mds_escape_profiles 6->9 - - + + 10 - -output_pdbs + +output_pdbs 6->10 - - + + + + +11 + +natural_mutations + + +6->11 + + 12 - -natural_mutations + +homolog_escape -6->12 - - +6->12 + + 13 - -homolog_escape + +make_supp_data 6->13 - - + + - -14 - -make_supp_data + +15 + +custom_plots - -6->14 - - + +6->15 + + -7->0 - - +7->0 + + -7->8 - - +7->8 + + - -7->12 - - + +7->11 + + -8->0 - - +8->0 + + - -8->14 - - + +8->13 + + -9->0 - - +9->0 + + -10->0 - - - - -11 - -gisaid_rbd_mutations +10->0 + + -11->0 - - - - -11->12 - - +11->0 + + -12->0 - - +12->0 + + -13->0 - - +13->0 + + - -14->0 - - + +14 + +structural_contacts - -15 - -structural_contacts + +14->0 + + -15->0 - - +15->0 + + -16->3 +16->3 17 - -get_mut_bind_expr + +get_mut_bind_expr -17->5 - - +17->5 + + 17->8 - - + + 18 - -get_variant_expr + +get_variant_expr -18->5 - - +18->5 + + 19 - -get_variant_bind + +get_variant_bind -19->5 - - +19->5 + + diff --git a/results/summary/summary.md b/results/summary/summary.md index faab56d..c636aa9 100644 --- a/results/summary/summary.md +++ b/results/summary/summary.md @@ -48,4 +48,6 @@ Here is the Markdown output of each notebook in the workflow: 15. [Make supplementary data files](make_supp_data.md), which are [here](../supp_data). These include - `dms-view` input files. \ No newline at end of file + `dms-view` input files. + +16. Make custom plots for this project that fall out of the core pipeline, summarzied in [this notebook](custom-plots_LY555.md) \ No newline at end of file diff --git a/structural_views/README.md b/structural_views/README.md deleted file mode 100755 index a3f9ff9..0000000 --- a/structural_views/README.md +++ /dev/null @@ -1,14 +0,0 @@ -## Structural images - -This directory contains python scripts and the resulting text files with PyMol commands that can be run to generate structural views and alignments for various figures and analyses. - -- The file [surface_constraint_features.pse](surface_constraint_features.pse) was copied from the [SARS-CoV-2-RBD_DMS repository](https://github.com/jbloomlab/SARS-CoV-2-RBD_DMS). -It was originally constructed from a set of `pdb` structures in the `data` subdirectory and the output `pdb` files from the `structure_function.Rmd` script, which replaces the RBD b factor column with the mean mutational effects per site from our DMS measurements. - -- The script [write_pymol_commands.py](write_pymol_commands.py) takes a list of antibodies and a set of PDB files in the [../results/pdb_outputs/](../results/pdb_outputs/) directory generated by [output_pdbs.Rmd](../output_pdbs.Rmd) notebook, in which the b factor has been replaced by escape fractions for each antibody. -It outputs the next text file: - -- The commands in [pymol_commands.txt](pymol_commands.txt) can be run in the `surface_constraint_features.pse` PyMol session to generate a *new* `.pse` file, [surface_escape.pse](surface_escape.pse) that includes an object for each RBD antibody escape profile, with the surface colored by the max `escape_frac` for a mutation at each site. -This is colored from white to red, where the strongest escape mutations are colored red. -These commands also re-color the `RBD_bind` and `RBD_expression` objects on a blue to white scale (rather than the original red to white), such that any site with mean mutational effect less than or equal to -2 is the darkest blue, and mean effects from -2 to zero scale from blue to white. -This is to avoid confusion with the antibody escape profiles, which are scaled white to red. diff --git a/structural_views/pymol_commands.txt b/structural_views/pymol_commands.txt deleted file mode 100755 index e90b1db..0000000 --- a/structural_views/pymol_commands.txt +++ /dev/null @@ -1,93 +0,0 @@ -#commands to load pdbs for antibody mapping - -set seq_view, 0 - -load ../results/pdb_outputs/CR3022_400_6m0j_max_escape.pdb -load ../results/pdb_outputs/COV2-2050_400_6m0j_max_escape.pdb -load ../results/pdb_outputs/COV2-2082_400_6m0j_max_escape.pdb -load ../results/pdb_outputs/COV2-2094_400_6m0j_max_escape.pdb -load ../results/pdb_outputs/COV2-2096_400_6m0j_max_escape.pdb -load ../results/pdb_outputs/COV2-2130_400_6m0j_max_escape.pdb -load ../results/pdb_outputs/COV2-2165_400_6m0j_max_escape.pdb -load ../results/pdb_outputs/COV2-2196_400_6m0j_max_escape.pdb -load ../results/pdb_outputs/COV2-2479_400_6m0j_max_escape.pdb -load ../results/pdb_outputs/COV2-2499_400_6m0j_max_escape.pdb -load ../results/pdb_outputs/COV2-2677_400_6m0j_max_escape.pdb -load ../results/pdb_outputs/COV2-2832_400_6m0j_max_escape.pdb - -set_name CR3022_400_6m0j_max_escape, CR3022_max -set_name COV2-2050_400_6m0j_max_escape, COV2-2050_max -set_name COV2-2082_400_6m0j_max_escape, COV2-2082_max -set_name COV2-2094_400_6m0j_max_escape, COV2-2094_max -set_name COV2-2096_400_6m0j_max_escape, COV2-2096_max -set_name COV2-2130_400_6m0j_max_escape, COV2-2130_max -set_name COV2-2165_400_6m0j_max_escape, COV2-2165_max -set_name COV2-2196_400_6m0j_max_escape, COV2-2196_max -set_name COV2-2479_400_6m0j_max_escape, COV2-2479_max -set_name COV2-2499_400_6m0j_max_escape, COV2-2499_max -set_name COV2-2677_400_6m0j_max_escape, COV2-2677_max -set_name COV2-2832_400_6m0j_max_escape, COV2-2832_max - -hide all -create ACE2, CR3022_max and chain A -remove CR3022_max and chain A -remove COV2-2050_max and chain A -remove COV2-2082_max and chain A -remove COV2-2094_max and chain A -remove COV2-2096_max and chain A -remove COV2-2130_max and chain A -remove COV2-2165_max and chain A -remove COV2-2196_max and chain A -remove COV2-2479_max and chain A -remove COV2-2499_max and chain A -remove COV2-2677_max and chain A -remove COV2-2832_max and chain A - -show cartoon, ACE2; color gray20, ACE2; set cartoon_transparency, 0.5, ACE2 - -show surface, CR3022_max; show sticks, CR3022_max and resn NAG; spectrum b, white red, CR3022_max, minimum=0 -show surface, COV2-2050_max; show sticks, COV2-2050_max and resn NAG; spectrum b, white red, COV2-2050_max, minimum=0 -show surface, COV2-2082_max; show sticks, COV2-2082_max and resn NAG; spectrum b, white red, COV2-2082_max, minimum=0 -show surface, COV2-2094_max; show sticks, COV2-2094_max and resn NAG; spectrum b, white red, COV2-2094_max, minimum=0 -show surface, COV2-2096_max; show sticks, COV2-2096_max and resn NAG; spectrum b, white red, COV2-2096_max, minimum=0 -show surface, COV2-2130_max; show sticks, COV2-2130_max and resn NAG; spectrum b, white red, COV2-2130_max, minimum=0 -show surface, COV2-2165_max; show sticks, COV2-2165_max and resn NAG; spectrum b, white red, COV2-2165_max, minimum=0 -show surface, COV2-2196_max; show sticks, COV2-2196_max and resn NAG; spectrum b, white red, COV2-2196_max, minimum=0 -show surface, COV2-2479_max; show sticks, COV2-2479_max and resn NAG; spectrum b, white red, COV2-2479_max, minimum=0 -show surface, COV2-2499_max; show sticks, COV2-2499_max and resn NAG; spectrum b, white red, COV2-2499_max, minimum=0 -show surface, COV2-2677_max; show sticks, COV2-2677_max and resn NAG; spectrum b, white red, COV2-2677_max, minimum=0 -show surface, COV2-2832_max; show sticks, COV2-2832_max and resn NAG; spectrum b, white red, COV2-2832_max, minimum=0 - -hide all -show surface, CR3022_max -show surface, COV2-2050_max -show surface, COV2-2082_max -show surface, COV2-2094_max -show surface, COV2-2096_max -show surface, COV2-2130_max -show surface, COV2-2165_max -show surface, COV2-2196_max -show surface, COV2-2479_max -show surface, COV2-2499_max -show surface, COV2-2677_max -show surface, COV2-2832_max - -show surface, RBD_bind; spectrum b, blue white, RBD_bind, minimum=-2, maximum=0; show sticks, RBD_bind and resn NAG -show surface, RBD_expr; spectrum b, blue white, RBD_expr, minimum=-2, maximum=0; show sticks, RBD_expr and resn NAG - -set_view (0.179795116, -0.585253060, 0.790621936,-0.975049794, 0.000042952, 0.221752375,-0.129811451, -0.810812414, -0.570666671,0.002255535, 0.000589155, -237.384719849,-32.869243622, 26.272405624, 17.597675323,-55707.390625000, 56183.121093750, -20.000000000 ) - -show surface, CR3022_max -show surface, COV2-2050_max -show surface, COV2-2082_max -show surface, COV2-2094_max -show surface, COV2-2096_max -show surface, COV2-2130_max -show surface, COV2-2165_max -show surface, COV2-2196_max -show surface, COV2-2479_max -show surface, COV2-2499_max -show surface, COV2-2677_max -show surface, COV2-2832_max - -save surface_escape.pse \ No newline at end of file diff --git a/structural_views/surface_constraint_features.pse b/structural_views/surface_constraint_features.pse deleted file mode 100755 index 2a95f6e..0000000 Binary files a/structural_views/surface_constraint_features.pse and /dev/null differ diff --git a/structural_views/surface_escape.pse b/structural_views/surface_escape.pse deleted file mode 100755 index b3d2fe3..0000000 Binary files a/structural_views/surface_escape.pse and /dev/null differ diff --git a/structural_views/write_pymol_commands.py b/structural_views/write_pymol_commands.py deleted file mode 100755 index 0efc553..0000000 --- a/structural_views/write_pymol_commands.py +++ /dev/null @@ -1,92 +0,0 @@ -#Make text file with pymol commands - -def main(): - outFile = 'pymol_commands.txt' - crowe_mabs = ['2050', '2082', '2094', '2096', '2130', '2165', '2196', '2479', '2499', '2677', '2832'] - antibodies = ['CR3022'] - - # which set of data to show, and how to color surfaces. - metric = 'max' # `max` looks way better than `total` - color_min = 'white' - color_max = 'red' - - # dictionary of views here: - views = {'view1': """\nset_view (\ -0.179795116, -0.585253060, 0.790621936,\ --0.975049794, 0.000042952, 0.221752375,\ --0.129811451, -0.810812414, -0.570666671,\ -0.002255535, 0.000589155, -237.384719849,\ --32.869243622, 26.272405624, 17.597675323,\ --55707.390625000, 56183.121093750, -20.000000000 )\n""", - - } - - # save png files of views? - save_png = False - - # make full list of antibodies - for mab in crowe_mabs: - ab = 'COV2-'+mab - antibodies.append('COV2-'+mab) - - # begin writing to output file - f = open(outFile, "w") - f.write('#commands to load pdbs for antibody mapping\n\n') - - # set sequence view to off - f.write('set seq_view, 0\n\n') - - # load pdb file for every antibody - for ab in antibodies: - f.write(f'load ../results/pdb_outputs/{ab}_400_6m0j_{metric}_escape.pdb\n') - - # rename structure for every antibody - f.write('\n') - for ab in antibodies: - f.write(f'set_name {ab}_400_6m0j_{metric}_escape, {ab}_{metric}\n') - - # one-time commands: - f.write('\nhide all\n') - f.write(f'create ACE2, {antibodies[0]}_{metric} and chain A\n') - - # remove chain A (ACE2) from each structure. - for ab in antibodies: - f.write(f'remove {ab}_{metric} and chain A\n') - - # show ACE2 as gray cartoon - f.write('\nshow cartoon, ACE2; color gray20, ACE2; set cartoon_transparency, 0.5, ACE2\n\n') - - # color surface representation by b-factor (which has been recoded to {metric}_escape) - for ab in antibodies: - f.write(f'show surface, {ab}_{metric}; show sticks, {ab}_{metric} and resn NAG; spectrum b, {color_min} {color_max}, {ab}_{metric}, minimum=0\n') - - # show each as surface - f.write('\nhide all\n') - - for ab in antibodies: - f.write(f'show surface, {ab}_{metric}\n') - - # change coloring of RBD_bind and RBD_express to blue white (where blue is negative) - f.write(f'\nshow surface, RBD_bind; spectrum b, blue white, RBD_bind, minimum=-2, maximum=0; show sticks, RBD_bind and resn NAG') - f.write(f'\nshow surface, RBD_expr; spectrum b, blue white, RBD_expr, minimum=-2, maximum=0; show sticks, RBD_expr and resn NAG\n') - - # iterate through each view we define above, get view, show as surface, take picture - for view_n in views.keys(): - - f.write(f'{views[view_n]}\n') # set one view you like - - # now get pictures of each - for ab in antibodies: - if save_png: - f.write('\nhide all\n') - f.write(f'show surface, {ab}_{metric}\n') - - if save_png: - f.write(f'png ../structural_views/{ab}_{metric}_{view_n}.png, ray=1, 600, 600\n') - if not save_png: - f.write('\nsave surface_escape.pse') - break - - f.close() - -main()