# Figure generation using polyclonal
We would like to use some functions from polyclonal to visualize the selection datasets.


Note: running this notebook requires the conda environment specified in "environment_polyclonal.yml"

In [1]:
# load modules
import os
import glob
import numpy
import pandas as pd
import scipy
import collections 
import requests
import tempfile
import altair as alt

import polyclonal
from polyclonal import pdb_utils

In [2]:
# Input-data and results locations, given relative to the working directory
datadir, resultsdir = "./data", "./results"
# subdirectory of the results directory used by the B-factor re-assignment step
reassignedpdbdir = os.path.join(resultsdir, "reassignedpdb")

## Reassign b-factor in PDB files
We want to overwrite the B-factor column in PDB files with data from our selections. This will allow us to visualize the data in PyMOL, which gives us greater flexibility in figure generation than dms-view.

In [3]:
# CSV file names (looked up inside `reassignedpdbdir`) that supply the
# per-site metric written into the B-factor column
files = [
    "alldiffsel.csv",
]

In [4]:
# URL of the PDB structure (RCSB entry 5IRE) whose B-factors get re-assigned
pdb_url = "https://files.rcsb.org/download/5IRE.pdb"

In [5]:
# Write one copy of the PDB structure per antibody, with the B-factor column
# replaced by that antibody's `positive_diffsel` values.
antibodylist = ['EDE1_C10',
                'EDE1_C8',
                'MZ4',
                'SIgN-3C',
                'ZV_67']

# B-factor given to sites that have no metric in the data:
# -1 for sites in chain C, 0 for sites in every other chain.
missing_metric = collections.defaultdict(lambda: 0)
missing_metric['C'] = -1

# Download the structure once: it is identical for every antibody and data file.
r = requests.get(pdb_url)
# Fail loudly instead of saving an HTTP error page as if it were a PDB file.
r.raise_for_status()

for antibody in antibodylist:
    print(f'Generating reassigned B-factor PDB file for {antibody}...')
    for data in files:
        # load the selection data and keep only the rows for this antibody
        dms_data = os.path.join(reassignedpdbdir, data)
        df = (pd.read_csv(dms_data, index_col=False)
              # drop the original `site` so `protein_site` can take its name
              .drop(['label_site', 'site'], axis=1)
              .rename(columns={'protein_chain': 'chain',
                               'protein_site': 'site'})
              .drop_duplicates()
              .dropna()
              .query(f'condition == "{antibody}"')
              )

        # re-assign B factors in a scratch directory, then read the result back
        with tempfile.TemporaryDirectory() as tmpdir:
            original_pdbfile = os.path.join(tmpdir, 'original.pdb')
            with open(original_pdbfile, 'wb') as f:
                f.write(r.content)
            reassigned_pdbfile = os.path.join(tmpdir, 'reassigned.pdb')
            pdb_utils.reassign_b_factor(input_pdbfile=original_pdbfile,
                                        output_pdbfile=reassigned_pdbfile,
                                        df=df,
                                        metric_col='positive_diffsel',
                                        missing_metric=missing_metric)
            # use a context manager so the handle is closed before tmpdir is removed
            with open(reassigned_pdbfile) as f:
                pdb_text = f.readlines()

        # save reassigned pdbfile
        # NOTE: the name depends only on the antibody, so if `files` ever holds
        # more than one entry, later data files overwrite earlier ones.
        outfile = os.path.join(reassignedpdbdir, f"{antibody}_5IRE.pdb")

        with open(outfile, 'w') as f:
            # lines from readlines() already end in a newline; appending another
            # one would insert a blank line after every record
            f.writelines(pdb_text)

Generating reassigned B-factor PDB file for EDE1_C10...
Generating reassigned B-factor PDB file for EDE1_C8...
Generating reassigned B-factor PDB file for MZ4...
Generating reassigned B-factor PDB file for SIgN-3C...
Generating reassigned B-factor PDB file for ZV_67...


In [6]:
# peek at the first line of the most recently re-assigned PDB file
first_line = pdb_text[0]
print(first_line.strip())

ATOM      1  N   ILE A   1    -161.070 -67.005-130.595  1.00  0.00           N


In [7]:
# peek at a short run of lines from the most recently re-assigned PDB file
preview_lines = [line.strip() for line in pdb_text[5010:5025]]
print('\n'.join(preview_lines))

ATOM   5009  N   THR C 205    -109.384-140.147-124.230  1.00  0.00           N
ATOM   5010  CA  THR C 205    -109.051-140.764-125.507  1.00  0.00           C
ATOM   5011  C   THR C 205    -109.063-139.722-126.612  1.00  0.00           C
ATOM   5012  O   THR C 205    -110.125-139.192-126.945  1.00  0.00           O
ATOM   5013  CB  THR C 205    -110.053-141.848-125.872  1.00  0.00           C
ATOM   5014  OG1 THR C 205    -111.313-141.232-126.144  1.00  0.00           O
ATOM   5015  CG2 THR C 205    -110.243-142.777-124.728  1.00  0.00           C
ATOM   5016  N   MET C 206    -107.908-139.468-127.207  1.00  0.08           N
ATOM   5017  CA  MET C 206    -107.863-138.843-128.515  1.00  0.08           C
ATOM   5018  C   MET C 206    -108.099-139.927-129.555  1.00  0.08           C
ATOM   5019  O   MET C 206    -108.562-141.018-129.234  1.00  0.08           O
ATOM   5020  CB  MET C 206    -106.543-138.126-128.743  1.00  0.08           C
ATOM   5021  CG  MET C 206    -106.392-136.871-127.9