## Output PDB with Neff or entropy as B factors

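The goal is to color the 229E spike structure by per-site variability, using either the number of effective amino acids (Neff) or the entropy at each site.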

In [1]:
import collections
import copy
import os
import warnings

import Bio.PDB

import dms_variants.pdb_utils

from IPython.display import display, HTML

import pandas as pd

In [2]:
# Folder that receives the PDB files written by the later cells.
# `exist_ok=True` makes re-running this cell harmless when the folder is already there.
pdbs_dir = './results/pdbs/pdb_outputs'
os.makedirs(name=pdbs_dir, exist_ok=True)

In [3]:
# Load the site-variability table; the later cells read its `site`,
# `protein_chain`, `site_entropy` and `site_n_effective_amino_acids` columns.
site_var_df = pd.read_csv(
    './results/variation_analysis/site_variability.csv',
)

In [4]:
# Show the first rows of the table as HTML, omitting the DataFrame index.
display(
    HTML(
        site_var_df.head().to_html(index=False)
    )
)

site,label_site,wildtype,mutation,condition,protein_chain,protein_site,site_n_effective_amino_acids,site_entropy,mut_aa_frequency
1,1,M,-,229E_alignment,A B C,,1.0,0.0,0.0
1,1,M,A,229E_alignment,A B C,,1.0,0.0,0.0
1,1,M,C,229E_alignment,A B C,,1.0,0.0,0.0
1,1,M,D,229E_alignment,A B C,,1.0,0.0,0.0
1,1,M,E,229E_alignment,A B C,,1.0,0.0,0.0


In [5]:
# Build a long-format (site, chain, metric) table holding the per-site entropy.
# Every (site, chain, entropy) combination is repeated across many input rows,
# so start by collapsing to the unique combinations.
entropy_df = (
    site_var_df[['site', 'protein_chain', 'site_entropy']]
    .rename(columns={'protein_chain': 'chain', 'site_entropy': 'metric'})
    .drop_duplicates()
)
# `chain` is a space-delimited list of chain IDs (e.g. 'A B C'). The list found
# in the FIRST row is applied to every site: one column per chain ID, each a
# copy of the metric.
entropy_df = entropy_df.assign(
    **{
        chain_id: entropy_df['metric']
        for chain_id in entropy_df['chain'].iloc[0].split(' ')
    }
)
# Drop the helper columns, then unpivot the per-chain columns into rows.
entropy_df = entropy_df.drop(columns=['chain', 'metric'])
entropy_df = entropy_df.melt(id_vars=['site'], var_name='chain', value_name='metric')
display(
    HTML(
        entropy_df.head().to_html(index=False)
    )
)

site,chain,metric
1,A,0.0
2,A,0.0
3,A,0.0
4,A,0.0
5,A,0.0


In [6]:
# Build a long-format (site, chain, metric) table holding the per-site number
# of effective amino acids. Every (site, chain, value) combination is repeated
# across many input rows, so start by collapsing to the unique combinations.
neff_df = (
    site_var_df[['site', 'protein_chain', 'site_n_effective_amino_acids']]
    .rename(columns={'protein_chain': 'chain', 'site_n_effective_amino_acids': 'metric'})
    .drop_duplicates()
)
# `chain` is a space-delimited list of chain IDs (e.g. 'A B C'). The list found
# in the FIRST row is applied to every site: one column per chain ID, each a
# copy of the metric.
neff_df = neff_df.assign(
    **{
        chain_id: neff_df['metric']
        for chain_id in neff_df['chain'].iloc[0].split(' ')
    }
)
# Drop the helper columns, then unpivot the per-chain columns into rows.
neff_df = neff_df.drop(columns=['chain', 'metric'])
neff_df = neff_df.melt(id_vars=['site'], var_name='chain', value_name='metric')
display(
    HTML(
        neff_df.head().to_html(index=False)
    )
)
# List the distinct metric values that occur, as a quick look at their range.
print(neff_df['metric'].unique())


site,chain,metric
1,A,1.0
2,A,1.0
3,A,1.0
4,A,1.0
5,A,1.0


[1.      1.19283 1.16417 2.50278 1.09234 1.34119 1.67306 1.5002  1.27086
 2.26333 1.97323 2.14611 1.28929 1.40653 1.63434 1.40003 1.22899 1.63316
 1.54676 1.71075 1.99231 1.72309 1.74624 2.25113 2.24356 2.09233 1.96936
 2.10976 1.98494 1.45133 1.34612 2.55997 2.446   2.85293 1.96287 2.94529
 1.97512 1.59107 2.13667]


In [7]:
# Source structure and destination for the entropy-annotated copy.
original_pdbfile = './results/pdbs/6u7h.pdb'
reassigned_pdbfile_entropy = os.path.join(pdbs_dir, 'entropy_reassigned.pdb')
# Value passed as `missing_metric`; presumably used for residues that have no
# row in `entropy_df` -- confirm against the dms_variants documentation.
entropy_missing_metric = -1
dms_variants.pdb_utils.reassign_b_factor(
    input_pdbfile=original_pdbfile,
    output_pdbfile=reassigned_pdbfile_entropy,
    df=entropy_df,
    metric_col='metric',
    missing_metric=entropy_missing_metric,
)

In [8]:
# Destination for the copy annotated with the number of effective amino acids;
# the source structure is the same `original_pdbfile` used for the entropy file.
reassigned_pdbfile_neff = os.path.join(pdbs_dir, 'neff_reassigned.pdb')
# Value passed as `missing_metric`; presumably used for residues that have no
# row in `neff_df` -- confirm against the dms_variants documentation.
neff_missing_metric = 0
dms_variants.pdb_utils.reassign_b_factor(
    input_pdbfile=original_pdbfile,
    output_pdbfile=reassigned_pdbfile_neff,
    df=neff_df,
    metric_col='metric',
    missing_metric=neff_missing_metric,
)