In [2]:
#import packages
import numpy as np
import pandas as pd
from correlationExtraction import CorrelationExtraction
from Bio.PDB.Polypeptide import is_aa
from sklearn.metrics import adjusted_mutual_info_score

In [6]:
#config
pdb_data = 'DATA/6svc.pdb'

def extraction(pdbData, chain="A"):
    """Extract backbone torsion angles and per-residue correlation profiles.

    Builds a ``CorrelationExtraction`` model for the given PDB file, collects
    the torsion angles of one chain into a DataFrame, and averages the
    angular and distance correlation matrices into one value per residue
    position.

    Parameters
    ----------
    pdbData : str
        Path to the PDB file passed to ``CorrelationExtraction``.
    chain : str, optional
        Chain identifier used for every per-chain call (default ``"A"``).

    Returns
    -------
    angle_df : pandas.DataFrame
        Result of ``model.angCor.get_angle_data(chain)`` with the columns
        ``conformer_id``, ``residue_id``, ``phi`` and ``psi``.
    ang_cor : numpy.ndarray
        Mean over axis 0 of the second value returned by ``model.calc_ami``
        for the angular clusters, with NaNs replaced by 0.
    dist_cor : numpy.ndarray
        The same reduction applied to the distance clusters.

    Raises
    ------
    ValueError
        If the chain contains no standard amino-acid residues.
    """
    # set up the PDBcor object
    model = CorrelationExtraction(
        pdbData,
        mode='backbone',
        nstates=2,
        therm_fluct=0.5,
        therm_iter=5,
        loop_start=-1,
        loop_end=-1,
    )

    # extract torsion angles
    angle_data = model.angCor.get_angle_data(chain)
    angle_df = pd.DataFrame(
        angle_data,
        columns=["conformer_id", "residue_id", "phi", "psi"],
    )

    # SET UP for correlation extraction: sequence numbers of the standard
    # amino acids in the first model of the structure.
    resid = [
        res.get_id()[1]
        for res in model.structure[0][chain].get_residues()
        if is_aa(res, standard=True)
    ]
    if not resid:
        raise ValueError(
            f"no standard amino-acid residues found in chain {chain!r} of {pdbData!r}"
        )

    # aaS and aaF need to be defined in order to use the calc_ami function
    model.resid = resid
    model.aaS = min(resid)
    model.aaF = max(resid)

    # extract angular correlation values
    ang_clusters, ang_banres = model.angCor.clust_cor(chain, resid)
    _, ang_hm = model.calc_ami(ang_clusters, ang_banres)
    ang_cor = np.mean(np.nan_to_num(ang_hm), axis=0)

    # extract distance correlation values
    # NOTE(review): the attribute name `DistanceCor` is kept from the original
    # cell; confirm it matches the attribute CorrelationExtraction exposes.
    dist_clusters, dist_banres = model.DistanceCor.clust_cor(chain, resid)
    # calc_ami needs the clusters and banned residues, as in the angular call.
    _, dist_hm = model.calc_ami(dist_clusters, dist_banres)
    dist_cor = np.mean(np.nan_to_num(dist_hm), axis=0)

    return angle_df, ang_cor, dist_cor


# Run the extraction at notebook level so the cells below can use the results.
angle_df, ang_cor, dist_cor = extraction(pdb_data)
ang_cor

In [11]:

# Display the torsion-angle table (columns: conformer_id, residue_id, phi, psi).
angle_df

Unnamed: 0,conformer_id,residue_id,phi,psi
0,0.0,5.0,0.000000,126.064975
1,0.0,6.0,-156.977950,69.964940
2,0.0,7.0,-117.957219,157.323476
3,0.0,8.0,-69.881665,172.246656
4,0.0,9.0,-69.776993,115.030068
...,...,...,...,...
695,19.0,35.0,-145.266359,156.539311
696,19.0,36.0,-53.322427,109.361605
697,19.0,37.0,-69.726279,125.756246
698,19.0,38.0,-70.324544,-73.229199


ANGLE CLUSTERING PROCESS:


100%|██████████| 35/35 [00:00<00:00, 55.36it/s]
100%|██████████| 35/35 [00:00<00:00, 129.53it/s]


array([ 0.03334949,  0.00610413,  0.25023528,  0.25023528,  0.25023528,
        0.25023528,  0.10597715,  0.10597715,  0.06853351,  0.15975566,
        0.16275983, -0.00196907,  0.09135871,  0.09135871,  0.06147615,
        0.00326272,  0.04484879,  0.00749997,  0.15561766,  0.0791457 ,
        0.14507991,  0.06382218,  0.09018067,  0.09018067,  0.09035934,
        0.00457374,  0.0791457 ,  0.0791457 ,  0.05951347,  0.20929106,
        0.20929106,  0.11885645,  0.07821084,  0.07440854,  0.08023449])

In [22]:
# final table: residue, phi, psi, ang correlation
# The angular correlation is appended as the last column of angle_df.

# Offset between a residue id and its position in ang_cor.
# NOTE(review): presumably this equals the first residue id of the chain
# (model.aaS = min(resid) in the extraction cell) — confirm before reusing
# this cell with another structure.
RESIDUE_ID_OFFSET = 5

# residue_id is stored as float, so convert to integer positions explicitly.
cor_positions = (angle_df["residue_id"].to_numpy() - RESIDUE_ID_OFFSET).astype(int)

# Fail loudly on out-of-range positions: a negative index would otherwise wrap
# around silently and pick a correlation value from the end of ang_cor.
if cor_positions.size and (
    cor_positions.min() < 0 or cor_positions.max() >= len(ang_cor)
):
    raise ValueError(
        "residue_id values do not map onto ang_cor with offset "
        f"{RESIDUE_ID_OFFSET}: positions span {cor_positions.min()}.."
        f"{cor_positions.max()}, but ang_cor has {len(ang_cor)} entries"
    )

angle_df["ang_cor"] = ang_cor[cor_positions]
angle_df.to_csv("DATA/angle_df.csv", index=False)
angle_df


Unnamed: 0,conformer_id,residue_id,phi,psi,ang_cor
0,0.0,5.0,0.000000,126.064975,0.033349
1,0.0,6.0,-156.977950,69.964940,0.006104
2,0.0,7.0,-117.957219,157.323476,0.250235
3,0.0,8.0,-69.881665,172.246656,0.250235
4,0.0,9.0,-69.776993,115.030068,0.250235
...,...,...,...,...,...
695,19.0,35.0,-145.266359,156.539311,0.209291
696,19.0,36.0,-53.322427,109.361605,0.118856
697,19.0,37.0,-69.726279,125.756246,0.078211
698,19.0,38.0,-70.324544,-73.229199,0.074409
