# Visualisation of eigenmodes

In this notebook we will visualise the direction and magnitude of a selected eigenmode. Clearly, there are some interesting patterns.

In [23]:

import sys
import pickle
import numpy as np
import gc
import glob
import scipy.stats
import sklearn.cluster
import shutil
sys.path.append('..')
sys.path.append('../../')
sys.path.append('../../InchingLiteInteger/')
import InchingLiteInteger.Burn.Orthogonalization.T3
import InchingLiteInteger.Burn.Orthogonalization.T2
import InchingLiteInteger.Burn.Orthogonalization.T1
import InchingLiteInteger.Burn.Krylov.T3
import InchingLiteInteger.util
from InchingLiteInteger.Fuel.Coordinate.T1 import HeigvecOne_BoxCoxMagnitude

# Visualization parameters
To visualize the eigenmodes with arrows, point `pdbavail` at the initial structure, e.g. `pdbavail = ['../../DataRepo/CifShowcase/AirplaneYesH.cif']`. The folder `Bechmarking_folder = "../../DataRepo/VisualizationExample%s/" %(pdbid)`, defined automatically in the subsequent for loop, stores the calculated eigenvectors; we will look for them there as `{Bechmarking_folder}/Eigvec_*_{pdbid}*.pkl`.

In [24]:

# Structure file(s) to process; the loading loop iterates over this list.
pdbavail = ['../../DataRepo/PdbByAtomCount/4y5z.pdb']

# Index of the eigenmode to show.
# NOTE for airplane it should be 0 or 1, as EED was applied. For 3j3q use 6. For Npc use 0, 1 or 2.
User_TheModeToShow = 6
# NOTE Controls the upper-quantile threshold of what is displayed.
User_QuantileDisplay = 0.3
# presumably the number of arrows picked at random for display -- TODO confirm against its use
User_RandomPickArrows = 10000
# NOTE This is the +/- sign of the eigenvector.
User_EigenvectorTwinDirection = 1

# =========
# Big arrow
# =========
User_BigClusterArrowFloatingFactor = 0.5
# NOTE This roughly clusters the 90th-percentile arrows; the larger the value, the fewer arrows.
User_DBscanMinDist = 1.5

# =========
# Animation
# =========
# NOTE typical values are (2, 1) for small-amplitude fine movement, or (4, 2) for really
# large-amplitude extrapolation where cracks start filling in.
# NOTE(review): the pair set below is (4, 4), which matches neither typical pair -- confirm.
User_ScalingAmplitudeAnimation = 4
User_ScalingStepSizeAnimation = 4


# Check connectivity and report disconnected atoms
8 angstrom

In [25]:


benchmark_inching = []
for pdbfn in pdbavail:
    print(pdbfn)

    # Stage switch for the I/O part; it is hard-wired on, so the code below always runs.
    PART00_IO = True
    if not PART00_IO:
        continue

    # Structure id = file name up to its first dot, e.g. ".../4y5z.pdb" -> "4y5z".
    pdbid = pdbfn.rsplit("/", 1)[-1].partition(".")[0]

    # Load the structure. X_df is used as a DataFrame from here on;
    # X_top is presumably the matching topology -- TODO confirm against the loader.
    X_df, X_top = InchingLiteInteger.util.BasicPdbCifLoading(pdbfn)

    # Store each row's index label in an explicit 'serial' column.
    X_df.loc[:, 'serial'] = X_df.index.tolist()


../../DataRepo/PdbByAtomCount/4y5z.pdb


# CA

For simplicity we will do CA clustering

In [26]:
import sklearn.cluster
# Connectivity check restricted to the rows of X_df whose atom name is 'CA'.
# The explicit .copy() makes Ca_df an independent frame: adding the 'cluster'
# column below then no longer raises pandas' SettingWithCopyWarning and can
# never write through to (or silently miss) the parent X_df.
Ca_df = X_df.loc[X_df['name'] == 'CA'].copy()
Ca_xyz = Ca_df[['x','y','z']].to_numpy().astype(np.float32)

# Group atoms that are chained together by neighbours within eps of each other.
# With min_samples=1 every point is a core sample, so no -1 (noise) labels are
# produced and each isolated atom ends up in a cluster of its own.
# NOTE(review): eps=0.6 is expressed in the coordinate units of X_df, while the
# section heading mentions 8 angstrom -- confirm the intended cutoff.
clustering = sklearn.cluster.DBSCAN(eps=0.6, min_samples=1,
                        metric='euclidean', metric_params=None,
                        algorithm='auto', leaf_size=30, p=2, n_jobs=None).fit(Ca_xyz)

Ca_df.loc[:,'cluster'] = clustering.labels_.tolist()

# Everything outside cluster 0 (and outside the noise label -1) is reported as disconnected.
print("BELOW ARE DISCONNECTED")
print(np.unique(clustering.labels_, return_counts=True))
print(Ca_df.loc[~Ca_df['cluster'].isin([0,-1])])

BELOW ARE DISCONNECTED
(array([0]), array([11534]))
Empty DataFrame
Columns: [serial, name, element, resSeq, resName, chainID, segmentID, x, y, z, cluster]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


# All atoms

In [22]:
import sklearn.cluster
# Same connectivity check as for the CA atoms, but over every atom in X_df.
# X_df_ is an alias, not a copy: the 'cluster' column written below is
# therefore also visible on X_df itself.
X_df_ = X_df
X_xyz = X_df_[['x', 'y', 'z']].to_numpy().astype(np.float32)

clustering2 = sklearn.cluster.DBSCAN(
    eps=0.6,
    min_samples=1,
    metric='euclidean',
    metric_params=None,
    algorithm='auto',
    leaf_size=30,
    p=2,
    n_jobs=None,
).fit(X_xyz)

X_df_.loc[:, 'cluster'] = clustering2.labels_.tolist()

# Rows labelled with anything other than cluster 0 or the noise label -1.
off_main_cluster = ~X_df_['cluster'].isin([0, -1])

print("BELOW ARE DISCONNECTED")
print(np.unique(clustering2.labels_, return_counts=True))
print(X_df_.loc[off_main_cluster])

if not off_main_cluster.any():
    print("CONGRATULATIONS! No disconnection!")

BELOW ARE DISCONNECTED
(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15]), array([66941,     1,     1,     1,     1,     1,     1,     1,     1,
           1,     1,     1,     1,     1,     1,     1]))
       serial name element  resSeq resName  chainID segmentID        x  \
63305   63305    O       O     547     HOH       31             8.9433   
63459   63459    O       O     701     HOH       31            10.4241   
63800   63800    O       O     586     HOH       33            -2.6256   
64116   64116    O       O    6114     HOH       34            -7.3706   
64236   64236    O       O    6009     HOH       35             0.3716   
64376   64376    O       O    5955     HOH       36             0.0251   
64491   64491    O       O    6070     HOH       36            -3.3166   
64742   64742    O       O     670     HOH       37            -3.9618   
64939   64939    O       O     654     HOH       38            -5.8590   
65197   65197    O       O     686 

# Write out the structure with B-factor = 1 for disconnected atoms, else 0

In [7]:
# Write the loaded coordinates back out as a single cif file, using the input
# structure file as the reference.
# NOTE(review): the section heading says disconnected atoms should get
# B-factor 1 and all others 0, but User_Bfactor=None is passed here -- confirm
# whether the cluster flags were meant to be supplied.
InchingLiteInteger.util.BasicPdbCifWriting(
    ref_structure_dir=pdbfn,            # expects a pdb file directory
    save_structure_dir="haha.cif",      # expects a pdb file directory
    # accepts a 3D tensor (t,n,3); the leading None adds the length-1 t axis
    position=X_df[['x', 'y', 'z']].to_numpy()[None, :, :],
    keepIds=True,
    SaveFormat='cif',
    SaveSeparate=False,
    User_Bfactor=None,
)