
Before starting, remember to activate the environment:  <br>
**source env/bin/activate**




In [1]:
from intrinsic_dimension import intrinsic_dimension, section_id, secondary_structure_id
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import ListedColormap
import seaborn as sns
import logging
from moleculekit.molecule import Molecule

# Intrinsic Dimension


Computes the ID of the system over the entire molecular dynamics trajectory. <br>
If <span class="mark"><code>id_method</code></span> is ``local`` the function returns:
* the averaged value of instantaneous ID computed on the entire trajectory.
* the averaged value of instantaneous ID computed from the ``last`` frames to the end of the trajectory.
* the instantaneous ID computed frame by frame on the entire trajectory.

In [None]:
mean_all, mean_last, local_id = intrinsic_dimension(topology = 'examples/villin/2F4K.pdb', trajectory = 'examples/villin/2F4K_1.xtc', projection_method='Dihedral', id_method='local', verbose=False)

print('Mean instantaneous ID of the entire trajectory:', mean_all)
print('Mean instantaneous ID of the last 100 frames:', mean_last)
print('Istantaneous ID of the entire trajectory: \n', local_id[5:])

Mean instantaneous ID of the entire trajectory: 11.157584494989262
Mean instantaneous ID of the last 100 frames: 11.123019242450509
Istantaneous ID of the entire trajectory: 
 [11.41462822 11.41576278 11.15977586 ... 11.06020263 11.04114149
 11.77013457]


If <span class="mark"><code>id_method</code></span> is ``global`` the function returns:
* the value of global ID computed on the entire trajectory.
* the value of global ID computed on the ``last`` number of frames of the trajectory.


In [None]:
global_all, global_last = intrinsic_dimension(topology = 'examples/villin/2F4K.pdb', trajectory = 'examples/villin/2F4K_1.xtc', projection_method='Dihedral', id_method='global', verbose = False)

print('Global ID of the entire trajectory:', global_all)
print('Global ID of the last 100 frames:', global_last)

Global ID of the entire trajectory: 13.984373299745423
Global ID of the last 100 frames: 13.4134570887855


# Section ID

This function computes ID over **sliding windows** of a protein sequence.


**Additional Parameters**


- ``window_size`` (int): window length in residues (default = 10)
- ``stride`` (int): number of residues between windows (default = 1)

Returns a DataFrame.

In [None]:
results = section_id(topology = 'examples/villin/2F4K.pdb', trajectory = 'examples/villin/2F4K_1.xtc', window_size=15, stride=5 , projection_method='Dihedral', verbose =False)
print(f'ID table: \n {results.head()}')

ID table: 
    start  end  entire simulation  last simulation  \
0     42   56          10.570039         9.672545   
1     47   61           9.640473         9.017598   
2     52   66           9.933815         9.259813   
3     57   71           9.037672         8.861538   
4     62   76          11.017036        10.985726   

                                       instantaneous  
0  [8.414117105805504, 8.39326233568671, 6.782742...  
1  [9.241642994778248, 8.184672178727354, 9.80033...  
2  [9.353881004445087, 6.935150593639138, 9.70686...  
3  [10.083695605419093, 9.187159871326793, 9.5521...  
4  [11.016174986168613, 11.075169067215636, 11.76...  


# Secondary Structure ID


This function computes ID over ** secondary structure elements**.

**Additional Parameters**

- ``simplified`` (bool): if True (default), uses simplified DSSP codes coil (C), strand (S) or helix (H); else helix (H), beta bridge (B), extended strand (E), three helix (G), hydrogen bonded turn (T), bend (S), loop or irregular element ( ).

Returns

- A DataFrame with ID values per secondary structure
- A DataFrame with DSSP assignment per residue


In [None]:
results, secStr =secondary_structure_id(topology = 'examples/villin/2F4K.pdb', trajectory = 'examples/villin/2F4K_1.xtc', simplified = True , projection_method='Dihedral', id_method='local', verbose = False)

print(f'ID table:\n {results.head(5)}')
print(f'\n Secondary structure assignments:\n {secStr.head(5)}')

ID table:
    start  end sec str type  entire simulation  last simulation  \
0     42   76            C          11.157584        11.123019   

                                       instantaneous  
0  [11.600833536129166, 11.216419476809541, 11.42...  

 Secondary structure assignments:
    resid index resname sec str type
0           42     LEU            C
1           43     SER            C
2           44     ASP            C
3           45     GLU            C
4           46     ASP            C
