# Ensemble Analysis

This example compares experimental structural data, analyzed using Principal Component Analysis (PCA), with the theoretical data predicted by the Anisotropic Network Model (ANM).

## Retrieve dataset

In [None]:
from prody import *
from numpy import *
from matplotlib.pyplot import *
%matplotlib inline

In [None]:
# Parse PDB entry 1p38; its chain A sequence is used as the BLAST query below.
p38 = parsePDB('1p38') # MAP KINASE

In [None]:
# Take chain A from the hierarchical view of the structure and read its sequence.
p38_sequence = p38.getHierView()['A'].getSequence()

In [None]:
# Bare expression: the notebook echoes the sequence as the cell output.
p38_sequence

In [None]:
# Search for related PDB entries using the chain A sequence as the query.
# NOTE(review): blastPDB presumably contacts a remote BLAST service, so this
# cell needs network access and may be slow - confirm.
blast_record = blastPDB(p38_sequence)

In [None]:
# Keep only the hits that pass the identity (90) and overlap (70) thresholds.
# NOTE(review): both are presumably percentages relative to the query - confirm.
pdbids = blast_record.getHits(percent_identity=90, percent_overlap=70)
# Number of hits retained (echoed as the cell output).
len(list(pdbids))

In [None]:
# Register ./PDB as ProDy's local PDB folder.
# NOTE(review): the folder presumably has to exist before this call - confirm.
pathPDBFolder('./PDB')

In [None]:
# Fetch the files for all hits; pdbids is unpacked so that each identifier is
# passed to fetchPDB as a separate positional argument.
pdbfiles = fetchPDB(*pdbids)

## Set reference chain

In [None]:
# Parse 1p38 again and keep only the C-alpha atoms of residues 5-31, 36-114,
# 122-169 and 185-351. The selection string is written as two adjacent
# literals, which Python joins into a single string.
# NOTE(review): the gaps between the ranges presumably exclude residues that
# are not resolved in all structures - confirm.
ref_structure = parsePDB('1p38')
ref_selection = ref_structure.select('resnum 5 to 31 36 to 114 122 to '
                                     '169 185 to 351 and calpha')

In [None]:
# Chain A of the selection is the reference chain: the other structures are
# mapped onto it and the ANM Hessian is built from it.
ref_chain = ref_selection.getHierView().getChain('A')
# Echo the chain's representation as the cell output.
repr(ref_chain)

## Ensemble Preparation

We will prepare a **PDBEnsemble** by mapping each structure against the reference chain and adding a coordinate set corresponding to the mapped atoms.

In [None]:
# Create the ensemble with the title 'p38 X-ray'; atoms and coordinate sets
# are added in the following cells.
ensemble = PDBEnsemble('p38 X-ray')

In [None]:
ensemble.setAtoms(ref_chain) # Set ensemble atoms
ensemble.setCoords(ref_chain) # Set reference coordinates
# The reference coordinates therefore come from ref_chain, i.e. the selected
# C-alpha atoms of chain A of 1p38.

In [None]:
# Build the ensemble: map every fetched structure onto the reference chain and
# add the mapped atoms as one coordinate set. Structures that cannot be parsed
# or mapped are logged and skipped.
for pdbfile in pdbfiles:
    # Only C-alpha atoms are needed, matching the C-alpha reference selection.
    structure = parsePDB(pdbfile, subset='calpha')
    if structure is None:
        # Fixed: this line used the undefined name `pdb_file`, so a parse
        # failure raised NameError instead of being logged and skipped.
        # plog is called with separate arguments, as in the mapping-failure
        # branch below.
        plog('Failed to parse', pdbfile)
        continue
    mappings = mapOntoChain(structure, ref_chain)
    if len(mappings) == 0:
        # The first four characters of the title are used as the identifier.
        plog('Failed to map', structure.getTitle()[:4])
        continue
    # Use the atom map of the first mapping returned.
    # NOTE(review): presumably the first entry is the best match - confirm.
    atommap = mappings[0][0]
    # The 'mapped' flags are passed as weights.
    # NOTE(review): presumably so that unmapped positions do not contribute
    # to superposition and covariance - confirm.
    ensemble.addCoordset(atommap, weights=atommap.getFlags('mapped'))

In [None]:
# Echo the ensemble's representation as the cell output.
repr(ensemble)

In [None]:
# Sanity check: the two lengths match only if no file was skipped in the loop
# above because it failed to parse or to map.
len(ensemble) == len(pdbfiles)

In [None]:
# Superpose the ensemble members iteratively before the covariance is built.
# NOTE(review): convergence criterion and in-place behavior are defined by
# ProDy's Ensemble.iterpose - confirm in its documentation.
ensemble.iterpose()

## PCA calculation

In [None]:
pca = PCA('p38 xray')           # Instantiate a PCA instance

pca.buildCovariance(ensemble)   # Build covariance for the ensemble

pca.calcModes()                 # Calculate modes (20 of them by default)

## ANM calculation

In [None]:
anm = ANM('1p38')             # Instantiate an ANM instance

anm.buildHessian(ref_chain)   # Build Hessian for the reference chain

anm.calcModes()               # Calculate slowest non-trivial 20 modes

## Save your work

In [None]:
# Save both models, the ensemble and the reference chain.
# NOTE(review): no file names are given for the first three calls, so ProDy
# presumably derives them from the objects' titles - confirm.
saveModel(pca)
saveModel(anm)
saveEnsemble(ensemble)
# The reference chain is written to an explicitly named PDB file.
writePDB('p38_ref_chain.pdb', ref_chain)

## Fractional Variance

In [None]:
for mode in pca[:3]:    # Print % variance explained by top PCs
    # calcFractVariance returns a fraction; scale it to a percentage.
    var = calcFractVariance(mode)*100
    # `!s` converts the mode with str() before formatting. A bare `:s` spec
    # on an object without its own __format__ raises TypeError on Python 3.
    print('{0!s}  % variance = {1:.2f}'.format(mode, var))

## Collectivity of modes

In [None]:
for mode in pca[:3]:    # Print PCA mode collectivity
    coll = calcCollectivity(mode)
    # `!s` converts the mode with str() before formatting. A bare `:s` spec
    # on an object without its own __format__ raises TypeError on Python 3.
    print('{0!s}  collectivity = {1:.2f}'.format(mode, coll))

In [None]:
for mode in anm[:3]:    # Print ANM mode collectivity
    coll = calcCollectivity(mode)
    # `!s` converts the mode with str() before formatting. A bare `:s` spec
    # on an object without its own __format__ raises TypeError on Python 3.
    print('{0!s}  collectivity = {1:.2f}'.format(mode, coll))

## PCA - ANM overlap

In [None]:
# Print the pairwise overlaps between the two sets of modes as a text table.
printOverlapTable(pca[:3], anm[:3]) # Top 3 PCs vs slowest 3 ANM modes

In [None]:
# Plot the overlap table for the first six PCs against the first six ANM
# modes, then title the current figure. The trailing semicolons suppress the
# notebook's echo of the return values.
showOverlapTable(pca[:6], anm[:6]);
title('PCA - ANM Overlap Table');

In [None]:
# Plot the overlap of the first PC with each calculated ANM mode.
showOverlap(pca[0], anm);

In [None]:
# Plot the cumulative overlap of the first PC with the ANM modes.
showCumulOverlap(pca[0], anm);

## Square Fluctuations

In [None]:
# Plot square fluctuations for the first three PCs.
showSqFlucts(pca[:3]);

In [None]:
# Plot square fluctuations for the first three ANM modes.
showSqFlucts(anm[:3]);

In [None]:
# Compare the first PC with the third ANM mode (index 2) on a common scale.
# NOTE(review): the exact scaling rule is defined by showScaledSqFlucts -
# confirm in the ProDy documentation.
showScaledSqFlucts(pca[0], anm[2]);
legend();

In [None]:
# Compare the first PC with the second ANM mode (index 1) using normalized
# square fluctuations.
# NOTE(review): a different ANM mode is used here than in the scaled plot
# (anm[1] vs anm[2]) - confirm this is intentional.
showNormedSqFlucts(pca[0], anm[1]);
legend();

## Cross Correlations

In [None]:
# Plot the cross-correlations calculated from the ANM modes.
showCrossCorr(anm)

In [None]:
# Plot the cross-correlations calculated from the PCA modes.
showCrossCorr(pca)

## Preparation of VMD Output

In [None]:
# Write the ANM modes together with the reference chain atoms to an NMD file
# for visualization in VMD.
writeNMD('p38_anm.nmd',anm,ref_chain)

In [None]:
# Write the PCA modes together with the reference chain atoms to an NMD file
# for visualization in VMD.
writeNMD('p38_pca.nmd',pca,ref_chain)