## Discriminatory analysis of nanoparticles in implicit solvent

In [2]:
import sys
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning, message='.*use @default decorator instead.*')
import numpy as np
import mdtraj as md
import matplotlib.pyplot as plt
from IPython.display import Image
import time
#%matplotlib inline

Loading specially designed analysis tools

In [3]:
sys.path.append("../Analysis")
from AnalysisTools import * 

Computing the average sphericity, packing fraction, morphability, and compressibility of each nanoparticle.

In [19]:
# Labels of the simulated ligands. Each label is also used as the name of the
# directory from which that ligand's trajectory files are loaded.
ligs = [
    '1e', '2m', '3t', '4r', '5s',
    '6s', '7t', '8t', '9t', '10v',
]

In [None]:
# Build one descriptor row per ligand, in the order given by `ligs`:
#     [c, m, phi, pe]
# where c comes from compressibilty(), m from morphability(), phi from
# sphericity() and pe is the packing efficiency computed below.
# The helpers come from the AnalysisTools star-import; their internals are not
# visible here, so the comments only describe how their results are used.
RESIDUES = ('LIG', 'DYE')   # residue names handed to the analysis helpers
SKIP = 600                  # forwarded as `skip=`; presumably frames discarded
                            # as equilibration -- TODO confirm in AnalysisTools

descriptors = []
t_start = time.time()
for lig in ligs:
    # Each ligand lives in its own directory named after its label.
    traj = md.load(lig + '/trajectory.dcd', top=lig + '/snapshots.pdb')

    # `vol` (third return value) is not needed for the descriptors.
    c, av_vol, vol = compressibilty(traj, RESIDUES, skip=SKIP)
    m, av_sasa = morphability(traj, RESIDUES, skip=SKIP)
    phi = sphericity(av_vol, av_sasa)

    # Only the second return value of ResidueVolume is used.
    _unused, vol_dye = ResidueVolume(traj, 'DYE')
    _unused, vol_mol = ResidueVolume(traj, 'LIG')
    pe = (vol_mol + vol_dye) / av_vol    # Packing efficiency

    descriptors.append([c, m, phi, pe])

# `t` ends up holding the elapsed wall-clock time in seconds.
t = time.time() - t_start
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print("It took {0} seconds to process {1} simulations".format(t, len(ligs)))

Standardizing the descriptors

In [None]:
def standardise(A):
    """Z-score the columns of ``A``.

    Each column (axis 0) is shifted to zero mean and scaled to unit
    population standard deviation (``np.std`` default, ``ddof=0``).

    Parameters
    ----------
    A : array-like
        Numeric data; nested lists are accepted and converted to a float
        array.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``A`` holding the standardised values.
        A column with zero spread is returned as all zeros instead of
        NaN/inf.
    """
    A = np.asarray(A, dtype=float)
    centred = A - np.mean(A, axis=0)
    spread = np.std(A, axis=0)
    # A constant column has zero standard deviation; dividing by it would
    # yield NaN. Scale such columns by 1 so they come out as zeros.
    spread = np.where(spread == 0, 1.0, spread)
    return centred / spread

# Convert the collected rows (one per ligand) to an array once and
# standardise that array, instead of building the array and then discarding
# it by standardising the raw Python list.
des = standardise(np.array(descriptors))

As extracting the descriptors takes around 2 hours, the saved result is given below:

In [4]:
# Cached, already-standardised descriptor matrix (10 rows x 4 columns, each
# column has approximately zero mean) so that the slow trajectory analysis
# does not have to be re-run.
# Going by the descriptor-building cell, rows presumably follow the order of
# `ligs` and columns are [c, m, phi, pe] -- TODO confirm against the run that
# produced these numbers.
des= np.array([[ 0.18263378, -0.33637152,  0.04829871, -0.40323382],
       [-0.92273344, -1.2758494 ,  1.27691654, -0.20621727],
       [ 0.33496161,  1.01195691,  0.77665493,  0.62534314],
       [ 1.221911  ,  0.98075983, -1.59226289, -2.56355541],
       [-1.63870599, -1.28957176, -0.10826687,  0.20862868],
       [-0.93046896, -0.41861486,  1.87472984,  0.85278088],
       [-0.38798065,  1.60757083, -0.31622351,  1.28498054],
       [ 1.46922317,  0.53137365, -0.20739172, -0.09577617],
       [-0.50835127,  0.38151164, -0.98523618,  0.57088601],
       [ 1.17951074, -1.19276532, -0.76721885, -0.27383658]])

# Cluster analysis
_Question_: is it possible to distinguish nanoparticle-forming molecules in an unsupervised way?

In [None]:
# Reference split of the ligands, presumably into those that do
# (`true_nanos`) and do not (`false_nanos`) form nanoparticles.
# NOTE(review): '6s' is listed as nanoparticle-forming here, whereas the
# k-means discussion in this notebook says 6s does not form nanoparticles --
# confirm which of the two is correct.
true_nanos = [
    '3t', '4r', '6s', '9t', '10v',
]
false_nanos = [
    '1e', '2m', '5s', '7t', '8t',
]

## K-means

In [None]:
from sklearn.cluster import KMeans

**kmeans** with k=2

In [84]:
# Partition the standardised descriptors into two clusters. k-means starts
# from random centroids, so random_state is fixed to make the result (and the
# numbering of the cluster labels) reproducible from run to run.
kclusts = KMeans(n_clusters=2, n_init=100, random_state=0)
clusts = kclusts.fit_predict(des)

# Report which ligands received each cluster label. Rows of `des` are matched
# to `ligs` by position. The single-argument print form gives the same output
# on Python 2 and Python 3.
for label, title in enumerate(['First group:', 'Second group:']):
    members = [lig for lig, assigned in zip(ligs, clusts) if assigned == label]
    print('{0} {1}'.format(title, members))

First group: ['2m', '5s', '6s']
Second group: ['1e', '3t', '4r', '7t', '8t', '9t', '10v']


While 5s and 6s don't form nanoparticles, the false positive rate is far too high for the results to be conclusive.

**kmeans** with k=3

In [87]:
# Partition the standardised descriptors into three clusters. k-means starts
# from random centroids, so random_state is fixed to make the result (and the
# numbering of the cluster labels) reproducible from run to run.
kclusts = KMeans(n_clusters=3, n_init=150, random_state=0)
clusts = kclusts.fit_predict(des)

# Report which ligands received each cluster label. Rows of `des` are matched
# to `ligs` by position. The single-argument print form gives the same output
# on Python 2 and Python 3.
for label, title in enumerate(['First group:', 'Second group:', 'Third group:']):
    members = [lig for lig, assigned in zip(ligs, clusts) if assigned == label]
    print('{0} {1}'.format(title, members))

First group: ['2m', '5s', '6s']
Second group: ['1e', '3t', '7t', '8t', '9t', '10v']
Third group: ['4r']


Inconclusive results

## Hierarchical clustering

In [89]:
from sklearn.cluster import AgglomerativeClustering

In [91]:
# Agglomerative (Ward-linkage) clustering of the standardised descriptors
# into two groups.
hclusts = AgglomerativeClustering(n_clusters=2, linkage='ward')
clusts = hclusts.fit_predict(des)

# Print the ligands carrying each cluster label; rows of `des` are matched to
# `ligs` by position.
for label, title in enumerate(['First group:', 'Second group:']):
    members = [lig for lig, assigned in zip(ligs, clusts) if assigned == label]
    print('{0} {1}'.format(title, members))

First group: ['1e', '3t', '4r', '7t', '8t', '9t', '10v']
Second group: ['2m', '5s', '6s']


This is consistent with the k-means result for 2 clusters.