# NumPy Distance versus TensorFlow

In [6]:
from MDAnalysis.core.distances import distance_array, self_distance_array
from MDAnalysis.analysis.distances import contact_matrix
import scipy.sparse
from scipy.spatial.distance import cdist
import numpy as np
import time, os, sys, gc
import datetime
import logging
# Restrict the "py4j" logger to ERROR and above.
logger = logging.getLogger("py4j")
logger.setLevel(logging.ERROR)

# Small Test Data

In [8]:
# Ten random 3-D points with integer coordinates in 0..9 (unseeded, so the values differ per run).
x = np.array(
    (10 * np.random.random_sample((10, 3))).astype(int)
)

# NumPy Broadcast Version

In [9]:
def dist(x):
    """Return the matrix of Euclidean distances between all rows of ``x``.

    Entry ``[i, j]`` is the distance between ``x[i]`` and ``x[j]``. The
    differences are formed by broadcasting rather than an explicit loop.
    """
    # Inserting an axis lets every row be subtracted from every other row in one step.
    pairwise_diff = x[:, None, :] - x
    squared_norms = np.square(pairwise_diff).sum(axis=-1)
    return np.sqrt(squared_norms)

In [10]:
dist(x)

array([[  0.        ,  10.72380529,   7.34846923,   4.12310563,
          3.        ,   8.30662386,   6.40312424,   7.68114575,
          5.        ,   7.14142843],
       [ 10.72380529,   0.        ,  11.35781669,   9.48683298,
          9.69535971,   6.164414  ,   9.05538514,   9.16515139,
          6.78232998,   6.        ],
       [  7.34846923,  11.35781669,   0.        ,   3.60555128,
          8.66025404,   8.66025404,   7.68114575,   5.        ,
          9.43398113,   5.38516481],
       [  4.12310563,   9.48683298,   3.60555128,   0.        ,
          5.09901951,   7.34846923,   5.09901951,   4.24264069,
          6.        ,   4.24264069],
       [  3.        ,   9.69535971,   8.66025404,   5.09901951,
          0.        ,   9.16515139,   4.47213595,   7.07106781,
          3.16227766,   7.34846923],
       [  8.30662386,   6.164414  ,   8.66025404,   7.34846923,
          9.16515139,   0.        ,  10.19803903,   9.48683298,
          7.34846923,   4.69041576],
       [  

# Scikit-learn and SciPy

In [11]:
# Cross-check with scikit-learn: pairwise Euclidean distances between the rows of x and themselves.
from sklearn.metrics.pairwise import euclidean_distances
euclidean_distances(x,x)

array([[  0.        ,  10.72380529,   7.34846923,   4.12310563,
          3.        ,   8.30662386,   6.40312424,   7.68114575,
          5.        ,   7.14142843],
       [ 10.72380529,   0.        ,  11.35781669,   9.48683298,
          9.69535971,   6.164414  ,   9.05538514,   9.16515139,
          6.78232998,   6.        ],
       [  7.34846923,  11.35781669,   0.        ,   3.60555128,
          8.66025404,   8.66025404,   7.68114575,   5.        ,
          9.43398113,   5.38516481],
       [  4.12310563,   9.48683298,   3.60555128,   0.        ,
          5.09901951,   7.34846923,   5.09901951,   4.24264069,
          6.        ,   4.24264069],
       [  3.        ,   9.69535971,   8.66025404,   5.09901951,
          0.        ,   9.16515139,   4.47213595,   7.07106781,
          3.16227766,   7.34846923],
       [  8.30662386,   6.164414  ,   8.66025404,   7.34846923,
          9.16515139,   0.        ,  10.19803903,   9.48683298,
          7.34846923,   4.69041576],
       [  

In [12]:
# SciPy's pdist lists each unordered pair once as a flat vector (45 values for the 10 rows of x).
from scipy.spatial.distance import pdist
pdist(x)

array([ 10.72380529,   7.34846923,   4.12310563,   3.        ,
         8.30662386,   6.40312424,   7.68114575,   5.        ,
         7.14142843,  11.35781669,   9.48683298,   9.69535971,
         6.164414  ,   9.05538514,   9.16515139,   6.78232998,
         6.        ,   3.60555128,   8.66025404,   8.66025404,
         7.68114575,   5.        ,   9.43398113,   5.38516481,
         5.09901951,   7.34846923,   5.09901951,   4.24264069,
         6.        ,   4.24264069,   9.16515139,   4.47213595,
         7.07106781,   3.16227766,   7.34846923,  10.19803903,
         9.48683298,   7.34846923,   4.69041576,   3.74165739,
         5.09901951,   6.4807407 ,   7.34846923,   4.89897949,   6.164414  ])

# MDAnalysis

In [18]:
def compute_distance_mdanalysis(coord):
    """Time one sparse ``contact_matrix`` call on ``coord``.

    The contact matrix itself is discarded; only a summary string of the form
    "ComputeDistanceMDAnalysisSparse, <len(coord)>, <elapsed seconds>" is returned.
    """
    t_begin = time.time()
    contact_matrix(coord, returntype="sparse")
    size = len(coord)
    elapsed = time.time() - t_begin
    return "ComputeDistanceMDAnalysisSparse, %d, %.2f" % (size, elapsed)

# TensorFlow

In [None]:
import tensorflow as tf
import numpy as np

def compute_distance_tf(coord):
    """Time a TensorFlow evaluation of the pairwise Euclidean distance matrix.

    Args:
        coord: NumPy array of coordinates; it is cast to float32 before being
            handed to TensorFlow. Presumably shaped (n_points, n_dims) --
            the reduction over axis 2 below relies on that; confirm for
            other callers.

    Returns:
        str: "ComputeDistanceTensorflow, <len(coord)>, <elapsed seconds>".
        The distance matrix is evaluated but not returned. The elapsed time
        covers graph construction, session start-up and evaluation, but not
        session shutdown.
    """
    start = time.time()
    # Build in a fresh graph so repeated benchmark calls do not keep adding
    # constants and ops to the process-wide default graph. The ``with``
    # blocks also guarantee the session is closed if evaluation raises.
    with tf.Graph().as_default():
        with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            matrix1 = tf.convert_to_tensor(coord.astype("float32"))
            matrix_extend = tf.expand_dims(matrix1, 1)
            # The ``-`` operator replaces ``tf.sub``, which was removed in
            # TensorFlow 1.0; broadcasting yields every pairwise difference.
            dist_matrix = matrix_extend - matrix1
            dist_matrix_pow = tf.pow(dist_matrix, 2)
            dist_matrix_pow_red = tf.reduce_sum(dist_matrix_pow, 2)
            dist_matrix_euc = tf.sqrt(dist_matrix_pow_red)
            sess.run(dist_matrix_euc)
            # Stop the clock before the session is torn down, as before.
            result = "ComputeDistanceTensorflow, %d, %.2f" % (len(coord), (time.time() - start))
    return result

In [22]:
compute_distance_tf(x)

'ComputeDistanceTensorflow, 10, 0.06'

# Benchmark

In [None]:
DATA_PATH="../../data/mdanalysis/synthetic/traj/"
# Sort so the benchmark visits the input files in a reproducible order
# (os.listdir makes no ordering guarantee).
files=[os.path.join(DATA_PATH, i) for i in sorted(os.listdir(DATA_PATH))]

for file_name in files:
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Process: " + file_name)
    coord = np.loadtxt(file_name, dtype='float32')
    # compute_distance_tf does its own timing and returns the summary line.
    result=compute_distance_tf(coord)
    print(result)

Process: ../../data/mdanalysis/synthetic/traj/10.np_txt
ComputeDistanceTensorflow, 10, 0.10
Process: ../../data/mdanalysis/synthetic/traj/100.np_txt
ComputeDistanceTensorflow, 100, 0.05