# Timing Analysis 

In [1]:
from timeit import timeit 
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import cosine_distances
from sklearn.neighbors import NearestNeighbors
from scipy import sparse
import scipy
import submodlib_cpp as subcp
from submodlib.helper import create_kernel
import random

## 1) CPP Helper V/S Python Helper

In [28]:
random.seed(0)
def generate_datapoint(arg):
    #random.seed(arg)
    x=random.randrange(1000)-random.randrange(1000)
    y=random.randrange(10)-random.randrange(10)
    z=(random.randrange(100)/100)-(random.randrange(100)/100)
    return (x,y,z)

l = list(map(generate_datapoint, range(100000)))#Generating 100K 3D datapoints
data = np.array(l)

l_record=[]

In [29]:
def fun1():# cpp_helper_euclidean (Non-vectorized, min-heap based approach)
    subcp.create_kernel(data.tolist(), "euclidean" ,num_neigh)
t=timeit('fun1', 'from __main__ import fun1')
l_record.append(("cpp_helper_euclidean", t))
print("cpp_helper_euclidean:", t,'\n')

cpp_helper_euclidean: 0.017577499999788415 



In [30]:
def fun2(): #python_helper_euclidean(vectorized knn clustering approach) 
    #Here I have put a part of inner code of python helper module instead of a function call to 
    #the module.This is because, latter involves one extra call to scipy's sparse matrix method which incurs
    #additional operations that aren't present in CPP helper module and would make comparision unfair.
    ED = euclidean_distances(data) 
    gamma = 1/np.shape(data)[1] 
    ES = np.exp(-ED* gamma) 
    nbrs = NearestNeighbors(n_neighbors=num_neigh, metric="euclidean").fit(data)
    _, ind = nbrs.kneighbors(data)
    ind_l = [(index[0],x) for index, x in np.ndenumerate(ind)]
    row, col = zip(*ind_l)
    mat = np.zeros(np.shape(ES))
    mat[row, col]=1
    ES_ = ES*mat
t=timeit('fun2', 'from __main__ import fun2')
l_record.append(("python_helper_euclidean", t))
print("python_helper_euclidean:", t,'\n')

python_helper_euclidean: 0.017012199999953737 



In [31]:
def fun3():# cpp_helper_cosine (Non-vectorized, min-heap based approach)
    subcp.create_kernel(data.tolist(), "cosine" ,num_neigh)
t=timeit('fun3', 'from __main__ import fun3')
l_record.append(("cpp_helper_cosine", t))
print("cpp_helper_cosine:", t,'\n')

cpp_helper_cosine: 0.015906500000710366 



In [32]:
def fun4(): #python_helper_cosine(vectorized knn clustering approach) 
    CS = cosine_similarity(data) 
    nbrs = NearestNeighbors(n_neighbors=num_neigh, metric="cosine").fit(data)
    _, ind = nbrs.kneighbors(data)
    ind_l = [(index[0],x) for index, x in np.ndenumerate(ind)]
    row, col = zip(*ind_l)
    mat = np.zeros(np.shape(CS))
    mat[row, col]=1
    CS_ = CS*mat
t=timeit('fun4', 'from __main__ import fun4')
l_record.append(("python_helper_cosine", t))
print("python_helper_cosine:", t,'\n')

python_helper_cosine: 0.0179097999998703 



**CPP Helper V/S Python Helper: SUMMARY**

In [33]:
df = pd.DataFrame(columns = ['name', 'time'],data=l_record)
df

Unnamed: 0,name,time
0,cpp_helper_euclidean,0.017577
1,python_helper_euclidean,0.017012
2,cpp_helper_cosine,0.015907
3,python_helper_cosine,0.01791
