<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Timing-Analysis" data-toc-modified-id="Timing-Analysis-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Timing Analysis</a></span><ul class="toc-item"><li><span><a href="#Getting-Large-randomly-generated-data" data-toc-modified-id="Getting-Large-randomly-generated-data-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Getting Large randomly generated data</a></span></li><li><span><a href="#1)-CPP-Helper-V/S-Python-Helper" data-toc-modified-id="1)-CPP-Helper-V/S-Python-Helper-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>1) CPP Helper V/S Python Helper</a></span></li><li><span><a href="#2)-FacilityLocation:-evaluate()-maximize()-and-marginalGain()-against-various-modes" data-toc-modified-id="2)-FacilityLocation:-evaluate()-maximize()-and-marginalGain()-against-various-modes-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>2) FacilityLocation: evaluate() maximize() and marginalGain() against various modes</a></span></li></ul></li></ul></div>

# Timing Analysis 

In [8]:
from timeit import timeit 
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import cosine_distances
from sklearn.neighbors import NearestNeighbors
from scipy import sparse
import scipy
import submodlib_cpp as subcp
from submodlib.helper import create_kernel
import random
from time import time

## Getting Large randomly generated data

In [9]:
data = None
try: #Simply load data if already created
    with open('/content/drive/MyDrive/submodlib_data/large_data.npy', 'rb') as f:
        data = np.load(f, allow_pickle=True)
except: #Create data
    random.seed(0)
    def generate_datapoint(arg):
        #random.seed(arg)
        #x=random.randrange(1000)-random.randrange(1000)
        #y=random.randrange(10)-random.randrange(10)
        #z=(random.randrange(100)/100)-(random.randrange(100)/100)
        l_feat = []
        for i in range(2000):
            toss = random.randrange(0, 2)
            if toss==0:
                bound_val=random.randrange(10, 10000)
                #val=random.randrange(bound_val)-random.randrange(bound_val)
                val=random.randrange(-1*bound_val, bound_val)
            else:
                val = random.randrange(-100, 100)/100

            l_feat.append(val)
        if arg%10000==0:
            print(arg)
        #return (x,y,z)
        return l_feat

    l = list(map(generate_datapoint, range(100000)))#Generating 100K datapoints
    data = np.array(l)
    
    with open('/content/drive/MyDrive/submodlib_data/large_data.npy', 'wb') as f:
        np.save(f,data, allow_pickle=True)

l_record=[]

In [19]:
np.shape(data)
n = 100000

## 1) CPP Helper V/S Python Helper

In [11]:
def fun1():# cpp_helper_euclidean (Non-vectorized, min-heap based approach)
    subcp.create_kernel(data.tolist(), "euclidean" ,num_neigh)
t=timeit('fun1', 'from __main__ import fun1')
l_record.append(("cpp_helper_euclidean", t))
print("cpp_helper_euclidean:", t,'\n')

cpp_helper_euclidean: 0.009766310000031808 



In [12]:
def fun2(): #python_helper_euclidean(vectorized knn clustering approach) 
    n_, K_dense = create_kernel(data, 'dense','euclidean')
t=timeit('fun2', 'from __main__ import fun2')
l_record.append(("python_helper_euclidean", t))
print("python_helper_euclidean:", t,'\n')

python_helper_euclidean: 0.017564612000001034 



In [13]:
def fun3():# cpp_helper_cosine (Non-vectorized, min-heap based approach)
    subcp.create_kernel(data.tolist(), "cosine" ,num_neigh)
t=timeit('fun3', 'from __main__ import fun3')
l_record.append(("cpp_helper_cosine", t))
print("cpp_helper_cosine:", t,'\n')

cpp_helper_cosine: 0.01104083300003822 



In [14]:
def fun4(): #python_helper_cosine(vectorized knn clustering approach) 
    n_, K_dense = create_kernel(data, 'dense','cosine')
t=timeit('fun4', 'from __main__ import fun4')
l_record.append(("python_helper_cosine", t))
print("python_helper_cosine:", t,'\n')

python_helper_cosine: 0.016881258000012167 



**CPP Helper V/S Python Helper: SUMMARY**

In [15]:
df = pd.DataFrame(columns = ['name', 'time'],data=l_record)
df

Unnamed: 0,name,time
0,cpp_helper_euclidean,0.009766
1,python_helper_euclidean,0.017565
2,cpp_helper_cosine,0.011041
3,python_helper_cosine,0.016881


## 2) FacilityLocation: evaluate() maximize() and marginalGain() against various modes

In [None]:
#Collab Session crashed while running this (memory overflow)
from submodlib.functions.facilityLocation import FacilityLocationFunction
obj = FacilityLocationFunction(n=n, data=data, mode="sparse", metric="euclidean", num_neigh=1000)
l_record=[]