In [None]:
import numpy as np
import matplotlib.pyplot as plt
import random
from tqdm.notebook import tqdm

In [None]:
from pyEulerCurves import ECC_from_pointcloud, plot_euler_curve, difference_ECC

# Some parameters

In [None]:
# Number of points drawn for each point cloud.
NUMBER_OF_POINTS = 50
# Number of point clouds to generate; also the side length of the distance matrices.
NUMBER_OF_SAMPLES = 200

# Passed as `epsilon` (the max filtration) to ECC_from_pointcloud and as `max_f`
# to difference_ECC.
EPSILON = 0.5

# Sample points from the unit circle


In [None]:
# Fixed seed so that the sampled clouds are the same on every run.
random.seed(42)

point_clouds = []
for _ in range(NUMBER_OF_SAMPLES):
    # Draw the angles one by one (same order of random draws as a plain loop),
    # then place each of them on the unit circle.
    angles = [random.uniform(0, 2*np.pi) for _ in range(NUMBER_OF_POINTS)]
    point_clouds.append(np.array([[np.cos(theta), np.sin(theta)] for theta in angles]))

# Compute the Euler characteristic curves (ECCs)

In [None]:
trans = ECC_from_pointcloud(
    epsilon=EPSILON,  # max filtration
    workers=1,        # number of CPU cores
)

# One fit_transform result per point cloud, in the same order as `point_clouds`.
list_of_ECC = []
for cloud in tqdm(point_clouds):
    list_of_ECC.append(trans.fit_transform(cloud))

# Compute distance matrix

## Naive way with a double for loop

In [None]:
%%time

n_curves = len(list_of_ECC)
distance_matrix = np.zeros((NUMBER_OF_SAMPLES, NUMBER_OF_SAMPLES))

# Visit every unordered pair (i, j) with i < j exactly once and mirror the value
# across the diagonal; the diagonal itself stays at zero.
for i in range(n_curves):
    for j in range(i + 1, n_curves):
        pair_distance = difference_ECC(list_of_ECC[i], list_of_ECC[j], max_f=EPSILON)
        distance_matrix[i, j] = pair_distance
        distance_matrix[j, i] = pair_distance

## Compute one row at a time

In [None]:
# the matrix is symmetric, we only compute upper triangular part
def compute_row_distance_matrix(list_of_ECC, i, max_f):
    """Return the entries of row ``i`` that lie to the right of the diagonal.

    Parameters
    ----------
    list_of_ECC : list
        The curves to compare with each other.
    i : int
        Index of the reference curve inside ``list_of_ECC``.
    max_f
        Forwarded unchanged as the third positional argument of
        ``difference_ECC``.

    Returns
    -------
    list
        One ``difference_ECC(list_of_ECC[i], other, max_f)`` value for each
        ``other`` in ``list_of_ECC[i+1:]``, in that order. The list is empty
        when the slice is empty (e.g. for the last index).
    """
    row = []
    for other_ECC in list_of_ECC[i + 1:]:
        row.append(difference_ECC(list_of_ECC[i], other_ECC, max_f))
    return row

In [None]:
%%time

n_curves = len(list_of_ECC)
distance_matrix2 = np.zeros((NUMBER_OF_SAMPLES, NUMBER_OF_SAMPLES))

for i in range(n_curves):
    # `row` holds the entries right of the diagonal; the matrix is symmetric,
    # so the same values also fill column i below the diagonal.
    row = compute_row_distance_matrix(list_of_ECC, i, max_f=EPSILON)
    distance_matrix2[i, i+1:] = row
    distance_matrix2[i+1:, i] = row

In [None]:
# Sanity check: the row-wise matrix must agree with the naive one entry for entry.
np.all(distance_matrix == distance_matrix2)

## Compute one row at a time, in parallel

In [None]:
import itertools
from concurrent.futures import ProcessPoolExecutor

In [None]:
%%time

# If max_workers is None or not given, it will default to the number of processors on the machine.
# NOTE(review): the worker processes must be able to unpickle
# compute_row_distance_matrix, which is defined in this notebook. The
# concurrent.futures documentation warns that ProcessPoolExecutor requires an
# importable __main__, so this may fail with a non-fork start method -- confirm
# on the target platform, or move the function into a module.
with ProcessPoolExecutor(max_workers=None) as executor:
    # Collect the rows while the pool is still open, so that `distance_list` is a
    # plain list (safe to iterate more than once) and not a one-shot iterator
    # that is only consumed after the pool has been shut down.
    distance_list = list(executor.map(
        compute_row_distance_matrix,
        itertools.repeat(list_of_ECC),
        range(len(list_of_ECC)),
        itertools.repeat(EPSILON),
    ))

distance_matrix3 = np.zeros((NUMBER_OF_SAMPLES, NUMBER_OF_SAMPLES))
for i, row in enumerate(distance_list):
    # `row` holds the entries right of the diagonal of row i; mirror them into
    # column i to fill the lower triangle as well.
    distance_matrix3[i, i+1:] = row
    distance_matrix3[i+1:, i] = row


In [None]:
# Sanity check: the parallel matrix must agree with the naive one entry for entry.
np.all(distance_matrix == distance_matrix3)