In [11]:
import time
from typing import Any, Callable

from numpy import mean, std, array
from numpy.random import normal, uniform, exponential, poisson

from app.algorithms.point import Point
from app.algorithms.interface import ALGORITHMS

def _point_cloud(draw_coordinates):
    """Turn a coordinate sampler into a ``(dimensionality, cardinality)`` dataset generator.

    ``draw_coordinates`` is called once per generated point with the shape
    tuple ``(dimensionality,)``; whatever it returns is wrapped in a ``Point``.
    """
    def generate(dimensionality, cardinality):
        points = []
        for _ in range(cardinality):
            points.append(Point(draw_coordinates((dimensionality,))))
        return points

    return generate


# Dataset generators keyed by distribution name. Each entry is called as
# generator(dimensionality, cardinality) and returns a list of `cardinality` Points.
DISTRIBUTIONS = {
    "uniform": _point_cloud(lambda shape: uniform(0, 2, size=shape)),
    "normal": _point_cloud(lambda shape: normal(1, 1, size=shape)),
    "exponential": _point_cloud(lambda shape: exponential(1, size=shape)),
    "poisson": _point_cloud(lambda shape: poisson(2, size=shape)),
}



def run_algorithm(
        distribution: str,
        datasets: list[list[Point]],
        dimensionality: int,
        repeats: int,
        algorithm: str,
        cardinality: int,
        sorted: bool = False
        ) -> dict[str, Any]:
    """Run one registered algorithm on every dataset and average the measurements.

    For each dataset the global ``Point`` counters are reset, the algorithm
    looked up in ``ALGORITHMS`` is executed and timed with
    ``time.perf_counter``, and the point/coordinate counters are read back.

    Args:
        distribution: Label copied unchanged into the result.
        datasets: Datasets to measure; one run per dataset. Must not be empty.
        dimensionality: Label copied unchanged into the result.
        repeats: Not used by this function; kept so existing callers keep working.
        algorithm: Key used to look the algorithm up in ``ALGORITHMS``.
        cardinality: Label copied unchanged into the result.
        sorted: Label copied unchanged into the result. The name shadows the
            builtin ``sorted`` inside this function and is kept only for
            interface compatibility.

    Returns:
        A dict with the labels above plus ``"times"`` (mean elapsed time as
        reported by ``time.perf_counter``) and ``"comparison_point_counter"`` /
        ``"comparison_coordinates_counter"`` (counter means computed with
        ``dtype=int``).

    Raises:
        ValueError: If ``datasets`` is empty, since no average can be computed.
    """
    if len(datasets) == 0:
        # An empty run has no meaningful averages; fail early with a clear message
        # instead of letting the mean of an empty sequence blow up later.
        raise ValueError("run_algorithm requires at least one dataset")

    # Resolve the algorithm once, so the lookup is not part of the timed region.
    solve = ALGORITHMS[algorithm]

    times = []
    comparison_point_counter = []
    comparison_coordinates_counter = []
    for dataset in datasets:
        # Counters are global to Point, so clear them before every measurement.
        Point.reset_counter()
        start = time.perf_counter()

        solve(dataset)

        times.append(time.perf_counter() - start)
        comparison_point_counter.append(Point.get_global_point_counter())
        comparison_coordinates_counter.append(Point.get_global_coordinate_counter())

    return {
        "distribution": distribution,
        "algorithm": algorithm,
        "dimensionality": dimensionality,
        "cardinality": cardinality,
        "sorted": sorted,
        "times": mean(times),
        "comparison_point_counter": mean(comparison_point_counter, dtype=int),
        "comparison_coordinates_counter": mean(comparison_coordinates_counter, dtype=int)
    }

In [15]:
import pandas as pd
from app.algorithms.interface import ALGORITHMS
# Empty placeholder frame listing the result columns; the real frame is
# rebuilt from `dicts` once the benchmark runs have finished.
df = pd.DataFrame({
    "distribution": [],
    "algorithm": [],
    "dimensionality": [],
    "cardinality": [],
    "times": [],
    "comparison_point_counter": [],
    "comparison_coordinates_counter": []
})

# One result record (as returned by run_algorithm) per benchmarked configuration.
dicts = []

for distribution in DISTRIBUTIONS:
    for repeats, cardinality in [(1, 100), (1, 1000)]:
        for dimensionality in [2, 3, 4, 5, 10]:
            for is_sorted in [False, True]:
                # Fresh random datasets for this configuration, shared by all algorithms.
                datasets = [DISTRIBUTIONS[distribution](dimensionality, cardinality) for _ in range(repeats)]
                if is_sorted:
                    # Pre-sort by the first coordinate once per configuration
                    # rather than once per algorithm.
                    datasets = [sorted(dataset, key=lambda point: point.x[0]) for dataset in datasets]
                for algorithm in ALGORITHMS:
                    # Hand each algorithm its own shallow copies so that an
                    # implementation which reorders its input list in place
                    # cannot change what the next algorithm receives.
                    algorithm_inputs = [list(dataset) for dataset in datasets]
                    dicts.append(
                        run_algorithm(
                            distribution,
                            algorithm_inputs,
                            dimensionality,
                            repeats,
                            algorithm,
                            cardinality,
                            is_sorted,
                        )
                    )


In [22]:
# Snapshot the raw benchmark records as a DataFrame (one row per entry in `dicts`),
# taken before the "parsed" summary key is attached to the records.
df = pd.DataFrame(data=dicts)
def parse(dict) -> None:
    """Attach a compact multi-line text summary to a result record, in place.

    Reads the ``"times"``, ``"comparison_point_counter"`` and
    ``"comparison_coordinates_counter"`` entries and stores the summary under
    the ``"parsed"`` key. The time is rendered with four decimal places.

    Note: the parameter name shadows the builtin ``dict`` inside this function.
    """
    summary_lines = (
        f"t:{dict['times']:.4f}",
        f"pc:{dict['comparison_point_counter']}",
        f"cc:{dict['comparison_coordinates_counter']}",
    )
    # Lines are separated by a newline followed by a single space.
    dict["parsed"] = "\n ".join(summary_lines)

# Add the "parsed" text summary to every result record (mutates the dicts in place).
for dc in dicts:
    parse(dc)

# Rebuild the frame so it picks up the new "parsed" column, then write it to CSV
# without the index column.
df_ = pd.DataFrame(data=dicts)
df_.to_csv(path_or_buf="benchmark__.csv", index=False)
