# Notebook for computing hashes, buckets and similarity values for the disk scheme. 

Utilizes the disk scheme

Incorporates:
* Hashing of trajectories using disk scheme
* Bucketing of hashes made from disk scheme
* Similarity computation between trajectories within buckets.
    * For both DTW and Frechet

Produces:
* JSON file containing buckets
* Similarity values for trajectories within buckets


In [4]:
import os
import sys

def find_project_root(target_folder="masteroppgave", start=None):
    """Return the nearest enclosing directory whose name is *target_folder*.

    The search begins at ``start`` and walks upward one directory at a time.
    The starting directory itself is a candidate.

    Args:
        target_folder: Base name of the directory to look for.
        start: Directory to begin the search from. When omitted, the current
            working directory is used.

    Returns:
        The absolute path of the matching directory, or ``None`` when the
        filesystem root is reached without finding a match.
    """
    # The previous version called os.path.abspath("__file__") with a string
    # literal, which resolves to a non-existent "__file__" entry inside the
    # working directory. Anchoring on the working directory directly gives the
    # same upward search without the bogus first candidate.
    current = os.path.abspath(os.getcwd() if start is None else start)
    while True:
        if os.path.basename(current) == target_folder:
            return current
        parent = os.path.dirname(current)
        if parent == current:
            # dirname() is a fixed point only at the filesystem root.
            return None
        current = parent

# Resolve the project directory and expose it to the import system so the
# project-local imports that follow can be resolved.
project_root = find_project_root("masteroppgave")

# Fail fast: without the project directory nothing below can be imported.
if not project_root:
    raise RuntimeError("Could not find 'masteroppgave' directory")

sys.path.append(project_root)
print(f"Project root found: {project_root}")


from computation.disk_similarity import generate_disk_hash_similarity

Project root found: c:\Users\eivin\dev\JoonEndreLSH\masteroppgave


# Rome


### DTW


In [5]:
from computation.disk_similarity import generate_disk_hash_similarity_with_bucketing


# Rome / DTW run: compute similarities together with the bucket system.
# The numeric arguments are passed straight through to the project helper;
# their exact meaning is defined in computation.disk_similarity (not shown here).
measure = "dtw"
similarities, bucket_system = generate_disk_hash_similarity_with_bucketing(
    city="rome", diameter=0.6, layers=5, disks=50, measure=measure, size=50
)

# Print every (key, value) pair of bucket_system.
# In the recorded output the keys are large integers and the values are lists
# of trajectory ids such as 'R_CAV'.
for key, value in bucket_system.items():
    print(key, value)


# Destination for the similarity values; the path is relative to the current
# working directory. Writing is currently disabled (the call below is
# commented out), so this cell only prints the buckets.
output_path = f"similarity_values/disk/rome/disk_rome-{measure}.csv"
# similarities.to_csv(os.path.abspath(output_path))

166744161221902542004058533148384088477 ['R_CAV']
233957727560782777306570829571576023499 ['R_CAV']
238046696725579720171126480913544027419 ['R_CAV']
256898292949653608219959073610588720367 ['R_CAV']
123947719320887122190760302096278072734 ['R_CAV']
129267158742228495387463226954784235435 ['R_DYX']
207453888360302109591966988229751767438 ['R_DYX', 'R_AKY', 'R_BCU']
106494560717719836131624178699739026244 ['R_DYX']
57667203397453069380187837130983884876 ['R_DYX']
283570291307918985970376025804178423236 ['R_DYX']
327546917972349610422090858210987595069 ['R_CDU']
9216697131837469474951491715826800914 ['R_CDU']
163775299575882938059641363338897073370 ['R_CDU']
72560857223428224986671329813415899098 ['R_CDU']
217212618058687440290238332911603007584 ['R_CDU']
274587334513285851696185494465980127684 ['R_ECN']
71651125947262871998529802971428410439 ['R_ECN']
219226619652616272735184385813593318425 ['R_ECN']
275153935943731799786804106966955931660 ['R_ECN']
3365169236185034007921072489735109866

### Frechet


In [None]:
# Rome / Frechet run: compute similarities without returning the bucket system.
# NOTE(review): parameters differ from the DTW cell (diameter, disks, size) -
# presumably tuned per measure; confirm against the experiment setup.
measure = "frechet"
similarities = generate_disk_hash_similarity(
    city="rome", diameter=1.6, layers=5, disks=100, measure=measure, size=500
)
# Destination path relative to the current working directory; the write itself
# is currently disabled (commented out below).
output_path = f"similarity_values/disk/rome/disk_rome-{measure}.csv"
# similarities.to_csv(os.path.abspath(output_path))

# Porto


In [None]:
# Porto / DTW run: compute similarities and write them to CSV.
# The city is passed positionally here, whereas the Rome cells pass it as
# the `city` keyword.
measure = "dtw"
similarities = generate_disk_hash_similarity(
    "porto", diameter=2.2, layers=4, disks=60, measure=measure, size=50
)
# Unlike the Rome cells, this cell actually writes the result. The path is
# resolved against the current working directory.
# NOTE(review): assumes `similarities` exposes a pandas-style `to_csv` and that
# the target directory already exists - confirm.
output_path = f"similarity_values/disk/porto/disk_porto-{measure}.csv"
similarities.to_csv(os.path.abspath(output_path))