# Notebook for creating the grid LSH hashes for both datasets

Notebook that converts the extracted data in the dataset/porto/output and dataset/rome/output folders into grid LSH hashes, which are stored in dataset/hashed_data/grid (one subfolder per city).

In [15]:
# Importing necessary modules
import os
import sys
import shutil
from multiprocessing import Pool


def find_project_root(target_folder="masteroppgave", start_dir=None):
    """Find the absolute path of an ancestor folder by searching upward.

    The search begins at ``start_dir`` (the current working directory when
    omitted), checks that directory itself first, and then walks towards the
    filesystem root one level at a time.

    Args:
        target_folder: Name (basename) of the directory to look for.
        start_dir: Directory to start searching from. Defaults to
            ``os.getcwd()``; a notebook kernel does not define ``__file__``,
            so the working directory is used as the anchor.

    Returns:
        The absolute path of the closest directory named ``target_folder``,
        or ``None`` if the filesystem root is reached without a match.
    """
    # Start from a real directory. The previous version used the literal
    # string "__file__", which produced a fake "<cwd>/__file__" path and would
    # have matched a target folder that happened to be called "__file__".
    currentdir = os.path.abspath(os.getcwd() if start_dir is None else start_dir)
    while True:
        if os.path.basename(currentdir) == target_folder:
            return currentdir  # Found the target folder
        parentdir = os.path.dirname(currentdir)
        if parentdir == currentdir:  # dirname() no longer changes: filesystem root
            return None
        currentdir = parentdir  # Move one level up

# Locate the repository root and put it on sys.path so that the
# project-local packages imported below can be resolved.
project_root = find_project_root("masteroppgave")

if not project_root:
    # Without the root on sys.path the imports further down cannot work,
    # so fail right away with an explicit message.
    raise RuntimeError("Could not find 'masteroppgave' directory")

sys.path.append(project_root)
print(f"Project root found: {project_root}")

from utils.helpers.save_trajectory import save_trajectory_hashes
from utils.helpers import file_handler as fh
from utils.helpers import metafile_handler as mfh
from schemes.lsh_grid import GridLSH

Project root found: c:\Users\eivin\dev\JoonEndreLSH\masteroppgave


In [16]:
from constants import (
    P_MAX_LON,
    P_MIN_LON,
    P_MAX_LAT,
    P_MIN_LAT,
    R_MAX_LON,
    R_MIN_LON,
    R_MAX_LAT,
    R_MIN_LAT,
)

# Notebook-wide configuration.

# Gates the fh.delete_old_files(...) calls on the output folders below.
SHOULD_DELETE_OLD_FILES = True

# Input folders (relative to this notebook) that hold the extracted data.
PORTO_DATA_FOLDER = "../../dataset/porto/output/"
ROME_DATA_FOLDER = "../../dataset/rome/output/"

# Output folders that receive the grid hashes and the copied meta files.
PORTO_OUTPUT_FOLDER = "../../dataset/hashed_data/grid/porto/"
ROME_OUTPUT_FOLDER = "../../dataset/hashed_data/grid/rome/"


# Rome LSH Grid


In [17]:
# Run this cell to clear the chosen files in the ROME folder
# The clean-up only runs when SHOULD_DELETE_OLD_FILES is set.
# NOTE(review): ".gitkeep" is presumably the file name that delete_old_files
# leaves in place — confirm against utils.helpers.file_handler.

if SHOULD_DELETE_OLD_FILES:
    fh.delete_old_files(ROME_OUTPUT_FOLDER, ".gitkeep")

In [18]:
# Configure the grid LSH scheme for Rome. This cell only constructs the
# GridLSH object; copying the meta files and computing/saving the hashes
# happens in the following cell.

resolution = 1.2  # km
layers = 5
# NOTE(review): this path points into the OUTPUT folder, which only receives
# its META files in the following cell — presumably GridLSH reads the file
# lazily (at hash time) rather than in its constructor; confirm.
meta_file = f"{ROME_OUTPUT_FOLDER}META-50.txt"

# Positional arguments: a label, the Rome bounding box (min/max latitude,
# min/max longitude), resolution, layer count, meta file and data folder.
# NOTE(review): argument meaning is inferred from the variable names only —
# verify against the GridLSH signature in schemes.lsh_grid.
GridRome = GridLSH(
    "Rome G1",
    R_MIN_LAT,
    R_MAX_LAT,
    R_MIN_LON,
    R_MAX_LON,
    resolution,
    layers,
    meta_file,
    ROME_DATA_FOLDER,
)

In [19]:
# Copying the meta_files:
# Every meta file reported for the Rome data folder is copied into the Rome
# output folder. The path is built by plain string concatenation, which
# relies on ROME_DATA_FOLDER ending with a "/".
meta_files = mfh.get_meta_files(ROME_DATA_FOLDER)

for filename in meta_files:
    shutil.copy(ROME_DATA_FOLDER + filename, ROME_OUTPUT_FOLDER)

# Generate the hashes and save them to output folder
# NOTE(review): keep this after the copy loop — GridRome was configured with a
# meta file located in the output folder, so hashing presumably needs the
# copied META files to be present; confirm.

hashes = GridRome.compute_dataset_hashes()

save_trajectory_hashes(ROME_OUTPUT_FOLDER, hashes)



# Porto LSH Grid


In [20]:
# Run this cell to clear the chosen files in the PORTO folder
# The clean-up only runs when SHOULD_DELETE_OLD_FILES is set.
# NOTE(review): ".gitkeep" is presumably the file name that delete_old_files
# leaves in place — confirm against utils.helpers.file_handler.

if SHOULD_DELETE_OLD_FILES:
    fh.delete_old_files(PORTO_OUTPUT_FOLDER, ".gitkeep")

In [21]:
# Create the Grid LSH object for Porto. This cell only constructs the
# GridLSH object; copying the meta files and computing/saving the hashes
# happens in the following cell.

resolution = 1.6  # km
layers = 5
# NOTE(review): this path points into the OUTPUT folder, which only receives
# its META files in the following cell — presumably GridLSH reads the file
# lazily (at hash time) rather than in its constructor; confirm.
meta_file = f"{PORTO_OUTPUT_FOLDER}META-50.txt"

# Positional arguments: a label, the Porto bounding box (min/max latitude,
# min/max longitude), resolution, layer count, meta file and data folder.
# NOTE(review): argument meaning is inferred from the variable names only —
# verify against the GridLSH signature in schemes.lsh_grid.
GridPorto = GridLSH(
    "Porto G1",
    P_MIN_LAT,
    P_MAX_LAT,
    P_MIN_LON,
    P_MAX_LON,
    resolution,
    layers,
    meta_file,
    PORTO_DATA_FOLDER,
)

In [23]:
# Copying the meta_files
# Every meta file reported for the Porto data folder is copied into the Porto
# output folder. The path is built by plain string concatenation, which
# relies on PORTO_DATA_FOLDER ending with a "/".
meta_files = mfh.get_meta_files(PORTO_DATA_FOLDER)

for filename in meta_files:
    shutil.copy(PORTO_DATA_FOLDER + filename, PORTO_OUTPUT_FOLDER)

# Creating the hashes and saving them to output folder
# NOTE(review): keep this after the copy loop — GridPorto was configured with a
# meta file located in the output folder, so hashing presumably needs the
# copied META files to be present; confirm.

hashes = GridPorto.compute_dataset_hashes()

save_trajectory_hashes(PORTO_OUTPUT_FOLDER, hashes)

