In [41]:
# Importing necessary modules
import os
import shutil
import sys

import pandas as pd

# "__file__" is a string literal here (a notebook has no __file__), so
# abspath() resolves it against the current working directory and
# `currentdir` ends up being that working directory.
currentdir = os.path.dirname(os.path.abspath("__file__"))
parentdir = os.path.dirname(currentdir)

# The parent directory must be on sys.path BEFORE the project-local imports
# below run, otherwise they fail on a fresh kernel. The membership check keeps
# re-running this cell from appending duplicates.
if parentdir not in sys.path:
    sys.path.append(parentdir)

from utils.similarity_measures import dtw
from utils.helpers import file_handler as fh
from utils.helpers import metafile_handler as mfh

In [42]:
# Per data set: the trajectory folder, the two meta files inside it, the folder
# that receives the similarity CSVs, and the CSV file names.
#
# The DTW_FILENAME_* values are joined onto SIMILARITIES_OUTPUT_FOLDER_* with
# os.path.join, so they must be bare file names: a leading "../" would place
# the CSV outside the data set's own output folder.

DATA_ROME = "../dataset/rome/output/"
TEST_SET_ROME = DATA_ROME + "META-50.txt"
FULL_SET_ROME = DATA_ROME + "META-1000.txt"
SIMILARITIES_OUTPUT_FOLDER_ROME = "../benchmarks/similarities/rome/"
DTW_FILENAME_ROME = "rome-dtw-fullset.csv"
DTW_FILENAME_TEST_ROME = "rome-dtw-testset.csv"

DATA_PORTO = "../dataset/porto/output/"
TEST_SET_PORTO = DATA_PORTO + "META-50.txt"
FULL_SET_PORTO = DATA_PORTO + "META-1000.txt"
SIMILARITIES_OUTPUT_FOLDER_PORTO = "../benchmarks/similarities/porto/"
DTW_FILENAME_PORTO = "porto-dtw-fullset.csv"
DTW_FILENAME_TEST_PORTO = "porto-dtw-testset.csv"

DATA_KOLUMBUS = "../dataset/kolumbus/output/"
TEST_SET_KOLUMBUS = DATA_KOLUMBUS + "META-50.txt"
FULL_SET_KOLUMBUS = DATA_KOLUMBUS + "META-1000.txt"
SIMILARITIES_OUTPUT_FOLDER_KOLUMBUS = "../benchmarks/similarities/kolumbus/"
# Bare file names, consistent with Rome and Porto (previously prefixed "../").
DTW_FILENAME_KOLUMBUS = "kolumbus-dtw-fullset.csv"
DTW_FILENAME_TEST_KOLUMBUS = "kolumbus-dtw-testset.csv"

In [43]:
def deleteFile(file_name: str, folder_name: str) -> None:
    """Best-effort removal of ``folder_name/file_name``.

    Regular files and symlinks are unlinked, directories are removed
    recursively, and a path that matches neither is left alone. Any exception
    raised while removing is reported with ``print`` instead of propagating.
    """
    target = os.path.join(folder_name, file_name)
    try:
        is_file_like = os.path.isfile(target) or os.path.islink(target)
        if is_file_like:
            os.unlink(target)
            return
        if os.path.isdir(target):
            shutil.rmtree(target)
    except Exception as err:
        print("Failed to remove %s. Reason: %s" % (target, err))


def romeSet(file_size: int) -> str:
    """Build the relative path of the Rome ``META-<file_size>.txt`` meta file."""
    meta_name = f"META-{file_size}.txt"
    return "../dataset/rome/output/" + meta_name


def portoSet(file_size: int) -> str:
    """Build the relative path of the Porto ``META-<file_size>.txt`` meta file."""
    meta_name = f"META-{file_size}.txt"
    return "../dataset/porto/output/" + meta_name

In [44]:
# Using Cython DTW, to speed things up


def _write_dtw_similarities(
    compute_similarities,
    data_folder: str,
    meta_file: str,
    file_name: str,
    similarities_output_folder: str,
) -> None:
    """Shared pipeline behind both public DTW generators.

    Removes any previous output, loads the trajectories named in `meta_file`
    from `data_folder`, passes them to `compute_similarities` and writes the
    returned object with its ``to_csv`` method.

    Args:
        compute_similarities: Callable taking the loaded trajectories and
            returning an object with ``to_csv(path)`` (``dtw.cy_dtw`` or
            ``dtw.cy_dtw_pool`` in this notebook).
        data_folder: Folder the trajectory files are loaded from.
        meta_file: Meta file read with ``mfh.read_meta_file``.
        file_name: Name of the CSV written into the output folder.
        similarities_output_folder: Folder receiving the CSV; created if missing.
    """
    # Create the destination up front so a missing folder is reported (or
    # fixed) before the expensive computation rather than after it. An empty
    # folder name means "current directory" for os.path.join, so skip it.
    if similarities_output_folder:
        os.makedirs(similarities_output_folder, exist_ok=True)
    deleteFile(file_name, similarities_output_folder)

    files = mfh.read_meta_file(meta_file)
    trajectories = fh.load_trajectory_files(files, data_folder)

    similarities = compute_similarities(trajectories)
    similarities.to_csv(os.path.join(similarities_output_folder, file_name))


def generate_dtw_similarities(
    data_folder: str, meta_file: str, file_name: str, similarities_output_folder: str
) -> None:
    """Compute similarities with ``dtw.cy_dtw`` and write them to a CSV file.

    The CSV is written to ``similarities_output_folder/file_name``, replacing
    any existing file of that name.
    """
    _write_dtw_similarities(
        dtw.cy_dtw, data_folder, meta_file, file_name, similarities_output_folder
    )


def generate_parallell_dtw_similarities(
    data_folder: str, meta_file: str, file_name: str, similarities_output_folder: str
) -> None:
    """Compute similarities with ``dtw.cy_dtw_pool`` and write them to a CSV file.

    Same contract as ``generate_dtw_similarities``; only the DTW function differs.
    """
    _write_dtw_similarities(
        dtw.cy_dtw_pool, data_folder, meta_file, file_name, similarities_output_folder
    )

# DTW similarities for Rome


In [19]:
# DTW similarities for Rome
# generate_dtw_similarities accepts the same arguments and uses dtw.cy_dtw
# instead of dtw.cy_dtw_pool, should the non-pool variant be wanted.

# Test set
generate_parallell_dtw_similarities(
    data_folder=DATA_ROME,
    meta_file=TEST_SET_ROME,
    file_name=DTW_FILENAME_TEST_ROME,
    similarities_output_folder=SIMILARITIES_OUTPUT_FOLDER_ROME,
)

# Full set
generate_parallell_dtw_similarities(
    data_folder=DATA_ROME,
    meta_file=FULL_SET_ROME,
    file_name=DTW_FILENAME_ROME,
    similarities_output_folder=SIMILARITIES_OUTPUT_FOLDER_ROME,
)

# DTW similarities for Porto


In [47]:
# DTW similarities for Porto
# generate_dtw_similarities accepts the same arguments and uses dtw.cy_dtw
# instead of dtw.cy_dtw_pool, should the non-pool variant be wanted.

# Test set
generate_parallell_dtw_similarities(
    data_folder=DATA_PORTO,
    meta_file=TEST_SET_PORTO,
    file_name=DTW_FILENAME_TEST_PORTO,
    similarities_output_folder=SIMILARITIES_OUTPUT_FOLDER_PORTO,
)

# Full set
generate_parallell_dtw_similarities(
    data_folder=DATA_PORTO,
    meta_file=FULL_SET_PORTO,
    file_name=DTW_FILENAME_PORTO,
    similarities_output_folder=SIMILARITIES_OUTPUT_FOLDER_PORTO,
)

# DTW similarities for Kolumbus


In [21]:
# DTW similarities for KOLUMBUS
# generate_dtw_similarities accepts the same arguments and uses dtw.cy_dtw
# instead of dtw.cy_dtw_pool, should the non-pool variant be wanted.

# Test set
generate_parallell_dtw_similarities(
    data_folder=DATA_KOLUMBUS,
    meta_file=TEST_SET_KOLUMBUS,
    file_name=DTW_FILENAME_TEST_KOLUMBUS,
    similarities_output_folder=SIMILARITIES_OUTPUT_FOLDER_KOLUMBUS,
)

# Full set
generate_parallell_dtw_similarities(
    data_folder=DATA_KOLUMBUS,
    meta_file=FULL_SET_KOLUMBUS,
    file_name=DTW_FILENAME_KOLUMBUS,
    similarities_output_folder=SIMILARITIES_OUTPUT_FOLDER_KOLUMBUS,
)

# Time measurement


In [26]:
# Time Efficiency
from multiprocessing import Pool

# Using Python, for measuring computation time fairly against the hash computations

# Timing functions, looked up by name in measure_similarities.
sim = dict(
    dtw_py=dtw.measure_py_dtw,
    # frechet_py=frechet.measure_py_frechet,
    dtw_cy=dtw.measure_cy_dtw,
    # frechet_cy=frechet.measure_cy_frechet,
)


def measure_similarities(
    measure: str, data_folder: str, meta_file: str, parallell_jobs: int = 10
):
    """Common method for measuring the efficiency of the similarity algorithms.

    Loads the trajectories listed in `meta_file` from `data_folder` and runs
    the timing function registered under `measure` in ``sim`` once per
    requested repetition, distributing the repetitions over a process pool.

    Args:
        measure: Key into ``sim`` selecting the timing function.
        data_folder: Folder the trajectory files are loaded from.
        meta_file: Meta file read with ``mfh.read_meta_file``.
        parallell_jobs: Number of repetitions submitted to the pool, and hence
            the length of the returned list. Defaults to 10.

    Returns:
        The list produced by ``pool.map``: one entry per repetition, each being
        whatever the selected timing function returns.

    Raises:
        KeyError: If `measure` is not a key of ``sim``.
    """
    files = mfh.read_meta_file(meta_file)
    trajectories = fh.load_trajectory_files(files, data_folder)

    # One identical argument list per repetition. The repetition count used to
    # be hard-coded to 10, silently ignoring `parallell_jobs`. The two trailing
    # 1s are passed through unchanged; their meaning is defined by the timing
    # functions in utils.similarity_measures.dtw.
    jobs = [[trajectories, 1, 1] for _ in range(parallell_jobs)]

    with Pool() as pool:
        result = pool.map(sim[measure], jobs)
    return result

In [27]:
# Run the "dtw_py" timing function on the Porto META-50 set.
measure_similarities(
    measure="dtw_py",
    data_folder=DATA_PORTO,
    meta_file=portoSet(50),
)

[[8.573358],
 [7.394511999999999],
 [7.455424999999999],
 [7.920072],
 [7.530314],
 [7.440797],
 [7.554117000000001],
 [7.3441089999999996],
 [7.400907999999999],
 [8.389637]]

In [28]:
# Run the "dtw_cy" timing function on the Porto META-1000 set.
measure_similarities(
    measure="dtw_cy",
    data_folder=DATA_PORTO,
    meta_file=portoSet(1000),
)

[[81.843833],
 [81.40755],
 [81.003604],
 [83.63785],
 [81.311147],
 [81.403529],
 [84.997414],
 [80.710053],
 [81.303134],
 [81.381238]]

In [29]:
# Measuring the computation times of true similarities

runs = 10
data_sets = range(500, 501, 100)
output_folder = "../benchmarks/similarities/timing"
file_name = "similarity_runtimes_true_dtw_rome_500.csv"

# One row per run, one column per data set size.
df = pd.DataFrame(
    index=[f"run_{x+1}" for x in range(runs)], columns=list(data_sets)
)

# Make sure the destination exists before the long-running measurements start.
os.makedirs(output_folder, exist_ok=True)

for size in data_sets:
    print(f"Computing size {size}", end="\r")
    # Every file this cell writes or reads is labelled "rome", so the Rome
    # data set is timed here (the loop previously timed DATA_PORTO/portoSet).
    execution_times = measure_similarities(
        "dtw_py", DATA_ROME, romeSet(size), parallell_jobs=runs
    )
    # Each result is indexable; its first item is stored as the run's time.
    df[size] = [element[0] for element in execution_times]

df.to_csv(os.path.join(output_folder, file_name))

# NOTE(review): both frames are read from the rome/ folder, while this cell
# writes its results into `output_folder` (timing/) — confirm which location
# is intended. `f1` is only needed by the commented-out merge below.
of = pd.read_csv(
    "../benchmarks/similarities/rome/similarity_runtimes_true_dtw_rome.csv", index_col=0
)
f1 = pd.read_csv(
    "../benchmarks/similarities/rome/similarity_runtimes_true_dtw_rome_500.csv",
    index_col=0,
)

# of["500"] = f1["500"]
# of["700"] = f1["700"]

of.to_csv("../benchmarks/similarities/timing/similarity_runtimes_true_dtw_rome.csv")

# Last expression of the cell, so the measured runtimes are displayed.
df
20

Computing size 500

KeyboardInterrupt: 