In [1]:
# Importing necessary modules
import os, sys
import shutil

import pandas as pd

currentdir = os.path.dirname(os.path.abspath("__file__"))
parentdir = os.path.dirname(currentdir)
sys.path.append(parentdir)

from utils.helpers import file_handler as fh
from utils.helpers import metafile_handler as mfh
from utils.similarity_measures import dtw

In [2]:
from constants import PORTO_OUTPUT_FOLDER, ROME_OUTPUT_FOLDER, KOLUMBUS_OUTPUT_FOLDER, SIMILARITIES_OUTPUT_FOLDER_PORTO, SIMILARITIES_OUTPUT_FOLDER_KOLUMBUS, SIMILARITIES_OUTPUT_FOLDER_ROME, NUMBER_OF_TRAJECTORIES

# Paths are relative to the notebook's working directory; the folder names
# themselves come from `constants`. Each dataset gets the same six settings:
# data folder, 50-entry test meta file, full meta file, similarities output
# folder, and the output file names for the full and test runs.
#
# NOTE: the SIMILARITIES_OUTPUT_FOLDER_* names deliberately rebind the values
# imported from `constants` to their "../<folder>/" form.

# --- Rome ---
DATA_ROME = f"../{ROME_OUTPUT_FOLDER}/"
TEST_SET_ROME = f"../{ROME_OUTPUT_FOLDER}/META-50.txt"
FULL_SET_ROME = f"../{ROME_OUTPUT_FOLDER}/META-{NUMBER_OF_TRAJECTORIES}.txt"
SIMILARITIES_OUTPUT_FOLDER_ROME = f"../{SIMILARITIES_OUTPUT_FOLDER_ROME}/"
DTW_FILENAME_ROME = f"rome-dtw-{NUMBER_OF_TRAJECTORIES}.csv"
DTW_FILENAME_TEST_ROME = "rome-dtw-50.csv"

# --- Porto ---
DATA_PORTO = f"../{PORTO_OUTPUT_FOLDER}/"
TEST_SET_PORTO = f"../{PORTO_OUTPUT_FOLDER}/META-50.txt"
FULL_SET_PORTO = f"../{PORTO_OUTPUT_FOLDER}/META-{NUMBER_OF_TRAJECTORIES}.txt"
SIMILARITIES_OUTPUT_FOLDER_PORTO = f"../{SIMILARITIES_OUTPUT_FOLDER_PORTO}/"
DTW_FILENAME_PORTO = f"porto-dtw-{NUMBER_OF_TRAJECTORIES}.csv"
DTW_FILENAME_TEST_PORTO = "porto-dtw-50.csv"

# --- Kolumbus ---
DATA_KOLUMBUS = f"../{KOLUMBUS_OUTPUT_FOLDER}/"
TEST_SET_KOLUMBUS = f"../{KOLUMBUS_OUTPUT_FOLDER}/META-50.txt"
FULL_SET_KOLUMBUS = f"../{KOLUMBUS_OUTPUT_FOLDER}/META-{NUMBER_OF_TRAJECTORIES}.txt"
SIMILARITIES_OUTPUT_FOLDER_KOLUMBUS = f"../{SIMILARITIES_OUTPUT_FOLDER_KOLUMBUS}/"
DTW_FILENAME_KOLUMBUS = f"kolumbus-dtw-{NUMBER_OF_TRAJECTORIES}.csv"
# Plain file name, like the Rome/Porto test files: it is joined onto the
# similarities output folder, so a "../" prefix here would place the file
# one directory above that folder.
DTW_FILENAME_TEST_KOLUMBUS = "kolumbus-dtw-50.csv"

In [3]:
def deleteFile(file_name: str, folder_name: str) -> None:
    """Best-effort removal of `file_name` inside `folder_name`.

    Regular files and symlinks are unlinked; directories are removed
    recursively. A path that is none of these is left alone. Any exception
    raised while removing is caught and reported with `print` instead of
    being propagated.
    """
    target = os.path.join(folder_name, file_name)
    try:
        is_file_like = os.path.isfile(target) or os.path.islink(target)
        if is_file_like:
            os.unlink(target)
            return
        if os.path.isdir(target):
            shutil.rmtree(target)
    except Exception as error:
        print("Failed to remove %s. Reason: %s" % (target, error))

In [4]:
# Using Cython DTW, to speed things up


def _write_dtw_similarities(
    compute_similarities,
    data_folder: str,
    meta_file: str,
    file_name: str,
    similarities_output_folder: str,
) -> None:
    """Load the trajectories listed in `meta_file`, compute their similarities
    with `compute_similarities` and write the result as CSV.

    Shared implementation of the two public generators below.
    """
    # Create the output folder before the (potentially long) computation so
    # the run cannot fail at the very end because the folder is missing.
    if similarities_output_folder:
        os.makedirs(similarities_output_folder, exist_ok=True)

    # Drop any previous result with the same name.
    deleteFile(file_name, similarities_output_folder)

    files = mfh.read_meta_file(meta_file)
    trajectories = fh.load_trajectory_files(files, data_folder)

    # The result only needs a `to_csv` method (presumably a pandas DataFrame
    # -- confirm against utils.similarity_measures.dtw).
    df = compute_similarities(trajectories)
    df.to_csv(os.path.join(similarities_output_folder, file_name))


def generate_dtw_similarities(
    data_folder: str, meta_file: str, file_name: str, similarities_output_folder: str
) -> None:
    """Compute DTW similarities with `dtw.cy_dtw` and save them as CSV.

    Args:
        data_folder: Folder containing the trajectory files.
        meta_file: Meta file listing which trajectory files to load.
        file_name: Name of the CSV file to write.
        similarities_output_folder: Folder the CSV file is written to
            (created if it does not exist).
    """
    _write_dtw_similarities(
        dtw.cy_dtw, data_folder, meta_file, file_name, similarities_output_folder
    )


def generate_parallell_dtw_similarities(
    data_folder: str, meta_file: str, file_name: str, similarities_output_folder: str
) -> None:
    """Compute DTW similarities with `dtw.cy_dtw_pool` and save them as CSV.

    Takes the same arguments as `generate_dtw_similarities`; only the DTW
    implementation that is called differs.
    """
    _write_dtw_similarities(
        dtw.cy_dtw_pool, data_folder, meta_file, file_name, similarities_output_folder
    )

# DTW similarities for Rome


In [None]:
# DTW similarities for Rome

# Test set -- uncomment one of the calls to regenerate the test file
# generate_dtw_similarities(
#     DATA_ROME, TEST_SET_ROME, DTW_FILENAME_TEST_ROME, SIMILARITIES_OUTPUT_FOLDER_ROME
# )
# generate_parallell_dtw_similarities(
#     DATA_ROME, TEST_SET_ROME, DTW_FILENAME_TEST_ROME, SIMILARITIES_OUTPUT_FOLDER_ROME
# )

# Full set
# generate_dtw_similarities(
#     DATA_ROME, FULL_SET_ROME, DTW_FILENAME_ROME, SIMILARITIES_OUTPUT_FOLDER_ROME
# )
generate_parallell_dtw_similarities(
    data_folder=DATA_ROME,
    meta_file=FULL_SET_ROME,
    file_name=DTW_FILENAME_ROME,
    similarities_output_folder=SIMILARITIES_OUTPUT_FOLDER_ROME,
)

# DTW similarities for Porto


In [6]:
# DTW similarities for Porto

# Test set -- uncomment one of the calls to regenerate the test file
# generate_dtw_similarities(
#     DATA_PORTO,
#     TEST_SET_PORTO,
#     DTW_FILENAME_TEST_PORTO,
#     SIMILARITIES_OUTPUT_FOLDER_PORTO,
# )
# generate_parallell_dtw_similarities(
#     DATA_PORTO,
#     TEST_SET_PORTO,
#     DTW_FILENAME_TEST_PORTO,
#     SIMILARITIES_OUTPUT_FOLDER_PORTO,
# )

# Full set (the pool-based variant is kept as a commented alternative)
generate_dtw_similarities(
    data_folder=DATA_PORTO,
    meta_file=FULL_SET_PORTO,
    file_name=DTW_FILENAME_PORTO,
    similarities_output_folder=SIMILARITIES_OUTPUT_FOLDER_PORTO,
)
# generate_parallell_dtw_similarities(
#     DATA_PORTO, FULL_SET_PORTO, DTW_FILENAME_PORTO, SIMILARITIES_OUTPUT_FOLDER_PORTO
# )

# DTW similarities for Kolumbus


In [8]:
# DTW similarities for KOLUMBUS

# Test set -- uncomment one of the calls to regenerate the test file
# generate_dtw_similarities(
#     DATA_KOLUMBUS,
#     TEST_SET_KOLUMBUS,
#     DTW_FILENAME_TEST_KOLUMBUS,
#     SIMILARITIES_OUTPUT_FOLDER_KOLUMBUS,
# )
# generate_parallell_dtw_similarities(
#     DATA_KOLUMBUS,
#     TEST_SET_KOLUMBUS,
#     DTW_FILENAME_TEST_KOLUMBUS,
#     SIMILARITIES_OUTPUT_FOLDER_KOLUMBUS,
# )

# Full set (the single-process variant is kept as a commented alternative)
# generate_dtw_similarities(
#     DATA_KOLUMBUS,
#     FULL_SET_KOLUMBUS,
#     DTW_FILENAME_KOLUMBUS,
#     SIMILARITIES_OUTPUT_FOLDER_KOLUMBUS,
# )
generate_parallell_dtw_similarities(
    data_folder=DATA_KOLUMBUS,
    meta_file=FULL_SET_KOLUMBUS,
    file_name=DTW_FILENAME_KOLUMBUS,
    similarities_output_folder=SIMILARITIES_OUTPUT_FOLDER_KOLUMBUS,
)

# Time measurement


In [8]:
def romeSet(file_size: int) -> str:
    """Return the relative path of the Rome meta file for `file_size` entries."""
    meta_path = "../dataset/rome/output/META-{}.txt".format(file_size)
    return meta_path


def portoSet(file_size: int) -> str:
    """Return the relative path of the Porto meta file for `file_size` entries."""
    meta_path = "../dataset/porto/output/META-{}.txt".format(file_size)
    return meta_path

In [9]:
# Time Efficiency
from multiprocessing import Pool

# Using Python, for measuring computation time fairly against the hash computations

# Timing functions, looked up by the `measure` name given to
# `measure_similarities`. The frechet entries are currently disabled.
sim = dict(
    dtw_py=dtw.measure_py_dtw,
    # frechet_py=frechet.measure_py_frechet,
    dtw_cy=dtw.measure_cy_dtw,
    # frechet_cy=frechet.measure_cy_frechet,
)


def measure_similarities(
    measure: str, data_folder: str, meta_file: str, parallell_jobs: int = 10
):
    """Time one similarity algorithm on the trajectories listed in `meta_file`.

    Args:
        measure: Key into `sim` selecting the timing function to run.
        data_folder: Folder containing the trajectory files.
        meta_file: Meta file listing which trajectory files to load.
        parallell_jobs: Number of identical timing tasks submitted to the pool.

    Returns:
        The list produced by `Pool.map`: one entry per submitted task.

    Raises:
        KeyError: If `measure` is not a key of `sim`.
    """
    trajectory_files = mfh.read_meta_file(meta_file)
    trajectories = fh.load_trajectory_files(trajectory_files, data_folder)

    with Pool() as workers:
        timing_function = sim[measure]
        # Every task gets the same argument list; the meaning of the two
        # trailing 1s is defined by the dtw.measure_* functions
        # (presumably repeat counts -- confirm there).
        task_arguments = [[trajectories, 1, 1] for _ in range(parallell_jobs)]
        timings = workers.map(timing_function, task_arguments)
    return timings

In [10]:
# Time the "dtw_py" measure on the Porto META-50 set.
measure_similarities(measure="dtw_py", data_folder=DATA_PORTO, meta_file=portoSet(50))

KeyboardInterrupt: 

In [None]:
# Time the "dtw_cy" measure on the Porto META-50 set.
measure_similarities(measure="dtw_cy", data_folder=DATA_PORTO, meta_file=portoSet(50))

[[0.39924800000000005],
 [0.19247100000000006],
 [0.18231700000000006],
 [0.18209399999999998],
 [0.16656500000000007],
 [0.1754070000000001],
 [0.1780870000000001],
 [0.17242499999999983],
 [0.18212300000000003],
 [0.17462500000000003]]

In [None]:
# Measuring the computation times of true similarities

runs = 10
data_sets = range(200, 201, 100)
output_folder = "../benchmarks/similarities_runtimes/"
file_name = "similarity_runtimes_true_dtw_porto_200.csv"
aggregate_file_name = "similarity_runtimes_true_dtw_porto.csv"

# One row per run, one column per data-set size.
df = pd.DataFrame(
    index=[f"run_{x+1}" for x in range(runs)], columns=list(data_sets)
)

for size in data_sets:
    print(f"Computing size {size}", end="\r")
    # The results are stored in (and merged into) the Porto runtime files,
    # so the timings must be taken on the Porto data, not on Rome.
    execution_times = measure_similarities(
        "dtw_py", DATA_PORTO, portoSet(size), parallell_jobs=runs
    )
    # Each task returns a one-element list holding its measured time.
    df[size] = [element[0] for element in execution_times]

runtimes_path = os.path.join(output_folder, file_name)
df.to_csv(runtimes_path)

# Merge the freshly measured column(s) into the aggregated Porto runtime file.
# After the CSV round trip the column labels are strings, hence str(size).
aggregate_path = os.path.join(output_folder, aggregate_file_name)
of = pd.read_csv(aggregate_path, index_col=0)
f1 = pd.read_csv(runtimes_path, index_col=0)

for size in data_sets:
    of[str(size)] = f1[str(size)]

of.to_csv(aggregate_path)

Computing size 200