In [1]:
import numpy as np
import pandas as pd
from ptrail.core.TrajectoryDF import PTRAILDataFrame
from ptrail.features.kinematic_features import KinematicFeatures

from TestUtils.test_utils import TestUtils
from src.selection.select import Selection
from src.utils.general_utils import Utilities

In [2]:
gl_dataset = PTRAILDataFrame(data_set=pd.read_csv('../TestUtils/geolife.csv'),
                             traj_id='traj_id',
                             datetime='DateTime',
                             latitude='lat',
                             longitude='lon')
ready_dataset = KinematicFeatures.create_distance_column(gl_dataset)
ready_dataset

Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,mode_of_transport,Distance
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10,2008-03-31 16:00:08,41.741415,86.186028,1,
10,2008-03-31 16:01:07,41.737063,86.179470,1,728.185829
10,2008-03-31 16:02:07,41.734105,86.172823,1,642.172796
10,2008-03-31 16:03:06,41.739110,86.166563,1,761.267192
10,2008-03-31 16:04:05,41.744368,86.159987,1,799.694199
...,...,...,...,...,...
98,2007-06-02 12:07:19,39.935300,116.468267,1,14.666196
98,2007-06-02 12:07:58,39.935450,116.468333,1,17.621166
98,2007-06-02 12:08:20,39.935400,116.468517,1,16.590457
98,2007-06-02 12:09:40,39.934633,116.468983,1,94.077625


In [3]:
def find_original_and_augmentation_pairs_and_calculate_differences(augmented_dataset, selected):
    # Find augmented trajectories associated with each original trajectory.
    select_to_augment_map = {}
    for traj_id in selected:
        pattern = r'\b{}aug'.format(traj_id)
        conditions = augmented_dataset.index.str.match(pattern)
        select_to_augment_map[traj_id] = augmented_dataset.loc[conditions].index.unique()

    # Now, for each original trajectory, calculate the features for all of them
    # and then find the vector difference between the vectors.
    distances = []
    for traj_id in selected:
        # Get the features of the original traj.
        original_features = augmented_dataset.loc[augmented_dataset.index == traj_id].to_numpy()

        # Get the features of the augmented trajectories.
        aug_features = augmented_dataset.loc[augmented_dataset.index.isin(select_to_augment_map[traj_id])].to_numpy()

        # # Now, for each augmented trajectory, find the euclidean distance between the
        # # features of original trajectory and augmented trajectory and store it in a list.
        for aug in aug_features:
            distance = np.linalg.norm(original_features - aug)
            distances.append(distance)

    return round(np.mean(distances), 4), round(np.std(distances), 4)

In [4]:
seed_generator = Utilities.generate_pi_seed(20)
seed_vals = [next(seed_generator) for i in range(20)]
shake_percentages = [0.2, 0.4, 0.6]
circle_methods = ['on', 'in']

results = ["on_20%_dist,on_20%_std,on_40%_std,on_40%_std,on_60%_std,on_60%_std,"
           "in_20%_dist,in_20%_std,in_40%_std,in_40%_std,in_60%_std,in_60%_std"]

for seed in seed_vals:
    row = []
    for shake in shake_percentages:
        for method in circle_methods:
            train, test_x, test_y = TestUtils.get_test_train_data(dataset=ready_dataset, seed_val=seed,
                                                                  class_col='mode_of_transport', k=0.8)

            selected = Selection.select_randomly(train, seed, k=0.3)
            train_x, train_y = TestUtils.augment_trajectories_using_random_strategy(dataset=train,
                                                                                    percent_to_shake=shake,
                                                                                    ids_to_augment=selected,
                                                                                    circle=method,
                                                                                    n_augmentations=20,
                                                                                    class_col="mode_of_transport")
            mean, std = find_original_and_augmentation_pairs_and_calculate_differences(train_x, selected)
            row.append(mean)
            row.append(std)
    print(row)
    results.append(row)


[143316.3892, 456544.8241, 11538.2794, 14933.5641, 206350.7905, 553257.9977, 15409.2766, 19831.7347, 252410.7804, 619242.8758, 17880.7165, 23253.8537]
[94512.5877, 351465.3907, 12041.3629, 14876.9007, 162365.7311, 482820.579, 15488.1128, 19597.8435, 217548.299, 571151.1602, 17653.6619, 22899.5168]
[2265942.884, 5553803.6525, 1278464.5098, 3307891.6782, 3364793.2594, 7933765.7876, 1292738.4402, 3316185.2571, 4129130.87, 8950248.3972, 1317532.3597, 3355097.8551]
[559438.2436, 1789049.0008, 47280.8841, 104768.9898, 756000.8472, 2065040.8546, 50289.4779, 108062.9226, 887267.4177, 2297022.7388, 51915.451, 109927.8114]
[2527216.7915, 6201294.0929, 1329281.8663, 3426244.1995, 3908630.3395, 8856485.226, 1342898.6603, 3430839.9564, 4548961.9902, 9913318.4527, 1358234.3029, 3443772.7947]
[1100210.0418, 2486908.7723, 133379.3116, 263326.3914, 1740134.8121, 3141281.9341, 168316.2872, 272616.3637, 1907402.0105, 3227228.9325, 179410.3818, 280223.1423]
[1005519.1697, 2291353.7582, 61136.999, 103126.9

KeyboardInterrupt: 

In [None]:
results[0] = ["on_20%_dist", "on_20%_std", "on_40%_dist", "on_40%_std", "on_60%_dist", "on_60%_std",
              "in_20%_dist","in_20%_std","in_40%_dist","in_40%_std","in_60%_dist","in_60%_std"]

In [None]:
import csv

with open("../results/experiment_1/geolife.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerows(results)