In [1]:
import numpy as np
import pandas as pd
from ptrail.core.TrajectoryDF import PTRAILDataFrame
from ptrail.features.kinematic_features import KinematicFeatures
from ptrail.core.Datasets import Datasets

from TestUtils.test_utils import TestUtils
from src.selection.select import Selection
from src.utils.general_utils import Utilities

In [2]:
traffic_dataset = Datasets.load_traffic_data()
ready_dataset = KinematicFeatures.create_distance_column(traffic_dataset)
ready_dataset

------------------------ Dataset Facts ------------------------------

Number of unique Trajectories in the data: 125
Number of points in the data: 44905
Dataset time range: 0 days 00:00:59.900000
Datatype of the DataFrame: <class 'ptrail.core.TrajectoryDF.PTRAILDataFrame'>
Dataset Bounding Box: (34.7107417, 135.4640652, 34.7156517, 135.4702002)

---------------------------------------------------------------------


Unnamed: 0_level_0,Unnamed: 1_level_0,vehicle_type,velocity,traffic_lane,lon,lat,kilopost,vehicle_length,detected_flag,Distance
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1371,1900-01-01 07:30:00.000,1,48.0,2,135.469950,34.710999,3539.5,3.0,0,
1371,1900-01-01 07:30:00.100,1,47.9,2,135.469957,34.710991,3532.5,3.0,0,1.115504
1371,1900-01-01 07:30:00.200,1,47.9,2,135.469963,34.710984,3532.5,3.0,0,0.939478
1371,1900-01-01 07:30:00.300,1,47.9,2,135.469968,34.710979,3531.5,3.0,0,0.763477
1371,1900-01-01 07:30:00.400,1,47.9,2,135.469972,34.710974,3530.8,3.0,0,0.596403
...,...,...,...,...,...,...,...,...,...,...
3357,1900-01-01 07:30:59.500,1,27.7,1,135.468970,34.712177,3697.6,3.5,0,0.737022
3357,1900-01-01 07:30:59.600,1,27.7,1,135.468975,34.712172,3696.6,3.5,1,0.760166
3357,1900-01-01 07:30:59.700,1,29.0,1,135.468981,34.712166,3695.6,3.5,1,0.788944
3357,1900-01-01 07:30:59.800,1,30.3,1,135.468986,34.712160,3694.7,3.5,1,0.835374


In [3]:
def find_original_and_augmentation_pairs_and_calculate_differences(augmented_dataset, selected):
    # Find augmented trajectories associated with each original trajectory.
    select_to_augment_map = {}
    for traj_id in selected:
        pattern = r'\b{}aug'.format(traj_id)
        conditions = augmented_dataset.index.str.match(pattern)
        select_to_augment_map[traj_id] = augmented_dataset.loc[conditions].index.unique()

    # Now, for each original trajectory, calculate the features for all of them
    # and then find the vector difference between the vectors.
    distances = []
    for traj_id in selected:
        # Get the features of the original traj.
        original_features = augmented_dataset.loc[augmented_dataset.index == traj_id].to_numpy()

        # Get the features of the augmented trajectories.
        aug_features = augmented_dataset.loc[augmented_dataset.index.isin(select_to_augment_map[traj_id])].to_numpy()

        # # Now, for each augmented trajectory, find the euclidean distance between the
        # # features of original trajectory and augmented trajectory and store it in a list.
        for aug in aug_features:
            distance = np.linalg.norm(original_features - aug)
            distances.append(distance)

    return round(np.mean(distances), 4), round(np.std(distances), 4)

In [4]:
seed_generator = Utilities.generate_pi_seed(20)
seed_vals = [next(seed_generator) for i in range(20)]
shake_percentages = [0.2, 0.4, 0.6]
circle_methods = ['on', 'in']

results = ["on_20%_dist,on_20%_std,on_40%_std,on_40%_std,on_60%_std,on_60%_std,"
           "in_20%_dist,in_20%_std,in_40%_std,in_40%_std,in_60%_std,in_60%_std"]

for seed in seed_vals:
    row = []
    for shake in shake_percentages:
        for method in circle_methods:
            train, test_x, test_y = TestUtils.get_test_train_data(dataset=ready_dataset, seed_val=seed,
                                                                  class_col='vehicle_type', k=0.8)

            selected = Selection.select_randomly(train, seed, k=0.3)
            train_x, train_y = TestUtils.augment_trajectories_using_random_strategy(dataset=train,
                                                                                    percent_to_shake=shake,
                                                                                    ids_to_augment=selected,
                                                                                    circle=method,
                                                                                    n_augmentations=20,
                                                                                    class_col="vehicle_type")
            mean, std = find_original_and_augmentation_pairs_and_calculate_differences(train_x, selected)
            row.append(mean)
            row.append(std)
    print(row)
    results.append(row)


[402.0198, 89.4421, 400.5518, 90.021, 470.1159, 88.8643, 468.2348, 89.5241, 492.1202, 94.0044, 489.5759, 95.1361]
[366.3004, 102.9543, 363.9885, 104.311, 432.5685, 102.8769, 429.2732, 105.3153, 451.7252, 109.3011, 447.7552, 112.2534]
[1510.0634, 6881.3791, 1478.8344, 6747.6129, 1816.8597, 7742.0634, 1909.1544, 8206.9552, 1971.4802, 8419.795, 1912.75, 8187.3087]
[346.2697, 104.2434, 345.0728, 103.917, 413.0751, 107.2435, 410.5327, 107.8918, 431.6275, 114.3232, 428.2596, 115.6832]
[334.8573, 113.4041, 333.9082, 113.2989, 400.2205, 112.5993, 398.2247, 113.2202, 418.4968, 119.3534, 415.7228, 120.4902]
[354.7808, 106.5235, 353.241, 106.6429, 422.279, 107.0981, 420.4864, 107.305, 441.3431, 113.5965, 438.8419, 114.0173]
[1809.7139, 8511.9423, 1705.3569, 7845.8702, 2199.9065, 10060.9647, 2022.0066, 9016.2494, 2261.9097, 10208.8785, 2145.8694, 9544.0884]
[1616.5961, 7553.8227, 1621.4338, 7243.0228, 2024.1581, 8801.1448, 1874.0218, 7909.2805, 2177.5911, 9440.7603, 2109.5361, 9075.4403]
[340.8416

In [18]:
results[0] = ["on_20%_dist", "on_20%_std", "on_40%_dist", "on_40%_std", "on_60%_dist", "on_60%_std",
              "in_20%_dist","in_20%_std","in_40%_dist","in_40%_std","in_60%_dist","in_60%_std"]

[['on_20%_dist', 'on_20%_std', 'on_40%_dist', 'on_40%_std', 'on_60%_dist', 'on_60%_std', 'in_20%_dist', 'in_20%_std', 'in_40%_dist', 'in_40%_std', 'in_60%_dist', 'in_60%_std'], [402.0198, 89.4421, 400.5518, 90.021, 470.1159, 88.8643, 468.2348, 89.5241, 492.1202, 94.0044, 489.5759, 95.1361], [366.3004, 102.9543, 363.9885, 104.311, 432.5685, 102.8769, 429.2732, 105.3153, 451.7252, 109.3011, 447.7552, 112.2534], [1510.0634, 6881.3791, 1478.8344, 6747.6129, 1816.8597, 7742.0634, 1909.1544, 8206.9552, 1971.4802, 8419.795, 1912.75, 8187.3087], [346.2697, 104.2434, 345.0728, 103.917, 413.0751, 107.2435, 410.5327, 107.8918, 431.6275, 114.3232, 428.2596, 115.6832], [334.8573, 113.4041, 333.9082, 113.2989, 400.2205, 112.5993, 398.2247, 113.2202, 418.4968, 119.3534, 415.7228, 120.4902], [354.7808, 106.5235, 353.241, 106.6429, 422.279, 107.0981, 420.4864, 107.305, 441.3431, 113.5965, 438.8419, 114.0173], [1809.7139, 8511.9423, 1705.3569, 7845.8702, 2199.9065, 10060.9647, 2022.0066, 9016.2494, 2261

In [22]:
import csv

with open("../results/experiment_1/traffic.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerows(results)


[1415, 9265, 3589, 7932, 3846, 2643, 3832, 7950, 2884, 1971, 6939, 9375, 1058, 2097, 4944, 5923, 781, 6406, 2862, 899]
