In [1]:
import re
import pandas as pd
from ptrail.core.TrajectoryDF import PTRAILDataFrame
from ptrail.features.kinematic_features import KinematicFeatures
from ptrail.preprocessing.statistics import Statistics

from TestUtils.test_utils import TestUtils
from src.selection.select import Selection
from src.augmentation.augment import Augmentation
from src.utils.general_utils import Utilities

In [2]:
# Read the GeoLife sample CSV and wrap it in a PTRAILDataFrame, naming the
# columns that hold the trajectory id, timestamp and coordinates.
gl_dataset = PTRAILDataFrame(
    data_set=pd.read_csv('./TestUtils/geolife.csv'),
    latitude='lat',
    longitude='lon',
    datetime='DateTime',
    traj_id='traj_id',
)

# Derive the frame used by the rest of the notebook; the displayed result
# carries an extra 'Distance' column next to the original ones.
ready_dataset = KinematicFeatures.create_distance_column(gl_dataset)

# Bare last expression so the notebook renders the frame.
ready_dataset

Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,mode_of_transport,Distance
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10,2008-03-31 16:00:08,41.741415,86.186028,1,
10,2008-03-31 16:01:07,41.737063,86.179470,1,728.185829
10,2008-03-31 16:02:07,41.734105,86.172823,1,642.172796
10,2008-03-31 16:03:06,41.739110,86.166563,1,761.267192
10,2008-03-31 16:04:05,41.744368,86.159987,1,799.694199
...,...,...,...,...,...
98,2007-06-02 12:07:19,39.935300,116.468267,1,14.666196
98,2007-06-02 12:07:58,39.935450,116.468333,1,17.621166
98,2007-06-02 12:08:20,39.935400,116.468517,1,16.590457
98,2007-06-02 12:09:40,39.934633,116.468983,1,94.077625


In [3]:
def find_original_and_augmentation_pairs(augmented_dataset, selected):
    """
    Map every selected trajectory ID to the IDs of its augmented copies.

    A row counts as an augmentation of ``traj_id`` when its own 'traj_id'
    value starts with ``'<traj_id>aug'`` (e.g. '67aug4479242' for '67').
    The prefix is compared literally, so IDs containing regex
    metacharacters or starting with a non-word character are handled
    correctly.

    Parameters
    ----------
    augmented_dataset: DataFrame
        Frame that exposes 'traj_id' after ``reset_index()``, i.e.
        'traj_id' is an index level (or already a column).
    selected: iterable
        IDs of the trajectories that were picked for augmentation.

    Returns
    -------
    dict
        ``{selected id: array of unique matching 'traj_id' values}``.
        The mapping is also printed, as before.
    """
    # Only the id column is needed for the lookup.
    traj_ids = augmented_dataset.reset_index()['traj_id']

    select_to_augment_map = {}
    for traj_id in selected:
        prefix = '{}aug'.format(traj_id)
        # na=False: missing / non-string ids simply do not match instead of
        # producing a NaN mask that cannot be used for boolean indexing.
        is_augmentation = traj_ids.str.startswith(prefix, na=False)
        select_to_augment_map[traj_id] = traj_ids[is_augmentation].unique()

    print(select_to_augment_map)
    return select_to_augment_map

In [4]:
# Experiment settings. A single seed is pulled from Utilities.generate_pi_seed;
# shake_percentages and circle_methods are only referenced by the disabled
# sweep kept below for reference.
seed_generator = Utilities.generate_pi_seed(1)
seed_vals = [next(seed_generator) for _ in range(1)]
shake_percentages = [0.2, 0.4, 0.6]
circle_methods = ['on', 'in']

# Disabled: full sweep over every (seed, shake, circle method) combination.
# for seed in seed_vals:
#     for shake in shake_percentages:
#         for method in circle_methods:
#             train, test_x, test_y = TestUtils.get_test_train_data(dataset=ready_dataset, seed_val=seed,
#                                                                   class_col='mode_of_transport', k=0.8)
#
#             selected = Selection.select_randomly(train, seed, k=0.3)
#             augmented_training_data = TestUtils.augment_trajectories_using_random_strategy(dataset=train,
#                                                                                            percent_to_shake=shake,
#                                                                                            ids_to_augment=selected,
#                                                                                            circle=method,
#                                                                                            n_augmentations=20,
#                                                                                            class_col="mode_of_transport")


# Active run: one fixed configuration (shake 0.2, circle 'in') per seed.
for seed in seed_vals:
    # k=0.8 is presumably the training fraction -- TODO confirm in TestUtils.
    train, test_x, test_y = TestUtils.get_test_train_data(
        ready_dataset,
        seed_val=seed,
        class_col='mode_of_transport',
        k=0.8,
    )

    # Pick the trajectories to augment, reusing the same seed for the selection.
    trajectories_to_augment = Selection.select_randomly(train, seed, k=0.3)

    train_x, train_y = TestUtils.augment_trajectories_using_random_strategy(
        dataset=train,
        ids_to_augment=trajectories_to_augment,
        n_augmentations=20,
        percent_to_shake=0.2,
        circle='in',
        class_col="mode_of_transport",
    )

    # Print which augmented ids were generated for each selected trajectory.
    find_original_and_augmentation_pairs(train_x, trajectories_to_augment)


{'167': array(['167aug708447', '167aug9783601', '167aug6455757', '167aug3825378',
       '167aug7354080', '167aug4479242', '167aug6013867', '167aug5382742',
       '167aug854881', '167aug1959424', '167aug2920668', '167aug3236511',
       '167aug9976282', '167aug5285447', '167aug9223050', '167aug5805360',
       '167aug4688038', '167aug6359446', '167aug9462912', '167aug1616123'],
      dtype=object), '67': array(['67aug4479242', '67aug6455757', '67aug3236511', '67aug7354080',
       '67aug9462912', '67aug9976282', '67aug5805360', '67aug4688038',
       '67aug1616123', '67aug9783601', '67aug3825378', '67aug5285447',
       '67aug5382742', '67aug1959424', '67aug708447', '67aug9223050',
       '67aug854881', '67aug2920668', '67aug6013867', '67aug6359446'],
      dtype=object), '75': array(['75aug6359446', '75aug7354080', '75aug9223050', '75aug1959424',
       '75aug6013867', '75aug2920668', '75aug3236511', '75aug5285447',
       '75aug854881', '75aug9976282', '75aug9783601', '75aug3825378'