In [1]:
from src.augmentation.augment import Augmentation
from src.selection.select import Selection
from src.utils.alter import Alter
import math
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from random import *
from ptrail.core.TrajectoryDF import PTRAILDataFrame
from ptrail.preprocessing.filters import Filters
from ptrail.core.Datasets import Datasets
from ptrail.preprocessing.statistics import Statistics
from ptrail.features.kinematic_features import KinematicFeatures
import progressbar

In [2]:
def trajectoryAugumentationProcedure(trajs, seed, n, k, pradius, class_name, selection, augment):
    myRandom = Random(seed * (n * k * pradius))

    # Select the trajectories and remove duplicates from original dataset.
    if selection == 'random':
        splits = Selection.select_randomly(trajs, .2)
    elif selection == 'fewest':
        splits = Selection.select_traj_with_fewest(trajs, myRandom, .2)
    else:
        splits = Selection.select_representative_trajectories(trajs, class_name, tolerance=0.5)
    paramTestingDataSet = Filters.remove_duplicates(dataframe=trajs)

    trainDataParm = paramTestingDataSet.loc[paramTestingDataSet.traj_id.isin(splits["train"]) == True].dropna()
    testDataParm = paramTestingDataSet.loc[paramTestingDataSet.traj_id.isin(splits["test"]) == True].dropna()
    testData = PTRAILDataFrame(data_set=testDataParm,
                               latitude='lat',
                               longitude='lon',
                               datetime='DateTime',
                               traj_id='traj_id')

    statsTestParm = Statistics.generate_kinematic_stats(dataframe=testData, target_col_name=class_name)
    pivotedStatsTestParm = Statistics.pivot_stats_df(dataframe=statsTestParm, target_col_name=class_name)
    pivotedStatsTestParm = pivotedStatsTestParm.loc[:,~pivotedStatsTestParm.columns.duplicated()]
    testParmX = pivotedStatsTestParm.drop(columns=class_name)
    testParmY = pivotedStatsTestParm[class_name].to_numpy()
    noiseTraj = trainDataParm[class_name].unique()

    sampledTraj = myRandom.choices(sorted(noiseTraj), k=math.floor(n * len(noiseTraj)))
    for traj in sampledTraj:
        trajToChange = trainDataParm.loc[trainDataParm.traj_id == traj]

        #Trajectory must be changed
        if augment == 'on':
            trajChanged = Augmentation.augment_trajectories_with_randomly_generated_points(trajToChange, pradius,
                                                                                         k, 100, myRandom, 'on')
        elif augment == 'in':
            trajChanged = Augmentation.augment_trajectories_with_randomly_generated_points(trajToChange, pradius,
                                                                                          k, 100, myRandom, 'in')
        else:
            trajChanged = Augmentation.augment_trajectories_with_interpolation(trajToChange, 300, 'cubic')
        trainDataParm = pd.concat([trainDataParm, trajChanged], ignore_index = True)

    trainDataNoise = PTRAILDataFrame(data_set=trainDataParm,
                                            datetime='DateTime',
                                            traj_id='traj_id',
                                            latitude='lat',
                                            longitude='lon')

    statsTrainNoiseParm = Statistics.generate_kinematic_stats(dataframe=trainDataNoise, target_col_name=class_name)
    pivotedStatsTrainNoiseParm = Statistics.pivot_stats_df(dataframe=statsTrainNoiseParm, target_col_name=class_name)
    pivotedStatsTrainNoise = pivotedStatsTrainNoiseParm.loc[:, ~pivotedStatsTrainNoiseParm.columns.duplicated()]
    pivotedStatsTrainNoise=pivotedStatsTrainNoise.dropna()

    trainParmX = pivotedStatsTrainNoise.drop(columns=class_name)
    trainParmY = pivotedStatsTrainNoise[class_name].to_numpy()

    testParmX = testParmX.interpolate()
    return [trainParmX, trainParmY, testParmX, testParmY]

In [3]:
dataset = Datasets.load_traffic_data()
ready_dataset = Filters.remove_duplicates(dataframe=dataset)

------------------------ Dataset Facts ------------------------------

Number of unique Trajectories in the data: 125
Number of points in the data: 44905
Dataset time range: 0 days 00:00:59.900000
Datatype of the DataFrame: <class 'ptrail.core.TrajectoryDF.PTRAILDataFrame'>
Dataset Bounding Box: (34.7107417, 135.4640652, 34.7156517, 135.4702002)

---------------------------------------------------------------------


In [4]:
%%time

seed_vals = [14159, 26535, 89793]
n_vals = [.2, .3, .4]
k_vals = [.1, .2, .3]
rad_vals = [.001, .005, .01]
selection_vals = ['random', 'fewest', 'representative']
augment_vals = ['interpolate', 'on', 'in']

i = 0
bar = progressbar.ProgressBar(max_value=progressbar.UnknownLength)
text_file = open("./Traffic Results/FinalTrafficResults.csv", "w")
for s in seed_vals:
    for n in n_vals:
        for k in k_vals:
            for rad in rad_vals:
                for sel in selection_vals:
                    for aug in augment_vals:
                        i += 1
                        bar.update(i)
                        # Create the model.
                        model = RandomForestClassifier(random_state=s)

                        # Get the augmented data.
                        data = trajectoryAugumentationProcedure(ready_dataset, s, n, k, rad, 'traffic_lane', sel, aug)

                        # Fit the model and perform testing.
                        model.fit(data[0], data[1])
                        test_predict = model.predict(data[2])
                        performance_val = f1_score(data[3], test_predict, average='weighted')
                        # Add the value to dict.
                        text_file.writelines(f"{s},{n},{k},{rad},{sel},{aug},{performance_val}\n")

/ |        #                                        | 729 Elapsed Time: 1:26:25

CPU times: user 1h 3min 29s, sys: 7min 31s, total: 1h 11min
Wall time: 1h 26min 34s


In [5]:
# Read the results from the file.
all_results = pd.read_csv('Final Results/FinalTrafficResults.csv', sep=',', header=None)
all_results.columns = ['seed', 'n_val', 'k_val', 'rad_val', 'traj_selection_method', 'augmentation_method', 'f1_score']
all_results.sort_values(by='f1_score', ascending=False, ignore_index=True)

FileNotFoundError: [Errno 2] No such file or directory: 'Final Results/FinalTrafficResults.csv'