In [1]:
from src.augmentation.augment import Augmentation
from src.selection.select import Selection
from src.utils.alter import Alter
import math
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from random import *
from ptrail.core.TrajectoryDF import PTRAILDataFrame
from ptrail.preprocessing.filters import Filters
from ptrail.core.Datasets import Datasets
from ptrail.preprocessing.statistics import Statistics
from ptrail.features.kinematic_features import KinematicFeatures

# pd.set_option('use_inf_as_na', True)

In [2]:
def trajectoryAugumentationProcedure(trajs, seed, n, k, pradius, model):
    """
    Run one augmentation experiment and report its weighted F1 score.

    The trajectory ids are split into a train and a test group, a sample of
    the training trajectories is augmented and appended to the training
    points, kinematic statistics are generated and pivoted for both groups,
    and ``model`` is fitted on the training statistics and scored on the
    test statistics.

    Parameters
    ----------
        trajs:
            Trajectory dataframe. After duplicate removal the code reads the
            columns ``traj_id``, ``DateTime``, ``lat``, ``lon`` and uses
            ``Status`` as the target column.
        seed:
            Base seed; multiplied by ``n * k * pradius`` to seed the private
            random generator used for sampling and augmentation.
        n:
            Fraction of the distinct training trajectory ids to draw for
            augmentation. Ids are drawn with replacement, so the same id may
            be augmented more than once.
        k:
            Forwarded unchanged to the augmentation routine
            (presumably the fraction of points to augment -- TODO confirm).
        pradius:
            Forwarded unchanged to the augmentation routine
            (presumably the radius for generated points -- TODO confirm).
        model:
            Estimator exposing ``fit(X, y)`` and ``predict(X)``.

    Returns
    -------
        str:
            One CSV line of the form ``"n,k,pradius,fscore\\n"``.
    """
    rng = Random(seed * (n * k * pradius))

    # Train/test split strategy: swap in one of the alternatives below to
    # change how the trajectories are divided.
    split_ids = Selection.select_randomly(trajs, .2)
    # split_ids = Selection.select_traj_with_fewest(trajs, rng, .2)
    # split_ids = Selection.select_representative_trajectories(trajs, 'VesselType')

    deduped = Filters.remove_duplicates(dataframe=trajs)

    # Rows with missing values are discarded from both partitions.
    train_points = deduped.loc[deduped.traj_id.isin(split_ids["train"])].dropna()
    test_points = deduped.loc[deduped.traj_id.isin(split_ids["test"])].dropna()

    # ---- Test side: statistics are built from the untouched test points. ----
    test_traj_df = PTRAILDataFrame(data_set=test_points,
                                   latitude='lat',
                                   longitude='lon',
                                   datetime='DateTime',
                                   traj_id='traj_id')
    test_stats = Statistics.generate_kinematic_stats(dataframe=test_traj_df,
                                                     target_col_name='Status')
    test_pivot = Statistics.pivot_stats_df(dataframe=test_stats,
                                           target_col_name='Status')
    # Keep only the first occurrence of any repeated column label.
    test_pivot = test_pivot.loc[:, ~test_pivot.columns.duplicated()]
    test_features = test_pivot.drop(columns='Status')
    test_labels = test_pivot['Status'].to_numpy()

    # ---- Train side: augment a random sample of the training trajectories. ----
    train_ids = train_points['traj_id'].unique()
    n_to_augment = math.floor(n * len(train_ids))

    # Sorting the ids first makes the draw depend only on the generator state.
    for picked_id in rng.choices(sorted(train_ids), k=n_to_augment):
        original_traj = train_points.loc[train_points.traj_id == picked_id]

        # Augmentation strategy: swap in one of the alternatives below to
        # change how new points are produced. The literal 100 is passed
        # through as-is; its meaning is not visible here -- TODO confirm.
        augmented_traj = Augmentation.augment_trajectories_with_randomly_generated_points(
            original_traj, pradius, k, 100, rng, 'on')
        # augmented_traj = Augmentation.augment_trajectories_with_randomly_generated_points(
        #     original_traj, pradius, k, 100, rng, 'in')
        # augmented_traj = Augmentation.augment_trajectories_with_interpolation(
        #     original_traj, 3600*4, 'cubic')

        # The augmented rows are appended immediately, so later iterations
        # select from the already-extended training frame.
        train_points = pd.concat([train_points, augmented_traj], ignore_index=True)

    train_traj_df = PTRAILDataFrame(data_set=train_points,
                                    datetime='DateTime',
                                    traj_id='traj_id',
                                    latitude='lat',
                                    longitude='lon')
    train_stats = Statistics.generate_kinematic_stats(dataframe=train_traj_df,
                                                      target_col_name='Status')
    train_pivot = Statistics.pivot_stats_df(dataframe=train_stats,
                                            target_col_name='Status')
    train_pivot = train_pivot.loc[:, ~train_pivot.columns.duplicated()].dropna()

    train_features = train_pivot.drop(columns='Status')
    train_labels = train_pivot['Status'].to_numpy()

    # Missing test statistics are filled by interpolation rather than dropped,
    # so every test row still receives a prediction.
    test_features_filled = test_features.interpolate()

    model.fit(train_features, train_labels)
    predicted_labels = model.predict(test_features_filled)
    fscore = f1_score(test_labels, predicted_labels, average='weighted')

    print(f"Current run: k={k}, pradius={pradius}, n={n}, fscore={fscore}, seed={seed}")
    return f"{n},{k},{pradius},{fscore}\n"

In [4]:
%%time

# Load the dataset once and remove duplicate rows before the experiment grid runs.
dataset = Datasets.load_hurricanes()
ready_dataset = Filters.remove_duplicates(dataframe=dataset)

# Parameter grid: every (n, k, rad) combination is evaluated once per seed.
seeds = [14159,26535,89793]
n_vals = [.2, .3, .4]
k_vals = [.1, .2, .3]
rad_vals = [.001, .005, .01]


#Rename results file to name of tests run
# The context manager closes the file even if one of the runs raises, so the
# results of the runs that already finished are not lost with an open handle.
with open("./Hurricane Results/HurricanesRandomSampleOnCircle.txt", "w") as text_file:
    for n in n_vals:
        for k in k_vals:
            for rad in rad_vals:
                for s in seeds:
                    result = (trajectoryAugumentationProcedure(ready_dataset, s, n, k,
                                                           rad, RandomForestClassifier(random_state=s)))
                    # print(result)
                    # `result` is a single CSV line, so write() is the right call
                    # (writelines() would iterate over it character by character).
                    text_file.write(result)
                    # Push each finished run to disk so a killed kernel keeps
                    # everything completed so far.
                    text_file.flush()

------------------------ Dataset Facts ------------------------------

Number of unique Trajectories in the data: 1814
Number of points in the data: 49105
Dataset time range: 60041 days 12:00:00
Datatype of the DataFrame: <class 'ptrail.core.TrajectoryDF.PTRAILDataFrame'>
Dataset Bounding Box: (7.2, -109.5, 81.0, 63.0)

---------------------------------------------------------------------
Current run: k=0.1, pradius=0.001, n=0.2, fscore=0.5287844766331132, seed=14159
Current run: k=0.1, pradius=0.001, n=0.2, fscore=0.49548050555440176, seed=26535
Current run: k=0.1, pradius=0.001, n=0.2, fscore=0.5233461805774943, seed=89793
Current run: k=0.1, pradius=0.005, n=0.2, fscore=0.570458957970318, seed=14159
Current run: k=0.1, pradius=0.005, n=0.2, fscore=0.47328498316728385, seed=26535
Current run: k=0.1, pradius=0.005, n=0.2, fscore=0.5439068254413988, seed=89793
Current run: k=0.1, pradius=0.01, n=0.2, fscore=0.5301977224309703, seed=14159
Current run: k=0.1, pradius=0.01, n=0.2, fscore=