In [1]:
import itertools

In [2]:
import pandas as pd
import random
import itertools

from ptrail.core.Datasets import Datasets
from ptrail.core.TrajectoryDF import PTRAILDataFrame
from ptrail.features.kinematic_features import KinematicFeatures
from ptrail.preprocessing.statistics import Statistics
from src.augmentation.augment import Augmentation
from src.selection.select import Selection

import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.metrics import f1_score, accuracy_score, average_precision_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.metrics import f1_score, accuracy_score, average_precision_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [3]:
# Load the Starkey dataset, then add the distance column that the
# Augmentation step needs; reset_index() is applied to the result.
starkey_data = Datasets.load_starkey()
ready_dataset = (
    KinematicFeatures
    .create_distance_column(starkey_data)
    .reset_index()
)

------------------------ Dataset Facts ------------------------------

Number of unique Trajectories in the data: 253
Number of points in the data: 287136
Dataset time range: 1196 days 22:51:45
Datatype of the DataFrame: <class 'ptrail.core.TrajectoryDF.PTRAILDataFrame'>
Dataset Bounding Box: (45.18896978643169, -118.61020848239596, 45.314545642992, -118.50455596234036)

---------------------------------------------------------------------


In [4]:
# Split the data into train/test sets at the trajectory level, so that every
# point of a given trajectory lands on the same side of the split.
TRAIN_FRACTION = 0.90
SPLIT_SEED = 42  # fixed so that Restart & Run All reproduces the same split

traj_ids = list(ready_dataset['traj_id'].unique())
train_size = int(len(traj_ids) * TRAIN_FRACTION)

# A dedicated seeded generator makes the split reproducible without touching
# the global `random` state that other cells may rely on.
train_traj_ids = random.Random(SPLIT_SEED).sample(traj_ids, train_size)

# Set lookup avoids rescanning the training list for every id; iterating over
# traj_ids keeps the held-out ids in their original order.
train_id_lookup = set(train_traj_ids)
test_traj_ids = [t for t in traj_ids if t not in train_id_lookup]

# Row-level frames for each side of the split.
training = ready_dataset.loc[ready_dataset.traj_id.isin(train_traj_ids)]
testing = ready_dataset.loc[ready_dataset.traj_id.isin(test_traj_ids)]

In [5]:
# Random selection of trajectory ids from the training split.
# NOTE(review): k=0.046 looks like a fraction of the trajectories — confirm
# against Selection.select_randomly.
selected = Selection.select_randomly(
    training,
    k=0.046,
)
selected

['OSUX93046',
 '950104E04',
 '910313E07',
 '940221E07',
 'OSUX85159',
 '930409E04',
 'OSUX91121',
 '890418E13',
 'OSUX89203',
 '910315E17']

In [6]:
# Proportional selection, with 'Species' passed as the classification column.
# NOTE(review): k=0.2 is presumably the fraction to pick — confirm against
# Selection.select_trajectories_proportionally.
proportionally_selected = Selection.select_trajectories_proportionally(
    training,
    classification_col='Species',
    k=0.2,
)
proportionally_selected

['910301D01',
 '940124D01',
 '920309D02',
 '950124D02',
 '920304D03',
 '940105D01',
 '910214D01',
 '920303D05',
 '880120D02',
 '921230E03',
 '930119E02',
 '930209E08',
 '890328E21',
 '930202E03',
 '890224E04',
 '930202E16',
 '910313E19',
 '941220E05',
 '960131E12',
 '930422E04',
 '941220E01',
 '940219E11',
 '910315E22',
 '941220E09',
 '890418E13',
 '940228E01',
 '930203E03',
 '921228E34',
 '930107E09',
 '941222E03',
 '930119E03',
 '931216E09',
 '921216E02',
 'OSUX89073',
 'OSUX92071',
 'OSUX89125',
 'OSUX92070',
 'OSUX92115',
 'OSUX89105',
 'OSUX92031',
 'OSUX83061',
 'OSUX91121',
 'OSUX83041',
 'OSUX91164',
 'OSUX92016',
 'OSUX89065',
 'OSUX91038']

In [7]:
# Fewest-class selection on the training split.
# NOTE(review): presumably targets the least-represented class, with k=0.04
# as the fraction to pick — confirm against Selection.select_fewest_class.
fewest_selection = Selection.select_fewest_class(
    training,
    k=0.04,
)
fewest_selection

['950227D02',
 'OSUX91121',
 'OSUX91143',
 'OSUX92008',
 'OSUX92071',
 '960313E02',
 'OSUX91032',
 'OSUX89128',
 'OSUX93007',
 '930429E08']

In [8]:
# Representative selection on the training split, with 'Species' passed as
# the second positional argument.
# NOTE(review): the recorded cell output contains many "Closeness: ..." lines,
# presumably printed by this call while it runs — confirm in Selection.
rep_selection = Selection.select_representative_trajectories(
    training,
    'Species',
)
rep_selection

Closeness: 0.4861111111111111
Closeness: 0.4861111111111111
Closeness: 0.5138888888888888
Closeness: 0.5555555555555556
Closeness: 0.4722222222222222
Closeness: 0.4861111111111111
Closeness: 0.5
Closeness: 0.4722222222222222
Closeness: 0.5138888888888888
Closeness: 0.5138888888888888
Closeness: 0.5
Closeness: 0.5416666666666666
Closeness: 0.5138888888888888
Closeness: 0.5
Closeness: 0.4861111111111111
Closeness: 0.4722222222222222
Closeness: 0.5138888888888888
Closeness: 0.5138888888888888
Closeness: 0.4861111111111111
Closeness: 0.4861111111111111
Closeness: 0.5138888888888888
Closeness: 0.5
Closeness: 0.5
Closeness: 0.5
Closeness: 0.5277777777777778
Closeness: 0.4583333333333333
Closeness: 0.4861111111111111
Closeness: 0.5138888888888888
Closeness: 0.4861111111111111
Closeness: 0.5277777777777778
Closeness: 0.5138888888888888
Closeness: 0.4861111111111111
Closeness: 0.5138888888888888
Closeness: 0.5
Closeness: 0.4861111111111111
Closeness: 0.4861111111111111
Closeness: 0.5
Closeness:

['880120D02',
 '890130D09',
 '890324E17',
 '890328E21',
 '890413E05',
 '890418E01',
 '890424E06',
 '890424E08',
 '900205E11',
 '910130D01',
 '910312E05',
 '910312E13',
 '910313E07',
 '910313E18',
 '910315E14',
 '910315E20',
 '910319E11',
 '920304D03',
 '920309D02',
 '920318D02',
 '921123E22',
 '921216E02',
 '921216E08',
 '921228E06',
 '921228E09',
 '921228E12',
 '921228E19',
 '921230E03',
 '930107E08',
 '930127D01',
 '930202D01',
 '930318D01',
 '930409D01',
 '930409E04',
 '930410E01',
 '930415E04',
 '930416D01',
 '930421E03',
 '930429E08',
 '931215E09',
 '940110D01',
 '940124D01',
 '940131D01',
 '940213E01',
 '940215D01',
 '940217D01',
 '940219E02',
 '940219E06',
 '940221E07',
 '940303D01',
 '940329E01',
 '941128E01',
 '941129E04',
 '941220E05',
 '941220E09',
 '941221E14',
 '950124E01',
 '950125E10',
 '960227E09',
 '960313E04',
 'OSUX85135',
 'OSUX86137',
 'OSUX87130',
 'OSUX87175',
 'OSUX88129',
 'OSUX88159',
 'OSUX89065',
 'OSUX89073',
 'OSUX89078',
 'OSUX89105',
 'OSUX89127',
 'OSUX

In [9]:
# Number of trajectory ids returned by the representative selection.
len(rep_selection)

81

In [10]:
# %%time
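# # Augment each selected trajectory 12 times: first 6 passes with the new point inside the circle
# # (1 initial call + 5 loop iterations), then 6 passes with the new point on the circle.
# # NOTE: `myRandom` is not defined in the cells shown above; create a seeded RNG before re-enabling this cell.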
#
# training = Augmentation.augment_trajectories_with_randomly_generated_points(training,
#                                                                            random=myRandom,
#                                                                            ids_to_augment=selected,
#                                                                            circle='in')
#
# for i in range(1, 6):
#     training = Augmentation.augment_trajectories_with_randomly_generated_points(training,
#                                                                                random=myRandom,
#                                                                                ids_to_augment=selected,
#                                                                                circle='in')
#
#
# for i in range(0, 6):
#     training = Augmentation.augment_trajectories_with_randomly_generated_points(training,
#                                                                                random=myRandom,
#                                                                                ids_to_augment=selected,
#                                                                                circle='on')
#
# training

In [11]:
# # Generate kinematic statistics on the dataset and then convert
# # the dataset from point-form representation into traj-form representation.
# pivoted_train = Statistics.pivot_stats_df(
#     dataframe=Statistics.generate_kinematic_stats(training, 'Species'),
#     target_col_name='Species')
#
# # Pivoting the test data
# pivoted_test = Statistics.pivot_stats_df(
#     dataframe=Statistics.generate_kinematic_stats(testing, 'Species'),
#     target_col_name='Species')
# pivoted_train