In [1]:
import numpy as np
import pandas as pd
from ptrail.core.TrajectoryDF import PTRAILDataFrame
from ptrail.features.kinematic_features import KinematicFeatures
from ptrail.core.Datasets import Datasets
from ptrail.preprocessing.statistics import Statistics

from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler
from src.utils.test_utils import TestUtils
from src.selection.select import Selection
from src.utils.general_utils import Utilities

In [2]:
foxes_dataset = PTRAILDataFrame(data_set=pd.read_csv('../TestUtils/foxes.csv'),
                                traj_id='fox_id',
                                datetime='dateloc',
                                latitude='latitude',
                                longitude='longitude')
ready_dataset = KinematicFeatures.create_distance_column(foxes_dataset)
ready_dataset

Unnamed: 0_level_0,Unnamed: 1_level_0,sex,lat,lon,Distance
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
116,2008-07-04 07:16:38,1,72.88011,-79.77515,
116,2008-07-04 08:54:58,1,72.89021,-79.77214,1127.379764
116,2008-07-04 10:40:11,1,72.87635,-79.72306,2226.018587
116,2008-07-04 12:19:30,1,72.87701,-79.73571,420.601042
116,2008-07-04 13:17:31,1,72.87414,-79.85453,3903.363864
...,...,...,...,...,...
432,2013-05-31 15:17:27,1,73.11727,-80.03025,712.059619
432,2013-05-31 15:24:57,1,73.11761,-80.04005,318.714045
432,2013-05-31 15:42:27,1,73.12072,-80.05039,480.683897
432,2013-05-31 16:56:27,1,73.12214,-80.07021,659.078302


In [3]:
seed_generator = Utilities.generate_pi_seed(20)
seed_vals = [next(seed_generator) for i in range(20)]
shake_percentages = [0.2, 0.4, 0.6]
circle_methods = ['on', 'in']
ml_models = [ExtraTreesClassifier(), GradientBoostingClassifier(), RandomForestClassifier()]
scaler = MinMaxScaler((0, 1))

distance_results = [["seed", "on_20%_dist", "on_20%_std", "on_40%_dist", "on_40%_std", "on_60%_dist", "on_60%_std",
                    "in_20%_dist","in_20%_std","in_40%_dist","in_40%_std","in_60%_dist","in_60%_std"]]

model_results = [["seed", "model", "baseline", "in_20%_f1", "in_40%_f1", "in_60%_f1", "on_20%_f1", "on_40%_f1", "on_60%_f1"]]

for seed in seed_vals:
    # Intermediate lists for storing distance and model score values.
    distance_row = [seed]

    # Set apart 20% data for testing that augmentation process will never see.
    train, test_x, test_y = TestUtils.get_test_train_data(dataset=ready_dataset, seed_val=seed,
                                                          class_col='sex', k=0.8)

    model_row = TestUtils.create_model_row(seed, ml_models, "sex", train, test_x, test_y)
    for shake in shake_percentages:
        for method in circle_methods:
            # Randomly select 30% of trajectories to be augmented.
            selected = Selection.select_randomly(train, seed, k=0.3)

            # Augment the trajectories.
            train_x, train_y = TestUtils.augment_trajectories_using_random_strategy(dataset=train,
                                                                                    percent_to_shake=shake,
                                                                                    ids_to_augment=selected,
                                                                                    circle=method,
                                                                                    n_augmentations=20,
                                                                                    class_col="sex")
            mean, std = TestUtils.find_original_and_augmentation_pairs_and_calculate_differences(train_x, selected)
            distance_row.append(mean)
            distance_row.append(std)

            for i in range(len(ml_models)):
                f1_score = TestUtils.train_model_and_evaluate(ml_models[i], scaler.fit_transform(train_x), train_y,
                                                              scaler.fit_transform(test_x), test_y, seed)
                model_row[i].append(f1_score)

    model_results.extend(model_row)
    distance_results.append(distance_row)

    print(model_row)

[[1415, 'ExtraTreesClassifier', 0.492, 0.3468, 0.5026, 0.3468, 0.5714, 0.3429, 0.6447], [1415, 'GradientBoostingClassifier', 0.5026, 0.4032, 0.6447, 0.5524, 0.5714, 0.5524, 0.5714], [1415, 'RandomForestClassifier', 0.5714, 0.3468, 0.5026, 0.492, 0.5026, 0.492, 0.5714]]
[[9265, 'ExtraTreesClassifier', 0.3468, 0.5524, 0.3429, 0.6714, 0.5714, 0.7669, 0.6714], [9265, 'GradientBoostingClassifier', 0.4286, 0.6371, 0.492, 0.5578, 0.5071, 0.7669, 0.5578], [9265, 'RandomForestClassifier', 0.2857, 0.5524, 0.3604, 0.6714, 0.7669, 0.7669, 0.6714]]
[[3589, 'ExtraTreesClassifier', 0.2857, 0.4794, 0.4087, 0.6599, 0.5439, 0.6599, 0.4714], [3589, 'GradientBoostingClassifier', 0.2113, 0.4794, 0.4135, 0.4794, 0.5401, 0.4794, 0.4643], [3589, 'RandomForestClassifier', 0.2857, 0.5401, 0.4087, 0.4087, 0.5401, 0.4087, 0.3304]]
[[7932, 'ExtraTreesClassifier', 0.492, 0.4561, 0.3008, 0.5524, 0.492, 0.4561, 0.3468], [7932, 'GradientBoostingClassifier', 0.5714, 0.7868, 0.5026, 0.5026, 0.5026, 0.5714, 0.5714], [793

In [4]:
import csv

file_path = "./fox_distances.csv"
with open(file_path, mode="w") as file:
    writer = csv.writer(file)
    for item in distance_results:
        writer.writerow(item)
    print(f"File successfully written to: {file_path}")

file_path = "./fox_f1_score.csv"
with open(file_path, mode="w") as file:
    writer = csv.writer(file)
    for item in model_results:
        writer.writerow(item)
    print(f"File successfully written to: {file_path}")

File successfully written to: ./fox_distances.csv
File successfully written to: ./fox_f1_score.csv
