In [1]:
import pandas as pd
from ptrail.core.TrajectoryDF import PTRAILDataFrame
from ptrail.features.kinematic_features import KinematicFeatures
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler

from src.selection.select import Selection
from src.utils.general_utils import Utilities
from src.utils.test_utils import TestUtils

In [2]:
birds = PTRAILDataFrame(data_set=pd.read_csv('../datasets/birds.csv'),
                        traj_id='traj_id',
                        datetime='DateTime',
                        latitude='lat',
                        longitude='lon')
ready_dataset = KinematicFeatures.create_distance_column(birds)
ready_dataset

Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,Species,Distance
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3552,2014-08-01 00:00:23,27.616280,90.226698,0,
3552,2014-08-01 00:00:23,27.616280,90.226698,2,0.000000
3552,2014-08-01 00:10:07,27.616251,90.226671,0,4.167818
3552,2014-08-01 00:10:07,27.616251,90.226671,2,0.000000
3552,2014-08-01 00:20:07,27.616210,90.226711,0,6.047616
...,...,...,...,...,...
4192,2015-04-04 21:30:25,53.515125,27.749393,1,3.342458
4192,2015-04-04 22:00:27,53.515152,27.749441,1,4.286500
4192,2015-04-04 22:30:14,53.515174,27.749447,1,2.518906
4192,2015-04-04 23:00:25,53.515133,27.749446,1,4.560071


In [3]:
seed_generator = Utilities.generate_pi_seed(20)
seed_vals = [next(seed_generator) for i in range(20)]
shake_percentages = [0.2, 0.4, 0.6]
circle_methods = ['on', 'in']
ml_models = [ExtraTreesClassifier(), GradientBoostingClassifier(), RandomForestClassifier()]
scaler = MinMaxScaler((0, 1))

distance_results = [["seed", "on_20%_dist", "on_20%_std", "on_40%_dist", "on_40%_std", "on_60%_dist", "on_60%_std",
                    "in_20%_dist","in_20%_std","in_40%_dist","in_40%_std","in_60%_dist","in_60%_std"]]

model_results = [["seed", "model", "baseline", "in_20%_f1", "in_40%_f1", "in_60%_f1", "on_20%_f1", "on_40%_f1", "on_60%_f1"]]

for seed in seed_vals:
    # Intermediate lists for storing distance and model score values.
    distance_row = [seed]

    # Set apart 20% data for testing that augmentation process will never see.
    train, test_x, test_y = TestUtils.get_test_train_data(dataset=ready_dataset, seed_val=seed,
                                                          class_col='Species', k=0.8)

    model_row = TestUtils.create_model_row(seed, ml_models, "Species", train, test_x, test_y)
    for shake in shake_percentages:
        for method in circle_methods:
            # Randomly select 30% of trajectories to be augmented.
            selected = Selection.select_randomly(train, seed, k=0.3)

            # Augment the trajectories.
            train_x, train_y = TestUtils.augment_trajectories_using_random_strategy(dataset=train,
                                                                                    percent_to_shake=shake,
                                                                                    ids_to_augment=selected,
                                                                                    circle=method,
                                                                                    n_augmentations=20,
                                                                                    class_col="Species")
            mean, std = TestUtils.find_original_and_augmentation_pairs_and_calculate_differences(train_x, selected)
            distance_row.append(mean)
            distance_row.append(std)

            for i in range(len(ml_models)):
                f1_score = TestUtils.train_model_and_evaluate(ml_models[i], scaler.fit_transform(train_x), train_y,
                                                              scaler.fit_transform(test_x), test_y, seed)
                model_row[i].append(f1_score)

    model_results.extend(model_row)
    distance_results.append(distance_row)

    print(model_row)

[[1415, 'ExtraTreesClassifier', 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8], [1415, 'GradientBoostingClassifier', 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8], [1415, 'RandomForestClassifier', 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8]]
[[9265, 'ExtraTreesClassifier', 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8], [9265, 'GradientBoostingClassifier', 0.8, 0.5, 0.5, 0.8, 0.5, 0.5, 0.8], [9265, 'RandomForestClassifier', 0.8, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]
[[3589, 'ExtraTreesClassifier', 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], [3589, 'GradientBoostingClassifier', 0.6667, 0.6667, 0.6667, 0.4, 0.4, 0.8571, 0.8571], [3589, 'RandomForestClassifier', 0.6667, 0.4, 0.4, 0.4, 0.4, 0.6667, 0.6667]]


KeyboardInterrupt: 