In [1]:
from ptrail.core.Datasets import Datasets
from ptrail.features.kinematic_features import KinematicFeatures

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import f1_score, accuracy_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from src.utils.general_utils import Utilities
from Test_Utils.test_utils import TestUtils
from Test_Utils.Keys import *

In [2]:
# Load the Starkey dataset through PTRAIL's Datasets loader.
starkey_dataset = Datasets.load_starkey()
# Build the frame that the experiments below consume. Presumably this adds a
# distance feature to every point (inferred from the method name only --
# TODO confirm against the PTRAIL KinematicFeatures documentation).
ready_dataset = KinematicFeatures.create_distance_column(starkey_dataset)

------------------------ Dataset Facts ------------------------------

Number of unique Trajectories in the data: 253
Number of points in the data: 287136
Dataset time range: 1196 days 22:51:45
Datatype of the DataFrame: <class 'ptrail.core.TrajectoryDF.PTRAILDataFrame'>
Dataset Bounding Box: (45.18896978643169, -118.61020848239596, 45.314545642992, -118.50455596234036)

---------------------------------------------------------------------


In [3]:
# Number of repetitions of the whole experiment; one seed is drawn per repetition.
NUM_SEEDS = 20

# Get the seed values that we are going to use.
seed_generator = Utilities.generate_pi_seed(NUM_SEEDS)
seed_vals = [next(seed_generator) for _ in range(NUM_SEEDS)]

# Rows of the results file; the first entry is the header line.
final_results = ["seed, strategy, model, accuracy, f1_score"]

# All our selection strategies.
select_strategies = [
    BASE, RANDOM_SELECTED, PROPORTIONAL_SELECTED, FEWEST_SELECTED, REPRESENTATIVE_SELECTED, BALANCED_ON, BALANCED_IN
]

# Augmentation strategies that are combined with every non-base, non-balanced selection strategy.
augment_strategies = [
    ON, IN, DROP, STRETCH
]

# The estimator instances are shared across all runs; each run re-seeds and re-fits them.
models = [GradientBoostingClassifier(), DecisionTreeClassifier(), SVC()]


def _planned_runs(select_strategy):
    """
        Expand one selection strategy into the runs that have to be executed for it.

        Parameters
        ----------
            select_strategy:
                One of the entries of ``select_strategies``.

        Returns
        -------
            list:
                ``(augment_strategy, label)`` tuples. ``augment_strategy`` is what gets
                passed to the train/test split helper and ``label`` is the text written
                to the ``strategy`` column of the results.
    """
    if select_strategy != BASE and 'balanced' not in select_strategy:
        # Regular selection strategies are evaluated once per augmentation strategy.
        return [
            (augment_strategy, f"{select_strategy}_{augment_strategy}")
            for augment_strategy in augment_strategies
        ]

    # The base run and the balanced strategies are evaluated exactly once, with BASE
    # passed in the augmentation slot. The base run is reported under the literal
    # label "base"; balanced strategies are reported under their own name.
    label = "base" if 'balanced' not in select_strategy else select_strategy
    return [(BASE, label)]


def _fit_and_score(model, seed, train_x, train_y, test_x, test_y):
    """
        Seed and fit ``model`` on the training data and score it on the test data.

        Parameters
        ----------
            model:
                Estimator to train. It is modified in place (re-seeded and re-fitted).
            seed:
                Value assigned to ``model.random_state`` before fitting.
            train_x, train_y:
                Training features and labels.
            test_x, test_y:
                Held-out features and labels used for scoring.

        Returns
        -------
            tuple:
                ``(accuracy, weighted_f1)`` of the predictions on the test data.
    """
    # Fit the model and predict.
    model.random_state = seed
    model.fit(X=train_x, y=train_y)
    pred_vals = model.predict(X=test_x)

    # Calculate the accuracy and f1 score.
    acc = accuracy_score(y_true=test_y, y_pred=pred_vals)
    score = f1_score(y_true=test_y, y_pred=pred_vals, average='weighted')
    return acc, score


for seed in seed_vals:
    # Get the iterable map for the seed.
    iter_map = TestUtils.get_iterable_map(ready_dataset, seed, 'Species')
    for select_strategy in select_strategies:
        runs = _planned_runs(select_strategy)
        for model in models:
            for augment_strategy, label in runs:
                # NOTE(review): the split is requested again for every model, exactly as
                # before. It is not visible here whether the helper is deterministic, so
                # the call is deliberately not hoisted out of the model loop. The meaning
                # of the trailing positional argument 1 is also not visible here.
                train_x, train_y = TestUtils.select_correct_test_train_split(iter_map, select_strategy,
                                                                             augment_strategy, 'Species', 1)
                # A combination for which no training data is returned is skipped.
                if train_x is None or train_y is None:
                    continue

                acc, score = _fit_and_score(model, seed, train_x, train_y,
                                            iter_map[TEST_X], iter_map[TEST_Y])

                # Build the row once so the printed line and the stored line cannot drift apart.
                result_row = f"{seed}, {label}, {model.__class__.__name__}, {acc}, {score}"
                print(result_row)
                final_results.append(result_row)


1415, base, GradientBoostingClassifier, 0.9411764705882353, 0.94213898743228
1415, base, DecisionTreeClassifier, 0.9215686274509803, 0.9203891803400677
1415, base, SVC, 0.7450980392156863, 0.7311415794387932
1415, random_selected_on, GradientBoostingClassifier, 0.9607843137254902, 0.9607843137254902
1415, random_selected_in, GradientBoostingClassifier, 0.9411764705882353, 0.94213898743228
1415, random_selected_drop, GradientBoostingClassifier, 0.9215686274509803, 0.9215686274509803
1415, random_selected_stretch, GradientBoostingClassifier, 0.9411764705882353, 0.9400000000000001
1415, random_selected_on, DecisionTreeClassifier, 0.8627450980392157, 0.8627450980392157
1415, random_selected_in, DecisionTreeClassifier, 0.9215686274509803, 0.9203891803400677
1415, random_selected_drop, DecisionTreeClassifier, 0.9019607843137255, 0.9011800849372007
1415, random_selected_stretch, DecisionTreeClassifier, 0.8627450980392157, 0.8607340372046255
1415, random_selected_on, SVC, 0.6274509803921569, 0

In [4]:
# Write all collected result rows (the header is the first entry of the list)
# to a CSV file; the path is relative to the current working directory.
results_csv_path = './starkey_results.csv'
TestUtils.write_csv_file(results_csv_path, final_results)

File successfully written to: ./starkey_results.csv
