In [1]:
from ptrail.core.Datasets import Datasets
from ptrail.features.kinematic_features import KinematicFeatures

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import f1_score, accuracy_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from src.utils.general_utils import Utilities
from TestUtils.test_utils import TestUtils
from TestUtils.Keys import *

In [2]:
# Load the Starkey dataset through PTRAIL's dataset loader.
starkey_dataset = Datasets.load_starkey()
# Build the dataset used by the experiments from PTRAIL's distance-column
# helper (presumably appends a point-to-point distance feature -- TODO confirm
# against the PTRAIL KinematicFeatures documentation).
ready_dataset = KinematicFeatures.create_distance_column(starkey_dataset)

------------------------ Dataset Facts ------------------------------

Number of unique Trajectories in the data: 253
Number of points in the data: 287136
Dataset time range: 1196 days 22:51:45
Datatype of the DataFrame: <class 'ptrail.core.TrajectoryDF.PTRAILDataFrame'>
Dataset Bounding Box: (45.18896978643169, -118.61020848239596, 45.314545642992, -118.50455596234036)

---------------------------------------------------------------------


In [3]:
# Benchmark every (seed, selection strategy, model) combination and collect
# one comma-separated result line per run in `final_results`.

# How many seeds to draw; passed to the seed generator and used as the number
# of values pulled from it, so the two can never drift apart.
NUM_SEEDS = 20

# Get the seed values that we are going to use.
seed_generator = Utilities.generate_pi_seed(NUM_SEEDS)
seed_vals = [next(seed_generator) for _ in range(NUM_SEEDS)]
final_results = ["seed, strategy, model, accuracy, f1_score"]

# All our selection strategies.
select_strategies = [
    BASE, BALANCED_IN, BALANCED_ON, BALANCED_DROP, BALANCED_STRETCH
]

models = [GradientBoostingClassifier(), DecisionTreeClassifier(), SVC()]

for seed in seed_vals:
    iter_map = TestUtils.get_iterable_map(ready_dataset, seed, 'Species')
    for select_strategy in select_strategies:
        # The split takes no model argument, so compute it once per strategy
        # and train every model on exactly the same data.
        train_x, train_y = TestUtils.select_correct_test_train_split(
            iter_map, select_strategy, BASE, 'Species', 3
        )
        if train_x is None or train_y is None:
            # No usable training split for this strategy; nothing to evaluate.
            continue

        # Strategies whose key does not contain 'balanced' are reported as 'base'.
        strategy_label = select_strategy if 'balanced' in select_strategy else 'base'

        for model in models:
            # Fit the model and predict. set_params() is the documented way to
            # change a hyper-parameter and raises if the estimator does not
            # expose `random_state`, instead of silently adding an attribute.
            model.set_params(random_state=seed)
            model.fit(X=train_x, y=train_y)
            pred_vals = model.predict(X=iter_map[TEST_X])

            # Calculate the accuracy and f1 score.
            acc = accuracy_score(y_true=iter_map[TEST_Y], y_pred=pred_vals)
            score = f1_score(y_true=iter_map[TEST_Y], y_pred=pred_vals, average='weighted')

            # Build the line once so the console log and the CSV rows always match.
            result_line = f"{seed}, {strategy_label}, {model.__class__.__name__}, {acc}, {score}"
            print(result_line)
            final_results.append(result_line)


1415, base, GradientBoostingClassifier, 0.9411764705882353, 0.94213898743228
1415, base, DecisionTreeClassifier, 0.9215686274509803, 0.9203891803400677
1415, base, SVC, 0.7450980392156863, 0.7311415794387932
1415, balanced_in, GradientBoostingClassifier, 0.9607843137254902, 0.9607843137254902
1415, balanced_in, DecisionTreeClassifier, 0.8823529411764706, 0.8812215082111277
1415, balanced_in, SVC, 0.7647058823529411, 0.7586945592060681
1415, balanced_on, GradientBoostingClassifier, 0.9215686274509803, 0.9207683073229292
1415, balanced_on, DecisionTreeClassifier, 0.8627450980392157, 0.8605576104681223
1415, balanced_on, SVC, 0.47058823529411764, 0.30117647058823527
1415, balanced_drop, GradientBoostingClassifier, 0.9607843137254902, 0.9607855185084787
1415, balanced_drop, DecisionTreeClassifier, 0.8235294117647058, 0.8212942465988953
1415, balanced_drop, SVC, 0.7843137254901961, 0.7826831882178158
1415, balanced_stretch, GradientBoostingClassifier, 0.8823529411764706, 0.8813071895424837


In [4]:
# Hand the collected result lines (header line first) to the project's CSV
# writer; the path is relative to the notebook's working directory.
TestUtils.write_csv_file('./balance_starkey.csv', final_results)

File successfully written to: ./balance_starkey.csv
