In [1]:
from ptrail.core.Datasets import Datasets
from ptrail.features.kinematic_features import KinematicFeatures

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import f1_score, accuracy_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from src.utils.general_utils import Utilities
from TestUtils.test_utils import TestUtils
from TestUtils.Keys import *

In [2]:
# Load the traffic dataset through ptrail's Datasets helper.
traffic_data = Datasets.load_traffic_data()
# Run ptrail's KinematicFeatures.create_distance_column on the loaded data; the
# rendered output of this cell shows a `Distance` column at the end of the frame
# (empty on the first displayed point of trajectory 1371).
ready_dataset = KinematicFeatures.create_distance_column(traffic_data)
# Bare last expression so the notebook renders the frame (dataset facts + preview).
ready_dataset

------------------------ Dataset Facts ------------------------------

Number of unique Trajectories in the data: 125
Number of points in the data: 44905
Dataset time range: 0 days 00:00:59.900000
Datatype of the DataFrame: <class 'ptrail.core.TrajectoryDF.PTRAILDataFrame'>
Dataset Bounding Box: (34.7107417, 135.4640652, 34.7156517, 135.4702002)

---------------------------------------------------------------------


Unnamed: 0_level_0,Unnamed: 1_level_0,vehicle_type,velocity,traffic_lane,lon,lat,kilopost,vehicle_length,detected_flag,Distance
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1371,1900-01-01 07:30:00.000,1,48.0,2,135.469950,34.710999,3539.5,3.0,0,
1371,1900-01-01 07:30:00.100,1,47.9,2,135.469957,34.710991,3532.5,3.0,0,1.115504
1371,1900-01-01 07:30:00.200,1,47.9,2,135.469963,34.710984,3532.5,3.0,0,0.939478
1371,1900-01-01 07:30:00.300,1,47.9,2,135.469968,34.710979,3531.5,3.0,0,0.763477
1371,1900-01-01 07:30:00.400,1,47.9,2,135.469972,34.710974,3530.8,3.0,0,0.596403
...,...,...,...,...,...,...,...,...,...,...
3357,1900-01-01 07:30:59.500,1,27.7,1,135.468970,34.712177,3697.6,3.5,0,0.737022
3357,1900-01-01 07:30:59.600,1,27.7,1,135.468975,34.712172,3696.6,3.5,1,0.760166
3357,1900-01-01 07:30:59.700,1,29.0,1,135.468981,34.712166,3695.6,3.5,1,0.788944
3357,1900-01-01 07:30:59.800,1,30.3,1,135.468986,34.712160,3694.7,3.5,1,0.835374


In [3]:
# ---------------------------------------------------------------------------
# Experiment: for every seed, every selection strategy and every classifier,
# fit on the selected training split, predict on the held-out test split and
# record accuracy plus weighted F1 as one CSV-style row.
# ---------------------------------------------------------------------------

# Number of seeds to draw; used both to build the generator and to consume it.
NUM_SEEDS = 20
# Column passed to the TestUtils helpers as the prediction target.
TARGET_COLUMN = 'vehicle_type'

# Get the seed values that we are going to use.
seed_generator = Utilities.generate_pi_seed(NUM_SEEDS)
seed_vals = [next(seed_generator) for _ in range(NUM_SEEDS)]

# First entry is the CSV header; one row per (seed, strategy, model) follows.
final_results = ["seed, strategy, model, accuracy, f1_score"]

# All our selection strategies.
select_strategies = [
    BASE, BALANCED_ON, BALANCED_IN, BALANCED_DROP, BALANCED_STRETCH
]

augment_strategies = [
    ON, IN, DROP, STRETCH
]

# The same estimator instances are re-fitted for every seed/strategy pair.
models = [GradientBoostingClassifier(), DecisionTreeClassifier(), SVC()]


def _fit_and_report(model, seed, iter_map, select_strategy, augment_strategy, label, results):
    """Fit ``model`` on the selected split, score it on the test split and log one row.

    Parameters
    ----------
    model:
        Estimator exposing ``set_params``, ``fit`` and ``predict``.
    seed:
        Value assigned to the estimator's ``random_state`` and written to the row.
    iter_map:
        Mapping returned by ``TestUtils.get_iterable_map``; read here via the
        ``TEST_X`` and ``TEST_Y`` keys.
    select_strategy, augment_strategy:
        Forwarded unchanged to ``TestUtils.select_correct_test_train_split``.
    label:
        Text written into the ``strategy`` column of the row.
    results:
        List the formatted row is appended to.

    Nothing is fitted or recorded when the split helper returns ``None`` for
    either the training features or the training labels.
    """
    # NOTE(review): the meaning of the trailing positional argument `3` is not
    # visible in this notebook -- confirm against TestUtils before changing it.
    train_x, train_y = TestUtils.select_correct_test_train_split(
        iter_map, select_strategy, augment_strategy, TARGET_COLUMN, 3
    )
    if train_x is None or train_y is None:
        return

    # Fit the model and predict.
    model.set_params(random_state=seed)
    model.fit(X=train_x, y=train_y)
    pred_vals = model.predict(X=iter_map[TEST_X])

    # Calculate the accuracy and f1 score.
    acc = accuracy_score(y_true=iter_map[TEST_Y], y_pred=pred_vals)
    score = f1_score(y_true=iter_map[TEST_Y], y_pred=pred_vals, average='weighted')

    # Build the row once so the printed log and the stored CSV row cannot diverge.
    row = f"{seed}, {label}, {model.__class__.__name__}, {acc}, {score}"
    print(row)
    results.append(row)


for seed in seed_vals:
    # Get the iterable map for the seed.
    iter_map = TestUtils.get_iterable_map(ready_dataset, seed, TARGET_COLUMN)
    for select_strategy in select_strategies:
        # Both flags depend only on the strategy, so compute them once per strategy.
        is_balanced = 'balanced' in select_strategy
        # NOTE(review): judging by the printed rows, every non-base strategy in
        # `select_strategies` contains 'balanced', so this looks unreachable with
        # the current list; kept so plain augmentation strategies can be added.
        needs_augmentation = select_strategy != BASE and not is_balanced

        for model in models:
            if needs_augmentation:
                for augment_strategy in augment_strategies:
                    _fit_and_report(
                        model, seed, iter_map, select_strategy, augment_strategy,
                        f"{select_strategy}_{augment_strategy}", final_results,
                    )
            else:
                # Un-balanced runs are reported under the literal name "base".
                label = select_strategy if is_balanced else "base"
                _fit_and_report(
                    model, seed, iter_map, select_strategy, BASE, label, final_results,
                )


1415, base, GradientBoostingClassifier, 0.875, 0.8944444444444444
1415, base, DecisionTreeClassifier, 0.875, 0.8944444444444444
1415, base, SVC, 0.9583333333333334, 0.9379432624113475
1415, balanced_on, GradientBoostingClassifier, 0.9166666666666666, 0.9166666666666666
1415, balanced_on, DecisionTreeClassifier, 0.9166666666666666, 0.9166666666666666
1415, balanced_on, SVC, 0.9166666666666666, 0.9166666666666666
1415, balanced_in, GradientBoostingClassifier, 0.9583333333333334, 0.9379432624113475
1415, balanced_in, DecisionTreeClassifier, 0.7916666666666666, 0.8533681765389084
1415, balanced_in, SVC, 0.9583333333333334, 0.9379432624113475
1415, balanced_drop, GradientBoostingClassifier, 0.9583333333333334, 0.9379432624113475
1415, balanced_drop, DecisionTreeClassifier, 0.7916666666666666, 0.8468992248062017
1415, balanced_drop, SVC, 0.5833333333333334, 0.6990740740740741
1415, balanced_stretch, GradientBoostingClassifier, 0.9166666666666666, 0.9166666666666666
1415, balanced_stretch, De

In [4]:
# Persist every collected row (header first) through the project's CSV helper.
results_csv_path = './balance_traffic.csv'
TestUtils.write_csv_file(results_csv_path, final_results)

File successfully written to: ./balance_traffic.csv
