In [1]:
import numpy as np
import pandas as pd
from ptrail.core.TrajectoryDF import PTRAILDataFrame
from ptrail.features.kinematic_features import KinematicFeatures
from ptrail.core.Datasets import Datasets
from ptrail.preprocessing.statistics import Statistics

from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler
from TestUtils.test_utils import TestUtils
from src.selection.select import Selection
from src.utils.general_utils import Utilities

In [2]:
# Load the traffic dataset that ships with PTRAIL.
traffic_dataset = Datasets.load_traffic_data()
# Attach a 'Distance' column to the loaded dataset; in the rendered output below
# the first point of a trajectory has no Distance value.
ready_dataset = KinematicFeatures.create_distance_column(traffic_dataset)
# Bare last expression so the notebook renders the resulting dataframe.
ready_dataset

------------------------ Dataset Facts ------------------------------

Number of unique Trajectories in the data: 125
Number of points in the data: 44905
Dataset time range: 0 days 00:00:59.900000
Datatype of the DataFrame: <class 'ptrail.core.TrajectoryDF.PTRAILDataFrame'>
Dataset Bounding Box: (34.7107417, 135.4640652, 34.7156517, 135.4702002)

---------------------------------------------------------------------


Unnamed: 0_level_0,Unnamed: 1_level_0,vehicle_type,velocity,traffic_lane,lon,lat,kilopost,vehicle_length,detected_flag,Distance
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1371,1900-01-01 07:30:00.000,1,48.0,2,135.469950,34.710999,3539.5,3.0,0,
1371,1900-01-01 07:30:00.100,1,47.9,2,135.469957,34.710991,3532.5,3.0,0,1.115504
1371,1900-01-01 07:30:00.200,1,47.9,2,135.469963,34.710984,3532.5,3.0,0,0.939478
1371,1900-01-01 07:30:00.300,1,47.9,2,135.469968,34.710979,3531.5,3.0,0,0.763477
1371,1900-01-01 07:30:00.400,1,47.9,2,135.469972,34.710974,3530.8,3.0,0,0.596403
...,...,...,...,...,...,...,...,...,...,...
3357,1900-01-01 07:30:59.500,1,27.7,1,135.468970,34.712177,3697.6,3.5,0,0.737022
3357,1900-01-01 07:30:59.600,1,27.7,1,135.468975,34.712172,3696.6,3.5,1,0.760166
3357,1900-01-01 07:30:59.700,1,29.0,1,135.468981,34.712166,3695.6,3.5,1,0.788944
3357,1900-01-01 07:30:59.800,1,30.3,1,135.468986,34.712160,3694.7,3.5,1,0.835374


In [3]:
# ---------------------------------------------------------------------------
# Experiment configuration.
# ---------------------------------------------------------------------------
seed_generator = Utilities.generate_pi_seed(1)
seed_vals = [next(seed_generator) for _ in range(1)]
shake_percentages = [0.2, 0.4, 0.6]
circle_methods = ['on', 'in']
ml_models = [ExtraTreesClassifier(), GradientBoostingClassifier(), RandomForestClassifier()]
scaler = MinMaxScaler((0, 1))

# Column labels are derived from the same (shake, method) nesting used by the
# loops below, so every value appended to a row lines up with its header.
# Produces labels such as "on_20%", "in_20%", "on_40%", ...
experiment_labels = [f"{method}_{shake:.0%}"
                     for shake in shake_percentages
                     for method in circle_methods]
distance_columns = [f"{label}_{stat}" for label in experiment_labels for stat in ("dist", "std")]
model_columns = ["model", "baseline"] + [f"{label}_f1" for label in experiment_labels]

# One row per (seed, model) run. Headers are kept separately in
# distance_columns / model_columns instead of being mixed into these lists.
distance_results = []
model_results = []

# NOTE(review): a previous run of this cell stopped with
# "ValueError: Expected 2D array, got 1D array" raised while validating a
# 1-D input. The failing call is not identifiable from the recorded output;
# presumably one of the TestUtils helpers returned a single sample as a
# 1-D structure -- confirm against the helper implementations.
for seed in seed_vals:
    # Set apart 20% data for testing that augmentation process will never see.
    train, test_x, test_y = TestUtils.get_test_train_data(dataset=ready_dataset, seed_val=seed,
                                                          class_col='vehicle_type', k=0.8)
    for model in ml_models:
        # Fresh rows for every model so that each row has exactly one value
        # per header column.
        distance_row = []
        model_row = [model.__class__.__name__]

        # Baseline: train on the un-augmented training data.
        base_train_x, base_train_y = TestUtils.get_base_train_x_and_train_y(train, "vehicle_type")
        # Fit the scaler on the training data only and reuse those parameters
        # for the test data; refitting on the test set would scale train and
        # test differently and leak test-set statistics into the evaluation.
        scaled_base_train_x = scaler.fit_transform(base_train_x)
        scaled_test_x = scaler.transform(test_x)
        model_row.append(TestUtils.train_model_and_evaluate(model, scaled_base_train_x, base_train_y,
                                                            scaled_test_x, test_y, seed))

        for shake in shake_percentages:
            for method in circle_methods:
                # Randomly select 30% of trajectories to be augmented.
                selected = Selection.select_randomly(train, seed, k=0.3)

                # Augment the trajectories.
                train_x, train_y = TestUtils.augment_trajectories_using_random_strategy(dataset=train,
                                                                                        percent_to_shake=shake,
                                                                                        ids_to_augment=selected,
                                                                                        circle=method,
                                                                                        n_augmentations=1,
                                                                                        class_col="vehicle_type")

                # Calculate the distances.
                mean, std = TestUtils.find_original_and_augmentation_pairs_and_calculate_differences(train_x, selected)

                # Scale with parameters learned from the augmented training
                # data, then apply the same transformation to the test data.
                scaled_train_x = scaler.fit_transform(train_x)
                scaled_test_x = scaler.transform(test_x)
                f1_score = TestUtils.train_model_and_evaluate(model, scaled_train_x, train_y,
                                                              scaled_test_x, test_y, seed)
                distance_row.extend([mean, std])
                model_row.append(f1_score)

        print(distance_row)
        print()
        print(model_row)
        print("----------------------------------------")
        # Append the intermediate lists to the final list.
        distance_results.append(distance_row)
        model_results.append(model_row)

# Tabulate the collected rows; row i of distance_df belongs to the same
# (seed, model) run as row i of model_df.
distance_df = pd.DataFrame(distance_results, columns=distance_columns)
model_df = pd.DataFrame(model_results, columns=model_columns)
model_df


ValueError: Expected 2D array, got 1D array instead:
array=[ 4.92387228e-01  7.18536678e-01  9.32588406e-01  1.05267739e+00
  1.12301684e+00  2.56400882e-01  1.45972594e+00  8.80655430e-01
  2.29382212e-01  1.72581043e+01  5.35462106e+01  1.30512590e+02
  2.27400777e+02  2.81040356e+02  4.47378857e-01  3.15929210e+02
  1.41170738e+02  9.60658859e+01  4.92387228e+00  7.18536678e+00
  9.32588406e+00  1.05267739e+01  1.12301684e+01  2.56400882e+00
  1.45972594e+01  8.80655430e+00  2.29382212e+00 -2.87333931e+00
 -1.21608600e+00  3.82173369e-06  1.43562354e+00  2.87308151e+00
 -1.63317788e+01  1.93345778e+01  2.15323457e-01  3.49793172e+00
 -2.32844295e+01 -1.42864621e+01  5.01291239e-04  1.25497167e+01
  2.35355822e+01 -7.06313757e+01  7.94877890e+01  3.60991477e-02
  2.01680585e+01  1.43157561e+02  1.43797956e+02  1.44373010e+02
  1.44933656e+02  1.45606259e+02  1.38755035e+02  1.75911378e+02
  1.44710432e+02  3.17959103e+00 -9.20773758e+00 -3.96959270e+00
 -1.64008451e-05  3.77245899e+00  8.15198286e+00 -1.40032450e+02
  9.22651509e+01  3.56056766e-03  1.40780236e+01 -9.20773758e+00
 -3.96959270e+00 -1.64008451e-05  3.77245899e+00  8.15198286e+00
 -1.40032450e+02  9.22651509e+01  3.56056766e-03  1.40780236e+01].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.