In [4]:
import pandas as pd
from ptrail.core.TrajectoryDF import PTRAILDataFrame
from ptrail.core.Datasets import Datasets

# Pull in the traffic dataset through PTRAIL's Datasets loader, then
# preview the first five rows as the cell output.
traffic_df = Datasets.load_traffic_data()
traffic_df.head(n=5)

------------------------ Dataset Facts ------------------------------

Number of unique Trajectories in the data: 125
Number of points in the data: 44905
Dataset time range: 0 days 00:00:59.900000
Datatype of the DataFrame: <class 'ptrail.core.TrajectoryDF.PTRAILDataFrame'>
Dataset Bounding Box: (34.7107417, 135.4640652, 34.7156517, 135.4702002)

---------------------------------------------------------------------


Unnamed: 0_level_0,Unnamed: 1_level_0,vehicle_type,velocity,traffic_lane,lon,lat,kilopost,vehicle_length,detected_flag
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1371,1900-01-01 07:30:00.000,1,48.0,2,135.46995,34.710999,3539.5,3.0,0
1371,1900-01-01 07:30:00.100,1,47.9,2,135.469957,34.710991,3532.5,3.0,0
1371,1900-01-01 07:30:00.200,1,47.9,2,135.469963,34.710984,3532.5,3.0,0
1371,1900-01-01 07:30:00.300,1,47.9,2,135.469968,34.710979,3531.5,3.0,0
1371,1900-01-01 07:30:00.400,1,47.9,2,135.469972,34.710974,3530.8,3.0,0


In [5]:
from ptrail.features.kinematic_features import KinematicFeatures as spatial

# Add the acceleration, bearing and jerk columns to the trajectory data.
# Each call takes the current frame and its result is fed to the next one.
traffic_df = spatial.create_acceleration_column(traffic_df)
traffic_df = spatial.create_bearing_column(traffic_df)
traffic_df = spatial.create_jerk_column(traffic_df)
trajectories = traffic_df.index.unique(level="traj_id")

# Creating a dataset with mean of values: one row per trajectory holding the
# mean of every column. A single groupby on the traj_id index level replaces
# slicing the frame once per trajectory and concatenating one-row frames.
# sort=False keeps the groups in order of first appearance, i.e. the same
# order as `trajectories`.
traj_means = traffic_df.groupby(level="traj_id", sort=False).mean()

# reset_index() turns traj_id into an ordinary column so it can be dropped
# together with the other columns that should not be fed to the model.
drop_list = ['vehicle_type', 'lon', 'lat', 'kilopost',
             'detected_flag', 'traj_id']
mean_df = traj_means.reset_index().drop(columns=drop_list)

In [6]:
# Importing pycaret and creating a model to
# estimate the length of the vehicle
from pycaret.regression import *

# Configure the PyCaret regression experiment: 'vehicle_length' is the target
# and the remaining columns of mean_df are the features, normalised first.
# session_id fixes PyCaret's random seed so that the train/test split and the
# model ranking produced by the following cells are repeatable across runs.
# NOTE(review): the recorded run of this cell failed with
# "ValueError: Sklearn version error" -- presumably the installed scikit-learn
# is not a version this PyCaret release accepts; confirm the environment pins.
length_reg = setup(data=mean_df,
                   target='vehicle_length',
                   normalize=True,
                   session_id=42)

ValueError: Sklearn version error

In [None]:
# Let's try to find the best model: compare_models() evaluates PyCaret's
# candidate regressors on the experiment configured by setup() and shows
# the comparison as this cell's output.
compare_models()

In [None]:
# Huber was the best model during the original run, so
# we'll create a fresh instance of it and tune it.
# NOTE(review): 'huber' is hard-coded from that run -- re-check it against the
# compare_models() output whenever the data or the environment changes.
huber = create_model('huber')

In [None]:
# Run PyCaret's hyperparameter tuning on the Huber model and keep the result
# under a separate name so the untuned `huber` stays available.
tuned_huber = tune_model(huber)

In [None]:
# We can draw some plots to understand the tuned model better.
# No `plot` argument is given, so plot_model() falls back to its default plot.
plot_model(tuned_huber)

In [None]:
# Draw PyCaret's 'error' plot for the tuned Huber model.
plot_model(tuned_huber, plot='error')

In [None]:
# Draw PyCaret's 'feature' plot for the tuned Huber model
# (presumably feature importance -- confirm against the PyCaret docs).
plot_model(tuned_huber, plot='feature')