In [1]:
import pandas as pd
from ptrail.core.TrajectoryDF import PTRAILDataFrame
from ptrail.features.kinematic_features import KinematicFeatures

# Load the raw Starkey points from the CSV that sits next to the notebook.
pdf = pd.read_csv('./starkey_new.csv')

# Tell PTRAIL which CSV columns carry the coordinates, the timestamp and the
# trajectory identifier, then wrap the frame.
column_map = {
    'latitude': 'lat',
    'longitude': 'lon',
    'datetime': 'DateTime',
    'traj_id': 'Id',
}
starkey = PTRAILDataFrame(data_set=pdf, **column_map)

# print() is deliberate: it emits the "Dataset Facts" text summary shown below.
print(starkey)

------------------------ Dataset Facts ------------------------------

Number of unique Trajectories in the data: 253
Number of points in the data: 287136
Dataset time range: 1196 days 22:51:45
Datatype of the DataFrame: <class 'ptrail.core.TrajectoryDF.PTRAILDataFrame'>
Dataset Bounding Box: (45.18896978643169, -118.61020848239596, 45.314545642992, -118.50455596234036)

---------------------------------------------------------------------


In [2]:
%%time

# Compute the kinematic statistics for the Starkey trajectories loaded above.
# `%%time` must remain the first line of the cell; it reports the CPU and wall
# time of this call. The classification cell further down treats `stats` as a
# table with a 'Species' label column and uses every other column as a feature.
# NOTE(review): presumably one row per trajectory -- confirm against the PTRAIL docs.
stats = KinematicFeatures.generate_kinematic_stats(starkey)


CPU times: user 33.7 s, sys: 1.1 s, total: 34.8 s
Wall time: 36.8 s


In [4]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.decomposition import PCA
import plotly.express as px
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Fixed seed so the split, the stochastic models and the PCA projection give
# the same scores and figures on every Restart & Run All.
SEED = 42

# Hold out a third of the rows for evaluation. 'Species' is the label; every
# other column of `stats` is used as a feature.
train_x, test_x, train_y, test_y = train_test_split(
    stats.drop(columns=['Species']),
    stats["Species"],
    test_size=0.33,
    random_state=SEED,
)

# All the models
names = ["Random Forest", "Naive Bayes", "Decision Tree", "KNN", "SVC"]
models = [
    RandomForestClassifier(random_state=SEED),
    GaussianNB(),
    DecisionTreeClassifier(random_state=SEED),
    KNeighborsClassifier(),
    SVC(),
]
# Not used in this cell; kept in case another cell refers to it.
model_fit = [x + ' fit' for x in names]
fitted_models = []

# The 2-D projection depends only on test_x, not on the classifier, so it is
# computed once here instead of being refit on every loop iteration.
pca = PCA(n_components=2, random_state=SEED)
transformed = pca.fit_transform(test_x)

for name, model in zip(names, models):
    # Train on the training split and score accuracy on the held-out rows.
    fit = model.fit(train_x, train_y)
    ans = fit.predict(test_x)
    score = accuracy_score(y_true=test_y, y_pred=ans)
    print(f"Score of {name}: {round(score*100, 2)}%")
    fitted_models.append(fit)

    # One figure per model: the two principal components on x/y and the
    # predicted label on z (also encoded as symbol and colour). The title
    # names the model so the five figures can be told apart.
    fig = px.scatter_3d(x=transformed[:, 0],
                        y=transformed[:, 1],
                        z=ans, symbol=ans,
                        color=ans, template='ggplot2',
                        title=f"{name}: predicted species on the PCA-projected test set",
                        )
    fig.update_coloraxes(showscale=False)
    fig.show()