In [None]:
!pip install tslearn
!pip install fuzzy-c-means

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tslearn
  Downloading tslearn-0.5.3.2-py3-none-any.whl (358 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m358.2/358.2 KB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tslearn
Successfully installed tslearn-0.5.3.2
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting fuzzy-c-means
  Downloading fuzzy_c_means-1.6.3-py3-none-any.whl (9.1 kB)
Collecting typer<0.4.0,>=0.3.2
  Downloading typer-0.3.2-py3-none-any.whl (21 kB)
Installing collected packages: typer, fuzzy-c-means
  Attempting uninstall: typer
    Found existing installation: typer 0.7.0
    Uninstalling typer-0.7.0:
      Successfully uninstalled typer-0.7.0
Successfully installed fuzzy-c-means-1.6.3 typer-0.3.2


In [None]:
# Mount Google Drive at /content/drive; the CSV paths used later are given relative to it ("drive/MyDrive/...").
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
from tslearn.metrics import lcss, dtw
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from tslearn.clustering import TimeSeriesKMeans
import sklearn
import math
import cv2

In [None]:
def generate_random_colors(number_of_colors):
    """
    Draw random RGB triples and render each one as a '#rrggbb' hex string.

    Parameters:
    number_of_colors (int): how many colors to produce

    Returns:
    list: hex color codes, one per requested color
    """
    # One row per color, one column per channel, each channel drawn from 0..255
    rgb_rows = np.random.randint(256, size=(number_of_colors, 3))
    hex_codes = []
    for red, green, blue in rgb_rows:
        hex_codes.append(f"#{red:02x}{green:02x}{blue:02x}")
    return hex_codes

In [None]:
def create_remastered_df(df):
    """
    Collapse a per-frame trajectory table into one row per track.

    Rows are grouped by 'trackId'; inside each group they are ordered by
    'trackLifetime' and the position / velocity columns are gathered into lists.

    Parameters:
    df (pandas dataframe): per-frame data with 'trackId', 'trackLifetime',
        'xCenter', 'yCenter', 'xVelocity' and 'yVelocity' columns

    Returns:
    pandas dataframe: one row per trackId with columns 'trackId', 'xcentres',
        'ycentres', 'xvelocity' and 'yvelocity' (the last four hold lists)
    """
    # Output column -> source column. Keeping the mapping in one place makes
    # sure 'xvelocity' is read from 'xVelocity' and not from 'yVelocity'.
    column_sources = {
        'xcentres': 'xCenter',
        'ycentres': 'yCenter',
        'xvelocity': 'xVelocity',
        'yvelocity': 'yVelocity',
    }
    trajec_dict = {'trackId': []}
    for target in column_sources:
        trajec_dict[target] = []

    for trackid, group_df in df.groupby("trackId"):
        # Samples must be in temporal order before they are flattened to lists
        group_df = group_df.sort_values(by='trackLifetime')
        trajec_dict["trackId"].append(trackid)
        for target, source in column_sources.items():
            trajec_dict[target].append(list(group_df[source]))

    remastered_df = pd.DataFrame(trajec_dict)
    return remastered_df

In [None]:
def create_similarity_metrix(data, metric):
    """
    Build the symmetric pairwise similarity matrix of a collection of sequences.

    Parameters:
        - data: a collection supporting len(data) and data[i] for
          i in range(len(data)); every item is converted with np.array.
        - metric (str): The measure to use, one of 'lcss' or 'dtw'

    Returns:
        - np.ndarray: A matrix with shape (n, n) where n is len(data).
          Cells [i][j] and [j][i] both hold measure(data[i], data[j]) for i >= j.

    Raises:
        - KeyError: if metric is not one of 'lcss' or 'dtw'.
    """
    similarity_measure = {'lcss': lcss, 'dtw': dtw}
    measure = similarity_measure[metric]

    # Convert every item once instead of once per pair
    sequences = [np.array(data[i]) for i in range(len(data))]
    n = len(sequences)
    similarity_matrix = np.zeros((n, n))

    for i in range(n):
        # Only the lower triangle (including the diagonal) is evaluated; the
        # value is mirrored, so each pair costs a single call to the measure.
        for j in range(i + 1):
            value = measure(sequences[i], sequences[j])
            similarity_matrix[i][j] = value
            similarity_matrix[j][i] = value
    return similarity_matrix

In [None]:
def measure_clustering_performance(X, labels, metric=None):
    """
    Score a clustering with one named metric, or with every supported metric.

    Parameters:
        - X: The clustered dataset, passed unchanged to the scoring function(s).
        - labels: The cluster labels, passed unchanged to the scoring function(s).
        - metric (str): One of 'silhouette', 'calinski_harabasz' or 'davies_bouldin'; None selects all of them.

    Returns:
        - The score of the requested metric, or a dict mapping each metric name to its score when metric is None.

    Raises:
        - ValueError: if metric is given but is not a supported name.
    """
    scorers = {
        'silhouette': silhouette_score,
        'calinski_harabasz': calinski_harabasz_score,
        'davies_bouldin': davies_bouldin_score,
    }

    if metric is not None:
        if metric in scorers:
            return scorers[metric](X, labels)
        raise ValueError(f"Invalid metric: {metric}. Choose one of {', '.join(scorers.keys())}")

    # No metric requested: evaluate all of them, keyed by name
    return {name: scorer(X, labels) for name, scorer in scorers.items()}

In [None]:
def select_random_df(df, size_of_the_selected_df):
    """
    Return a reproducible random subset of the rows of a dataframe.

    Parameters:
        - df (pd.DataFrame): The dataframe to sample from
        - size_of_the_selected_df (int): How many rows to draw; may not exceed len(df).

    Returns:
        - pd.DataFrame: The sampled rows (drawn with a fixed random_state of 1).

    Raises:
        - ValueError: if more rows are requested than df contains.
    """
    available_rows = len(df)
    if available_rows < size_of_the_selected_df:
        raise ValueError("size_of_the_selected_df must be less than or equal to the number of rows in df")
    # Fixed seed so repeated runs pick the same rows
    return df.sample(n=size_of_the_selected_df, random_state=1)

In [None]:
# List of per-recording track files (recordings 02..23)
files = ["drive/MyDrive/ThesisData/data/{}_tracks.csv".format(f"{i:02}") for i in range(2,24)]

# The frames are collected in a list and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0, and calling it per file re-copied
# the whole accumulated table on every iteration.
track_frames = []
seen_track_ids = set()

# Iterate over the file names
for file in files:
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file)

    # Shift this file's track ids by the number of distinct ids collected so far.
    # NOTE(review): this only keeps ids collision-free if every file numbers its
    # tracks 0..k-1 without gaps - confirm against the data.
    unique_track_id_count = len(seen_track_ids)
    if unique_track_id_count:
        df['trackId'] = df['trackId'] + unique_track_id_count
    seen_track_ids.update(df['trackId'].dropna().unique())

    track_frames.append(df)

# Single concatenation with a fresh 0..n-1 row index
combined_df = pd.concat(track_frames, ignore_index=True)

# Display the combined DataFrame
combined_df

Unnamed: 0,recordingId,trackId,frame,trackLifetime,xCenter,yCenter,heading,width,length,xVelocity,yVelocity,xAcceleration,yAcceleration,lonVelocity,latVelocity,lonAcceleration,latAcceleration
0,2,0,11127,0,92.7070,-82.3122,23.8856,0.6988,0.8824,1.0883,0.4566,0.1823,0.2408,1.1800,0.0232,0.2642,-0.1464
1,2,0,11128,1,92.7507,-82.2938,23.9028,0.6988,0.8824,1.0969,0.4670,0.1837,0.2443,1.1921,0.0175,0.2669,-0.1490
2,2,0,11129,2,92.7945,-82.2754,23.9228,0.6988,0.8824,1.1057,0.4777,0.1839,0.2472,1.2044,0.0117,0.2684,-0.1514
3,2,0,11130,3,92.8390,-82.2566,23.9454,0.6988,0.8824,1.1145,0.4887,0.1831,0.2493,1.2169,0.0057,0.2685,-0.1535
4,2,0,11131,4,92.8844,-82.2372,23.9703,0.6988,0.8824,1.1230,0.4999,0.1812,0.2504,1.2292,-0.0006,0.2673,-0.1552
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5035781,23,13128,23712,140,71.5383,-63.3552,332.0436,2.1015,4.5571,6.1692,-2.9253,0.7953,1.7231,6.8206,-0.3082,-0.1053,-1.8949
5035782,23,13128,23713,141,71.7839,-63.4703,332.5928,2.1015,4.5571,6.2020,-2.8572,0.7980,1.7210,6.8211,-0.3184,-0.0838,-1.8952
5035783,23,13128,23714,142,72.0310,-63.5823,333.1645,2.1015,4.5571,6.2356,-2.7895,0.7993,1.7200,6.8234,-0.3258,-0.0632,-1.8956
5035784,23,13128,23715,143,72.2840,-63.6944,333.7808,2.1015,4.5571,6.2681,-2.7212,0.7996,1.7198,6.8254,-0.3281,-0.0425,-1.8961


In [None]:
# List of per-recording track metadata files (recordings 02..23)
files = ["drive/MyDrive/ThesisData/data/{}_tracksMeta.csv".format(f"{i:02}") for i in range(2,24)]

# The frames are collected in a list and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0, and calling it per file re-copied
# the whole accumulated table on every iteration.
meta_frames = []
seen_meta_track_ids = set()

# Iterate over the file names
for file in files:
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file)

    # Shift this file's track ids by the number of distinct ids collected so far.
    # NOTE(review): the shifted ids only line up with the ids of the per-frame
    # track table if both tables contain the same gap-free ids per recording -
    # confirm against the data.
    unique_track_id_count = len(seen_meta_track_ids)
    if unique_track_id_count:
        df['trackId'] = df['trackId'] + unique_track_id_count
    seen_meta_track_ids.update(df['trackId'].dropna().unique())

    meta_frames.append(df)

# Single concatenation with a fresh 0..n-1 row index
combined_df_2 = pd.concat(meta_frames, ignore_index=True)

# Display the combined DataFrame
combined_df_2

Unnamed: 0,recordingId,trackId,initialFrame,finalFrame,numFrames,width,length,class
0,2,0,11127,12438,1312,0.6988,0.8824,pedestrian
1,2,1,2918,3271,354,0.8280,1.7977,motorcycle
2,2,2,0,389,390,1.9891,4.1515,car
3,2,3,0,111,112,1.9810,4.4463,car
4,2,4,0,264,265,1.8809,3.7336,car
...,...,...,...,...,...,...,...,...
13124,23,13124,23365,23654,290,2.2057,5.1967,car
13125,23,13125,23502,23715,214,3.5310,12.5956,truck
13126,23,13126,23519,23716,198,2.1717,5.2090,car
13127,23,13127,23571,23716,146,2.0845,4.9813,car


In [None]:
# Boolean selector over the metadata rows whose class is 'car'
car_mask = combined_df_2['class'].eq('car')

# Track ids of the selected rows, as a NumPy array
car_trackids = np.array(combined_df_2.loc[car_mask, 'trackId'])

In [None]:
# filter the combined_df to include only the tracks of cars
# Keep only the per-frame rows whose trackId is one of the car track ids
filtered_df = combined_df.loc[combined_df['trackId'].isin(car_trackids)]
filtered_df

Unnamed: 0,recordingId,trackId,frame,trackLifetime,xCenter,yCenter,heading,width,length,xVelocity,yVelocity,xAcceleration,yAcceleration,lonVelocity,latVelocity,lonAcceleration,latAcceleration
1666,2,2,0,0,84.3821,-69.4747,27.3021,1.9891,4.1515,4.9296,2.2544,1.2637,1.3521,5.4145,0.2578,1.7431,-0.6218
1667,2,2,1,1,84.5787,-69.3829,27.3277,1.9891,4.1515,4.9911,2.3139,1.2745,1.3744,5.4963,0.2357,1.7632,-0.6359
1668,2,2,2,2,84.7795,-69.2884,27.3494,1.9891,4.1515,5.0529,2.3738,1.2745,1.3916,5.5787,0.2129,1.7713,-0.6505
1669,2,2,3,3,84.9843,-69.1918,27.3706,1.9891,4.1515,5.1141,2.4343,1.2639,1.4038,5.6607,0.1894,1.7678,-0.6656
1670,2,2,4,4,85.1918,-69.0920,27.3960,1.9891,4.1515,5.1740,2.4947,1.2435,1.4116,5.7417,0.1658,1.7535,-0.6811
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5035781,23,13128,23712,140,71.5383,-63.3552,332.0436,2.1015,4.5571,6.1692,-2.9253,0.7953,1.7231,6.8206,-0.3082,-0.1053,-1.8949
5035782,23,13128,23713,141,71.7839,-63.4703,332.5928,2.1015,4.5571,6.2020,-2.8572,0.7980,1.7210,6.8211,-0.3184,-0.0838,-1.8952
5035783,23,13128,23714,142,72.0310,-63.5823,333.1645,2.1015,4.5571,6.2356,-2.7895,0.7993,1.7200,6.8234,-0.3258,-0.0632,-1.8956
5035784,23,13128,23715,143,72.2840,-63.6944,333.7808,2.1015,4.5571,6.2681,-2.7212,0.7996,1.7198,6.8254,-0.3281,-0.0425,-1.8961


In [None]:
# Add the magnitude of the (xVelocity, yVelocity) vector as a 'Velocity' column
filtered_df = filtered_df.assign(
    Velocity=lambda frame: np.hypot(frame['xVelocity'], frame['yVelocity'])
)
filtered_df

Unnamed: 0,recordingId,trackId,frame,trackLifetime,xCenter,yCenter,heading,width,length,xVelocity,yVelocity,xAcceleration,yAcceleration,lonVelocity,latVelocity,lonAcceleration,latAcceleration,Velocity
1666,2,2,0,0,84.3821,-69.4747,27.3021,1.9891,4.1515,4.9296,2.2544,1.2637,1.3521,5.4145,0.2578,1.7431,-0.6218,5.420634
1667,2,2,1,1,84.5787,-69.3829,27.3277,1.9891,4.1515,4.9911,2.3139,1.2745,1.3744,5.4963,0.2357,1.7632,-0.6359,5.501383
1668,2,2,2,2,84.7795,-69.2884,27.3494,1.9891,4.1515,5.0529,2.3738,1.2745,1.3916,5.5787,0.2129,1.7713,-0.6505,5.582717
1669,2,2,3,3,84.9843,-69.1918,27.3706,1.9891,4.1515,5.1141,2.4343,1.2639,1.4038,5.6607,0.1894,1.7678,-0.6656,5.663906
1670,2,2,4,4,85.1918,-69.0920,27.3960,1.9891,4.1515,5.1740,2.4947,1.2435,1.4116,5.7417,0.1658,1.7535,-0.6811,5.744023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5035781,23,13128,23712,140,71.5383,-63.3552,332.0436,2.1015,4.5571,6.1692,-2.9253,0.7953,1.7231,6.8206,-0.3082,-0.1053,-1.8949,6.827621
5035782,23,13128,23713,141,71.7839,-63.4703,332.5928,2.1015,4.5571,6.2020,-2.8572,0.7980,1.7210,6.8211,-0.3184,-0.0838,-1.8952,6.828499
5035783,23,13128,23714,142,72.0310,-63.5823,333.1645,2.1015,4.5571,6.2356,-2.7895,0.7993,1.7200,6.8234,-0.3258,-0.0632,-1.8956,6.831107
5035784,23,13128,23715,143,72.2840,-63.6944,333.7808,2.1015,4.5571,6.2681,-2.7212,0.7996,1.7198,6.8254,-0.3281,-0.0425,-1.8961,6.833301


In [None]:
# Collapse the per-frame car rows into one row per track, with list-valued position/velocity columns
remastered_df_2 = create_remastered_df(filtered_df)
remastered_df_2

Unnamed: 0,trackId,xcentres,ycentres,xvelocity,yvelocity
0,2,"[84.3821, 84.5787, 84.7795, 84.9843, 85.1918, ...","[-69.4747, -69.3829, -69.2884, -69.1918, -69.0...","[2.2544, 2.3139, 2.3738, 2.4343, 2.4947, 2.554...","[2.2544, 2.3139, 2.3738, 2.4343, 2.4947, 2.554..."
1,3,"[95.105, 95.4032, 95.7078, 96.0185, 96.3335, 9...","[-63.5575, -63.4384, -63.3192, -63.1997, -63.0...","[3.0222, 3.0078, 2.9925, 2.9757, 2.9569, 2.935...","[3.0222, 3.0078, 2.9925, 2.9757, 2.9569, 2.935..."
2,4,"[57.4754, 57.6063, 57.7412, 57.8808, 58.023, 5...","[-51.4526, -51.6123, -51.7756, -51.9431, -52.1...","[-3.9818, -4.0428, -4.1043, -4.1652, -4.2253, ...","[-3.9818, -4.0428, -4.1043, -4.1652, -4.2253, ..."
3,5,"[32.3329, 32.6776, 33.0236, 33.3702, 33.7185, ...","[-39.391, -39.4929, -39.5953, -39.6976, -39.80...","[-2.528, -2.5178, -2.5063, -2.493, -2.4773, -2...","[-2.528, -2.5178, -2.5063, -2.493, -2.4773, -2..."
4,6,"[41.3668, 40.9533, 40.5302, 40.0987, 39.6602, ...","[-33.3359, -33.2451, -33.1526, -33.0578, -32.9...","[2.2951, 2.3364, 2.3789, 2.4223, 2.4661, 2.510...","[2.2951, 2.3364, 2.3789, 2.4223, 2.4661, 2.510..."
...,...,...,...,...,...
11015,13122,"[93.9303, 93.8177, 93.7045, 93.5918, 93.4789, ...","[-8.4139, -8.6203, -8.827, -9.0335, -9.2392, -...","[-5.1916, -5.1595, -5.1251, -5.0874, -5.0458, ...","[-5.1916, -5.1595, -5.1251, -5.0874, -5.0458, ..."
11016,13124,"[120.2573, 120.1821, 120.1044, 120.0238, 119.9...","[-53.5251, -53.4952, -53.4644, -53.4326, -53.4...","[0.74, 0.7601, 0.7806, 0.8014, 0.8227, 0.8446,...","[0.74, 0.7601, 0.7806, 0.8014, 0.8227, 0.8446,..."
11017,13126,"[42.5216, 42.689, 42.8598, 43.0337, 43.2098, 4...","[-42.5007, -42.5538, -42.6086, -42.6648, -42.7...","[-1.3153, -1.336, -1.3567, -1.3773, -1.3976, -...","[-1.3153, -1.336, -1.3567, -1.3773, -1.3976, -..."
11018,13127,"[120.2574, 120.0791, 119.8983, 119.7161, 119.5...","[-53.3208, -53.2526, -53.1841, -53.1142, -53.0...","[1.7136, 1.723, 1.7326, 1.7421, 1.7513, 1.7602...","[1.7136, 1.723, 1.7326, 1.7421, 1.7513, 1.7602..."


In [None]:
# Each new column depends on a single list-valued column, so the lists are mapped
# directly instead of building a Series for every row via DataFrame.apply(axis=1).
# The *_distance_travelled values are first-minus-last coordinate of the track.
remastered_df_2['x_distance_travelled'] = remastered_df_2['xcentres'].map(lambda xs: xs[0] - xs[-1])
remastered_df_2['y_distance_travelled'] = remastered_df_2['ycentres'].map(lambda ys: ys[0] - ys[-1])
# x coordinate of the first sample of the track
remastered_df_2['starting_x'] = remastered_df_2['xcentres'].map(lambda xs: xs[0])
remastered_df_2

Unnamed: 0,trackId,xcentres,ycentres,xvelocity,yvelocity,x_distance_travelled,y_distance_travelled,starting_x
0,2,"[84.3821, 84.5787, 84.7795, 84.9843, 85.1918, ...","[-69.4747, -69.3829, -69.2884, -69.1918, -69.0...","[2.2544, 2.3139, 2.3738, 2.4343, 2.4947, 2.554...","[2.2544, 2.3139, 2.3738, 2.4343, 2.4947, 2.554...",62.7748,-40.7965,84.3821
1,3,"[95.105, 95.4032, 95.7078, 96.0185, 96.3335, 9...","[-63.5575, -63.4384, -63.3192, -63.1997, -63.0...","[3.0222, 3.0078, 2.9925, 2.9757, 2.9569, 2.935...","[3.0222, 3.0078, 2.9925, 2.9757, 2.9569, 2.935...",-44.4688,4.6727,95.1050
2,4,"[57.4754, 57.6063, 57.7412, 57.8808, 58.023, 5...","[-51.4526, -51.6123, -51.7756, -51.9431, -52.1...","[-3.9818, -4.0428, -4.1043, -4.1652, -4.2253, ...","[-3.9818, -4.0428, -4.1043, -4.1652, -4.2253, ...",-81.7983,16.9811,57.4754
3,5,"[32.3329, 32.6776, 33.0236, 33.3702, 33.7185, ...","[-39.391, -39.4929, -39.5953, -39.6976, -39.80...","[-2.528, -2.5178, -2.5063, -2.493, -2.4773, -2...","[-2.528, -2.5178, -2.5063, -2.493, -2.4773, -2...",-106.4937,28.7958,32.3329
4,6,"[41.3668, 40.9533, 40.5302, 40.0987, 39.6602, ...","[-33.3359, -33.2451, -33.1526, -33.0578, -32.9...","[2.2951, 2.3364, 2.3789, 2.4223, 2.4661, 2.510...","[2.2951, 2.3364, 2.3789, 2.4223, 2.4661, 2.510...",19.5807,-4.5918,41.3668
...,...,...,...,...,...,...,...,...
11015,13122,"[93.9303, 93.8177, 93.7045, 93.5918, 93.4789, ...","[-8.4139, -8.6203, -8.827, -9.0335, -9.2392, -...","[-5.1916, -5.1595, -5.1251, -5.0874, -5.0458, ...","[-5.1916, -5.1595, -5.1251, -5.0874, -5.0458, ...",13.7127,15.9151,93.9303
11016,13124,"[120.2573, 120.1821, 120.1044, 120.0238, 119.9...","[-53.5251, -53.4952, -53.4644, -53.4326, -53.4...","[0.74, 0.7601, 0.7806, 0.8014, 0.8227, 0.8446,...","[0.74, 0.7601, 0.7806, 0.8014, 0.8227, 0.8446,...",82.0331,-21.1040,120.2573
11017,13126,"[42.5216, 42.689, 42.8598, 43.0337, 43.2098, 4...","[-42.5007, -42.5538, -42.6086, -42.6648, -42.7...","[-1.3153, -1.336, -1.3567, -1.3773, -1.3976, -...","[-1.3153, -1.336, -1.3567, -1.3773, -1.3976, -...",-38.8806,22.8402,42.5216
11018,13127,"[120.2574, 120.0791, 119.8983, 119.7161, 119.5...","[-53.3208, -53.2526, -53.1841, -53.1142, -53.0...","[1.7136, 1.723, 1.7326, 1.7421, 1.7513, 1.7602...","[1.7136, 1.723, 1.7326, 1.7421, 1.7513, 1.7602...",16.0627,-10.6474,120.2574


In [None]:
# Tracks whose first y sample lies below -70 ...
y_mask = remastered_df_2['ycentres'].apply(lambda ys: ys[0] < -70)
# ... and whose first x sample lies strictly between 65 and 80
x_mask = remastered_df_2['starting_x'].gt(65) & remastered_df_2['starting_x'].lt(80)
remastered_df_2 = remastered_df_2.loc[y_mask & x_mask]
remastered_df_2

Unnamed: 0,trackId,xcentres,ycentres,xvelocity,yvelocity,x_distance_travelled,y_distance_travelled,starting_x
11,15,"[75.7401, 75.77, 75.7994, 75.829, 75.8582, 75....","[-75.6879, -75.6525, -75.6173, -75.5826, -75.5...","[0.9006, 0.8934, 0.8859, 0.8782, 0.8702, 0.861...","[0.9006, 0.8934, 0.8859, 0.8782, 0.8702, 0.861...",-24.0058,-56.9290,75.7401
16,20,"[75.8591, 75.89, 75.919, 75.9487, 75.979, 76.0...","[-75.698, -75.6682, -75.6369, -75.606, -75.577...","[0.7715, 0.7671, 0.762, 0.7564, 0.7508, 0.7444...","[0.7715, 0.7671, 0.762, 0.7564, 0.7508, 0.7444...",-23.6182,-56.8889,75.8591
17,22,"[76.0323, 76.1478, 76.2662, 76.3883, 76.5136, ...","[-76.0603, -75.9453, -75.8279, -75.7085, -75.5...","[2.8917, 2.937, 2.983, 3.0294, 3.0754, 3.1207,...","[2.8917, 2.937, 2.983, 3.0294, 3.0754, 3.1207,...",54.4717,-47.1694,76.0323
18,24,"[75.4847, 75.641, 75.802, 75.9667, 76.1356, 76...","[-75.5734, -75.4082, -75.2409, -75.0716, -74.9...","[4.1332, 4.1512, 4.1683, 4.1835, 4.1963, 4.206...","[4.1332, 4.1512, 4.1683, 4.1835, 4.1963, 4.206...",-24.1698,-56.9944,75.4847
21,27,"[75.4185, 75.5821, 75.7503, 75.923, 76.0995, 7...","[-75.9985, -75.8237, -75.6479, -75.4719, -75.2...","[4.3653, 4.3485, 4.3293, 4.307, 4.2811, 4.2513...","[4.3653, 4.3485, 4.3293, 4.307, 4.2811, 4.2513...",54.1732,-47.4413,75.4185
...,...,...,...,...,...,...,...,...
11004,13109,"[68.3565, 68.4992, 68.6432, 68.7889, 68.9347, ...","[-87.7343, -87.4496, -87.1615, -86.8716, -86.5...","[7.1187, 7.1254, 7.1296, 7.1299, 7.1254, 7.115...","[7.1187, 7.1254, 7.1296, 7.1299, 7.1254, 7.115...",-31.6445,-69.2682,68.3565
11006,13111,"[68.6457, 68.7797, 68.9144, 69.0502, 69.186, 6...","[-88.0777, -87.8152, -87.5497, -87.2822, -87.0...","[6.5888, 6.6105, 6.6311, 6.6493, 6.6644, 6.675...","[6.5888, 6.6105, 6.6311, 6.6493, 6.6644, 6.675...",-31.0871,-69.6877,68.6457
11007,13113,"[68.0954, 68.222, 68.3522, 68.4841, 68.6159, 6...","[-86.3925, -86.1401, -85.8857, -85.6301, -85.3...","[6.2837, 6.2863, 6.2865, 6.2832, 6.2755, 6.263...","[6.2837, 6.2863, 6.2865, 6.2832, 6.2755, 6.263...",-31.6493,-67.6952,68.0954
11009,13115,"[68.602, 68.7278, 68.8554, 68.9851, 69.117, 69...","[-87.3016, -87.0545, -86.8028, -86.5477, -86.2...","[6.1822, 6.2446, 6.3069, 6.3681, 6.4273, 6.483...","[6.1822, 6.2446, 6.3069, 6.3681, 6.4273, 6.483...",-30.9507,-68.7692,68.6020


In [None]:
# Per-track mean of each list-valued velocity column
remastered_df_2 = remastered_df_2.assign(
    mean_velocity_x=lambda frame: frame['xvelocity'].apply(lambda values: np.mean(values)),
    mean_velocity_y=lambda frame: frame['yvelocity'].apply(lambda values: np.mean(values)),
)

In [None]:
# Draw 90 tracks; select_random_df samples with a fixed random_state, so the same rows are picked on every run
selected_df_2 = select_random_df(df=remastered_df_2, size_of_the_selected_df=90)
selected_df_2

Unnamed: 0,trackId,xcentres,ycentres,xvelocity,yvelocity,x_distance_travelled,y_distance_travelled,starting_x,mean_velocity_x,mean_velocity_y
6712,8050,"[68.6975, 68.741, 68.7853, 68.83, 68.8761, 68....","[-86.3906, -86.311, -86.2288, -86.1449, -86.05...","[1.9442, 1.9792, 2.0138, 2.0475, 2.0802, 2.111...","[1.9442, 1.9792, 2.0138, 2.0475, 2.0802, 2.111...",34.2674,-55.0741,68.6975,2.796894,2.796894
1503,1796,"[75.449, 75.485, 75.5235, 75.565, 75.6091, 75....","[-75.38, -75.3334, -75.2845, -75.2334, -75.180...","[1.1413, 1.1918, 1.2432, 1.2954, 1.3481, 1.401...","[1.1413, 1.1918, 1.2432, 1.2954, 1.3481, 1.401...",-23.9963,-55.9145,75.4490,4.921797,4.921797
3240,3894,"[74.844, 74.886, 74.928, 74.9697, 75.0107, 75....","[-76.5926, -76.5361, -76.4797, -76.424, -76.36...","[1.3916, 1.362, 1.3303, 1.2963, 1.2601, 1.2218...","[1.3916, 1.362, 1.3303, 1.2963, 1.2601, 1.2218...",-24.5596,-56.8907,74.8440,2.518445,2.518445
7739,9255,"[69.8622, 69.9725, 70.0832, 70.1935, 70.3051, ...","[-85.1865, -84.9774, -84.7665, -84.555, -84.34...","[5.219, 5.1985, 5.175, 5.1475, 5.1159, 5.0797,...","[5.219, 5.1985, 5.175, 5.1475, 5.1159, 5.0797,...",-29.7937,-65.7687,69.8622,4.378320,4.378320
7169,8600,"[68.2979, 68.3983, 68.5011, 68.6051, 68.7106, ...","[-87.3448, -87.13, -86.9115, -86.6899, -86.466...","[5.362, 5.4092, 5.4558, 5.5009, 5.544, 5.5845,...","[5.362, 5.4092, 5.4558, 5.5009, 5.544, 5.5845,...",-31.4984,-68.4325,68.2979,6.682112,6.682112
...,...,...,...,...,...,...,...,...,...,...
6185,7408,"[71.6034, 71.9146, 72.2309, 72.552, 72.878, 73...","[-93.3193, -92.9442, -92.5643, -92.181, -91.79...","[9.374, 9.402, 9.4271, 9.4475, 9.4622, 9.4703,...","[9.374, 9.402, 9.4271, 9.4475, 9.4622, 9.4703,...",-56.6229,-23.4920,71.6034,4.098583,4.098583
2575,3091,"[75.4529, 75.4688, 75.485, 75.5015, 75.5179, 7...","[-76.3327, -76.3117, -76.2901, -76.268, -76.24...","[0.5012, 0.5028, 0.5037, 0.5039, 0.5033, 0.502...","[0.5012, 0.5028, 0.5037, 0.5039, 0.5033, 0.502...",55.2000,-48.4629,75.4529,2.751217,2.751217
10238,12207,"[68.4054, 68.4735, 68.5427, 68.6123, 68.6837, ...","[-87.7355, -87.5903, -87.4415, -87.2898, -87.1...","[3.6051, 3.6579, 3.7107, 3.7627, 3.8137, 3.863...","[3.6051, 3.6579, 3.7107, 3.7627, 3.8137, 3.863...",31.3450,-55.7363,68.4054,3.239186,3.239186
10911,13003,"[68.2124, 68.2831, 68.3549, 68.4266, 68.4975, ...","[-87.4568, -87.3187, -87.1778, -87.035, -86.89...","[3.4598, 3.4828, 3.5052, 3.5261, 3.545, 3.5614...","[3.4598, 3.4828, 3.5052, 3.5261, 3.545, 3.5614...",-31.7672,-68.6916,68.2124,4.867072,4.867072


In [None]:
# NOTE(review): repeats the display that ends the previous cell - candidate for removal
selected_df_2

In [None]:
# Print the working directory; the relative "drive/MyDrive/..." data paths are resolved against it
!pwd

/content


In [None]:
random_fig = go.Figure()
# One Scatter trace per sampled track, x positions against y positions
for xs, ys in zip(selected_df_2['xcentres'], selected_df_2['ycentres']):
    random_fig.add_trace(go.Scatter(x=xs, y=ys))
random_fig.update_layout(template="plotly_white")
random_fig.show()

In [None]:
def _step_lengths(coords):
    """Absolute change between consecutive coordinates, with a leading 0 for the first sample."""
    return [np.abs(coords[i] - coords[i - 1]) if i > 0 else 0 for i in range(len(coords))]


# Per-sample step length along each axis (same length as the coordinate list)
selected_df_2['xdistance_travelled'] = selected_df_2['xcentres'].apply(_step_lengths)
selected_df_2['ydistance_travelled'] = selected_df_2['ycentres'].apply(_step_lengths)

# Running total of the step lengths. The steps are already absolute values, so no
# further np.abs is needed, and only one column is read, so the column is mapped
# directly instead of applying a function to every row of the frame.
selected_df_2['cumulative_xdistance_travelled'] = selected_df_2['xdistance_travelled'].apply(lambda steps: np.cumsum(steps))
selected_df_2['cumulative_ydistance_travelled'] = selected_df_2['ydistance_travelled'].apply(lambda steps: np.cumsum(steps))
selected_df_2

Unnamed: 0,trackId,xcentres,ycentres,xvelocity,yvelocity,x_distance_travelled,y_distance_travelled,starting_x,mean_velocity_x,mean_velocity_y,xdistance_travelled,ydistance_travelled,cumulative_xdistance_travelled,cumulative_ydistance_travelled
6712,8050,"[68.6975, 68.741, 68.7853, 68.83, 68.8761, 68....","[-86.3906, -86.311, -86.2288, -86.1449, -86.05...","[1.9442, 1.9792, 2.0138, 2.0475, 2.0802, 2.111...","[1.9442, 1.9792, 2.0138, 2.0475, 2.0802, 2.111...",34.2674,-55.0741,68.6975,2.796894,2.796894,"[0, 0.04349999999999454, 0.04430000000000689, ...","[0, 0.07959999999999923, 0.08220000000000027, ...","[0.0, 0.04349999999999454, 0.08780000000000143...","[0.0, 0.07959999999999923, 0.1617999999999995,..."
1503,1796,"[75.449, 75.485, 75.5235, 75.565, 75.6091, 75....","[-75.38, -75.3334, -75.2845, -75.2334, -75.180...","[1.1413, 1.1918, 1.2432, 1.2954, 1.3481, 1.401...","[1.1413, 1.1918, 1.2432, 1.2954, 1.3481, 1.401...",-23.9963,-55.9145,75.4490,4.921797,4.921797,"[0, 0.036000000000001364, 0.03849999999999909,...","[0, 0.046599999999997976, 0.048900000000003274...","[0.0, 0.036000000000001364, 0.0745000000000004...","[0.0, 0.046599999999997976, 0.0955000000000012..."
3240,3894,"[74.844, 74.886, 74.928, 74.9697, 75.0107, 75....","[-76.5926, -76.5361, -76.4797, -76.424, -76.36...","[1.3916, 1.362, 1.3303, 1.2963, 1.2601, 1.2218...","[1.3916, 1.362, 1.3303, 1.2963, 1.2601, 1.2218...",-24.5596,-56.8907,74.8440,2.518445,2.518445,"[0, 0.04200000000000159, 0.04200000000000159, ...","[0, 0.05649999999999977, 0.056400000000010664,...","[0.0, 0.04200000000000159, 0.08400000000000318...","[0.0, 0.05649999999999977, 0.11290000000001044..."
7739,9255,"[69.8622, 69.9725, 70.0832, 70.1935, 70.3051, ...","[-85.1865, -84.9774, -84.7665, -84.555, -84.34...","[5.219, 5.1985, 5.175, 5.1475, 5.1159, 5.0797,...","[5.219, 5.1985, 5.175, 5.1475, 5.1159, 5.0797,...",-29.7937,-65.7687,69.8622,4.378320,4.378320,"[0, 0.11029999999999518, 0.11070000000000846, ...","[0, 0.2090999999999923, 0.2109000000000094, 0....","[0.0, 0.11029999999999518, 0.22100000000000364...","[0.0, 0.2090999999999923, 0.4200000000000017, ..."
7169,8600,"[68.2979, 68.3983, 68.5011, 68.6051, 68.7106, ...","[-87.3448, -87.13, -86.9115, -86.6899, -86.466...","[5.362, 5.4092, 5.4558, 5.5009, 5.544, 5.5845,...","[5.362, 5.4092, 5.4558, 5.5009, 5.544, 5.5845,...",-31.4984,-68.4325,68.2979,6.682112,6.682112,"[0, 0.1004000000000076, 0.10279999999998779, 0...","[0, 0.21480000000001098, 0.2184999999999917, 0...","[0.0, 0.1004000000000076, 0.20319999999999538,...","[0.0, 0.21480000000001098, 0.4333000000000027,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6185,7408,"[71.6034, 71.9146, 72.2309, 72.552, 72.878, 73...","[-93.3193, -92.9442, -92.5643, -92.181, -91.79...","[9.374, 9.402, 9.4271, 9.4475, 9.4622, 9.4703,...","[9.374, 9.402, 9.4271, 9.4475, 9.4622, 9.4703,...",-56.6229,-23.4920,71.6034,4.098583,4.098583,"[0, 0.3111999999999995, 0.31630000000001246, 0...","[0, 0.3751000000000033, 0.37989999999999213, 0...","[0.0, 0.3111999999999995, 0.6275000000000119, ...","[0.0, 0.3751000000000033, 0.7549999999999955, ..."
2575,3091,"[75.4529, 75.4688, 75.485, 75.5015, 75.5179, 7...","[-76.3327, -76.3117, -76.2901, -76.268, -76.24...","[0.5012, 0.5028, 0.5037, 0.5039, 0.5033, 0.502...","[0.5012, 0.5028, 0.5037, 0.5039, 0.5033, 0.502...",55.2000,-48.4629,75.4529,2.751217,2.751217,"[0, 0.015900000000002024, 0.016199999999997772...","[0, 0.021000000000000796, 0.021600000000006503...","[0.0, 0.015900000000002024, 0.0320999999999997...","[0.0, 0.021000000000000796, 0.0426000000000073..."
10238,12207,"[68.4054, 68.4735, 68.5427, 68.6123, 68.6837, ...","[-87.7355, -87.5903, -87.4415, -87.2898, -87.1...","[3.6051, 3.6579, 3.7107, 3.7627, 3.8137, 3.863...","[3.6051, 3.6579, 3.7107, 3.7627, 3.8137, 3.863...",31.3450,-55.7363,68.4054,3.239186,3.239186,"[0, 0.06810000000000116, 0.06919999999999504, ...","[0, 0.14520000000000266, 0.14879999999999427, ...","[0.0, 0.06810000000000116, 0.1372999999999962,...","[0.0, 0.14520000000000266, 0.29399999999999693..."
10911,13003,"[68.2124, 68.2831, 68.3549, 68.4266, 68.4975, ...","[-87.4568, -87.3187, -87.1778, -87.035, -86.89...","[3.4598, 3.4828, 3.5052, 3.5261, 3.545, 3.5614...","[3.4598, 3.4828, 3.5052, 3.5261, 3.545, 3.5614...",-31.7672,-68.6916,68.2124,4.867072,4.867072,"[0, 0.0707000000000022, 0.07179999999999609, 0...","[0, 0.13809999999999434, 0.14090000000000202, ...","[0.0, 0.0707000000000022, 0.1424999999999983, ...","[0.0, 0.13809999999999434, 0.27899999999999636..."


In [None]:
def get_normalized_cols_2(row):
  """
  Z-score normalizes the position and velocity series of one trajectory row.

  Each of 'xcentres', 'ycentres', 'xvelocity' and 'yvelocity' is centred on its
  own mean and divided by its own (population) standard deviation; the result is
  stored next to the source under the same name with an '_n' suffix.

  Parameters:
  row (pd.Series or dict-like): one trajectory holding the four source
      sequences listed above as list-likes of numbers

  Returns:
  the same row object, extended with 'xcentres_n', 'ycentres_n', 'xvelocity_n'
  and 'yvelocity_n' (numpy float arrays)
  """
  for col in ('xcentres', 'ycentres', 'xvelocity', 'yvelocity'):
    centred = np.asarray(row[col], dtype=float)
    centred = centred - np.mean(centred)
    spread = np.std(centred)
    # A constant series has zero spread: dividing by it would fill the series
    # with NaN (0/0) and contaminate every distance computed from it later.
    # Such a series is left as all zeros instead.
    if spread > 0:
      centred = centred / spread
    row[col + '_n'] = centred
  return row
# Normalize every track row by row (axis=1); the frame is rebound to the result,
# which carries the four additional '*_n' columns.
selected_df_2 = selected_df_2.apply(get_normalized_cols_2, axis=1)

In [None]:
# Display the frame to check the newly added normalized columns.
selected_df_2

Unnamed: 0,trackId,xcentres,ycentres,xvelocity,yvelocity,x_distance_travelled,y_distance_travelled,starting_x,mean_velocity_x,mean_velocity_y,xdistance_travelled,ydistance_travelled,cumulative_xdistance_travelled,cumulative_ydistance_travelled,xcentres_n,ycentres_n,xvelocity_n,yvelocity_n
6712,8050,"[68.6975, 68.741, 68.7853, 68.83, 68.8761, 68....","[-86.3906, -86.311, -86.2288, -86.1449, -86.05...","[1.9442, 1.9792, 2.0138, 2.0475, 2.0802, 2.111...","[1.9442, 1.9792, 2.0138, 2.0475, 2.0802, 2.111...",34.2674,-55.0741,68.6975,2.796894,2.796894,"[0, 0.04349999999999454, 0.04430000000000689, ...","[0, 0.07959999999999923, 0.08220000000000027, ...","[0.0, 0.04349999999999454, 0.08780000000000143...","[0.0, 0.07959999999999923, 0.1617999999999995,...","[-0.6383279104463575, -0.6356669906134351, -0....","[-1.817911784574487, -1.813802183273344, -1.80...","[-0.3588380401785353, -0.34410902996348675, -0...","[-0.3588380401785353, -0.34410902996348675, -0..."
1503,1796,"[75.449, 75.485, 75.5235, 75.565, 75.6091, 75....","[-75.38, -75.3334, -75.2845, -75.2334, -75.180...","[1.1413, 1.1918, 1.2432, 1.2954, 1.3481, 1.401...","[1.1413, 1.1918, 1.2432, 1.2954, 1.3481, 1.401...",-23.9963,-55.9145,75.4490,4.921797,4.921797,"[0, 0.036000000000001364, 0.03849999999999909,...","[0, 0.046599999999997976, 0.048900000000003274...","[0.0, 0.036000000000001364, 0.0745000000000004...","[0.0, 0.046599999999997976, 0.0955000000000012...","[-1.9517361455628592, -1.9474974249243227, -1....","[-1.3664906235599423, -1.3636278239988595, -1....","[-2.082748156725892, -2.0549267389843853, -2.0...","[-2.082748156725892, -2.0549267389843853, -2.0..."
3240,3894,"[74.844, 74.886, 74.928, 74.9697, 75.0107, 75....","[-76.5926, -76.5361, -76.4797, -76.424, -76.36...","[1.3916, 1.362, 1.3303, 1.2963, 1.2601, 1.2218...","[1.3916, 1.362, 1.3303, 1.2963, 1.2601, 1.2218...",-24.5596,-56.8907,74.8440,2.518445,2.518445,"[0, 0.04200000000000159, 0.04200000000000159, ...","[0, 0.05649999999999977, 0.056400000000010664,...","[0.0, 0.04200000000000159, 0.08400000000000318...","[0.0, 0.05649999999999977, 0.11290000000001044...","[-0.7733124296270529, -0.7691229271941602, -0....","[-0.6455678958460487, -0.6416460361224797, -0....","[-0.31643033571505885, -0.32474233534689606, -...","[-0.31643033571505885, -0.32474233534689606, -..."
7739,9255,"[69.8622, 69.9725, 70.0832, 70.1935, 70.3051, ...","[-85.1865, -84.9774, -84.7665, -84.555, -84.34...","[5.219, 5.1985, 5.175, 5.1475, 5.1159, 5.0797,...","[5.219, 5.1985, 5.175, 5.1475, 5.1159, 5.0797,...",-29.7937,-65.7687,69.8622,4.378320,4.378320,"[0, 0.11029999999999518, 0.11070000000000846, ...","[0, 0.2090999999999923, 0.2109000000000094, 0....","[0.0, 0.11029999999999518, 0.22100000000000364...","[0.0, 0.2090999999999923, 0.4200000000000017, ...","[-1.489462457606532, -1.479236810848507, -1.46...","[-1.3493298601433341, -1.337463899142281, -1.3...","[0.33136705919174114, 0.32328666729935074, 0.3...","[0.33136705919174114, 0.32328666729935074, 0.3..."
7169,8600,"[68.2979, 68.3983, 68.5011, 68.6051, 68.7106, ...","[-87.3448, -87.13, -86.9115, -86.6899, -86.466...","[5.362, 5.4092, 5.4558, 5.5009, 5.544, 5.5845,...","[5.362, 5.4092, 5.4558, 5.5009, 5.544, 5.5845,...",-31.4984,-68.4325,68.2979,6.682112,6.682112,"[0, 0.1004000000000076, 0.10279999999998779, 0...","[0, 0.21480000000001098, 0.2184999999999917, 0...","[0.0, 0.1004000000000076, 0.20319999999999538,...","[0.0, 0.21480000000001098, 0.4333000000000027,...","[-1.8272230636321138, -1.81863269669458, -1.80...","[-1.5228675517680832, -1.5117650776017917, -1....","[-0.6543022417050256, -0.6309079589056438, -0....","[-0.6543022417050256, -0.6309079589056438, -0...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6185,7408,"[71.6034, 71.9146, 72.2309, 72.552, 72.878, 73...","[-93.3193, -92.9442, -92.5643, -92.181, -91.79...","[9.374, 9.402, 9.4271, 9.4475, 9.4622, 9.4703,...","[9.374, 9.402, 9.4271, 9.4475, 9.4622, 9.4703,...",-56.6229,-23.4920,71.6034,4.098583,4.098583,"[0, 0.3111999999999995, 0.31630000000001246, 0...","[0, 0.3751000000000033, 0.37989999999999213, 0...","[0.0, 0.3111999999999995, 0.6275000000000119, ...","[0.0, 0.3751000000000033, 0.7549999999999955, ...","[-1.6487947264494793, -1.6308300071648725, -1....","[-2.4212492268014763, -2.3696290665007367, -2....","[1.468860533247921, 1.4766567126835308, 1.4836...","[1.468860533247921, 1.4766567126835308, 1.4836..."
2575,3091,"[75.4529, 75.4688, 75.485, 75.5015, 75.5179, 7...","[-76.3327, -76.3117, -76.2901, -76.268, -76.24...","[0.5012, 0.5028, 0.5037, 0.5039, 0.5033, 0.502...","[0.5012, 0.5028, 0.5037, 0.5039, 0.5033, 0.502...",55.2000,-48.4629,75.4529,2.751217,2.751217,"[0, 0.015900000000002024, 0.016199999999997772...","[0, 0.021000000000000796, 0.021600000000006503...","[0.0, 0.015900000000002024, 0.0320999999999997...","[0.0, 0.021000000000000796, 0.0426000000000073...","[-0.0881057570902568, -0.087365189834976, -0.0...","[-1.53399185166161, -1.5328512151429428, -1.53...","[-0.6890002623267498, -0.6885103102386144, -0....","[-0.6890002623267498, -0.6885103102386144, -0...."
10238,12207,"[68.4054, 68.4735, 68.5427, 68.6123, 68.6837, ...","[-87.7355, -87.5903, -87.4415, -87.2898, -87.1...","[3.6051, 3.6579, 3.7107, 3.7627, 3.8137, 3.863...","[3.6051, 3.6579, 3.7107, 3.7627, 3.8137, 3.863...",31.3450,-55.7363,68.4054,3.239186,3.239186,"[0, 0.06810000000000116, 0.06919999999999504, ...","[0, 0.14520000000000266, 0.14879999999999427, ...","[0.0, 0.06810000000000116, 0.1372999999999962,...","[0.0, 0.14520000000000266, 0.29399999999999693...","[-0.664985205037199, -0.6607638307966537, -0.6...","[-1.8977114110714757, -1.8902364443015456, -1....","[0.12447264585457933, 0.14243356823176442, 0.1...","[0.12447264585457933, 0.14243356823176442, 0.1..."
10911,13003,"[68.2124, 68.2831, 68.3549, 68.4266, 68.4975, ...","[-87.4568, -87.3187, -87.1778, -87.035, -86.89...","[3.4598, 3.4828, 3.5052, 3.5261, 3.545, 3.5614...","[3.4598, 3.4828, 3.5052, 3.5261, 3.545, 3.5614...",-31.7672,-68.6916,68.2124,4.867072,4.867072,"[0, 0.0707000000000022, 0.07179999999999609, 0...","[0, 0.13809999999999434, 0.14090000000000202, ...","[0.0, 0.0707000000000022, 0.1424999999999983, ...","[0.0, 0.13809999999999434, 0.27899999999999636...","[-1.544067523693008, -1.5380466142229994, -1.5...","[-1.3859166700462657, -1.3786550037603225, -1....","[-0.613629167288581, -0.6036002101945205, -0.5...","[-0.613629167288581, -0.6036002101945205, -0.5..."


In [None]:
# Build one multivariate sequence per track: element t of 'features' is the tuple
# (xcentres_n[t], ycentres_n[t], xvelocity_n[t], yvelocity_n[t]); zip stops at the
# shortest of the four series.
# NOTE(review): in the displayed frame 'xvelocity' and 'yvelocity' (and therefore
# their normalized versions) show identical values, so two of the four features
# look duplicated -- verify how 'yvelocity' is populated upstream.
selected_df_2['features'] = selected_df_2.apply(lambda row: list(zip(row['xcentres_n'], row['ycentres_n'], row['xvelocity_n'], row['yvelocity_n'])), axis=1)
selected_df_2

Unnamed: 0,trackId,xcentres,ycentres,xvelocity,yvelocity,x_distance_travelled,y_distance_travelled,starting_x,mean_velocity_x,mean_velocity_y,xdistance_travelled,ydistance_travelled,cumulative_xdistance_travelled,cumulative_ydistance_travelled,xcentres_n,ycentres_n,xvelocity_n,yvelocity_n,features
6712,8050,"[68.6975, 68.741, 68.7853, 68.83, 68.8761, 68....","[-86.3906, -86.311, -86.2288, -86.1449, -86.05...","[1.9442, 1.9792, 2.0138, 2.0475, 2.0802, 2.111...","[1.9442, 1.9792, 2.0138, 2.0475, 2.0802, 2.111...",34.2674,-55.0741,68.6975,2.796894,2.796894,"[0, 0.04349999999999454, 0.04430000000000689, ...","[0, 0.07959999999999923, 0.08220000000000027, ...","[0.0, 0.04349999999999454, 0.08780000000000143...","[0.0, 0.07959999999999923, 0.1617999999999995,...","[-0.6383279104463575, -0.6356669906134351, -0....","[-1.817911784574487, -1.813802183273344, -1.80...","[-0.3588380401785353, -0.34410902996348675, -0...","[-0.3588380401785353, -0.34410902996348675, -0...","[(-0.6383279104463575, -1.817911784574487, -0...."
1503,1796,"[75.449, 75.485, 75.5235, 75.565, 75.6091, 75....","[-75.38, -75.3334, -75.2845, -75.2334, -75.180...","[1.1413, 1.1918, 1.2432, 1.2954, 1.3481, 1.401...","[1.1413, 1.1918, 1.2432, 1.2954, 1.3481, 1.401...",-23.9963,-55.9145,75.4490,4.921797,4.921797,"[0, 0.036000000000001364, 0.03849999999999909,...","[0, 0.046599999999997976, 0.048900000000003274...","[0.0, 0.036000000000001364, 0.0745000000000004...","[0.0, 0.046599999999997976, 0.0955000000000012...","[-1.9517361455628592, -1.9474974249243227, -1....","[-1.3664906235599423, -1.3636278239988595, -1....","[-2.082748156725892, -2.0549267389843853, -2.0...","[-2.082748156725892, -2.0549267389843853, -2.0...","[(-1.9517361455628592, -1.3664906235599423, -2..."
3240,3894,"[74.844, 74.886, 74.928, 74.9697, 75.0107, 75....","[-76.5926, -76.5361, -76.4797, -76.424, -76.36...","[1.3916, 1.362, 1.3303, 1.2963, 1.2601, 1.2218...","[1.3916, 1.362, 1.3303, 1.2963, 1.2601, 1.2218...",-24.5596,-56.8907,74.8440,2.518445,2.518445,"[0, 0.04200000000000159, 0.04200000000000159, ...","[0, 0.05649999999999977, 0.056400000000010664,...","[0.0, 0.04200000000000159, 0.08400000000000318...","[0.0, 0.05649999999999977, 0.11290000000001044...","[-0.7733124296270529, -0.7691229271941602, -0....","[-0.6455678958460487, -0.6416460361224797, -0....","[-0.31643033571505885, -0.32474233534689606, -...","[-0.31643033571505885, -0.32474233534689606, -...","[(-0.7733124296270529, -0.6455678958460487, -0..."
7739,9255,"[69.8622, 69.9725, 70.0832, 70.1935, 70.3051, ...","[-85.1865, -84.9774, -84.7665, -84.555, -84.34...","[5.219, 5.1985, 5.175, 5.1475, 5.1159, 5.0797,...","[5.219, 5.1985, 5.175, 5.1475, 5.1159, 5.0797,...",-29.7937,-65.7687,69.8622,4.378320,4.378320,"[0, 0.11029999999999518, 0.11070000000000846, ...","[0, 0.2090999999999923, 0.2109000000000094, 0....","[0.0, 0.11029999999999518, 0.22100000000000364...","[0.0, 0.2090999999999923, 0.4200000000000017, ...","[-1.489462457606532, -1.479236810848507, -1.46...","[-1.3493298601433341, -1.337463899142281, -1.3...","[0.33136705919174114, 0.32328666729935074, 0.3...","[0.33136705919174114, 0.32328666729935074, 0.3...","[(-1.489462457606532, -1.3493298601433341, 0.3..."
7169,8600,"[68.2979, 68.3983, 68.5011, 68.6051, 68.7106, ...","[-87.3448, -87.13, -86.9115, -86.6899, -86.466...","[5.362, 5.4092, 5.4558, 5.5009, 5.544, 5.5845,...","[5.362, 5.4092, 5.4558, 5.5009, 5.544, 5.5845,...",-31.4984,-68.4325,68.2979,6.682112,6.682112,"[0, 0.1004000000000076, 0.10279999999998779, 0...","[0, 0.21480000000001098, 0.2184999999999917, 0...","[0.0, 0.1004000000000076, 0.20319999999999538,...","[0.0, 0.21480000000001098, 0.4333000000000027,...","[-1.8272230636321138, -1.81863269669458, -1.80...","[-1.5228675517680832, -1.5117650776017917, -1....","[-0.6543022417050256, -0.6309079589056438, -0....","[-0.6543022417050256, -0.6309079589056438, -0....","[(-1.8272230636321138, -1.5228675517680832, -0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6185,7408,"[71.6034, 71.9146, 72.2309, 72.552, 72.878, 73...","[-93.3193, -92.9442, -92.5643, -92.181, -91.79...","[9.374, 9.402, 9.4271, 9.4475, 9.4622, 9.4703,...","[9.374, 9.402, 9.4271, 9.4475, 9.4622, 9.4703,...",-56.6229,-23.4920,71.6034,4.098583,4.098583,"[0, 0.3111999999999995, 0.31630000000001246, 0...","[0, 0.3751000000000033, 0.37989999999999213, 0...","[0.0, 0.3111999999999995, 0.6275000000000119, ...","[0.0, 0.3751000000000033, 0.7549999999999955, ...","[-1.6487947264494793, -1.6308300071648725, -1....","[-2.4212492268014763, -2.3696290665007367, -2....","[1.468860533247921, 1.4766567126835308, 1.4836...","[1.468860533247921, 1.4766567126835308, 1.4836...","[(-1.6487947264494793, -2.4212492268014763, 1...."
2575,3091,"[75.4529, 75.4688, 75.485, 75.5015, 75.5179, 7...","[-76.3327, -76.3117, -76.2901, -76.268, -76.24...","[0.5012, 0.5028, 0.5037, 0.5039, 0.5033, 0.502...","[0.5012, 0.5028, 0.5037, 0.5039, 0.5033, 0.502...",55.2000,-48.4629,75.4529,2.751217,2.751217,"[0, 0.015900000000002024, 0.016199999999997772...","[0, 0.021000000000000796, 0.021600000000006503...","[0.0, 0.015900000000002024, 0.0320999999999997...","[0.0, 0.021000000000000796, 0.0426000000000073...","[-0.0881057570902568, -0.087365189834976, -0.0...","[-1.53399185166161, -1.5328512151429428, -1.53...","[-0.6890002623267498, -0.6885103102386144, -0....","[-0.6890002623267498, -0.6885103102386144, -0....","[(-0.0881057570902568, -1.53399185166161, -0.6..."
10238,12207,"[68.4054, 68.4735, 68.5427, 68.6123, 68.6837, ...","[-87.7355, -87.5903, -87.4415, -87.2898, -87.1...","[3.6051, 3.6579, 3.7107, 3.7627, 3.8137, 3.863...","[3.6051, 3.6579, 3.7107, 3.7627, 3.8137, 3.863...",31.3450,-55.7363,68.4054,3.239186,3.239186,"[0, 0.06810000000000116, 0.06919999999999504, ...","[0, 0.14520000000000266, 0.14879999999999427, ...","[0.0, 0.06810000000000116, 0.1372999999999962,...","[0.0, 0.14520000000000266, 0.29399999999999693...","[-0.664985205037199, -0.6607638307966537, -0.6...","[-1.8977114110714757, -1.8902364443015456, -1....","[0.12447264585457933, 0.14243356823176442, 0.1...","[0.12447264585457933, 0.14243356823176442, 0.1...","[(-0.664985205037199, -1.8977114110714757, 0.1..."
10911,13003,"[68.2124, 68.2831, 68.3549, 68.4266, 68.4975, ...","[-87.4568, -87.3187, -87.1778, -87.035, -86.89...","[3.4598, 3.4828, 3.5052, 3.5261, 3.545, 3.5614...","[3.4598, 3.4828, 3.5052, 3.5261, 3.545, 3.5614...",-31.7672,-68.6916,68.2124,4.867072,4.867072,"[0, 0.0707000000000022, 0.07179999999999609, 0...","[0, 0.13809999999999434, 0.14090000000000202, ...","[0.0, 0.0707000000000022, 0.1424999999999983, ...","[0.0, 0.13809999999999434, 0.27899999999999636...","[-1.544067523693008, -1.5380466142229994, -1.5...","[-1.3859166700462657, -1.3786550037603225, -1....","[-0.613629167288581, -0.6036002101945205, -0.5...","[-0.613629167288581, -0.6036002101945205, -0.5...","[(-1.544067523693008, -1.3859166700462657, -0...."


In [None]:
# Pairwise matrix between the per-track feature sequences, produced by the
# create_similarity_metrix helper (defined elsewhere in this notebook) with
# metric='lcss'. The displayed result is square with 1.0 on the diagonal.
similarity2 = create_similarity_metrix(data=selected_df_2.features.tolist(), metric='lcss')
similarity2

array([[1.        , 0.43157895, 0.23326572, ..., 1.        , 0.34180791,
        0.60386473],
       [0.43157895, 1.        , 0.63859649, ..., 0.38245614, 0.80701754,
        0.93719807],
       [0.23326572, 0.63859649, 1.        , ..., 0.2900232 , 0.82485876,
        0.4589372 ],
       ...,
       [1.        , 0.38245614, 0.2900232 , ..., 1.        , 0.35310734,
        0.57004831],
       [0.34180791, 0.80701754, 0.82485876, ..., 0.35310734, 1.        ,
        0.92270531],
       [0.60386473, 0.93719807, 0.4589372 , ..., 0.57004831, 0.92270531,
        1.        ]])

In [None]:
# Cluster the tracks into 3 groups (fixed seed) and score the resulting partition.
# NOTE(review): the model is fitted on the rows of the similarity matrix and no
# `metric` is passed to TimeSeriesKMeans, so the clustering step presumably runs
# with the estimator's default metric rather than LCSS -- confirm this is intended.
km_lcss_velocity = TimeSeriesKMeans(n_clusters=3, max_iter=20, random_state=0)
k_means_clusters_lcss_velocity = km_lcss_velocity.fit_predict(X=similarity2)
measure_clustering_performance(X=similarity2, labels=k_means_clusters_lcss_velocity)

{'silhouette': 0.6660703580960396,
 'calinski_harabasz': 207.22848022044624,
 'davies_bouldin': 0.4757835390631915}

In [None]:
# Cluster label -> line color used when plotting the tracks.
# Every label must map to its own color, otherwise two clusters become
# indistinguishable in a figure; label 6 therefore does not reuse the "cyan"
# assigned to label 2.
km_color_dict = {0:"#a2e8ab",
                 1:"#fc71ff",
                 2:"cyan",
                 3:"black",
                 4:"purple",
                 5:"orange",
                 6:"red",
                 7:"grey",
                 8:"pink",
                 9:"brown",
                 10:"yellow",
                 11:"#d2b486",
                 12:"#d2691e"}

In [None]:

# Attach each track's cluster label, then translate the label into its plot color.
selected_df_2['k_means_clusters_lcss_velocity'] = k_means_clusters_lcss_velocity
selected_df_2['k_means_lcss_color'] = [
    km_color_dict[label]
    for label in selected_df_2['k_means_clusters_lcss_velocity']
]
selected_df_2

Unnamed: 0,trackId,xcentres,ycentres,xvelocity,yvelocity,x_distance_travelled,y_distance_travelled,starting_x,mean_velocity_x,mean_velocity_y,...,cumulative_ydistance_travelled,xcentres_n,ycentres_n,xvelocity_n,yvelocity_n,features,k_means_clusters_lcss_velocity,k_means_lcss_color,standard_cumulative_xdistance_travelled,standard_xvelocity
6712,8050,"[68.6975, 68.741, 68.7853, 68.83, 68.8761, 68....","[-86.3906, -86.311, -86.2288, -86.1449, -86.05...","[1.9442, 1.9792, 2.0138, 2.0475, 2.0802, 2.111...","[1.9442, 1.9792, 2.0138, 2.0475, 2.0802, 2.111...",34.2674,-55.0741,68.6975,2.796894,2.796894,...,"[0.0, 0.07959999999999923, 0.1617999999999995,...","[-0.6383279104463575, -0.6356669906134351, -0....","[-1.817911784574487, -1.813802183273344, -1.80...","[-0.3588380401785353, -0.34410902996348675, -0...","[-0.3588380401785353, -0.34410902996348675, -0...","[(-0.6383279104463575, -1.817911784574487, -0....",0,#a2e8ab,"[0.0, 0.042586498796934505, 0.0862495014786732...","[1.9442000000000002, 1.9784649990320209, 2.012..."
1503,1796,"[75.449, 75.485, 75.5235, 75.565, 75.6091, 75....","[-75.38, -75.3334, -75.2845, -75.2334, -75.180...","[1.1413, 1.1918, 1.2432, 1.2954, 1.3481, 1.401...","[1.1413, 1.1918, 1.2432, 1.2954, 1.3481, 1.401...",-23.9963,-55.9145,75.4490,4.921797,4.921797,...,"[0.0, 0.046599999999997976, 0.0955000000000012...","[-1.9517361455628592, -1.9474974249243227, -1....","[-1.3664906235599423, -1.3636278239988595, -1....","[-2.082748156725892, -2.0549267389843853, -2.0...","[-2.082748156725892, -2.0549267389843853, -2.0...","[(-1.9517361455628592, -1.3664906235599423, -2...",1,#fc71ff,"[0.0, 0.012779999613762385, 0.0333000004291547...","[1.1413, 1.1592274654448034, 1.188012500602006..."
3240,3894,"[74.844, 74.886, 74.928, 74.9697, 75.0107, 75....","[-76.5926, -76.5361, -76.4797, -76.424, -76.36...","[1.3916, 1.362, 1.3303, 1.2963, 1.2601, 1.2218...","[1.3916, 1.362, 1.3303, 1.2963, 1.2601, 1.2218...",-24.5596,-56.8907,74.8440,2.518445,2.518445,...,"[0.0, 0.05649999999999977, 0.11290000000001044...","[-0.7733124296270529, -0.7691229271941602, -0....","[-0.6455678958460487, -0.6416460361224797, -0....","[-0.31643033571505885, -0.32474233534689606, -...","[-0.31643033571505885, -0.32474233534689606, -...","[(-0.7733124296270529, -0.6455678958460487, -0...",1,#fc71ff,"[0.002814000070095169, 0.050441998958589564, 0...","[1.389616758477688, 1.3556283007860181, 1.3189..."
7739,9255,"[69.8622, 69.9725, 70.0832, 70.1935, 70.3051, ...","[-85.1865, -84.9774, -84.7665, -84.555, -84.34...","[5.219, 5.1985, 5.175, 5.1475, 5.1159, 5.0797,...","[5.219, 5.1985, 5.175, 5.1475, 5.1159, 5.0797,...",-29.7937,-65.7687,69.8622,4.378320,4.378320,...,"[0.0, 0.2090999999999923, 0.4200000000000017, ...","[-1.489462457606532, -1.479236810848507, -1.46...","[-1.3493298601433341, -1.337463899142281, -1.3...","[0.33136705919174114, 0.32328666729935074, 0.3...","[0.33136705919174114, 0.32328666729935074, 0.3...","[(-1.489462457606532, -1.3493298601433341, 0.3...",1,#fc71ff,"[0.0, 0.06959929805397681, 0.15291949894428097...","[5.219, 5.206064500361681, 5.189452500224113, ..."
7169,8600,"[68.2979, 68.3983, 68.5011, 68.6051, 68.7106, ...","[-87.3448, -87.13, -86.9115, -86.6899, -86.466...","[5.362, 5.4092, 5.4558, 5.5009, 5.544, 5.5845,...","[5.362, 5.4092, 5.4558, 5.5009, 5.544, 5.5845,...",-31.4984,-68.4325,68.2979,6.682112,6.682112,...,"[0.0, 0.21480000000001098, 0.4333000000000027,...","[-1.8272230636321138, -1.81863269669458, -1.80...","[-1.5228675517680832, -1.5117650776017917, -1....","[-0.6543022417050256, -0.6309079589056438, -0....","[-0.6543022417050256, -0.6309079589056438, -0....","[(-1.8272230636321138, -1.5228675517680832, -0...",1,#fc71ff,"[0.0, 0.02720839978456703, 0.07881400263310075...","[5.362, 5.374791040098667, 5.399052001237869, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6185,7408,"[71.6034, 71.9146, 72.2309, 72.552, 72.878, 73...","[-93.3193, -92.9442, -92.5643, -92.181, -91.79...","[9.374, 9.402, 9.4271, 9.4475, 9.4622, 9.4703,...","[9.374, 9.402, 9.4271, 9.4475, 9.4622, 9.4703,...",-56.6229,-23.4920,71.6034,4.098583,4.098583,...,"[0.0, 0.3751000000000033, 0.7549999999999955, ...","[-1.6487947264494793, -1.6308300071648725, -1....","[-2.4212492268014763, -2.3696290665007367, -2....","[1.468860533247921, 1.4766567126835308, 1.4836...","[1.468860533247921, 1.4766567126835308, 1.4836...","[(-1.6487947264494793, -2.4212492268014763, 1....",2,cyan,"[0.0, 0.0, 0.06846399962902057, 0.158089605045...","[9.374, 9.374, 9.380159720599652, 9.3882240004..."
2575,3091,"[75.4529, 75.4688, 75.485, 75.5015, 75.5179, 7...","[-76.3327, -76.3117, -76.2901, -76.268, -76.24...","[0.5012, 0.5028, 0.5037, 0.5039, 0.5033, 0.502...","[0.5012, 0.5028, 0.5037, 0.5039, 0.5033, 0.502...",55.2000,-48.4629,75.4529,2.751217,2.751217,...,"[0.0, 0.021000000000000796, 0.0426000000000073...","[-0.0881057570902568, -0.087365189834976, -0.0...","[-1.53399185166161, -1.5328512151429428, -1.53...","[-0.6890002623267498, -0.6885103102386144, -0....","[-0.6890002623267498, -0.6885103102386144, -0....","[(-0.0881057570902568, -1.53399185166161, -0.6...",0,#a2e8ab,"[0.0, 0.013085700219871278, 0.0273210006952290...","[0.5012, 0.5025168000221253, 0.503434500038623..."
10238,12207,"[68.4054, 68.4735, 68.5427, 68.6123, 68.6837, ...","[-87.7355, -87.5903, -87.4415, -87.2898, -87.1...","[3.6051, 3.6579, 3.7107, 3.7627, 3.8137, 3.863...","[3.6051, 3.6579, 3.7107, 3.7627, 3.8137, 3.863...",31.3450,-55.7363,68.4054,3.239186,3.239186,...,"[0.0, 0.14520000000000266, 0.29399999999999693...","[-0.664985205037199, -0.6607638307966537, -0.6...","[-1.8977114110714757, -1.8902364443015456, -1....","[0.12447264585457933, 0.14243356823176442, 0.1...","[0.12447264585457933, 0.14243356823176442, 0.1...","[(-0.664985205037199, -1.8977114110714757, 0.1...",0,#a2e8ab,"[0.0, 0.05400329883098693, 0.11342599802017005...","[3.6051, 3.646970399093628, 3.6924839984893807..."
10911,13003,"[68.2124, 68.2831, 68.3549, 68.4266, 68.4975, ...","[-87.4568, -87.3187, -87.1778, -87.035, -86.89...","[3.4598, 3.4828, 3.5052, 3.5261, 3.545, 3.5614...","[3.4598, 3.4828, 3.5052, 3.5261, 3.545, 3.5614...",-31.7672,-68.6916,68.2124,4.867072,4.867072,...,"[0.0, 0.13809999999999434, 0.27899999999999636...","[-1.544067523693008, -1.5380466142229994, -1.5...","[-1.3859166700462657, -1.3786550037603225, -1....","[-0.613629167288581, -0.6036002101945205, -0.5...","[-0.613629167288581, -0.6036002101945205, -0.5...","[(-1.544067523693008, -1.3859166700462657, -0....",1,#fc71ff,"[0.0, 0.03973339834809428, 0.09008599863052483...","[3.4598, 3.4727259994626047, 3.488847999572753..."


In [None]:
# Resample the cumulative x distance and the x velocity of every track to a common
# number of samples, so the series of different tracks can be compared point by point.
# NOTE(review): expand_arr_to_length_magic is defined in a later cell of this
# notebook; on a fresh kernel that cell has to be run before this one.
chosen_length = 500
selected_df_2['standard_cumulative_xdistance_travelled'] = selected_df_2.apply(lambda row: expand_arr_to_length_magic(np.array(row['cumulative_xdistance_travelled']), chosen_length), axis=1)
selected_df_2['standard_xvelocity'] = selected_df_2.apply(lambda row: expand_arr_to_length_magic(np.array(row['xvelocity']), chosen_length), axis=1)

In [None]:
def expand_arr_to_length_magic(arr: np.ndarray, tlen: int):
  """
  Resamples an array to `tlen` samples with cv2.resize (default interpolation).

  The values are divided by the array maximum and mapped onto a 0-255 scale
  before resizing, then mapped back afterwards, so the result is expressed in
  the units of the input.

  Parameters:
  arr (np.ndarray): series to resample; a 1-D array is treated as a single
      row, any other array is handed to cv2.resize unchanged
  tlen (int): number of samples in the returned series

  Returns:
  np.ndarray: first row of the resized array, `tlen` samples long

  Raises:
  AssertionError: if `arr` is not a numpy array
  ValueError: if `arr` is empty
  """
  assert isinstance(arr, np.ndarray)
  if arr.size == 0:
    raise ValueError("cannot resample an empty array")

  # cv2.resize needs a 2-D input; the input array itself is never modified
  # because the rescaling below allocates new arrays.
  temp = np.expand_dims(arr, axis=0) if arr.ndim == 1 else arr

  # Dividing by a zero maximum (e.g. an all-zero series) would turn every
  # sample into NaN/inf, so use a neutral scale in that case.
  max_val = temp.max()
  scale = max_val if max_val != 0 else 1.0

  temp_p = (temp / scale) * 255
  resized_temp_p = cv2.resize(temp_p, (tlen, 1))
  resized_temp = (resized_temp_p / 255) * scale
  return resized_temp[0]

In [None]:
# One line per track: x velocity against cumulative distance travelled along x,
# drawn in the color assigned to the track's k-means cluster.
kmeans_lcss_fig = go.Figure()
for index, row in selected_df_2.iterrows():
    kmeans_lcss_fig.add_trace(go.Scatter(x=row.cumulative_xdistance_travelled, y=row.xvelocity, line=dict(color=row['k_means_lcss_color'])))
kmeans_lcss_fig.update_layout(template="plotly_white", xaxis_title="Cumulative distance travelled in x axis", yaxis_title="xVelocity")
kmeans_lcss_fig.show()

In [None]:
# Spatial view of the clustering: one x/y path per track, drawn in the color of
# the track's cluster.
# NOTE(review): the figure is named "dtw" but is colored with the LCSS-based
# cluster colors ('k_means_lcss_color') -- confirm which clustering is meant.
kmeans_dtw_fig = go.Figure()
for index, row in selected_df_2.iterrows():
    kmeans_dtw_fig.add_trace(go.Scatter(x=row.xcentres, y=row.ycentres, line=dict(color=row['k_means_lcss_color'])))
# Axis titles name the plotted columns so the figure can be read on its own.
kmeans_dtw_fig.update_layout(template="plotly_white", xaxis_title="x centre", yaxis_title="y centre")
kmeans_dtw_fig.show()

In [None]:
# NOTE(review): these imports would be easier to find in the notebook's import cell.
import itertools
plot_indexes = [(1, 1), (1, 2), (1, 3)]
from plotly.subplots import make_subplots
# One panel per cluster: every member track resampled to the common length, plus
# the point-wise mean of the cluster drawn in black.
sub_fig = make_subplots(rows=1, cols=3)
for i, j in zip(range(3), plot_indexes):
  cluster_df = selected_df_2[selected_df_2['k_means_clusters_lcss_velocity']==i]
  for index, row in cluster_df.iterrows():
    # add_trace(row=, col=) replaces the deprecated append_trace.
    sub_fig.add_trace(go.Scatter(x=row.standard_cumulative_xdistance_travelled, y=row.standard_xvelocity, line=dict(color=row['k_means_lcss_color'])), row=j[0], col=j[1])
  mean_cluster_velocity = np.mean(cluster_df['standard_xvelocity'].to_list(), axis=0)
  mean_cluster_distance = np.mean(cluster_df['standard_cumulative_xdistance_travelled'].to_list(), axis=0)
  sub_fig.add_trace(go.Scatter(x=mean_cluster_distance, y=mean_cluster_velocity, line=dict(color='black')), row=j[0], col=j[1])
sub_fig.update_layout(template="plotly_white")
# xaxis_title / yaxis_title in update_layout only label the first subplot;
# update_xaxes / update_yaxes apply the titles to every panel.
sub_fig.update_xaxes(title_text="Cumulative distance travelled in x axis")
sub_fig.update_yaxes(title_text="xVelocity")
sub_fig.show()