<a href="https://colab.research.google.com/github/andyrids/trackinsight/blob/main/trackinsight.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
%pip install haversine -q

In [23]:
import pandas as pd
import numpy as np
from haversine import haversine_vector, Unit
from numpy.typing import ArrayLike, NDArray
from pandas import DataFrame, DatetimeIndex

In [None]:
def euclidean_distance(
    x: NDArray, y: NDArray, axis: "int | None" = None
) -> ArrayLike:
    """Return the Euclidean (L2) distance between `x` and `y`.

    Args:
        x: First coordinate array.
        y: Second coordinate array; must be subtractable from `x`.
        axis: Axis along which the norm of `x - y` is taken. The default
            `None` reduces over every element and yields a single scalar.
            Pass `axis=-1` to get one distance per point when `x` and `y`
            hold one coordinate tuple per row.

    Returns:
        A scalar when `axis` is `None`, otherwise an array with `axis`
        reduced away.
    """
    return np.linalg.norm(x - y, axis=axis)

In [None]:
# Sort the rows of `a` by the values in its first column.
# a = a[a[:, 0].argsort()]

In [None]:
# [[t, x, y], ...]
# np.split(a[:,1], np.unique(a[:, 0], return_index=True)[1][1:])

In [9]:
# Sample CSV from the PyVT repository; its first column is used as the row index.
AIS_SAMPLE_URL = "https://raw.githubusercontent.com/PilotLeaf/PyVT/main/traj_preprocess/ais_clean/data/1.csv"
data = pd.read_csv(AIS_SAMPLE_URL, index_col=0)

In [10]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 308 entries, 0 to 307
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   DRMMSI         308 non-null    int64  
 1   DRLATITUDE     308 non-null    float64
 2   DRLONGITUDE    308 non-null    float64
 3   DRDIRECTION    308 non-null    float64
 4   DRSPEED        308 non-null    float64
 5   DRGPSTIME      308 non-null    int64  
 6   STATUS         308 non-null    int64  
 7   DRTRUEHEADING  308 non-null    int64  
 8   DIRECTION      308 non-null    float64
dtypes: float64(5), int64(4)
memory usage: 24.1 KB


In [12]:
data.columns

Index(['DRMMSI', 'DRLATITUDE', 'DRLONGITUDE', 'DRDIRECTION', 'DRSPEED',
       'DRGPSTIME', 'STATUS', 'DRTRUEHEADING', 'DIRECTION'],
      dtype='object')

In [19]:
# Keep only the time, identifier, position and reported-speed columns.
kept_columns = ["DRGPSTIME", "DRMMSI", "DRLATITUDE", "DRLONGITUDE", "DRSPEED"]
data = data.filter(items=kept_columns)

In [20]:
# Display the filtered frame (the stored output of this cell is this table).
data

Unnamed: 0_level_0,DRGPSTIME,DRMMSI,DRLATITUDE,DRLONGITUDE,DRSPEED
INDEX,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1556415580,244726000,31.172785,122.668333,12.9
1,1556415599,244726000,31.172983,122.667032,12.9
2,1556415623,244726000,31.173205,122.665447,12.9
3,1556415659,244726000,31.173597,122.662968,12.9
4,1556415677,244726000,31.173783,122.661732,13.0
...,...,...,...,...,...
303,1556445869,244726000,31.103215,122.331262,9.9
304,1556445899,244726000,31.103185,122.329603,9.9
305,1556445920,244726000,31.103173,122.328535,9.9
306,1556445950,244726000,31.103153,122.326922,9.9


In [69]:
def assign_epoch_time(data: DataFrame, time_column: str) -> DataFrame:
    """Replace a datetime column with integer Unix epoch seconds.

    Args:
        data: Frame containing `time_column`.
        time_column: Name of a datetime-like column.

    Returns:
        A new frame (via `DataFrame.assign`) in which `time_column` holds
        whole seconds since the Unix epoch; sub-second parts are floored.
    """
    # `asi8` is expressed in the index's own resolution, so normalise to
    # nanoseconds before dividing; otherwise a datetime64[s]/[ms]/[us]
    # column would be scaled by the wrong factor.
    timestamps_ns = DatetimeIndex(data[time_column]).as_unit("ns").asi8
    return data.assign(**{time_column: timestamps_ns // 10**9})

In [70]:
def sort_trajectories(
        data: DataFrame,
        uuid_column: str = "ti_uuid",
        time_column: str = "ti_timestamp"
    ) -> DataFrame:
    """Order rows by trajectory identifier, then by time within each one.

    Args:
        data: Frame holding one row per position report.
        uuid_column: Column identifying the trajectory a row belongs to.
        time_column: Column used to order rows inside a trajectory.

    Returns:
        A sorted frame whose index is renumbered from zero; the previous
        index is discarded.
    """
    sort_keys = [uuid_column, time_column]
    ordered = data.sort_values(by=sort_keys)
    # Drop the old labels so the index follows the sorted order.
    return ordered.reset_index(drop=True)

In [71]:
def elapsed_seconds(
        data: DataFrame,
        uuid_column: str = "ti_uuid",
        time_column: str = "ti_timestamp"
    ) -> DataFrame:
    """Add the time difference between consecutive rows of each trajectory.

    The difference is taken in row order, so the frame should already be
    sorted by time within each trajectory. The result is in seconds only if
    `time_column` holds epoch seconds (presumably the case after
    `assign_epoch_time`; confirm for other inputs).

    Args:
        data: Frame holding one row per position report.
        uuid_column: Column identifying the trajectory a row belongs to.
        time_column: Column whose row-to-row difference is computed.

    Returns:
        A new frame with an extra `ti_elapsed_time_s` column; the first row
        of every trajectory has no predecessor and is therefore NaN.
    """
    per_track_times = data.groupby(uuid_column)[time_column]
    step_durations = per_track_times.diff()
    return data.assign(ti_elapsed_time_s=step_durations)

In [86]:
def elapsed_distance(
        data: DataFrame,
        uuid_column: str = "ti_uuid",
        x_column: str = "ti_x",
        y_column: str = "ti_y"
    ) -> DataFrame:
    """Add the haversine distance between consecutive rows of each trajectory.

    Args:
        data: Frame holding one row per position report.
        uuid_column: Column identifying the trajectory a row belongs to.
        x_column: Column passed as the second coordinate (longitude in this
            notebook's usage).
        y_column: Column passed as the first coordinate (latitude in this
            notebook's usage).

    Returns:
        The per-trajectory frames with an extra `ti_distance_m` column in
        metres. Because the helper is applied per group with
        `include_groups=False`, the grouping column is not among the
        returned columns; in the notebook's rendered output it appears as
        the outer index level instead.
    """
    coordinate_columns = [y_column, x_column]

    def _with_step_distance(track: DataFrame) -> DataFrame:
        """Attach the distance from the previous row to every row of `track`."""
        current_fix = track[coordinate_columns]
        # Shifting by one row pairs each fix with its predecessor; the first
        # row is paired with NaN coordinates.
        previous_fix = current_fix.shift()
        step_m = haversine_vector(
            current_fix, previous_fix, Unit.METERS, check=False
        )
        return track.assign(ti_distance_m=step_m)

    per_track = data.groupby(uuid_column)
    return per_track.apply(_with_step_distance, include_groups=False)

In [90]:
def calculate_speed(
    data: DataFrame,
    uuid_column: str = "ti_uuid",
    elapsed_time_column: str = "ti_elapsed_time_s",
    elapsed_distance_column: str = "ti_distance_m"
) -> DataFrame:
    """Derive speed from the per-row distance and elapsed-time columns.

    Args:
        data: Frame containing the elapsed distance and time columns.
        uuid_column: Accepted for signature parity with the other pipeline
            steps; not used by this function.
        elapsed_time_column: Column holding the time since the previous row.
        elapsed_distance_column: Column holding the distance since the
            previous row.

    Returns:
        A new frame with `ti_speed_mps` (distance divided by time) and
        `ti_speed_kts` (that value multiplied by 1.943844).
    """
    metres = data[elapsed_distance_column]
    seconds = data[elapsed_time_column]
    metres_per_second = metres / seconds
    return data.assign(
        ti_speed_mps=metres_per_second,
        ti_speed_kts=metres_per_second * 1.943844,
    )

In [74]:
# Column names of the loaded sample, passed to the pipeline functions below.
uuid_column = "DRMMSI"  # trajectory identifier used for grouping
time_column = "DRGPSTIME"  # timestamp column (int64 in the loaded CSV)
x_column = "DRLONGITUDE"  # x coordinate = longitude
y_column = "DRLATITUDE"  # y coordinate = latitude

In [88]:
def transform_trajectories(
    data: DataFrame,
    uuid_column: str,
    time_column: str,
    x_column: str,
    y_column: str,
) -> DataFrame:
    """Run the trajectory pipeline: sort, time deltas, distances, speeds.

    Args:
        data: Frame holding one row per position report.
        uuid_column: Column identifying the trajectory a row belongs to.
        time_column: Timestamp column; either numeric epoch values or a
            nanosecond-resolution datetime column (tz-naive or tz-aware).
        x_column: Column passed to `elapsed_distance` as the x coordinate.
        y_column: Column passed to `elapsed_distance` as the y coordinate.

    Returns:
        The frame produced by chaining `sort_trajectories`,
        `elapsed_seconds`, `elapsed_distance` and `calculate_speed`.
    """
    # Recognise tz-aware nanosecond columns as well: their dtype name is
    # e.g. "datetime64[ns, UTC]", so an exact comparison against
    # "datetime64[ns]" would leave them unconverted and the later speed
    # division would receive Timedelta values instead of seconds.
    if pd.api.types.is_datetime64_ns_dtype(data[time_column]):
        data = data.pipe(assign_epoch_time, time_column)

    return (
        data
        .pipe(sort_trajectories, uuid_column, time_column)
        .pipe(elapsed_seconds, uuid_column, time_column)
        .pipe(elapsed_distance, uuid_column, x_column, y_column)
        .pipe(calculate_speed, uuid_column)
    )

In [91]:
# Step-by-step form of the pipeline: the same four stages that
# `transform_trajectories` chains, without its datetime-to-epoch conversion
# (DRGPSTIME is already int64 in the loaded sample). The result is only
# displayed, not assigned.
(
    data
    .pipe(sort_trajectories, uuid_column, time_column)
    .pipe(elapsed_seconds, uuid_column, time_column)
    .pipe(elapsed_distance, uuid_column, x_column, y_column)
    .pipe(calculate_speed, uuid_column)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,DRGPSTIME,DRLATITUDE,DRLONGITUDE,DRSPEED,ti_elapsed_time_s,ti_distance_m,ti_speed_mps,ti_speed_kts
DRMMSI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
244726000,0,1556415580,31.172785,122.668333,12.9,,,,
244726000,1,1556415599,31.172983,122.667032,12.9,19.0,125.782026,6.620107,12.868455
244726000,2,1556415623,31.173205,122.665447,12.9,24.0,152.797062,6.366544,12.375569
244726000,3,1556415659,31.173597,122.662968,12.9,36.0,239.780926,6.660581,12.947131
244726000,4,1556415677,31.173783,122.661732,13.0,18.0,119.465403,6.636967,12.901228
244726000,...,...,...,...,...,...,...,...,...
244726000,303,1556445869,31.103215,122.331262,9.9,30.0,149.160800,4.972027,9.664844
244726000,304,1556445899,31.103185,122.329603,9.9,30.0,157.930635,5.264354,10.233084
244726000,305,1556445920,31.103173,122.328535,9.9,21.0,101.720586,4.843837,9.415664
244726000,306,1556445950,31.103153,122.326922,9.9,30.0,153.617611,5.120587,9.953622


In [89]:
transform_trajectories(data, uuid_column, time_column, x_column, y_column)

Unnamed: 0_level_0,Unnamed: 1_level_0,DRGPSTIME,DRLATITUDE,DRLONGITUDE,DRSPEED,ti_elapsed_time_s,ti_distance_m,ti_speed_mps
DRMMSI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
244726000,0,1556415580,31.172785,122.668333,12.9,,,
244726000,1,1556415599,31.172983,122.667032,12.9,19.0,125.782026,6.620107
244726000,2,1556415623,31.173205,122.665447,12.9,24.0,152.797062,6.366544
244726000,3,1556415659,31.173597,122.662968,12.9,36.0,239.780926,6.660581
244726000,4,1556415677,31.173783,122.661732,13.0,18.0,119.465403,6.636967
244726000,...,...,...,...,...,...,...,...
244726000,303,1556445869,31.103215,122.331262,9.9,30.0,149.160800,4.972027
244726000,304,1556445899,31.103185,122.329603,9.9,30.0,157.930635,5.264354
244726000,305,1556445920,31.103173,122.328535,9.9,21.0,101.720586,4.843837
244726000,306,1556445950,31.103153,122.326922,9.9,30.0,153.617611,5.120587


In [52]:
pd.to_datetime(data[time_column], unit="s").dtype.name #pd.TimedeltaIndex(data[time_column]).asi8#// 10**6

'datetime64[ns]'

In [57]:
pd.DatetimeIndex(
    pd.to_datetime(data[time_column], unit="s")
).asi8 // 10**9

array([1556415580, 1556415599, 1556415623, 1556415659, 1556415677,
       1556415719, 1556415750, 1556415771, 1556415791, 1556415831,
       1556415900, 1556415930, 1556415951, 1556415980, 1556416000,
       1556416020, 1556416040, 1556416060, 1556416087, 1556416130,
       1556416170, 1556416188, 1556416211, 1556416230, 1556416251,
       1556416300, 1556416320, 1556416385, 1556416414, 1556416430,
       1556416448, 1556416468, 1556416551, 1556416581, 1556416601,
       1556416622, 1556416696, 1556416877, 1556417057, 1556417417,
       1556417597, 1556417777, 1556418317, 1556418497, 1556418677,
       1556419937, 1556420117, 1556420297, 1556420477, 1556420837,
       1556421017, 1556421377, 1556421737, 1556421917, 1556422097,
       1556422818, 1556423357, 1556424437, 1556424977, 1556425157,
       1556425337, 1556425877, 1556426057, 1556426236, 1556426597,
       1556426778, 1556427137, 1556427317, 1556427677, 1556428037,
       1556428217, 1556428577, 1556428757, 1556428937, 1556429

In [None]:
# prompt: convert mps to kts

def convert_mps_to_kts(mps):
  """Converts meters per second (mps) to knots (kts).

  Args:
    mps: Speed in meters per second; a scalar or any object that supports
      multiplication by a float (e.g. a pandas Series).

  Returns:
    Speed in knots.
  """
  # 1 knot = 1852 m per hour, so 1 m/s = 3600 / 1852 ~= 1.943844 kts.
  # This is the same factor `calculate_speed` applies.
  return mps * 1.943844

# Example usage: `ti_speed_mps` is produced by the pipeline, so derive the
# knots column from the transformed frame instead of mutating raw `data`,
# which has no such column.
trajectories = (
    transform_trajectories(data, uuid_column, time_column, x_column, y_column)
    .assign(ti_speed_kts=lambda frame: convert_mps_to_kts(frame["ti_speed_mps"]))
)