In [1]:
import numpy as np
import os
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import MinMaxScaler

print(os.cpu_count())

# Optional: Allow TensorFlow to dynamically adjust the number of threads
#tf.config.threading.set_intra_op_parallelism_threads(32)
#tf.config.threading.set_inter_op_parallelism_threads()

print("Intra-op parallelism threads:", tf.config.threading.get_intra_op_parallelism_threads())
print("Inter-op parallelism threads:", tf.config.threading.get_inter_op_parallelism_threads())

2024-10-28 16:45:03.473380: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-28 16:45:03.473807: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-28 16:45:03.476049: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-28 16:45:03.482323: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1730130303.493555 1035037 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1730130303.49

32
Intra-op parallelism threads: 0
Inter-op parallelism threads: 0


2024-10-28 16:45:04.625613: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [2]:
sequence_length = 100  # Length of the sequence to be fed into the model
num_targets = 4  # Number of targets in your data
future_target = 50  # The maximum future timestep to predict
num_features=18

In [3]:
train_data = pd.read_csv("./Datasets/ais_train.csv", delimiter="|")
test_data = pd.read_csv("./Datasets/ais_test.csv", delimiter=",")

In [4]:
vessel_data = pd.read_csv("./Datasets/vessels.csv", delimiter="|")
port_data = pd.read_csv("./Datasets/ports.csv", delimiter="|")

In [5]:
train_data["time"] = pd.to_datetime(train_data["time"])
test_data["time"] = pd.to_datetime(test_data["time"])

In [6]:
train_data = train_data.merge(
    vessel_data[["vesselId", "shippingLineId"]], on="vesselId", how="left"
)

In [7]:
port_data_renamed = pd.DataFrame()
port_data_renamed[["portId", "port_latitude", "port_longitude"]] = port_data[
    ["portId", "latitude", "longitude"]
]
train_data = train_data.merge(port_data_renamed, on="portId", how="left")

In [8]:
print(train_data.columns)

Index(['time', 'cog', 'sog', 'rot', 'heading', 'navstat', 'etaRaw', 'latitude',
       'longitude', 'vesselId', 'portId', 'shippingLineId', 'port_latitude',
       'port_longitude'],
      dtype='object')


In [9]:
train_data_preprocessed = train_data
train_data_preprocessed.loc[train_data_preprocessed["cog"] >= 360, "cog"] = np.nan
train_data_preprocessed.loc[train_data_preprocessed["sog"] >= 1023, "sog"] = np.nan
train_data_preprocessed.loc[train_data_preprocessed["rot"] == -128, "rot"] = np.nan
train_data_preprocessed.loc[train_data_preprocessed["heading"] == 511, "heading"] = (
    np.nan
)


pattern = r"^\d{2}-\d{2} \d{2}:\d{2}$"
train_data_preprocessed["etaRaw"] = train_data_preprocessed["etaRaw"].where(
    train_data_preprocessed["etaRaw"].str.match(pattern, na=False), np.nan
)


train_data_preprocessed = train_data_preprocessed.sort_values("time")

print(train_data_preprocessed.head())


train_data_preprocessed = (
    train_data_preprocessed.groupby("vesselId")
    .apply(lambda group: group.ffill().bfill())
    .reset_index(drop=True)
)


print(train_data_preprocessed.head())

train_data_preprocessed["heading"] = train_data_preprocessed["heading"].fillna(0)

train_data_preprocessed = train_data_preprocessed.dropna().reset_index(drop=True)


# Replace '00-' in etaRaw with the corresponding month and day from the 'time' column
train_data_preprocessed["etaRaw"] = train_data_preprocessed["etaRaw"].mask(
    train_data_preprocessed["etaRaw"].str.contains("00-", na=False),
    "01" + train_data_preprocessed["etaRaw"].str[2:],
)

train_data_preprocessed["etaRaw"] = train_data_preprocessed["etaRaw"].mask(
    train_data_preprocessed["etaRaw"].str.contains("-00", na=False),
    train_data_preprocessed["etaRaw"].str[:2]
    + "-01"
    + train_data_preprocessed["etaRaw"].str[5:],
)

train_data_preprocessed["etaRaw"] = train_data_preprocessed["etaRaw"].mask(
    train_data_preprocessed["etaRaw"].str.contains(":60", na=False),
    train_data_preprocessed["etaRaw"].str[:9] + "59",
)

train_data_preprocessed["etaRaw"] = train_data_preprocessed["etaRaw"].mask(
    train_data_preprocessed["etaRaw"].str.contains("60:", na=False),
    train_data_preprocessed["etaRaw"].str[:6] + "01:00",
)

train_data_preprocessed["etaRaw"] = train_data_preprocessed["etaRaw"].mask(
    train_data_preprocessed["etaRaw"].str.contains("24:", na=False),
    train_data_preprocessed["etaRaw"].str[:6] + "23:59",
)


train_data_preprocessed["etaRaw"] = pd.to_datetime(
    train_data_preprocessed["time"].dt.year.astype(str)
    + "-"
    + train_data_preprocessed["etaRaw"]
    + ":00",
    format="%Y-%m-%d %H:%M:%S",
)


train_data_preprocessed["seconds_to_eta"] = (
    train_data_preprocessed["etaRaw"] - train_data_preprocessed["time"]
).dt.total_seconds()

train_data_preprocessed = train_data_preprocessed.drop(columns=["etaRaw"])

                 time    cog   sog  rot  heading  navstat       etaRaw  \
0 2024-01-01 00:00:25  284.0   0.7  0.0     88.0        0  01-09 23:00   
1 2024-01-01 00:00:36  109.6   0.0 -6.0    347.0        1  12-29 20:00   
2 2024-01-01 00:01:45  111.0  11.0  0.0    112.0        0  01-02 09:00   
3 2024-01-01 00:03:11   96.4   0.0  0.0    142.0        1  12-31 20:00   
4 2024-01-01 00:03:51  214.0  19.7  0.0    215.0        0  01-25 12:00   

   latitude  longitude                  vesselId                    portId  \
0 -34.74370  -57.85130  61e9f3a8b937134a3c4bfdf7  61d371c43aeaecc07011a37f   
1   8.89440  -79.47939  61e9f3d4b937134a3c4bff1f  634c4de270937fc01c3a7689   
2  39.19065  -76.47567  61e9f436b937134a3c4c0131  61d3847bb7b7526e1adf3d19   
3 -34.41189  151.02067  61e9f3b4b937134a3c4bfe77  61d36f770a1807568ff9a126   
4  35.88379   -5.91636  61e9f41bb937134a3c4c0087  634c4de270937fc01c3a74f3   

             shippingLineId  port_latitude  port_longitude  
0  61ec65aea8cafc0e93f0e9

  .apply(lambda group: group.ffill().bfill())


                 time    cog   sog  rot  heading  navstat       etaRaw  \
0 2024-01-12 14:07:47  308.1  17.1 -6.0    316.0        0  01-08 06:00   
1 2024-01-12 14:31:00  307.6  17.3  5.0    313.0        0  01-14 23:30   
2 2024-01-12 14:57:23  306.8  16.9  5.0    312.0        0  01-14 23:30   
3 2024-01-12 15:18:48  307.9  16.9  6.0    313.0        0  01-14 23:30   
4 2024-01-12 15:39:47  307.0  16.3  7.0    313.0        0  01-14 23:30   

   latitude  longitude                  vesselId                    portId  \
0   7.50361   77.58340  61e9f38eb937134a3c4bfd8b  61d376b393c6feb83e5eb50c   
1   7.57302   77.49505  61e9f38eb937134a3c4bfd8b  61d376d893c6feb83e5eb546   
2   7.65043   77.39404  61e9f38eb937134a3c4bfd8b  61d376d893c6feb83e5eb546   
3   7.71275   77.31394  61e9f38eb937134a3c4bfd8b  61d376d893c6feb83e5eb546   
4   7.77191   77.23585  61e9f38eb937134a3c4bfd8b  61d376d893c6feb83e5eb546   

             shippingLineId  port_latitude  port_longitude  
0  61a8e672f9cba188601e84

In [10]:
train_data_engineered = train_data_preprocessed
train_latitude_radians = np.deg2rad(train_data_engineered["latitude"])
train_longitude_radians = np.deg2rad(train_data_engineered["longitude"])
train_cog_radians = np.deg2rad(train_data_engineered["cog"])
train_heading_radians = np.deg2rad(train_data_engineered["heading"])

port_latitude_radians = np.deg2rad(train_data_engineered["port_latitude"])
port_longitude_radians = np.deg2rad(train_data_engineered["port_longitude"])

train_hour = np.deg2rad(train_data_engineered["time"].dt.hour * 360 / 24)
train_day = np.deg2rad(train_data_engineered["time"].dt.day * 360 / 30)
train_month = np.deg2rad(train_data_engineered["time"].dt.month * 360 / 12)


train_latitude_sin = np.sin(train_latitude_radians)
train_latitude_cos = np.cos(train_latitude_radians)
train_longitude_sin = np.sin(train_longitude_radians)
train_longitude_cos = np.cos(train_longitude_radians)

port_latitude_sin = np.sin(port_latitude_radians)
port_latitude_cos = np.cos(port_latitude_radians)
port_longitude_sin = np.sin(port_longitude_radians)
port_longitude_cos = np.cos(port_longitude_radians)

train_cog_sin = np.sin(train_cog_radians)
train_cog_cos = np.cos(train_cog_radians)

train_heading_sin = np.sin(train_heading_radians)
train_heading_cos = np.cos(train_heading_radians)

train_hour_sin = np.sin(train_hour)
train_hour_cos = np.cos(train_hour)

train_day_sin = np.sin(train_day)
train_day_cos = np.cos(train_day)

train_month_sin = np.sin(train_month)
train_month_cos = np.cos(train_month)


train_data_engineered["latitude_sin"] = train_latitude_sin
train_data_engineered["latitude_cos"] = train_latitude_cos
train_data_engineered["longitude_sin"] = train_longitude_sin
train_data_engineered["longitude_cos"] = train_longitude_cos
train_data_engineered["port_latitude_sin"] = port_latitude_sin
train_data_engineered["port_latitude_cos"] = port_latitude_cos
train_data_engineered["port_longitude_sin"] = port_longitude_sin
train_data_engineered["port_longitude_cos"] = port_longitude_cos
train_data_engineered["cog_sin"] = train_cog_sin
train_data_engineered["cog_cos"] = train_cog_cos
train_data_engineered["heading_sin"] = train_heading_sin
train_data_engineered["heading_cos"] = train_heading_cos

train_data_engineered["hour_sin"] = train_hour_sin
train_data_engineered["hour_cos"] = train_hour_cos
train_data_engineered["day_sin"] = train_day_sin
train_data_engineered["day_cos"] = train_day_cos
train_data_engineered["month_sin"] = train_month_sin
train_data_engineered["month_cos"] = train_month_cos


train_data_engineered["cog_sog_sin"] = (
    train_data_engineered["cog_sin"] * train_data_engineered["sog"]
)
train_data_engineered["cog_sog_cos"] = (
    train_data_engineered["cog_cos"] * train_data_engineered["sog"]
)

train_data_engineered = train_data_engineered.drop(
    columns=[
        "latitude",
        "longitude",
        "cog",
        "heading",
        "portId",
        "cog_sin",
        "cog_cos",
        "sog",
        "shippingLineId",
        "navstat",
        "heading_sin",
        "heading_cos",
        "port_latitude",
        "port_longitude",
        "rot", #probably not important, removed because I needed only 18 features
    ],
    axis=1,
)
print(train_data_engineered.columns)

seconds_to_eta_scaler = MinMaxScaler()
rot_scaler = MinMaxScaler()
cog_sog_sin_scaler = MinMaxScaler()
cog_sog_cos_scaler = MinMaxScaler()

train_data_engineered["seconds_to_eta"] = seconds_to_eta_scaler.fit_transform(
    train_data_engineered["seconds_to_eta"].values.reshape(-1, 1)
)
#train_data_engineered["rot"] = rot_scaler.fit_transform(
#    train_data_engineered["rot"].values.reshape(-1, 1)
#)
train_data_engineered["cog_sog_sin"] = cog_sog_sin_scaler.fit_transform(
    train_data_engineered["cog_sog_sin"].values.reshape(-1, 1)
)
train_data_engineered["cog_sog_cos"] = cog_sog_cos_scaler.fit_transform(
    train_data_engineered["cog_sog_cos"].values.reshape(-1, 1)
)

print(train_data_engineered.head())

Index(['time', 'vesselId', 'seconds_to_eta', 'latitude_sin', 'latitude_cos',
       'longitude_sin', 'longitude_cos', 'port_latitude_sin',
       'port_latitude_cos', 'port_longitude_sin', 'port_longitude_cos',
       'hour_sin', 'hour_cos', 'day_sin', 'day_cos', 'month_sin', 'month_cos',
       'cog_sog_sin', 'cog_sog_cos'],
      dtype='object')
                 time                  vesselId  seconds_to_eta  latitude_sin  \
0 2024-01-12 14:07:47  61e9f38eb937134a3c4bfd8b        0.250161      0.130589   
1 2024-01-12 14:31:00  61e9f38eb937134a3c4bfd8b        0.263753      0.131790   
2 2024-01-12 14:57:23  61e9f38eb937134a3c4bfd8b        0.263716      0.133129   
3 2024-01-12 15:18:48  61e9f38eb937134a3c4bfd8b        0.263686      0.134207   
4 2024-01-12 15:39:47  61e9f38eb937134a3c4bfd8b        0.263656      0.135230   

   latitude_cos  longitude_sin  longitude_cos  port_latitude_sin  \
0      0.991437       0.976610       0.215018           0.229427   
1      0.991278       0.976

In [11]:
print(
    train_data_engineered[
        ["latitude_sin", "latitude_cos", "longitude_sin", "longitude_cos"]
    ].describe()
)

       latitude_sin  latitude_cos  longitude_sin  longitude_cos
count  1.522065e+06  1.522065e+06   1.522065e+06   1.522065e+06
mean   5.674148e-01  7.325459e-01   4.657097e-02   5.090908e-01
std    3.533228e-01  1.287637e-01   5.297501e-01   6.767740e-01
min   -7.376648e-01  3.328656e-01  -1.000000e+00  -9.997826e-01
25%    5.666483e-01  6.245345e-01  -9.085523e-02  -9.307284e-02
50%    6.721562e-01  7.403239e-01   7.383488e-02   9.743727e-01
75%    7.809972e-01  8.197004e-01   3.145059e-01   9.969210e-01
max    9.429743e-01  1.000000e+00   9.923086e-01   1.000000e+00


In [21]:
time_diff_scaler = MinMaxScaler()
time_diff_index = 0


def create_sequences(
    data: pd.DataFrame, sequence_length: int, future_target: int
):
    """
    Creates sequences of a specified length from the input data for each vessel.

    Args:
        data (pd.DataFrame): The input data containing 'vesselId', 'time', and feature columns.
        sequence_length (int): The length of each sequence.
        future_target (int): The maximum number of steps ahead to predict.

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
            A tuple containing the input sequences (X), targets (y), vessel IDs, and times.
    """
    sequences = []
    targets = []
    vessel_ids = []
    times = []
    all_time_diffs = []
    
    feature_columns = [col for col in data.columns if col not in ["vesselId", "time"]]
    feature_columns.append("time_diff")

    print(feature_columns)

    # Group data by 'vesselId'
    grouped = data.groupby("vesselId")

    for vessel_id, group in grouped:
        # Sort the group by 'time'
        group = group.sort_values("time").reset_index(drop=True)

        # Calculate time differences to the next instance
        group["time_diff"] = (group["time"].diff(-1).dt.total_seconds()/10**6).abs().fillna(0)
    
        # Convert features to numpy array
        feature_array = group[feature_columns].values

        # Generate sequences
        num_sequences = len(group) - sequence_length - future_target + 1

        for i in range(num_sequences):
            seq_x = feature_array[
                i : i + sequence_length
            ].copy()  # Use copy to avoid modifying the original array

            # Randomly select future target N steps ahead (5 to future_target inclusive)
            N = np.random.randint(1, future_target + 1)
            target_idx = i + sequence_length + N - 1  # Adjust index accordingly

            # Get seq_y
            seq_y = group.iloc[target_idx][
                ["latitude_sin", "latitude_cos", "longitude_sin", "longitude_cos"]
            ].values

            # Calculate cumulative time difference from last entry in seq_x to target
            last_seq_time = group["time"].iloc[i + sequence_length - 1]
            target_time = group["time"].iloc[target_idx]
            time_diff_cumulative = (target_time - last_seq_time).total_seconds()/10**6

            # Update the last time_diff in seq_x to be time_diff_cumulative
            time_diff_index = list(feature_columns).index("time_diff")
            seq_x[-1, time_diff_index] = time_diff_cumulative

            all_time_diffs.extend(seq_x[:, time_diff_index])

            # Append sequences and targets
            sequences.append(seq_x)
            targets.append(seq_y)
            vessel_ids.append(vessel_id)
            times.append(last_seq_time)

    # Convert lists to numpy arrays
    X = np.array(sequences).astype(np.float32)
    y = np.array(targets).astype(np.float32)
    vessel_ids = np.array(vessel_ids)
    times = np.array(times)
    
    return X, y, vessel_ids, times, time_diff_index

In [22]:
import os

print(train_data_engineered.columns)
train_data_sequenced_X = []
train_data_sequenced_Y = []

if os.path.exists(
    f"intermediate/train_data_sequenced_X_{sequence_length}_{future_target}.npy"
):
    print("Loading sequenced data from file")
    train_data_sequenced_X = np.load(
        f"intermediate/train_data_sequenced_X_{sequence_length}_{future_target}.npy",
        allow_pickle=True,
    )
    train_data_sequenced_Y = np.load(
        f"intermediate/train_data_sequenced_Y_{sequence_length}_{future_target}.npy",
        allow_pickle=True,
    )
else:
    print("Creating sequenced data")
    train_data_sequenced_X, train_data_sequenced_Y, vessels, times, time_diff_index = create_sequences(
        train_data_engineered,
        sequence_length=sequence_length,
        future_target=future_target,
    )
    np.save(f"intermediate/train_data_sequenced_X_{sequence_length}_{future_target}.npy",train_data_sequenced_X)
    np.save(f"intermediate/train_data_sequenced_Y_{sequence_length}_{future_target}.npy",train_data_sequenced_Y)

# train_data_shifted_df = train_data_shifted_df.drop(columns=["time"], axis=1)

Index(['time', 'vesselId', 'seconds_to_eta', 'latitude_sin', 'latitude_cos',
       'longitude_sin', 'longitude_cos', 'port_latitude_sin',
       'port_latitude_cos', 'port_longitude_sin', 'port_longitude_cos',
       'hour_sin', 'hour_cos', 'day_sin', 'day_cos', 'month_sin', 'month_cos',
       'cog_sog_sin', 'cog_sog_cos'],
      dtype='object')
Creating sequenced data
['seconds_to_eta', 'latitude_sin', 'latitude_cos', 'longitude_sin', 'longitude_cos', 'port_latitude_sin', 'port_latitude_cos', 'port_longitude_sin', 'port_longitude_cos', 'hour_sin', 'hour_cos', 'day_sin', 'day_cos', 'month_sin', 'month_cos', 'cog_sog_sin', 'cog_sog_cos', 'time_diff']


In [23]:
from typing import Tuple

def append_last_known_data_test(
    test_data: pd.DataFrame, known_data: pd.DataFrame
) -> Tuple[np.ndarray, pd.Series]:
    
    """
    Groups training data by vesselId and propagates all data from the last known location.

    Args:
        test_data (pd.DataFrame): The test data containing 'vesselId' and 'time'.
        known_data (pd.DataFrame): The known data to extract last known positions.

    Returns:
        Tuple[np.ndarray, pd.Series]: A tuple containing the numpy array of sequences and the original times.
    """
    
    test_data["time"] =pd.to_datetime(test_data["time"])

    # Check if all vesselIds in test_data are present in known_data
    missing_vessels = test_data.loc[~test_data["vesselId"].isin(known_data["vesselId"]), "vesselId"].unique()
    if missing_vessels.size > 0:
        raise ValueError(f"The following vesselIds are missing in known_data: {missing_vessels}")

    # Group known_data by 'vesselId', sort by 'time', and take the last 50 records
    grouped_data = (
        known_data.sort_values("time")
        .groupby("vesselId")
        .tail(50)
        .reset_index(drop=True)
    )
    grouped_data["time_diff"] = (grouped_data.groupby("vesselId")["time"].diff(-1).dt.total_seconds()/10**6).abs().fillna(0)
    
    test_data_numpy = []
    all_time_diffs = []


    for idx, row in test_data.iterrows():
        vessel_id = row["vesselId"]
        test_time = row["time"]

        # Check if vessel_id is in grouped_data
        if vessel_id not in grouped_data["vesselId"].values:
            raise ValueError(f"vesselId {vessel_id} not found in known_data.")

        # Extract vessel_data for the current vessel_id
        vessel_data = grouped_data[grouped_data["vesselId"] == vessel_id].copy()
        
        # Get index of the last row
        last_idx = vessel_data.index[-1]
                
        # Calculate cumulative time difference and scale it
        time_diff_cumulative = (test_time - vessel_data.loc[last_idx, "time"]).total_seconds()/10**6

        # Update 'time_diff' in vessel_data
        vessel_data.loc[last_idx, "time_diff"] = time_diff_cumulative
        

        # Drop 'time' column from vessel_data
        vessel_data = vessel_data.drop(["time","vesselId"], axis=1)

        # Convert vessel_data to numpy array and append to list
        test_data_numpy.append(vessel_data.values)
        all_time_diffs.extend([vessel_data["time_diff"].values])

    original_time = test_data["time"]
    vessels = test_data["vesselId"]

    return np.array(test_data_numpy).astype(np.float32), np.array(all_time_diffs).astype(np.float32)


In [24]:
print(train_data_engineered.columns)
test_data_X=0

if os.path.exists(
    f"intermediate/test_data_sequenced_X_{sequence_length}_{future_target}.npy"
    ):
    print("loading test data")
    test_data_X=np.load(f"intermediate/test_data_sequenced_X_{sequence_length}_{future_target}.npy", allow_pickle=True)
else:
    print("creating test data")
    test_data_X, test_time_diff_index = append_last_known_data_test(test_data, train_data_engineered)
    np.save(f"intermediate/test_data_sequenced_X_{sequence_length}_{future_target}.npy", test_data_X)

Index(['time', 'vesselId', 'seconds_to_eta', 'latitude_sin', 'latitude_cos',
       'longitude_sin', 'longitude_cos', 'port_latitude_sin',
       'port_latitude_cos', 'port_longitude_sin', 'port_longitude_cos',
       'hour_sin', 'hour_cos', 'day_sin', 'day_cos', 'month_sin', 'month_cos',
       'cog_sog_sin', 'cog_sog_cos'],
      dtype='object')
loading test data


In [25]:

last_lat_radians = np.arctan2(test_data_X[0,49,2], test_data_X[0,49,3])
last_long_radians = np.arctan2(test_data_X[0,49,4],test_data_X[0,49,5])

print(train_data_sequenced_X.shape)

last_lat_radians_train = np.arctan2(train_data_sequenced_X[20,49,2], train_data_sequenced_X[20,49,3])
last_long_radians_train = np.arctan2(train_data_sequenced_X[20,49,4],train_data_sequenced_X[20,49,5])

last_lat_radians_val = np.arctan2(train_data_sequenced_Y[0,0], train_data_sequenced_Y[0,1])
last_long_radians_val = np.arctan2(train_data_sequenced_Y[0,2],train_data_sequenced_Y[0,3])

last_lat_degrees=np.rad2deg(last_lat_radians)
last_long_degrees=np.rad2deg(last_long_radians)

last_lat_degrees_train=np.rad2deg(last_lat_radians_train)
last_long_degrees_train=np.rad2deg(last_long_radians_train)

last_lat_degrees_val=np.rad2deg(last_lat_radians_val)
last_long_degrees_val=np.rad2deg(last_long_radians_val)



print(last_lat_degrees,last_long_degrees)
print(last_lat_degrees_val,last_long_degrees_val)


(1419819, 100, 18)
139.12846 15.954792
17.795547 -106.41868


In [26]:
print(train_data_sequenced_X[-5:,49,:])

[[ 2.6206821e-01  8.1117857e-01  5.8479857e-01  1.6641165e-01
   9.8605639e-01  8.6014616e-01  5.1004761e-01  4.0805960e-01
   9.1295528e-01 -1.0000000e+00 -1.8369701e-16  8.6602539e-01
   5.0000000e-01  5.0000000e-01 -8.6602539e-01  5.2549911e-01
   5.2051890e-01  1.2400000e-03]
 [ 2.6203915e-01  8.1148523e-01  5.8437288e-01  1.6697215e-01
   9.8596162e-01  8.6014616e-01  5.1004761e-01  4.0805960e-01
   9.1295528e-01 -9.6592581e-01  2.5881904e-01  8.6602539e-01
   5.0000000e-01  5.0000000e-01 -8.6602539e-01  5.0920302e-01
   5.2576816e-01  8.9999998e-04]
 [ 2.6201805e-01  8.1175554e-01  5.8399737e-01  1.6725005e-01
   9.8591453e-01  8.6014616e-01  5.1004761e-01  4.0805960e-01
   9.1295528e-01 -9.6592581e-01  2.5881904e-01  8.6602539e-01
   5.0000000e-01  5.0000000e-01 -8.6602539e-01  5.1160544e-01
   5.3061223e-01  1.1990000e-03]
 [ 2.6198995e-01  8.1199962e-01  5.8365798e-01  1.6802141e-01
   9.8578334e-01  8.6014616e-01  5.1004761e-01  4.0805960e-01
   9.1295528e-01 -9.6592581e-01  

In [27]:

class ContinuousPositionalEncoding(layers.Layer):
    def __init__(self, d_model, **kwargs):
        """
        Initializes the ContinuousPositionalEncoding layer.

        Args:
            d_model (int): The dimensionality of the model.
            **kwargs: Additional keyword arguments for the Layer base class.
        """
        super(ContinuousPositionalEncoding, self).__init__(**kwargs)
        self.d_model = d_model

    @tf.function
    def call(self, time_differences):
        """
        Computes the positional encoding based on cumulative time differences.

        Args:
            time_differences (tf.Tensor): A tensor of shape (batch_size, sequence_length)
                                          containing cumulative time differences.

        Returns:
            tf.Tensor: Positional encoding tensor of shape (batch_size, sequence_length, d_model).
        """
        # Compute cumulative time positions
        time_positions = tf.cumsum(time_differences, axis=1)  # Shape: (batch_size, sequence_length)
        position = tf.expand_dims(time_positions, axis=-1)    # Shape: (batch_size, sequence_length, 1)

        # Compute the angle rates
        i = tf.range(self.d_model, dtype=tf.float32)          # Shape: (d_model,)
        i = tf.reshape(i, (1, 1, self.d_model))               # Shape: (1, 1, d_model)
        angle_rates = 1 / tf.pow(10000.0, (2 * (i // 2)) / tf.cast(self.d_model, tf.float32))  # Shape: (1, 1, d_model)
        angle_rads = position * angle_rates                   # Shape: (batch_size, sequence_length, d_model)

        # Apply sin to even indices and cos to odd indices
        sines = tf.sin(angle_rads[..., 0::2])                 # Shape: (batch_size, sequence_length, d_model/2)
        cosines = tf.cos(angle_rads[..., 1::2])               # Shape: (batch_size, sequence_length, d_model/2)

        # Concatenate sines and cosines along the last axis
        pos_encoding = tf.concat([sines, cosines], axis=-1)   # Shape: (batch_size, sequence_length, d_model)

        return pos_encoding



In [28]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Layer Normalization
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    # Multi-Head Attention
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs  # Residual Connection

    # Feed-Forward Network
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Dense(ff_dim, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Dense(inputs.shape[-1])(x)
    return x + res  # Residual Connection


In [29]:
def build_transformer_model(
    sequence_length,
    num_features,
    num_targets,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0
):
    feature_inputs = keras.Input(shape=(sequence_length, num_features), name='feature_inputs')
    cumulative_time_inputs = keras.Input(shape=(sequence_length,), name='cumulative_time_inputs')

    # Add Positional Encoding using the custom layer
    positional_encoding_layer = ContinuousPositionalEncoding(num_features)
    positional_encoding = positional_encoding_layer(cumulative_time_inputs)
    x = layers.Add()([feature_inputs, positional_encoding])

    # Transformer Blocks
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D()(x)

    # MLP for Regression
    for units in mlp_units:
        x = layers.Dense(units, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)

    outputs = layers.Dense(num_targets)(x)
    model = keras.Model(inputs=[feature_inputs, cumulative_time_inputs], outputs=outputs)
    return model



In [30]:
def get_aggregated_time_diff(data):
    aggregated_time_diff=data[:,:,-1]
    
    # Step 2: Shift time_diff_next by one position to create time_diff_previous
    # Set the first entry in each sequence to 0 to represent no previous time difference
    aggregated_time_diff = np.roll(aggregated_time_diff, shift=1, axis=1)
    aggregated_time_diff[:, 0] = 0  # Set the first entry of each sequence to 0

    # Step 3: Calculate the cumulative sum of time_diff_previous along each sequence
    aggregated_time_diff = np.cumsum(aggregated_time_diff, axis=1)  # Shape (1471269, 50)
    return(aggregated_time_diff)

In [31]:
model = build_transformer_model(
    sequence_length=sequence_length,
    num_features=num_features,
    num_targets=num_targets,  # Define this variable based on your task
    head_size=18,
    num_heads=1,
    ff_dim=72,
    num_transformer_blocks=6,
    mlp_units=[72],
    dropout=0.1,
    mlp_dropout=0.1,
)


model.compile(
    loss="mean_squared_error",
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    metrics=["mae"],
)

model.summary()

In [32]:
from sklearn.model_selection import train_test_split
import gc

train_data_sequenced_X=train_data_sequenced_X.astype(np.float32)
train_data_sequenced_Y=train_data_sequenced_Y.astype(np.float32)


X_train_1, X_val_1, X_train_2, X_val_2, y_train, y_val = train_test_split(
    train_data_sequenced_X,  # First part of input_train
    get_aggregated_time_diff(train_data_sequenced_X),  # todo verify this is correct
    train_data_sequenced_Y,  # Target array
    test_size=0.1,
    random_state=42
)

del train_data_sequenced_X
gc.collect() 

# Aggregate the training and testing inputs into lists for model input
X_train = [X_train_1, X_train_2]
X_val = [X_val_1, X_val_2]


In [33]:
def create_dataset(features, time_diffs, targets, batch_size):
    # Create the dataset
    dataset = tf.data.Dataset.from_tensor_slices(((features, time_diffs), targets))
    
    # Shuffle, batch, and prefetch in one pipeline
    dataset = dataset.shuffle(buffer_size=10000) \
                     .batch(batch_size) \
                     .prefetch(tf.data.AUTOTUNE)
    return dataset

# Create training and validation datasets using the optimized function
train_dataset = create_dataset(X_train_1, X_train_2, y_train, batch_size=1024)
val_dataset = create_dataset(X_val_1, X_val_2, y_val, batch_size=1024)


In [None]:

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',  # Metric to monitor
    patience=6,          # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True  # Restore model weights from the epoch with the best value of the monitored metric
)

# Assuming model, train_dataset, and val_dataset are already defined
# Train the model with the EarlyStopping callback
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=50,
    callbacks=[early_stopping]
)

Epoch 1/50




[1m971/971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 193ms/step - loss: 0.2642 - mae: 0.3325 - val_loss: 0.0397 - val_mae: 0.1192
Epoch 2/50
[1m971/971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 192ms/step - loss: 0.0457 - mae: 0.1381 - val_loss: 0.0314 - val_mae: 0.1068
Epoch 3/50
[1m971/971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 192ms/step - loss: 0.0343 - mae: 0.1165 - val_loss: 0.0252 - val_mae: 0.0957
Epoch 4/50
[1m971/971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 192ms/step - loss: 0.0275 - mae: 0.1035 - val_loss: 0.0203 - val_mae: 0.0850
Epoch 5/50
[1m971/971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 192ms/step - loss: 0.0224 - mae: 0.0941 - val_loss: 0.0170 - val_mae: 0.0759
Epoch 6/50
[1m971/971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 192ms/step - loss: 0.0192 - mae: 0.0870 - val_loss: 0.0152 - val_mae: 0.0701
Epoch 7/50
[1m971/971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s

In [None]:
model.save(f"transformer_{sequence_length}_{future_target}.h5")

In [None]:
# Predict future positions
prediction_x=[test_data_X,get_aggregated_time_diff(test_data_X)]
predictions = model.predict(prediction_x)

predictions=np.array(predictions).astype("float32")
print(predictions.dtype)

lat_predictions_radians = np.arctan2(predictions[:,0], predictions[:,1])
long_predictions_radians = np.arctan2(predictions[:,2],predictions[:,3])

# Convert radians to degrees
lat_predictions_degrees = np.rad2deg(lat_predictions_radians)
long_predictions_degrees = np.rad2deg(long_predictions_radians)



In [None]:
print(lat_predictions_degrees)
print(long_predictions_degrees)

In [None]:
predictions = pd.DataFrame({
        'ID': range(len(lat_predictions_degrees)),
        'longitude_predicted': long_predictions_degrees,
        'latitude_predicted': lat_predictions_degrees
    })
if pd.isna(predictions["latitude_predicted"]).any():
    print("oh no")
print(predictions.columns)

In [None]:
predictions.to_csv("predictions.csv", index=False)

In [None]:
print(len(predictions))
print(len(test_data))
print(predictions["longitude_predicted"])