In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split


In [9]:
# Load the processed AIS data and discard the port identifier column.
data = pd.read_csv("../CSV/big_files/ais_train_processed.csv", sep="|").drop(columns=["portId"])

# Features are every remaining column except the two coordinates being predicted.
X = data.drop(columns=["latitude", "longitude"])
y = data[["latitude", "longitude"]].copy()

# shuffle=False keeps the original row order: the last 20% of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# One-hot encode vesselId on the training split; the first category is dropped
# and therefore acts as the all-False baseline.
X_train = pd.get_dummies(X_train, columns=["vesselId"], drop_first=True)
X_train = X_train.reindex(sorted(X_train.columns), axis=1)

# Encode the test split WITHOUT drop_first: the test split's first category is
# generally not the training baseline, so dropping it here would silently erase
# a vessel the model has a column for. Then align to the training layout:
#   * columns only present in train are added as all-False,
#   * columns only present in test (vessels unseen in training, or the training
#     baseline vessel) are removed, leaving those rows as the all-False baseline,
#   * column order matches X_train exactly.
X_test = pd.get_dummies(X_test, columns=["vesselId"])
X_test = X_test.reindex(columns=X_train.columns, fill_value=False)

def evaluate(model: str, y_pred):
    """Print per-coordinate test-set metrics for a model's predictions.

    Compares ``y_pred`` against the module-level ``y_test`` and prints the mean
    squared error, R-squared and adjusted R-squared separately for latitude and
    longitude. The number of predictors used for the adjustment is the column
    count of the module-level ``X_test``.

    Args:
        model: Label shown in the printed header.
        y_pred: Predictions with one row per row of ``y_test`` and two columns,
            assumed to be in the same order as ``y_test``'s columns
            (latitude first, longitude second).

    Returns:
        None. The metrics are printed.
    """
    n = len(y_test)
    p = X_test.shape[1]

    # multioutput="raw_values" returns one score per target column. The default
    # averages over both columns, which would make the latitude and longitude
    # figures identical.
    mse_latitude, mse_longitude = mean_squared_error(
        y_test, y_pred, multioutput="raw_values"
    )
    r2_latitude, r2_longitude = r2_score(y_test, y_pred, multioutput="raw_values")

    # Adjusted R-squared needs positive residual degrees of freedom; report NaN
    # instead of dividing by zero (or by a negative count) when that fails.
    residual_dof = n - p - 1
    adjustment = (n - 1) / residual_dof if residual_dof > 0 else float("nan")
    r2_latitude_adj = 1 - (1 - r2_latitude) * adjustment
    r2_longitude_adj = 1 - (1 - r2_longitude) * adjustment

    print(f"---- {model} Metrics ----")
    print(f"Mean Squared Error (Latitude): {mse_latitude:.4f}")
    print(f"R-squared (Latitude): {r2_latitude:.4f}, Adjusted R-squared (Latitude): {r2_latitude_adj: .4f}")
    print(f"Mean Squared Error (Longitude): {mse_longitude:.4f}")
    print(f"R-squared (Longitude): {r2_longitude:.4f}, Adjusted R-squared (Longitude): {r2_longitude_adj: .4f}")



In [None]:
# --- Time Series KNN ---
from sktime.regression.distance_based import KNeighborsTimeSeriesRegressor

# Time model construction plus fitting; prediction below is not included.
start = time.time()
# "euclidean" is the name sktime documents for the Euclidean distance;
# the previous value "eucl_dist" is not among the documented distance strings.
# NOTE(review): confirm against the installed sktime version.
KNNTS = KNeighborsTimeSeriesRegressor(n_neighbors=3, distance="euclidean")
KNNTS.fit(X_train, y_train)
print(time.time()-start)

knnts_pred = KNNTS.predict(X_test)

evaluate("KNNTS", knnts_pred)