# AIS Trajectory

### Importere biblioteker

In [59]:
import pandas as pd
import numpy as np
from datetime import timedelta
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb

from tqdm import tqdm


### Importer data

In [60]:
train_df = pd.read_csv('data/ais_train.csv', sep='|')
test_df = pd.read_csv('data/ais_test.csv')

ports_df = pd.read_csv('data/ports.csv', sep='|')  
schedule_df = pd.read_csv('data/schedules_to_may_2024.csv', sep='|')
vessels_df = pd.read_csv('data/vessels.csv', sep='|')

display(train_df.head())

Unnamed: 0,time,cog,sog,rot,heading,navstat,etaRaw,latitude,longitude,vesselId,portId
0,2024-01-01 00:00:25,284.0,0.7,0,88,0,01-09 23:00,-34.7437,-57.8513,61e9f3a8b937134a3c4bfdf7,61d371c43aeaecc07011a37f
1,2024-01-01 00:00:36,109.6,0.0,-6,347,1,12-29 20:00,8.8944,-79.47939,61e9f3d4b937134a3c4bff1f,634c4de270937fc01c3a7689
2,2024-01-01 00:01:45,111.0,11.0,0,112,0,01-02 09:00,39.19065,-76.47567,61e9f436b937134a3c4c0131,61d3847bb7b7526e1adf3d19
3,2024-01-01 00:03:11,96.4,0.0,0,142,1,12-31 20:00,-34.41189,151.02067,61e9f3b4b937134a3c4bfe77,61d36f770a1807568ff9a126
4,2024-01-01 00:03:51,214.0,19.7,0,215,0,01-25 12:00,35.88379,-5.91636,61e9f41bb937134a3c4c0087,634c4de270937fc01c3a74f3


### Pre-prosessering

In [61]:
# Convert 'time' to datetime
train_df['time'] = pd.to_datetime(train_df['time'])
test_df['time'] = pd.to_datetime(test_df['time'])

# Ensure 'vesselId' is string
train_df['vesselId'] = train_df['vesselId'].astype(str)
test_df['vesselId'] = test_df['vesselId'].astype(str)

# Convert 'time' to numeric format
train_df['time_numeric'] = (train_df['time'] - train_df['time'].min()).dt.total_seconds()
test_df['time_numeric'] = (test_df['time'] - train_df['time'].min()).dt.total_seconds()

# Convert 'portId' to numeric format
le = LabelEncoder()
train_df['portId'] = le.fit_transform(train_df['portId'])

# Convert 'vesselId' to numeric format
train_df['vesselId'] = le.fit_transform(train_df['vesselId'])
test_df['vesselId'] = le.fit_transform(test_df['vesselId'])

train_df['etaRaw'] = train_df['etaRaw'].dropna()

train_df['etaRaw'] = pd.to_datetime(train_df['etaRaw'], format='%m-%d %H:%M', errors='coerce')
train_df['etaRaw'] = train_df['etaRaw'].apply(lambda x: x.replace(year=2024) if pd.notnull(x) else x)

train_df['etaRaw_numeric'] = (train_df['etaRaw'] - train_df['time'].min()).dt.total_seconds()

# drop etaRaw og time
train_df = train_df.drop(columns=['etaRaw', 'time'])

# Drop Nan values
train_df = train_df.dropna()

display(train_df.head())


Unnamed: 0,cog,sog,rot,heading,navstat,latitude,longitude,vesselId,portId,time_numeric,etaRaw_numeric
0,284.0,0.7,0,88,0,-34.7437,-57.8513,50,40,0.0,773975.0
1,109.6,0.0,-6,347,1,8.8944,-79.47939,189,674,11.0,31435175.0
2,111.0,11.0,0,112,0,39.19065,-76.47567,432,353,80.0,118775.0
3,96.4,0.0,0,142,1,-34.41189,151.02067,110,18,166.0,31607975.0
4,214.0,19.7,0,215,0,35.88379,-5.91636,356,605,206.0,2116775.0


### Feature engineering

### Fit model

In [62]:
model = {}

features = ['time_numeric', 'vesselId']
targets = ['cog', 'sog', 'rot', 'heading', 'navstat', 'latitude', 'longitude', 'portId', 'time_numeric', 'etaRaw_numeric']


model = xgb.XGBRegressor().fit(train_df[features], train_df[targets])



### Predict

In [65]:
# Predict positions
preds=[]

for index, row in test_df.iterrows():
    vessel = row['vesselId']
    time_numeric = row['time_numeric']
    
    pred = model.predict(pd.DataFrame([[time_numeric, vessel]], columns=['time_numeric', 'vesselId']))
    
    preds.append(pred)

In [76]:
print(np.shape(preds))

lat = []
lon = []
ID = []
counter = 0

for pred in preds:
    lat.append(pred[0,5])
    lon.append(pred[0,6])
    ID.append(counter)
    counter += 1



submission_df = pd.DataFrame({'ID': ID, 'longitude_predicted': lon, 'latitude_predicted': lat})

print(submission_df.head())

(51739, 1, 10)
   ID  longitude_predicted  latitude_predicted
0   0            24.864161           27.893616
1   1            32.005219           11.849067
2   2            10.626740           42.565014
3   3           -63.512241           29.686218
4   4            -5.293605           48.930389


### Eksporter til csv

In [78]:
submission_df.to_csv('predictions.csv', index=False)
print('Få den jævla "predictions.csv" filen inn på Kaggle og se om det funker')

Få den jævla "predictions.csv" filen inn på Kaggle og se om det funker
