# AIS Trajectory

### Importer biblioteker

In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta
from sklearn.linear_model import LinearRegression
from haversine import haversine, Unit

### Importer data

In [2]:
# Load the AIS training and test sets. The training file is pipe-delimited,
# while the test file uses the default comma delimiter (spelled out here).
train_df = pd.read_csv('data/ais_train.csv', sep='|')
test_df = pd.read_csv('data/ais_test.csv', sep=',')

### Pre-prosessering

In [3]:
# Normalise both frames the same way: parse 'time' into datetimes and force
# 'vesselId' to string.
for frame in (train_df, test_df):
    frame['time'] = pd.to_datetime(frame['time'])
    frame['vesselId'] = frame['vesselId'].astype(str)

# Express time as seconds elapsed since the earliest training timestamp.
# The test set uses the same training origin, so both frames share one time axis.
time_origin = train_df['time'].min()
for frame in (train_df, test_df):
    frame['time_numeric'] = (frame['time'] - time_origin).dt.total_seconds()


In [4]:
# Extract each vessel's initial (earliest-in-time) position.
# Rows are ordered by time and the complete first row per vessel is kept, so
# time, latitude and longitude always come from the same observation.
# groupby('vesselId').first() is avoided on purpose: it returns the first
# non-null value of each column separately, in file order, which can combine
# values from different rows and need not be the earliest observation.
initial_positions = (
    train_df.sort_values('time', kind='stable')  # stable: ties keep file order
    .drop_duplicates(subset='vesselId', keep='first')
    .sort_values('vesselId')
    .reset_index(drop=True)[['vesselId', 'time', 'latitude', 'longitude']]
    .rename(columns={
        'time': 'initial_time',
        'latitude': 'initial_latitude',
        'longitude': 'initial_longitude',
    })
)

# Left merge keeps every test row; vessels that never appear in the training
# data get NaN/NaT in the initial_* columns.
test_df = test_df.merge(initial_positions, on='vesselId', how='left')

### Feature engineering

### Fit model

In [5]:
# Fit one pair of linear regression models per vessel:
# latitude ~ time_numeric and longitude ~ time_numeric.
lat_models = {}
lon_models = {}

vessels = train_df['vesselId'].unique()

# A single groupby pass replaces a full boolean-mask scan of train_df per
# vessel. sort=False yields the groups in order of first appearance, which is
# the same order as `vessels`, so the dicts are filled in the same order.
for vessel, vessel_data in train_df.groupby('vesselId', sort=False):
    # Vessels with fewer than two rows get no model of their own.
    if len(vessel_data) < 2:
        continue

    X = vessel_data[['time_numeric']]
    lat_models[vessel] = LinearRegression().fit(X, vessel_data['latitude'])
    lon_models[vessel] = LinearRegression().fit(X, vessel_data['longitude'])

# Handle vessels not in training data: test vessels without a per-vessel model
vessels_in_train = set(lat_models.keys())
vessels_in_test = set(test_df['vesselId'].unique())
vessels_not_in_train = vessels_in_test - vessels_in_train

# Global models fitted on all training rows; the prediction cell falls back to
# these for any vessel that has no per-vessel model.
global_lat_model = LinearRegression().fit(train_df[['time_numeric']], train_df['latitude'])
global_lon_model = LinearRegression().fit(train_df[['time_numeric']], train_df['longitude'])


### Predict

In [9]:
# Predict positions with one batched predict() call per vessel, instead of
# building a one-row DataFrame and calling predict() twice for every test row.
time_features = test_df[['time_numeric']]
lat_values = np.full(len(test_df), np.nan)
lon_values = np.full(len(test_df), np.nan)

# GroupBy.indices maps each vesselId to the positional row numbers of its test
# rows, so results are written back in the original row order regardless of
# what the DataFrame index looks like.
for vessel, positions in test_df.groupby('vesselId').indices.items():
    # Use the vessel's own models when available, otherwise the global ones.
    lat_model = lat_models.get(vessel, global_lat_model)
    lon_model = lon_models.get(vessel, global_lon_model)

    # Keep the 'time_numeric' column name so it matches the fitted feature name.
    vessel_times = time_features.iloc[positions]

    lat_values[positions] = lat_model.predict(vessel_times)
    lon_values[positions] = lon_model.predict(vessel_times)

# Expose the predictions as plain lists, as before.
lat_preds = lat_values.tolist()
lon_preds = lon_values.tolist()

test_df['latitude_predicted'] = lat_preds
test_df['longitude_predicted'] = lon_preds

### Eksporter til csv

In [8]:
# Write the ID and the predicted coordinates to predictions.csv, without the
# DataFrame index.
submission_columns = ['ID', 'longitude_predicted', 'latitude_predicted']
submission = test_df.loc[:, submission_columns].copy()
submission.to_csv('predictions.csv', index=False)
# Reminder (in Norwegian) to upload the generated file to Kaggle.
print('Få den jævla "predictions.csv" filen inn på Kaggle og se om det funker')

Få den jævla "predictions.csv" filen inn på Kaggle og se om det funker
