In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, GRU, Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2  # Ensure l2 is imported

# Parameters
SEQUENCE_LENGTH = 5  # Number of past time steps to use for prediction
BATCH_SIZE = 64
EPOCHS = 50

# Load the raw tables. The two files use different delimiters: the training
# file is pipe-separated, the test file is comma-separated.
train = pd.read_csv('ais_train.csv', sep='|')
test = pd.read_csv('ais_test.csv', sep=',')

# Convert 'time' column to datetime so rows can be ordered and differenced
train['time'] = pd.to_datetime(train['time'])
test['time'] = pd.to_datetime(test['time'])

# Map 'vesselId' to unique integers.
# The encoder is fitted on the IDs of BOTH files: fitting on the training IDs
# alone makes `transform` raise ValueError as soon as the test file contains a
# vessel that never appears in training, even though the inference loop is
# written to skip such vessels. When every test vessel also occurs in training
# the resulting codes are identical to fitting on the training IDs only.
le = LabelEncoder()
le.fit(pd.concat([train['vesselId'], test['vesselId']], ignore_index=True))
train['vesselId'] = le.transform(train['vesselId'])
test['vesselId'] = le.transform(test['vesselId'])

# Sort datasets by 'vesselId' and 'time' so that "previous row" means
# "previous observation of the same vessel"
train = train.sort_values(by=['vesselId', 'time'])
test = test.sort_values(by=['vesselId', 'time'])

# Create 'previous_lat', 'previous_lon', and 'delta_time' in the training set.
# All three lag features are computed from a single per-vessel grouping before
# any of them is attached to the frame.
by_vessel = train.groupby('vesselId')
train = train.assign(
    previous_lat=by_vessel['latitude'].shift(1),
    previous_lon=by_vessel['longitude'].shift(1),
    delta_time=by_vessel['time'].diff().dt.total_seconds(),
)

# The first observation of every vessel has no predecessor, so its lag
# features are NaN; drop those rows.
train = train.dropna(subset=['previous_lat', 'previous_lon', 'delta_time'])

# Feature Engineering: Create sequences
def create_sequences(df, seq_length):
    """Build fixed-length sliding windows of features and their targets.

    For every vessel the rows are ordered by 'time'; each run of
    ``seq_length`` consecutive rows becomes one input window, and the target
    is the ('latitude', 'longitude') pair of the row immediately after the
    window.

    Args:
        df: DataFrame with the columns 'vesselId', 'time', 'previous_lat',
            'previous_lon', 'delta_time', 'latitude' and 'longitude'.
        seq_length: Number of consecutive rows in each window.

    Returns:
        Tuple ``(X, y)``. ``X`` has shape ``(n_windows, seq_length, 4)`` and
        ``y`` has shape ``(n_windows, 2)``. Vessels with ``seq_length`` rows
        or fewer contribute no windows. When no vessel yields a window,
        correctly shaped empty arrays are returned so callers can still read
        ``X.shape[-1]``.
    """
    feature_cols = ['vesselId', 'previous_lat', 'previous_lon', 'delta_time']
    target_cols = ['latitude', 'longitude']

    sequences = []
    targets = []

    # A single groupby pass avoids re-filtering the whole frame once per
    # vessel; sort=False keeps vessels in order of first appearance.
    for _, vessel_data in df.groupby('vesselId', sort=False):
        vessel_data = vessel_data.sort_values('time')
        vessel_features = vessel_data[feature_cols].values
        vessel_targets = vessel_data[target_cols].values

        n_windows = len(vessel_features) - seq_length
        if n_windows <= 0:
            # Not enough history for even one window plus its target.
            continue

        sequences.extend(
            vessel_features[i:i + seq_length] for i in range(n_windows)
        )
        # Window i is paired with row i + seq_length, i.e. every row from
        # index seq_length onwards, in order.
        targets.append(vessel_targets[seq_length:])

    if not sequences:
        return (
            np.empty((0, seq_length, len(feature_cols))),
            np.empty((0, len(target_cols))),
        )

    return np.stack(sequences), np.concatenate(targets)

# Turn the preprocessed training frame into (window, target) samples.
X, y = create_sequences(train, SEQUENCE_LENGTH)

# Hold out 20% of the windows for validation; the seed is fixed so the split
# is reproducible.
# NOTE(review): windows are built with a stride of one row, so neighbouring
# windows of a vessel overlap and a random split can put near-duplicates on
# both sides. Consider splitting by vessel or by time instead.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature Scaling
# StandardScaler expects 2-D input, so the (samples, steps, features) arrays
# are flattened to (samples * steps, features) first.
n_features = X_train.shape[-1]
X_train_reshaped = X_train.reshape(-1, n_features)
X_val_reshaped = X_val.reshape(-1, n_features)

# Statistics come from the training windows only and are reused for validation.
scaler = StandardScaler().fit(X_train_reshaped)

# Scale, then restore the original 3-D layout.
X_train_scaled = scaler.transform(X_train_reshaped).reshape(X_train.shape)
X_val_scaled = scaler.transform(X_val_reshaped).reshape(X_val.shape)

# Define BiGRU Model with Dropout and L2 Regularization
# The input shape is declared with an explicit Input layer: passing
# `input_shape` to the first layer of a Sequential model makes current Keras
# emit a UserWarning and is no longer the recommended form.
model = Sequential([
    Input(shape=(SEQUENCE_LENGTH, X_train.shape[-1])),
    # First recurrent layer returns the full sequence so the second one can
    # consume it step by step.
    Bidirectional(GRU(64, return_sequences=True, kernel_regularizer=l2(0.001))),
    Dropout(0.3),
    # Second recurrent layer collapses the sequence into a single vector.
    Bidirectional(GRU(32, kernel_regularizer=l2(0.001))),
    Dropout(0.3),
    Dense(16, activation='relu', kernel_regularizer=l2(0.001)),
    Dense(2)  # Output: [latitude, longitude]
])

# Regression on raw coordinates: MSE as the loss, MAE reported alongside.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.summary()

# Define Callbacks
# All three callbacks are driven by the validation loss.

# Stop once val_loss has gone 5 epochs without improving and roll the model
# back to the best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Write the model to disk each time val_loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    'best_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Halve the learning rate after 3 epochs without improvement, never going
# below 1e-6.
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

callbacks = [early_stop, checkpoint, lr_scheduler]

# Train the model on the scaled windows, validating on the held-out split.
history = model.fit(
    X_train_scaled,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val_scaled, y_val),
    callbacks=callbacks,
)

# Prepare test data: Initialize 'previous_lat', 'previous_lon', and 'delta_time'
test['previous_lat'] = np.nan
test['previous_lon'] = np.nan
test['delta_time'] = np.nan

# Retrieve last known positions from the training set.
# `train` is sorted by vessel and time, so the final row of each group is the
# most recent observation. `tail(1)` selects it directly and avoids
# `groupby.apply`, which warns about operating on the grouping column and
# stops exposing that column in newer pandas.
last_positions = (
    train.groupby('vesselId')
    .tail(1)
    .set_index('vesselId')[['latitude', 'longitude', 'time']]
)

# Prepare a list to collect the prediction results
submission_rows = []

# Iterate over each vessel in the test data (one groupby pass instead of
# re-filtering the whole frame per vessel; order of first appearance is kept).
for vessel_id, vessel_test_data in test.groupby('vesselId', sort=False):
    # Vessels without any training history cannot be seeded; they are skipped
    # and end up with NaN predictions after the left merge below.
    if vessel_id not in last_positions.index:
        continue

    prev_lat = last_positions.at[vessel_id, 'latitude']
    prev_lon = last_positions.at[vessel_id, 'longitude']
    last_time = last_positions.at[vessel_id, 'time']

    vessel_test_data = vessel_test_data.sort_values(by='time')

    # Initialize sequence buffer with the last known position repeated
    # SEQUENCE_LENGTH times (delta_time=0 for the initial steps).
    # NOTE(review): this seed window differs from the windows the model saw in
    # training — confirm the mismatch is acceptable.
    sequence = [
        [vessel_id, prev_lat, prev_lon, 0] for _ in range(SEQUENCE_LENGTH)
    ]

    # Roll the model forward one test record at a time, feeding each
    # prediction back in as the next "previous position".
    for row_id, row_time in zip(vessel_test_data['ID'], vessel_test_data['time']):
        delta_time = (row_time - last_time).total_seconds()

        # Update the last element in the sequence with the actual delta_time
        sequence[-1][3] = delta_time

        # Scale the (SEQUENCE_LENGTH, n_features) window and add a batch axis
        window_scaled = scaler.transform(np.array(sequence, dtype=float))
        X_test_seq_scaled = window_scaled[np.newaxis, ...]

        # Predict latitude and longitude. `predict_on_batch` runs the single
        # window directly; `model.predict` would rebuild its data pipeline
        # and print a progress bar for every row.
        predicted_lat, predicted_lon = model.predict_on_batch(X_test_seq_scaled)[0]

        # Append the prediction to the submission list
        submission_rows.append({
            'ID': row_id,
            'longitude_predicted': predicted_lon,
            'latitude_predicted': predicted_lat
        })

        # Slide the window: drop the oldest step, append the new prediction
        sequence.pop(0)
        sequence.append([vessel_id, predicted_lat, predicted_lon, delta_time])

        # Update last_time for next iteration
        last_time = row_time

# Create a submission DataFrame from the list. Explicit columns keep the 'ID'
# key present even when no predictions were produced.
submission_df = pd.DataFrame(
    submission_rows,
    columns=['ID', 'longitude_predicted', 'latitude_predicted'],
)

# Merge the predictions with the test data based on 'ID'; the left join keeps
# one output row per test record, in the test frame's order.
final_submission = test[['ID']].merge(submission_df, on='ID', how='left')

# Save the submission file
final_submission.to_csv('submission.csv', index=False)

print("Submission file 'submission.csv' has been created successfully.")


  super().__init__(**kwargs)


Epoch 1/50
[1m18971/18975[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - loss: 211.3385 - mae: 6.9080
Epoch 1: val_loss improved from inf to 21.62007, saving model to best_model.keras
[1m18975/18975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 10ms/step - loss: 211.3044 - mae: 6.9073 - val_loss: 21.6201 - val_mae: 1.2557 - learning_rate: 0.0010
Epoch 2/50
[1m18975/18975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 39.6631 - mae: 2.7267
Epoch 2: val_loss improved from 21.62007 to 21.04176, saving model to best_model.keras
[1m18975/18975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m162s[0m 9ms/step - loss: 39.6630 - mae: 2.7267 - val_loss: 21.0418 - val_mae: 1.0309 - learning_rate: 0.0010
Epoch 3/50
[1m18971/18975[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - loss: 31.4993 - mae: 2.3185
Epoch 3: val_loss did not improve from 21.04176
[1m18975/18975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 9m

  last_positions = train.groupby('vesselId').apply(lambda x: x.iloc[-1])[['vesselId', 'latitude', 'longitude', 'time']]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/