# How to run scripts with CPU use (for testing)
- 1. Log in
ssh sXXXXXX@login.hpc.dtu.dk

- 2. Go to project
cd /dtu/blackhole/0e/213550/dark-vessel-hunter

- 3. Start interactive node
linuxsh

- 4. Back to project (now on compute node)
cd /dtu/blackhole/0e/213550/dark-vessel-hunter

- 5. Load Python
module load python3/3.10.12

- 6. Activate venv
source .venv/bin/activate

- 7. Run your script (short tests only)
python3 main_3_training.py  # example


In [1]:
import pandas as pd
import gc
import numpy as np
import torch.nn as nn
import torch
import torch.nn.functional as F
from torch.nn.utils import clip_grad_norm_
import os
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

import config
import main_2_pre_processing
from src.pre_proc import ais_query
from src.utils import training_utils
from src.utils import evaluation_utils
from src.utils import inspection_utils


# Local aliases for the settings defined in config.py, so the cells below can
# use them without the `config.` prefix.

# Date range of the data to pre-process for training
TRAIN_START_DATE = config.TRAIN_START_DATE
TRAIN_END_DATE = config.TRAIN_END_DATE

# Date range of the data to pre-process for testing
TEST_START_DATE = config.TEST_START_DATE
TEST_END_DATE = config.TEST_END_DATE

# Where the pre-processed train / test DataFrames are saved
PRE_PROCESSING_DF_TRAIN_PATH = config.PRE_PROCESSING_DF_TRAIN_PATH
PRE_PROCESSING_DF_TEST_PATH = config.PRE_PROCESSING_DF_TEST_PATH

# Segment length, columns to normalize, columns used as model features
SEGMENT_MAX_LENGTH = config.SEGMENT_MAX_LENGTH
NUMERIC_COLS = config.NUMERIC_COLS
FEATURE_COLS = config.FEATURE_COLS

# Output locations: model weights, evaluation plots, prediction DataFrames
WEIGHTS_PATH = config.WEIGHTS_PATH
PLOT_PATH = config.PLOT_PATH
PREDICTION_DF_PATH = config.PREDICTION_DF_PATH
PREDICTION_DENORM_DF_PATH = config.PREDICTION_DENORM_DF_PATH

# Directory used for the interactive HTML maps
MAPS_PATH = config.MAPS_PATH

## Preprocessing

##### OPTIONAL: Load the data from the filtered parquet database and check how many records and unique days it contains
--> If this cell fails with a memory-allocation error, your database is too big to be loaded in one go

In [None]:
# Filtered AIS data is located at "ais-data/parquet/" and contains your DOWNLOADED and then FILTERED data (filtered based on bbox, ship types, etc.)
# It still contains NaN values and unprocessed columns
raw_df = ais_query.query_ais_duckdb("ais-data/parquet", verbose=True)


print(f"Raw DataFrame shape: {raw_df.shape}")
print(f"Raw DataFrame columns:\n{raw_df.columns.tolist()}")

# Distinct calendar days present in the data, sorted ascending with a clean 0..N-1 index.
# Missing timestamps (NaT) are dropped: the raw data may still contain them, and NaT
# can neither be formatted with strftime nor used to measure a period length.
unique_days = (
    raw_df["Timestamp"]
    .dt.normalize()
    .dropna()
    .drop_duplicates()
    .sort_values()
    .reset_index(drop=True)
)
print(f"Data loaded: {len(raw_df)} records.")
#print("Unique days:", [d.strftime('%Y-%m-%d') for d in unique_days])

# Print continuous periods (split when a day is missing)
if unique_days.empty:
    print("No days found in data.")
else:
    # Gap (in days) to the previous day; the first entry has no predecessor (NaN)
    # and is treated as "continuous" so that it opens the first period.
    diffs = unique_days.diff().dt.days
    # Every gap different from 1 day starts a new group id
    groups = (diffs.fillna(1) != 1).cumsum()
    periods = [(grp.iloc[0], grp.iloc[-1]) for _, grp in unique_days.groupby(groups)]

    print("Continuous periods (split when a day is missing):")
    for start, end in periods:
        if start == end:
            print(start.strftime("%Y-%m-%d"))
        else:
            length = (end - start).days + 1
            print(f"{start.strftime('%Y-%m-%d')} -> {end.strftime('%Y-%m-%d')} ({length} days)")
    print(f"Total periods: {len(periods)}")

In [None]:
# Free up memory: drop the reference to the raw DataFrame loaded in the optional
# cell above, then force a garbage-collection pass.
# NOTE: `del raw_df` raises NameError if that optional cell was not run.
del raw_df
gc.collect()

##### Pre-processing

In [5]:
# Set these parameters as needed in config.py
print(f"SEGMENT_MAX_LENGTH: {SEGMENT_MAX_LENGTH}")   # Maximum length of segments (the minimum length is set during filtering, but during processing segments shorter than SEGMENT_MAX_LENGTH are discarded anyway)
print(f"NUMERIC_COLS: {NUMERIC_COLS}")               # Numeric columns to be normalized
print(f"TRAIN_START_DATE: {TRAIN_START_DATE}")       # Start date of the training data you want to PRE-PROCESS
print(f"TRAIN_END_DATE: {TRAIN_END_DATE}")           # End date of the training data you want to PRE-PROCESS
print(f"TEST_START_DATE: {TEST_START_DATE}")         # Start date of the test data you want to PRE-PROCESS
print(f"TEST_END_DATE: {TEST_END_DATE}")             # End date of the test data you want to PRE-PROCESS

SEGMENT_MAX_LENGTH: 300
NUMERIC_COLS: ['Latitude', 'Longitude', 'SOG', 'COG']
TRAIN_START_DATE: 2025-08-01
TRAIN_END_DATE: 2025-08-31
TEST_START_DATE: 2025-09-01
TEST_END_DATE: 2025-09-03


In [6]:
# Output paths of the pre-processing step; change them in config.py if needed
print(f"PRE_PROCESSING_DF_TRAIN_PATH: {PRE_PROCESSING_DF_TRAIN_PATH}") # The pre-processed TRAINING data is saved here after pre-processing
print(f"PRE_PROCESSING_DF_TEST_PATH: {PRE_PROCESSING_DF_TEST_PATH}")   # The pre-processed TEST data is saved here after pre-processing

PRE_PROCESSING_DF_TRAIN_PATH: ais-data/df_preprocessed/pre_processed_df_train.parquet
PRE_PROCESSING_DF_TEST_PATH: ais-data/df_preprocessed/pre_processed_df_test.parquet


In [2]:
# Pre-process the training data: queries the dates between TRAIN_START_DATE and
# TRAIN_END_DATE and saves the result to PRE_PROCESSING_DF_TRAIN_PATH
main_2_pre_processing.main_pre_processing(dataframe_type="train")

[pre_processing] Querying AIS data for training period: 2025-08-01 to 2025-08-31
[query_ais_duckdb] SQL:
 SELECT * FROM read_parquet('ais-data/parquet/**/*.parquet') WHERE 1=1 AND Date IN ('2025-08-01', '2025-08-02', '2025-08-03', '2025-08-04', '2025-08-05', '2025-08-06', '2025-08-07', '2025-08-08', '2025-08-09', '2025-08-10', '2025-08-11', '2025-08-12', '2025-08-13', '2025-08-14', '2025-08-15', '2025-08-16', '2025-08-17', '2025-08-18', '2025-08-19', '2025-08-20', '2025-08-21', '2025-08-22', '2025-08-23', '2025-08-24', '2025-08-25', '2025-08-26', '2025-08-27', '2025-08-28', '2025-08-29', '2025-08-30', '2025-08-31')
[pre_processing] Initial data size: 3996082 records.
[pre_processing] Dropping unnecessary columns and rows with missing values...
[pre_processing] Data size after dropping: 3925808 records.
[pre_processing] Splitting segments to max length 300...
[pre_processing] Saving pre-processed DataFrame to ais-data/df_preprocessed/pre_processed_df_train.parquet...
[pre_processing] Co

In [3]:
# Pre-process the test data: queries the dates between TEST_START_DATE and
# TEST_END_DATE and saves the result to PRE_PROCESSING_DF_TEST_PATH
main_2_pre_processing.main_pre_processing(dataframe_type="test")

[pre_processing] Querying AIS data for testing period: 2025-09-01 to 2025-09-03
[query_ais_duckdb] SQL:
 SELECT * FROM read_parquet('ais-data/parquet/**/*.parquet') WHERE 1=1 AND Date IN ('2025-09-01', '2025-09-02', '2025-09-03')
[pre_processing] Initial data size: 367732 records.
[pre_processing] Dropping unnecessary columns and rows with missing values...
[pre_processing] Data size after dropping: 367230 records.
[pre_processing] Splitting segments to max length 300...
[pre_processing] Saving pre-processed DataFrame to ais-data/df_preprocessed/pre_processed_df_test.parquet...
[pre_processing] Columns of pre-processed DataFrame:
['Timestamp', 'Latitude', 'Longitude', 'SOG', 'COG', 'MMSI', 'Segment_nr', 'ShipTypeID']
[pre_processing] Saving preprocessing metadata to ais-data/df_preprocessed/pre_processing_metadata_test.json...


## Training

In [4]:
# Settings used by the training section (all defined in config.py)
print(f"NUMERIC_COLS: {NUMERIC_COLS}")   # Columns that will be normalized
print(f"FEATURE_COLS: {FEATURE_COLS}")   # Columns that will be used as features
print(f"PRE_PROCESSING_DF_TRAIN_PATH: {PRE_PROCESSING_DF_TRAIN_PATH}")   # Path to the pre-processed training DataFrame
print(f"PRE_PROCESSING_DF_TEST_PATH: {PRE_PROCESSING_DF_TEST_PATH}")   # Path to the pre-processed test DataFrame

NUMERIC_COLS: ['Latitude', 'Longitude', 'SOG', 'COG']
FEATURE_COLS: ['Latitude', 'Longitude', 'SOG', 'COG']
PRE_PROCESSING_DF_TRAIN_PATH: ais-data/df_preprocessed/pre_processed_df_train.parquet
PRE_PROCESSING_DF_TEST_PATH: ais-data/df_preprocessed/pre_processed_df_test.parquet


In [5]:
# Load the pre-processed training DataFrame from file into  ---> df_seq

# df_seq holds one record per segment with the following fields
# (if you want to modify it, look at the load_df_seq function in training_utils.py):
# {
#         "Segment_nr": seg_id,                # metadata
#         "MMSI": mmsi,                        # metadata
#         "FirstTimestamp": first_timestamp,   # metadata
#         "ShipTypeID": ship_type_id,          # used to condition the model during decoding
#         "Sequence": X,                       # actual features
# }
# NOTE(review): the cells below access it through the pandas API (reset_index,
# iloc, column selection), so load_df_seq presumably returns a DataFrame -
# confirm in training_utils.py.


# Loading
df_seq_train = training_utils.load_df_seq(PRE_PROCESSING_DF_TRAIN_PATH)

# Splitting into train and val sets (80%-20% split); the validation set is useful
# for early stopping and other metrics during training.
# random_state is fixed so the split is reproducible. df_seq_train is rebound to
# the 80% training part.
df_seq_train, df_seq_val = train_test_split(
        df_seq_train,
        test_size=0.2,
        random_state=5,
    )

##### Dataset and Dataloader classes 

In [6]:
# Import 
# This is an ad-hoc Dataset class to wrap up the df_seq into a PyTorch Dataset
# Try to keep as it is unless you need to modify something specific about features/labels
class AISTrajectoryDataset(Dataset):
    """
    PyTorch Dataset view over a df_seq DataFrame.

    Columns read from each row:
      - Sequence: (T, Features) trajectory, given as list-of-lists or np.ndarray
      - ShipTypeID: integer class id
    """

    def __init__(self, df_seq: pd.DataFrame):
        # Renumber the rows 0..N-1 so positional access lines up with sample indices
        self.df = df_seq.reset_index(drop=True)

    def __len__(self) -> int:
        return self.df.shape[0]

    def __getitem__(self, idx: int):
        record = self.df.iloc[idx]

        # Features as a float32 tensor of shape (T, Features)
        features = torch.from_numpy(np.array(record["Sequence"], dtype=np.float32))
        # Ship type as a 0-dim int64 tensor
        ship_type = torch.tensor(int(record["ShipTypeID"]), dtype=torch.long)

        return features, ship_type

In [7]:
# Wrap the train / val splits of df_seq into DATASETS
# (the validation set is useful for early stopping during training)
train_dataset, val_dataset = (
    AISTrajectoryDataset(df_seq_train),
    AISTrajectoryDataset(df_seq_val),
)

# Wrap the datasets into DATALOADERS (batch_size can be modified).
# Training batches are shuffled, which usually improves generalization.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, drop_last=False)

# Validation batches keep a fixed order so that evaluation is consistent.
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, drop_last=False)

In [8]:
# Infer sequence shape and num_shiptypes (needed for model definition)
sample_x, _ = train_dataset[0]
# NOTE: F is the number of features here and shadows the `torch.nn.functional as F`
# alias imported at the top of the notebook; the loss cell re-imports that alias.
T, F = sample_x.shape
# Count the ship types over train AND val: the 80% training split alone may miss a
# rare ship type, which would make the embedding too small for the validation data
# and inconsistent with the evaluation section (which counts over the whole
# pre-processed training file).
num_shiptypes = pd.concat(
    [df_seq_train["ShipTypeID"], df_seq_val["ShipTypeID"]]
).nunique()
print(f"Sequence shape: T={T}, F={F}, num_shiptypes={num_shiptypes}")

# Device (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Sequence shape: T=300, F=4, num_shiptypes=17
Using device: cpu


##### Model class (import your model here)

In [9]:
# Import
from src.train.models.AE_simple import LSTMAutoencoderWithShipType

# Instantiate the model.
# input_dim is read from sample_x instead of the global `F`: the loss cell rebinds
# `F` to torch.nn.functional, so re-running this cell after the loss cell would
# otherwise pass a module as input_dim.
model = LSTMAutoencoderWithShipType(
    input_dim=sample_x.shape[-1],
    hidden_dim=128,
    latent_dim=64,
    num_shiptypes=num_shiptypes,
    shiptype_emb_dim=8,
    num_layers=1,
    dropout=0.3,
).to(device)

##### Loss function (can be modified)


In [10]:
import torch.nn.functional as F

def sequence_loss_fn(
    recon_batch: torch.Tensor,  # (B, T, F)
    x_batch: torch.Tensor,      # (B, T, F)
) -> torch.Tensor:
    """
    Compute one reconstruction error per sequence; the result has shape (B,).

    This is the place to swap in a different definition (MSE, MAE, weights, etc.).
    """
    # Element-wise squared error, no reduction yet: (B, T, F)
    per_element = F.mse_loss(recon_batch, x_batch, reduction="none")
    # Average over the time and feature axes; the batch axis is kept: (B,)
    return per_element.mean(dim=(1, 2))

##### Training

In [11]:
# The model weights are saved to this path after training; change it in config.py if needed
print(f"WEIGHTS_PATH: {WEIGHTS_PATH}")

WEIGHTS_PATH: models/AE_simple.pth


In [None]:
epochs = 10
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = sequence_loss_fn
max_grad_norm = 1.0 # Maximum gradient norm for gradient clipping (prevents exploding gradients, can be adjusted)

# Create the output directory up front, so that a missing folder is caught before
# training starts instead of failing at torch.save after all epochs have run.
weights_dir = os.path.dirname(WEIGHTS_PATH)
if weights_dir:
    os.makedirs(weights_dir, exist_ok=True)

# Per-epoch average losses, one entry per epoch
history = {"train_loss": [], "val_loss": []}

for epoch in range(1, epochs + 1):
    # ---- Train ----
    model.train()
    train_loss_sum = 0.0

    for x, ship_type_id in train_loader:
        x = x.to(device)
        ship_type_id = ship_type_id.to(device)

        # The model returns the reconstruction plus a second value that is not used here
        recon, _ = model(x, ship_type_id)
        seq_errors = criterion(recon, x)  # one error per sequence
        loss = seq_errors.mean()

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()

        # Weight the batch mean by the batch size: the last batch can be smaller
        train_loss_sum += loss.item() * x.size(0)

    # Total train loss normalized by the number of samples (batches can have different sizes, so we sum and normalize here)
    train_loss = train_loss_sum / len(train_loader.dataset)
    history["train_loss"].append(train_loss)

    # ---- Validation ---- # Useful for early stopping/model selection
    model.eval()
    val_loss_sum = 0.0
    with torch.no_grad():
        for x, ship_type_id in val_loader:
            x = x.to(device)
            ship_type_id = ship_type_id.to(device)

            recon, _ = model(x, ship_type_id)
            seq_errors = criterion(recon, x)
            loss = seq_errors.mean()

            val_loss_sum += loss.item() * x.size(0)

    val_loss = val_loss_sum / len(val_loader.dataset)
    history["val_loss"].append(val_loss)

    print(
        f"Epoch {epoch}/{epochs} - "
        f"train MSE: {train_loss:.6f} - val MSE: {val_loss:.6f}"
    )


torch.save(model.state_dict(), WEIGHTS_PATH)
print(f"Model weights saved to {WEIGHTS_PATH}")

## Evaluation

In [2]:
# Paths used by the evaluation section (all defined in config.py)
print("WEIGHTS_PATH:", WEIGHTS_PATH)
print("PLOT_PATH:", PLOT_PATH)
print("PREDICTION_DF_PATH:", PREDICTION_DF_PATH)
print(f"PREPROCESSING_DF_TEST_PATH: {PRE_PROCESSING_DF_TEST_PATH}")

WEIGHTS_PATH: models/AE_simple.pth
PLOT_PATH: eval/plots
PREDICTION_DF_PATH: eval/predictions_df.parquet
PREPROCESSING_DF_TEST_PATH: ais-data/df_preprocessed/pre_processed_df_test.parquet


In [3]:
# Load the pre-processed TEST DataFrame from file into  ---> df_seq_test
df_seq_test = training_utils.load_df_seq(PRE_PROCESSING_DF_TEST_PATH)

In [5]:
# Load test dataset and dataloader
class AISTrajectoryDataset(Dataset):
    """
    PyTorch Dataset view over a df_seq DataFrame.

    Columns read from each row:
      - Sequence: (T, F) trajectory, given as list-of-lists or np.ndarray
      - ShipTypeID: integer class id
    """

    def __init__(self, df_seq: pd.DataFrame):
        # Renumber the rows 0..N-1 so positional access lines up with sample indices
        self.df = df_seq.reset_index(drop=True)

    def __len__(self) -> int:
        return self.df.shape[0]

    def __getitem__(self, idx: int):
        record = self.df.iloc[idx]

        # Features as a float32 tensor of shape (T, F)
        features = torch.from_numpy(np.array(record["Sequence"], dtype=np.float32))
        # Ship type as a 0-dim int64 tensor
        ship_type = torch.tensor(int(record["ShipTypeID"]), dtype=torch.long)

        return features, ship_type

# Wrap the test df_seq into a Dataset
test_dataset = AISTrajectoryDataset(df_seq_test)

# Fixed order (no shuffling) and no dropped batch, so every test segment is
# scored exactly once
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    drop_last=False,
)

# Infer sequence shape and num_shiptypes
sample_x, _ = test_dataset[0]
# NOTE: F is the number of features here and shadows the `torch.nn.functional as F`
# alias imported at the top of the notebook; the loss cell re-imports that alias.
T, F = sample_x.shape
# TO BE DELETED LATER: the whole training DataFrame is re-loaded (and df_seq_train
# is rebound) only to count the ship types.
df_seq_train = training_utils.load_df_seq(PRE_PROCESSING_DF_TRAIN_PATH) # TO BE DELETED LATER
# For now the training df is used to get num_shiptypes, BUT IT HAS TO BE CONSISTENT
# with the value the weights were trained with, SO WE NEED TO DEFINE A FIXED NUMBER
# (e.g., limit the number of ship types during pre-processing).
num_shiptypes = df_seq_train["ShipTypeID"].nunique()
print(f"Sequence shape: T={T}, F={F}, num_shiptypes={num_shiptypes}")

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Sequence shape: T=300, F=4, num_shiptypes=17
Using device: cpu


##### Put your model here

In [6]:
# Import
from src.train.models.AE_simple import LSTMAutoencoderWithShipType

# Instantiate the model with the same hyper-parameters used for training, so that
# the saved weights fit.
# input_dim is read from sample_x instead of the global `F`: the loss cell rebinds
# `F` to torch.nn.functional, so re-running this cell after the loss cell would
# otherwise pass a module as input_dim.
model = LSTMAutoencoderWithShipType(
    input_dim=sample_x.shape[-1],
    hidden_dim=128,
    latent_dim=64,
    num_shiptypes=num_shiptypes,
    shiptype_emb_dim=8,
    num_layers=1,
    dropout=0.3,
).to(device)

In [7]:
# Load the saved model weights; map_location places the tensors on the current
# device (CPU or GPU).
# NOTE(review): torch.load unpickles the file - only load weight files you trust.
model.load_state_dict(torch.load(WEIGHTS_PATH, map_location=device))
model.to(device)
# Switch to evaluation mode; as the last expression of the cell, this also
# displays the model architecture.
model.eval()

LSTMAutoencoderWithShipType(
  (encoder): LSTM(4, 128, batch_first=True)
  (fc_latent): Linear(in_features=128, out_features=64, bias=True)
  (shiptype_emb): Embedding(17, 8)
  (fc_z_st_to_h): Linear(in_features=72, out_features=128, bias=True)
  (decoder): LSTM(4, 128, batch_first=True)
  (fc_out): Linear(in_features=128, out_features=4, bias=True)
)

##### Loss function

In [8]:
import torch.nn.functional as F

def sequence_loss_fn(
    recon_batch: torch.Tensor,  # (B, T, F)
    x_batch: torch.Tensor,      # (B, T, F)
) -> torch.Tensor:
    """
    Compute one reconstruction error per sequence; the result has shape (B,).

    This is the place to swap in a different definition (MSE, MAE, weights, etc.).
    """
    # Element-wise squared error, no reduction yet: (B, T, F)
    per_element = F.mse_loss(recon_batch, x_batch, reduction="none")
    # Average over the time and feature axes; the batch axis is kept: (B,)
    return per_element.mean(dim=(1, 2))

##### Plots


In [31]:
# Save some plots and get evaluation results about the reconstruction errors.
# The returned dict holds the per-segment test scores and the suggested anomaly
# threshold (99th percentile of the test scores).
results = evaluation_utils.make_plots(model=model, test_loader=test_loader, device=device, seq_loss_fn=sequence_loss_fn)
print("Evaluation results:", results)


=== Reconstruction errors on test ===
Test:
  mean: 0.000228
  std:  0.000564
  q95:  0.000799
  q99:  0.001409
  max:  0.012264

Suggested anomaly threshold (99th percentile of test): 0.001409
Plots saved in directory: "eval/plots"
Evaluation results: {'test_scores': array([1.1595870e-04, 3.2631713e-05, 8.8864195e-05, ..., 2.9249548e-05,
       2.4567564e-05, 1.4920163e-04], shape=(1021,), dtype=float32), 'threshold_99': np.float32(0.0014091902)}


##### Prediction df - for further analysis (e.g plotting on maps)

In [9]:
# Build predictions DataFrame for further analysis
df_predictions = evaluation_utils.build_predictions_df(model=model, dataset=test_loader.dataset, device=device, seq_loss_fn=sequence_loss_fn)

# Save predictions DataFrame to file. The target directory is created first,
# because to_parquet does not create missing parent directories.
predictions_dir = os.path.dirname(PREDICTION_DF_PATH)
if predictions_dir:
    os.makedirs(predictions_dir, exist_ok=True)
df_predictions.to_parquet(PREDICTION_DF_PATH, index=False)
print("")
print(f"Predictions DataFrame saved to {PREDICTION_DF_PATH}")


# Understanding the structure of the predictions DataFrame
print("Predictions DataFrame shape:", df_predictions.shape)
print("Predictions DataFrame columns:", df_predictions.columns.tolist())
print("")

print("Structure of a Sequence_real cell:")
seq = df_predictions["Sequence_real"].iloc[0]
arr = np.array(seq)
print("type:", type(seq))
print("numpy shape:", arr.shape)
# fallback / readable shape (guards against an empty sequence)
print("T (timesteps):", len(seq), "F (features per timestep):", len(seq[0]) if len(seq) > 0 else 0)



Predictions DataFrame saved to eval/predictions_df.parquet
Predictions DataFrame shape: (1021, 6)
Predictions DataFrame columns: ['Segment_nr', 'MMSI', 'ShipTypeID', 'Sequence_real', 'Sequence_pred', 'recon_error']

Structure of a Sequence_real cell:
type: <class 'list'>
numpy shape: (300, 4)
T (timesteps): 300 F (features per timestep): 4


## Inspection

##### FIRST OF ALL --> Denormalization

In [10]:
# Input / output paths of the denormalization step (defined in config.py)
print(f"PREDICTION_DF_PATH: {PREDICTION_DF_PATH}")                 # Path to the predictions DataFrame, saved after evaluation
print(f"PREDICTION_DENORM_DF_PATH: {PREDICTION_DENORM_DF_PATH}")   # Path to the denormalized predictions DataFrame, to be saved after denormalization

PREDICTION_DF_PATH: eval/predictions_df.parquet
PREDICTION_DENORM_DF_PATH: eval/predictions_denorm_df.parquet


In [11]:
# Load predictions DataFrame from file
df_predictions = pd.read_parquet(PREDICTION_DF_PATH)

# Denormalize predictions, using the metadata saved during pre-processing of the
# test data
df_denormalized = inspection_utils.denormalize_predictions(
    df=df_predictions,
    metadata_path=config.PRE_PROCESSING_METADATA_TEST_PATH,
)

# Understanding the structure of the denormalized predictions DataFrame
print("Denormalized Predictions DataFrame shape:", df_denormalized.shape)
print("Denormalized Predictions DataFrame columns:", df_denormalized.columns.tolist())
print("")
print(f"Structure of a real_Longitude cell:")
seq = df_denormalized["real_Longitude"].iloc[0]
arr = np.array(seq)
print("type:", type(seq))
print("numpy shape:", arr.shape)

Denormalized Predictions DataFrame shape: (1021, 13)
Denormalized Predictions DataFrame columns: ['Segment_nr', 'MMSI', 'ShipTypeID', 'recon_error', 'real_Latitude', 'real_Longitude', 'real_SOG', 'real_COG', 'pred_Latitude', 'pred_Longitude', 'pred_SOG', 'pred_COG', 'ShipType']

Structure of a real_Longitude cell:
type: <class 'list'>
numpy shape: (300,)


In [12]:
# Save denormalized predictions DataFrame to file. The target directory is created
# first, because to_parquet does not create missing parent directories.
denorm_dir = os.path.dirname(PREDICTION_DENORM_DF_PATH)
if denorm_dir:
    os.makedirs(denorm_dir, exist_ok=True)
df_denormalized.to_parquet(PREDICTION_DENORM_DF_PATH, index=False)
print(f"Denormalized Predictions DataFrame saved to {PREDICTION_DENORM_DF_PATH}")

Denormalized Predictions DataFrame saved to eval/predictions_denorm_df.parquet


##### Plot on map

In [9]:
# save_interactive_html TAKES AS INPUT THE DENORMALIZED PREDICTIONS DATAFRAME and saves
# interactive HTML maps visualizing the AIS tracks and reconstruction errors.
# The commented-out calls below are alternative ways of selecting which tracks to plot.
df_denormalized = pd.read_parquet(PREDICTION_DENORM_DF_PATH)

# 1 single segment
#inspection_utils.save_interactive_html(df_denormalized, out_html=f"{MAPS_PATH}/ais_maps.html", segment=7235, zoom_start=7)

# random 8 tracks
inspection_utils.save_interactive_html(df_denormalized, out_html=f"{MAPS_PATH}/ais_maps.html", n_random=8, zoom_start=7)

# a single MMSI
#inspection_utils.save_interactive_html(df_denormalized, out_html=f"{MAPS_PATH}/ais_mmsi_219005866.html", mmsi=219005866, zoom_start=8)

# a list of MMSI
#inspection_utils.save_interactive_html(df_denormalized, out_html=f"{MAPS_PATH}/ais_some_mmsi.html", mmsi=[219005866, 241455000], zoom_start=8)

# head 10, best reconstructions (ascending sort puts the lowest errors first)
df_denormalized = df_denormalized.sort_values(by=["recon_error"]) 
inspection_utils.save_interactive_html(df_denormalized, out_html=f"{MAPS_PATH}/ais_best_first_10.html", head_n=10)
# head 10, worst reconstructions (descending sort puts the highest errors first)
df_denormalized = df_denormalized.sort_values(by=["recon_error"], ascending=False)
inspection_utils.save_interactive_html(df_denormalized, out_html=f"{MAPS_PATH}/ais_worst_first_10.html", head_n=10)

'eval/maps/ais_worst_first_10.html'