# dark-vessel-hunter
DTU Deep Learning project 29, group 80


### Install the dependencies once, before running the rest of the notebook:

In [None]:
pip install -r requirements.txt

## Import the project modules

In [1]:
import ais_downloader
import ais_filtering
import ais_reader
import ais_to_parquet

## Data setup
### Set data preferences

In [None]:
# --- Date range to process (ISO "YYYY-MM-DD" strings) ---
START_DATE, END_DATE = "2025-11-01", "2025-11-02"

# --- Folders: downloaded CSV files / parquet dataset written from them ---
FOLDER_NAME = "ais-data"
OUTPUT_FOLDER_NAME = "ais-data-parquet"

# --- Run options ---
# When True, the processing loop unlinks each CSV right after it has been read.
DELETE_DOWNLOADED_CSV = False
# Forwarded as `verbose=` to the reader, filter and parquet helpers.
verbose_mode = True

# --- Filtering / segmentation parameters ---
# Passed to the filter as `allowed_mobile_types`.
vessel_ais_class = ("Class A", "Class B")
# Passed to the segmenter as `min_track_len`
# (presumably a minimum number of AIS rows per track -- TODO confirm unit).
min_segment_length = 256

# Coarse bounding box used to prefilter the AIS data while reading.
# Order: [lat_max, lon_min, lat_min, lon_max]
bbox = [57.58, 10.5, 57.12, 11.92]

# Precise Area of Interest (AOI): polygon vertices as (lon, lat) pairs.
_aoi_vertices = [
    (10.5162, 57.3500),  # coast top left
    (10.9314, 57.5120),  # sea top left
    (11.5128, 57.5785),  # sea top right
    (11.9132, 57.5230),  # top right (Swedish coast)
    (11.9189, 57.4078),  # bottom right (Swedish coast)
    (11.2133, 57.1389),  # sea bottom right
    (11.0067, 57.1352),  # sea bottom left
    (10.5400, 57.1880),  # coast bottom left
]
# Repeat the first vertex at the end so the ring is explicitly closed.
polygon_coords = [*_aoi_vertices, _aoi_vertices[0]]



### Imports for the script

In [3]:
from tqdm import tqdm
from pathlib import Path
import pandas as pd
from datetime import date, timedelta

### Script: download, filter, segment and save the AIS data

In [None]:
# --- Create the folder that receives the downloaded CSV files ---
folder_path = Path(FOLDER_NAME)
folder_path.mkdir(parents=True, exist_ok=True)

# --- If you want to download all csv files before, uncomment the line below ---
# ais_downloader.download_multiple_ais_data(START_DATE, END_DATE, folder_path)

# --- Build the schedule of dates to process ---
dates = ais_downloader.get_work_dates(START_DATE, END_DATE, folder_path, filter=False)

# Days before this cutoff are labelled by month only (YYYY-MM), later days by
# their full date (YYYY-MM-DD). Parsed once here instead of on every iteration.
daily_tag_start = date.fromisoformat("2024-03-01")

# --- Iterate with tqdm: download, read, filter, segment and save each entry ---
for day in tqdm(dates, desc="Processing data", unit="file"):
    tag = f"{day:%Y-%m}" if day < daily_tag_start else f"{day:%Y-%m-%d}"
    # tqdm.write prints the message without corrupting the active progress bar.
    tqdm.write(f"\nProcessing date: {tag}")

    # --- Download one entry of the schedule (returns the path of its CSV) ---
    csv_path = ais_downloader.download_one_ais_data(day, folder_path)

    # --- Load the CSV into a DataFrame, prefiltered with the bounding box ---
    df_raw = ais_reader.read_single_ais_df(csv_path, bbox, verbose=verbose_mode)

    # --- Optionally delete the downloaded CSV file now that it has been read ---
    if DELETE_DOWNLOADED_CSV:
        csv_path.unlink(missing_ok=True)

    # --- Filter: restrict to the configured mobile types and the AOI polygon ---
    df_filtered = ais_filtering.filter_ais_df(
        df_raw,
        polygon_coords,
        allowed_mobile_types=vessel_ais_class,
        verbose=verbose_mode,
    )

    # --- Segment the tracks, dropping those shorter than min_segment_length ---
    df_seg = ais_to_parquet.segment_ais_tracks(
        df_filtered,
        min_track_len=min_segment_length,
        verbose=verbose_mode,
    )

    # --- Save the segments to the parquet output folder ---
    ais_to_parquet.save_by_mmsi(
        df_seg,
        verbose=verbose_mode,
        output_folder=OUTPUT_FOLDER_NAME,
    )

Processing data:   0%|          | 0/2 [00:00<?, ?file/s]


Processing date: 2025-11-01
Skipping 2025-11-01 download: already present in ais-data folder
 Read AIS data: 988,647 rows within bbox,  241 unique vessels
 [filter_ais_df] Before filtering: 988,647 rows,  [filter_ais_df] 241 unique vessels
 [filter_ais_df] Type of mobile filtering complete: 950,987 rows  [filter_ais_df] (removed 37,660 rows)  [filter_ais_df] using types: ['Class A', 'Class B']
 [filter_ais_df] MMSI filtering complete: 950,965 rows,  [filter_ais_df] 238 unique vessels
 [filter_ais_df] Duplicate removal complete: 535,909 rows,  [filter_ais_df] 238 unique vessels
 [filter_ais_df] Polygon filtering complete: 276,112 rows,  [filter_ais_df] 176 unique vessels
 [segment_ais_tracks] Starting with 276,112 rows,  176 unique vessels
 [segment_ais_tracks] After MMSI-level filter: 100,223 rows,  124 vessels
 [segment_ais_tracks] After segment-level filter: 98,101 rows,  131 segments


Processing data:  50%|█████     | 1/2 [01:11<01:11, 71.71s/file]

 [save_by_mmsi] Parquet dataset written/appended at: D:\Projects\dark-vessel-hunter\ais-data-parquet

Processing date: 2025-11-02
Starting download and extraction for 2025-11-02


Downloading 2025-11-02 zip file: 100%|██████████| 536M/536M [00:26<00:00, 21.6MB/s]
Unzipping into ais-data folder : 100%|██████████| 1/1 [00:49<00:00, 49.17s/it]


Completed download and extraction for 2025-11-02
 Read AIS data: 933,867 rows within bbox,  226 unique vessels
 [filter_ais_df] Before filtering: 933,867 rows,  [filter_ais_df] 226 unique vessels
 [filter_ais_df] Type of mobile filtering complete: 895,249 rows  [filter_ais_df] (removed 38,618 rows)  [filter_ais_df] using types: ['Class A', 'Class B']
 [filter_ais_df] MMSI filtering complete: 895,249 rows,  [filter_ais_df] 225 unique vessels
 [filter_ais_df] Duplicate removal complete: 500,728 rows,  [filter_ais_df] 225 unique vessels
 [filter_ais_df] Polygon filtering complete: 259,909 rows,  [filter_ais_df] 152 unique vessels
 [segment_ais_tracks] Starting with 259,909 rows,  152 unique vessels
 [segment_ais_tracks] After MMSI-level filter: 137,425 rows,  112 vessels
 [segment_ais_tracks] After segment-level filter: 135,132 rows,  122 segments


Processing data: 100%|██████████| 2/2 [03:19<00:00, 99.52s/file] 

 [save_by_mmsi] Parquet dataset written/appended at: D:\Projects\dark-vessel-hunter\ais-data-parquet



