# View MED Tracking Predictions

Notebook per generare il video del Mediterraneo con le tile ground truth/predette e la traccia del centro ciclone prevista dal modello di tracking (`output/tracking_inference_predictions.csv`).


In [1]:
%%html
<style>

.dataframe td {
    white-space: normal !important;
    word-wrap: break-word;
    max-width: 900px;
}
</style>


In [2]:
%load_ext autoreload
%autoreload 2

import os
os.environ['PATH'] = './ffmpeg-7.0.2-amd64-static:' + os.environ.get('PATH', '')

from pathlib import Path
from datetime import datetime

import numpy as np
import pandas as pd

# pd.set_option('display.max_colwidth', None)
# pd.set_option('display.max_rows', 1500)
# pd.set_option('display.max_columns', 1500)
# pd.set_option('display.precision', 3)
# pd.set_option('display.float_format', '{:.3f}'.format)
# pd.set_option('display.width', 21000)

import matplotlib.pyplot as plt
import matplotlib
from IPython.display import HTML

from arguments import prepare_finetuning_args, prepare_tracking_args
from dataset.build_dataset import calc_tile_offsets, get_train_test_validation_df
from dataset.data_manager import make_validation_data_builder_from_manos_tracks, make_tracking_data_builder_from_csv, BuildTrackingDataset
from view_test_tiles import (
    make_animation_parallel_ffmpeg,
    filling_missing_tile,
    safe_literal_eval,
    PALETTE,
)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Impossibile caricare la libreria torch dynamo


In [3]:
args = prepare_finetuning_args()

input_dir = Path('../fromgcloud')
output_dir = Path('../airmassRGB/supervised/')
manos_csv = Path('medicane_data_input/medicanes_new_windows.csv')

#classification_csv = Path('output/inference_predictions.csv')
tracking_csv = Path('output/tracking_inference_predictions.csv')

#if not classification_csv.exists():
#    fallback = Path(args.output_dir) / classification_csv.name
#    if fallback.exists():
#        classification_csv = fallback
if not tracking_csv.exists():
    fallback = Path(args.output_dir) / tracking_csv.name
    if fallback.exists():
        tracking_csv = fallback

#assert classification_csv.exists(), f'Missing classification predictions at {classification_csv}'
assert tracking_csv.exists(), f'Missing tracking predictions at {tracking_csv}'

PALETTE['PRED'] = (255, 0, 0)

#print(f'Classification predictions: {classification_csv}')
print(f'Tracking predictions: {tracking_csv}')


Tracking predictions: output/tracking_inference_predictions.csv


In [4]:
args = prepare_tracking_args()
test_builder, tracks_df_test = make_tracking_data_builder_from_csv(
    manos_track_file=manos_csv,
    selected_csv="test_tracking_selezionati.csv",
    input_dir=input_dir,
    output_dir=output_dir,
    split="test",
    args=args)



Cicloni nel train: 12, cicloni nel test: 6
Train rows: 564, Test rows: 382, Validation rows: 0
23 days 08:45:00
15 days 21:30:00
0 days 00:00:00
sorted_metadata_files num :  4516
Creo video senza salvarli...
1)  ->
Sbilanciato, ma comunque tengo traccia dei positivi e negativi
Con cicloni: 81 	 ids: [      0 7001715]
Senza cicloni: 819
900 video per il periodo (effettivo) da 2020-12-13 09:45:00 a 2020-12-16 13:00:00

2)  ->
Sbilanciato, ma comunque tengo traccia dei positivi e negativi
Con cicloni: 47 	 ids: [      0 7001716]
Senza cicloni: 505
552 video per il periodo (effettivo) da 2020-12-25 11:45:00 a 2020-12-27 10:00:00

3)  ->
Sbilanciato, ma comunque tengo traccia dei positivi e negativi
Con cicloni: 49 	 ids: [0]
Senza cicloni: 539
588 video per il periodo (effettivo) da 2021-10-28 11:45:00 a 2021-10-30 13:00:00

4)  ->
Sbilanciato, ma comunque tengo traccia dei positivi e negativi
Con cicloni: 18 	 ids: [0]
Senza cicloni: 198
216 video per il periodo (effettivo) da 2021-11-08 

In [5]:
df_predictions = pd.read_csv(tracking_csv)
print(df_predictions.shape, test_builder.df_video.shape)

df_predictions['path'] = df_predictions['path'].astype(str)
df_predictions['path_basename'] = df_predictions['path'].str.split('/').str[-1]

df_joined = test_builder.df_video.merge(
    df_predictions,
    left_on='path',
    right_on='path_basename',
    how='inner',
    suffixes=('_video', '_pred')
)

df_joined.drop(columns=['path_basename'], inplace=True)

df_joined = df_joined.rename(columns={
    'tile_offset_x_video': 'tile_offset_x',
    'tile_offset_y_video': 'tile_offset_y',
    'tile_offset_x_pred': 'tile_offset_x_model',
    'tile_offset_y_pred': 'tile_offset_y_model',
    'path_pred': 'path_prediction'
})

print(df_joined.columns.tolist())


(160, 17) (159, 9)
['video_id', 'tile_offset_x', 'tile_offset_y', 'path_video', 'label', 'start_time', 'end_time', 'orig_paths', 'id_cyc_unico', 'path_prediction', 'pred_x', 'pred_y', 'target_x', 'target_y', 'err_px', 'err_km', 'tile_offset_x_model', 'tile_offset_y_model', 'pred_x_global', 'pred_y_global', 'target_x_global', 'target_y_global', 'pred_lat', 'pred_lon', 'target_lat', 'target_lon']


In [6]:
df_joined.columns

Index(['video_id', 'tile_offset_x', 'tile_offset_y', 'path_video', 'label',
       'start_time', 'end_time', 'orig_paths', 'id_cyc_unico',
       'path_prediction', 'pred_x', 'pred_y', 'target_x', 'target_y', 'err_px',
       'err_km', 'tile_offset_x_model', 'tile_offset_y_model', 'pred_x_global',
       'pred_y_global', 'target_x_global', 'target_y_global', 'pred_lat',
       'pred_lon', 'target_lat', 'target_lon'],
      dtype='object')

In [7]:
records = []
def _to_list(val):
    if val is None:
        return []
    if isinstance(val, (list, tuple, set)):
        return list(val)
    if isinstance(val, str):
        stripped = val.strip()
        if not stripped:
            return []
        try:
            parsed = safe_literal_eval(stripped)
        except Exception:
            parsed = stripped
        if isinstance(parsed, (list, tuple, set)):
            return list(parsed)
        return [] if pd.isna(parsed) else [parsed]
    if isinstance(val, float) and pd.isna(val):
        return []
    return [val]

def _parse_datetime_from_name(name):
    if name is None or (isinstance(name, float) and pd.isna(name)):
        return pd.NaT
    base = Path(str(name)).name
    stem = base.split('.')[0]
    parts = stem.split('_')
    if len(parts) < 2:
        return pd.NaT
    dt_str = '_'.join(parts[:2])
    try:
        return datetime.strptime(dt_str, '%d-%m-%Y_%H%M')
    except ValueError:
        return pd.NaT

for _, row in df_joined.iterrows():
    orig_paths_list = _to_list(row['orig_paths'])
    if not orig_paths_list:
        continue

    pred_val = row.get('predictions', row.get('label', 0))
    try:
        pred_val = int(pred_val)
    except Exception:
        pred_val = 0

    label_val = row.get('labels', row.get('label', 0))
    try:
        label_val = int(label_val)
    except Exception:
        label_val = 0

    for orig_path in orig_paths_list:
        if orig_path is None:
            continue
        orig_path_str = str(orig_path)
        records.append({
            'path': orig_path_str,
            'predictions': pred_val,
            'tmp_label': label_val,
            'tile_offset_x': row.get('tile_offset_x'),
            'tile_offset_y': row.get('tile_offset_y'),
            'neighboring': row.get('neighboring', 0),
            'track_pred_x': row.get('pred_x_global'),
            'track_pred_y': row.get('pred_y_global'),
            'track_target_x': row.get('target_x_global'),
            'track_target_y': row.get('target_y_global'),
            'track_pred_lat': row.get('pred_lat'),
            'track_pred_lon': row.get('pred_lon'),
            'track_target_lat': row.get('target_lat'),
            'track_target_lon': row.get('target_lon'),
            'track_err_px': row.get('err_px'),
            'track_err_km': row.get('err_km'),
            'video_path': row.get('path'),
        })

df_mapping = pd.DataFrame(records)

if not df_mapping.empty:
    df_mapping['frame_datetime'] = df_mapping['path'].apply(_parse_datetime_from_name)
    df_mapping['video_end_datetime'] = df_mapping['video_path'].apply(_parse_datetime_from_name)

    def _select_best_row(group):
        valid = group.dropna(subset=['frame_datetime'])
        if valid.empty:
            return group.iloc[[0]]
        valid = valid.dropna(subset=['video_end_datetime'])
        if valid.empty:
            return group.iloc[[0]]

        candidates = valid[valid['video_end_datetime'] >= valid['frame_datetime']]
        if not candidates.empty:
            diffs = (candidates['video_end_datetime'] - candidates['frame_datetime']).dropna()
            if diffs.empty:
                return candidates.iloc[[0]]
            idx = diffs.idxmin()
            return candidates.loc[[idx]]

        diffs = (valid['video_end_datetime'] - valid['frame_datetime']).abs().dropna()
        if diffs.empty:
            return valid.iloc[[0]]
        idx = diffs.idxmin()
        return valid.loc[[idx]]

    df_mapping = df_mapping.groupby('path', group_keys=False).apply(_select_best_row).reset_index(drop=True)

df_mapping


  df_mapping = df_mapping.groupby('path', group_keys=False).apply(_select_best_row).reset_index(drop=True)


Unnamed: 0,path,predictions,tmp_label,tile_offset_x,tile_offset_y,neighboring,track_pred_x,track_pred_y,track_target_x,track_target_y,track_pred_lat,track_pred_lon,track_target_lat,track_target_lon,track_err_px,track_err_km,video_path,frame_datetime,video_end_datetime
0,../fromgcloud/airmass_rgb_20201213_1445.png,1,1,852,196,0,934.536575,300.073769,862.0,246.0,34.142723,24.075520,36.089121,21.863759,90.473903,295.468279,,NaT,NaT
1,../fromgcloud/airmass_rgb_20201213_1450.png,1,1,852,196,0,934.536575,300.073769,862.0,246.0,34.142723,24.075520,36.089121,21.863759,90.473903,295.468279,,NaT,NaT
2,../fromgcloud/airmass_rgb_20201213_1455.png,1,1,852,196,0,934.536575,300.073769,862.0,246.0,34.142723,24.075520,36.089121,21.863759,90.473903,295.468279,,NaT,NaT
3,../fromgcloud/airmass_rgb_20201213_1500.png,1,1,852,196,0,934.536575,300.073769,862.0,246.0,34.142723,24.075520,36.089121,21.863759,90.473903,295.468279,,NaT,NaT
4,../fromgcloud/airmass_rgb_20201213_1505.png,1,1,852,196,0,934.536575,300.073769,862.0,246.0,34.142723,24.075520,36.089121,21.863759,90.473903,295.468279,,NaT,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1871,../fromgcloud/airmass_rgb_20211030_1240.png,1,1,639,196,0,778.808334,287.880577,742.0,274.0,34.451840,18.709217,34.946330,17.496532,39.338580,123.748143,,NaT,NaT
1872,../fromgcloud/airmass_rgb_20211030_1245.png,1,1,639,196,0,778.808334,287.880577,742.0,274.0,34.451840,18.709217,34.946330,17.496532,39.338580,123.748143,,NaT,NaT
1873,../fromgcloud/airmass_rgb_20211030_1250.png,1,1,639,196,0,778.808334,287.880577,742.0,274.0,34.451840,18.709217,34.946330,17.496532,39.338580,123.748143,,NaT,NaT
1874,../fromgcloud/airmass_rgb_20211030_1255.png,1,1,639,196,0,778.808334,287.880577,742.0,274.0,34.451840,18.709217,34.946330,17.496532,39.338580,123.748143,,NaT,NaT


In [8]:
def expand_mediterranean_dataframe(df_mapping: pd.DataFrame, builder) -> pd.DataFrame:
    """Espande il dataframe in modo da avere tutte le tile per frame, segnando quelle mancanti."""

    master_df = builder.master_df.copy()
    rename_candidates = {
        'path': ['path_x', 'path_y'],
        'tile_offset_x': ['tile_offset_x_x', 'tile_offset_x_y'],
        'tile_offset_y': ['tile_offset_y_x', 'tile_offset_y_y'],
    }
    rename_map = {}
    for target, options in rename_candidates.items():
        if target not in master_df.columns:
            for opt in options:
                if opt in master_df.columns:
                    rename_map[opt] = target
                    break
    if rename_map:
        master_df = master_df.rename(columns=rename_map)

    missing_cols = [col for col in ['path', 'tile_offset_x', 'tile_offset_y'] if col not in master_df.columns]
    if missing_cols:
        raise KeyError(f"Colonne {missing_cols} mancanti in master_df; colonne disponibili: {list(master_df.columns)}")

    df_data = (
        df_mapping
        .merge(master_df, on=['path', 'tile_offset_x', 'tile_offset_y'], how='left')
        .drop(columns='label', errors='ignore')
        .rename(columns={'tmp_label': 'label'})
    )

    offsets = calc_tile_offsets(stride_x=213, stride_y=196)
    df_offsets = pd.DataFrame(offsets, columns=['tile_offset_x', 'tile_offset_y'])

    def expand_group(group):
        merged = df_offsets.merge(group, on=['tile_offset_x', 'tile_offset_y'], how='left', indicator=True)
        path_value = group['path'].iloc[0]
        merged['path'] = path_value

        for col in ['datetime']:
            if col in group.columns:
                val = group[col].iloc[0]
                merged[col] = merged[col].fillna(val)

        merged[filling_missing_tile] = merged['_merge'] == 'left_only'
        extra_cols = [c for c in group.columns if c not in ['path', 'tile_offset_x', 'tile_offset_y']]
        return merged[['path', 'tile_offset_x', 'tile_offset_y'] + extra_cols + [filling_missing_tile]]

    expanded = (
        df_data
        .groupby('path', group_keys=False)
        .apply(expand_group)
        .reset_index(drop=True)
    )

    if 'predictions' not in expanded.columns:
        expanded['predictions'] = 0
    expanded['predictions'] = expanded['predictions'].astype('Int8')
    expanded['label'] = expanded['label'].astype('Int8')
    return expanded, builder


In [9]:
expanded_df, val_builder = expand_mediterranean_dataframe(df_mapping, test_builder)

  df_data


In [10]:
print(f"Frame totali: {expanded_df['path'].nunique()} immagini - {len(expanded_df)} righe complessive.")
expanded_df.head(2)

Frame totali: 1876 immagini - 22512 righe complessive.


Unnamed: 0,path,tile_offset_x,tile_offset_y,predictions,label,neighboring,track_pred_x,track_pred_y,track_target_x,track_target_y,...,frame_datetime,video_end_datetime,datetime,x_pix,y_pix,source,id_cyc_unico,start_time,end_time,filled_gray
0,../fromgcloud/airmass_rgb_20201213_1445.png,0,0,,,,,,,,...,NaT,NaT,2020-12-13 14:45:00,,,,,NaT,NaT,True
1,../fromgcloud/airmass_rgb_20201213_1445.png,213,0,,,,,,,,...,NaT,NaT,2020-12-13 14:45:00,,,,,NaT,NaT,True


In [11]:
expanded_df = expanded_df.sort_values('datetime').reset_index(drop=True)
print(f"Ordinati {len(expanded_df)} frame complessivi da {expanded_df['datetime'].min()} a {expanded_df['datetime'].max()}")
expanded_df.head(2)


Ordinati 22512 frame complessivi da 2020-12-13 14:45:00 a 2021-10-30 13:00:00


Unnamed: 0,path,tile_offset_x,tile_offset_y,predictions,label,neighboring,track_pred_x,track_pred_y,track_target_x,track_target_y,...,frame_datetime,video_end_datetime,datetime,x_pix,y_pix,source,id_cyc_unico,start_time,end_time,filled_gray
0,../fromgcloud/airmass_rgb_20201213_1445.png,0,0,,,,,,,,...,NaT,NaT,2020-12-13 14:45:00,,,,,NaT,NaT,True
1,../fromgcloud/airmass_rgb_20201213_1445.png,1065,196,,,,,,,,...,NaT,NaT,2020-12-13 14:45:00,,,,,NaT,NaT,True


In [12]:
def ensure_list(val):
    if isinstance(val, list):
        return list(val)
    if val is None:
        return []
    if isinstance(val, (tuple, set)):
        return list(val)
    if isinstance(val, str):
        stripped = val.strip()
        if not stripped:
            return []
        try:
            parsed = safe_literal_eval(stripped)
        except Exception:
            parsed = stripped
        if isinstance(parsed, (list, tuple, set)):
            return list(parsed)
        return [] if pd.isna(parsed) else [parsed]
    if isinstance(val, float) and pd.isna(val):
        return []
    return [val]

for col in ['x_pix', 'y_pix', 'source']:
    expanded_df[col] = expanded_df[col].apply(ensure_list)

def append_track_row(row):
    sources = list(row['source']) if isinstance(row['source'], list) else ensure_list(row['source'])
    x_vals = list(row['x_pix']) if isinstance(row['x_pix'], list) else ensure_list(row['x_pix'])
    y_vals = list(row['y_pix']) if isinstance(row['y_pix'], list) else ensure_list(row['y_pix'])

    if pd.notna(row.get('track_target_x')) and pd.notna(row.get('track_target_y')):
        x_vals.insert(0, float(row['track_target_x']))
        y_vals.insert(0, float(row['track_target_y']))
        sources.insert(0, 'GT')

    if pd.notna(row.get('track_pred_x')) and pd.notna(row.get('track_pred_y')):
        x_vals.insert(0, float(row['track_pred_x']))
        y_vals.insert(0, float(row['track_pred_y']))
        sources.insert(0, 'PRED')

    row['x_pix'] = x_vals
    row['y_pix'] = y_vals
    row['source'] = sources
    return row

expanded_df = expanded_df.apply(append_track_row, axis=1)


In [13]:
tracked_frames = expanded_df.copy()
tracked_frames['disable_tile_boxes'] = True
tracked_frames = tracked_frames.sort_values('datetime').reset_index(drop=True)
print(f"Frame disponibili per tracking: {tracked_frames['path'].nunique()} immagini su {len(tracked_frames)} righe")
tracked_frames[['path', 'datetime', 'track_pred_x', 'track_pred_y', 'track_err_px']].head()


Frame disponibili per tracking: 1876 immagini su 22512 righe


Unnamed: 0,path,datetime,track_pred_x,track_pred_y,track_err_px
0,../fromgcloud/airmass_rgb_20201213_1445.png,2020-12-13 14:45:00,,,
1,../fromgcloud/airmass_rgb_20201213_1445.png,2020-12-13 14:45:00,,,
2,../fromgcloud/airmass_rgb_20201213_1445.png,2020-12-13 14:45:00,,,
3,../fromgcloud/airmass_rgb_20201213_1445.png,2020-12-13 14:45:00,,,
4,../fromgcloud/airmass_rgb_20201213_1445.png,2020-12-13 14:45:00,,,


In [14]:
# legacy block removed


In [15]:
# legacy block removed


In [16]:
# Genera i frame e il video MP4 (richiede ffmpeg disponibile nel PATH)
make_animation_parallel_ffmpeg(
    tracked_frames,
    output_folder='./anim_frames_tracking',
    nomefile='test_tracking_predictions'
)



>>> Generazione dei frame PNG in anim_frames_tracking_test_tracking_predictions
 abbiamo 1876 gruppi
Tutti i frame sono stati generati
15.45 minuti

>>> Creazione del video MP4 con ffmpeg...


ffmpeg version 7.0.2-static https://johnvansickle.com/ffmpeg/  Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 8 (Debian 8.3.0-6)
  configuration: --enable-gpl --enable-version3 --enable-static --disable-debug --disable-ffplay --disable-indev=sndio --disable-outdev=sndio --cc=gcc --enable-fontconfig --enable-frei0r --enable-gnutls --enable-gmp --enable-libgme --enable-gray --enable-libaom --enable-libfribidi --enable-libass --enable-libvmaf --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-librubberband --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libvorbis --enable-libopus --enable-libtheora --enable-libvidstab --enable-libvo-amrwbenc --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libdav1d --enable-libxvid --enable-libzvbi --enable-libzimg
  libavutil      59.  8.100 / 59.  8.100
  libavcodec     61.  3.100 / 61.  3.100
  libavformat   


Video salvato: test_tracking_predictions.mp4

Cartella anim_frames_tracking_test_tracking_predictions già esistente, non ricreo i frame. Controlla se il video è già stato creato.
Video già esistente: test_tracking_predictions.mp4


[out#0/mp4 @ 0x8664680] video:123657KiB audio:0KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.031508%
frame= 1877 fps= 60 q=-1.0 Lsize=  123696KiB time=00:03:07.44 bitrate=5406.1kbits/s speed=5.95x    
[libx264 @ 0x868c940] frame I:10    Avg QP:15.15  size:232670
[libx264 @ 0x868c940] frame P:476   Avg QP:17.68  size:127488
[libx264 @ 0x868c940] frame B:1391  Avg QP:20.41  size: 45732
[libx264 @ 0x868c940] consecutive B-frames:  1.2%  0.0%  0.2% 98.7%
[libx264 @ 0x868c940] mb I  I16..4:  3.4% 22.6% 74.0%
[libx264 @ 0x868c940] mb P  I16..4:  0.2%  9.2%  8.4%  P16..4: 21.9% 29.7% 30.2%  0.0%  0.0%    skip: 0.5%
[libx264 @ 0x868c940] mb B  I16..4:  0.0%  0.1%  0.3%  B16..8: 18.0% 16.6% 10.3%  direct:32.9%  skip:21.7%  L0:16.9% L1:20.9% BI:62.2%
[libx264 @ 0x868c940] 8x8 transform intra:47.1% inter:54.2%
[libx264 @ 0x868c940] coded y,uvDC,uvAC intra: 93.7% 100.0% 99.7% inter: 71.3% 69.1% 45.4%
[libx264 @ 0x868c940] i16 v,h,dc,p: 26% 21%  9% 43%
[libx264 @ 0x868