# Galleria rapida delle tracce

Notebook derivato da `View_tracking_tiles` per effettuare una revisione visiva del dataset di tracking.
Consente di sfogliare più clip contemporaneamente e di annotare velocemente quelle da conservare.

In [1]:
import os, re
import numpy as np
import pandas as pd

from arguments import prepare_finetuning_args
from dataset.data_manager import BuildTrackingDataset, DataManager
import torch
from torch.utils.data import DataLoader
from dataset.datasets import MedicanesTrackDataset
from dataset.build_dataset import get_cyclone_center_pixel, get_train_test_validation_df
from PIL import Image, ImageDraw
from io import BytesIO

import ipywidgets as widgets
from IPython.display import display
from typing import Sequence, List, Optional, Any

from view_test_tiles import VideoGallerySelector


Impossibile caricare la libreria torch dynamo


In [3]:
# Configuration object passed to the dataset builders and loaders in the cells below.
args = prepare_finetuning_args()

# Paths: input and output directories handed to the dataset builders.
input_dir, output_dir = "../fromgcloud", "../airmassRGB/supervised/"

# File names of the per-split tracking CSVs.
train_csv_name, test_csv_name, val_csv_name = (
    "train_tracking.csv",
    "test_tracking.csv",
    "val_tracking.csv",
)

# Track table; the three time columns are parsed as datetimes on load.
manos_file = "medicane_data_input/medicanes_new_windows.csv"
datetime_columns = ['time', 'start_time', 'end_time']
df_tracks = pd.read_csv(manos_file, parse_dates=datetime_columns)

# Earlier single-CSV build, kept for reference:
#bt = BuildTrackingDataset(type="supervised", args=args)
#bt.prepare_data(df_tracks, input_dir, output_dir)
#bt.create_tracking_csv(output_dir, csv_out)

# Split the tracks into train / test / validation frames, using `id_final` as id column.
# NOTE(review): 0.7 and 0.15 look like split fractions — confirm their meaning and
# order against get_train_test_validation_df.
split_frames = get_train_test_validation_df(df_tracks, 0.7, 0.15, id_col='id_final')
tracks_df_train, tracks_df_test, tracks_df_val = split_frames


Cicloni nel train: 12, cicloni nel test: 3, cicloni nella validation: 3
Train rows: 564, Test rows: 176, Validation rows: 206
23 days 08:45:00
7 days 08:05:00
8 days 13:25:00


In [4]:
def _build_split(split_label, tracks_df, csv_name):
    """Build the tracking set for one split and return its builder.

    Prints the progress banner for *split_label*, runs ``prepare_data`` on
    *tracks_df* with the notebook-level ``input_dir`` / ``output_dir``, then
    writes the tracking CSV *csv_name* with the same options for every split
    (``only_label_1=True`` and ``num_frames=args.num_frames``).
    """
    print(f"\nBuilding {split_label} tracking set (tiles + CSV)...")
    builder = BuildTrackingDataset(type='SUPERVISED', args=args)
    builder.prepare_data(tracks_df, input_dir, output_dir)
    builder.create_tracking_csv(output_dir, csv_name, only_label_1=True, num_frames=args.num_frames)
    return builder


# The builders stay bound to train_b / test_b / val_b because a later cell
# reads their `df_video` attribute.
train_b = _build_split("TRAIN", tracks_df_train, train_csv_name)
test_b = _build_split("TEST", tracks_df_test, test_csv_name)
val_b = _build_split("VAL", tracks_df_val, val_csv_name)



Building TRAIN tracking set (tiles + CSV)...
sorted_metadata_files num :  6629
Creo video senza salvarli...
1)  ->
Sbilanciato, ma comunque tengo traccia dei positivi e negativi
Con cicloni: 29 	 ids: [      0 7001283]
Senza cicloni: 247
276 video per il periodo (effettivo) da 2010-10-12 13:45:00 a 2010-10-13 13:00:00

2)  ->
Sbilanciato, ma comunque tengo traccia dei positivi e negativi
Con cicloni: 56 	 ids: [      0 7001328]
Senza cicloni: 616
672 video per il periodo (effettivo) da 2011-11-05 23:45:00 a 2011-11-08 08:00:00

3)  ->
Sbilanciato, ma comunque tengo traccia dei positivi e negativi
Con cicloni: 42 	 ids: [      0 7001358]
Senza cicloni: 426
468 video per il periodo (effettivo) da 2012-04-13 05:45:00 a 2012-04-14 21:00:00

4)  ->
Sbilanciato, ma comunque tengo traccia dei positivi e negativi
Con cicloni: 39 	 ids: [      0 7001421]
Senza cicloni: 369
408 video per il periodo (effettivo) da 2013-11-18 22:45:00 a 2013-11-20 09:00:00

5)  ->
Sbilanciato, ma comunque tengo t

In [14]:
# Print the unique `id_cyc_unico` values of each split's video table,
# in train, test, validation order.
for split_builder in (train_b, test_b, val_b):
    print(split_builder.df_video.id_cyc_unico.unique())

[      0 7001283 7001328 7001358 7001421 7001461 7001466 7001500 7001521
 7001542 7001575 7001674 7001702]
[      0 7001715 7001716]
[0]


In [15]:
# Tracking CSV loaded and reviewed by the cells below (here: the validation split).
csv_out = val_csv_name

In [16]:
# Build the tracking DataLoader from the generated CSV.
dm = DataManager(is_train=False, args=args, type_t='supervised', specify_data_path=csv_out)
track_loader = dm.get_tracking_dataloader(args)
# Keep a handle on the underlying dataset: the gallery cell fetches clips from it by index.
# NOTE(review): assumes `dm.dataset` is set by get_tracking_dataloader — confirm.
track_ds = dm.dataset


Getting TRACKING dataset (pixel coords)...
[INFO][TrackingDataset] Dropped 1 rows with missing/non-finite coordinates from val_tracking.csv (kept 200/201).
DATASET length: 200
Creo il DistributedSampler con world_size 1 e rank 0
Sampler_train = <torch.utils.data.distributed.DistributedSampler object at 0x7fb65e3f9a00>
Batch_size local: 1


## Selezione simultanea di più clip

La classe seguente crea una galleria di clip video con pulsanti "toggle" per selezionare le tracce da mantenere.
Gli identificativi selezionati (indici o path) possono poi essere salvati e riutilizzati per filtrare il DataFrame del CSV.

In [5]:
# Accumulators filled by the review cells: identifiers of the clips to keep
# (selezionati) and of the clips to discard (esclusi).
selezionati, esclusi = [], []

In [6]:
# Scratch check: (quotient, remainder) of 200 by 36.
# NOTE(review): presumably 200 is the dataset length and 36 a candidate page size — confirm.
divmod(200, 36)

(5, 20)

In [19]:
# Pull one page of clips out of the dataset to populate the gallery.
i = 5             # zero-based page index
max_samples = 20  # clips per page
columns = 6       # gallery grid columns

# Clamp the page to the dataset length so that the last, partial page can be
# shown with an unchanged page size instead of failing with IndexError mid-loop.
page_start = max_samples * i
page_stop = min(page_start + max_samples, len(track_ds))
sample_indices = list(range(page_start, page_stop))
# To review hand-picked clips instead, override the page, e.g.:
#sample_indices = [0, 10, 23, 67]
if not sample_indices:
    raise IndexError(
        f"page {i} starts at index {page_start}, beyond the {len(track_ds)} clips in the dataset"
    )

clips = []
identifiers = []

# Per-channel statistics used to undo the clip normalisation, shaped to
# broadcast over a channel-first (C, T, H, W) clip.
# NOTE(review): presumably the same mean/std the dataset transform applies — confirm.
mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1, 1)
marker_radius = 5
marker_color = (255, 0, 0)

for ds_idx in sample_indices:
    video, coords, folder = track_ds[ds_idx]
    # De-normalise; the clamp guarantees values in [0, 1] for the uint8 conversion below.
    video = (video * std + mean).clamp(0, 1)

    # The first two coordinates are used as the marker centre, in frame pixels.
    px_tile, py_tile = map(float, coords[:2])
    # One bounding box per clip: the same marker is drawn on every frame.
    marker_box = (
        px_tile - marker_radius,
        py_tile - marker_radius,
        px_tile + marker_radius,
        py_tile + marker_radius,
    )

    frames = []
    # (C, T, H, W) -> (T, H, W, C): iterate frame by frame as channel-last images.
    for frame in video.permute(1, 2, 3, 0).cpu().numpy():
        pil = Image.fromarray((frame * 255).astype('uint8'))
        ImageDraw.Draw(pil).ellipse(marker_box, fill=marker_color)
        # Convert back to floats in [0, 1] before handing the frame to the gallery.
        frames.append(np.asarray(pil) / 255.0)
    clips.append(frames)

    # Use the folder path as identifier so it can be matched against the CSV.
    identifiers.append(folder)

selector = VideoGallerySelector(
    clips,
    identifiers=identifiers,
    columns=columns,
    interval=200,
    preview_width=280,  # alternative: 140
    grid_gap='8px'
)
selector.display()


VBox(children=(GridBox(children=(VBox(children=(Image(value=b'GIF89a\x18\x01\x18\x01\x87\x00\x00\xfb\xec\x87\x…

In [17]:
# Record the identifiers returned by the gallery's excluded list.
# Only identifiers not already recorded are appended, so re-running this cell
# for the same page neither duplicates entries nor inflates len(esclusi).
esc = selector.get_excluded_identifiers()
for identifier in esc:
    if identifier not in esclusi:
        esclusi.append(identifier)

In [13]:
# Retrieve the identifiers (e.g. folder paths) of the clips marked in the gallery.
# Only identifiers not already recorded are appended, so re-running this cell
# for the same page neither duplicates entries nor inflates len(selezionati).
sel = selector.get_selected_identifiers()
for identifier in sel:
    if identifier not in selezionati:
        selezionati.append(identifier)

In [20]:
# Display how many identifiers have been excluded so far, followed by the list itself.
len(esclusi), esclusi

(10,
 ['../airmassRGB/supervised/08-11-2021_1300_426_0',
  '../airmassRGB/supervised/08-11-2021_1400_426_0',
  '../airmassRGB/supervised/08-11-2021_1500_426_0',
  '../airmassRGB/supervised/08-11-2021_1600_426_0',
  '../airmassRGB/supervised/28-02-2023_2000_426_0',
  '../airmassRGB/supervised/07-09-2023_1800_639_196',
  '../airmassRGB/supervised/07-09-2023_2000_639_196',
  '../airmassRGB/supervised/07-09-2023_2300_639_196',
  '../airmassRGB/supervised/08-09-2023_0000_639_196',
  '../airmassRGB/supervised/08-09-2023_0100_639_196'])

In [34]:
# Display only the number of selected identifiers (append ", selezionati" to list them too).
len(selezionati)

0

## Uso con il DataFrame del CSV

Gli identificativi ottenuti possono essere utilizzati per filtrare il CSV originale.
Ad esempio, se il path della cartella è presente in una colonna `folder` (nei CSV generati da questo notebook la colonna si chiama `path`), si può eseguire:

```python

df_selezionati = df_tracking[df_tracking['folder'].isin(selezionati)]
```

In alternativa è possibile memorizzare gli indici relativi della galleria tramite
`selector.get_selected_indices()` e unirli con l'indice del DataFrame.

In [21]:
# Load the tracking CSV under review and display it.
df = pd.read_csv(csv_out)
df

Unnamed: 0,path,start,end,x_pix,y_pix
0,../airmassRGB/supervised/08-11-2021_1300_426_0,1,16,12.0,162.0
1,../airmassRGB/supervised/08-11-2021_1400_426_0,1,16,12.0,168.0
2,../airmassRGB/supervised/08-11-2021_1500_426_0,1,16,26.0,174.0
3,../airmassRGB/supervised/08-11-2021_1600_426_0,1,16,12.0,174.0
4,../airmassRGB/supervised/08-11-2021_1700_426_0,1,16,12.0,174.0
...,...,...,...,...,...
196,../airmassRGB/supervised/09-09-2023_2200_639_196,1,16,178.0,148.0
197,../airmassRGB/supervised/09-09-2023_2300_639_196,1,16,186.0,148.0
198,../airmassRGB/supervised/10-09-2023_0000_639_196,1,16,186.0,148.0
199,../airmassRGB/supervised/10-09-2023_0100_639_196,1,16,193.0,148.0


In [22]:
# Keep every row whose `path` is not among the identifiers excluded during the review.
is_excluded = df['path'].isin(esclusi)
df_selezionati = df.loc[~is_excluded]
df_selezionati

Unnamed: 0,path,start,end,x_pix,y_pix
4,../airmassRGB/supervised/08-11-2021_1700_426_0,1,16,12.0,174.0
5,../airmassRGB/supervised/08-11-2021_1800_426_0,1,16,12.0,174.0
6,../airmassRGB/supervised/08-11-2021_1900_426_0,1,16,11.0,181.0
7,../airmassRGB/supervised/08-11-2021_2000_426_0,1,16,11.0,181.0
8,../airmassRGB/supervised/08-11-2021_2100_426_0,1,16,11.0,181.0
...,...,...,...,...,...
196,../airmassRGB/supervised/09-09-2023_2200_639_196,1,16,178.0,148.0
197,../airmassRGB/supervised/09-09-2023_2300_639_196,1,16,186.0,148.0
198,../airmassRGB/supervised/10-09-2023_0000_639_196,1,16,186.0,148.0
199,../airmassRGB/supervised/10-09-2023_0100_639_196,1,16,193.0,148.0


In [23]:
# Save the filtered rows in the working directory as "<csv_out stem>_selezionati.csv".
# The name is derived from csv_out so that reviewing another split cannot
# overwrite this split's file; for "val_tracking.csv" the result is still
# "val_tracking_selezionati.csv".
selected_csv_name = os.path.splitext(os.path.basename(csv_out))[0] + "_selezionati.csv"
df_selezionati.to_csv(selected_csv_name, index=False)

val_tracking_selezionati.csv <br>
test_tracking_selezionati.csv <br>
train_tracking_selezionati.csv <br>

# Separo quelli selezionati da quelli totali per ottenere solo quelli scartati

In [None]:
# Difference between test_tracking.csv and test_tracking_selezionati.csv.
# Produces a CSV with the clips found only in the full set, so that it can be used as a new preds_csv.
def _resolve_csv_path(filename: str) -> str:
    """Return the path from which *filename* should be read.

    The copy under ``args.output_dir`` is returned when it exists on disk;
    otherwise *filename* is returned unchanged.
    """
    in_output_dir = os.path.join(args.output_dir, filename)
    if os.path.exists(in_output_dir):
        return in_output_dir
    return filename

# Locate the full test CSV and its reviewed ("selezionati") counterpart.
test_csv_path = _resolve_csv_path('test_tracking.csv')
selected_csv_path = _resolve_csv_path('test_tracking_selezionati.csv')

print(f"Carico test da: {test_csv_path}")
print(f"Carico selezionati da: {selected_csv_path}")

test_df = pd.read_csv(test_csv_path)
selected_df = pd.read_csv(selected_csv_path)

# The two spellings of the tiles folder that appear in the `path` column.
scratch_prefix = '/leonardo_scratch/fast/IscrC_SAME-D/airmass'
relative_prefix = '../airmassRGB/supervised'

# Make the paths comparable: rewrite the scratch prefix in the reviewed CSV to
# the relative one (literal replacement, not a regular expression).
selected_df['path'] = selected_df['path'].str.replace(scratch_prefix, relative_prefix, regex=False)

# Rows of the full test set whose path does not appear among the reviewed ones.
extra_df = test_df[~test_df['path'].isin(selected_df['path'])].copy()
print(f"Righe totali test: {len(test_df)} | Selezionate: {len(selected_df)} | Solo test: {len(extra_df)}")

# Convert back to the scratch prefix before saving.
extra_df['path'] = extra_df['path'].str.replace(relative_prefix, scratch_prefix, regex=False)

# Write the result next to the test CSV ('.' when it has no directory part).
extra_csv_path = os.path.join(os.path.dirname(test_csv_path) or '.', 'test_tracking_extra.csv')
extra_df.to_csv(extra_csv_path, index=False)
print(f"CSV extra salvato in: {extra_csv_path}")

# Aliases under the preds_* names.
preds_csv_extra = extra_csv_path
preds_df_extra = extra_df
preds_df_extra.head()
