In [2]:
%load_ext autoreload
%autoreload 2

import os
from time import time
from datetime import datetime
import random
import torch
import numpy as np
import pandas as pd
pd.set_option('display.max_colwidth', 500)

import matplotlib.pyplot as plt
from PIL import Image
from IPython.display import HTML
import matplotlib.animation as animation
from matplotlib.animation import FFMpegWriter
import matplotlib
matplotlib.rcParams['animation.embed_limit'] = 2**128


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
from arguments import prepare_finetuning_args, Args
from dataset import build_dataset
from torch.utils.data import DataLoader
from engine_for_finetuning import validation_one_epoch
import models
from timm.models import create_model

from build_dataset import calc_tile_offsets
from view_test_tiles import plot_image, draw_tiles_and_center, create_gif_pil, draw_timestamp_in_bottom_right, display_video_clip
from medicane_utils.geo_const import latcorners, loncorners, x_center, y_center, create_basemap_obj


### Carico il dataset

In [3]:
# Prediction is switched off until the model-loading cell turns it on.
get_prediction = False

# Parse the fine-tuning configuration and build the validation split from it.
args = prepare_finetuning_args()
dataset_val, _ = build_dataset(args=args, is_train=False, test_mode=False)

# Batch size, worker count and memory pinning all come from `args`.
data_loader_val = DataLoader(
    dataset=dataset_val,
    shuffle=True,  # so that random samples are extracted
    drop_last=False,
    batch_size=args.batch_size,
    num_workers=args.num_workers,
    pin_memory=args.pin_mem,
)

In [4]:
# Load the tile-level table. The coordinate and source columns hold list-like text
# (e.g. "[]"), so they are read as plain objects instead of numeric dtypes.
df_data = pd.read_csv(
    "all_data_all_methods_tracks_complete.csv",
    parse_dates=['datetime'],
    dtype=dict(
        path='string',
        tile_offset_x='int16',
        tile_offset_y='int16',
        label='category',
        lat='object',    # no longer float16
        lon='object',    # no longer float16
        x_pix='object',  # no longer Int16
        y_pix='object',  # no longer Int16
        name='string',
        source='string',
    ),
)

In [4]:
# Rewrite the relative prefix stored in the CSV to the local Windows data folder.
# regex=False forces a literal match: read as a regular expression, the dots in
# "../" would match any character and the backslashes in the replacement would be
# interpreted as escape sequences.
df_data["path"] = df_data["path"].str.replace(
    "../fromgcloud/", "E:\\Medicanes_Data\\from_gcloud\\", regex=False
)

In [5]:
df_data

Unnamed: 0.1,Unnamed: 0,path,datetime,tile_offset_x,tile_offset_y,label,lat,lon,x_pix,y_pix,name,source
0,0,E:\Medicanes_Data\from_gcloud\airmass_rgb_20111101_0000.png,2011-11-01,0,0,0,[],[],[],[],,[]
1,1,E:\Medicanes_Data\from_gcloud\airmass_rgb_20111101_0000.png,2011-11-01,224,0,0,[],[],[],[],,[]
2,2,E:\Medicanes_Data\from_gcloud\airmass_rgb_20111101_0000.png,2011-11-01,448,0,0,[],[],[],[],,[]
3,3,E:\Medicanes_Data\from_gcloud\airmass_rgb_20111101_0000.png,2011-11-01,672,0,0,[],[],[],[],,[]
4,4,E:\Medicanes_Data\from_gcloud\airmass_rgb_20111101_0000.png,2011-11-01,896,0,0,[],[],[],[],,[]
...,...,...,...,...,...,...,...,...,...,...,...,...
1177635,1177635,E:\Medicanes_Data\from_gcloud\airmass_rgb_20230912_0000.png,2023-09-12,0,196,0,[],[],[],[],,[]
1177636,1177636,E:\Medicanes_Data\from_gcloud\airmass_rgb_20230912_0000.png,2023-09-12,224,196,0,[],[],[],[],,[]
1177637,1177637,E:\Medicanes_Data\from_gcloud\airmass_rgb_20230912_0000.png,2023-09-12,448,196,0,[],[],[],[],,[]
1177638,1177638,E:\Medicanes_Data\from_gcloud\airmass_rgb_20230912_0000.png,2023-09-12,672,196,0,[],[],[],[],,[]


### carica modello

In [None]:
# We want model predictions in the sample-inspection cell
get_prediction = True

# Instantiate the model object
print(f"Creating model: {args.model} (nb_classes={args.nb_classes})")
model = create_model(
    args.model,
    num_classes=args.nb_classes,
    drop_rate=0.0,
    drop_path_rate=args.drop_path,
    #attn_drop_rate=0.0,
    drop_block_rate=None,
    **args.__dict__
)


device = args.device

# Load the checkpoint weights into the model
checkpoint_path = "output/checkpoint-best.pth"
if os.path.exists(checkpoint_path):
    ckpt = torch.load(checkpoint_path, map_location="cpu")
    # The weights are either stored under the "model" key or the file is the
    # state dict itself, depending on how the checkpoint was saved.
    state_dict = ckpt["model"] if "model" in ckpt else ckpt
    # strict=False tolerates key mismatches, so report both kinds explicitly:
    # otherwise a checkpoint that matches nothing would still look "loaded".
    missing = model.load_state_dict(state_dict, strict=False)
    print(f"Checkpoint loaded. Missing keys: {missing.missing_keys}")
    if missing.unexpected_keys:
        print(f"Unexpected keys ignored: {missing.unexpected_keys}")
    print("Checkpoint caricato correttamente.")
else:
    print("ATTENZIONE: file checkpoint non trovato. Userai i pesi random del modello.")


model.to(device)
model.eval();  # trailing ';' keeps the notebook from printing the whole module repr

### carica tiles

In [45]:
def get_time_from_row(folder_path):
    """Parse the timestamp encoded in the name of a sample folder.

    Parameters
    ----------
    folder_path : sequence of str
        Folder paths of a batch; only the first element is used. The last path
        component must start with ``DD-MM-YYYY_HHMM`` (for example
        ``18-09-2020_0230_0_0``).

    Returns
    -------
    pandas.Timestamp
        Date and time parsed with the format ``%d-%m-%Y %H:%M``.

    Raises
    ------
    IndexError
        If the folder name contains no ``_`` separator.
    ValueError
        If the date/time part does not match the expected format.
    """
    # ntpath accepts both "/" and "\\" as separators on every platform, so Windows
    # paths are split correctly as well; a trailing separator is dropped first so
    # that the last component is never empty.
    import ntpath

    folder_name = ntpath.basename(folder_path[0].rstrip("/\\"))
    parts = folder_name.split('_')
    data = parts[0]
    ora = parts[1]
    ora = ora[:2] + ':' + ora[2:]  # "0230" -> "02:30"
    return pd.to_datetime(data + " " + ora, format="%d-%m-%Y %H:%M")

In [46]:
basemap = create_basemap_obj()
def create_labeled_images_with_tiles(df_grouped, nome_gif):
    """Render one annotated frame per group and save all frames as an animated GIF.

    Each group holds the rows of a single image (one time instant): all of its
    tiles with their labels. Several cyclones may be present; their coordinates
    are repeated on the neighbouring tiles, so only the distinct pairs are kept.

    Parameters
    ----------
    df_grouped : iterable of (path, pandas.DataFrame)
        Pairs such as those produced by ``df.groupby("path")``. Each frame must
        provide the columns ``x_pix``, ``y_pix``, ``label`` and ``datetime``.
    nome_gif : str
        Output path of the GIF file.

    Raises
    ------
    ValueError
        If ``df_grouped`` yields no group, so there is nothing to save.

    Notes
    -----
    Reads the module-level ``basemap`` object created just above.
    """
    lista_immagini = []
    for path_img, group_df in df_grouped:
        # Open the image of this time instant
        img = Image.open(path_img)
        # The index of value_counts() lists every distinct (x_pix, y_pix) pair once
        center_px_list = group_df[['x_pix','y_pix']].value_counts().index.values
        labeled_tiles_offsets = group_df['label'].values

        # .iloc[0] always yields a pandas Timestamp; unique()[0] returns a
        # numpy.datetime64 (which has no strftime) on older pandas versions.
        date_str = group_df['datetime'].iloc[0].strftime(" %H:%M %d-%m-%Y")

        # Draw the tile grid, the cyclone centres and the timestamp
        default_offsets = calc_tile_offsets()
        out_img = draw_tiles_and_center(img, default_offsets,
            cyclone_centers=center_px_list,
            labeled_tiles_offsets=labeled_tiles_offsets
            )
        stamped_img = draw_timestamp_in_bottom_right(out_img, date_str, margin=15)
        pi_img = plot_image(stamped_img, basemap, draw_parallels_meridians=True)
        lista_immagini.append(pi_img)

    if not lista_immagini:
        # Fail with an explicit message instead of an IndexError on lista_immagini[0]
        raise ValueError(f"No image group to render: '{nome_gif}' was not written.")

    lista_immagini[0].save(nome_gif, save_all=True, append_images=lista_immagini[1:], duration=200, loop=0)

In [17]:
samples = next(iter(data_loader_val))  # batch tuple, typically (video, label, folder path, ...)
videos = samples[0]  # presumably a tensor of shape [B, C, T, H, W] -- TODO confirm
labels = samples[1]  # presumably shape [B]
folder_path = samples[2]

# NOTE(review): this rebinds `time`, which the import cell bound to time.time();
# after this line has run, cells that call `time()` raise TypeError until the
# import cell is executed again. The name is kept because the next cell reads it.
time = get_time_from_row(folder_path)

if get_prediction:
    videos = videos.to(args.device)
    with torch.no_grad():
        logits = model(videos)  # (B, nb_classes)
        pred = torch.argmax(logits, dim=1)  # class index for every sample

    pred_class = pred.detach().cpu().numpy()
else:
    pred_class = -1  # placeholder: no model output available

label = labels.detach().cpu().numpy()
print(f"Label reale: {label}, Label predetta: {pred_class}")


# Show the first clip of the batch
v0 = videos[0]  # [C, T, H, W]
v0 = v0.permute(1, 2, 3, 0).cpu()  # [T, H, W, C]

# Min-max rescale to [0, 1] for display; the epsilon avoids a division by zero
v0_min = v0.min()
v0_max = v0.max()
v0_vis = (v0 - v0_min) / (v0_max - v0_min + 1e-5)

video_display = display_video_clip(v0_vis)
display(video_display)

# Report the predicted class: the original printed the true label under this caption
print(f"Classe Predetta: {pred_class}")


FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "C:\Users\Loki\.conda\envs\geo_env\lib\site-packages\torch\utils\data\_utils\worker.py", line 349, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "C:\Users\Loki\.conda\envs\geo_env\lib\site-packages\torch\utils\data\_utils\fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "C:\Users\Loki\.conda\envs\geo_env\lib\site-packages\torch\utils\data\_utils\fetch.py", line 52, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "C:\Users\Daniele.LOKI\Documents\medicanes\VideoMAEv2\dataset\datasets.py", line 660, in __getitem__
    frame_files = sorted(os.listdir(folder_path))
FileNotFoundError: [WinError 3] The system cannot find the path specified: 'E:\\Medicanes_Data\\airmassRGB\\supervised\\18-09-2020_0230_0_0'


In [None]:
# Keep the rows whose timestamp falls in the 80 minutes that end at the sample
# time (both bounds included).
m_inf = df_data["datetime"].ge(time - pd.Timedelta(minutes=80))
m_sup = df_data["datetime"].le(time)
df_res = df_data.loc[m_inf & m_sup]

# One group per source image, rendered into an animated GIF
grouped = df_res.groupby("path", dropna=False)
create_labeled_images_with_tiles(grouped, 'test_from_tile4.gif')

# crea video intero mediterraneo

In [6]:
from view_test_tiles import create_mediterranean_video
# Input directory; no cell visible in this notebook reads it.
# NOTE(review): the rewritten df_data paths point to "E:\Medicanes_Data\from_gcloud"
# (with an underscore) -- confirm which spelling is the real folder.
input_dir = r"E:\Medicanes_Data\fromgcloud"

In [7]:
# Restrict the table to the rows between the two dates, both bounds included.
# NOTE(review): the inclusive upper bound also keeps a frame stamped exactly
# 2021-01-01 00:00, if one exists -- confirm this is intended.
start = pd.to_datetime("2020-01-01")
end = pd.to_datetime("2021-01-01")
m_inf = df_data["datetime"].ge(start)
m_sup = df_data["datetime"].le(end)
df_res = df_data.loc[m_inf & m_sup]

In [8]:
df_res  # show the date-filtered selection (df_data is the unfiltered table)

Unnamed: 0.1,Unnamed: 0,path,datetime,tile_offset_x,tile_offset_y,label,lat,lon,x_pix,y_pix,name,source
126530,126530,E:\Medicanes_Data\from_gcloud\airmass_rgb_20200101_0000.png,2020-01-01 00:00:00,0,0,0,[],[],[],[],,[]
126531,126531,E:\Medicanes_Data\from_gcloud\airmass_rgb_20200101_0000.png,2020-01-01 00:00:00,224,0,0,[],[],[],[],,[]
126532,126532,E:\Medicanes_Data\from_gcloud\airmass_rgb_20200101_0000.png,2020-01-01 00:00:00,448,0,0,[],[],[],[],,[]
126533,126533,E:\Medicanes_Data\from_gcloud\airmass_rgb_20200101_0000.png,2020-01-01 00:00:00,672,0,0,[],[],[],[],,[]
126534,126534,E:\Medicanes_Data\from_gcloud\airmass_rgb_20200101_0000.png,2020-01-01 00:00:00,896,0,0,[],[],[],[],,[]
...,...,...,...,...,...,...,...,...,...,...,...,...
1099305,1099305,E:\Medicanes_Data\from_gcloud\airmass_rgb_20201231_2355.png,2020-12-31 23:55:00,0,196,0,[],[],[],[],,[]
1099306,1099306,E:\Medicanes_Data\from_gcloud\airmass_rgb_20201231_2355.png,2020-12-31 23:55:00,224,196,0,[],[],[],[],,[]
1099307,1099307,E:\Medicanes_Data\from_gcloud\airmass_rgb_20201231_2355.png,2020-12-31 23:55:00,448,196,0,[],[],[],[],,[]
1099308,1099308,E:\Medicanes_Data\from_gcloud\airmass_rgb_20201231_2355.png,2020-12-31 23:55:00,672,196,0,[],[],[],[],,[]


In [9]:
# Keep only the frames acquired exactly on the hour.
# assign() builds a new frame, so the helper column is not written into a slice of
# df_data (which raises SettingWithCopyWarning); 'h' replaces the deprecated 'H'
# frequency alias.
df_res = df_res.assign(dt_floor=df_res['datetime'].dt.floor('h'))
mask = df_res['datetime'] == df_res['dt_floor']
df_h = df_res[mask]
print(f"Abbiamo tenuto soltanto le ore intere: rimangono {len(df_h)} elementi")
# One group per source image
grouped = df_h.groupby("path", dropna=False)

Abbiamo tenuto soltanto le ore intere: rimangono 81060 elementi


Abbiamo 10 tiles per ogni immagine oraria.

In [36]:
# Compare the calendar days of 2020 with the days that have at least one row.
all_days = pd.date_range(start='2020-01-01', end='2020-12-31', freq='D')
giorni = df_res['datetime'].dt.floor('D').unique()
missing_days = all_days.difference(giorni)
if not missing_days.empty:
    print(f"❌ Mancano {len(missing_days)} giorni:")
    print(missing_days)
else:
    print("✅ Tutti i giorni dell'anno sono presenti.")


❌ Mancano 27 giorni:
DatetimeIndex(['2020-01-07', '2020-01-08', '2020-01-09', '2020-01-10',
               '2020-01-11', '2020-01-12', '2020-01-13', '2020-01-14',
               '2020-01-15', '2020-01-16', '2020-01-17', '2020-01-18',
               '2020-01-19', '2020-01-20', '2020-01-21', '2020-01-22',
               '2020-01-23', '2020-01-24', '2020-01-25', '2020-01-26',
               '2020-01-27', '2020-01-28', '2020-01-29', '2020-01-30',
               '2020-01-31', '2020-02-01', '2020-02-02'],
              dtype='datetime64[ns]', freq='D')


In [10]:
# Materialise the groupby as a list of (path, tiles DataFrame) pairs
allg = list(grouped)
len(allg)  # these are the files with a whole-hour timestamp; each group holds its (10) tiles
#short_group = allg[-2700:-2650]

8106

In [106]:
# First 100 groups; no other cell visible in this notebook reads this variable
first_g = allg[:100]

##### verifiche codice

In [85]:
# Take the second (path, tiles DataFrame) pair as the test case for the checks below.
# `df` is the very DataFrame object stored in `allg`, not a copy.
path, df = allg[1]

In [86]:
df

Unnamed: 0.1,Unnamed: 0,path,datetime,tile_offset_x,tile_offset_y,label,lat,lon,x_pix,y_pix,name,source,dt_floor
126650,126650,E:\Medicanes_Data\from_gcloud\airmass_rgb_20200101_0100.png,2020-01-01 01:00:00,0,0,0,[],[],[],[],,[],2020-01-01 01:00:00
126651,126651,E:\Medicanes_Data\from_gcloud\airmass_rgb_20200101_0100.png,2020-01-01 01:00:00,224,0,0,[],[],[],[],,[],2020-01-01 01:00:00
126652,126652,E:\Medicanes_Data\from_gcloud\airmass_rgb_20200101_0100.png,2020-01-01 01:00:00,448,0,0,[],[],[],[],,[],2020-01-01 01:00:00
126653,126653,E:\Medicanes_Data\from_gcloud\airmass_rgb_20200101_0100.png,2020-01-01 01:00:00,672,0,0,[],[],[],[],,[],2020-01-01 01:00:00
126654,126654,E:\Medicanes_Data\from_gcloud\airmass_rgb_20200101_0100.png,2020-01-01 01:00:00,896,0,0,[],[],[],[],,[],2020-01-01 01:00:00
126655,126655,E:\Medicanes_Data\from_gcloud\airmass_rgb_20200101_0100.png,2020-01-01 01:00:00,0,196,0,[],[],[],[],,[],2020-01-01 01:00:00
126656,126656,E:\Medicanes_Data\from_gcloud\airmass_rgb_20200101_0100.png,2020-01-01 01:00:00,224,196,0,[],[],[],[],,[],2020-01-01 01:00:00
126657,126657,E:\Medicanes_Data\from_gcloud\airmass_rgb_20200101_0100.png,2020-01-01 01:00:00,448,196,0,[],[],[],[],,[],2020-01-01 01:00:00
126658,126658,E:\Medicanes_Data\from_gcloud\airmass_rgb_20200101_0100.png,2020-01-01 01:00:00,672,196,0,[],[],[],[],,[],2020-01-01 01:00:00
126659,126659,E:\Medicanes_Data\from_gcloud\airmass_rgb_20200101_0100.png,2020-01-01 01:00:00,896,196,0,[],[],[],[],,[],2020-01-01 01:00:00


In [87]:
# Inject made-up cyclone centres into one tile row to exercise the parsing helpers.
# Work on a copy: `df` is the same DataFrame object that is stored in `allg`, so an
# in-place write would also show up in the frames later passed to
# create_mediterranean_video(allg).
df = df.copy()
df.loc[126650, ['x_pix','y_pix', 'source']] = (
    '[967, 967, 960, 960, 957, 970, 967]',
    '[177, 177, 170, 170, 179, 180, 177]',
    "['CL2', 'CL3', 'CL4', 'CL5', 'CL6', 'CL7', 'CL8']",
)

In [88]:
import ast 
from view_test_tiles import safe_parse, safe_literal_eval, deduplicate_xy_source


In [89]:
# Columns that carry the cyclone centres and the source that reported each of them
center_px_df = df.loc[:, ['x_pix', 'y_pix', 'source']]
center_px_df

Unnamed: 0,x_pix,y_pix,source
126650,"[967, 967, 960, 960, 957, 970, 967]","[177, 177, 170, 170, 179, 180, 177]","['CL2', 'CL3', 'CL4', 'CL5', 'CL6', 'CL7', 'CL8']"
126651,[],[],[]
126652,[],[],[]
126653,[],[],[]
126654,[],[],[]
126655,[],[],[]
126656,[],[],[]
126657,[],[],[]
126658,[],[],[]
126659,[],[],[]


In [90]:
# Apply safe_literal_eval to every cell; the displayed result shows the text turned
# into lists (DataFrame.map works element-wise).
center_px_df_parsed = center_px_df.map(safe_literal_eval)
center_px_df_parsed

Unnamed: 0,x_pix,y_pix,source
126650,"[967, 967, 960, 960, 957, 970, 967]","[177, 177, 170, 170, 179, 180, 177]","[CL2, CL3, CL4, CL5, CL6, CL7, CL8]"
126651,[],[],[]
126652,[],[],[]
126653,[],[],[]
126654,[],[],[]
126655,[],[],[]
126656,[],[],[]
126657,[],[],[]
126658,[],[],[]
126659,[],[],[]


In [91]:
# Row by row, merge the three parallel lists into (x, y, source) tuples without repeats
xy_source_list = center_px_df_parsed.apply(
    lambda tile_row: deduplicate_xy_source(
        tile_row['x_pix'], tile_row['y_pix'], tile_row['source']
    ),
    axis='columns',
)
xy_source_list

126650    [(967, 177, CL8), (960, 170, CL5), (957, 179, CL6), (970, 180, CL7)]
126651                                                                      []
126652                                                                      []
126653                                                                      []
126654                                                                      []
126655                                                                      []
126656                                                                      []
126657                                                                      []
126658                                                                      []
126659                                                                      []
dtype: object

In [93]:
xy_source_list.loc[126650]

[(967, 177, 'CL8'), (960, 170, 'CL5'), (957, 179, 'CL6'), (970, 180, 'CL7')]

In [100]:
# Order the tuples of the test row by their last field (the source name)
sorted_xysource = list(xy_source_list.loc[126650])
sorted_xysource.sort(key=lambda entry: entry[-1])
sorted_xysource

[(960, 170, 'CL5'), (957, 179, 'CL6'), (970, 180, 'CL7'), (967, 177, 'CL8')]

In [103]:
# Print every (x, y, source) tuple, tile by tile, ordered by source name
for tile_centers in xy_source_list:
    for entry in sorted(tile_centers, key=lambda item: item[-1]):
        print(entry)

(960, 170, 'CL5')
(957, 179, 'CL6')
(970, 180, 'CL7')
(967, 177, 'CL8')


### crea video

In [14]:
# Build the animation from all hourly groups and time the call.
# perf_counter is imported here under its own name: the sample-inspection cell
# rebinds `time` to a Timestamp, after which `time()` raises TypeError. It is also
# the clock meant for measuring elapsed intervals.
from time import perf_counter

start = perf_counter()
video = create_mediterranean_video(allg)
end = perf_counter()
#display(video)

In [None]:
# Render the animation inline as JavaScript/HTML; `start` is read by the following
# cell to compute the elapsed minutes.
start = time()
HTML(video.to_jshtml())

In [111]:
# Minutes elapsed since `start` was set, rounded to two decimals
end = time()
round((end-start)/60.0, 2)

0.1

In [15]:
# Export the animation to MP4 through ffmpeg and time the export.
# perf_counter is imported here under its own name: the sample-inspection cell
# rebinds `time` to a Timestamp, after which `time()` raises TypeError.
from time import perf_counter

start = perf_counter()
writer = FFMpegWriter(
    fps=10,
    codec='libx264',
    bitrate=None,             # can stay None when crf is used
    extra_args=[
        '-crf', '18',         # high quality (visually almost lossless)
        '-preset', 'slow',    # better compression
        '-pix_fmt', 'yuv420p' # pixel format compatible with most players
    ])
#writer = FFMpegWriter(fps=10, codec='libx264', extra_args=['-pix_fmt', 'yuv420p'], bitrate=1800) #, executable=r'C:\ffmpeg\bin\ffmpeg.exe')
video.save('animazione_long.mp4', writer=writer)
end = perf_counter()

In [None]:
# Minutes taken by the MP4 export (start/end are set in the previous cell)
round((end-start)/60.0, 2)