## Load and extract images and metadata from the NASA MERRA-2 datasets, including Hurricane Ida

### Libraries

In [1]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
from pathlib import Path
from huggingface_hub import snapshot_download, login
from pathlib import Path
from dataclasses import dataclass
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

from typing import List, Tuple, Dict, Optional


from tqdm import tqdm
from netCDF4 import Dataset, num2date
from datetime import datetime, timedelta

import xarray as xr

  from .autonotebook import tqdm as notebook_tqdm


### Configuration parameters

In [None]:
# ---- CONFIGURATION PARAMETERS----

@dataclass
class Config:
    # Central configuration for the download / extraction pipeline.
    #
    # Names WITHOUT a type annotation are plain class attributes (shared paths
    # and constants). Names WITH an annotation are dataclass fields and can be
    # overridden per instance, e.g. Config(resize_to=224).

    # Directory paths, all relative to the current working directory
    HERE = Path(os.getcwd()).resolve()
    dataset_dir = HERE / "Database"   # where the .nc files are downloaded
    output_dir = HERE / "Outputs"     # where images and CSVs are written
    # OUT_DIR was assigned twice in the past; its final value was the output
    # directory, which is kept here for backward compatibility.
    OUT_DIR = output_dir

    surf_dir = dataset_dir / "surface_dataset"
    press_dir = dataset_dir / "pressure_dataset"
    surf_dir_name = "surface_dataset"
    press_dir_name = "pressure_dataset"
    trajectory_csv = HERE / "Storm_trajectory.csv"

    # Hugging Face access token. Read from the HF_TOKEN environment variable
    # instead of being hard-coded in the notebook; None when the variable is
    # unset or empty (previously a placeholder text was sent as the token).
    hf_token = os.environ.get("HF_TOKEN") or None

    # Output CSV names
    pres_csv: str = "pressure_index.csv"
    sfc_csv: str = "surface_index.csv"
    sfc_flat_csv_name: str = "surface_images_flat.csv"
    pres_flat_csv_name: str = "pressure_images_flat.csv"
    track_csv: str = "track_estimate.csv"

    # Output CSV names used when test=True
    pres_csv_test: str = "test_pressure_index.csv"
    sfc_csv_test: str = "test_surface_index.csv"
    sfc_flat_csv_name_test: str = "test_surface_images_flat.csv"
    pres_flat_csv_name_test: str = "test_pressure_images_flat.csv"
    track_csv_test: str = "test_track_estimate.csv"

    # Output CSV names used when train=True
    pres_csv_train: str = "train_pressure_index.csv"
    sfc_csv_train: str = "train_surface_index.csv"
    sfc_flat_csv_name_train: str = "train_surface_images_flat.csv"
    pres_flat_csv_name_train: str = "train_pressure_images_flat.csv"
    track_csv_train: str = "train_track_estimate.csv"

    # File name filters: substrings looked up in the lower-cased .nc file name
    pressure_token: str = "pres"
    surface_token: str = "sfc"

    # Variables exported from the pressure and surface files (when present)
    pres_vars: Tuple[str, ...] = ("U", "V", "T", "QV", "OMEGA", "PL", "CLOUD", "QI", "QL")
    sfc_vars: Tuple[str, ...] = (
        "LON", "LAT", "U10M", "V10M", "T2M", "QV2M", "PS", "SLP", "TS", "TQI", "TQL", "TQV",
        "GWETROOT", "LAI", "EFLUX", "HFLUX", "Z0M", "PRECTOT", "LWGEM", "LWGAB",
        "LWTUP", "SWGNT", "SWTNT", "PHIS", "FRLAND", "FROCEAN", "FRACI"
    )

    # File creation options
    storm_id_from: str = "parent"      # "stem" -> filename stem, "parent" -> parent folder name
    nonhurricane_label: str = "non_hurricane"
    hurricane_label: str = "hurricane"
    default_storm: str = "Ida"

    # Image options
    resize_to: Optional[int] = None  # e.g., 224 to standardize; None keeps native
    cmap: str = "viridis"

    # Other options
    smooth_window: int = 0                # rolling mean window (0 = off)
    align_tolerance: str = "3h"           # max time gap accepted when matching timestamps
    FILL = 1e14                           # |value| >= FILL is treated as missing data

### Download the datasets

In [3]:
# Download the Hurricane dataset from Hugging Face (.nc, .csv and .txt files)
def download_ibm_nasa_hurricane_dataset(config: Config):
    """
    Download the Hugging Face dataset repo `ibm-nasa-geospatial/hurricane`
    (only `.nc`, `.csv` and `.txt` files) into `<dataset_dir>/Hurricane_Ida`.

    Returns `config.dataset_dir` as a `Path`, i.e. the parent of the folder
    the files are downloaded to.
    """
    dataset_root = Path(config.dataset_dir)
    ida_dir = dataset_root / "Hurricane_Ida"
    # parents=True creates dataset_root as well when it is missing.
    ida_dir.mkdir(parents=True, exist_ok=True)

    download_options = {
        "repo_id": "ibm-nasa-geospatial/hurricane",
        "repo_type": "dataset",
        "local_dir": str(ida_dir),
        "local_dir_use_symlinks": False,
        "allow_patterns": ["**/*.nc", "**/*.csv", "**/*.txt"],
        "resume_download": True,
    }
    snapshot_download(**download_options)

    print(f"Downloaded to: {ida_dir}")
    return dataset_root

# Download complete Merra-2 dataset from Hugging Face (.nc files)
def download_ibm_nasa_prithvi_dataset(config: Config):
    """
    Log in to Hugging Face with `config.hf_token` and download every pressure
    and surface `.nc` file under `merra-2/` of the
    `ibm-nasa-geospatial/Prithvi-WxC-1.0-2300M` repo into `config.dataset_dir`.

    Returns the download directory as a `Path`.
    """
    login(token=config.hf_token)

    target_dir = Path(config.dataset_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # One download per file family: pressure files first, then surface files.
    file_patterns = ("merra-2/MERRA_pres*.nc", "merra-2/MERRA2_sfc*.nc")
    for pattern in file_patterns:
        snapshot_download(
            repo_id="ibm-nasa-geospatial/Prithvi-WxC-1.0-2300M",
            local_dir=str(target_dir),
            local_dir_use_symlinks=False,  # copy files instead of symlinks
            allow_patterns=[pattern],
            resume_download=True,
            token=config.hf_token,
        )

    print(f"Downloaded to: {target_dir}")
    return target_dir

# Download MERRA2 subset in range of dates and save in specific folder if correspond to Hurricane events from Hugging Face (.nc files)
def download_ibm_nasa_prithvi_subset(config: Config, 
                                     start_date: str, 
                                     end_date: str, 
                                     hurricane: str) :
    """
    Download the daily surface and pressure MERRA-2 files between two dates.

    Args:
        config: pipeline configuration (dataset folder, sub-folder names, token).
        start_date: first day to download, formatted as YYYYMMDD.
        end_date: last day to download (inclusive), formatted as YYYYMMDD.
        hurricane: name of the storm folder to download into; when None or
            empty the files go directly under the surface / pressure folders.
    """
    login(token=config.hf_token)

    first_day = datetime.strptime(start_date, "%Y%m%d")
    last_day = datetime.strptime(end_date, "%Y%m%d")
    # Both bounds are midnight timestamps, so the day difference is exact.
    day_stamps = [
        (first_day + timedelta(days=offset)).strftime('%Y%m%d')
        for offset in range((last_day - first_day).days + 1)
    ]

    has_storm = hurricane is not None and hurricane != ""
    base_repo = "ibm-nasa-geospatial/Prithvi-WxC-1.0-2300M"

    for file_type in ("sfc", "pres"):
        # Select output subfolder based on file type
        if file_type == "sfc":
            subfolder = config.surf_dir_name
        else:
            subfolder = config.press_dir_name
        if has_storm:
            print(hurricane)
            subfolder = f"{hurricane}/{subfolder}"
            print(subfolder)
        out_dir = Path(config.dataset_dir) / subfolder
        out_dir.mkdir(parents=True, exist_ok=True)

        # Pressure files use the "MERRA" prefix, surface files use "MERRA2".
        prefix = "MERRA" if file_type == "pres" else "MERRA2"
        date_patterns = []
        for stamp in day_stamps:
            filename = f"{prefix}_{file_type}_{stamp}.nc"
            if not has_storm:
                date_patterns.append(f"{filename}")
            date_patterns.append(f"merra-2/{filename}")

        print(f"Downloading {file_type} files from {start_date} to {end_date}...")
        snapshot_download(
            repo_id=base_repo,
            local_dir=str(out_dir),
            local_dir_use_symlinks=False,
            allow_patterns=date_patterns,
            resume_download=True,
            token=config.hf_token,
        )

    print(f"Downloaded to: {out_dir}")



### Extract images and build CSVs with the metadata from .nc files

In [4]:
# Replace NaNs or thresholds with a fill value
def mask_fill(a: np.ndarray) -> np.ndarray:
    """Return `a` as float32 with missing data replaced by NaN.

    A value counts as missing when it is not finite or when its magnitude is
    at least `Config.FILL`.
    """
    values = np.asarray(a, dtype=np.float32)
    missing = ~np.isfinite(values) | (np.abs(values) >= Config.FILL)
    return np.where(missing, np.nan, values)

# Takes a NumPy array of numerical data and normalizes it to a uint8 format.
def to_uint8_stretch(arr2d: np.ndarray) -> np.ndarray:
    """Contrast-stretch a 2D field to uint8 using its 1st-99th percentiles.

    Missing data (as defined by `mask_fill`) is ignored when computing the
    percentiles and is rendered as 0 in the output.

    Args:
        arr2d: numeric array to convert.

    Returns:
        uint8 array with the same shape; all zeros when there is no finite value.
    """
    x = mask_fill(arr2d)
    if not np.isfinite(x).any():
        return np.zeros_like(x, dtype=np.uint8)

    lo, hi = np.nanpercentile(x, 1), np.nanpercentile(x, 99)
    if not np.isfinite(lo):
        lo = np.nanmin(x)
    if not np.isfinite(hi):
        hi = np.nanmax(x)
    if hi <= lo:
        # Constant field: avoid a zero-width range (division by zero).
        hi = lo + 1.0

    scaled = np.clip((x - lo) / (hi - lo), 0, 1)
    # Casting NaN to an integer type is undefined and platform dependent, so
    # missing pixels are mapped to 0 explicitly before the conversion.
    scaled = np.nan_to_num(scaled, nan=0.0)
    return (scaled * 255.0 + 0.5).astype(np.uint8)

# Save image and resize if the parameter is set
def save_png(u8_2d: np.ndarray, path: Path, resize_to: Optional[int], cmap: str):
    """Write a 2D uint8 array to `path` through `plt.imsave` with colormap `cmap`.

    The parent folder is created when missing. When `resize_to` is not None the
    array is first resized to `resize_to` x `resize_to` with bilinear resampling.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    pixels = u8_2d
    if resize_to is not None:
        resized = Image.fromarray(u8_2d).resize((resize_to, resize_to), Image.BILINEAR)
        pixels = np.asarray(resized)

    plt.imsave(str(path), pixels, cmap=cmap)
 
# Decode time variable 
def decode_time(nc: Dataset) -> List[pd.Timestamp]:
    """Decode the `time` variable of an open dataset into pandas Timestamps.

    Returns an empty list when the dataset has no `time` variable. The
    variable's `units` attribute (None when absent) and `calendar` attribute
    ("standard" when absent) are passed to `num2date`.
    """
    if "time" in nc.variables:
        time_var = nc.variables["time"]
        decoded = num2date(
            time_var[:],
            units=getattr(time_var, "units", None),
            calendar=getattr(time_var, "calendar", "standard"),
        )
        # Go through str() so any date object returned by num2date is accepted.
        return [pd.Timestamp(str(moment)) for moment in decoded]
    return []

# Get the storm id from the file path to include in the CSV files
def storm_id_for(path: Path, strategy: str) -> str:
    """Derive a storm identifier from a file path.

    With strategy "stem" the file name without extension is returned. With any
    other strategy the name of the closest ancestor folder containing
    "hurricane" (case-insensitive) is returned, or "" when there is none.
    """
    if strategy == "stem":
        return path.stem

    # path.parents walks from the immediate parent up to the root.
    hurricane_folders = (
        ancestor.name
        for ancestor in path.parents
        if "hurricane" in ancestor.name.lower()
    )
    return next(hurricane_folders, "")

# Adjust spatial mean calculation missing or outlier values
def spatial_mean(x: np.ndarray) -> float:
    """Return the mean of `x` as a Python float, ignoring missing data.

    Missing data is whatever `mask_fill` turns into NaN.
    """
    return float(np.nanmean(mask_fill(x)))

# Normalize longitude to [-180, 180)
def to_lon180(lonE):
    """Wrap a longitude in degrees into the half-open range [-180, 180)."""
    shifted = lonE + 180.0
    return (shifted % 360.0) - 180.0

# Calculate parameters for additional data
def haversine_km(lat1, lon1, lat2, lon2):
    """Great-circle distance in kilometres between two points given in degrees.

    Uses the haversine formula with a sphere of radius 6371 km.
    """
    earth_radius_km = 6371.0
    lat1_rad = np.radians(lat1)
    lat2_rad = np.radians(lat2)
    delta_lat = np.radians(lat2 - lat1)
    delta_lon = np.radians(lon2 - lon1)

    a = (
        np.sin(delta_lat/2)**2
        + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(delta_lon/2)**2
    )
    central_angle = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a))
    return earth_radius_km * central_angle

# Calculate parameters for additional data
def bearing_deg(lat1, lon1, lat2, lon2):
    """Initial bearing in degrees, in [0, 360), from point 1 to point 2.

    Inputs are latitudes / longitudes in degrees; 0 is north and 90 is east.
    """
    lat1_rad = np.radians(lat1)
    lat2_rad = np.radians(lat2)
    delta_lon = np.radians(lon2 - lon1)

    east = np.sin(delta_lon) * np.cos(lat2_rad)
    north = np.cos(lat1_rad)*np.sin(lat2_rad) - np.sin(lat1_rad)*np.cos(lat2_rad)*np.cos(delta_lon)

    angle = np.degrees(np.arctan2(east, north))
    # arctan2 yields (-180, 180]; shift into [0, 360).
    return (angle + 360.0) % 360.0

# Return the latitude and longitude at a given time from the manually built trajectory dataset that supports the model
def get_lat_lon_at_time(df, timestamp, tolerance="3h"):
    """Look up the track position closest in time to `timestamp`.

    Args:
        df: trajectory table with a time column named "time" (or "Timestamp")
            and position columns "lat"/"lon" (or "storm_lat"/"storm_lon").
            The caller's DataFrame is never modified.
        timestamp: anything `pd.Timestamp` accepts; time zone info is dropped.
        tolerance: maximum accepted time gap, as a `pd.Timedelta` string.

    Returns:
        `(lat, lon)` as floats with the longitude wrapped to [-180, 180), or
        `(nan, nan)` when no track point lies within `tolerance` or the
        matched position is missing.
    """
    column_renames = {}
    if "time" not in df.columns and "Timestamp" in df.columns:
        column_renames["Timestamp"] = "time"
    if "storm_lat" in df.columns and "storm_lon" in df.columns:
        column_renames["storm_lat"] = "lat"
        column_renames["storm_lon"] = "lon"

    # rename() and assign() both return new frames, so the parsed time column
    # is written to a private copy instead of the caller's DataFrame.
    track = df.rename(columns=column_renames)
    track = track.assign(
        time=pd.to_datetime(track["time"], errors="coerce").dt.tz_localize(None)
    )
    track = track.dropna(subset=["time"]).sort_values("time")

    ts = pd.Timestamp(timestamp).tz_localize(None)
    # merge_asof needs both keys to share one dtype; align the probe with the
    # track's datetime resolution.
    probe = pd.DataFrame({"time": pd.Series([ts]).astype(track["time"].dtype)})

    # nearest match within tolerance
    m = pd.merge_asof(
        probe, track, on="time", direction="nearest",
        tolerance=pd.Timedelta(tolerance)
    )
    if m.empty or pd.isna(m.loc[0, "lat"]) or pd.isna(m.loc[0, "lon"]):
        return np.nan, np.nan

    lon = float(m.loc[0, "lon"])
    lon = (lon + 180.0) % 360.0 - 180.0

    return float(m.loc[0, "lat"]), lon

# Extract Pressure images and build CSV with metadata 
def export_pressure(cfg: Config, load_images: bool, list_excluded: List[str], test: bool, train: bool) -> str:
    """Export pressure-level fields to PNG images and write an index CSV.

    Every `.nc` file under `cfg.dataset_dir` whose lower-cased name contains
    `cfg.pressure_token` is processed, unless one of its ancestor folder names
    contains an entry of `list_excluded`. One CSV row is produced per
    (time step, level) with the image path and spatial mean of each variable
    of `cfg.pres_vars` present in the file.

    Args:
        cfg: pipeline configuration.
        load_images: when True, write the PNG of each slice that does not exist yet.
        list_excluded: substrings of ancestor folder names to skip.
        test: write to the test CSV name (takes precedence over `train`).
        train: write to the train CSV name.

    Returns:
        Path of the written CSV, as a string.

    Raises:
        RuntimeError: when no row could be produced.
    """
    nc_root = Path(cfg.dataset_dir)
    out_root = Path(cfg.output_dir)
    img_root = out_root / "images"
    out_root.mkdir(parents=True, exist_ok=True)
    img_root.mkdir(parents=True, exist_ok=True)
    df_trajectory = pd.read_csv(Path(cfg.trajectory_csv))

    nc_files = [p for p in nc_root.rglob("*.nc") if cfg.pressure_token in p.name.lower()]
    nc_files = [
        f for f in nc_files
        if not any(excl in parent.name for parent in f.parents for excl in list_excluded)
    ]

    rows: List[Dict] = []

    for pfile in tqdm(nc_files, desc="Pressure (.pres)"):
        with Dataset(str(pfile), "r") as nc:
            times = decode_time(nc)
            nlev = nc.dimensions["lev"].size if "lev" in nc.dimensions else 0
            lev_vals = nc.variables["lev"][:] if "lev" in nc.variables else np.arange(nlev)

            sid = storm_id_for(pfile, cfg.storm_id_from)
            present_vars = [v for v in cfg.pres_vars if v in nc.variables]

            for ti, ts in enumerate(times):
                if nlev == 0:
                    # No vertical levels: this file contributes no rows.
                    break

                # The storm position and label depend only on the timestamp,
                # so they are resolved once per time step, not once per level.
                lat_c, lon_c = get_lat_lon_at_time(df_trajectory, ts, cfg.align_tolerance)
                # NOTE(review): get_lat_lon_at_time returns NaN (not None) when
                # no track point matches, so the `is not None` checks never
                # fail and the label depends on `sid` alone. Confirm whether a
                # NaN position should yield the non-hurricane label.
                if sid != "" and lat_c is not None and lon_c is not None:
                    label = cfg.hurricane_label
                else:
                    label = cfg.nonhurricane_label
                # NOTE(review): image names carry only the date. If a file
                # holds several time steps for one day they share one image
                # path, and since existing images are not rewritten the first
                # slice written is kept.
                day = ts.strftime('%Y%m%d')

                for zi in range(nlev):
                    row: Dict[str, object] = {
                        "Timestamp": ts,
                        "Level": float(lev_vals[zi]),
                        "Label": label,
                        "Storm_id": sid,
                        "Latitude": lat_c,
                        "Longitud": lon_c,   # normalized to [-180,180]
                    }

                    # ---- write images + means for requested vars ----
                    for v in present_vars:
                        var = nc.variables[v]
                        if var.ndim == 4:    # (time, lev, lat, lon)
                            arr2d = mask_fill(var[ti, zi, :, :])
                        elif var.ndim == 3:  # (time, lat, lon) (rare in pres set)
                            arr2d = mask_fill(var[ti, :, :])
                        elif var.ndim == 2:  # (lat, lon) static
                            arr2d = mask_fill(var[:, :])
                        else:
                            continue

                        img_name = f"{sid}_pres_{v}_lev{zi}_{day}.png"
                        img_path = img_root / sid / "pres" / v / img_name
                        if load_images and not img_path.exists():
                            save_png(to_uint8_stretch(arr2d), img_path, cfg.resize_to, cfg.cmap)
                        row[f"img_{v}"] = img_path.relative_to(out_root).as_posix()
                        row[f"pres_{v}_mean"] = spatial_mean(arr2d)

                    rows.append(row)

    if not rows:
        raise RuntimeError("No pressure rows produced. Check pressure_token/pres_vars.")

    df = pd.DataFrame(rows)
    base_cols = ["Timestamp", "Level", "Label", "Storm_id", "Latitude", "Longitud"]
    img_cols = [c for c in df.columns if c.startswith("img_")]
    mean_cols = [c for c in df.columns if c.startswith("pres_")]
    df = df[base_cols + img_cols + mean_cols]

    # `test` wins over `train` when both flags are set.
    if test:
        csv_name = cfg.pres_csv_test
    elif train:
        csv_name = cfg.pres_csv_train
    else:
        csv_name = cfg.pres_csv
    out_csv = Path(cfg.output_dir) / csv_name

    df.to_csv(out_csv, index=False)
    print(f"[PRESSURE] Wrote CSV: {out_csv}  rows={len(df)}")
    print(f"[PRESSURE] Images under: {out_root/'images'}")
    return str(out_csv)



# Extract Surface images and build CSV with metadata 
def export_surface(cfg: Config, load_images: bool, list_excluded: List[str], test: bool, train: bool) -> str:
    """Export surface fields to PNG images and write an index CSV.

    Every `.nc` file under `cfg.dataset_dir` whose lower-cased name contains
    `cfg.surface_token` is processed, unless one of its ancestor folder names
    contains an entry of `list_excluded`. One CSV row is produced per time
    step with the image path and spatial mean of each variable of
    `cfg.sfc_vars` present in the file.

    Args:
        cfg: pipeline configuration.
        load_images: when True, write the PNG images.
        list_excluded: substrings of ancestor folder names to skip.
        test: write to the test CSV name (takes precedence over `train`).
        train: write to the train CSV name.

    Returns:
        Path of the written CSV, as a string.

    Raises:
        RuntimeError: when no row could be produced.
    """
    nc_root = Path(cfg.dataset_dir)
    out_root = Path(cfg.output_dir)
    img_root = out_root / "images"
    out_root.mkdir(parents=True, exist_ok=True)
    img_root.mkdir(parents=True, exist_ok=True)
    df_trajectory = pd.read_csv(Path(cfg.trajectory_csv))

    nc_files = [p for p in nc_root.rglob("*.nc") if cfg.surface_token in p.name.lower()]
    nc_files = [
        f for f in nc_files
        if not any(excl in parent.name for parent in f.parents for excl in list_excluded)
    ]

    rows: List[Dict] = []

    for sfile in tqdm(nc_files, desc="Surface (.sfc)"):
        with Dataset(str(sfile), "r") as nc:
            times = decode_time(nc)
            sid = storm_id_for(sfile, cfg.storm_id_from)
            present_vars = [v for v in cfg.sfc_vars if v in nc.variables]

            # Static (2D) fields do not change between time steps: keep their
            # image path and mean so both are computed once per file.
            static_cache: Dict[str, Tuple[str, float]] = {}

            for ti, ts in enumerate(times):
                lat_c, lon_c = get_lat_lon_at_time(df_trajectory, ts, cfg.align_tolerance)
                row: Dict[str, object] = {"Timestamp": ts, "Latitude": lat_c, "Longitud": lon_c, "Storm_id": sid}
                day = ts.strftime('%Y%m%d')

                for v in present_vars:
                    var = nc.variables[v]
                    if var.ndim == 3:   # (time,lat,lon)
                        arr2d = mask_fill(var[ti, :, :])
                        # NOTE(review): the name carries only the date and the
                        # image is rewritten on every pass, so if a file holds
                        # several time steps for one day the last one
                        # overwrites the earlier images.
                        img_name = f"{sid}_sfc_{v}_{day}.png"
                        img_path = img_root / sid / "sfc" / v / img_name
                        if load_images:
                            save_png(to_uint8_stretch(arr2d), img_path, cfg.resize_to, cfg.cmap)
                        row[f"img_{v}"] = img_path.relative_to(out_root).as_posix()
                        row[f"sfc_{v}_mean"] = spatial_mean(arr2d)
                    elif var.ndim == 2: # static field
                        if v not in static_cache:
                            arr2d = mask_fill(var[:, :])
                            # Named after the first time step that reaches it.
                            img_name = f"{sid}_sfc_{v}_static_{day}.png"
                            img_path = img_root / sid / "sfc" / v / img_name
                            if load_images and not img_path.exists():
                                save_png(to_uint8_stretch(arr2d), img_path, cfg.resize_to, cfg.cmap)
                            static_cache[v] = (
                                img_path.relative_to(out_root).as_posix(),
                                spatial_mean(arr2d),
                            )
                        row[f"img_{v}"], row[f"sfc_{v}_mean"] = static_cache[v]
                    else:
                        continue

                rows.append(row)

    if not rows:
        raise RuntimeError("No surface rows produced")

    df = pd.DataFrame(rows)
    base_cols = ["Timestamp", "Storm_id", "Latitude", "Longitud"]
    img_cols = [c for c in df.columns if c.startswith("img_")]
    mean_cols = [c for c in df.columns if c.startswith("sfc_")]
    df = df[base_cols + img_cols + mean_cols]

    # `test` wins over `train` when both flags are set.
    if test:
        csv_name = cfg.sfc_csv_test
    elif train:
        csv_name = cfg.sfc_csv_train
    else:
        csv_name = cfg.sfc_csv
    out_csv = Path(cfg.output_dir) / csv_name

    df.to_csv(out_csv, index=False)
    print(f"[SURFACE]  Wrote CSV: {out_csv}  rows={len(df)}")
    print(f"[SURFACE]  Images under: {out_root/'images'}")
    return str(out_csv)


# List the extracted pressure images with their per-variable values in a CSV, to verify the data behind each image
def make_pressure_image_flat_csv(cfg, list_excluded: Optional[List[str]] = None, test: bool = False, train: bool = False) -> str:
    """Write one CSV row per existing pressure image, for data verification.

    For every (time step, level) of every pressure `.nc` file, each variable
    image found under `<output_dir>/images` yields a row holding the image
    path, the timestamp and the spatial mean of every present variable for
    that slice.

    Args:
        cfg: pipeline configuration.
        list_excluded: substrings of ancestor folder names to skip
            (None means no exclusion).
        test: write to the test CSV name (takes precedence over `train`).
        train: write to the train CSV name.

    Returns:
        Path of the written CSV, as a string.

    Raises:
        RuntimeError: when no image was found.
    """
    excluded = list_excluded or []  # avoids a shared mutable default argument

    nc_root = Path(cfg.dataset_dir)
    out_root = Path(cfg.output_dir)
    img_root = out_root / "images"
    rows = []

    pres_files = [p for p in nc_root.rglob("*.nc") if cfg.pressure_token in p.name.lower()]
    pres_files = [
        f for f in pres_files
        if not any(excl in parent.name for parent in f.parents for excl in excluded)
    ]

    for pfile in pres_files:
        with Dataset(str(pfile), "r") as nc:
            times = decode_time(nc)
            if "lev" not in nc.dimensions:
                continue
            nlev = nc.dimensions["lev"].size
            sid = storm_id_for(Path(pfile), cfg.storm_id_from)
            present_vars = [v for v in cfg.pres_vars if v in nc.variables]

            for ti, ts in enumerate(times):
                day = ts.strftime('%Y%m%d')
                for zi in range(nlev):
                    # Find the images of this slice first: when none exists
                    # there is no row to write and the means can be skipped.
                    image_paths = []
                    for v in present_vars:
                        img_name = f"{sid}_pres_{v}_lev{zi}_{day}.png"
                        img_path = img_root / sid / "pres" / v / img_name
                        if img_path.exists():
                            image_paths.append(img_path.relative_to(out_root).as_posix())
                    if not image_paths:
                        continue

                    values = {}
                    for v in present_vars:
                        var = nc.variables[v]
                        if var.ndim == 4:          # (time, lev, lat, lon)
                            values[v] = spatial_mean(var[ti, zi, :, :])
                        elif var.ndim == 3:        # (time, lat, lon)
                            values[v] = spatial_mean(var[ti, :, :])
                        elif var.ndim == 2:        # (lat, lon) static
                            values[v] = spatial_mean(var[:, :])
                        else:
                            values[v] = np.nan

                    for rel in image_paths:
                        row = {"Image_name": rel, "Timestamp": ts}
                        row.update(values)
                        rows.append(row)

    if not rows:
        raise RuntimeError("No pressure images found.")

    df = pd.DataFrame(rows)
    var_cols = [c for c in df.columns if c not in ("Image_name", "Timestamp")]
    df = df[["Image_name", "Timestamp"] + var_cols]

    # `test` wins over `train` when both flags are set.
    if test:
        csv_name = cfg.pres_flat_csv_name_test
    elif train:
        csv_name = cfg.pres_flat_csv_name_train
    else:
        csv_name = cfg.pres_flat_csv_name
    out_csv = Path(cfg.output_dir) / csv_name

    df.to_csv(out_csv, index=False)
    print(f"[PRESSURE-IMAGES-FLAT] {out_csv}  rows={len(df)}")
    return str(out_csv)


# List the extracted surface images with their per-variable values in a CSV, to verify the data behind each image
def make_surface_image_flat_csv(cfg, list_excluded: Optional[List[str]] = None, test: bool = False, train: bool = False) -> str:
    """Write one CSV row per existing surface image, for data verification.

    For every time step of every surface `.nc` file, each variable image found
    under `<output_dir>/images` yields a row holding the image path, the
    timestamp and the spatial mean of every present variable at that time.

    Args:
        cfg: pipeline configuration.
        list_excluded: substrings of ancestor folder names to skip
            (None means no exclusion).
        test: write to the test CSV name (takes precedence over `train`).
        train: write to the train CSV name.

    Returns:
        Path of the written CSV, as a string.

    Raises:
        RuntimeError: when no image was found.
    """
    excluded = list_excluded or []  # avoids a shared mutable default argument

    nc_root = Path(cfg.dataset_dir)
    out_root = Path(cfg.output_dir)
    img_root = out_root / "images"
    rows = []

    sfc_files = [p for p in nc_root.rglob("*.nc") if cfg.surface_token in p.name.lower()]
    sfc_files = [
        f for f in sfc_files
        if not any(excl in parent.name for parent in f.parents for excl in excluded)
    ]

    for sfile in sfc_files:
        with Dataset(str(sfile), "r") as nc:
            times = decode_time(nc)
            if not times:
                # No time steps means no rows; also guards the times[0] below.
                continue
            sid = storm_id_for(Path(sfile), cfg.storm_id_from)
            present_vars = [v for v in cfg.sfc_vars if v in nc.variables]

            # Static (2D) fields: one mean and one image per file, named after
            # the first time step.
            static_val = {}
            static_rel = {}
            first_day = times[0].strftime('%Y%m%d')
            for v in present_vars:
                var = nc.variables[v]
                if var.ndim != 2:
                    continue
                static_val[v] = spatial_mean(var[:, :])
                img_name = f"{sid}_sfc_{v}_static_{first_day}.png"
                img_path = img_root / sid / "sfc" / v / img_name
                if img_path.exists():
                    static_rel[v] = img_path.relative_to(out_root).as_posix()

            for ti, ts in enumerate(times):
                day = ts.strftime('%Y%m%d')

                values = {}
                for v in present_vars:
                    var = nc.variables[v]
                    if var.ndim == 3:      # (time, lat, lon)
                        values[v] = spatial_mean(var[ti, :, :])
                    elif var.ndim == 2:    # static (lat, lon)
                        values[v] = static_val.get(v, np.nan)
                    else:
                        values[v] = np.nan

                for v in present_vars:
                    var = nc.variables[v]
                    if var.ndim == 3:
                        img_name = f"{sid}_sfc_{v}_{day}.png"
                        img_path = img_root / sid / "sfc" / v / img_name
                        if not img_path.exists():
                            continue
                        rel = img_path.relative_to(out_root).as_posix()
                    elif var.ndim == 2:
                        rel = static_rel.get(v)
                        if rel is None:
                            continue
                    else:
                        continue

                    # Every variable column carries its value for this timestamp.
                    row = {"Image_name": rel, "Timestamp": ts}
                    row.update(values)
                    rows.append(row)

    if not rows:
        raise RuntimeError("No surface images found.")

    df = pd.DataFrame(rows)
    var_cols = [c for c in df.columns if c not in ("Image_name", "Timestamp")]
    df = df[["Image_name", "Timestamp"] + var_cols]

    # Same precedence as the other exporters: `test` wins over `train`.
    if test:
        csv_name = cfg.sfc_flat_csv_name_test
    elif train:
        csv_name = cfg.sfc_flat_csv_name_train
    else:
        csv_name = cfg.sfc_flat_csv_name
    out_csv = Path(cfg.output_dir) / csv_name

    df.to_csv(out_csv, index=False)
    print(f"[SURFACE-IMAGES-FLAT] {out_csv}  rows={len(df)}")
    return str(out_csv)


# Extract data and build time index from .nc files
def index_times(nc_paths: List[Path]) -> pd.DataFrame:
    """Build a time index over a list of `.nc` files.

    Args:
        nc_paths: files to open and read the time steps from.

    Returns:
        DataFrame sorted by time with columns `file` (path as string),
        `t_index` (position of the time step inside its file) and `time`.
        The columns are present even when no time step was found.
    """
    rows = []
    for p in nc_paths:
        with Dataset(str(p), "r") as nc:
            times = decode_time(nc)
        rows.extend(
            {"file": str(p), "t_index": i, "time": ts}
            for i, ts in enumerate(times)
        )

    # Explicit columns keep sort_values("time") valid when `rows` is empty.
    df = pd.DataFrame(rows, columns=["file", "t_index", "time"])
    return df.sort_values("time").reset_index(drop=True)

# Build tracking data of the records from surface and pressure files and trajectory csv
def build_tracking(cfg: Config, list_excluded: List[str], test: bool, train: bool) -> pd.DataFrame:
    """Build the storm track table for every surface time step and save it.

    Each time step found in the surface files gets the position looked up in
    the trajectory CSV, a label, the storm id taken from the file path, and
    the speed / heading relative to the previous time step.

    Args:
        cfg: pipeline configuration.
        list_excluded: substrings of ancestor folder names to skip
            (applied to the surface files).
        test: write to the test CSV name (takes precedence over `train`).
        train: write to the train CSV name.

    Returns:
        DataFrame with columns time, Label, Storm_id, lat, lon, speed_ms,
        speed_kt and heading_deg; the same table is written to the CSV.

    Raises:
        FileNotFoundError: when no surface file is found.
    """
    surface_dir = cfg.surf_dir
    pressure_dir = cfg.press_dir
    df_trajectory = pd.read_csv(Path(cfg.trajectory_csv))
    nc_root = Path(cfg.dataset_dir)

    sfc_files = sorted(p for p in nc_root.rglob("*.nc") if cfg.surface_token in p.name.lower())
    sfc_files = [
        f for f in sfc_files
        if not any(excl in parent.name for parent in f.parents for excl in list_excluded)
    ]
    if not sfc_files:
        raise FileNotFoundError(f"No surface files with token '{cfg.surface_token}' under {surface_dir}")

    pres_files = []
    if pressure_dir is not None:
        pres_files = sorted(p for p in nc_root.rglob("*.nc") if cfg.pressure_token in p.name.lower())

    s_index = index_times(sfc_files)  # file, t_index, time
    p_index = index_times(pres_files) if pres_files else pd.DataFrame(columns=["file", "t_index", "time"])

    # Align pressure to surface by nearest time within tolerance.
    # NOTE(review): the resulting *_pres columns are not read below; the
    # alignment is kept as-is, confirm whether it is still needed.
    s_index = s_index.sort_values("time")
    if not p_index.empty:
        p_index = p_index.sort_values("time")
        merged = pd.merge_asof(
            s_index, p_index, on="time", direction="nearest",
            suffixes=("", "_pres"),
            tolerance=pd.Timedelta(cfg.align_tolerance)
        )
    else:
        merged = s_index.copy()
        merged["file_pres"] = np.nan
        merged["t_index_pres"] = np.nan

    # Only the timestamp and the file path are needed per row, so the surface
    # files are not reopened here (index_times already read them).
    rows = []
    for raw_time, file_path in zip(merged["time"], merged["file"]):
        ts = pd.Timestamp(raw_time)
        sid = storm_id_for(Path(file_path), cfg.storm_id_from)
        lat_s, lon_s = get_lat_lon_at_time(df_trajectory, ts, cfg.align_tolerance)
        # NOTE(review): get_lat_lon_at_time returns NaN (not None) when no
        # track point matches, so the `is not None` checks never fail and the
        # label depends on `sid` alone. Confirm the intended rule.
        if sid != "" and lat_s is not None and lon_s is not None:
            label = cfg.hurricane_label
        else:
            label = cfg.nonhurricane_label
        rows.append({
            "time": ts,
            "Label": label,
            "Storm_id": sid,
            "lat": lat_s,
            "lon": lon_s,
        })

    # Compute speed and heading across the concatenated timeline
    df = pd.DataFrame(rows, columns=["time", "Label", "Storm_id", "lat", "lon"])
    df = df.sort_values("time").reset_index(drop=True)

    n_rows = len(df)
    spd_ms = [np.nan] * n_rows
    spd_kt = [np.nan] * n_rows
    hdg = [np.nan] * n_rows
    lats = df["lat"].to_numpy()
    lons = df["lon"].to_numpy()
    for i in range(1, n_rows):
        km = haversine_km(lats[i-1], lons[i-1], lats[i], lons[i])
        dt_h = (df.loc[i, "time"] - df.loc[i-1, "time"]).total_seconds() / 3600.0
        if dt_h <= 0 or not np.isfinite(km):
            # Duplicate timestamp or unknown position: leave NaN.
            continue
        ms = (km * 1000.0) / (dt_h * 3600.0)
        spd_ms[i] = ms
        spd_kt[i] = ms * 1.94384  # metres per second -> knots
        hdg[i] = bearing_deg(lats[i-1], lons[i-1], lats[i], lons[i])

    df["speed_ms"] = spd_ms
    df["speed_kt"] = spd_kt
    df["heading_deg"] = hdg

    # Optional smoothing (simple rolling mean)
    if cfg.smooth_window and cfg.smooth_window > 1:
        for col in ["lat", "lon", "speed_ms", "speed_kt", "heading_deg"]:
            df[col] = df[col].rolling(cfg.smooth_window, center=True, min_periods=1).mean()

    # Same precedence as the exporters: `test` wins over `train`.
    if test:
        csv_name = cfg.track_csv_test
    elif train:
        csv_name = cfg.track_csv_train
    else:
        csv_name = cfg.track_csv
    out_dir = Path(cfg.output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)  # may run before any export
    out_csv = out_dir / csv_name

    df.to_csv(out_csv, index=False)

    return df

### Functions to call the extraction of images and metadata CSVs 

In [5]:

def download_hurricane_dataset():
    """Download the Hurricane dataset using the default configuration."""
    download_ibm_nasa_hurricane_dataset(Config())
    print("Dataset download completed.")

def download_prithvi_dataset():
    """Download all MERRA-2 pressure and surface files using the default configuration."""
    download_ibm_nasa_prithvi_dataset(Config())
    print("Dataset download completed.")


def download_prithvi_dataset_times(start_date: str, end_date: str, hurricane: str = ""):
    """Download the MERRA-2 files between two YYYYMMDD dates (inclusive).

    `hurricane` is the storm folder to download into; leave it empty for
    periods without a hurricane.
    """
    download_ibm_nasa_prithvi_subset(Config(), start_date, end_date, hurricane)
    print("Dataset download completed.")

def extract_images_and_metadata(load_images: bool = True, generate_flat_csvs: bool = True, excluded_list: Optional[List[str]] = None):
    """Export pressure and surface images plus their index CSVs (full dataset).

    Args:
        load_images: when True, write the PNG images.
        generate_flat_csvs: when True, also write the per-image verification CSVs.
        excluded_list: substrings of ancestor folder names to skip
            (None means no exclusion).
    """
    excluded = excluded_list or []  # avoids a shared mutable default argument
    config = Config()

    export_pressure(config, load_images, excluded, test=False, train=False)
    print("Pressure files exported")
    export_surface(config, load_images, excluded, test=False, train=False)
    print("Surface files exported")

    if generate_flat_csvs:
        make_pressure_image_flat_csv(config, excluded, test=False, train=False)
        print("Pressure flat files exported")
        make_surface_image_flat_csv(config, excluded, test=False, train=False)
        print("Surface flat files exported")

    print("Full images and metadata extraction completed.")


def extract_images_and_metadata_test(load_images: bool = True, generate_flat_csvs: bool = True, excluded_list: Optional[List[str]] = None):
    """Export pressure and surface images plus their index CSVs (test split).

    Args:
        load_images: when True, write the PNG images.
        generate_flat_csvs: when True, also write the per-image verification CSVs.
        excluded_list: substrings of ancestor folder names to skip
            (None means no exclusion).
    """
    excluded = excluded_list or []  # avoids a shared mutable default argument
    config = Config()

    export_pressure(config, load_images, excluded, test=True, train=False)
    print("Pressure files exported")
    export_surface(config, load_images, excluded, test=True, train=False)
    print("Surface files exported")

    if generate_flat_csvs:
        make_pressure_image_flat_csv(config, excluded, test=True, train=False)
        print("Pressure flat files exported")
        make_surface_image_flat_csv(config, excluded, test=True, train=False)
        print("Surface flat files exported")

    print("Test images and metadata extraction completed.")

def extract_images_and_metadata_train(
    load_images: bool = True,
    generate_flat_csvs: bool = True,
    excluded_list: Optional[List[str]] = None,
) -> None:
    """Run the pressure and surface exports for the train split.

    Builds a fresh ``Config`` and calls ``export_pressure`` followed by
    ``export_surface`` with ``test=False, train=True``. When
    ``generate_flat_csvs`` is true, ``make_pressure_image_flat_csv`` and
    ``make_surface_image_flat_csv`` are called afterwards with the same
    flags. A progress line is printed after every step.

    Args:
        load_images: Forwarded unchanged to both export helpers.
        generate_flat_csvs: Whether to also run the two flat-CSV helpers.
        excluded_list: Names forwarded to every helper; the driver cell
            passes hurricane names such as ``"Hurricane_Laura"``. ``None``
            (the default) is treated as an empty list.

    Returns:
        None. The helpers' return values are not used.
    """
    # A None default replaces the former shared ``[]`` literal, so a helper
    # that mutates the list cannot leak state into later calls.
    excluded = [] if excluded_list is None else excluded_list
    config = Config()

    export_pressure(config, load_images, excluded, test=False, train=True)
    print("Pressure files exported")
    export_surface(config, load_images, excluded, test=False, train=True)
    print("Surface files exported")

    if generate_flat_csvs:
        make_pressure_image_flat_csv(config, excluded, test=False, train=True)
        print("Pressure flat files exported")
        make_surface_image_flat_csv(config, excluded, test=False, train=True)
        print("Surface flat files exported")

    print("Train images and metadata extraction completed.")

def generate_hurricane_track(excluded_list: Optional[List[str]] = None) -> None:
    """Build the hurricane track for the full dataset.

    Builds a fresh ``Config`` and calls ``build_tracking`` with
    ``test=False, train=False``, then prints a confirmation line. The value
    returned by ``build_tracking`` is not used here.

    Args:
        excluded_list: Names forwarded to ``build_tracking``. ``None`` (the
            default) is treated as an empty list.

    Returns:
        None.
    """
    # A None default replaces the former shared ``[]`` literal, so a helper
    # that mutates the list cannot leak state into later calls.
    excluded = [] if excluded_list is None else excluded_list
    build_tracking(Config(), excluded, test=False, train=False)
    print("Full longitud and latitud track hurricane saved")

def generate_hurricane_track_test(excluded_list: Optional[List[str]] = None) -> None:
    """Build the hurricane track for the test split.

    Builds a fresh ``Config`` and calls ``build_tracking`` with
    ``test=True, train=False``, then prints a confirmation line. The value
    returned by ``build_tracking`` is not used here.

    Args:
        excluded_list: Names forwarded to ``build_tracking``; the driver cell
            passes hurricane names such as ``"Hurricane_Iota"``. ``None``
            (the default) is treated as an empty list.

    Returns:
        None.
    """
    # A None default replaces the former shared ``[]`` literal, so a helper
    # that mutates the list cannot leak state into later calls.
    excluded = [] if excluded_list is None else excluded_list
    build_tracking(Config(), excluded, test=True, train=False)
    print("Test longitud and latitud track hurricane saved")

def generate_hurricane_track_train(excluded_list: Optional[List[str]] = None) -> None:
    """Build the hurricane track for the train split.

    Builds a fresh ``Config`` and calls ``build_tracking`` with
    ``test=False, train=True``, then prints a confirmation line. The value
    returned by ``build_tracking`` is not used here.

    Args:
        excluded_list: Names forwarded to ``build_tracking``; the driver cell
            passes hurricane names such as ``"Hurricane_Laura"``. ``None``
            (the default) is treated as an empty list.

    Returns:
        None.
    """
    # A None default replaces the former shared ``[]`` literal, so a helper
    # that mutates the list cannot leak state into later calls.
    excluded = [] if excluded_list is None else excluded_list
    build_tracking(Config(), excluded, test=False, train=True)
    print("Train longitud and latitud track hurricane saved")



### Call the functions to download, extract images and metadata, and generate CSV files for specific hurricane events or cases without hurricanes

In [6]:

# --- Downloads -------------------------------------------------------------
# One call per date window. Windows given without a hurricane name rely on
# the wrapper's empty-string default.
download_hurricane_dataset()
download_prithvi_dataset_times(start_date="20201112", end_date="20201115", hurricane="Hurricane_Iota")
download_prithvi_dataset_times(start_date="20200101", end_date="20200104")
download_prithvi_dataset_times(start_date="20200201", end_date="20200203")
download_prithvi_dataset_times(start_date="20201025", end_date="20201028", hurricane="Hurricane_Goni")
download_prithvi_dataset_times(start_date="20200819", end_date="20200823", hurricane="Hurricane_Laura")
download_prithvi_dataset_times(start_date="20200829", end_date="20200902", hurricane="Hurricane_Haishen")

download_prithvi_dataset_times(start_date="20201004", end_date="20201008", hurricane="Hurricane_Delta")
download_prithvi_dataset_times(start_date="20201030", end_date="20201103", hurricane="Hurricane_Eta")
download_prithvi_dataset_times(start_date="20200911", end_date="20200915", hurricane="Hurricane_Teddy")
download_prithvi_dataset_times(start_date="20200928", end_date="20201002", hurricane="Hurricane_Marie")

# --- Full dataset: nothing excluded ----------------------------------------
test_list: List[str] = []
extract_images_and_metadata(load_images=True, generate_flat_csvs=False, excluded_list=test_list)
generate_hurricane_track(excluded_list=test_list)

# --- Test split ------------------------------------------------------------
# NOTE(review): the list is passed as ``excluded_list``, so these names are
# presumably the storms left OUT of the test split - confirm against the
# export helpers.
test_list = [
    "Hurricane_Iota",
    "Hurricane_Goni",
    "Hurricane_Delta",
    "Hurricane_Eta",
    "Hurricane_Teddy",
    "Hurricane_Marie",
]
extract_images_and_metadata_test(load_images=True, generate_flat_csvs=False, excluded_list=test_list)
generate_hurricane_track_test(excluded_list=test_list)

# --- Train split -----------------------------------------------------------
# Same convention: the names below are passed as exclusions for the train run.
train_list: List[str] = [
    "Hurricane_Laura",
    "Hurricane_Haishen",
    "Hurricane_Ida",
]
extract_images_and_metadata_train(load_images=True, generate_flat_csvs=False, excluded_list=train_list)
generate_hurricane_track_train(excluded_list=train_list)

Fetching 6 files: 100%|██████████| 6/6 [00:17<00:00,  2.96s/it]


Downloaded to: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Database\Hurricane_Ida
Dataset download completed.
Hurricane_Iota
Hurricane_Iota/surface_dataset
Downloading sfc files from 20201112 to 20201115...


Fetching 4 files: 100%|██████████| 4/4 [00:06<00:00,  1.58s/it]


Hurricane_Iota
Hurricane_Iota/pressure_dataset
Downloading pres files from 20201112 to 20201115...


Fetching 4 files: 100%|██████████| 4/4 [00:19<00:00,  4.80s/it]


Downloaded to: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Database\Hurricane_Iota\pressure_dataset
Dataset download completed.
Downloading sfc files from 20200101 to 20200104...


Fetching 4 files: 100%|██████████| 4/4 [00:05<00:00,  1.28s/it]


Downloading pres files from 20200101 to 20200104...


Fetching 4 files: 100%|██████████| 4/4 [00:24<00:00,  6.08s/it]


Downloaded to: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Database\pressure_dataset
Dataset download completed.
Downloading sfc files from 20200201 to 20200203...


Fetching 3 files: 100%|██████████| 3/3 [00:04<00:00,  1.55s/it]


Downloading pres files from 20200201 to 20200203...


Fetching 3 files: 100%|██████████| 3/3 [00:14<00:00,  4.84s/it]


Downloaded to: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Database\pressure_dataset
Dataset download completed.
Hurricane_Goni
Hurricane_Goni/surface_dataset
Downloading sfc files from 20201025 to 20201028...


Fetching 4 files: 100%|██████████| 4/4 [00:05<00:00,  1.32s/it]


Hurricane_Goni
Hurricane_Goni/pressure_dataset
Downloading pres files from 20201025 to 20201028...


Fetching 4 files: 100%|██████████| 4/4 [00:19<00:00,  4.82s/it]


Downloaded to: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Database\Hurricane_Goni\pressure_dataset
Dataset download completed.
Hurricane_Laura
Hurricane_Laura/surface_dataset
Downloading sfc files from 20200819 to 20200823...


Fetching 5 files: 100%|██████████| 5/5 [00:06<00:00,  1.20s/it]


Hurricane_Laura
Hurricane_Laura/pressure_dataset
Downloading pres files from 20200819 to 20200823...


Fetching 5 files: 100%|██████████| 5/5 [00:24<00:00,  4.89s/it]


Downloaded to: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Database\Hurricane_Laura\pressure_dataset
Dataset download completed.
Hurricane_Haishen
Hurricane_Haishen/surface_dataset
Downloading sfc files from 20200829 to 20200902...


Fetching 5 files: 100%|██████████| 5/5 [00:05<00:00,  1.19s/it]


Hurricane_Haishen
Hurricane_Haishen/pressure_dataset
Downloading pres files from 20200829 to 20200902...


Fetching 5 files: 100%|██████████| 5/5 [00:22<00:00,  4.60s/it]


Downloaded to: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Database\Hurricane_Haishen\pressure_dataset
Dataset download completed.
Hurricane_Delta
Hurricane_Delta/surface_dataset
Downloading sfc files from 20201004 to 20201008...


Fetching 5 files: 100%|██████████| 5/5 [00:05<00:00,  1.18s/it]


Hurricane_Delta
Hurricane_Delta/pressure_dataset
Downloading pres files from 20201004 to 20201008...


Fetching 5 files: 100%|██████████| 5/5 [00:23<00:00,  4.75s/it]


Downloaded to: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Database\Hurricane_Delta\pressure_dataset
Dataset download completed.
Hurricane_Eta
Hurricane_Eta/surface_dataset
Downloading sfc files from 20201030 to 20201103...


Fetching 5 files: 100%|██████████| 5/5 [00:06<00:00,  1.20s/it]


Hurricane_Eta
Hurricane_Eta/pressure_dataset
Downloading pres files from 20201030 to 20201103...


Fetching 5 files: 100%|██████████| 5/5 [00:23<00:00,  4.73s/it]


Downloaded to: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Database\Hurricane_Eta\pressure_dataset
Dataset download completed.
Hurricane_Teddy
Hurricane_Teddy/surface_dataset
Downloading sfc files from 20200911 to 20200915...


Fetching 5 files: 100%|██████████| 5/5 [00:06<00:00,  1.37s/it]


Hurricane_Teddy
Hurricane_Teddy/pressure_dataset
Downloading pres files from 20200911 to 20200915...


Fetching 5 files: 100%|██████████| 5/5 [00:24<00:00,  4.93s/it]


Downloaded to: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Database\Hurricane_Teddy\pressure_dataset
Dataset download completed.
Hurricane_Marie
Hurricane_Marie/surface_dataset
Downloading sfc files from 20200928 to 20201002...


Fetching 5 files: 100%|██████████| 5/5 [00:06<00:00,  1.36s/it]


Hurricane_Marie
Hurricane_Marie/pressure_dataset
Downloading pres files from 20200928 to 20201002...


Fetching 5 files: 100%|██████████| 5/5 [00:24<00:00,  4.83s/it]


Downloaded to: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Database\Hurricane_Marie\pressure_dataset
Dataset download completed.


  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  v

[PRESSURE] Wrote CSV: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Outputs\pressure_index.csv  rows=5376
[PRESSURE] Images under: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Outputs\images
Pressure files exported


  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  v

[SURFACE]  Wrote CSV: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Outputs\surface_index.csv  rows=384
[SURFACE]  Images under: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Outputs\images
Surface files exported
Full images and metadata extraction completed.
Full longitud and latitud track hurricane saved


  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  v

[PRESSURE] Wrote CSV: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Outputs\test_pressure_index.csv  rows=2240
[PRESSURE] Images under: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Outputs\images
Pressure files exported


  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  v

[SURFACE]  Wrote CSV: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Outputs\test_surface_index.csv  rows=160
[SURFACE]  Images under: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Outputs\images
Surface files exported
Test images and metadata extraction completed.
Test longitud and latitud track hurricane saved


  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  v

[PRESSURE] Wrote CSV: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Outputs\train_pressure_index.csv  rows=3920
[PRESSURE] Images under: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Outputs\images
Pressure files exported


  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  vals = tvar[:]
  vals = tvar[:]
cannot be safely cast to variable data type
  v

[SURFACE]  Wrote CSV: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Outputs\train_surface_index.csv  rows=280
[SURFACE]  Images under: C:\Users\adrid\OneDrive\Documents\Master UMiami AI\Topic DS - Computer vision\Final Project\Outputs\images
Surface files exported
Train images and metadata extraction completed.
Train longitud and latitud track hurricane saved
