In [None]:
# Mount Google Drive at /content/drive; later cells read their modules and
# dataset from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import sys

# Project folder on the mounted Drive (presumably holds the notebook's helper
# modules - verify against the imports used further down).
folder_path = '/content/drive/MyDrive/Colab Notebooks/DLRN101021/'

# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
if folder_path not in sys.path:
    sys.path.append(folder_path)

In [5]:
!pip install iterative-stratification
# (R, G, B) triple -> semantic class name.
COLOR_TO_CLASS_NAME_MAP = dict([
    ((255, 255, 255), "background"),
    ((1, 88, 255), "sky"),
    ((156, 76, 30), "rough trail"),
    ((178, 176, 153), "smooth trail"),
    ((128, 255, 0), "traversable grass"),
    ((40, 80, 0), "high vegetation"),
    ((0, 160, 0), "non-traversable low vegetation"),
    ((255, 0, 128), "puddle"),
    ((255, 0, 0), "obstacle"),
])

# Integer class code -> semantic class name.
# The entries are deliberately kept in their original (non-numeric) order so
# that iterating over the mapping yields the same sequence as before.
CLASS_CODE_TO_CLASS_NAME_MAP = dict([
    (4, "puddle"),
    (5, "obstacle"),
    (0, "background"),
    (6, "non-traversable low vegetation"),
    (8, "sky"),
    (2, "traversable grass"),
    (1, "smooth trail"),
    (3, "rough trail"),
    (7, "high vegetation"),
])



In [6]:
# CONFIGURAZIONE
import torch.optim as optim
from torchvision import models
import torch
import numpy as np

# Number of segmentation classes and root folder of the training samples.
NUM_CLASSES = 9
DATASET_DIR = '/content/drive/MyDrive/training_sets/train/'

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')

print(f"Using device: {DEVICE}")
if DEVICE.type == "cuda":
    # Only query the GPU name when a CUDA device was actually selected.
    print(f"GPU Name: {torch.cuda.get_device_name(DEVICE)}")

Using device: cuda
GPU Name: NVIDIA GeForce GTX 1650 Ti


In [7]:
import random
import numpy as np
from PIL import Image
import os
from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit
import json

# Badly labelled samples: these folder names are filtered out when the list
# of usable sample directories is built.
excluded_dirs = {
    *(
        "0007", "0009", "0090", "0095", "0101", "0104", "0105",
        "0162", "0163", "0284",
        "0305", "0306", "0307", "0308", "0309", "0310", "0311",
        "0351", "0372", "0373", "0376",
        "0498", "0499", "0500", "0501",
        "0526", "0527", "0530", "0531", "0542",
        "0564", "0585", "0586", "0587", "0588", "0589", "0590",
    ),
    # Images added to the exclusion list later on.
    *("0000", "0001", "0052"),
}

# Sorted list of usable sample folders: entries of DATASET_DIR whose name is
# purely numeric, is not in excluded_dirs, and that are directories.
all_dirs = [
    name
    for name in sorted(os.listdir(DATASET_DIR))
    if name.isdigit()
    and name not in excluded_dirs
    and os.path.isdir(os.path.join(DATASET_DIR, name))
]

#  BUILD THE MULTILABEL PRESENCE MATRIX
def build_presence_matrix(dir_list, n_classes=9):
    """Build a binary class-presence matrix for a list of sample folders.

    For every folder name in ``dir_list`` the file
    ``DATASET_DIR/<name>/labels.png`` is read, and each distinct pixel value
    found in it is treated as the index of a class present in that sample.

    Args:
        dir_list: Sample folder names, relative to ``DATASET_DIR``.
        n_classes: Number of classes, i.e. number of columns of the result.

    Returns:
        ``np.ndarray`` of shape ``(len(dir_list), n_classes)`` and dtype int,
        holding 1 where the class occurs in the sample and 0 otherwise.

    Raises:
        IndexError: If a label image contains a value outside
            ``[0, n_classes)``. The message names the offending file.
    """
    presence = np.zeros((len(dir_list), n_classes), dtype=int)

    for idx, d in enumerate(dir_list):
        label_path = os.path.join(DATASET_DIR, d, "labels.png")
        # The context manager releases the file handle deterministically;
        # np.array() forces the pixel data to be read while it is still open.
        with Image.open(label_path) as img:
            uniq = np.unique(np.array(img))

        # np.unique returns sorted values, so the first/last entries are the
        # extremes. Reject out-of-range ids explicitly: a too-large value
        # would otherwise fail without naming the file, and a negative one
        # would silently wrap around to another column.
        if uniq.size and (uniq[0] < 0 or uniq[-1] >= n_classes):
            raise IndexError(
                f"{label_path}: label values {uniq.tolist()} fall outside "
                f"the valid class range [0, {n_classes})"
            )

        presence[idx, uniq] = 1          # mark the classes that are present
    return presence

# One row per usable sample folder, one column per class.
Y = build_presence_matrix(all_dirs, n_classes=NUM_CLASSES)

#  MULTILABEL STRATIFIED SPLIT
# A single shuffle split with 20% of the samples held out and a fixed seed.
msss = MultilabelStratifiedShuffleSplit(
    n_splits=1,
    test_size=0.20,
    random_state=42,
)
# A zero vector stands in for the feature matrix; the label matrix Y is what
# carries the class information (presumably only X's length is used - verify
# against the iterstrat documentation).
train_idx, val_idx = next(msss.split(np.zeros(len(all_dirs)), Y))

# Translate positional indices back into folder names.
train_ids = list(map(all_dirs.__getitem__, train_idx))
val_ids = list(map(all_dirs.__getitem__, val_idx))

print("Campioni:", len(train_ids), "train |", len(val_ids), "val")

Campioni: 714 train | 177 val


In [9]:
# Save the validation split so that it can be reused later.
# Each id is reduced to its final path component before being written.
val_ids_clean = list(map(os.path.basename, val_ids))

with open("val_ids.json", "w") as f:
    f.write(json.dumps(val_ids_clean, indent=4))