In [None]:
!pip install -q timm albumentations==1.4.3 torchmetrics

import os, glob, random
from pathlib import Path
from collections import Counter

import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms
import timm
from PIL import Image
from sklearn.model_selection import StratifiedKFold
import albumentations as A
from albumentations.pytorch import ToTensorV2


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/137.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.0/137.0 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.9/981.9 kB[0m [31m37.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m113.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m93.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m60.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
!unzip -q /content/dataset_colores.zip -d /content/dataset_color

In [None]:
# Dataset layout used below: ROOT/<part>/<color>/<image files>.
ROOT = Path('/content/dataset_color/dataset_colores_clasificados_v2')
PARTS = ['upper', 'lower']
# Class names are the sub-directory names of the first part only, sorted so the
# integer index of every colour is stable (12 colours per the author's note).
COLORS = sorted(entry.name for entry in (ROOT / PARTS[0]).iterdir() if entry.is_dir())

def build_paths_labels(root, parts=None, colors=None):
    """Collect every file under ``root/<part>/<color>/`` together with its class index.

    Args:
        root: ``pathlib.Path`` of the dataset root directory.
        parts: iterable of part sub-directory names. Defaults to the
            notebook-level ``PARTS``.
        colors: ordered sequence of class names; the position of a name in the
            sequence is its integer label. Defaults to the notebook-level
            ``COLORS``.

    Returns:
        ``(paths, labels)``: a NumPy array of path strings and a parallel
        ``int64`` NumPy array of class indices.
    """
    parts = PARTS if parts is None else parts
    colors = COLORS if colors is None else colors

    paths, labels = [], []
    for part in parts:
        for label, color in enumerate(colors):
            # glob returns entries in filesystem order; sorting makes the sample
            # order (and therefore any seeded split built on it) reproducible.
            for img in sorted(glob.glob(str(root / part / color / '*'))):
                if not os.path.isfile(img):
                    continue  # ignore stray sub-directories
                paths.append(img)
                labels.append(label)
    # Explicit dtypes keep the arrays well-typed even when nothing was found.
    return np.array(paths, dtype=str), np.array(labels, dtype=np.int64)

# Scan the dataset once, then report its size and the per-class sample counts.
num_classes = len(COLORS)
paths, labels = build_paths_labels(ROOT)
print('Total imágenes:', len(paths))
print('Distribución por clase:', Counter(labels))


Total imágenes: 9274
Distribución por clase: Counter({np.int64(9): 2075, np.int64(3): 2037, np.int64(8): 1559, np.int64(1): 731, np.int64(4): 700, np.int64(11): 394, np.int64(5): 356, np.int64(2): 342, np.int64(10): 330, np.int64(7): 309, np.int64(0): 285, np.int64(6): 156})


In [None]:
# Stratified 5-fold split with a fixed seed; only the first fold is used as the
# train/validation partition.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
train_idx, val_idx = next(iter(skf.split(paths, labels)))
train_paths, train_labels = paths[train_idx], labels[train_idx]
val_paths, val_labels = paths[val_idx], labels[val_idx]


In [None]:
IMG_SIZE = 224
# Standard ImageNet channel statistics (assumed to match the pretrained timm
# weights used later -- confirm against the model's default config).
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# Training pipeline: geometric augmentation plus deliberately mild photometric
# changes (presumably because colour itself is the target label).
train_tf = A.Compose([
    # `size=(height, width)` replaces the deprecated positional height/width
    # form, which emits a warning under the pinned albumentations version.
    A.RandomResizedCrop(size=(IMG_SIZE, IMG_SIZE), scale=(0.6, 1.0), ratio=(0.8, 1.2)),
    A.HorizontalFlip(p=0.5),
    # border_mode=0 is OpenCV's constant-border mode.
    A.ShiftScaleRotate(shift_limit=0.08, scale_limit=0.15, rotate_limit=20, p=0.8, border_mode=0),
    A.Perspective(scale=(0.02, 0.05), p=0.3),
    A.RandomBrightnessContrast(brightness_limit=0.08, contrast_limit=0.08, p=0.2),  # mild
    A.GaussianBlur(blur_limit=3, p=0.1),
    A.ISONoise(p=0.1),
    A.CoarseDropout(max_holes=8, max_height=IMG_SIZE//10, max_width=IMG_SIZE//10, p=0.5),
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ToTensorV2(),
])

# Validation pipeline: deterministic resize + the same normalisation.
val_tf = A.Compose([
    A.Resize(IMG_SIZE, IMG_SIZE),
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ToTensorV2(),
])


  A.RandomResizedCrop(IMG_SIZE, IMG_SIZE, scale=(0.6, 1.0), ratio=(0.8, 1.2)),


In [None]:
class ColorDataset(Dataset):
    """Map-style dataset yielding ``(transformed_image, class_index)`` pairs.

    Args:
        paths: sequence of image file paths.
        labels: sequence of integer class indices, parallel to ``paths``.
        tfm: callable invoked as ``tfm(image=ndarray)`` that returns a mapping
            holding the transformed image under the ``'image'`` key.

    Raises:
        ValueError: if ``paths`` and ``labels`` differ in length.
    """

    def __init__(self, paths, labels, tfm):
        if len(paths) != len(labels):
            raise ValueError(
                f"paths and labels must have the same length "
                f"(got {len(paths)} and {len(labels)})"
            )
        self.paths = paths
        self.labels = labels
        self.tfm = tfm

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, i):
        # The context manager releases the file handle as soon as the pixels
        # are decoded instead of waiting for garbage collection.
        with Image.open(self.paths[i]) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        img = self.tfm(image=img)['image']
        # Cast to a plain int so NumPy integer labels collate uniformly.
        return img, int(self.labels[i])

# Training samples get the augmenting pipeline, validation samples the
# deterministic one.
train_ds = ColorDataset(paths=train_paths, labels=train_labels, tfm=train_tf)
val_ds = ColorDataset(paths=val_paths, labels=val_labels, tfm=val_tf)


In [None]:
# Per-class weights inversely proportional to the class frequency in the
# training split.
class_counts = np.bincount(train_labels, minlength=num_classes)
# Clamp the count to 1 so a class that is absent from this split gets a finite
# weight instead of ~1e6 (which would dominate any weighted loss term).
class_weights = 1.0 / np.maximum(class_counts, 1)
sample_weights = class_weights[train_labels]
# Draw len(train) samples per epoch with replacement, so every class is seen
# roughly equally often.
sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

# 32 is the batch size that matches the recorded run (232 train / 58 val
# iterations per epoch) and the learning rates used further down; 512 does not
# fit full ResNet-101 fine-tuning on a typical Colab GPU.
BATCH_SIZE = 32
train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, sampler=sampler, num_workers=2, pin_memory=True)
val_dl   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

# For the loss
class_weights_t = torch.tensor(class_weights, dtype=torch.float32)


In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ResNet-101 with pretrained weights and a classification head sized for our
# classes.
model = timm.create_model('resnet101', pretrained=True, num_classes=num_classes)
# Warm-up: freeze every parameter whose name does not contain 'fc', leaving
# only the final classifier trainable.
for param_name, param in model.named_parameters():
    if 'fc' in param_name:
        continue
    param.requires_grad = False
model.to(device)

# Loss (pick ONE).
class FocalLoss(nn.Module):
    """Multi-class focal loss: ``alpha_t * (1 - p_t) ** gamma * CE``, batch-averaged.

    Args:
        alpha: optional 1-D tensor of per-class weights (indexed by target
            class), or ``None`` for no class weighting.
        gamma: focusing exponent; larger values down-weight easy examples more.
    """

    def __init__(self, alpha=None, gamma=2.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, logits, targets):
        # p_t must be derived from the *unweighted* cross-entropy. Folding the
        # class weight into `ce` before exp(-ce) distorts p_t (with small
        # weights p_t -> 1 and the whole loss collapses towards zero).
        ce = nn.functional.cross_entropy(logits, targets, reduction='none')
        pt = torch.exp(-ce)
        loss = ((1 - pt) ** self.gamma) * ce
        if self.alpha is not None:
            loss = self.alpha[targets] * loss
        return loss.mean()


use_focal = False
# The training DataLoader already draws class-balanced batches through the
# WeightedRandomSampler. Re-applying inverse-frequency weights in the loss
# compensates for the imbalance twice and pushes predictions towards the rare
# classes, so loss weighting is off by default. Enable it only when training
# WITHOUT the balanced sampler.
use_class_weights = False
loss_weights = None
if use_class_weights:
    # Rescale to mean 1 so the loss magnitude stays comparable to the
    # unweighted case.
    loss_weights = (class_weights_t / class_weights_t.mean()).to(device)

if use_focal:
    criterion = FocalLoss(alpha=loss_weights, gamma=2.0)
else:
    criterion = nn.CrossEntropyLoss(weight=loss_weights, label_smoothing=0.05)

# Hand only the parameters that are still trainable to the optimizer.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(trainable_params, lr=3e-4, weight_decay=1e-4)
# NOTE(review): T_max=10 while the warm-up loop further down runs for
# EPOCHS_WARM = 3 epochs, so only the first part of the cosine curve is used.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/179M [00:00<?, ?B/s]

In [None]:
from torchmetrics.classification import MulticlassF1Score, MulticlassAccuracy
import torch.nn.functional as F
from tqdm import tqdm

def run_epoch(dl, train=True):
    """Run one full pass over ``dl`` and return ``(loss, macro_f1, macro_acc)``.

    Relies on the notebook-level ``model``, ``criterion``, ``optimizer``,
    ``device`` and ``num_classes``.

    Args:
        dl: iterable of ``(inputs, targets)`` batches.
        train: when true, gradients are enabled and the optimizer is stepped
            after every batch; otherwise the model is only evaluated.

    Returns:
        Tuple of Python floats: the sample-weighted mean loss, the macro F1
        score and the macro accuracy accumulated over the whole pass.

    Raises:
        ValueError: if ``dl`` yields no samples.
    """
    model.train(train)
    if train:
        # A BatchNorm layer whose affine parameters are all frozen belongs to a
        # frozen part of the network (head-only warm-up). Keep it in eval mode
        # so its pretrained running statistics are not overwritten while the
        # surrounding weights cannot adapt.
        for module in model.modules():
            if isinstance(module, nn.modules.batchnorm._BatchNorm):
                bn_params = list(module.parameters(recurse=False))
                if bn_params and not any(p.requires_grad for p in bn_params):
                    module.eval()

    loss_sum, n = 0.0, 0
    f1 = MulticlassF1Score(num_classes=num_classes, average='macro').to(device)
    acc = MulticlassAccuracy(num_classes=num_classes, average='macro').to(device)

    for x, y in tqdm(dl, disable=False):
        # non_blocking lets the copy overlap with compute when the loader
        # provides pinned memory; it is a no-op otherwise.
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)
        with torch.set_grad_enabled(train):
            logits = model(x)
            loss = criterion(logits, y)
        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Weight the batch loss by batch size so a smaller last batch does not
        # skew the epoch mean.
        loss_sum += loss.item() * x.size(0)
        n += x.size(0)
        f1.update(logits, y)
        acc.update(logits, y)

    if n == 0:
        raise ValueError("run_epoch received an empty data loader")
    return loss_sum / n, f1.compute().item(), acc.compute().item()

EPOCHS_WARM = 3
EPOCHS_FT = 20
best_f1 = 0

# Phase 1: train with the parameters frozen earlier still frozen.
print('--- Warm-up (solo la cabeza) ---')
for epoch in range(EPOCHS_WARM):
    tr_loss, tr_f1, tr_acc = run_epoch(train_dl, train=True)
    va_loss, va_f1, va_acc = run_epoch(val_dl, train=False)
    scheduler.step()
    print(f"[W{epoch+1}] train loss {tr_loss:.4f} f1 {tr_f1:.3f}  | val loss {va_loss:.4f} f1 {va_f1:.3f}")

# Phase 2: unfreeze everything and restart optimizer + schedule for full
# fine-tuning at a lower learning rate.
model.requires_grad_(True)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS_FT)

print('--- Fine-tuning ---')
patience = 5
patience_cnt = 0
for epoch in range(EPOCHS_FT):
    tr_loss, tr_f1, tr_acc = run_epoch(train_dl, train=True)
    va_loss, va_f1, va_acc = run_epoch(val_dl, train=False)
    scheduler.step()
    print(f"[{epoch+1}] train loss {tr_loss:.4f} f1 {tr_f1:.3f} | val loss {va_loss:.4f} f1 {va_f1:.3f}")

    improved = va_f1 > best_f1
    if improved:
        # New best validation macro-F1: checkpoint and reset the patience counter.
        best_f1 = va_f1
        patience_cnt = 0
        torch.save(model.state_dict(), 'best_resnet101_color.pth')
        continue

    # No improvement: stop once `patience` consecutive epochs have passed.
    patience_cnt += 1
    if patience_cnt >= patience:
        print("Early stopping")
        break

print("Best macro-F1:", best_f1)


--- Warm-up (solo la cabeza) ---


100%|██████████| 232/232 [00:17<00:00, 13.08it/s]
100%|██████████| 58/58 [00:03<00:00, 15.63it/s]


[W1] train loss 2.2909 f1 0.016  | val loss 3.1058 f1 0.010


100%|██████████| 232/232 [00:16<00:00, 13.73it/s]
100%|██████████| 58/58 [00:03<00:00, 15.55it/s]


[W2] train loss 2.2135 f1 0.069  | val loss 3.1714 f1 0.054


100%|██████████| 232/232 [00:16<00:00, 13.90it/s]
100%|██████████| 58/58 [00:03<00:00, 15.41it/s]


[W3] train loss 2.1293 f1 0.083  | val loss 3.1878 f1 0.020
--- Fine-tuning ---


100%|██████████| 232/232 [00:50<00:00,  4.59it/s]
100%|██████████| 58/58 [00:03<00:00, 15.28it/s]


[1] train loss 1.8373 f1 0.188 | val loss 3.0298 f1 0.231


100%|██████████| 232/232 [00:50<00:00,  4.59it/s]
100%|██████████| 58/58 [00:03<00:00, 15.14it/s]


[2] train loss 1.1250 f1 0.433 | val loss 2.2944 f1 0.397


100%|██████████| 232/232 [00:51<00:00,  4.54it/s]
100%|██████████| 58/58 [00:03<00:00, 15.06it/s]


[3] train loss 0.8700 f1 0.571 | val loss 2.0787 f1 0.476


100%|██████████| 232/232 [00:50<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.04it/s]


[4] train loss 0.7732 f1 0.650 | val loss 1.8545 f1 0.573


100%|██████████| 232/232 [00:51<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.01it/s]


[5] train loss 0.7027 f1 0.708 | val loss 1.7587 f1 0.614


100%|██████████| 232/232 [00:51<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.04it/s]


[6] train loss 0.6540 f1 0.729 | val loss 1.6998 f1 0.639


100%|██████████| 232/232 [00:50<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.00it/s]


[7] train loss 0.6118 f1 0.764 | val loss 1.6938 f1 0.638


100%|██████████| 232/232 [00:50<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.06it/s]


[8] train loss 0.5798 f1 0.770 | val loss 1.7081 f1 0.642


100%|██████████| 232/232 [00:51<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.03it/s]


[9] train loss 0.5702 f1 0.789 | val loss 1.6679 f1 0.655


100%|██████████| 232/232 [00:50<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.04it/s]


[10] train loss 0.5296 f1 0.808 | val loss 1.5215 f1 0.702


100%|██████████| 232/232 [00:50<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.04it/s]


[11] train loss 0.5277 f1 0.805 | val loss 1.5696 f1 0.691


100%|██████████| 232/232 [00:51<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.04it/s]


[12] train loss 0.4872 f1 0.832 | val loss 1.5202 f1 0.705


100%|██████████| 232/232 [00:51<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.06it/s]


[13] train loss 0.4867 f1 0.830 | val loss 1.5026 f1 0.713


100%|██████████| 232/232 [00:51<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.06it/s]


[14] train loss 0.4899 f1 0.832 | val loss 1.4973 f1 0.718


100%|██████████| 232/232 [00:51<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.03it/s]


[15] train loss 0.4694 f1 0.846 | val loss 1.5275 f1 0.704


100%|██████████| 232/232 [00:50<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.06it/s]


[16] train loss 0.4703 f1 0.840 | val loss 1.5098 f1 0.718


100%|██████████| 232/232 [00:50<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.01it/s]


[17] train loss 0.4761 f1 0.841 | val loss 1.4710 f1 0.723


100%|██████████| 232/232 [00:51<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.05it/s]


[18] train loss 0.4661 f1 0.851 | val loss 1.4954 f1 0.718


100%|██████████| 232/232 [00:50<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.02it/s]


[19] train loss 0.4630 f1 0.850 | val loss 1.4880 f1 0.722


100%|██████████| 232/232 [00:50<00:00,  4.55it/s]
100%|██████████| 58/58 [00:03<00:00, 15.05it/s]


[20] train loss 0.4733 f1 0.841 | val loss 1.4633 f1 0.724
Best macro-F1: 0.7235713005065918


In [None]:
# @title Guardar el modelo en Google Drive para inferencia
from google.colab import drive
drive.mount('/content/drive')

import os, torch, json, datetime

# === Configure your Drive path ===
SAVE_DIR = "/content/drive/MyDrive/color_classifier"  # change it if you like
os.makedirs(SAVE_DIR, exist_ok=True)

# The following variables must exist in the session (from the training cells):
# - model          -> the trained network
# - num_classes    -> number of classes (12)
# - COLORS         -> list with the class names
# - IMG_SIZE       -> input size that was used (e.g. 224)

# Move the model to CPU and switch to eval mode before saving
model.eval()
model.cpu()

# After training, `model` holds the weights of the LAST epoch, which are not
# necessarily the best ones. Restore the checkpoint written by the training
# loop so the file exported as "best" really contains the best weights.
BEST_LOCAL_CKPT = 'best_resnet101_color.pth'
if os.path.exists(BEST_LOCAL_CKPT):
    model.load_state_dict(torch.load(BEST_LOCAL_CKPT, map_location='cpu'))
else:
    print(f"⚠️ No se encontró '{BEST_LOCAL_CKPT}'; se exportan los pesos actuales del modelo.")

timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
ckpt_path = os.path.join(SAVE_DIR, f"resnet101_color_best_{timestamp}.pth")

# Bundle the weights with everything inference needs to rebuild the model and
# its preprocessing.
torch.save({
    "model_name": "resnet101",
    "state_dict": model.state_dict(),
    "num_classes": num_classes,
    "class_names": COLORS,
    "img_size": IMG_SIZE,
    "mean": (0.485, 0.456, 0.406),
    "std":  (0.229, 0.224, 0.225),
}, ckpt_path)

print("✔️ Checkpoint guardado en:", ckpt_path)

# (Optional) also export a TorchScript version for simpler inference
try:
    example = torch.randn(1, 3, IMG_SIZE, IMG_SIZE)
    scripted = torch.jit.trace(model, example)
    ts_path = os.path.join(SAVE_DIR, f"resnet101_color_scripted_{timestamp}.pt")
    scripted.save(ts_path)
    print("✔️ TorchScript guardado en:", ts_path)
except Exception as e:
    # Deliberately best-effort: the plain checkpoint above is the primary artefact.
    print("No se generó TorchScript (opcional). Error:", e)


Mounted at /content/drive
✔️ Checkpoint guardado en: /content/drive/MyDrive/color_classifier/resnet101_color_best_20250725_075421.pth
✔️ TorchScript guardado en: /content/drive/MyDrive/color_classifier/resnet101_color_scripted_20250725_075421.pt
