In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import matplotlib.pyplot as plt
import os
import datetime
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split

from baseCode.dataloaders.data_augmentation import data_aug_selector
from baseCode.models.classification import load_model
from baseCode.dataloaders.Image_Dataset import Image_Dataset
from baseCode.utils.tensorboard import start_tensorboard
from baseCode.utils.manual_stop import check_stop_training, set_stop_training, get_training_status 
from baseCode.train_classification import train_loop, validation_loop, compute_acc


In [None]:
# Training configuration: backbone, hyper-parameters, seed and device.
backbone = 'swin_v2_b'  # architecture name handed to load_model; change as needed
weights = 'imagenet'  # 'none' or 'imagenet'
num_classes = 2
num_epochs = 50
learning_rate = 0.001
batch_size = 16

# Seed PyTorch's global RNG so that PyTorch-driven randomness (weight
# initialisation, DataLoader shuffling) starts from the same state on every
# "Restart & Run All". 42 matches the random_state used for the data split.
seed = 42
torch.manual_seed(seed)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Dispositivo seleccionado: {device}")

In [None]:
# TensorBoard setup: every execution logs into its own timestamped run directory.
timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
log_dir = 'runs/training_{}_{}'.format(backbone, timestamp)
writer = SummaryWriter(log_dir)
# Launch TensorBoard through the project helper and keep the returned handle.
tensorboard_process = start_tensorboard()

In [None]:
# Load the metadata CSV and keep the rows that can be used for binary classification.
metadata_path = '../DataTIC/bcn20000_metadata_2025-10-19.csv'
images_dir = '../DataTIC/ISIC-images'

df = pd.read_csv(metadata_path)
has_binary_label = df['diagnosis_1'].isin(['Benign', 'Malignant'])
has_image_id = df['isic_id'].notna()
# Only the first 100 rows that pass both filters are kept.
df = df[has_binary_label & has_image_id].head(100).copy()

# Derive each image's file name from its ISIC id and locate it under images_dir.
df['filename'] = df['isic_id'].map('{}.jpg'.format)
df['filepath'] = df['filename'].map(lambda name: os.path.join(images_dir, name))

# Drop rows whose image file is not present on disk.
image_on_disk = df['filepath'].apply(os.path.exists)
df = df[image_on_disk]
image_label_pairs = list(zip(df['filepath'], df['diagnosis_1']))

In [None]:
# Minimal args object so that data_aug_selector and Image_Dataset can be reused.
class Args:
    """Plain attribute container holding the settings passed around as ``args``."""

    def __init__(self):
        # Model settings, copied from the notebook-level configuration.
        self.backbone = backbone
        self.weights = weights
        # Data pipeline settings.
        self.img_size = 224
        self.batch_size = batch_size
        self.jobs = 0  # workers
        # Data augmentation settings.
        self.da_library = 'torchvision'  # or 'albumentations'
        self.da_level = 'heavy'          # light | medium | heavy
args = Args()

# Label column and label -> class-index mapping. They are defined in this cell
# because the split and the list files below need them; relying on a later cell
# to define them raises NameError on a fresh kernel.
label_col = 'diagnosis_1'
label2idx = {label: idx for idx, label in enumerate(sorted(df[label_col].unique()))}

# Stratified 80/20 train/validation split, kept in memory.
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df[label_col])
print(f"Split -> Train: {len(train_df)} | Val: {len(val_df)}")

# One "<filepath> <class index>" line per sample.
# NOTE(review): a file path containing spaces would make these lines ambiguous;
# confirm how Image_Dataset parses the list file.
train_lines = [f"{path} {label2idx[label]}" for path, label in zip(train_df['filepath'], train_df[label_col])]
val_lines   = [f"{path} {label2idx[label]}" for path, label in zip(val_df['filepath'], val_df[label_col])]

# Write temporary list files (per the original note, for compatibility with read_list).
os.makedirs('temp_lists', exist_ok=True)
train_list_path = 'temp_lists/train.txt'
val_list_path   = 'temp_lists/validation.txt'
with open(train_list_path, 'w') as f:
    f.write('\n'.join(train_lines))
with open(val_list_path, 'w') as f:
    f.write('\n'.join(val_lines))
print("Listas guardadas en temp_lists/")

# Build the data augmentation transform selected by args.
a_transform = data_aug_selector(args)

# Instantiate the datasets with Image_Dataset. Only the training set is augmented.
# NOTE(review): the training set is configured via `args=` while the validation
# set is configured via `img_size=`; confirm Image_Dataset supports both forms.
train_dataset = Image_Dataset(train_list_path, args=args, transform=a_transform)
val_dataset   = Image_Dataset(val_list_path, img_size=(args.img_size,args.img_size), transform=None)
print(f"Train samples: {len(train_dataset)} | Val samples: {len(val_dataset)} | Clases: {train_dataset.n_classes}")

# DataLoaders: shuffle only the training data.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
print(f"Batches -> Train: {len(train_loader)} | Val: {len(val_loader)}")

In [None]:
# Map each diagnosis label to a numeric class index for classification.
# NOTE(review): the dataset/split cell earlier in the notebook also reads
# `label_col` and `label2idx`; keep these definitions consistent with it.
label_col = 'diagnosis_1'
sorted_labels = sorted(df[label_col].unique())
label2idx = dict(zip(sorted_labels, range(len(sorted_labels))))
print('Diccionario de labels:', label2idx)

In [None]:
# Build the classifier and move it to the selected device.
model = load_model(backbone, weights, num_classes)
model = model.to(device)
# `print_layers` is neither imported nor defined in this notebook, so calling it
# raises NameError on a fresh kernel. List the model's top-level layers directly.
for layer_name, layer in model.named_children():
    print(f"{layer_name}: {layer.__class__.__name__}")
print(f"Modelo {backbone} cargado con {num_classes} clases y pesos: {weights}")

In [None]:
def _log_epoch_to_tensorboard(writer, model, optimizer, epoch, train_loss, train_acc, val_loss, val_acc):
    """Write one epoch's scalars (and, every 5th epoch, histograms) to ``writer``."""
    writer.add_scalar('Train/Loss', train_loss, epoch)
    writer.add_scalar('Train/Accuracy', train_acc, epoch)
    writer.add_scalar('Val/Loss', val_loss, epoch)
    writer.add_scalar('Val/Accuracy', val_acc, epoch)
    # Learning rate of the first parameter group only.
    writer.add_scalar('Train/LR', optimizer.param_groups[0]['lr'], epoch)
    if epoch % 5 == 0:
        for name, param in model.named_parameters():
            if param.requires_grad:
                writer.add_histogram(f'Params/{name}', param, epoch)
                # Gradients are only logged when they are populated.
                if param.grad is not None:
                    writer.add_histogram(f'Grads/{name}', param.grad, epoch)


def train_with_basecode(model, train_loader, val_loader, criterion, optimizer, device, num_epochs, writer=None,
                        control_file_path='baseCode/assets/training_control.json'):
    """Train and validate ``model`` for up to ``num_epochs`` epochs using the baseCode loops.

    Each epoch runs ``train_loop`` and ``validation_loop``, records their
    (loss, accuracy) results, optionally logs them to TensorBoard, and then asks
    ``check_stop_training`` whether a manual stop was requested.

    Args:
        model: Model to train; forwarded to ``train_loop`` / ``validation_loop``.
        train_loader: Loader forwarded to ``train_loop``.
        val_loader: Loader forwarded to ``validation_loop``.
        criterion: Loss function forwarded to both loops.
        optimizer: Optimizer forwarded to ``train_loop``; its first parameter
            group's learning rate is logged.
        device: Device forwarded to both loops.
        num_epochs: Maximum number of epochs (epochs are numbered from 1).
        writer: Optional TensorBoard writer. When given, it is closed before
            this function exits, including when training raises or is interrupted.
        control_file_path: Path handed to ``check_stop_training`` after every epoch.

    Returns:
        dict with keys ``train_loss``, ``train_acc``, ``val_loss`` and ``val_acc``,
        each a list holding one value per completed epoch.
    """
    history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}

    try:
        for epoch in range(1, num_epochs+1):
            print(f"\nÉpoca {epoch}/{num_epochs}")
            # Train
            train_loss, train_acc = train_loop(model, device, train_loader, criterion, optimizer)
            # Validation
            val_loss, val_acc = validation_loop(model, device, val_loader, criterion)

            history['train_loss'].append(train_loss)
            history['train_acc'].append(train_acc)
            history['val_loss'].append(val_loss)
            history['val_acc'].append(val_acc)

            # TensorBoard
            if writer:
                _log_epoch_to_tensorboard(writer, model, optimizer, epoch,
                                          train_loss, train_acc, val_loss, val_acc)

            print(f"Entrenamiento - Loss: {train_loss:.5f}, Acc: {train_acc:.3%}")
            print(f"Validación   - Loss: {val_loss:.5f}, Acc: {val_acc:.3%}")

            # Manual stop is checked once per epoch, after logging.
            if check_stop_training(control_file_path):
                print(f"🛑 Parada manual activada tras la época {epoch}")
                break
    finally:
        # Close the writer even if training fails or is interrupted, so the
        # events logged so far are not left in an unclosed writer.
        if writer:
            writer.close()
            print(f"📊 Logs almacenados en: {writer.log_dir}")

    return history

In [None]:
# Train the model with the baseCode loops (Image_Dataset + data_aug_selector).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

print("🚀 Iniciando entrenamiento con baseCode y Image_Dataset...")
print(f"📊 Logs TensorBoard en: {log_dir}")
print(f"📁 Train samples: {len(train_loader.dataset)} | Val samples: {len(val_loader.dataset)}")

history = train_with_basecode(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=num_epochs,
    writer=writer,
)

# Save the final weights (state_dict only) under modelos_guardados/.
model_path = f"modelo_{backbone}_{timestamp}.pth"  # NOTE(review): not used anywhere in this cell
save_dir = 'modelos_guardados'
os.makedirs(save_dir, exist_ok=True)
final_model_path = os.path.join(save_dir, f"{backbone}_{timestamp}.pth")
torch.save(model.state_dict(), final_model_path)
print(f"✅ Modelo final guardado en {final_model_path}")

In [None]:
# Plot the training curves: loss on the left panel, accuracy on the right panel.
# Each entry: (history key suffix, panel title, y-axis label, legend suffix).
panels = [
    ('loss', 'Pérdida (Loss)', 'Loss', 'Loss'),
    ('acc', 'Precisión (Accuracy)', 'Accuracy', 'Acc'),
]

plt.figure(figsize=(14, 5))
for position, (metric, title, y_label, legend_name) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    # Train curve first, then validation, so the legend order stays Train / Val.
    for split, split_name in (('train', 'Train'), ('val', 'Val')):
        plt.plot(history[f'{split}_{metric}'], marker='o', label=f'{split_name} {legend_name}')
    plt.title(title)
    plt.xlabel('Época')
    plt.ylabel(y_label)
    plt.legend()
    plt.grid(True)
plt.show()

In [None]:
# Reset the manual-stop control so the next training run starts unblocked.
# NOTE(review): these helpers are called without a control-file path, whereas
# train_with_basecode passes 'baseCode/assets/training_control.json' to
# check_stop_training; confirm both refer to the same file.
print("🔄 Reseteando control para próximo entrenamiento...")
set_stop_training(False)  # make sure the stop flag is False for upcoming runs
get_training_status()     # check the final state