In [1]:
import os

# Workaround for the "duplicate OpenMP runtime" abort: with this variable set,
# the process keeps running when more than one OpenMP library gets loaded.
# It is set in this first cell, ahead of the torch/timm imports further down.
os.environ.update({"KMP_DUPLICATE_LIB_OK": "True"})

In [5]:
"""
This script fine-tunes a Vision Transformer (ViT-B/16) pre-trained on ImageNet-21k
for a regression task: predicting nutritional values from an image of a dish.
"""
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
from PIL import Image
from pathlib import Path
from typing import Tuple, Any
import timm

from src.macro_estimator.models.vit_regressor import ViTRegressor
from src.macro_estimator.datasets import Nutrition5kDataset

# --- 1. Configuration and Constants ---
# Prefer the CUDA device when one is available; otherwise run on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# --- Data Paths ---
# Relative paths: they resolve against the current working directory.
IMAGES_CSV_PATH = Path("data", "csv_files", "images.csv")
LABELS_CSV_PATH = Path("data", "csv_files", "labels.csv")
MODEL_SAVE_PATH = Path("artifacts", "models", "vit_nutrition_regressor.pth")

# --- Training Hyperparameters ---
LEARNING_RATE = 1e-4  # step size handed to the optimizer
BATCH_SIZE = 16       # samples per batch; tune to the available GPU memory
EPOCHS = 20           # fine-tuning may need more passes than this
WEIGHT_DECAY = 1e-4   # weight-decay coefficient handed to the optimizer
VAL_SPLIT = 0.2       # fraction of the dataset held out for validation

def main(seed: int = 42):
    """Build the data pipeline, model, loss and optimizer for ViT fine-tuning.

    Steps performed:
      1. Resolve the preprocessing transforms for the chosen timm model.
      2. Load ``Nutrition5kDataset`` and split it into training and validation
         subsets according to ``VAL_SPLIT``.
      3. Wrap both subsets in ``DataLoader`` objects.
      4. Instantiate ``ViTRegressor`` on ``DEVICE`` together with an MSE loss
         and an AdamW optimizer.

    Everything created here is local to the function and nothing is returned;
    no training loop is run.

    Args:
        seed: Seed for the generator that drives the train/validation split,
            so the same samples land in each subset on every run.
    """
    print(f"--- Using device: {DEVICE} ---")

    # Full timm identifier: architecture name plus pretrained-weights tag.
    MODEL_NAME = 'vit_base_patch16_224.augreg_in21k'

    # --- Data Loading and Transformations ---
    # A throwaway model is instantiated only so timm can resolve the
    # preprocessing config that belongs to this model name.
    # NOTE(review): `pretrained` is left at its default here; per the timm docs
    # that default does not load weights - confirm for the installed version.
    temp_model = timm.create_model(MODEL_NAME)
    data_config = timm.data.resolve_data_config(model=temp_model)
    # The temporary model is not needed past this point; release it before the
    # real model is built.
    del temp_model
    transforms = timm.data.create_transform(**data_config)
    print(f"Data transforms loaded for {MODEL_NAME}.")

    # The same transform pipeline is attached to the whole dataset, so the
    # training and validation subsets are preprocessed identically.
    full_dataset = Nutrition5kDataset(
        images_csv_path=IMAGES_CSV_PATH,
        labels_csv_path=LABELS_CSV_PATH,
        transform=transforms
    )

    # Validation gets floor(len * VAL_SPLIT) samples; training gets the rest.
    val_size = int(len(full_dataset) * VAL_SPLIT)
    train_size = len(full_dataset) - val_size
    # Seeded generator: without it the split differs on every run, which makes
    # validation metrics incomparable between runs.
    split_generator = torch.Generator().manual_seed(seed)
    train_dataset, val_dataset = random_split(
        full_dataset, [train_size, val_size], generator=split_generator
    )

    # Pinned host memory only helps host-to-GPU copies, so request it only
    # when running on CUDA, and apply the same setting to both loaders.
    loader_kwargs = dict(
        batch_size=BATCH_SIZE,
        num_workers=4,
        pin_memory=DEVICE.type == "cuda",
    )
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

    print(f"Data loaded: {train_size} training samples, {val_size} validation samples.")

    # --- Model, Loss, and Optimizer ---
    # ViTRegressor receives the full timm model name and is configured for
    # four regression outputs.
    model = ViTRegressor(model_name=MODEL_NAME, n_outputs=4).to(DEVICE)

    criterion = nn.MSELoss()
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    print("Model, Loss function, and Optimizer are ready.")
    
# Run only when executed as the main program (a notebook cell also runs with
# __name__ == "__main__"). DataLoader worker processes started with the
# "spawn" method re-import the main module; without this guard they would
# re-run main() on import.
if __name__ == "__main__":
    main()

--- Using device: cuda ---
Data transforms loaded for vit_base_patch16_224.augreg_in21k.
Data loaded: 22788 training samples, 5696 validation samples.


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Model, Loss function, and Optimizer are ready.
