# Fine Tuning

El fine tuning de una red neuronal consiste en ajustar la salida de la red y sus parámetros ya preentrenados con una base de datos más pequeña y específica que la utilizada para el entrenamiento de base.

Cómo realizar el fine tuning depende del modelo elegido. A continuación veremos cómo realizar fine tuning de un modelo con arquitectura ViT (Vision Transformer) para clasificación de imágenes y en las próximas unidades veremos cómo realizar fine tuning de un modelo Transformer para clasificación de texto.

## Fine Tuning de Modelos Transformers con Huggingface



In [None]:
# Install the `transformers` package (with its `torch` extra) and `datasets`.
!pip install transformers[torch]
!pip install datasets

In [None]:
import torch
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

https://github.com/huggingface/datasets
https://huggingface.co/datasets

Seleccionemos un dataset para clasificacion de imagenes:
- [Analisis de emociones 11MB](https://huggingface.co/datasets/FastJobs/Visual_Emotional_Analysis)
- [Snacks 110MB](https://huggingface.co/datasets/Matthijs/snacks)
- [Flores 347MB](https://huggingface.co/datasets/nelorth/oxford-flowers)
- [RayosX Pulmones 203MB](https://huggingface.co/datasets/keremberke/chest-xray-classification)

In [None]:
from datasets import load_dataset
# Load the 'train' split of the FastJobs/Visual_Emotional_Analysis dataset.
ds = load_dataset('FastJobs/Visual_Emotional_Analysis', split='train')
# Hold out 20% of the rows as a test set, shuffled and stratified on 'label'.
ds = ds.train_test_split(test_size=0.2, shuffle=True, stratify_by_column='label')
# Last expression of the cell: displays the resulting object.
ds

In [None]:
# Take the first row of the training split as a sample to inspect.
fila_ejemplo = ds['train'][0]
fila_ejemplo

In [None]:
# Display the sample row's 'image' field.
fila_ejemplo['image']

In [None]:
# Display the sample row's 'label' field.
fila_ejemplo['label']

In [None]:
# Feature descriptor of the 'label' column; its `.names` attribute is shown below.
labels = ds['train'].features['label']
labels.names

In [None]:
import random
from PIL import ImageDraw, ImageFont, Image

def show_examples(ds, seed=42, examples_per_class=3, size=(90, 90)):
    """Build one image grid with a few shuffled training samples per class.

    Each class occupies one row of the grid and each sample one column; the
    class name is drawn in white at the top-left corner of every tile.

    Args:
        ds: Mapping with a 'train' entry whose 'label' feature exposes
            ``names`` and whose rows carry 'image' and 'label' fields.
        seed: Seed passed to the dataset shuffle.
        examples_per_class: Maximum number of tiles drawn per class.
        size: ``(width, height)`` of every tile in pixels.

    Returns:
        The assembled RGB grid image.
    """
    w, h = size
    labels = ds['train'].features['label'].names
    grid = Image.new('RGB', size=(examples_per_class * w, len(labels) * h))
    draw = ImageDraw.Draw(grid)

    for label_id, label in enumerate(labels):

        # Filter the dataset by a single label and shuffle it
        shuffled = ds['train'].filter(lambda ex: ex['label'] == label_id).shuffle(seed)

        # Clamp the sample count: selecting more rows than the class contains
        # would raise instead of drawing a shorter row.
        n_samples = min(examples_per_class, len(shuffled))
        ds_slice = shuffled.select(range(n_samples))

        # Plot this label's examples along a row: column = sample, row = class
        for col, example in enumerate(ds_slice):
            box = (col * w, label_id * h)
            grid.paste(example['image'].resize(size), box=box)
            draw.text(box, label, (255, 255, 255))

    return grid

# Render the grid with a random shuffle seed and five samples per class.
show_examples(ds, seed=random.randint(0, 1337), examples_per_class=5)

## Convertir la imagen a la entrada que necesita el modelo

In [None]:
from transformers import ViTImageProcessor

# Checkpoint identifier; also used further down when loading the model.
model_name_or_path = 'google/vit-base-patch16-224-in21k'
# Image processor loaded for that same checkpoint.
processor = ViTImageProcessor.from_pretrained(model_name_or_path)

In [None]:
def process_example(example):
    """Preprocess a single dataset row.

    Runs the row's 'image' through the module-level ``processor`` requesting
    PyTorch tensors, then stores the row's 'label' under the 'labels' key of
    the processor output and returns that output.
    """
    features = processor(example['image'], return_tensors='pt')
    features['labels'] = example['label']
    return features

In [None]:
# Try the single-row preprocessing on the first training row.
process_example(ds['train'][0])

In [None]:
def transform(example_batch):
    """Convert a batch of dataset rows into model inputs.

    Args:
        example_batch: Mapping with an 'image' column (a list of PIL images,
            per the inline comment) and a 'label' column.

    Returns:
        The processor output with an added 'labels' tensor.

    The tensors are deliberately left on the CPU. This function is attached
    with ``with_transform`` and therefore runs on every row access, and the
    ``Trainer`` moves each collated batch to its own device. Placing tensors
    on the GPU here is redundant and gets in the way of data-loader worker
    processes and pinned memory.
    """
    # Take a list of PIL images and turn them to pixel values
    inputs = processor(list(example_batch['image']), return_tensors='pt')
    inputs['labels'] = torch.tensor(example_batch['label'])
    return inputs

In [None]:
# Attach `transform` so it is applied on the fly when rows are read.
prepared_ds = ds.with_transform(transform)

## Crear la función para darle de comer al modelo

In [None]:
import torch

def collate_fn(batch):
    """Merge a list of per-example dicts into a single batch dict.

    Args:
        batch: Sequence of mappings, each holding a 'pixel_values' tensor and
            a 'labels' value.

    Returns:
        Dict with 'pixel_values' stacked along a new first dimension and
        'labels' gathered into a 1-D tensor.

    No explicit device move happens here: the ``Trainer`` that consumes this
    collator places every batch on its training device itself.
    """
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch]),
    }

## Elegir la metrica para evaluar el modelo

In [None]:
import numpy as np

try:
    from datasets import load_metric
except ImportError:
    # Recent `datasets` releases no longer ship `load_metric`; accuracy is
    # then computed directly with NumPy in `compute_metrics`.
    load_metric = None

# Accuracy metric object when the legacy loader exists, otherwise None.
metric = load_metric("accuracy") if load_metric is not None else None


def compute_metrics(p):
    """Compute classification accuracy for an evaluation prediction object.

    Args:
        p: Object exposing ``predictions`` (per-class scores, one row per
            example) and ``label_ids`` (the reference class indices).

    Returns:
        Dict with a single 'accuracy' entry.
    """
    predicted = np.argmax(p.predictions, axis=1)
    if metric is not None:
        return metric.compute(predictions=predicted, references=p.label_ids)
    # Fallback: fraction of rows whose arg-max class equals the reference.
    return {"accuracy": float(np.mean(predicted == p.label_ids))}

## Cargar el modelo preentrenado para clasificacion

In [None]:
from transformers import ViTForImageClassification

# Rebinds `labels` to the plain list of class-name strings.
labels = ds['train'].features['label'].names

# Load the pretrained checkpoint for image classification, with the number of
# labels set to the number of classes in the dataset.
model = ViTForImageClassification.from_pretrained(
    model_name_or_path,
    num_labels=len(labels),
    # Index <-> name mappings handed to the model; indices are passed as strings.
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)}
)
# Move the model to the selected device.
model.to(device)

## Elegir los parametros para configurar como hacer el entrenamiento

In [None]:
from transformers import TrainingArguments

# Training configuration consumed by the Trainer.
training_args = TrainingArguments(
    output_dir="./vit-emotions",
    per_device_train_batch_size=16,
    # NOTE(review): newer transformers releases may name this `eval_strategy` — confirm
    # against the installed version.
    evaluation_strategy="steps",
    num_train_epochs=4,
    # Mixed precision requires a CUDA device; enable it only when one is
    # present so the CPU fallback chosen for `device` keeps working.
    fp16=torch.cuda.is_available(),
    # Checkpoint and evaluate on the same 100-step cadence.
    save_steps=100,
    eval_steps=100,
    logging_steps=10,
    learning_rate=2e-4,
    save_total_limit=2,
    # Keep the 'image' column: the on-the-fly transform reads it.
    remove_unused_columns=False,
    push_to_hub=False,
    report_to='tensorboard',
    load_best_model_at_end=True,
    dataloader_pin_memory=False,
)

## Crear el objeto para entrenar el modelo

In [None]:
from transformers import Trainer

# Wire the model, training arguments, collator, metric function and both
# dataset splits into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=prepared_ds["train"],
    eval_dataset=prepared_ds["test"],
    # NOTE(review): the image processor is passed through the `tokenizer` slot;
    # newer transformers releases may expect `processing_class` — confirm.
    tokenizer=processor,
)

## Entrenar

In [None]:
# Run training and keep the returned results object.
train_results = trainer.train()
# Save the model, then log and save the training metrics and the trainer state.
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

## Evaluar

In [None]:
# Evaluate on the held-out 'test' split, then log and save the metrics.
metrics = trainer.evaluate(prepared_ds['test'])
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

## Clasificar una imagen nueva

In [None]:
from PIL import Image
import requests
from io import BytesIO

# Download a sample photo and open it from the in-memory response bytes.
response = requests.get('https://images.unsplash.com/photo-1494790108377-be9c29b29330')
image = Image.open(BytesIO(response.content))
# Downscale for display using LANCZOS resampling, bounded by 600x300.
image.thumbnail((600,300),Image.LANCZOS)
image

In [None]:
# Preprocess the image into PyTorch tensors and move them to `device`.
my_input = processor(image, return_tensors="pt").to(device)
pixel_values = my_input.pixel_values

In [None]:
import torch

# Forward pass with gradient tracking disabled.
with torch.no_grad():
    outputs = model(pixel_values)
# Raw class scores produced by the model; the cell displays their shape.
logits = outputs.logits
logits.shape

In [None]:
# Index of the highest-scoring class for the single input image.
prediction = logits.argmax(-1)
# `labels` was rebound to a plain list when the model was loaded, so
# `labels.names` raises AttributeError here; read the class name from the
# model's own id -> label mapping instead.
model.config.id2label[prediction.item()]

#### Referencias:

- https://github.com/NielsRogge/Transformers-Tutorials/tree/master/VisionTransformer
- https://huggingface.co/blog/fine-tune-vit

# Fin: [Volver al contenido del curso](https://www.freecodingtour.com/cursos/espanol/deeplearning/deeplearning.html)