# 1. Configurar el entorno e instalar Pytorch 2.0

https://www.philschmid.de/getting-started-pytorch-2-0-transformers

In [1]:
# Install PyTorch 2.0 with cuda 11.7
!pip install "torch>=2.0" --extra-index-url https://download.pytorch.org/whl/cu117 --upgrade --quiet

In [2]:
import torch
torch.__version__

'2.0.0+cu118'

In [3]:
# Install transformers and dataset
!pip install "transformers==4.27.1" "datasets==2.9.0" "accelerate==0.17.1" "evaluate==0.4.0" tensorboard scikit-learn
# Install git-fls for pushing model and logs to the hugging face hub
!sudo apt-get install git-lfs --yes


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers==4.27.1
  Downloading transformers-4.27.1-py3-none-any.whl (6.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m63.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets==2.9.0
  Downloading datasets-2.9.0-py3-none-any.whl (462 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m462.8/462.8 kB[0m [31m51.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate==0.17.1
  Downloading accelerate-0.17.1-py3-none-any.whl (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.8/212.8 kB[0m [31m32.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate==0.4.0
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading hugg

In [4]:
%%time
# Importar PyTorch y torch.compile
import torch
from torch import compile

# Definir un modelo simple de clasificación de imágenes
class ImageClassifier(torch.nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = torch.nn.Conv2d(3, 16, 3, padding=1)
    self.conv2 = torch.nn.Conv2d(16, 32, 3, padding=1)
    self.pool = torch.nn.MaxPool2d(2, 2)
    self.fc1 = torch.nn.Linear(32 * 8 * 8, 64)
    self.fc2 = torch.nn.Linear(64, 10)
    self.relu = torch.nn.ReLU()
    self.softmax = torch.nn.Softmax(dim=1)

  def forward(self, x):
    x = self.pool(self.relu(self.conv1(x)))
    x = self.pool(self.relu(self.conv2(x)))
    x = x.view(-1, 32 * 8 * 8)
    x = self.relu(self.fc1(x))
    x = self.softmax(self.fc2(x))
    return x

# Crear una instancia del modelo y compilarlo con torch.compile
model = ImageClassifier()
model = compile(model)

# Definir un criterio de pérdida y un optimizador
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Crear un conjunto de datos sintético con 100 imágenes y sus etiquetas
images = torch.randn(100, 3, 32, 32)
labels = torch.randint(0, 10, (100,))




CPU times: user 699 ms, sys: 82 ms, total: 781 ms
Wall time: 834 ms


In [5]:
%%time
# Entrenar el modelo por 100 épocas
for epoch in range(100):
  # Inicializar la pérdida y la precisión acumuladas
  running_loss = 0.0
  running_acc = 0.0

  # Iterar sobre el conjunto de datos por lotes de tamaño 10
  for i in range(0, 100, 10):
    # Obtener un lote de imágenes y etiquetas
    inputs = images[i:i+10]
    targets = labels[i:i+10]

    # Poner a cero los gradientes del optimizador
    optimizer.zero_grad()

    # Pasar el lote por el modelo y obtener las predicciones
    outputs = model(inputs)
    _, preds = torch.max(outputs, 1)

    # Calcular la pérdida y los gradientes
    loss = criterion(outputs, targets)
    loss.backward()

    # Actualizar los parámetros del modelo con el optimizador
    optimizer.step()

    # Acumular la pérdida y la precisión del lote
    running_loss += loss.item()
    running_acc += torch.sum(preds == targets).item()

  # Imprimir la pérdida y la precisión medias de la época
  if epoch%20==0:
    print(f"Epoch {epoch+1}, loss: {running_loss/10:.3f}, acc: {running_acc/100:.3f}")

Epoch 1, loss: 2.304, acc: 0.080
Epoch 21, loss: 2.295, acc: 0.160
Epoch 41, loss: 2.280, acc: 0.160
Epoch 61, loss: 2.270, acc: 0.160
Epoch 81, loss: 2.262, acc: 0.160
CPU times: user 10.6 s, sys: 570 ms, total: 11.1 s
Wall time: 31.2 s
