#INSTALADORES

In [1]:
!pip install tensorboard
!pip install torchinfo
!pip install --upgrade torch torchvision

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


#GOOGLE DRIVE

In [46]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [47]:
! cp "/content/drive/MyDrive/Colab Notebooks/MLP/data.csv" /content
! cp "/content/drive/MyDrive/Colab Notebooks/MLP/img.zip" /content
! unzip -q img.zip
! rm img.zip

#Weights and Biases INICIALIZACION

In [None]:
!pip install wandb
!wandb login

FALTA GENERAR EL PROYECTO MLP-Obligatorio  Y  OBTENER CLAVE

import wandb
from wandb.keras import WandbCallback

#FUNCIONES BASE ENTRENO

In [2]:
import time
import datetime
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from fastprogress import master_bar, progress_bar
from torch.utils.tensorboard import SummaryWriter
import torchinfo
import numpy as np
import random
import pandas as pd
import csv
from PIL import Image

from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data.dataloader import DataLoader

from sklearn.metrics import accuracy_score, confusion_matrix

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [25]:
def to_oneHot(label, numberOfClass):
  oneHot_label = torch.zeros(label.shape[0],numberOfClass).to(device)
  for i in range(label.shape[0]):
    oneHot_label[i][label[i]]=1
  return oneHot_label

def train_step(mlp_model, criterion, optim, img,tabulars, label, batch_size, numberOfClass):
    optim.zero_grad()
    output = mlp_model(img, tabulars)
    loss = criterion(output, label)
    loss.backward()
    optim.step()
    step_loss = loss.item()
    return step_loss

def train_epoch(mlp_model, loader, criterion, optim,mb,numberOfClass):
    epoch_loss_accumulated = 0.0
    for img, tabulars, label in  progress_bar(loader,parent = mb):
      batch_size = img.size(0)
      epoch_loss_accumulated += train_step(mlp_model,criterion,optim, img, tabulars, label, batch_size,numberOfClass)
    return epoch_loss_accumulated/len(loader)

In [4]:
def validation_epoch(mlp_model, val_loader, criterion, classes = None):
    mlp_model.eval()
    epoch_loss = 0.0
    all_labels = []
    all_predictions = []
    
    with torch.no_grad():
      for images, tabulars, label in val_loader:
        all_labels.extend(labels.numpy())  
        labels = labels.to(device)
        labels = to_oneHot(label,classes)
        predictions = mlp_model(images.to(device), tabulars)
        all_predictions.extend(torch.argmax(predictions, dim=1).cpu().numpy())

        loss = criterion(predictions, labels)

        epoch_loss += loss.item()    

    return epoch_loss / len(val_loader), accuracy_score(all_labels, all_predictions) * 100

In [5]:
def train_model(mlp_model, train_loader, valid_loader, criterion, optim, number_epochs,numberOfClass):
  train_history = []
  valid_history = []
  accuracy_history = []
  now = datetime.datetime.now()
  date_time = now.strftime("%m%d%Y_%H%M%S")
  name = 'runs/'+mlp_model.name+'_'+date_time
  tensorBoard_writer = SummaryWriter(name) 
  mb = master_bar(range(1, number_epochs+1))
  for epoch in mb:
      start_time = time.time()     
      train_loss = train_epoch(mlp_model, train_loader, criterion, optim,mb,numberOfClass)
      train_history.append(train_loss)
      print("Training epoch {} | Loss {:.6f} | Time {:.2f} seconds"
            .format(epoch + 1, train_loss, time.time() - start_time))
      
      start_time = time.time()
      val_loss, acc = validation_epoch(mlp_model, valid_loader, criterion)
      valid_history.append(val_loss)
      accuracy_history.append(acc)
      print("Validation epoch {} | Loss {:.6f} | Accuracy {:.2f}% | Time {:.2f} seconds"
            .format(epoch + 1, val_loss, acc, time.time() - start_time))
      # Se carga en tensorBoard #Loss #Validation en train y val
      tensorBoard_writer.add_scalar(tag="Train Loss", scalar_value=train_loss, global_step=epoch)
      tensorBoard_writer.add_scalar(tag="Validation Loss", scalar_value=val_loss, global_step=epoch)
      tensorBoard_writer.add_scalar(tag="Validation Accuracy", scalar_value=acc, global_step=epoch)
  tensorBoard_writer.close()

#CARGA DE DATOS

In [13]:
def load_data():  
  data = []
  with open("./data.csv", 'r') as file:
    lector_csv = csv.reader(file)
    next(lector_csv)
    for fila in lector_csv:
      fila_enteros = [int(float(valor)) for valor in fila]
      data.append(fila_enteros)

  data = np.array(data)
  nombres_imagenes = data[:, 0]

  datos_con_imagenes = []

  for nombre_imagen, fila_datos in zip(nombres_imagenes, data):
      imagen = 0
      try:
        imagen = Image.open("./img/" + str(nombre_imagen))
        label = 0
        if fila_datos[6] > 100000 and fila_datos[6] <= 200000:
          label = 1
        elif fila_datos[6] > 200000 and fila_datos[6] <= 300000:
          label = 2
        elif fila_datos[6] > 300000:
          label = 3
        datos_con_imagenes.append((np.array(imagen), fila_datos[1:6], label))
      except FileNotFoundError:
        print("Error: El archivo de imagen " + str(nombre_imagen)+ ".jpeg no existe.")
      
  print(len(datos_con_imagenes))
  return datos_con_imagenes   

In [14]:
def get_dataloaders(train_transf,batch_size):
# Vector de vectores img, tabulares, label -------------------------------------------------------------------------------------------------------------------------------------
  train_dataset = load_data()
# Vector de vectores img, tabulares, label -------------------------------------------------------------------------------------------------------------------------------------

  BATCH_SIZE = batch_size

  # Separamos en train y validation
  train_size = int(0.8 * len(train_dataset))
  valid_size = len(train_dataset) - train_size

  train, validation = torch.utils.data.random_split(train_dataset, [train_size,valid_size])

  print(f"{len(train)} Training Items, {len(validation)} Validation Items")

  # Podemos usar data loaders como vimos en el práctico.
  train_loader = DataLoader(train, batch_size=BATCH_SIZE, shuffle=True,pin_memory=True)
  valid_loader = DataLoader(validation, batch_size=BATCH_SIZE, pin_memory=True)
  
  return train_loader, valid_loader

#MODELO

In [15]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [39]:
class MLP_Model(nn.Module):
  def __init__(self,name="MLP_MODEL", vocab_size=260, embedding_dim=4, num_classes=4):
    super().__init__()
    self.name = name
    self.embedding = nn.Embedding(num_embeddings=vocab_size,embedding_dim=embedding_dim)
    self.conv1 = nn.Conv2d(3, 64, 4, stride=2, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.conv2 = nn.Conv2d(64, 128, 4, stride=2, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(128)
    self.conv3 = nn.Conv2d(128, 256, 4, stride=2, padding=1, bias=False)
    self.bn3 = nn.BatchNorm2d(256)
    self.conv4 = nn.Conv2d(256, 64, 4, stride=2, padding=1, bias=False)
    self.bn4 = nn.BatchNorm2d(64)
    self.linear1 = nn.Linear(64*16*12+5+(embedding_dim-1), 1024)
    self.linear2 = nn.Linear(1024, 512)
    self.linear3 = nn.Linear(512, 128)
    self.linear4 = nn.Linear(128, 64)
    self.out = nn.Linear(64, num_classes)


  def forward(self, x,tabulars):
    # entrada de 256*192
    emb_Location = self.embedding(tabulars[4])
    x = x.view(x.size(0), 3, 256, 192)
    x = F.relu(self.bn1(self.conv1(x)))
    x = F.relu(self.bn2(self.conv2(x)))
    x = F.relu(self.bn3(self.conv3(x)))
    x = F.relu(self.bn4(self.conv4(x)))
    x = x.view(x.size(0), -1)
    x = torch.concat([x, tabulars[0:4], emb_Location], -1)
    x = F.relu(self.linear1(x))
    x = F.relu(self.linear2(x))
    x = F.relu(self.linear3(x))
    x = F.relu(self.linear4(x))
    return x

In [None]:
torchinfo.summary(MLP_Model())

#PARAMETROS

In [None]:
config = wandb.config # Config is a variable that holds and saves hyperparameters and inputs

config.LR = 2e-4
config.epochs = 50
config.batch_size = 32
config.B = [0.5,0.999]
config.info = 'Modelo MLP'

In [29]:
# Creamos los dataloaders
train_transform = transforms.Compose([
    transforms.Resize([256,192]),
    transforms.ToTensor()
])

# Creamos el loaders
train_loader, val_loader = get_dataloaders(train_transform,batch_size=32)

Error: El archivo de imagen 1787.jpeg no existe.
Error: El archivo de imagen 1942.jpeg no existe.
Error: El archivo de imagen 3140.jpeg no existe.
Error: El archivo de imagen 3280.jpeg no existe.
Error: El archivo de imagen 3297.jpeg no existe.
Error: El archivo de imagen 3308.jpeg no existe.
Error: El archivo de imagen 3667.jpeg no existe.
Error: El archivo de imagen 3720.jpeg no existe.
Error: El archivo de imagen 3784.jpeg no existe.
Error: El archivo de imagen 4348.jpeg no existe.
Error: El archivo de imagen 4621.jpeg no existe.
Error: El archivo de imagen 5423.jpeg no existe.
Error: El archivo de imagen 5492.jpeg no existe.
Error: El archivo de imagen 5900.jpeg no existe.
Error: El archivo de imagen 5918.jpeg no existe.
Error: El archivo de imagen 6018.jpeg no existe.
Error: El archivo de imagen 6071.jpeg no existe.
Error: El archivo de imagen 7318.jpeg no existe.
Error: El archivo de imagen 7701.jpeg no existe.
Error: El archivo de imagen 7769.jpeg no existe.
Error: El archivo de

#ENTRENAMIENTO

In [40]:
# Definimos el modelo y el optimizador
LR = 2e-4
epochs = 50
batch_size = 32
B = [0.5,0.999]
mlp_model = MLP_Model("MLP MODEL", vocab_size=260, embedding_dim=4, num_classes=4).to(device)
opt = torch.optim.Adam(mlp_model.parameters(), lr=LR,betas=B)
crit = nn.CrossEntropyLoss()


In [41]:
train_model(mlp_model, train_loader, val_loader, crit, opt, epochs, 4)

RuntimeError: ignored

In [None]:
# Guardado del modelo

torch.save(mlp_model.state_dict(),mlp_model.name+".dat")

In [None]:
%tensorboard --logdir=runs/

#Weights and Biases IMPLEMENTACION

In [None]:
def train_model_2():
  
  train_loader, val_loader, num_classes = get_dataloaders(train_transform,config.batch_size)
  mlp_model = MLP_Model("MLP MODEL", vocab_size=200, embedding_dim=4, num_classes=4).to(device)
  opt = torch.optim.Adam(mlp_model.parameters(), lr=config.LR,betas=config.B)
  crit = nn.CrossEntropyLoss()
  
  train_history = []
  valid_history = []
  accuracy_history = []
  now = datetime.datetime.now()
  date_time = now.strftime("%m%d%Y_%H%M%S")
  name = 'runs/'+mlp_model.name+'_'+date_time
  tensorBoard_writer = SummaryWriter(name) 
  mb = master_bar(range(1, config.epochs+1))
  for epoch in mb:
      start_time = time.time()     
      train_loss = train_epoch(mlp_model, train_loader, crit, opt,mb,num_classes)
      train_history.append(train_loss)
      print("Training epoch {} | Loss {:.6f} | Time {:.2f} seconds"
            .format(epoch + 1, train_loss, time.time() - start_time))
      
      start_time = time.time()
      val_loss, acc = validation_epoch(mlp_model, val_loader, crit)
      valid_history.append(val_loss)
      accuracy_history.append(acc)
      print("Validation epoch {} | Loss {:.6f} | Accuracy {:.2f}% | Time {:.2f} seconds"
            .format(epoch + 1, val_loss, acc, time.time() - start_time))
      # Se carga en tensorBoard #Loss #Validation en train y val
      tensorBoard_writer.add_scalar(tag="Train Loss", scalar_value=train_loss, global_step=epoch)
      tensorBoard_writer.add_scalar(tag="Validation Loss", scalar_value=val_loss, global_step=epoch)
      tensorBoard_writer.add_scalar(tag="Validation Accuracy", scalar_value=acc, global_step=epoch)
  tensorBoard_writer.close()

In [None]:
sweep_configuration = {
    'method': 'grid',         # 'grid', 'hyperopt', 'bayesian'
    'metric': {
        'name': 'acc',     # 'accuracy'
        'goal': 'maximize'      # 'maximize'
    },
    'parameters': {
        'batch_size': {'values': [32]},
        'epochs': {'values': [5,10,20,50,100]},
        'B': {'values': [[0.5,0.999]]},
        'learning_rate': {'values': [0.0002, 0.0004, 0.0006, 0.0010, 0.010, 0.1]}
     }
}

In [None]:
sweep_id = wandb.sweep(sweep_configuration, project="MLP-Obligatorio", entity="vainilla")
wandb.agent(sweep_id, function=train_model_2, count=15, project='MLP-Obligatorio')