#INSTALADORES

In [67]:
!pip install tensorboard
!pip install torchinfo
!pip install --upgrade torch torchvision



#GOOGLE DRIVE

In [2]:
# Mount Google Drive inside the Colab VM at /content/drive.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [3]:
! cp "/content/drive/MyDrive/Colab Notebooks/MLP/data.csv" /content
! cp "/content/drive/MyDrive/Colab Notebooks/MLP/img.zip" /content
! unzip -q img.zip
! rm img.zip

# Weights and Biases INICIALIZACIÓN

In [4]:
!pip install wandb
!wandb login

#59faca4190d33815ba5b1ce0a93431b26067d3a8

import wandb

Collecting wandb
  Downloading wandb-0.15.4-py3-none-any.whl (2.1 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.2/2.1 MB[0m [31m7.2 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━[0m [32m1.8/2.1 MB[0m [31m25.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-1.26.0-py2.py3-none-any.whl (209 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.4/209.4 kB[0m [31m16.3 MB

#FUNCIONES BASE ENTRENO

In [68]:
import time
import datetime
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from fastprogress import master_bar, progress_bar
from torch.utils.tensorboard import SummaryWriter
import torchinfo
import numpy as np
import random
import pandas as pd
import csv
from PIL import Image
import os

from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data.dataloader import DataLoader

from sklearn.metrics import accuracy_score, confusion_matrix

# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

In [69]:
def to_oneHot(label, numberOfClass):
  """Convert a batch of integer class indices into one-hot rows.

  Args:
    label: integer tensor holding one class index per sample; its first
      dimension is the batch dimension.
    numberOfClass: number of columns of the resulting one-hot matrix.

  Returns:
    A float32 tensor of shape (label.shape[0], numberOfClass) allocated on the
    module-level `device`, with 1 at each sample's class index and 0 elsewhere.
  """
  batch = label.shape[0]
  # Allocate directly on the target device instead of creating on CPU and moving.
  oneHot_label = torch.zeros(batch, numberOfClass, device=device)
  if batch > 0:
    # One vectorised assignment replaces the per-sample Python loop.
    rows = torch.arange(batch, device=device)
    cols = label.reshape(batch).to(device=device, dtype=torch.long)
    oneHot_label[rows, cols] = 1
  return oneHot_label

def train_step(mlp_model, criterion, optim, img,tabulars, label, batch_size, numberOfClass):
    """Run one optimisation step on a single batch.

    The model is called as `mlp_model(img, tabulars)`, the loss is computed
    with `criterion(output, label)`, back-propagated, and the optimizer is
    stepped. `batch_size` and `numberOfClass` are accepted but not used here.

    Returns:
        The batch loss as a Python float.
    """
    # Drop gradients accumulated by the previous step.
    optim.zero_grad()
    batch_loss = criterion(mlp_model(img, tabulars), label)
    batch_loss.backward()
    optim.step()
    return batch_loss.item()

def train_epoch(mlp_model, loader, criterion, optim,mb,numberOfClass):
    """Train the model for one full pass over `loader`.

    Args:
        mlp_model: model called as `mlp_model(img, tabulars)`.
        loader: iterable yielding `(img, tabulars, labels)` batches.
        criterion: loss function applied to `(output, labels)`.
        optim: optimizer stepped once per batch.
        mb: fastprogress master bar used as parent of the per-batch bar.
        numberOfClass: forwarded unchanged to `train_step`.

    Returns:
        The mean of the per-batch losses (sum of batch losses / number of batches).
    """
    # validation_epoch() switches the model to eval mode and never switches it
    # back, so without this call every epoch after the first would train with
    # BatchNorm in inference mode.
    mlp_model.train()
    epoch_loss_accumulated = 0.0
    for img, tabulars, labels in progress_bar(loader, parent=mb):
      batch_size = img.size(0)
      epoch_loss_accumulated += train_step(
          mlp_model, criterion, optim,
          img.to(device), tabulars.to(device), labels.to(device),
          batch_size, numberOfClass)
    return epoch_loss_accumulated/len(loader)

In [70]:
def validation_epoch(mlp_model, val_loader, criterion, classes = None):
    """Evaluate the model on `val_loader` without computing gradients.

    The model is put in eval mode (and left in it). Labels are expected to be
    one row per sample so that `argmax(dim=1)` yields the true class.
    `classes` is accepted but not used.

    Returns:
        A tuple `(mean batch loss, accuracy in percent)`.
    """
    mlp_model.eval()
    running_loss = 0.0
    true_classes, predicted_classes = [], []

    with torch.no_grad():
      for images, tabulars, labels in val_loader:
        # Record the ground-truth class before the batch is moved to the device.
        true_classes.extend(torch.argmax(labels, dim=1).cpu().numpy())
        images, tabulars, labels = images.to(device), tabulars.to(device), labels.to(device)
        outputs = mlp_model(images, tabulars)
        predicted_classes.extend(torch.argmax(outputs, dim=1).cpu().numpy())
        running_loss += criterion(outputs, labels).item()

    mean_loss = running_loss / len(val_loader)
    accuracy_pct = accuracy_score(true_classes, predicted_classes) * 100
    return mean_loss, accuracy_pct

In [71]:
def train_model(mlp_model, train_loader, valid_loader, criterion, optim, number_epochs,numberOfClass):
  """Train and validate the model for `number_epochs` epochs, logging metrics.

  For every epoch the training loss, validation loss and validation accuracy
  are printed, sent to Weights & Biases and written to a TensorBoard run
  stored under `runs/<model name>_<timestamp>`.

  Args:
    mlp_model: model to train; must expose a `name` attribute.
    train_loader: loader passed to `train_epoch`.
    valid_loader: loader passed to `validation_epoch`.
    criterion: loss function.
    optim: optimizer.
    number_epochs: number of epochs to run (epochs are numbered from 1).
    numberOfClass: forwarded to `train_epoch`.

  Returns:
    A tuple `(train_history, valid_history, accuracy_history)` with one entry
    per completed epoch.
  """
  train_history = []
  valid_history = []
  accuracy_history = []
  date_time = datetime.datetime.now().strftime("%m%d%Y_%H%M%S")
  name = 'runs/'+mlp_model.name+'_'+date_time
  tensorBoard_writer = SummaryWriter(name)
  mb = master_bar(range(1, number_epochs+1))
  try:
    for epoch in mb:
      # validation_epoch() leaves the model in eval mode; re-enable training
      # behaviour (e.g. BatchNorm batch statistics) before each training pass.
      mlp_model.train()
      start_time = time.time()
      train_loss = train_epoch(mlp_model, train_loader, criterion, optim,mb,numberOfClass)
      train_history.append(train_loss)
      # `epoch` already starts at 1, so it is printed as-is.
      print("Training epoch {} | Loss {:.6f} | Time {:.2f} seconds"
            .format(epoch, train_loss, time.time() - start_time))

      start_time = time.time()
      val_loss, acc = validation_epoch(mlp_model, valid_loader, criterion)
      valid_history.append(val_loss)
      accuracy_history.append(acc)
      print("Validation epoch {} | Loss {:.6f} | Accuracy {:.2f}% | Time {:.2f} seconds"
            .format(epoch, val_loss, acc, time.time() - start_time))

      # One log call per epoch keeps the three metrics on the same W&B step.
      wandb.log({"Train Loss": train_loss,
                 "Validation Loss": val_loss,
                 "Accuracy": acc})

      # Mirror the same metrics in TensorBoard.
      tensorBoard_writer.add_scalar(tag="Train Loss", scalar_value=train_loss, global_step=epoch)
      tensorBoard_writer.add_scalar(tag="Validation Loss", scalar_value=val_loss, global_step=epoch)
      tensorBoard_writer.add_scalar(tag="Validation Accuracy", scalar_value=acc, global_step=epoch)
  finally:
    # Flush and close the event file even if training is interrupted.
    tensorBoard_writer.close()
  return train_history, valid_history, accuracy_history

#CARGA DE DATOS

In [83]:
# Validation-time preprocessing: images are only resized to 256x192.
val_transform = transforms.Compose([transforms.Resize([256,192])])


class transformIterator:
    """Callable image transform that occasionally applies one augmentation.

    Every call resizes the input to 256x192. With probability 0.3 the resize is
    followed by a single augmentation taken in round-robin order from a fixed
    list (rotation, horizontal flip, brightness jitter, hue jitter, grayscale,
    Gaussian blur); otherwise only the resize is applied.
    """

    def __init__(self):
        # Index of the augmentation to use the next time one is applied.
        self.case = 0
        self.base_transform = transforms.Compose([
          transforms.Resize([256,192]),
        ])
        extra_steps = [
          transforms.RandomRotation(degrees=(0,45)),
          transforms.RandomHorizontalFlip(),
          transforms.ColorJitter(brightness=(0,2)),
          transforms.ColorJitter(hue=(-0.25,0.25)),
          transforms.RandomGrayscale(p=0.7),
          transforms.GaussianBlur(kernel_size=(3,3)),
        ]
        # Each augmentation pipeline is "resize, then one extra step".
        self.augmentations = [
          transforms.Compose([transforms.Resize([256,192]), step])
          for step in extra_steps
        ]

    def __call__(self, x):
      rate_of_augmentation = 0.3
      # Most calls take the plain resize path.
      if random.random() >= rate_of_augmentation:
        return self.base_transform(x)
      chosen = self.augmentations[self.case]
      # Advance the round-robin pointer, wrapping at the end of the list.
      self.case = (self.case + 1) % len(self.augmentations)
      return chosen(x)

In [82]:
from torch.utils.data import Dataset
from torchvision.io import read_image
import os


class MLP_Dataset(Dataset):
    """Dataset pairing an image with tabular features and a 4-class target.

    The CSV file is read with its first line skipped as a header and every
    value converted to int. Per row, column 0 is used as the image file name
    inside `image_dir`, columns 1-5 are returned as the tabular features and
    column 6 is bucketed into one of four classes (see `targetToClass`).
    Rows whose image file does not exist are dropped.

    Args:
        image_dir: directory that contains the image files.
        data_dir: path of the CSV file (despite the name, this is a file path).
        in_train: when True `__getitem__` applies the training transform
            (resize + random augmentation), otherwise the validation transform.
    """

    def __init__(self, image_dir='./img/', data_dir='./data.csv', in_train=False):
        self.image_dir = image_dir
        self.data_dir = data_dir

        self.in_train = in_train
        self.val_transform = val_transform
        self.train_transform = transformIterator()

        self.tabular_data = self.openDataFile()
        arr = np.array(self.tabular_data)
        print(arr.shape)

        # Keep only the rows that have a matching image on disk.
        self.tabular_data = list(filter(self.hasImage, self.tabular_data))
        self.labels = torch.tensor(list(map(self.targetToClass, self.tabular_data)))

        # Image file names stay as Python strings (not converted to numpy).
        self.image_files = [str(row[0]) for row in self.tabular_data]
        self.tabular_data = torch.tensor(np.array(self.tabular_data)[:,1:6])

    def __len__(self):
        """Number of samples that have an image on disk."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return `(image, tabular_data, label)` for sample `idx`."""
        img_path = os.path.join(self.image_dir, self.image_files[idx])
        image = torchvision.io.read_image(img_path)

        tabular_data = self.tabular_data[idx]
        label = self.labels[idx]

        # Use the transforms stored on the instance rather than module globals,
        # so the dataset works regardless of what names exist in the notebook.
        transform = self.train_transform if self.in_train else self.val_transform
        image = transform(image)
        return image,tabular_data, label

    def hasImage(self, line):
      """Return True when the image named by `line[0]` exists in `image_dir`."""
      # Join as path components so `image_dir` works with or without a
      # trailing separator.
      image_path = os.path.join(self.image_dir, str(line[0]))
      return os.path.isfile(image_path)

    def targetToClass(self, line):
        """Map the value in column 6 to a one-hot list over four classes.

        Class 0: value <= 100000, class 1: (100000, 200000],
        class 2: (200000, 300000], class 3: value > 300000.
        """
        target = line[6]
        label = [1.0,0.0,0.0,0.0]
        if 100000 < target <= 200000:
          label = [0.0,1.0,0.0,0.0]
        elif 200000 < target <= 300000:
          label = [0.0,0.0,1.0,0.0]
        elif target > 300000:
          label = [0.0,0.0,0.0,1.0]
        return label

    def openDataFile(self):
      """Read the CSV at `self.data_dir` into a list of integer rows."""
      tabular_data = []
      # Honour the path given to the constructor; newline='' is what the csv
      # module expects for files it reads.
      with open(self.data_dir, 'r', newline='') as file:
        lector_csv = csv.reader(file)
        next(lector_csv)  # skip the header line
        for fila in lector_csv:
          if not fila:
            # Blank lines carry no data and would break the column indexing later.
            continue
          fila_enteros = [int(float(valor)) for valor in fila]
          tabular_data.append(fila_enteros)
      return tabular_data


In [81]:
def get_dataloaders(data,batch_size):
  """Build the training and validation DataLoaders from the local dataset.

  The dataset is read from `./img/` and `./data.csv`, then split randomly
  80% / 20% into training and validation subsets.

  Args:
    data: accepted for backward compatibility; not used.
    batch_size: batch size used by both loaders.

  Returns:
    A tuple `(train_loader, valid_loader)`; only the training loader shuffles.
  """
  import copy  # local import: only needed to clone the dataset below

  dataset = MLP_Dataset(image_dir='./img/', data_dir='./data.csv')

  # Split into train and validation.
  train_size = int(0.8 * len(dataset))
  valid_size = len(dataset) - train_size
  train, validation = torch.utils.data.random_split(dataset, [train_size,valid_size])

  # Setting `in_train` on the Subset returned by random_split only creates an
  # attribute on the wrapper; the wrapped dataset never sees it. Both subsets
  # also share one dataset object, so flipping its flag would augment the
  # validation images too. Give the training subset its own shallow copy
  # (tensors are shared, nothing is re-read from disk) with the flag enabled.
  train_dataset = copy.copy(dataset)
  train_dataset.in_train = True
  train = torch.utils.data.Subset(train_dataset, train.indices)

  print(f"{len(train)} Training Items, {len(validation)} Validation Items")

  train_loader = DataLoader(train, batch_size=batch_size, shuffle=True,pin_memory=True)
  valid_loader = DataLoader(validation, batch_size=batch_size, pin_memory=True)

  return train_loader, valid_loader

#MODELO

In [56]:
# Load the TensorBoard notebook extension (provides the `%tensorboard` magic).
%load_ext tensorboard

In [87]:
class MLP_Model(nn.Module):
  """CNN + MLP classifier that combines an image with tabular features.

  The image goes through six stride-2 convolutions (each followed by
  BatchNorm and ReLU) that reduce a 3x256x192 input to a 1x4x3 map. The
  flattened map is concatenated with the tabular features and an embedding
  of tabular column 4, then fed to a five-layer MLP.

  Args:
    name: label stored on the model (used by the notebook for run/file names).
    vocab_size: number of rows of the embedding table.
    embedding_dim: size of each embedding vector.
    num_classes: number of output classes.
  """

  def __init__(self,name="MLP_MODEL", vocab_size=260, embedding_dim=4, num_classes=4):
    super().__init__()
    self.name = name
    self.embedding = nn.Embedding(num_embeddings=vocab_size,embedding_dim=embedding_dim)
    # Input -> 256*192
    self.conv1 = nn.Conv2d(3, 128, 4, stride=2, padding=1, bias=False)
    # 256*192 -> conv1 (1/2) -> 128*96
    self.bn1 = nn.BatchNorm2d(128)
    self.conv2 = nn.Conv2d(128, 256, 4, stride=2, padding=1, bias=False)
    # 128*96 -> conv2 (1/2) -> 64*48
    self.bn2 = nn.BatchNorm2d(256)
    self.conv3 = nn.Conv2d(256, 128, 4, stride=2, padding=1, bias=False)
    # 64*48 -> conv3 (1/2) -> 32*24
    self.bn3 = nn.BatchNorm2d(128)
    self.conv4 = nn.Conv2d(128, 32, 4, stride=2, padding=1, bias=False)
    # 32*24 -> conv4 (1/2) -> 16*12
    self.bn4 = nn.BatchNorm2d(32)
    self.conv5 = nn.Conv2d(32, 8, 4, stride=2, padding=1, bias=False)
    # 16*12 -> conv5 (1/2) -> 8*6
    self.bn5 = nn.BatchNorm2d(8)
    self.conv6 = nn.Conv2d(8, 1, 4, stride=2, padding=1, bias=False)
    # 8*6 -> conv6 (1/2) -> 4*3
    self.bn6 = nn.BatchNorm2d(1)
    # MLP input = flattened CNN map (1*4*3) + scaled column 0 (1)
    #           + tabular columns 1..3 (3) + embedding of column 4 (embedding_dim).
    self.linear1 = nn.Linear(1*4*3 + 1 + 3 + embedding_dim, 1024)
    self.linear2 = nn.Linear(1024, 512)
    self.linear3 = nn.Linear(512, 128)
    self.linear4 = nn.Linear(128, 64)
    self.out = nn.Linear(64, num_classes)


  def forward(self, x,tabulars,debug = False):
    """Compute class scores for a batch.

    Args:
      x: image batch that can be viewed as (batch, 3, 256, 192).
      tabulars: integer tensor with five columns per sample; column 4 is used
        as the embedding index, column 0 is divided by 100 and columns 1-3
        are used as they are.
      debug: when True, print the concatenated feature vector fed to the MLP.

    Returns:
      Tensor of shape (batch, num_classes) with raw, un-normalised logits.
      No softmax is applied here because nn.CrossEntropyLoss applies
      log-softmax itself; feeding it probabilities flattens the loss and its
      gradients. Apply `F.softmax(logits, dim=1)` at the call site when
      probabilities are needed; `argmax` gives the same class either way.
    """
    # Embed the categorical column (index 4).
    emb_Location = self.embedding(tabulars[:,4])
    # Area data scaling (column 0 divided by 100).
    rescalArea = tabulars[:,0:1] /100.0
    x = x.view(x.size(0), 3, 256, 192)
    x = torch.round(x).to(torch.float32)
    x = F.relu(self.bn1(self.conv1(x)))
    x = F.relu(self.bn2(self.conv2(x)))
    x = F.relu(self.bn3(self.conv3(x)))
    x = F.relu(self.bn4(self.conv4(x)))
    x = F.relu(self.bn5(self.conv5(x)))
    x = F.relu(self.bn6(self.conv6(x)))
    # Flatten the 1x4x3 map and append the tabular features.
    x = x.view(x.size(0), -1)
    x = torch.concat([x,rescalArea,tabulars[:,1:4], emb_Location], -1)
    if debug:
      print("Tabulars START:")
      print(x)
      print("Tabulars END:")
    x = F.relu(self.linear1(x))
    x = F.relu(self.linear2(x))
    x = F.relu(self.linear3(x))
    x = F.relu(self.linear4(x))
    # Return logits; the loss function is responsible for the softmax.
    return self.out(x)

In [58]:
# Print a per-layer parameter count for a freshly constructed model.
# NOTE(review): the output recorded below lists four Conv2d layers, while the
# class above defines six — it looks stale; re-run this cell to refresh it.
torchinfo.summary(MLP_Model())

Layer (type:depth-idx)                   Param #
MLP_Model                                --
├─Embedding: 1-1                         1,040
├─Conv2d: 1-2                            3,072
├─BatchNorm2d: 1-3                       128
├─Conv2d: 1-4                            131,072
├─BatchNorm2d: 1-5                       256
├─Conv2d: 1-6                            524,288
├─BatchNorm2d: 1-7                       512
├─Conv2d: 1-8                            262,144
├─BatchNorm2d: 1-9                       128
├─Linear: 1-10                           12,592,128
├─Linear: 1-11                           524,800
├─Linear: 1-12                           65,664
├─Linear: 1-13                           8,256
├─Linear: 1-14                           260
Total params: 14,113,748
Trainable params: 14,113,748
Non-trainable params: 0

# PARÁMETROS

In [76]:
# Single source of truth for the run's hyperparameters. They are handed to
# W&B at init time so the values that get logged are exactly the values used.
hyperparameters = {
    "learning_rate": 2e-4,
    "epochs": 50,
    "batch_size": 32,
    "B": [0.5,0.999],  # passed to Adam as `betas`
    "info" : 'Modelo MLP'
}

wandb.init(project="MLP-Obligatorio", entity="vainilla", config=hyperparameters)

config = wandb.config # Config is a variable that holds and saves hyperparameters and inputs

VBox(children=(Label(value='0.001 MB of 0.058 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.016623…

In [77]:
# Read the hyperparameters back from the W&B config into plain variables.
LR, epochs = config.learning_rate, config.epochs
batch_size, B = config.batch_size, config.B

#ENTRENAMIENTO

In [88]:
# Create the data loaders.
# `train_transform` must exist before it is referenced: nothing else in this
# notebook defines it, so on a fresh kernel the call below raised NameError.
train_transform = transformIterator()
train_loader, val_loader = get_dataloaders(train_transform, batch_size)

(25099, 7)
19981 Training Items, 4996 Validation Items


In [89]:
# Build the network on the selected device together with its Adam optimizer.
mlp_model = MLP_Model("MLP MODEL", vocab_size=260, embedding_dim=4, num_classes=4)
mlp_model = mlp_model.to(device)
opt = torch.optim.Adam(mlp_model.parameters(), lr=LR, betas=B)

# Per-class weights for the loss, created directly on the device.
weights = torch.tensor([0.31, 0.07, 0.31, 0.31], device=device)
crit = nn.CrossEntropyLoss(weight=weights)

# Register the model with W&B (log_freq=100).
wandb.watch(mlp_model, log_freq=100)


[]

In [90]:
# Launch training; the last argument is the number of target classes.
train_model(mlp_model, train_loader, val_loader, crit, opt,
            number_epochs=epochs, numberOfClass=4)

KeyboardInterrupt: ignored

In [None]:
# Save the model: only the state_dict is written, to "<model name>.dat".
torch.save(mlp_model.state_dict(), f"{mlp_model.name}.dat")

In [None]:
%tensorboard --logdir=runs/MLP MODEL_06162023_164609