<a href="https://colab.research.google.com/github/enVives/Caltech101/blob/main/Caltech101.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [222]:
import torch,torchvision
import numpy as np
import cv2
import time

from torchvision import transforms,models
from torch import nn
from IPython.display import clear_output,display
from torch.utils.data import DataLoader, random_split,Subset
from google.colab.patches import cv2_imshow

ROOT = './sample_data/'
CLASS_A = 'cougar_body'
CLASS_B = 'windsor_chair'

DOWNLOAD = False
#!rm -rf /content/sample_data/*

In [223]:
def find_mean_std(dataset):
  dataloader = DataLoader(dataset,batch_size = 1,shuffle=FALSE)
  mean = torch.zeros(3)
  std = torch.zeros(3)
  samples = 0

  for image,_ in dataloader:
    mean += image.mean(dim=[0, 2, 3])  # Mean for each channel [batchsize,channels,height,width]
    std += image.std(dim=[0, 2, 3])    # Std for each channel
    samples += 1

  mean /= samples
  std /= samples

  print(mean)
  print(std)

In [224]:
def check_sets(sett):
  dist_set = np.zeros(3)

  for _,label in sett:
    dist_set[label] += 1

  print(np.round(dist_set.astype(int)/sett.__len__(),2))

In [225]:
def get_labels(sett):
  labels = np.array([])
  frequency = np.zeros(101)
  for _,label in sett:
    labels = np.append(labels,label)
    frequency[label] += 1
  return labels.astype(int),frequency

In [226]:
#from pickle import FALSE
from sklearn.model_selection import train_test_split

TRAINING = 0.80
VAL = 0.10
TESTING = 0.10

#[0.5459, 0.5288, 0.5022]
#[0.2424, 0.2393, 0.2409]
mean = torch.tensor([0.5459, 0.5288, 0.5022])
std = torch.tensor([0.2424, 0.2393, 0.2409])

transform = transforms.Compose([
    transforms.Lambda(lambda img: img.convert("RGB") if img.mode != "RGB" else img),  # Ensure all images are RGB
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

dataset = torchvision.datasets.Caltech101(root= ROOT,download=DOWNLOAD,transform=transform)
longitud = dataset.__len__()
labels,frequency = get_labels(dataset)
class_weights = 1.0/frequency
class_weights = class_weights / class_weights.sum()
class_weights = torch.tensor(class_weights.astype(float))

#find_mean_std(dataset)

#Construïm el diccionari per codificar/decodificar classes, així podrem averiguar les nostres
dictionary = {}
category = 0
for c in dataset.categories:
  dictionary[c] = category
  category += 1

#Canviam les labels per 0:resta 1:classe 1 2:classe 2

train_size = int(TRAINING*len(dataset))
validation_size = int(VAL*len(dataset))

#Obtenim els sets de training,validation i testing (amb les imatges ordenades)

train_val_indices,test_indices = train_test_split(np.arange(len(dataset)), test_size=0.1, stratify=labels, random_state=42)
train_indices, val_indices = train_test_split(
    train_val_indices, test_size=0.1, stratify=labels[train_val_indices], random_state=42
)

training = Subset(dataset,train_indices)
validation = Subset(dataset,val_indices)
testing = Subset(dataset,test_indices)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#training,validation,testing = random_split(dataset,[train_size,validation_size,len(dataset)-train_size-validation_size])

In [227]:
print(f"LONGITUD SET DE TRAINING: {len(training)}")
print(f"LONGITUD SET DE VALIDATION: {len(validation)}")
print(f"LONGITUD SET DE TESTING: {len(testing)}")
print(f"CODI CLASSE windsor_chair: {dictionary['windsor_chair']}")
print(f"CODI CLASSE cougar_body: {dictionary['cougar_body']}")

LONGITUD SET DE TRAINING: 7028
LONGITUD SET DE VALIDATION: 781
LONGITUD SET DE TESTING: 868
CODI CLASSE windsor_chair: 98
CODI CLASSE cougar_body: 24


Ara ja sabem que hem de predir si una imatge pertany a les classes 25 o 99

In [228]:
def pick_algorithm(number):
  if number == 0:
    alexnetmulticlass = models.alexnet(weights=None)

    alexnetmulticlass.classifier = nn.Sequential(
    torch.nn.Linear(9216, 1024),
    nn.ReLU(),
    torch.nn.Linear(1024, 1024),
    nn.ReLU(),
    torch.nn.Linear(1024, 512),
    nn.ReLU(),
    torch.nn.Linear(512, 101),  # Ja que tenim 101 classes.
    )
    loss_fn = nn.CrossEntropyLoss(weight=class_weights.to(device))

    return alexnetmulticlass,loss_fn

In [229]:
def veure_imatges(train_data,std,mean):
  for i in range(len(train_data)):
    imatge,label = train_data[i]

    print(imatge.ndimension())
    print(imatge.shape)

    imatge = imatge * (std[:, None, None]*255) + (mean[:, None, None]*255)
    # Convert the tensor back to a NumPy array
    img_numpy = imatge.permute(1, 2, 0).numpy()  # Change from (C, H, W) to (H, W, C)
    cv2_imshow(img_numpy)
    time.sleep(5)
    clear_output(wait=True)

In [230]:
BATCH_SIZE = 64
EPOCHS = 95

train_loader = torch.utils.data.DataLoader(training, batch_size=BATCH_SIZE, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation, batch_size=BATCH_SIZE, shuffle=True)
testing_loader = torch.utils.data.DataLoader(testing, batch_size=BATCH_SIZE, shuffle=True)

model,loss_fn = pick_algorithm(0)
model.to(device)

learning_rate = 1e-2
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

#veure_imatges(training,std,mean)

In [231]:
img, target = next(iter(train_loader))
print(img.shape)

torch.Size([64, 3, 224, 224])


In [232]:
from sklearn.metrics import accuracy_score,f1_score,precision_score,recall_score
from tqdm.auto import tqdm
import pylab as pl

t_loss = np.zeros(EPOCHS)
v_loss = np.zeros(EPOCHS)
acc_t = np.zeros(EPOCHS) #accuracy
acc_v = np.zeros(EPOCHS)
f1_t = np.zeros(EPOCHS) #f1
f1_v = np.zeros(EPOCHS)
recall_t = np.zeros(EPOCHS) #recall
recall_v = np.zeros(EPOCHS)
precision_t = np.zeros(EPOCHS)
precision_v = np.zeros(EPOCHS) #precisió


pbar = tqdm(range(1, EPOCHS + 1))  # tdqm permet tenir text dinàmic

classes = []

def extreu_classes(target):
    for i in target:
      if i not in classes:
        classes.append(i)

for epoch in pbar:

    train_loss = 0
    train_acc = 0
    train_f1 = 0
    train_recall = 0
    train_precision = 0
    val_acc = 0
    val_recall = 0
    val_f1 = 0
    val_loss = 0
    val_precision = 0

    batch_num = 1

    for batch_num, (input_img, target) in tqdm(enumerate(train_loader), desc=f"Batches (Època {epoch})"):

        model.train()

        optimizer.zero_grad()
        #extreu_classes(target.to(device))

        output = model(input_img.to(device))

        # print(target.shape)
        # print(output.shape)

        print(output)
        print(target)
        target = target.to(device)
        loss = loss_fn(output, target.float())


        model.zero_grad()
        loss.backward()

        with torch.no_grad():
            optimizer.step()

        #print(f"Pèrdua entrenament batch: {batch_num} epoch: {epoch+1}  train_loss: {loss.item()}")
        model.eval()

        y_class_predict = torch.argmax(output, dim=1)
        y_class_predict = (y_class_predict.cpu().detach().numpy())
        target = target.cpu().detach().numpy()


        train_acc += accuracy_score(target,y_class_predict)
        train_f1 += f1_score(target,y_class_predict,zero_division=1,average='macro')
        train_recall += recall_score(target,y_class_predict,zero_division=1,average='macro')
        train_precision += precision_score(target,y_class_predict,zero_division=1,average='macro')
        train_loss += loss.item()

    model.eval()
    with torch.no_grad():
        for batch_num, (input_img, target) in enumerate(validation_loader):


            output = model(input_img.to(device))
            target = target.to(device)
            loss = loss_fn(output, target)

            y_class_predict = torch.argmax(output, dim=1)
            y_class_predict = (y_class_predict.cpu().detach().numpy())

            target = target.cpu().detach().numpy()

            val_acc  += accuracy_score(target,y_class_predict)
            val_f1 += f1_score(target,y_class_predict,zero_division=1,average='macro')
            val_recall += recall_score(target,y_class_predict,zero_division=1,average='macro')
            val_precision += precision_score(target,y_class_predict,zero_division=1,average='macro')

            val_loss += loss.item()

            #print(f"Pèrdua entrenament batch validacio: {batch_num} epoch: {epoch+1}  val_loss: {val_loss.item()}")

            # RESULTATS
    train_loss /= len(train_loader)
    t_loss[epoch - 1] = train_loss

    train_acc /= len(train_loader)
    acc_t[epoch - 1] = train_acc

    train_f1 /= len(train_loader)
    f1_t[epoch - 1] = train_f1

    train_recall /= len(train_loader)
    recall_t[epoch - 1] = train_recall

    train_precision /= len(train_loader)
    precision_t[epoch-1] = train_precision

    print(f"Pèrdua entrenament epoch: {epoch}  train_loss: {train_loss}")
    print(f"Accuracy train epoch: {epoch}  train_acc: {train_acc}")
    print(f"F1 train epoch: {epoch}  train_f1: {train_f1}")
    print(f"Recall train epoch: {epoch}  train_recall: {train_recall}")
    print(f"Precision train epoch: {epoch}  train_recall: {train_precision}")

    val_loss /= len(validation_loader)
    v_loss[epoch - 1] = val_loss

    val_acc /= len(validation_loader)
    acc_v[epoch - 1] = val_acc

    val_f1 /= len(validation_loader)
    f1_v[epoch - 1] = val_f1

    val_recall /= len(validation_loader)
    recall_v[epoch - 1] = val_recall

    val_precision /= len(validation_loader)
    precision_v[epoch-1] = val_precision

    print()
    print()
    print(f"Pèrdua validació epoch: {epoch}  val_loss: {val_loss}")
    print(f"Accuracy val epoch: {epoch}  val_acc: {val_acc}")
    print(f"F1 val epoch: {epoch}  val_f1: {val_f1}")
    print(f"Recall val epoch: {epoch}  val_reall: {val_recall}")
    print(f"Precision val epoch: {epoch}  val_reall: {val_precision}")

  0%|          | 0/95 [00:00<?, ?it/s]

Batches (Època 1): 0it [00:00, ?it/s]

tensor([[-0.0405, -0.0338, -0.0376,  ..., -0.0140, -0.0076, -0.0083],
        [-0.0404, -0.0333, -0.0382,  ..., -0.0135, -0.0078, -0.0085],
        [-0.0402, -0.0333, -0.0374,  ..., -0.0138, -0.0076, -0.0081],
        ...,
        [-0.0407, -0.0335, -0.0385,  ..., -0.0136, -0.0077, -0.0088],
        [-0.0406, -0.0336, -0.0380,  ..., -0.0138, -0.0074, -0.0084],
        [-0.0407, -0.0342, -0.0376,  ..., -0.0138, -0.0074, -0.0088]],
       device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([38,  5,  8,  1, 19,  0, 40, 20, 41,  0,  3,  1, 68,  3,  0,  5,  1, 39,
         5, 51,  3, 60,  3,  4, 26,  9,  9, 95, 16, 46, 87, 12, 96,  0, 31, 19,
         0, 91, 28,  5,  3, 65, 12,  0, 10, 55, 82,  5,  5, 28, 36, 94,  3, 94,
        34, 70, 42,  5, 40, 88, 72, 61, 72, 35])


RuntimeError: "nll_loss_forward_reduce_cuda_kernel_2d_index" not implemented for 'Float'

In [None]:
etiquetes = np.array([])
for i in classes:
  if i.cpu().detach().numpy() not in etiquetes:
    etiquetes = np.append(etiquetes,i.cpu().detach().numpy())
print(np.sort(etiquetes.astype(int)))
print(dictionary)