In [None]:
import os
import io
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
import torchvision
import pandas as pd

In [None]:
class EgoHandsDataset(Dataset):
    """EgoHands hand-detection dataset backed by a CSV of bounding boxes.

    ``root_dir`` must contain an ``all_images`` folder and an
    ``all_images_bounding_boxes.csv`` file. Every CSV row names one image
    (``image_name``) and holds up to four boxes in the columns
    ``box{k}_x1, box{k}_y1, box{k}_x2, box{k}_y2`` for ``k`` in 0..3.
    Box slots whose ``x1`` is NaN are treated as "no box" and dropped.
    """

    # Column names of the four box slots, in (x1, y1, x2, y2) order per slot.
    _BOX_COLUMNS = [
        f'box{k}_{coord}'
        for k in range(4)
        for coord in ('x1', 'y1', 'x2', 'y2')
    ]

    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on the image tensor of a sample. It is NOT applied to the
                bounding boxes, so geometric transforms would leave the boxes
                out of sync with the image.
        """
        self.images_dir = os.path.join(root_dir, 'all_images')
        self.bounding_boxes_file_name = os.path.join(root_dir, 'all_images_bounding_boxes.csv')
        self.bounding_boxes = pd.read_csv(self.bounding_boxes_file_name)
        self.transform = transform

    def __len__(self):
        """Return the number of annotated samples (rows of the CSV)."""
        # __getitem__ indexes the CSV rows, so the CSV (not the directory
        # listing) defines the valid index range. This also avoids re-listing
        # the image folder on every call.
        return len(self.bounding_boxes)

    def __getitem__(self, idx):
        """Return one sample of the dataset.

        :param idx: Index of the sample (an int or a scalar tensor).
        :return: A tuple ``(image, target)``. ``image`` is a float32 tensor in
            channel-first RGB order scaled to [0, 1]. ``target`` is a dict with
            ``'boxes'`` (float32 tensor of shape (n, 4)) and ``'labels'``
            (int64 tensor of n ones).
        :raises FileNotFoundError: If the image file cannot be read.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        row = self.bounding_boxes.iloc[idx]
        img_name = os.path.join(self.images_dir, row['image_name'])
        image = cv.imread(img_name)
        if image is None:
            # cv.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {img_name}')
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        image = np.moveaxis(image, 2, 0)  # HWC -> CHW
        image = image.astype(np.float32) / 255.0
        image = torch.from_numpy(image)

        if self.transform:
            image = self.transform(image)

        return image, self._build_target(row)

    @staticmethod
    def _build_target(row):
        """Build the ``{'boxes', 'labels'}`` target dict from one CSV row."""
        coords = row.loc[EgoHandsDataset._BOX_COLUMNS].values.astype(np.float32)
        boxes = torch.from_numpy(coords.reshape(-1, 4))
        # Drop the empty slots (NaN x1) so only real boxes remain.
        boxes = boxes[~torch.isnan(boxes[:, 0])]
        return {
            'boxes': boxes,
            # Every box gets label 1; the dtype is set explicitly so it does
            # not depend on the installed torch version's dtype inference.
            'labels': torch.ones((boxes.shape[0],), dtype=torch.int64),
        }

    @staticmethod
    def egohands_collate_fn(data):
        """Collate ``(image, target)`` samples into a batch.

        Images are stacked into one tensor along a new first dimension (so
        they must all share the same shape); targets are returned as a tuple
        of per-sample dicts because the number of boxes varies per image.
        """
        images, targets = zip(*data)
        return torch.stack(images, dim=0), targets

In [None]:
# Mount Google Drive before running the cells below. This can be done from the site's graphical interface.

In [None]:
# Build the dataset from the EgoHands folder on the mounted Drive. The folder
# must contain `all_images/` and `all_images_bounding_boxes.csv`.
dataset = EgoHandsDataset('drive/MyDrive/egohands_data')

In [None]:
# Faster R-CNN detector with a MobileNetV3-Large FPN backbone and two output
# classes (presumably background plus the single label 1 used by the dataset).
# On first use this downloads the mobilenet_v3_large backbone checkpoint.
model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(num_classes=2)

Downloading: "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_large-8738ca79.pth


  0%|          | 0.00/21.1M [00:00<?, ?B/s]

In [None]:
# Batches of 10 shuffled samples, collated with the dataset's own collate
# function (targets have a variable number of boxes, so the default collate
# cannot stack them).
# The worker count is capped at the number of available CPUs: asking for more
# workers than cores triggers a DataLoader warning and can slow loading down.
loader = DataLoader(
    dataset,
    batch_size=10,
    shuffle=True,
    collate_fn=EgoHandsDataset.egohands_collate_fn,
    num_workers=min(8, os.cpu_count() or 1),
)

  cpuset_checked))


In [None]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Move the model's parameters and buffers to the selected device.
model = model.to(device)

In [None]:
# Number of full passes over the DataLoader performed by the training loop.
epochs = 1

In [None]:
def convert_targets_to_device(targets, device):
  """Move the 'boxes' and 'labels' tensors of every target dict to `device`.

  The dicts are updated in place and the very same `targets` container is
  returned, so the call can be used either for its side effect or its result.

  :param targets: Iterable of dicts, each holding 'boxes' and 'labels' tensors.
  :param device: Destination `torch.device`.
  :return: The `targets` object that was passed in.
  """
  for target in targets:
    for key in ('boxes', 'labels'):
      target[key] = target[key].to(device)
  return targets

In [None]:
# Optimise only the parameters that are not frozen, using SGD with momentum
# and weight decay.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
opt = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

In [None]:
# Training loop: one optimisation step per batch.
# In training mode the detection model returns a dict of named loss tensors
# (see the recorded output); `out` keeps the losses of the last batch after
# the loop finishes, and a later cell saves them.
for e in range(epochs):
  # The loop relies on the model returning losses, which it only does in
  # training mode, so set the mode explicitly.
  model.train()
  for i, (images, targets) in enumerate(tqdm(loader)):
    images = images.to(device)
    targets = convert_targets_to_device(targets, device)

    out = model(images, targets)

    # Log the individual losses every 20 batches.
    if i % 20 == 0:
      print(out)

    opt.zero_grad()

    # Back-propagate the sum of all losses in a single pass. This yields the
    # same accumulated gradients as calling backward() on each loss in turn,
    # without traversing the shared graph once per loss or retaining it.
    total_loss = sum(out.values())
    total_loss.backward()

    opt.step()

  0%|          | 0/480 [00:00<?, ?it/s]

  cpuset_checked))
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


{'loss_classifier': tensor(0.7549, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(0.0884, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.7029, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(0.0392, device='cuda:0', grad_fn=<DivBackward0>)}
{'loss_classifier': tensor(0.1887, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(0.2587, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.6194, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(0.0228, device='cuda:0', grad_fn=<DivBackward0>)}
{'loss_classifier': tensor(0.2275, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(0.3444, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.1896, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(0.0409, device='cuda:0', grad_fn=<DivBackward0>)}
{'loss_cla

In [None]:
# Save only the model's parameters and buffers (its state_dict) to Drive.
torch.save(model.state_dict(), 'drive/MyDrive/salvataggi_pytorch/trained_one_epoch/model_state_dict')

In [None]:
# Save the whole model object (not just its weights) to Drive.
# NOTE(review): a fully pickled model presumably needs the same model classes
# to be importable when it is loaded; the state_dict is the more portable
# checkpoint — confirm which one downstream code loads.
torch.save(model, 'drive/MyDrive/salvataggi_pytorch/trained_one_epoch/model')

In [None]:
# Save the optimizer's state_dict to Drive, presumably so that training can
# be resumed later from the same optimizer state.
torch.save(opt.state_dict(), 'drive/MyDrive/salvataggi_pytorch/trained_one_epoch/optimizer_state_dict')

In [None]:
# Save each loss of the last training batch to its own file, named after the
# loss key. `out` is the loss dict left over from the final loop iteration.
# The tensors are detached from the autograd graph and moved to the CPU first,
# so the files can be loaded on a machine without a GPU.
for key, loss in out.items():
  torch.save(loss.detach().cpu(), 'drive/MyDrive/salvataggi_pytorch/trained_one_epoch/' + str(key))