In [0]:
!wget 'http://foodcam.mobi/dataset100.zip'
!unzip '/content/dataset100.zip'
!rm '/content/dataset100.zip'

In [0]:
import torch
import torchvision
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch import nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import os
import numpy as np
import cv2
from tqdm import tqdm

In [0]:
def LoadFoodData(folder_path, n_cls):
  targets = []
  images = []
  boxes, labels, image_id, iscrowd = [], [], [], []
  for i in tqdm(range(1, n_cls+1)):
    info = {}
    path = os.path.join(folder_path, str(i))
    file = open(path + '/bb_info.txt')
    txt = file.read()
    file.close()
    txt = txt.split('\n')
    # Making a dict of text file
    for j in txt[1:]:
      if len(j) > 0:
        temp = j.split(' ')
        info[temp[0]] = [int(x) for x in temp[1:]]
    # For loading images and targets
    for key in info:
      target = {}
      filename = os.path.join(path, key + '.jpg')
      img = cv2.imread(filename)
      img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
      y, x = img.shape[0], img.shape[1]

      x_scalar = 224/x
      y_scalar = 224/y
      x1 = int(info[key][0]*x_scalar)
      y1 = int(info[key][1]*y_scalar)
      x2 = int(info[key][2]*x_scalar)
      y2 = int(info[key][3]*y_scalar)
      poly = [x1, y1, x2, y2]
      area = (poly[0]-poly[2]) * (poly[1]-poly[3])
      poly = torch.tensor(poly)
      poly = torch.unsqueeze(poly, 0)

      target['boxes'] = poly
      target['labels'] = torch.tensor([int(i)])
      target['image_id'] = torch.tensor([int(key)])
      target['area'] = torch.tensor([area])
      target['iscrowd'] = torch.tensor([0])

      images.append(img)
      targets.append(target)

  return images, targets

In [0]:
class FoodData(Dataset):
  def __init__(self, images, targets, transforms=None):
    self.images = images
    self.targets = targets
    self.transforms = transforms

  def __len__(self):
    return len(self.images)


  def __getitem__(self, idx):
    image = self.images[idx]
    target = self.targets[idx]
    image = torchvision.transforms.ToPILImage()(image)
    if self.transforms:
      image = self.transforms(image)
    return image, target

def collate(batch):
  return tuple(zip(*batch))


In [5]:
images, targets = LoadFoodData('/content/UECFOOD100', 100)

train_images, test_images, train_targets, test_targets = train_test_split(images, targets, test_size = 0.2, random_state = 7)

100%|██████████| 100/100 [00:58<00:00,  1.71it/s]


In [0]:
transform = torchvision.transforms.Compose([transforms.Resize((224,224)),
                                           transforms.ToTensor(),
                                           transforms.Normalize([0.5,], [0.5,])])
traindata = FoodData(train_images, train_targets, transform)
trainloader = DataLoader(traindata, batch_size=16, shuffle=True, collate_fn=collate, num_workers=4)

In [0]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 100
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [0]:
device = torch.device('cuda')

In [0]:
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=3e-4)

In [0]:
import warnings
warnings.filterwarnings('ignore')

In [12]:
epochs = 2
iteration = 100
itr = 0
loss = 0
for e in range(epochs):
  for img, tar in tqdm(trainloader):
    img = list(image.to(device) for image in img)
    tar = [{k: v.to(device) for k, v in t.items()} for t in tar]
    
    optimizer.zero_grad()
    loss_dict = model(img, tar)
    losses = sum(loss for loss in loss_dict.values())
    loss_value = losses.item()
    losses.backward()
    optimizer.step()
    
    loss += loss_value
    itr += 1
    if (itr%iteration == 0):
      print('\t\tIteration:{}\t\tLoss:{}'.format(itr, (loss/iteration)))
      loss = 0

 14%|█▍        | 100/718 [02:35<15:58,  1.55s/it]

		Iteration:100		Loss:0.3440188483893871


 28%|██▊       | 200/718 [05:10<13:28,  1.56s/it]

		Iteration:200		Loss:0.2717272047698498


 42%|████▏     | 300/718 [07:46<10:50,  1.56s/it]

		Iteration:300		Loss:0.252243183106184


 56%|█████▌    | 400/718 [10:21<08:12,  1.55s/it]

		Iteration:400		Loss:0.2367002995312214


 70%|██████▉   | 500/718 [12:56<05:38,  1.55s/it]

		Iteration:500		Loss:0.22806488513946532


 84%|████████▎ | 600/718 [15:31<03:02,  1.55s/it]

		Iteration:600		Loss:0.22561468422412873


 97%|█████████▋| 700/718 [18:06<00:27,  1.55s/it]

		Iteration:700		Loss:0.22086596131324768


100%|██████████| 718/718 [18:34<00:00,  1.55s/it]
 11%|█▏        | 82/718 [02:07<16:30,  1.56s/it]

		Iteration:800		Loss:0.21325237050652504


 25%|██▌       | 182/718 [04:43<13:54,  1.56s/it]

		Iteration:900		Loss:0.20519811049103737


 39%|███▉      | 282/718 [07:19<11:18,  1.56s/it]

		Iteration:1000		Loss:0.2073324005305767


 53%|█████▎    | 382/718 [09:54<08:42,  1.55s/it]

		Iteration:1100		Loss:0.20506529226899148


 67%|██████▋   | 482/718 [12:29<06:06,  1.55s/it]

		Iteration:1200		Loss:0.21537565916776658


 81%|████████  | 582/718 [15:05<03:31,  1.56s/it]

		Iteration:1300		Loss:0.1996181422472


 95%|█████████▍| 682/718 [17:41<00:55,  1.55s/it]

		Iteration:1400		Loss:0.1978827513754368


100%|██████████| 718/718 [18:38<00:00,  1.56s/it]


In [0]:
torch.save(model.state_dict(), '/content/drive/My Drive/FasterRCNN/fasterrcnn_foodtracker.pth')