In [0]:
!wget 'http://foodcam.mobi/dataset100.zip'
!unzip '/content/dataset100.zip'
!rm '/content/dataset100.zip'

In [0]:
import torch
import torchvision
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch import nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import os
import numpy as np
import cv2
from tqdm import tqdm

In [0]:
def LoadFoodData(folder_path, n_cls):
  targets = []
  images = []
  boxes, labels, image_id, iscrowd = [], [], [], []
  for i in tqdm(range(1, n_cls+1)):
    info = {}
    path = os.path.join(folder_path, str(i))
    file = open(path + '/bb_info.txt')
    txt = file.read()
    file.close()
    txt = txt.split('\n')
    # Making a dict of text file
    for j in txt[1:]:
      if len(j) > 0:
        temp = j.split(' ')
        info[temp[0]] = [int(x) for x in temp[1:]]
    # For loading images and targets
    for key in info:
      target = {}
      filename = os.path.join(path, key + '.jpg')
      img = cv2.imread(filename)
      img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
      y, x = img.shape[0], img.shape[1]

      x_scalar = 224/x
      y_scalar = 224/y
      x1 = int(info[key][0]*x_scalar)
      y1 = int(info[key][1]*y_scalar)
      x2 = int(info[key][2]*x_scalar)
      y2 = int(info[key][3]*y_scalar)
      poly = [x1, y1, x2, y2]
      area = (poly[0]-poly[2]) * (poly[1]-poly[3])
      poly = torch.tensor(poly)
      poly = torch.unsqueeze(poly, 0)

      target['boxes'] = poly
      target['labels'] = torch.tensor([int(i)])
      target['image_id'] = torch.tensor([int(key)])
      target['area'] = torch.tensor([area])
      target['iscrowd'] = torch.tensor([0])

      images.append(img)
      targets.append(target)

  return images, targets

In [0]:
class FoodData(Dataset):
  def __init__(self, images, targets, transforms=None):
    self.images = images
    self.targets = targets
    self.transforms = transforms

  def __len__(self):
    return len(self.images)


  def __getitem__(self, idx):
    image = self.images[idx]
    target = self.targets[idx]
    image = torchvision.transforms.ToPILImage()(image)
    if self.transforms:
      image = self.transforms(image)
    return image, target

def collate(batch):
  return tuple(zip(*batch))


In [5]:
images, targets = LoadFoodData('/content/UECFOOD100', 100)

train_images, test_images, train_targets, test_targets = train_test_split(images, targets, test_size = 0.2, random_state = 7)

100%|██████████| 100/100 [00:51<00:00,  1.93it/s]


In [0]:
transform = torchvision.transforms.Compose([transforms.Resize((224,224)),
                                           transforms.ToTensor(),
                                           transforms.Normalize([0.5,], [0.5,])])
traindata = FoodData(train_images, train_targets, transform)
trainloader = DataLoader(traindata, batch_size=8, shuffle=True, collate_fn=collate, num_workers=4)

In [7]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 100
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


HBox(children=(FloatProgress(value=0.0, max=167502836.0), HTML(value='')))




In [0]:
device = torch.device('cuda')

In [0]:
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=3e-4)

In [0]:
import warnings
warnings.filterwarnings('ignore')

In [11]:
epochs = 2
iteration = 100
itr = 0
for e in range(epochs):
  for img, tar in tqdm(trainloader):
    img = list(image.to(device) for image in img)
    tar = [{k: v.to(device) for k, v in t.items()} for t in tar]
    
    optimizer.zero_grad()
    loss_dict = model(img, tar)
    losses = sum(loss for loss in loss_dict.values())
    loss_value = losses.item()
    losses.backward()
    optimizer.step()
    
    loss += loss_value
    itr += 1
    if (itr%iteration == 0):
      print('Iteration:{}\tLoss:{}'.format(itr, (loss/iteration)))
      loss = 0


  7%|▋         | 100/1436 [01:27<19:29,  1.14it/s]

Iteration:100	Loss:0.3820470017194748


 14%|█▍        | 200/1436 [02:55<18:04,  1.14it/s]

Iteration:200	Loss:0.29029945001006124


 21%|██        | 300/1436 [04:23<16:39,  1.14it/s]

Iteration:300	Loss:0.26153816491365434


 28%|██▊       | 400/1436 [05:50<15:10,  1.14it/s]

Iteration:400	Loss:0.255713532269001


 35%|███▍      | 500/1436 [07:18<13:41,  1.14it/s]

Iteration:500	Loss:0.25230075791478157


 42%|████▏     | 600/1436 [08:46<12:16,  1.14it/s]

Iteration:600	Loss:0.23918661311268807


 49%|████▊     | 700/1436 [10:14<10:46,  1.14it/s]

Iteration:700	Loss:0.2381303496658802


 56%|█████▌    | 800/1436 [11:42<09:20,  1.13it/s]

Iteration:800	Loss:0.23598519936203957


 63%|██████▎   | 900/1436 [13:10<07:51,  1.14it/s]

Iteration:900	Loss:0.22399501830339433


 70%|██████▉   | 1000/1436 [14:38<06:23,  1.14it/s]

Iteration:1000	Loss:0.23054748743772507


 77%|███████▋  | 1100/1436 [16:06<04:55,  1.14it/s]

Iteration:1100	Loss:0.22830456629395485


 84%|████████▎ | 1200/1436 [17:34<03:27,  1.14it/s]

Iteration:1200	Loss:0.23037888169288634


 91%|█████████ | 1300/1436 [19:02<01:59,  1.14it/s]

Iteration:1300	Loss:0.22287818998098374


 97%|█████████▋| 1400/1436 [20:30<00:31,  1.13it/s]

Iteration:1400	Loss:0.22693948939442635


100%|██████████| 1436/1436 [21:02<00:00,  1.14it/s]
  4%|▍         | 64/1436 [00:56<20:05,  1.14it/s]

Iteration:1500	Loss:0.13075012117624282


 11%|█▏        | 164/1436 [02:24<18:38,  1.14it/s]

Iteration:1600	Loss:0.21514894887804986


 18%|█▊        | 264/1436 [03:52<17:09,  1.14it/s]

Iteration:1700	Loss:0.2088378019630909


 25%|██▌       | 364/1436 [05:20<15:45,  1.13it/s]

Iteration:1800	Loss:0.21027203932404517


 32%|███▏      | 464/1436 [06:47<14:11,  1.14it/s]

Iteration:1900	Loss:0.20341689318418502


 39%|███▉      | 564/1436 [08:15<12:45,  1.14it/s]

Iteration:2000	Loss:0.20612787172198296


 46%|████▌     | 664/1436 [09:43<11:18,  1.14it/s]

Iteration:2100	Loss:0.21196327224373818


 53%|█████▎    | 764/1436 [11:11<09:49,  1.14it/s]

Iteration:2200	Loss:0.21112696528434755


 60%|██████    | 864/1436 [12:39<08:23,  1.14it/s]

Iteration:2300	Loss:0.1974128421396017


 67%|██████▋   | 964/1436 [14:07<06:53,  1.14it/s]

Iteration:2400	Loss:0.2075698482990265


 74%|███████▍  | 1064/1436 [15:35<05:26,  1.14it/s]

Iteration:2500	Loss:0.20126375287771225


 81%|████████  | 1164/1436 [17:03<03:59,  1.14it/s]

Iteration:2600	Loss:0.20630924746394158


 88%|████████▊ | 1264/1436 [18:31<02:31,  1.13it/s]

Iteration:2700	Loss:0.19577991724014282


 95%|█████████▍| 1364/1436 [19:59<01:03,  1.14it/s]

Iteration:2800	Loss:0.19697982504963873


100%|██████████| 1436/1436 [21:03<00:00,  1.14it/s]


In [0]:
torch.save(model.state_dict(), '/content/drive/My Drive/FasterRCNN/fasterrcnn_foodtracker.pth')