In [1]:
import cv2
import json
import matplotlib.pyplot as plt
import pandas as pd
import random
import shutil


from glob import glob
from tqdm import tqdm

**Mounted Drive**

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!unzip /content/drive/MyDrive/miniProject/benetech-making-graphs-accessible.zip -d dataset

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: dataset/train/images/eaed09039206.jpg  
  inflating: dataset/train/images/eaed1e245167.jpg  
  inflating: dataset/train/images/eaedb251e670.jpg  
  inflating: dataset/train/images/eaeead418936.jpg  
  inflating: dataset/train/images/eaeeb1c2ecdd.jpg  
  inflating: dataset/train/images/eaef70ba842d.jpg  
  inflating: dataset/train/images/eaf036991cb3.jpg  
  inflating: dataset/train/images/eaf06e9e07db.jpg  
  inflating: dataset/train/images/eaf20f7d8945.jpg  
  inflating: dataset/train/images/eaf3a9a87868.jpg  
  inflating: dataset/train/images/eaf3d05d20a6.jpg  
  inflating: dataset/train/images/eaf3d24fb032.jpg  
  inflating: dataset/train/images/eaf3f734b5ce.jpg  
  inflating: dataset/train/images/eaf467f871a2.jpg  
  inflating: dataset/train/images/eaf65b4e3c45.jpg  
  inflating: dataset/train/images/eaf7f876411a.jpg  
  inflating: dataset/train/images/eafa35356cf8.jpg  
  inflating: dataset/train/images/

#Create dataset

In [4]:
WIDTH = 480
HEIGHT = 480

In [5]:
def extract_plot_bb(data,image = "None"):
    w = data["plot-bb"]["width"]
    h = data["plot-bb"]["height"]
    x0 = data["plot-bb"]["x0"]
    y0 = data["plot-bb"]["y0"]
    x1 = data["plot-bb"]["y0"] + w
    y1 = data["plot-bb"]["y0"] + h
    image_w,image_h,c = image.shape
    x0 = max((x0/image_h) * WIDTH,0)
    y0 = max((y0/image_w) * HEIGHT,0)
    x1 = max(min((x1/image_h) * WIDTH,WIDTH),x0+0.01)
    y1 = max(min((y1/image_w) * HEIGHT,HEIGHT),y0+0.01)
    area = (y1 - y0) * (x1 - x0)
    return [x0, y0, x1, y1,area, "plot-bb"]



def extract_label_bbox(data, rows, img_id,image = "None"):
    common = []
    if data["chart-type"] != 'scatter':
      return None
    if img_id == '3ef41bbc82c3':
      return None
    common+= [img_id, data["source"], data["chart-type"]]
    # TRAIN_FOLDER = "/content/dataset/train/images/"
    # image = cv2.imread(TRAIN_FOLDER + img_id + ".jpg")
    image_w,image_h,c = image.shape
    # print(image_w,image_h)
    for box in data["text"]:
        # x0 = box["polygon"]["x0"]
        # y0 = box["polygon"]["y1"]
        # w = abs(box["polygon"]["x1"] - box["polygon"]["x0"]) + abs(box["polygon"]["x2"] - box["polygon"]["x1"])
        # h = abs(box["polygon"]["y3"] - box["polygon"]["y1"])
        # x0 = box["polygon"]["x0"]
        # y0 = box["polygon"]["y0"]
        # w = box["polygon"]["x1"] - box["polygon"]["x0"]
        # h = box["polygon"]["y3"] - box["polygon"]["y0"]
        # print(box["polygon"]["x2"],box["polygon"]["x0"],box["polygon"]["y2"],box["polygon"]["y0"])
        x0 = box["polygon"]["x0"] 
        y0 = box["polygon"]["y0"]
        x1 = box["polygon"]["x2"] 
        y1 = box["polygon"]["y2"]

        x0 = max((x0/image_h) * WIDTH,0)
        y0 = max((y0/image_w) * HEIGHT,0)
        x1 = max(min((x1/image_h) * WIDTH,WIDTH),x0+0.01)
        y1 = max(min((y1/image_w) * HEIGHT,HEIGHT),y0+0.01)
        area = (y1 - y0) * (x1 - x0)
        label = box["role"]
        # print(label)
        box_row = common + [x0, y0, x1, y1,area, label]
        rows.append(box_row)
    rows.append(common + extract_plot_bb(data,image)) 
    return rows
        
def extract_tick_bbox(data, rows, img_id,image = "None"):
    common = []
    common+= [img_id , data["source"], data["chart-type"]]
    # TRAIN_FOLDER = "/content/dataset/train/images/"
    # image = cv2.imread(TRAIN_FOLDER + img_id + ".jpg")
    image_w,image_h,c = image.shape
    for axis in data["axes"].keys():
        for box in data["axes"][axis]["ticks"]:
            x0 = box["tick_pt"]["x"] - 2.5
            y0 = box["tick_pt"]["y"] - 2.5
            x1 = box["tick_pt"]["x"] + 2.5
            y1 = box["tick_pt"]["y"] + 2.5

            x0 = max((x0/image_h) * WIDTH,0)
            y0 = max((y0/image_w) * HEIGHT,0)
            x1 = max(min((x1/image_h) * WIDTH,WIDTH),x0+0.01)
            y1 = max(min((y1/image_w) * HEIGHT,HEIGHT),y0+0.01)

            label = axis + "-tick"
            area = (y1 - y0) * (x1 - x0)
            box_row = common + [x0, y0, x1, y1,area,label]
            rows.append(box_row)

    for axis in data["visual-elements"]["scatter points"]:
        for box in axis:

            x0 = box["x"] - 5
            y0 = box["y"] - 5
            x1 = box["x"] + 5
            y1 = box["y"] + 5

            x0 = max((x0/image_h) * WIDTH,0)
            y0 = max((y0/image_w) * HEIGHT,0)
            x1 = max(min((x1/image_h) * WIDTH,WIDTH),x0+0.01)
            y1 = max(min((y1/image_w) * HEIGHT,HEIGHT),y0+0.01)
            area = (y1 - y0) * (x1 - x0)
            label = "visual-point"
            box_row = common + [x0, y0, x1, y1,area,label]
            rows.append(box_row)

    return rows

In [6]:
ANNOTATION = "/content/dataset/train/annotations/*.json"
TRAIN_FOLDER = "/content/dataset/train/images/"
rows = []
for file_name in tqdm(glob(ANNOTATION)):
    label_bbox = []
    tick_bbox = []
    with open(file_name) as f:
        data = json.load(f)
        img_id = file_name.split("/")[-1].split(".")[0]
        image = cv2.imread(TRAIN_FOLDER + img_id + ".jpg")
        label_bbox = extract_label_bbox(data, label_bbox, img_id,image)
        if label_bbox == None:
           continue
        tick_bbox = extract_tick_bbox(data, tick_bbox, img_id,image)
        # print(label_bbox)
        # print(tick_bbox)
        data_rows = label_bbox + tick_bbox
    rows += data_rows
    # break

100%|██████████| 60578/60578 [02:02<00:00, 495.04it/s]


In [7]:
df = pd.DataFrame(rows)
df.columns = ["image_id", "source", "chart", "x0", "y0", "x1", "y1", "area","label"]
df.head()

Unnamed: 0,image_id,source,chart,x0,y0,x1,y1,area,label
0,a78edf66d27c,generated,scatter,68.571429,9.6,413.333333,58.971429,17021.387755,chart_title
1,a78edf66d27c,generated,scatter,8.571429,58.971429,20.0,408.685714,3996.734694,axis_title
2,a78edf66d27c,generated,scatter,237.142857,451.2,280.952381,467.657143,720.979592,axis_title
3,a78edf66d27c,generated,scatter,41.904762,421.028571,67.619048,434.742857,352.653061,tick_label
4,a78edf66d27c,generated,scatter,107.619048,421.028571,133.333333,434.742857,352.653061,tick_label


In [None]:
df.to_csv('res.csv')

In [None]:
df = pd.read_csv('res.csv',index_col=0)

In [None]:
df.head()

Unnamed: 0,image_id,source,chart,x0,y0,x1,y1,area,label
0,a16e20985f9f,generated,scatter,66.95279,8.791209,414.077253,36.923077,9765.259633,chart_title
1,a16e20985f9f,generated,scatter,233.819742,446.593407,281.201717,471.208791,1166.32552,axis_title
2,a16e20985f9f,generated,scatter,7.2103,186.373626,19.570815,277.802198,1130.104231,axis_title
3,a16e20985f9f,generated,scatter,42.23176,413.186813,69.012876,432.527473,517.964439,tick_label
4,a16e20985f9f,generated,scatter,100.944206,413.186813,127.725322,430.769231,470.876763,tick_label


In [8]:
df[df['y1'] > 480] 

Unnamed: 0,image_id,source,chart,x0,y0,x1,y1,area,label


#Model Setup

In [9]:
!pip install pycocotools
!pip install --upgrade omegaconf
!pip install albumentations
!pip install colorama
!pip install timm
!pip install effdet

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting omegaconf
  Downloading omegaconf-2.3.0-py3-none-any.whl (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.5/79.5 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting antlr4-python3-runtime==4.9.* (from omegaconf)
  Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.0/117.0 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: antlr4-python3-runtime
  Building wheel for antlr4-python3-runtime (setup.py) ... [?25l[?25hdone
  Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=454ee9914afbd2bb060ccbc83ac4a55039d6afc473fb31

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting colorama
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: colorama
Successfully installed colorama-0.4.6
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.9.2-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m78.2 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub (from timm)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m27.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors (from timm)
  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 M

In [10]:
def get_stoi(df):
    """Get String to Index dictionary"""
    stoi = {}
    for idx, string in enumerate(df.label.unique()):
        stoi[string] = idx + 1
    itos = {item[1]: item[0] for item in stoi.items()}
    df = df.replace({"label": stoi})
    return stoi

stoi = get_stoi(df)
print(stoi)
NUM_CLASSES =  len(stoi)
df = df[df["label"].isin(list(stoi.keys()))]
df = df.replace({"label": stoi})

{'chart_title': 1, 'axis_title': 2, 'tick_label': 3, 'plot-bb': 4, 'x-axis-tick': 5, 'y-axis-tick': 6, 'visual-point': 7, 'other': 8, 'tick_grouping': 9}


In [11]:
!pip install pycocotools --quiet
!git clone https://github.com/pytorch/vision.git
!git checkout v0.3.0

!cp vision/references/detection/utils.py ./
!cp vision/references/detection/transforms.py ./
!cp vision/references/detection/coco_eval.py ./
!cp vision/references/detection/engine.py ./
!cp vision/references/detection/coco_utils.py ./

Cloning into 'vision'...
remote: Enumerating objects: 337140, done.[K
remote: Counting objects: 100% (53372/53372), done.[K
remote: Compressing objects: 100% (1269/1269), done.[K
remote: Total 337140 (delta 52685), reused 52644 (delta 52075), pack-reused 283768[K
Receiving objects: 100% (337140/337140), 676.32 MiB | 30.43 MiB/s, done.
Resolving deltas: 100% (310536/310536), done.
fatal: not a git repository (or any of the parent directories): .git


In [12]:
import albumentations as A
import copy
import cv2
import gc
import matplotlib.pyplot as plt
import multiprocessing
import numpy as np
import os
import pandas as pd
import PIL
import random
import time
import timm
import torch
import torch.nn as nn

from sklearn.model_selection import train_test_split
from albumentations.pytorch.transforms import ToTensorV2
from colorama import Fore, Back, Style
from datetime import datetime, timedelta
from glob import glob
from pprint import pprint
from sklearn.model_selection import KFold, GroupKFold, StratifiedKFold
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.optim import lr_scheduler
from tqdm import tqdm
import utils

c_  = Fore.GREEN
sr_ = Style.RESET_ALL
print(f"There are {multiprocessing.cpu_count()} CPUs available")

There are 2 CPUs available


In [13]:
NUM_CLASSES

9

In [14]:
train_path = "/content/dataset/train/images/"
class CustDat(torch.utils.data.Dataset):
    def __init__(self,df,unique_imgs,indices,transform=None):
      self.df = df
      self.unique_imgs = unique_imgs
      self.indices = indices
      self.transform = transform
    def __len__(self):
      return len(self.indices)
    def __getitem__(self,idx):
      image_name = self.unique_imgs[self.indices[idx]]
      boxes=self.df[self.df.image_id == image_name].values[:,3:7].astype('float')
      boxes = torch.as_tensor(boxes,dtype = torch.float32)

      labels = self.df[self.df.image_id == image_name].values[:,-1:].astype('int')
      labels = [ele[0] for ele in labels]
      labels = torch.as_tensor(labels,dtype=torch.int64)

      area = self.df[self.df.image_id == image_name].values[:,-2:-1].astype('float')
      area = [ele[0] for ele in area]
      area = torch.as_tensor(area,dtype=torch.float32)

      iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)      
      
      img = cv2.imread(f"/content/dataset/train/images/{image_name}.jpg")
      img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
      img_res = cv2.resize(img_rgb, (WIDTH, HEIGHT), cv2.INTER_AREA)
      # img_res /= 255.0 #uncomment for validation

      target = {}
      target["boxes"] = torch.as_tensor(boxes)
      target["labels"] = labels
      
      target["area"] = area #uncomment for validation
      target["iscrowd"] = iscrowd #uncomment for validation
      if self.transform is not None:
         sample = self.transform(image = img_res,
                                     bboxes = target['boxes'],
                                     labels = labels)
        
         img_res = sample['image']
         target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)
         target['boxes'][:,[0,1,2,3]] = target['boxes'][:,[1,0,3,2]]  

      target["img_size"] = (HEIGHT, WIDTH)
      target["img_scale"] = torch.tensor([1.0])
      target['index'] = torch.tensor([idx])
      target["image_id"] = torch.tensor([idx])
      return img_res, target

In [15]:
unique_image = df.image_id.unique()

In [16]:
train_ids, val_ids = train_test_split(range(unique_image.shape[0]),test_size = 0.1)
dataset = CustDat(df, unique_image, train_ids)

In [None]:
img,target,idx = dataset[0]

In [None]:
target

In [17]:
train_ids, val_ids = train_test_split(range(unique_image.shape[0]),test_size = 0.2)
val_ids, test_ids = train_test_split(val_ids,test_size = 0.5)

**Configuration**

In [18]:
class config:
    BATCH_SIZE_TRAIN = 4
    BATCH_SIZE_VALID = 2
    DEBUG = False
    DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    EPOCHS = 5
    FOLDS = 5
    LR = 2e-4
    MIN_LR = 1e-6
    NUM_WORKERS = multiprocessing.cpu_count()
    RESOLUTION = 480
    SAMPLE = 30_000
    SEED = 42
    SCHEDULER = 'CosineAnnealingLR'
    T_0 = 25
    T_MAX = int(30_000/BATCH_SIZE_TRAIN*EPOCHS)+50
    WARMUP_EPOCHS = 0
    WEIGHT_DECAY = 1e-6
    
    
class paths:
    TRAIN_ANNOTATIONS_FOLDER = "/content/dataset/train/annotations/"
    TRAIN_IMAGES_FOLDER = "/content/dataset/train/images/"

In [19]:
def get_train_transforms():
    return A.Compose(
        [
            A.Resize(height=config.RESOLUTION, width=config.RESOLUTION, p=1),
            A.Normalize(p=1),
            ToTensorV2(p=1.0),
        ], 
        p=1.0, 
        bbox_params=A.BboxParams(
            format='pascal_voc', min_area=0,  min_visibility=0, label_fields=['labels']
        )
    )

def get_valid_transforms():
    return A.Compose(
        [
            A.Resize(height=config.RESOLUTION, width=config.RESOLUTION, p=1.0),
            A.Normalize(p=1),
            ToTensorV2(p=1.0),
        ], 
        p=1.0, 
        bbox_params=A.BboxParams(
            format='pascal_voc', min_area=0, min_visibility=0, label_fields=['labels']
        )
    )

In [None]:
dataset_val[0]

In [20]:
dataset_train = CustDat(df, unique_image, train_ids,get_train_transforms())
dataset_test = CustDat(df, unique_image, val_ids, get_valid_transforms())
dataset_val = CustDat(df, unique_image, test_ids,  get_valid_transforms())

# define training and validation data loaders
def collate_fn(batch):
    return tuple(zip(*batch))

data_loader_train = torch.utils.data.DataLoader(dataset_train, 
                              batch_size=config.BATCH_SIZE_TRAIN if not config.DEBUG else 20, 
                              num_workers=config.NUM_WORKERS,
                              sampler=RandomSampler(dataset_train),
                              pin_memory=False, drop_last=False, collate_fn=collate_fn)
data_loader_val = torch.utils.data.DataLoader(dataset_val,
                          batch_size=config.BATCH_SIZE_VALID if not config.DEBUG else 20, 
                          num_workers=config.NUM_WORKERS,
                          sampler=SequentialSampler(dataset_val),
                          shuffle=False, pin_memory=True, collate_fn=collate_fn)

# data_loader_train = torch.utils.data.DataLoader(
#     dataset_train, batch_size=10, shuffle=True, num_workers=4,
#     collate_fn=utils.collate_fn)

# data_loader_test = torch.utils.data.DataLoader(
#     dataset_test, batch_size=10, shuffle=False, num_workers=4,
#     collate_fn=utils.collate_fn)

# data_loader_val = torch.utils.data.DataLoader(
#     dataset_val, batch_size=10, shuffle=False, num_workers=4,
#     collate_fn=utils.collate_fn)

In [None]:
next(iter(data_loader_val))

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f90038e48b0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f90038e48b0>    
self._shutdown_workers()Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    
if w.is_alive():
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
      File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
assert self._parent_pid == os.getpid(), 'can only test a

[[tensor([[[2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
           [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
           [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
           ...,
           [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
           [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
           [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489]],
  
          [[2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
           [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
           [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
           ...,
           [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
           [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
           [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286]],
  
          [[2.6400, 2.6400, 2.6400,  ..., 2.6400, 2.6400, 2.6400],
           [2.6400, 2.6400, 2.6400,  ..., 2.6400, 2.6400, 2.6400],
           [2.6400, 2.

In [None]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(config.SEED)

**Scheduler**

In [21]:
def fetch_scheduler(optimizer):
    if config.SCHEDULER == 'CosineAnnealingLR':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer,T_max=config.T_MAX, 
                                                   eta_min=config.MIN_LR)
    elif config.SCHEDULER == 'CosineAnnealingWarmRestarts':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer,T_0=config.T_0, 
                                                             eta_min=config.MIN_LR)
    elif config.SCHEDULER == 'ReduceLROnPlateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   mode='min',
                                                   factor=0.1,
                                                   patience=7,
                                                   threshold=0.0001,
                                                   min_lr=config.MIN_LR,)
    elif config.SCHEDULER == 'ExponentialLR':
        scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.85)
    elif config.SCHEDULER == None:
        return None
        
    return scheduler

from effdet.config.model_config import efficientdet_model_param_dict
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain, DetBenchPredict
from effdet.efficientdet import HeadNet
from effdet.config.model_config import efficientdet_model_param_dict

def create_model(num_classes=NUM_CLASSES, image_size=480,
                 architecture="tf_efficientnetv2_s", verbose=False):
    
    efficientdet_model_param_dict['tf_efficientnetv2_s'] = dict(
        name='tf_efficientnetv2_s',
        backbone_name='tf_efficientnetv2_s',
        backbone_args=dict(drop_path_rate=0.2),
        num_classes=num_classes,
        url='')
    
    cfg = get_efficientdet_config(architecture)
    cfg.update({'num_classes': num_classes})
    cfg.update({'image_size': (image_size, image_size)})
    
    if verbose:
        pprint(cfg)

    net = EfficientDet(cfg, pretrained_backbone=True)
    net.class_net = HeadNet(
        cfg,
        num_outputs=cfg.num_classes,
    )
    return DetBenchTrain(net, cfg)


def load_model(model_weights_path, model):
    """
    Load model weights.
    """
    model.load_state_dict(torch.load(model_weights_path))
    model.eval()
    return model

**Average Meter**

In [None]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        """Initializes an instance by reseting its values"""
        self.reset()

    def reset(self):
        """Resets all values to zero"""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """
        Tracks values, count, sum and average.
        :param val: usually the loss function value.
        :param n: usually the number of samples.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

**Training Function**

In [None]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    model.train() # Set model in training mode
    loss_meter = AverageMeter() # Create instance
    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc='Train ') # Progress bar
    for step, (images, targets, image_ids) in pbar:
        # === Collate ===
        images = torch.stack(images).to(device).float() # Get images (batch_size, 3, RESOLUTION, RESOLUTION)
        batch_size = images.shape[0] # Get batch size
        boxes = [target['boxes'].to(device).float() for target in targets] # Get bounding boxes
        labels = [target['labels'].to(device).float() for target in targets] # Get labels (tuple with strings)
        img_size = torch.tensor([target["img_size"] for target in targets]).to(device).float()
        img_scale = torch.tensor([target["img_scale"] for target in targets]).to(device).float()
        annotations = {
            "bbox": boxes,
            "cls": labels,
            "img_size": img_size,
            "img_scale": img_scale
        }
        optimizer.zero_grad() # Zero out gradients
        loss = model(images, annotations) # Forward pass
        loss = loss["loss"]
        loss.backward() # Back propagation
        # Since the reduction type of the loss is "mean" we multiply by batch_size
        loss_meter.update(loss.detach().item(), batch_size) # Update loss
        optimizer.step() # Update params
        scheduler.step() # Update learning rate
        
        # === Evaluate model ===
        
        mem = torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0 # Track memory
        current_lr = optimizer.param_groups[0]['lr'] # Get current Learning Rate
        pbar.set_postfix(train_loss=f'{loss_meter.avg:0.4f}',
                         lr=f'{current_lr:0.5f}',
                         gpu_mem=f'{mem:0.2f} GB')
    # === Release memory ===
    torch.cuda.empty_cache()
    gc.collect()
    
    return loss_meter

In [None]:
@torch.no_grad()
def valid_one_epoch(model, dataloader, device, epoch):
    model.eval() # Set model in evaluation mode
    loss_meter = AverageMeter() # Create instance
    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc='Valid ') # Progress bar
    for step, (images, targets, image_ids) in pbar:  
        # === Collate ===
        images = torch.stack(images).to(device).float() # Get images
        batch_size = images.shape[0] # Get batch size
        boxes = [target['boxes'].to(device).float() for target in targets] # Get boxes
        labels = [target['labels'].to(device).float() for target in targets] # Get labels
        img_size = torch.tensor([target["img_size"] for target in targets]).to(device).float()
        img_scale = torch.tensor([target["img_scale"] for target in targets]).to(device).float()
        
        annotations = {
            "bbox": boxes,
            "cls": labels,
            "img_size": img_size,
            "img_scale": img_scale
        }
        loss = model(images, annotations) # Forward pass
        loss = loss["loss"]
        loss_meter.update(loss.detach().item(), batch_size) # Update loss
        # === Evaluate model ===
        
        mem = torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0 # Track memory
        current_lr = optimizer.param_groups[0]['lr'] # Get current learning rate
        pbar.set_postfix(valid_loss=f'{loss_meter.avg:0.4f}',
                         lr=f'{current_lr:0.5f}',
                         gpu_memory=f'{mem:0.2f} GB')
    # === Release memory ===
    torch.cuda.empty_cache()
    gc.collect()
    
    return loss_meter

**Train-loop**

In [None]:
def train_loop(model, optimizer, scheduler, device, num_epochs, train_loader, valid_loader):
    f = open(f"/content/drive/MyDrive/miniProject/log.txt", "w+") # Create log file
    
    if torch.cuda.is_available(): # Check if GPU is available
        print("Cuda: {}\n".format(torch.cuda.get_device_name()))
    
    start = time.time() # Track execution time
    best_model_weights = copy.deepcopy(model.state_dict())
    epochs = config.EPOCHS
    best_loss = 1e10
    for epoch in range(1, epochs + 1):
        print(f'Epoch {epoch}/{num_epochs}', end='')
        loss_meter_train = train_one_epoch(model, optimizer, scheduler, 
                                           dataloader=train_loader, 
                                           device=config.DEVICE, epoch=epoch)
        
        loss_meter_valid = valid_one_epoch(model, valid_loader, 
                                           device=config.DEVICE, 
                                           epoch=epoch)
        
        duration = str(timedelta(seconds=time.time() - start))[:7]
        # === Print to log file ===
        with open(f"/content/drive/MyDrive/miniProject/log.txt", 'a+') as f:
            print('{} | Epoch: {}/{} | Train Loss: {:.4} '. \
            format(duration, epoch + 1, epochs, loss_meter_train.avg), file=f)
            print('{} | Epoch: {}/{} | Valid Loss: {:.4}'. \
            format(duration, epoch + 1, epochs, loss_meter_valid.avg), file=f)
            print("\n" + "-"*100 + "\n", file=f)
        
        # === Save model if there is an improvement ===
        if loss_meter_valid.avg < best_loss:
            best_loss = loss_meter_valid.avg
            best_epoch = epoch
            best_model_weights = copy.deepcopy(model.state_dict())
            PATH = f"/content/drive/MyDrive/miniProject/best_epoch-0.bin"
            torch.save(model.state_dict(), PATH)
            print(f"Model Saved | Best Epoch {best_epoch} | Best Loss {round(best_loss,2)} {sr_}")
            
        last_model_wts = copy.deepcopy(model.state_dict())
        PATH = f"last_epoch-0.bin"
        torch.save(model.state_dict(), PATH)
        print(); print()
    
    end = time.time()
    time_elapsed = end - start
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
    
    return model

In [None]:
model = create_model()
model.to(config.DEVICE)
optimizer = torch.optim.AdamW(model.parameters(), lr=config.LR, weight_decay=config.WEIGHT_DECAY)
scheduler = fetch_scheduler(optimizer)

Downloading model.safetensors:   0%|          | 0.00/86.5M [00:00<?, ?B/s]

In [None]:
model = train_loop(model, optimizer, scheduler,
                    device=config.DEVICE,
                    num_epochs=config.EPOCHS,
                    train_loader = data_loader_train,valid_loader = data_loader_val)

Cuda: Tesla T4

Epoch 1/5

Train :   7%|▋         | 155/2249 [03:30<24:25,  1.43it/s, gpu_mem=8.43 GB, lr=0.00020, train_loss=455.5456]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f90038e48b0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f90038e48b0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-pack

Model Saved | Best Epoch 1 | Best Loss 0.77 [0m


Epoch 2/5

Train : 100%|██████████| 2249/2249 [33:11<00:00,  1.13it/s, gpu_mem=4.36 GB, lr=0.00019, train_loss=0.4152]
Valid : 100%|██████████| 563/563 [03:55<00:00,  2.39it/s, gpu_memory=0.90 GB, lr=0.00019, valid_loss=0.2665]


Model Saved | Best Epoch 2 | Best Loss 0.27 [0m


Epoch 3/5

Train : 100%|██████████| 2249/2249 [32:41<00:00,  1.15it/s, gpu_mem=4.37 GB, lr=0.00018, train_loss=0.2211]
Valid : 100%|██████████| 563/563 [03:55<00:00,  2.39it/s, gpu_memory=0.90 GB, lr=0.00018, valid_loss=0.1990]


Model Saved | Best Epoch 3 | Best Loss 0.2 [0m


Epoch 4/5

Train : 100%|██████████| 2249/2249 [32:44<00:00,  1.14it/s, gpu_mem=4.37 GB, lr=0.00017, train_loss=0.1801]
Valid : 100%|██████████| 563/563 [03:53<00:00,  2.41it/s, gpu_memory=0.90 GB, lr=0.00017, valid_loss=0.1697]


Model Saved | Best Epoch 4 | Best Loss 0.17 [0m


Epoch 5/5

Train :   2%|▏         | 45/2249 [00:42<34:30,  1.06it/s, gpu_mem=4.37 GB, lr=0.00017, train_loss=0.1607]


KeyboardInterrupt: ignored

In [22]:
PATH = f"/content/drive/MyDrive/miniProject/last.bin"
# torch.save(model.state_dict(), PATH)

In [34]:
def create_model(num_classes=NUM_CLASSES, image_size=480,
                 architecture="tf_efficientnetv2_s", verbose=False):
    efficientdet_model_param_dict['tf_efficientnetv2_s'] = dict(
        name='tf_efficientnetv2_s',
        backbone_name='tf_efficientnetv2_s',
        backbone_args=dict(drop_path_rate=0.2),
        num_classes=num_classes,
        url='', )
    
    cfg = get_efficientdet_config(architecture)
    cfg.update({'num_classes': num_classes})
    cfg.update({'image_size': (image_size, image_size)})
    
    if verbose:
        pprint(cfg)

    net = EfficientDet(cfg, pretrained_backbone=True)
    net.class_net = HeadNet(
        cfg,
        num_outputs=cfg.num_classes,
    )
    return DetBenchPredict(net)

def load_model(model_weights_path, model):
    """
    Load model weights.
    """
    model.load_state_dict(torch.load(model_weights_path))
    model.eval()
    return model

In [35]:
model = create_model(verbose=True)
model = load_model(PATH, model)
model.to(config.DEVICE)

{'name': 'tf_efficientnetv2_s', 'backbone_name': 'tf_efficientnetv2_s', 'backbone_args': {'drop_path_rate': 0.2}, 'backbone_indices': None, 'image_size': [480, 480], 'num_classes': 9, 'min_level': 3, 'max_level': 7, 'num_levels': 5, 'num_scales': 3, 'aspect_ratios': [[1.0, 1.0], [1.4, 0.7], [0.7, 1.4]], 'anchor_scale': 4.0, 'pad_type': 'same', 'act_type': 'swish', 'norm_layer': None, 'norm_kwargs': {'eps': 0.001, 'momentum': 0.01}, 'box_class_repeats': 3, 'fpn_cell_repeats': 3, 'fpn_channels': 88, 'separable_conv': True, 'apply_resample_bn': True, 'conv_bn_relu_pattern': False, 'downsample_type': 'max', 'upsample_type': 'nearest', 'redundant_bias': True, 'head_bn_level_first': False, 'head_act_type': None, 'fpn_name': None, 'fpn_config': None, 'fpn_drop_path_rate': 0.0, 'alpha': 0.25, 'gamma': 1.5, 'label_smoothing': 0.0, 'legacy_focal': False, 'jit_loss': False, 'delta': 0.1, 'box_loss_weight': 50.0, 'soft_nms': False, 'max_detection_points': 5000, 'max_det_per_image': 100, 'url': ''}

DetBenchPredict(
  (model): EfficientDet(
    (backbone): EfficientNetFeatures(
      (conv_stem): Conv2dSame(3, 24, kernel_size=(3, 3), stride=(2, 2), bias=False)
      (bn1): BatchNormAct2d(
        24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
        (drop): Identity()
        (act): SiLU(inplace=True)
      )
      (blocks): Sequential(
        (0): Sequential(
          (0): ConvBnAct(
            (conv): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn1): BatchNormAct2d(
              24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
              (drop): Identity()
              (act): SiLU(inplace=True)
            )
            (drop_path): Identity()
          )
          (1): ConvBnAct(
            (conv): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn1): BatchNormAct2d(
              24, eps=0.001, momentum=0.1, affine=True, track_running_stats=

In [25]:
from engine import train_one_epoch, evaluate

In [26]:
model.eval()
evaluate(model, data_loader_val, device='cuda')

KeyboardInterrupt: ignored

In [44]:
from coco_eval import CocoEvaluator
from coco_utils import get_coco_api_from_dataset

In [45]:
coco = get_coco_api_from_dataset(data_loader_val.dataset)
iou_types = _get_iou_types(model)
coco_evaluator = CocoEvaluator(coco, iou_types)

creating index...
index created!


NameError: ignored

In [37]:
model.eval()
device = 'cuda'
for images, targets in data_loader_val:
  images = torch.stack(images).to(device).float()
  outputs = model(images)
  res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
  
  
  

AttributeError: ignored

In [31]:
outputs = [{k: v.to('cpu')} for k, v in outputs.items()]

In [40]:
outputs[0]

tensor([[4.6434e+01, 6.9713e+01, 4.5471e+02, 4.1566e+02, 9.8090e-01, 4.0000e+00],
        [2.2249e+02, 4.1426e+02, 2.2742e+02, 4.2278e+02, 8.0318e-01, 5.0000e+00],
        [1.6817e+02, 2.6959e+02, 1.7817e+02, 2.8652e+02, 7.9441e-01, 7.0000e+00],
        [3.2675e+02, 4.1430e+02, 3.3169e+02, 4.2290e+02, 7.7863e-01, 5.0000e+00],
        [2.7048e+01, 3.5026e+02, 4.2517e+01, 3.6903e+02, 7.6841e-01, 3.0000e+00],
        [4.0998e+02, 2.7125e+02, 4.2006e+02, 2.8823e+02, 7.6607e-01, 7.0000e+00],
        [2.8943e+02, 3.6436e+02, 2.9953e+02, 3.8142e+02, 7.5227e-01, 7.0000e+00],
        [1.8789e+02, 4.1372e+02, 1.9288e+02, 4.2235e+02, 7.4818e-01, 5.0000e+00],
        [1.1520e+02, 3.0080e+02, 1.2546e+02, 3.1848e+02, 7.4799e-01, 7.0000e+00],
        [4.3024e+02, 4.1383e+02, 4.3526e+02, 4.2245e+02, 7.4375e-01, 5.0000e+00],
        [9.7300e+01, 3.9663e+02, 1.0744e+02, 4.1366e+02, 7.3849e-01, 7.0000e+00],
        [3.5702e+02, 4.3262e+02, 3.7102e+02, 4.5116e+02, 7.3097e-01, 3.0000e+00],
        [3.3178e