In [1]:
import pandas as pd
import numpy as np
import cv2
import os
import re

from PIL import Image

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler

from matplotlib import pyplot as plt

DIR_INPUT = '/home/hy/dataset/gwd'
DIR_TRAIN = f'{DIR_INPUT}/train'
DIR_TEST = f'{DIR_INPUT}/test'

In [2]:
train_df = pd.read_csv(f'{DIR_INPUT}/train.csv')
train_df.shape

(147793, 5)

In [3]:
train_df

Unnamed: 0,image_id,width,height,bbox,source
0,b6ab77fd7,1024,1024,"[834.0, 222.0, 56.0, 36.0]",usask_1
1,b6ab77fd7,1024,1024,"[226.0, 548.0, 130.0, 58.0]",usask_1
2,b6ab77fd7,1024,1024,"[377.0, 504.0, 74.0, 160.0]",usask_1
3,b6ab77fd7,1024,1024,"[834.0, 95.0, 109.0, 107.0]",usask_1
4,b6ab77fd7,1024,1024,"[26.0, 144.0, 124.0, 117.0]",usask_1
...,...,...,...,...,...
147788,5e0747034,1024,1024,"[64.0, 619.0, 84.0, 95.0]",arvalis_2
147789,5e0747034,1024,1024,"[292.0, 549.0, 107.0, 82.0]",arvalis_2
147790,5e0747034,1024,1024,"[134.0, 228.0, 141.0, 71.0]",arvalis_2
147791,5e0747034,1024,1024,"[430.0, 13.0, 184.0, 79.0]",arvalis_2


In [4]:
train_df['x'] = -1
train_df['y'] = -1
train_df['w'] = -1
train_df['h'] = -1

In [5]:
train_df

Unnamed: 0,image_id,width,height,bbox,source,x,y,w,h
0,b6ab77fd7,1024,1024,"[834.0, 222.0, 56.0, 36.0]",usask_1,-1,-1,-1,-1
1,b6ab77fd7,1024,1024,"[226.0, 548.0, 130.0, 58.0]",usask_1,-1,-1,-1,-1
2,b6ab77fd7,1024,1024,"[377.0, 504.0, 74.0, 160.0]",usask_1,-1,-1,-1,-1
3,b6ab77fd7,1024,1024,"[834.0, 95.0, 109.0, 107.0]",usask_1,-1,-1,-1,-1
4,b6ab77fd7,1024,1024,"[26.0, 144.0, 124.0, 117.0]",usask_1,-1,-1,-1,-1
...,...,...,...,...,...,...,...,...,...
147788,5e0747034,1024,1024,"[64.0, 619.0, 84.0, 95.0]",arvalis_2,-1,-1,-1,-1
147789,5e0747034,1024,1024,"[292.0, 549.0, 107.0, 82.0]",arvalis_2,-1,-1,-1,-1
147790,5e0747034,1024,1024,"[134.0, 228.0, 141.0, 71.0]",arvalis_2,-1,-1,-1,-1
147791,5e0747034,1024,1024,"[430.0, 13.0, 184.0, 79.0]",arvalis_2,-1,-1,-1,-1


In [6]:
def expand_bbox(x):
    r = np.array(re.findall("([0-9]+[.]?[0-9]*)", x))
    if len(r)==0:
        r=[-1,-1,-1,-1]
    return r
train_df[['x','y','w','h']] = np.stack(train_df['bbox'].apply(lambda x: expand_bbox(x)))
train_df.drop(columns=['bbox'], inplace=True)
train_df['x'] = train_df['x'].astype(np.float)
train_df['y'] = train_df['y'].astype(np.float)
train_df['w'] = train_df['w'].astype(np.float)
train_df['h'] = train_df['h'].astype(np.float)

In [7]:
train_df

Unnamed: 0,image_id,width,height,source,x,y,w,h
0,b6ab77fd7,1024,1024,usask_1,834.0,222.0,56.0,36.0
1,b6ab77fd7,1024,1024,usask_1,226.0,548.0,130.0,58.0
2,b6ab77fd7,1024,1024,usask_1,377.0,504.0,74.0,160.0
3,b6ab77fd7,1024,1024,usask_1,834.0,95.0,109.0,107.0
4,b6ab77fd7,1024,1024,usask_1,26.0,144.0,124.0,117.0
...,...,...,...,...,...,...,...,...
147788,5e0747034,1024,1024,arvalis_2,64.0,619.0,84.0,95.0
147789,5e0747034,1024,1024,arvalis_2,292.0,549.0,107.0,82.0
147790,5e0747034,1024,1024,arvalis_2,134.0,228.0,141.0,71.0
147791,5e0747034,1024,1024,arvalis_2,430.0,13.0,184.0,79.0


In [8]:
image_ids = train_df['image_id'].unique()
valid_ids = image_ids[-665:]
train_ids = image_ids[:-665]

In [9]:
valid_df = train_df[train_df['image_id'].isin(valid_ids)]
train_df = train_df[train_df['image_id'].isin(train_ids)]

In [10]:
valid_df.shape, train_df.shape

((25006, 8), (122787, 8))

In [11]:
train_df

Unnamed: 0,image_id,width,height,source,x,y,w,h
0,b6ab77fd7,1024,1024,usask_1,834.0,222.0,56.0,36.0
1,b6ab77fd7,1024,1024,usask_1,226.0,548.0,130.0,58.0
2,b6ab77fd7,1024,1024,usask_1,377.0,504.0,74.0,160.0
3,b6ab77fd7,1024,1024,usask_1,834.0,95.0,109.0,107.0
4,b6ab77fd7,1024,1024,usask_1,26.0,144.0,124.0,117.0
...,...,...,...,...,...,...,...,...
122782,97372d371,1024,1024,arvalis_3,59.0,442.0,61.0,58.0
122783,97372d371,1024,1024,arvalis_3,30.0,517.0,69.0,105.0
122784,97372d371,1024,1024,arvalis_3,0.0,889.0,56.0,121.0
122785,97372d371,1024,1024,arvalis_3,104.0,872.0,84.0,109.0


In [12]:
class WheatDataset(Dataset):
    
    def __init__(self, dataframe, image_dir, transform=None):
        super().__init__()
        
        self.image_ids= dataframe['image_id'].unique()
        self.ds = dataframe
        self.image_dir = image_dir
        self.transforms = transforms
    
    def __getitem__(self, index: int):
        records = self.image_ids[index]
        records = self.df[self.df['image_id'] == image_id]
        
        image = cv2.imread(f'{self.image_dir}/{image_id}.jpg', cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        
        boxes = records[['x','y','w','h']].values
        

In [13]:
train_df['image_id'].unique()[0]

'b6ab77fd7'

In [14]:
train_df[train_df['image_id'] == 'b6ab77fd7'] #records

Unnamed: 0,image_id,width,height,source,x,y,w,h
0,b6ab77fd7,1024,1024,usask_1,834.0,222.0,56.0,36.0
1,b6ab77fd7,1024,1024,usask_1,226.0,548.0,130.0,58.0
2,b6ab77fd7,1024,1024,usask_1,377.0,504.0,74.0,160.0
3,b6ab77fd7,1024,1024,usask_1,834.0,95.0,109.0,107.0
4,b6ab77fd7,1024,1024,usask_1,26.0,144.0,124.0,117.0
5,b6ab77fd7,1024,1024,usask_1,569.0,382.0,119.0,111.0
6,b6ab77fd7,1024,1024,usask_1,52.0,602.0,82.0,45.0
7,b6ab77fd7,1024,1024,usask_1,627.0,302.0,122.0,75.0
8,b6ab77fd7,1024,1024,usask_1,412.0,367.0,68.0,82.0
9,b6ab77fd7,1024,1024,usask_1,953.0,220.0,56.0,103.0


In [15]:
records = train_df[train_df['image_id'] == 'b6ab77fd7'] 

In [16]:
records[['x','y','w','h']].values

array([[834., 222.,  56.,  36.],
       [226., 548., 130.,  58.],
       [377., 504.,  74., 160.],
       [834.,  95., 109., 107.],
       [ 26., 144., 124., 117.],
       [569., 382., 119., 111.],
       [ 52., 602.,  82.,  45.],
       [627., 302., 122.,  75.],
       [412., 367.,  68.,  82.],
       [953., 220.,  56., 103.],
       [ 30.,  70., 126., 133.],
       [ 35., 541.,  46.,  46.],
       [103.,  60., 117.,  83.],
       [417.,   4., 110.,  91.],
       [764., 299., 119.,  93.],
       [539.,  58.,  58., 130.],
       [139., 274., 121.,  76.],
       [461., 634., 118.,  64.],
       [215., 634., 113.,  75.],
       [134., 903., 127.,  49.],
       [737., 545.,  87.,  48.],
       [292., 930.,  43.,  46.],
       [  0., 827.,  86.,  58.],
       [324.,  44.,  57.,  70.],
       [663., 794., 116.,  64.],
       [325., 730.,  76.,  72.],
       [155., 554.,  74.,  70.],
       [783., 833.,  70.,  91.],
       [534.,  46.,  73., 224.],
       [155., 281., 106., 138.],
       [10

In [17]:
boxes =records[['x','y','w','h']].values

In [18]:
boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
boxes[:, 3] = boxes[:, 1] + boxes[:, 3]

In [19]:
boxes

array([[ 834.,  222.,  890.,  258.],
       [ 226.,  548.,  356.,  606.],
       [ 377.,  504.,  451.,  664.],
       [ 834.,   95.,  943.,  202.],
       [  26.,  144.,  150.,  261.],
       [ 569.,  382.,  688.,  493.],
       [  52.,  602.,  134.,  647.],
       [ 627.,  302.,  749.,  377.],
       [ 412.,  367.,  480.,  449.],
       [ 953.,  220., 1009.,  323.],
       [  30.,   70.,  156.,  203.],
       [  35.,  541.,   81.,  587.],
       [ 103.,   60.,  220.,  143.],
       [ 417.,    4.,  527.,   95.],
       [ 764.,  299.,  883.,  392.],
       [ 539.,   58.,  597.,  188.],
       [ 139.,  274.,  260.,  350.],
       [ 461.,  634.,  579.,  698.],
       [ 215.,  634.,  328.,  709.],
       [ 134.,  903.,  261.,  952.],
       [ 737.,  545.,  824.,  593.],
       [ 292.,  930.,  335.,  976.],
       [   0.,  827.,   86.,  885.],
       [ 324.,   44.,  381.,  114.],
       [ 663.,  794.,  779.,  858.],
       [ 325.,  730.,  401.,  802.],
       [ 155.,  554.,  229.,  624.],
 

In [20]:
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
area = torch.as_tensor(area, dtype=torch.float32)

In [21]:
area

tensor([ 2016.,  7540., 11840., 11663., 14508., 13209.,  3690.,  9150.,  5576.,
         5768., 16758.,  2116.,  9711., 10010., 11067.,  7540.,  9196.,  7552.,
         8475.,  6223.,  4176.,  1978.,  4988.,  3990.,  7424.,  5472.,  5180.,
         6370., 16352., 14628.,  6150.,  6640.,  3772.,  3266.,  3060., 10360.,
         8736., 18170.,  8760.,  3526., 29240.,  4978.,   713., 17901.,  8712.,
         2565.,   920.])

In [22]:
records.shape[0]

47

In [23]:
# there is only one class
labels = torch.ones((records.shape[0],), dtype=torch.int64)
        
# suppose all instances are not crowd
iscrowd = torch.zeros((records.shape[0],), dtype=torch.int64)


target = {}
target['boxes'] = boxes
target['labels'] = labels
# target['masks'] = None
target['image_id'] = torch.tensor([0])
target['area'] = area
target['iscrowd'] = iscrowd

In [24]:
target

{'boxes': array([[ 834.,  222.,  890.,  258.],
        [ 226.,  548.,  356.,  606.],
        [ 377.,  504.,  451.,  664.],
        [ 834.,   95.,  943.,  202.],
        [  26.,  144.,  150.,  261.],
        [ 569.,  382.,  688.,  493.],
        [  52.,  602.,  134.,  647.],
        [ 627.,  302.,  749.,  377.],
        [ 412.,  367.,  480.,  449.],
        [ 953.,  220., 1009.,  323.],
        [  30.,   70.,  156.,  203.],
        [  35.,  541.,   81.,  587.],
        [ 103.,   60.,  220.,  143.],
        [ 417.,    4.,  527.,   95.],
        [ 764.,  299.,  883.,  392.],
        [ 539.,   58.,  597.,  188.],
        [ 139.,  274.,  260.,  350.],
        [ 461.,  634.,  579.,  698.],
        [ 215.,  634.,  328.,  709.],
        [ 134.,  903.,  261.,  952.],
        [ 737.,  545.,  824.,  593.],
        [ 292.,  930.,  335.,  976.],
        [   0.,  827.,   86.,  885.],
        [ 324.,   44.,  381.,  114.],
        [ 663.,  794.,  779.,  858.],
        [ 325.,  730.,  401.,  802.],
   