In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
%cd '/content/gdrive/MyDrive/PASCAL/VOCdevkit/VOC2007/'
%ls

/content/gdrive/MyDrive/PASCAL/VOCdevkit/VOC2007
[0m[01;34mAnnotations[0m/  [01;34mImageSets[0m/      seg_df.csv
df.csv        [01;34mJPEGImages[0m/     [01;34mSegmentationClass[0m/
[01;34mFinetune[0m/     samples.pickle  [01;34mSegmentationObject[0m/


In [52]:
%ls '/content/gdrive/MyDrive/PASCAL/VOCdevkit/VOC2007/Finetune/Annotations/'

000134_n.csv  002045_n.csv  004705_n.csv  005979_n.csv  008461_n.csv
000134_p.csv  002045_p.csv  004705_p.csv  005979_p.csv  008461_p.csv
000210_n.csv  002056_n.csv  004873_n.csv  006062_n.csv  008483_n.csv
000210_p.csv  002056_p.csv  004873_p.csv  006062_p.csv  008483_p.csv
000233_n.csv  002533_n.csv  004890_n.csv  006124_n.csv  008665_n.csv
000233_p.csv  002533_p.csv  004890_p.csv  006124_p.csv  008665_p.csv
000263_n.csv  002734_n.csv  004946_n.csv  006196_n.csv  008750_n.csv
000263_p.csv  002734_p.csv  004946_p.csv  006196_p.csv  008750_p.csv
000477_n.csv  002804_n.csv  005067_n.csv  006224_n.csv  008768_n.csv
000477_p.csv  002804_p.csv  005067_p.csv  006224_p.csv  008768_p.csv
000860_n.csv  003195_n.csv  005199_n.csv  006235_n.csv  008923_n.csv
000860_p.csv  003195_p.csv  005199_p.csv  006235_p.csv  008923_p.csv
000906_n.csv  003355_n.csv  005483_n.csv  006375_n.csv  008939_n.csv
000906_p.csv  003355_p.csv  005483_p.csv  006375_p.csv  008939_p.csv
001069_n.csv  003363_n.csv  005566

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import random

import cv2

import pickle

전처리한 df [Object Detection.ipynb](https://github.com/hodurie/AI_Study/blob/master/Implementation/Datasets/Object%20Detection.ipynb)

In [None]:
df = pd.read_csv('df.csv')
df.head()

Unnamed: 0,file_name,file_type,object_length,object,xmin,ymin,xmax,ymax,pose,truncated,difficult
0,000012.jpg,train,1,car,156,97,351,270,Rear,0,0
1,000017.jpg,train,2,person,185,62,279,199,Left,0,0
2,000017.jpg,train,2,horse,90,78,403,336,Left,0,0
3,000023.jpg,train,6,bicycle,9,230,245,500,Unspecified,1,0
4,000023.jpg,train,6,bicycle,230,220,334,500,Frontal,1,0


PASCAL VOC 2007 directory
```
/VOCdevkit//VOC2007/
├── df.csv                # Object Detection 전처리 csv
├── seg.csv               # Segmentation 전처리 csv
└── Finetune              # positive_list, negative_list txt 폴더
     ├── train
     │   ├── *_n.csv / *_p.csv
     │   ├── Annotations
     │   └── bndboxes
     └── validation
          └── Annotations


# 기존 PASCAL VOC 2007 directory 구조
/VOCdevkit//VOC2007/
├── Annotations           
│   └── *.xml        
├── ImageSets
│   ├── Layout
│   │   └── *.txt
│   ├── Main
│   │   └── *.txt
│   └── Segmentation
│        └── *.txt
├── JPEGImages
│   └── .jpg
├── SegmentationClass
│   └── *.png
└── SegmentationObject
     └── *.png

```



In [53]:
finetune_root_dir = './Finetune/'

if not os.path.exists(finetune_root_dir):
    os.mkdir(finetune_root_dir)

for name in ['train', 'validation']:
    dst_root_dir = os.path.join(finetune_root_dir, name)

    if not os.path.exists(dst_root_dir):
        os.mkdir(dst_root_dir)

    dst_annotation_dir = os.path.join(finetune_root_dir, name, 'Annotations')
    if not os.path.exists(dst_annotation_dir):
        os.mkdir(dst_annotation_dir)

특정 obj 만 추출해서 sample 만들기

In [None]:
def sample_split(df, obj='car'):
    # car dataset 사용
    cond = df['object'] == obj
    df = df[cond]

    samples = {}

    for name in ['train', 'validation']:
        cond = df['file_type'] == name
        sample = df.loc[cond, 'file_name'].unique()

        length = len(sample)
        
        indices = random.sample(range(length), int(length/ 10))
        
        samples[name] = sample[indices]

    return samples

In [None]:
def IoU(pred_box, target_box):
    '''
    pred_box = [4] 
    target_box = [N, 4]
    '''
    # (xmax - xmin) * (ymax - ymin)

    if len(target_box.shape) == 1:
        target_box = target_box[np.newaxis, :]

    areaA = (target_box[:, 2] - target_box[:, 0]) * (target_box[:, 3] - target_box[:, 1])
    areaB = (pred_box[2] - pred_box[0]) * (pred_box[3] - pred_box[1])

    xA = np.maximum(pred_box[0], target_box[:, 0])
    yA = np.maximum(pred_box[1], target_box[:, 1])
    xB = np.minimum(pred_box[2], target_box[:, 2])
    yB = np.minimum(pred_box[3], target_box[:, 3])
    
    intersection = np.maximum(0.0, xB - xA) * np.maximum(0.0, yB - yA)
    
    scores = intersection / (areaA + areaB - intersection)


    return scores

In [None]:
def region_proposals(jpg, gs):
    global df
    path = os.path.join('./JPEGImages/', jpg)
    img = cv2.imread(path)

    gs.setBaseImage(img)
    gs.switchToSelectiveSearchQuality()

    rects = gs.process()
    rects[:, 2] += rects[:, 0]
    rects[:, 3] += rects[:, 1]

    cond = df['file_name'] == jpg
    cols = ['xmin', 'ymin', 'xmax', 'ymax']
    bndboxes = np.array(df.loc[cond, cols])

    maximum_bndbox_size = 0

    for bndbox in bndboxes:
        xmin, ymin, xmax, ymax = bndbox
        bndbox_size = (xmax - xmin) * (ymax - ymin)
        if bndbox_size > maximum_bndbox_size:
            maximum_bndbox_size = bndbox_size


    iou_list = []
    for rect in rects:
        scores = IoU(rect, bndboxes)
        iou_list.append(max(scores))
    
    return iou_list, rects, maximum_bndbox_size, bndboxes

In [None]:
def parse_annotation_jpg(jpg, gs):
    iou_list, rects, maximum_bndbox_size, bndboxes = region_proposals(jpg, gs)

    positive_list = []
    negative_list = []

    for i in range(len(iou_list)):
        xmin, ymin, xmax, ymax = rects[i]
        rect_size = (xmax - xmin) * (ymax - ymin)

        iou_score = iou_list[i]

        if iou_score >= 0.5:
            positive_list.append(rects[i])
        
        if 0 < iou_score < 0.5 and rect_size > maximum_bndbox_size / 5.0:
            negative_list.append(rects[i])
    
    return positive_list, negative_list

In [None]:
gs = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()

samples = sample_split(df)

for name in ['train', 'validation']:
    total_num_positive = 0
    total_num_negative = 0


    for sample in samples[name]:
        positive_list, negative_list = parse_annotation_jpg(sample, gs)
        total_num_positive += len(positive_list)
        total_num_negative += len(negative_list)

        finetune_path = os.path.join(finetune_root_dir, name)

        positive_list_path = os.path.join(finetune_path, sample.replace('.jpg', '_p.csv'))
        negative_list_path = os.path.join(finetune_path, sample.replace('.jpg', '_n.csv'))
        
        np.savetxt(positive_list_path, np.array(positive_list), fmt='%d', delimiter=' ')
        np.savetxt(negative_list_path, np.array(negative_list), fmt='%d', delimiter=' ')

    print('%s positive num: %d' % (name, total_num_positive))
    print('%s negative num: %d' % (name, total_num_negative))

In [None]:
with open('samples.pickle','wb') as fw:
    pickle.dump(samples, fw)

In [None]:
with open('samples.pickle', 'rb') as fr:
    samples = pickle.load(fr)

In [None]:
samples

{'train': array(['000906.jpg', '008968.jpg', '005585.jpg', '005609.jpg',
        '008108.jpg', '004946.jpg', '008665.jpg', '005483.jpg',
        '002533.jpg', '008750.jpg', '008923.jpg', '008483.jpg',
        '003355.jpg', '002056.jpg', '004873.jpg', '008960.jpg',
        '009073.jpg', '005566.jpg', '008768.jpg', '002804.jpg',
        '003987.jpg', '008360.jpg', '006224.jpg', '004576.jpg',
        '003420.jpg', '006375.jpg', '005756.jpg', '003363.jpg',
        '000134.jpg', '004591.jpg', '000860.jpg', '009205.jpg',
        '006196.jpg', '000477.jpg', '007003.jpg', '004705.jpg',
        '008939.jpg', '001119.jpg', '000263.jpg', '007963.jpg'],
       dtype=object),
 'validation': array(['006235.jpg', '002045.jpg', '003390.jpg', '002734.jpg',
        '005979.jpg', '001944.jpg', '003461.jpg', '008461.jpg',
        '007855.jpg', '009900.jpg', '000233.jpg', '005593.jpg',
        '009558.jpg', '001862.jpg', '009932.jpg', '006062.jpg',
        '001693.jpg', '008279.jpg', '004295.jpg', '005749.

In [None]:
def parse_annotation_jpg_svm(jpg, gs):
    iou_list, rects, maximum_bndbox_size, bndboxs = region_proposals(jpg, gs)

    positive_list = []
    negative_list = []

    for i in range(len(iou_list)):
        xmin, ymin, xmax, ymax = rects[i]
        rect_size = (xmax - xmin) * (ymax - ymin)

        iou_score = iou_list[i]

        if 0 < iou_score <= 0.3 and rect_size > maximum_bndbox_size / 5.0:
            negative_list.append(rects[i])

        return bndboxs, negative_list

In [54]:
gs = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()

for name in ['train', 'validation']:
    total_num_positive = 0
    total_num_negative = 0

    for sample in samples[name]:
        positive_list, negative_list = parse_annotation_jpg_svm(sample, gs)
        total_num_positive += len(positive_list)
        total_num_negative += len(negative_list)

        path = os.path.join(finetune_root_dir, name, 'Annotations')
        dst_annotation_positive_path = os.path.join(path, sample.replace('.jpg', '_p.csv'))
        dst_annotation_negative_path = os.path.join(path, sample.replace('.jpg', '_n.csv'))

        np.savetxt(dst_annotation_positive_path, np.array(positive_list), fmt='%d', delimiter=' ')
        np.savetxt(dst_annotation_negative_path, np.array(negative_list), fmt='%d', delimiter=' ')
    
    print('%s positive num: %d' % (name, total_num_positive))
    print('%s negative num: %d' % (name, total_num_negative))

train positive num: 167
train negative num: 4
validation positive num: 171
validation negative num: 4


In [70]:
res_samples = []
total_positive_num = 0

name = 'train'

jpg_csv_path = os.path.join(finetune_root_dir, name, 'bndboxes')
if not os.path.exists(jpg_csv_path):
    os.mkdir(jpg_csv_path)

for sample in samples[name]:
    path = os.path.join(finetune_root_dir, name, sample.replace('.jpg', '_p.csv'))
    positive_bndboxes = np.loadtxt(path, dtype=np.int, delimiter=' ')

    cols = ['xmin', 'ymin', 'xmax', 'ymax']
    train_df = df[df['file_name'] == sample]
    train_car = train_df.loc[df['object'] == 'car']

    bndboxes = np.array(train_car[cols])
    
    positive_list = []

    if len(positive_bndboxes.shape) == 1 and len(positive_bndboxes) != 0:
        scores = iou(positive_bndboxes, bndboxes)
        if np.max(scores) > 0.6:
            positive_list.append(positive_bndboxes)
    elif len(positive_bndboxes.shape) == 2:
        for positive_bndbox in positive_bndboxes:
            scores = IoU(positive_bndbox, bndboxes)
            if np.max(scores) > 0.6:
                positive_list.append(positive_bndbox)

    if len(positive_list) > 0:
        jpg_csv_paths = os.path.join(jpg_csv_path, sample.replace('jpg', 'csv'))
        np.savetxt(jpg_csv_paths, np.array(positive_list), fmt='%s', delimiter=' ')
        total_positive_num += len(positive_list)
        res_samples.append(sample)
        print('save {} done'.format(sample))
    else:
        print('-------- {} ineligible'.format(sample))

dst_csv_path = os.path.join(finetune_root_dir, name, 'bndboxes', 'df.csv')
np.savetxt(dst_csv_path, res_samples, fmt='%s', delimiter=' ')
print('total positive num: {}'.format(total_positive_num))

save 000906.jpg done
save 008968.jpg done
save 005585.jpg done
save 005609.jpg done
save 008108.jpg done
save 004946.jpg done
save 008665.jpg done
save 005483.jpg done
save 002533.jpg done
save 008750.jpg done
save 008923.jpg done
save 008483.jpg done
save 003355.jpg done
-------- 002056.jpg ineligible
save 004873.jpg done
-------- 008960.jpg ineligible
save 009073.jpg done
save 005566.jpg done
save 008768.jpg done
save 002804.jpg done
save 003987.jpg done
save 008360.jpg done
save 006224.jpg done
save 004576.jpg done
save 003420.jpg done
save 006375.jpg done
save 005756.jpg done
save 003363.jpg done
save 000134.jpg done
save 004591.jpg done
save 000860.jpg done
save 009205.jpg done
save 006196.jpg done
-------- 000477.jpg ineligible
save 007003.jpg done
save 004705.jpg done
save 008939.jpg done
save 001119.jpg done
save 000263.jpg done
save 007963.jpg done
total positive num: 4474


In [72]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.utils.data as data
from torch.utils.data import DataLoader

In [111]:
class CustomFinetuneDataset():
    def __init__(self, root_dir, transform=None):
        '''
        root_dir = 'Finetune/train'
        jpg_path = 'JPEGImages/'
        '''
        with open('samples.pickle', 'rb') as fr:
            samples = pickle.load(fr)

        name = root_dir.split('/')[-1]

        jpg_path = './JPEGImages/'

        images = [cv2.imread(os.path.join(jpg_path, sample)) for sample in samples[name]]

        annotations_path = os.path.join(root_dir, 'Annotations')
        positive_annotations = [cv2.imread(os.path.join(annotations_path, sample.replace('.jpg', '_p.csv'))) for sample in samples[name]]
        negative_annotations = [cv2.imread(os.path.join(annotations_path, sample.replace('.jpg', '_n.csv'))) for sample in samples[name]]

        positive_sizes = []
        negative_sizes = []
        
        positive_rects = []
        negative_rects = []

        for annotation_path in positive_annotations:
            rects = np.loadtxt(annotation_path, dtype=np.int, delimiter=' ')
            if len(rects.shape) == 1:
                if rects.shape[0] == 4:
                    positive_rects.append(rects)
                    positive_sizes.append(1)
                else:
                    positive_sizes.append(0)
            else:
                positive_rects.extend(rects)
                positive_sizes.append(len(rects))
        
        for annotation_path in negative_annotations:
            rects = np.loadtxt(annotation_path, dtype=np.int, delimiter=' ')
            if len(rects.shape) == 1:
                if rects.shape[0] == 4:
                    negative_rects.append(rects)
                    negative_sizes.append(1)
                else:
                    positive_sizes.append(0)
            else:
                negative_rects.extend(rects)
                negative_sizes.append(len(rects))

        self.transform = transform
        self.images = images
        self.positive_sizes = positive_sizes
        self.negative_sizes = negative_sizes
        self.positive_rects = positive_rects
        self.negative_rects = negative_rects
        self.total_positive_num = int(np.sum(positive_sizes))
        self.total_negative_num = int(np.sum(negative_sizes))
    
    def __getitem__(self, index):
        image_id = len(self.images) - 1
        if index < self.total_positive_num:
            target = 1
            xmin, ymin, xmax, ymax = self.positive_rects[index]

            for i in range(len(self.positive_sizes) - 1):
                if np.sum(self.positive_sizes[:i]) <= index < np.sum(self.positive_sizes[:(i + 1)]):
                    image_id = i
                    break
            image = self.images[image_id][ymin:ymax, xmin:xmax]
        else:
            target = 0
            idx = index - self.total_positive_num
            xmin, ymin, xmax, ymax = self.negative_rects[idx]

            for i in range(len(self.negative_sizes) - 1):
                if np.sum(self.negative_sizes[:i]) <= idx < np.sum(self.negative_sizes[:(i + 1)]):
                    image_id = i
                    break
            image = self.jpeg_images[image_id][ymin:ymax, xmin:xmax]
        
        if self.transform:
            image = self.transform(image)

        return image, target

    def __len__(self):
        return self.total_positive_num + self.total_negative_num

    def get_positive_num(self):
        return self.total_positive_num

    def get_negative_num(self):
        return self.total_negative_num

In [112]:
from google.colab.patches import cv2_imshow

def test_finetune(idx):
    root_dir = './Finetune/train'
    train_data_set = CustomFinetuneDataset(root_dir)

    print('positive num: %d' % train_data_set.get_positive_num())
    print('negative num: %d' % train_data_set.get_negative_num())
    print('total num: %d' % train_data_set.__len__())

    image, target = train_data_set.__getitem__(idx)
    print('target: %d' % target)

    cv2_imshow(image)
    cv2.waitKey(0)

## Reference
- [R-CNN](https://github.com/object-detection-algorithm/R-CNN)

```
@misc{girshick2013rich,
    title={Rich feature hierarchies for accurate object detection and semantic segmentation},
    author={Ross Girshick and Jeff Donahue and Trevor Darrell and Jitendra Malik},
    year={2013},
    eprint={1311.2524},
    archivePrefix={arXiv},
    primaryClass={cs.CV}
}

@misc{pascal-voc-2007,
	author = "Everingham, M. and Van~Gool, L. and Williams, C. K. I. and Winn, J. and Zisserman, A.",
	title = "The {PASCAL} {V}isual {O}bject {C}lasses {C}hallenge 2007 {(VOC2007)} {R}esults",
	howpublished = "http://www.pascal-network.org/challenges/VOC/voc2007/workshop/index.html"}

```