In [None]:
# Mount Google Drive at /content/gdrive (requires the Colab runtime).
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Switch the working directory to the Drive folder; later cells use paths relative to it.
%cd '/content/gdrive/MyDrive/PASCAL/'

/content/gdrive/MyDrive/PASCAL


In [None]:
!wget http://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar
!wget http://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar

In [None]:
!tar -xvf VOCtrainval_06-Nov-2007.tar
!tar -xvf VOCtest_06-Nov-2007.tar

PASCAL VOC 2007 directory
```
/VOCdevkit/VOC2007/
├── Annotations        
│   └── *.xml
├── ImageSets
│   ├── Layout
│   │   └── *.txt
│   ├── Main
│   │   └── *.txt
│   └── Segmentation
│        └── *.txt
├── JPEGImages
│   └── *.jpg
├── SegmentationClass
│   └── *.png
└── SegmentationObject
     └── *.png

```


ImageSets
- class 별 train/val/test txt 파일
- object와 class 일치 여부 (일치: 1, 불일치: -1, 판별이 어려운(difficult) 경우: 0)

Annotations
1. Class : 오브젝트의 클래스
2. Bounding Box: 오브젝트 위치
3. View: 각 오브젝트의 방향 (XML의 `pose` 태그)
4. Truncated: 오브젝트의 잘림 여부
5. Difficult: 인식 난이도

참조 : [PASCAL VOC 2007 데이터셋 다운로드 및 확인 방법](https://ndb796.tistory.com/500)

In [None]:
# Print one annotation file to inspect the XML layout that is parsed later in this notebook.
!cat './VOCdevkit/VOC2007/Annotations/000001.xml'

<annotation>
	<folder>VOC2007</folder>
	<filename>000001.jpg</filename>
	<source>
		<database>The VOC2007 Database</database>
		<annotation>PASCAL VOC2007</annotation>
		<image>flickr</image>
		<flickrid>341012865</flickrid>
	</source>
	<owner>
		<flickrid>Fried Camels</flickrid>
		<name>Jinky the Fruit Bat</name>
	</owner>
	<size>
		<width>353</width>
		<height>500</height>
		<depth>3</depth>
	</size>
	<segmented>0</segmented>
	<object>
		<name>dog</name>
		<pose>Left</pose>
		<truncated>1</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>48</xmin>
			<ymin>240</ymin>
			<xmax>195</xmax>
			<ymax>371</ymax>
		</bndbox>
	</object>
	<object>
		<name>person</name>
		<pose>Left</pose>
		<truncated>1</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>8</xmin>
			<ymin>12</ymin>
			<xmax>352</xmax>
			<ymax>498</ymax>
		</bndbox>
	</object>
</annotation>


In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

In [136]:
def extract_file(imageset_path, file_type, class_name=""):
    """Read an image-set split file and return the image file names it lists.

    Args:
        imageset_path: Directory holding the split ``.txt`` files. A trailing
            path separator is optional.
        file_type: Split name used in the file name, e.g. ``'train'``.
        class_name: Optional class prefix. When given,
            ``<class_name>_<file_type>.txt`` is read and only rows whose last
            column is ``1`` are kept. When empty, ``<file_type>.txt`` is read
            and every row is kept.

    Returns:
        list[str]: The first column of each kept row with ``'.jpg'`` appended,
        in file order.

    Raises:
        OSError: If the split file cannot be opened.
    """
    prefix = class_name + '_' if class_name != "" else ""
    # os.path.join works whether or not imageset_path ends with a separator.
    file_path = os.path.join(imageset_path, prefix + file_type + '.txt')

    cls = []
    with open(file_path, 'r') as f:
        for line in f:
            # split() with no argument collapses runs of whitespace, so the
            # two-space padding in front of a positive label is handled.
            check = line.split()
            if not check:
                # Blank line: skipping it avoids emitting a bogus '.jpg' entry.
                continue
            if class_name == "" or check[-1] == '1':
                cls.append(check[0] + '.jpg')
    return cls

In [137]:
# Sub-directories of the VOC2007 tree used throughout the notebook.
dir_name = './VOCdevkit/VOC2007/'
imageset = 'ImageSets/Main/'
annotation = 'Annotations/'
jpeg = 'JPEGImages/'

imageset_path = os.path.join(dir_name, imageset)
annotation_path = os.path.join(dir_name, annotation)
jpeg_path = os.path.join(dir_name, jpeg)

# Raw directory listings of each sub-directory.
class_dir = os.listdir(imageset_path)
annot_dir = os.listdir(annotation_path)
jpeg_dir = os.listdir(jpeg_path)

# Collect the text before the first '_' of every split file name. Names that
# have no '_' still contain '.txt' at that point and are left out.
classes = set()
for cls in class_dir:
    cls = cls.rsplit('/', 1)[-1].partition('_')[0]
    if '.txt' in cls:
        continue
    classes.add(cls)

print(len(classes), classes)

20 {'train', 'bus', 'bicycle', 'cat', 'cow', 'bird', 'horse', 'dog', 'car', 'sheep', 'boat', 'pottedplant', 'motorbike', 'chair', 'sofa', 'tvmonitor', 'person', 'aeroplane', 'bottle', 'diningtable'}


In [145]:
# One record per image in each split: file name -> {'class_name': []}.
# The empty lists are placeholders for per-image class names.
train_data, test_data, val_data = (
    {f: {'class_name': []} for f in extract_file(imageset_path, split)}
    for split in ('train', 'test', 'val')
)

In [146]:
# Map each split name to the dict that receives its class memberships.
split_tables = {'train': train_data, 'test': test_data, 'val': val_data}

# For every class, append the class name to each image listed as positive
# for that class in the corresponding split file.
for cls in classes:
    for file_type, table in split_tables.items():
        for f in extract_file(imageset_path, file_type, cls):
            table[f]['class_name'].append(cls)

In [169]:
from xml.etree.ElementTree import parse

def xml_parser(annotation_path, file_name, keep_all=False):
    """Parse one annotation XML file and return the bounding boxes of its objects.

    Args:
        annotation_path: Directory containing the annotation ``.xml`` files.
        file_name: Image file name (e.g. ``'000001.jpg'``). Its extension is
            swapped for ``.xml`` to locate the annotation file.
        keep_all: When False (default), the result maps each object name to a
            single box, so an image with several objects of the same name
            keeps only the last one in the file. When True, each name maps to
            a list holding every box in document order.

    Returns:
        dict: ``{name: (xmin, ymin, xmax, ymax)}`` by default, or
        ``{name: [(xmin, ymin, xmax, ymax), ...]}`` when ``keep_all`` is True.
    """
    # Swap only the extension; str.replace('jpg', 'xml') would also rewrite
    # a 'jpg' that happens to appear inside the file stem.
    xml_name = os.path.splitext(file_name)[0] + '.xml'
    dir_path = os.path.join(annotation_path, xml_name)

    root = parse(dir_path).getroot()

    cls = dict()
    for obj in root.findall('object'):
        obj_name = obj.find('name').text
        bb = obj.find('bndbox')
        # int(float(...)) also accepts coordinates written as '48.0'.
        box = tuple(
            int(float(bb.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        if keep_all:
            cls.setdefault(obj_name, []).append(box)
        else:
            cls[obj_name] = box

    return cls

In [170]:
# Replace every image's record with the boxes parsed from its annotation XML.
# The previous value stored under each file name is overwritten.
for split_data in (train_data, test_data, val_data):
    for data in split_data:
        split_data[data] = xml_parser(annotation_path, data)

In [172]:
# Display the parsed annotations (file name -> {object name: bounding box}) for a visual check.
train_data

{'000012.jpg': {'car': (156, 97, 351, 270)},
 '000017.jpg': {'horse': (90, 78, 403, 336), 'person': (185, 62, 279, 199)},
 '000023.jpg': {'bicycle': (2, 178, 90, 500), 'person': (225, 1, 334, 486)},
 '000026.jpg': {'car': (90, 125, 337, 212)},
 '000032.jpg': {'aeroplane': (133, 88, 197, 123),
  'person': (26, 189, 44, 238)},
 '000033.jpg': {'aeroplane': (325, 188, 411, 223)},
 '000034.jpg': {'train': (141, 153, 333, 229)},
 '000035.jpg': {'diningtable': (3, 304, 500, 375),
  'person': (468, 195, 500, 317)},
 '000036.jpg': {'dog': (27, 79, 319, 344)},
 '000042.jpg': {'train': (1, 36, 235, 299)},
 '000044.jpg': {'cat': (99, 101, 312, 213), 'chair': (1, 1, 370, 330)},
 '000047.jpg': {'car': (15, 1, 459, 312), 'chair': (439, 170, 487, 230)},
 '000048.jpg': {'bird': (58, 107, 291, 465), 'person': (2, 1, 302, 500)},
 '000061.jpg': {'boat': (184, 214, 281, 252)},
 '000064.jpg': {'bird': (1, 23, 451, 500)},
 '000066.jpg': {'person': (269, 188, 295, 259)},
 '000073.jpg': {'boat': (270, 155, 375

In [171]:
import pickle

# save data: one pickle file per split, written to the working directory
for pickle_path, payload in (
    ('train_data.pickle', train_data),
    ('test_data.pickle', test_data),
    ('val_data.pickle', val_data),
):
    with open(pickle_path, 'wb') as fw:
        pickle.dump(payload, fw)

# load data: read the three files back in the same order
loaded = []
for pickle_path in ('train_data.pickle', 'test_data.pickle', 'val_data.pickle'):
    with open(pickle_path, 'rb') as fr:
        loaded.append(pickle.load(fr))

train_data, test_data, val_data = loaded

In [None]:
# Flatten the three splits into parallel lists with one entry per image.
# Each record is expected to be the {object name: bounding box} dict that
# xml_parser returns, so names come from its keys and boxes from its values.
length = []     # number of annotated object names per image
file_type = []  # 'train' / 'test' / 'val' label per image
objs = []       # list of object names per image
bb = []         # list of bounding boxes per image, aligned with objs

for split_name, split_data in (
    ('train', train_data),
    ('test', test_data),
    ('val', val_data),
):
    for record in split_data.values():
        names = list(record.keys())
        length.append(len(names))
        file_type.append(split_name)
        objs.append(names)
        # Build a real list; appending a bare generator would store an
        # unevaluated generator object instead of the boxes.
        bb.append([record[name] for name in names])

# Largest number of object names found in any single image (0 when empty).
max_length = max(length, default=0)

print(max_length)

DataFrame 만들기

파일명(path.jpg)|file_type(train/test/val)|object_length(n개)|object_1|$\cdots$|object_n|object_bb_1|$\cdots$|object_bb_n
:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:
~.jpg|train|5|'dog'|$\cdots$|None|(2, 4, 100, 120)|$\cdots$|None
$\vdots$||||||||$\vdots$
~.jpg|val|1|'bird'|$\cdots$|None|(20, 38, 29, 48)|$\cdots$|None