In [1]:
import os
import glob
import pandas as pd
from functools import reduce
from xml.etree import ElementTree as et
import warnings
warnings.filterwarnings('ignore')

In [38]:
# Project root; relative paths such as 'data_images/...' in the cells that
# follow are resolved against it after the chdir.
# Raw string: in a normal literal '\Y' and '\y' are invalid escape sequences
# (a SyntaxWarning on recent Python versions) and a folder name starting with
# e.g. 'n' or 't' would be silently turned into a control character.
# NOTE(review): this is a machine-specific absolute path - adjust before
# running elsewhere.
home = r'D:\YOLO_Obj_Detection\yoloenv'
os.chdir(home)

In [2]:
# Collect every annotation file under ./data_images and normalise Windows
# path separators (backslash) to forward slashes in one pass.
xmlfiles = [
    xml_path.replace('\\', '/')
    for xml_path in glob.glob('./data_images/*.xml')
]

In [4]:
len(xmlfiles)

40

In [5]:
# Read xml file and extract
# filename, size(width, height), object(name, xmin, xmax, ymin, ymax)

def extract_text(filename):
    """Parse one annotation XML file into a list of per-object rows.

    Parameters
    ----------
    filename : path or file object
        Passed straight to ``ElementTree.parse``.

    Returns
    -------
    list of list of str
        One row per ``<object>`` element, laid out as
        ``[image_name, width, height, name, xmin, xmax, ymin, ymax]``.
        Every value is the raw element text (no numeric conversion).
        The list is empty when the file has no ``<object>`` elements.
    """
    root = et.parse(filename).getroot()

    # Image-level fields are shared by every row of this file.
    image_name = root.find('filename').text
    size = root.find('size')
    width, height = size.find('width').text, size.find('height').text

    def _row(obj):
        # Row order is xmin, xmax, ymin, ymax (not the XML element order).
        label = obj.find('name').text
        box = obj.find('bndbox')
        corners = [box.find(tag).text for tag in ('xmin', 'xmax', 'ymin', 'ymax')]
        return [image_name, width, height, label, *corners]

    return [_row(obj) for obj in root.findall('object')]

In [6]:
# One list of object rows per annotation file, in the same order as xmlfiles.
parser_all = [extract_text(xml_path) for xml_path in xmlfiles]

In [7]:
parser_all

[[['1.jpg', '1300', '969', 'person', '235', '425', '57', '869'],
  ['1.jpg', '1300', '969', 'person', '493', '685', '167', '881'],
  ['1.jpg', '1300', '969', 'person', '709', '899', '64', '843'],
  ['1.jpg', '1300', '969', 'person', '916', '1210', '150', '872']],
 [['10.jpg', '1280', '853', 'person', '346', '476', '115', '263'],
  ['10.jpg', '1280', '853', 'person', '851', '958', '122', '285']],
 [['11.jpg', '266', '189', 'person', '48', '88', '25', '70']],
 [['12.jpg', '608', '348', 'person', '28', '51', '186', '210'],
  ['12.jpg', '608', '348', 'person', '68', '94', '197', '222'],
  ['12.jpg', '608', '348', 'person', '180', '208', '225', '256'],
  ['12.jpg', '608', '348', 'person', '209', '231', '207', '226'],
  ['12.jpg', '608', '348', 'person', '466', '487', '178', '206'],
  ['12.jpg', '608', '348', 'person', '507', '530', '182', '204'],
  ['12.jpg', '608', '348', 'person', '558', '576', '179', '200']],
 [['13.jpg', '750', '550', 'person', '319', '373', '161', '218']],
 [['14.jpg',

In [8]:
# Flatten the per-file lists into a single list of object rows.
# A nested comprehension is linear in the number of rows and yields [] when
# there are no annotation files; reduce(lambda x, y: x+y, ...) copies the
# accumulated list at every step and raises TypeError on an empty input.
data = [row for file_rows in parser_all for row in file_rows]
data

[['1.jpg', '1300', '969', 'person', '235', '425', '57', '869'],
 ['1.jpg', '1300', '969', 'person', '493', '685', '167', '881'],
 ['1.jpg', '1300', '969', 'person', '709', '899', '64', '843'],
 ['1.jpg', '1300', '969', 'person', '916', '1210', '150', '872'],
 ['10.jpg', '1280', '853', 'person', '346', '476', '115', '263'],
 ['10.jpg', '1280', '853', 'person', '851', '958', '122', '285'],
 ['11.jpg', '266', '189', 'person', '48', '88', '25', '70'],
 ['12.jpg', '608', '348', 'person', '28', '51', '186', '210'],
 ['12.jpg', '608', '348', 'person', '68', '94', '197', '222'],
 ['12.jpg', '608', '348', 'person', '180', '208', '225', '256'],
 ['12.jpg', '608', '348', 'person', '209', '231', '207', '226'],
 ['12.jpg', '608', '348', 'person', '466', '487', '178', '206'],
 ['12.jpg', '608', '348', 'person', '507', '530', '182', '204'],
 ['12.jpg', '608', '348', 'person', '558', '576', '179', '200'],
 ['13.jpg', '750', '550', 'person', '319', '373', '161', '218'],
 ['14.jpg', '900', '600', 'perso

In [9]:
# Column labels follow the row layout of the flattened annotation rows.
column_names = ['filename', 'width', 'height', 'name',
                'xmin', 'xmax', 'ymin', 'ymax']
df = pd.DataFrame(data, columns=column_names)

In [10]:
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax
0,1.jpg,1300,969,person,235,425,57,869
1,1.jpg,1300,969,person,493,685,167,881
2,1.jpg,1300,969,person,709,899,64,843
3,1.jpg,1300,969,person,916,1210,150,872
4,10.jpg,1280,853,person,346,476,115,263


In [11]:
df.shape

(128, 8)

In [12]:
df['name'].value_counts()

name
person    128
Name: count, dtype: int64

In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 128 entries, 0 to 127
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  128 non-null    object
 1   width     128 non-null    object
 2   height    128 non-null    object
 3   name      128 non-null    object
 4   xmin      128 non-null    object
 5   xmax      128 non-null    object
 6   ymin      128 non-null    object
 7   ymax      128 non-null    object
dtypes: object(8)
memory usage: 8.1+ KB


In [14]:
# The XML values were read as text; cast the image-size and box-corner
# columns to integers so they can be used in arithmetic.
columns = ['width','height','xmin','xmax','ymin','ymax']
df = df.astype({numeric_col: 'int' for numeric_col in columns})

In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 128 entries, 0 to 127
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  128 non-null    object
 1   width     128 non-null    int32 
 2   height    128 non-null    int32 
 3   name      128 non-null    object
 4   xmin      128 non-null    int32 
 5   xmax      128 non-null    int32 
 6   ymin      128 non-null    int32 
 7   ymax      128 non-null    int32 
dtypes: int32(6), object(2)
memory usage: 5.1+ KB


In [16]:
# Box centre and box size, each divided by the image width/height so the
# values are fractions of the image dimensions.
df = df.assign(
    center_x=(df['xmin'] + df['xmax']) / 2 / df['width'],
    center_y=(df['ymin'] + df['ymax']) / 2 / df['height'],
    w=(df['xmax'] - df['xmin']) / df['width'],
    h=(df['ymax'] - df['ymin']) / df['height'],
)

In [17]:
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
0,1.jpg,1300,969,person,235,425,57,869,0.253846,0.477812,0.146154,0.837977
1,1.jpg,1300,969,person,493,685,167,881,0.453077,0.540764,0.147692,0.736842
2,1.jpg,1300,969,person,709,899,64,843,0.618462,0.468008,0.146154,0.803922
3,1.jpg,1300,969,person,916,1210,150,872,0.817692,0.527348,0.226154,0.745098
4,10.jpg,1280,853,person,346,476,115,263,0.321094,0.221571,0.101562,0.173505


### Split data into train and test

In [18]:
images = df['filename'].unique()

In [19]:
len(images)

40

In [20]:
# 80% of the images go to train, the remaining 20% to test.
img_df = pd.DataFrame(images, columns=['filename'])
# Fixed random_state: without a seed every run samples a different 80%, so
# the split (and which files later get moved where) is not reproducible.
img_train = tuple(img_df.sample(frac=0.8, random_state=42)['filename'])


In [21]:
# Every image not sampled into the training split forms the test split
# (the remaining 20%).
# isin() compares the values directly; interpolating the tuple's repr into a
# query string breaks on file names containing quote characters.
img_test = tuple(img_df.loc[~img_df['filename'].isin(img_train), 'filename'])

In [22]:
img_test

('13.jpg',
 '16.jpg',
 '20.jpg',
 '24.jpg',
 '25.jpg',
 '30.jpg',
 '35.jpg',
 '36.jpg')

In [23]:
# Split the annotation rows by the image they belong to.
# .copy() makes each split an independent frame, so adding columns to it
# later does not raise SettingWithCopyWarning; isin() avoids interpolating
# the tuples' repr into a query string.
train_df = df[df['filename'].isin(img_train)].copy()
test_df = df[df['filename'].isin(img_test)].copy()

In [24]:
train_df

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
0,1.jpg,1300,969,person,235,425,57,869,0.253846,0.477812,0.146154,0.837977
1,1.jpg,1300,969,person,493,685,167,881,0.453077,0.540764,0.147692,0.736842
2,1.jpg,1300,969,person,709,899,64,843,0.618462,0.468008,0.146154,0.803922
3,1.jpg,1300,969,person,916,1210,150,872,0.817692,0.527348,0.226154,0.745098
4,10.jpg,1280,853,person,346,476,115,263,0.321094,0.221571,0.101562,0.173505
...,...,...,...,...,...,...,...,...,...,...,...,...
123,7.jpg,1280,720,person,1022,1036,243,261,0.803906,0.350000,0.010937,0.025000
124,7.jpg,1280,720,person,985,1005,249,274,0.777344,0.363194,0.015625,0.034722
125,8.jpg,640,359,person,244,282,99,135,0.410938,0.325905,0.059375,0.100279
126,8.jpg,640,359,person,323,355,85,121,0.529687,0.286908,0.050000,0.100279


In [25]:
test_df

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
14,13.jpg,750,550,person,319,373,161,218,0.461333,0.344545,0.072,0.103636
25,16.jpg,910,683,person,641,712,212,313,0.743407,0.384334,0.078022,0.147877
26,16.jpg,910,683,person,469,515,351,413,0.540659,0.559297,0.050549,0.090776
35,20.jpg,945,300,person,259,372,25,146,0.333862,0.285,0.119577,0.403333
41,24.jpg,728,485,person,271,323,198,270,0.407967,0.482474,0.071429,0.148454
42,25.jpg,1344,896,person,416,483,59,155,0.334449,0.11942,0.049851,0.107143
54,30.jpg,1024,683,person,358,429,147,228,0.384277,0.274524,0.069336,0.118594
55,30.jpg,1024,683,person,499,568,135,217,0.520996,0.257687,0.067383,0.120059
56,30.jpg,1024,683,person,632,695,139,205,0.647949,0.25183,0.061523,0.096633
71,35.jpg,910,603,person,420,489,170,266,0.499451,0.361526,0.075824,0.159204


##### Assign id numbers to object names

In [26]:
# label encoding
def label_encoding(x):
    """Return the integer class id for object name ``x``.

    Only 'person' (id 0) is known; any other name raises ``KeyError``.
    """
    class_ids = dict(person=0)
    return class_ids[x]


# # label encoding
# def label_encoding(x):
#     labels = {'person':0, 'car':1, 'chair':2, 'bottle':3, 'pottedplant':4, 'bird':5, 'dog':6,
#               'sofa':7, 'bicycle':8, 'horse':9, 'boat':10, 'motorbike':11, 'cat':12, 'tvmonitor':13,
#               'cow':14, 'sheep':15, 'aeroplane':16, 'train':17, 'hills':18, 'bus':19,}
#     return labels[x]

In [27]:
# Add the numeric class id column. assign() returns a new frame instead of
# writing into train_df/test_df in place; those frames are filtered views of
# df, so in-place column assignment triggers SettingWithCopyWarning.
train_df = train_df.assign(id=train_df['name'].apply(label_encoding))
test_df = test_df.assign(id=test_df['name'].apply(label_encoding))

In [28]:
train_df

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h,id
0,1.jpg,1300,969,person,235,425,57,869,0.253846,0.477812,0.146154,0.837977,0
1,1.jpg,1300,969,person,493,685,167,881,0.453077,0.540764,0.147692,0.736842,0
2,1.jpg,1300,969,person,709,899,64,843,0.618462,0.468008,0.146154,0.803922,0
3,1.jpg,1300,969,person,916,1210,150,872,0.817692,0.527348,0.226154,0.745098,0
4,10.jpg,1280,853,person,346,476,115,263,0.321094,0.221571,0.101562,0.173505,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,7.jpg,1280,720,person,1022,1036,243,261,0.803906,0.350000,0.010937,0.025000,0
124,7.jpg,1280,720,person,985,1005,249,274,0.777344,0.363194,0.015625,0.034722,0
125,8.jpg,640,359,person,244,282,99,135,0.410938,0.325905,0.059375,0.100279,0
126,8.jpg,640,359,person,323,355,85,121,0.529687,0.286908,0.050000,0.100279,0


In [29]:
test_df

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h,id
14,13.jpg,750,550,person,319,373,161,218,0.461333,0.344545,0.072,0.103636,0
25,16.jpg,910,683,person,641,712,212,313,0.743407,0.384334,0.078022,0.147877,0
26,16.jpg,910,683,person,469,515,351,413,0.540659,0.559297,0.050549,0.090776,0
35,20.jpg,945,300,person,259,372,25,146,0.333862,0.285,0.119577,0.403333,0
41,24.jpg,728,485,person,271,323,198,270,0.407967,0.482474,0.071429,0.148454,0
42,25.jpg,1344,896,person,416,483,59,155,0.334449,0.11942,0.049851,0.107143,0
54,30.jpg,1024,683,person,358,429,147,228,0.384277,0.274524,0.069336,0.118594,0
55,30.jpg,1024,683,person,499,568,135,217,0.520996,0.257687,0.067383,0.120059,0
56,30.jpg,1024,683,person,632,695,139,205,0.647949,0.25183,0.061523,0.096633,0
71,35.jpg,910,603,person,420,489,170,266,0.499451,0.361526,0.075824,0.159204,0


### Save images and labels as text files

In [30]:
import os
from shutil import move

In [31]:
train_folder = 'data_images/train'
test_folder = 'data_images/test'

# exist_ok=True keeps this cell re-runnable: os.mkdir raises FileExistsError
# as soon as the folders exist from a previous run.
os.makedirs(train_folder, exist_ok=True)
os.makedirs(test_folder, exist_ok=True)

In [32]:
# Keep only the columns written to the label files (plus the grouping key)
# and group each split by image file name.
cols = ['filename','id','center_x','center_y','w','h']
groupby_obj_train, groupby_obj_test = (
    split[cols].groupby('filename') for split in (train_df, test_df)
)

In [33]:
# Save each image in train/test folder and respective labels in .txt
def save_data(filename, folder_path, group_obj, src_folder='data_images'):
    """Move one image into ``folder_path`` and write its label file next to it.

    Parameters
    ----------
    filename : str
        Image file name; also the group key looked up in ``group_obj``.
    folder_path : str
        Destination folder for both the image and the ``.txt`` label file.
    group_obj : pandas GroupBy
        Rows grouped by ``'filename'``; every column except ``'filename'`` is
        written to the label file.
    src_folder : str, default 'data_images'
        Folder the image is moved from.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If ``filename`` is not a group in ``group_obj``; raised before the
        image is moved.
    """
    # Look the rows up first so a missing group fails before the image moves,
    # rather than leaving a moved image with no label file.
    label_rows = group_obj.get_group(filename)

    src = os.path.join(src_folder, filename)
    dst = os.path.join(folder_path, filename)
    move(src, dst)  # the image is moved, not copied

    # Label file: same base name as the image, one space-separated row per
    # object, without a header and without the filename column.
    text_filename = os.path.join(folder_path,
                                 os.path.splitext(filename)[0] + '.txt')
    label_rows.drop(columns='filename').to_csv(
        text_filename, sep=' ', index=False, header=False)


In [34]:
filename_series = pd.Series(groupby_obj_train.groups.keys())

In [35]:
# apply() is used only to call save_data once per training image; save_data
# has no return statement, so the resulting Series holds None for every entry.
filename_series.apply(save_data,args=(train_folder, groupby_obj_train))

0     None
1     None
2     None
3     None
4     None
5     None
6     None
7     None
8     None
9     None
10    None
11    None
12    None
13    None
14    None
15    None
16    None
17    None
18    None
19    None
20    None
21    None
22    None
23    None
24    None
25    None
26    None
27    None
28    None
29    None
30    None
31    None
dtype: object

In [36]:
filename_series_test = pd.Series(groupby_obj_test.groups.keys())

In [37]:
# apply() is used only to call save_data once per test image; save_data has
# no return statement, so the resulting Series holds None for every entry.
filename_series_test.apply(save_data,args=(test_folder, groupby_obj_test))

0    None
1    None
2    None
3    None
4    None
5    None
6    None
7    None
dtype: object