In [62]:
import os
from glob import glob
import pandas as pd
from functools import reduce
from xml.etree import ElementTree as et
import warnings
warnings.filterwarnings('ignore')

In [6]:
# Collect the annotation files. glob() returns matches in arbitrary order, so sort
# them to get the same row order on every machine, and normalise Windows '\'
# separators to '/' so the paths look identical on every OS.
xml_list = sorted(path.replace('\\', '/') for path in glob('./data_images/*.xml'))


In [7]:
xml_list

['./data_images/000001.xml',
 './data_images/000002.xml',
 './data_images/000007.xml',
 './data_images/003337.xml',
 './data_images/003338.xml',
 './data_images/003339.xml',
 './data_images/003343.xml',
 './data_images/003344.xml',
 './data_images/003349.xml',
 './data_images/003350.xml',
 './data_images/003351.xml',
 './data_images/003354.xml',
 './data_images/003355.xml',
 './data_images/003356.xml',
 './data_images/003359.xml',
 './data_images/003360.xml',
 './data_images/003362.xml',
 './data_images/003363.xml',
 './data_images/003365.xml',
 './data_images/003367.xml',
 './data_images/003369.xml',
 './data_images/003370.xml',
 './data_images/003373.xml',
 './data_images/003374.xml',
 './data_images/003376.xml',
 './data_images/003377.xml',
 './data_images/003379.xml',
 './data_images/003380.xml',
 './data_images/003382.xml',
 './data_images/003386.xml',
 './data_images/003390.xml',
 './data_images/003391.xml',
 './data_images/003392.xml',
 './data_images/003395.xml',
 './data_image

In [32]:
# Step 2: read the XML annotation files.
def extract_text(filename):
    """Parse one annotation XML file into a list of per-object rows.

    Parameters
    ----------
    filename : path or file object accepted by ``ElementTree.parse``.

    Returns
    -------
    list of lists, one per ``<object>`` element, each of the form
    ``[filename, width, height, name, xmin, xmax, ymin, ymax]``.
    Every value is the raw element text (a string); nothing is converted here.
    The list is empty when the file contains no ``<object>`` element.
    """
    annotation = et.parse(filename).getroot()

    # Image-level fields, repeated on every row produced from this file.
    image_name = annotation.find('filename').text
    img_width = annotation.find('size').find('width').text
    img_height = annotation.find('size').find('height').text

    rows = []
    for obj_node in annotation.findall('object'):
        label = obj_node.find('name').text
        box = obj_node.find('bndbox')
        # Output order is xmin, xmax, ymin, ymax regardless of the order in the XML.
        coords = [box.find(tag).text for tag in ('xmin', 'xmax', 'ymin', 'ymax')]
        rows.append([image_name, img_width, img_height, label, *coords])

    return rows


In [33]:
# One list of object rows per annotation file, in the same order as xml_list.
parser_all = [extract_text(xml_path) for xml_path in xml_list]

In [35]:
# Flatten the per-file lists into one list of rows. A comprehension is linear in the
# number of rows and yields [] for an empty input, whereas reduce() with list
# concatenation re-copies the accumulated list at every step and raises TypeError
# when there are no files.
data = [row for file_rows in parser_all for row in file_rows]

In [42]:
# One row per annotated object; column order matches the rows built by extract_text.
df = pd.DataFrame(
    data,
    columns=['filename', 'width', 'height', 'name', 'xmin', 'xmax', 'ymin', 'ymax'],
)

In [44]:
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax
0,000001.jpg,1024,657,car,14,301,335,522
1,000001.jpg,1024,657,car,269,571,345,489
2,000001.jpg,1024,657,car,502,798,342,450
3,000001.jpg,1024,657,car,709,1009,333,438
4,000002.jpg,800,600,car,41,768,240,497


In [45]:
df.shape

(10463, 8)

In [46]:
df['name'].value_counts()

person         3633
car            1123
chair           958
bottle          420
pottedplant     415
bird            396
dog             358
motorbike       275
sofa            273
bicycle         273
horse           268
cat             260
tvmonitor       252
boat            251
cow             249
train           229
sheep           220
aeroplane       208
diningtable     208
bus             194
Name: name, dtype: int64

# Conversion

- To derive extra information about each bounding box, we are going to compute the following quantities, each normalized by the image size:

$$ \text{center}_x = \frac{\frac{x_{min} + x_{max}}{2}}{\text{width of the image}} $$

$$ \text{center}_y = \frac{\frac{y_{min} + y_{max}}{2}}{\text{height of the image}} $$

$$ w = \frac{x_{max} - x_{min}}{\text{width of the image}} $$

$$ h = \frac{y_{max} - y_{min}}{\text{height of the image}} $$



In [47]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10463 entries, 0 to 10462
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  10463 non-null  object
 1   width     10463 non-null  object
 2   height    10463 non-null  object
 3   name      10463 non-null  object
 4   xmin      10463 non-null  object
 5   xmax      10463 non-null  object
 6   ymin      10463 non-null  object
 7   ymax      10463 non-null  object
dtypes: object(8)
memory usage: 654.1+ KB


In [48]:
# Type conversion: every field was read from XML as text, so cast the numeric
# columns to integers before doing arithmetic on them.
cols = ['width','height','xmin','xmax','ymin','ymax']
df = df.astype({col: int for col in cols})
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10463 entries, 0 to 10462
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  10463 non-null  object
 1   width     10463 non-null  int32 
 2   height    10463 non-null  int32 
 3   name      10463 non-null  object
 4   xmin      10463 non-null  int32 
 5   xmax      10463 non-null  int32 
 6   ymin      10463 non-null  int32 
 7   ymax      10463 non-null  int32 
dtypes: int32(6), object(2)
memory usage: 408.8+ KB


In [49]:
# Box centre as a fraction of the image size: midpoint of the two edges
# divided by the corresponding image dimension.
df['center_x'] = df['xmax'].add(df['xmin']).div(2).div(df['width'])
df['center_y'] = df['ymax'].add(df['ymin']).div(2).div(df['height'])
# Box width and height, likewise as fractions of the image size.
df['w'] = df['xmax'].sub(df['xmin']).div(df['width'])
df['h'] = df['ymax'].sub(df['ymin']).div(df['height'])

In [50]:
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
0,000001.jpg,1024,657,car,14,301,335,522,0.153809,0.652207,0.280273,0.284627
1,000001.jpg,1024,657,car,269,571,345,489,0.410156,0.634703,0.294922,0.219178
2,000001.jpg,1024,657,car,502,798,342,450,0.634766,0.60274,0.289062,0.164384
3,000001.jpg,1024,657,car,709,1009,333,438,0.838867,0.586758,0.292969,0.159817
4,000002.jpg,800,600,car,41,768,240,497,0.505625,0.614167,0.90875,0.428333


# Split data into train and test

In [51]:
# Distinct image filenames: df holds one row per annotated object, so a filename
# repeats once for every object found in that image.
images = df['filename'].unique()

In [53]:
len(images)

3335

In [54]:
# 80% train and 20% test, split at image level
img_df = pd.DataFrame(images,columns=['filename'])
# Shuffle and pick 80% of the images. The fixed random_state makes the split
# identical on every Restart & Run All; without it each run yields a different split.
img_train = tuple(img_df.sample(frac=0.8, random_state=42)['filename'])

In [55]:
# The remaining 20% of images: every filename not drawn into img_train.
# A boolean isin() mask is used instead of pasting the tuple's repr into a query
# string, which breaks on filenames containing quotes or backslashes and forces
# pandas to parse thousands of literals.
img_test = tuple(img_df.loc[~img_df['filename'].isin(img_train), 'filename'])

In [56]:
len(img_train), len(img_test)

(2668, 667)

In [57]:
# Select the object rows belonging to each image split. isin() avoids building a
# huge query string from the tuples, and .copy() makes both frames independent of
# df so that columns can be added to them later without writing into a slice.
train_df = df[df['filename'].isin(img_train)].copy()
test_df = df[df['filename'].isin(img_test)].copy()

In [58]:
train_df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
0,000001.jpg,1024,657,car,14,301,335,522,0.153809,0.652207,0.280273,0.284627
1,000001.jpg,1024,657,car,269,571,345,489,0.410156,0.634703,0.294922,0.219178
2,000001.jpg,1024,657,car,502,798,342,450,0.634766,0.60274,0.289062,0.164384
3,000001.jpg,1024,657,car,709,1009,333,438,0.838867,0.586758,0.292969,0.159817
4,000002.jpg,800,600,car,41,768,240,497,0.505625,0.614167,0.90875,0.428333


In [59]:
test_df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
11,003343.jpg,500,375,dog,167,407,82,333,0.574,0.553333,0.48,0.669333
12,003344.jpg,500,333,car,139,415,133,277,0.554,0.615616,0.552,0.432432
13,003344.jpg,500,333,car,116,176,117,154,0.292,0.406907,0.12,0.111111
14,003344.jpg,500,333,person,59,119,89,278,0.178,0.551051,0.12,0.567568
15,003344.jpg,500,333,car,213,257,90,103,0.47,0.28979,0.088,0.039039


# Assign id number to object names

In [60]:
# label encoding
def label_encoding(x):
    """Return the integer class id for the object class name ``x``.

    Raises ``KeyError`` when ``x`` is not one of the 20 class names listed below.
    """
    # The id of a class is its position in this tuple (person -> 0 ... bus -> 19).
    class_names = (
        'person', 'car', 'chair', 'bottle', 'pottedplant', 'bird', 'dog',
        'sofa', 'bicycle', 'horse', 'boat', 'motorbike', 'cat', 'tvmonitor',
        'cow', 'sheep', 'aeroplane', 'train', 'diningtable', 'bus',
    )
    name_to_id = {class_name: idx for idx, class_name in enumerate(class_names)}
    return name_to_id[x]

In [63]:
# Add the numeric class id. assign() returns a new frame, so no column is written
# into a filtered slice of df (the pattern that raises SettingWithCopyWarning).
train_df = train_df.assign(id=train_df['name'].apply(label_encoding))
test_df = test_df.assign(id=test_df['name'].apply(label_encoding))

In [65]:
train_df.head(10)

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h,id
0,000001.jpg,1024,657,car,14,301,335,522,0.153809,0.652207,0.280273,0.284627,1
1,000001.jpg,1024,657,car,269,571,345,489,0.410156,0.634703,0.294922,0.219178,1
2,000001.jpg,1024,657,car,502,798,342,450,0.634766,0.60274,0.289062,0.164384,1
3,000001.jpg,1024,657,car,709,1009,333,438,0.838867,0.586758,0.292969,0.159817,1
4,000002.jpg,800,600,car,41,768,240,497,0.505625,0.614167,0.90875,0.428333,1
5,000002.jpg,800,600,car,533,722,236,299,0.784375,0.445833,0.23625,0.105,1
6,000007.jpg,500,333,car,141,500,50,330,0.641,0.570571,0.718,0.840841,1
7,003337.jpg,500,375,boat,71,365,146,210,0.436,0.474667,0.588,0.170667,10
8,003337.jpg,500,375,boat,227,294,179,216,0.521,0.526667,0.134,0.098667,10
9,003338.jpg,500,329,bird,196,334,75,285,0.53,0.547112,0.276,0.638298,5
