In [151]:
import os
from glob import glob
import numpy as np
import pandas as pd
from functools import reduce
from xml.etree import ElementTree as et

In [152]:
xml_list = glob("./data_images/*.xml")
xml_list

['./data_images/007826.xml',
 './data_images/002786.xml',
 './data_images/006286.xml',
 './data_images/002962.xml',
 './data_images/008297.xml',
 './data_images/009189.xml',
 './data_images/009823.xml',
 './data_images/002976.xml',
 './data_images/002745.xml',
 './data_images/006523.xml',
 './data_images/008268.xml',
 './data_images/004452.xml',
 './data_images/002023.xml',
 './data_images/005980.xml',
 './data_images/004446.xml',
 './data_images/002037.xml',
 './data_images/009162.xml',
 './data_images/006251.xml',
 './data_images/000620.xml',
 './data_images/000146.xml',
 './data_images/007629.xml',
 './data_images/001258.xml',
 './data_images/002751.xml',
 './data_images/002989.xml',
 './data_images/007601.xml',
 './data_images/001270.xml',
 './data_images/002779.xml',
 './data_images/005016.xml',
 './data_images/003301.xml',
 './data_images/006279.xml',
 './data_images/007167.xml',
 './data_images/008254.xml',
 './data_images/000608.xml',
 './data_images/005764.xml',
 './data_image

In [153]:
def extract(fn):
    tree = et.parse(fn)
    root = tree.getroot()
    img_name = root.find("filename").text
    width = float(root.find("size").find("width").text)
    height = float(root.find("size").find("height").text)
    objs = root.findall("object")
    parser = []
    for obj in objs:
        name = obj.find("name").text
        xmin = float(obj.find("bndbox").find("xmin").text)
        xmax = float(obj.find("bndbox").find("xmax").text)
        ymin = float(obj.find("bndbox").find("ymin").text)
        ymax = float(obj.find("bndbox").find("ymax").text)
        parser.append([img_name, width, height, name, xmin, xmax, ymin, ymax])
    return parser


In [154]:
result = []
for n in xml_list:
    res = extract(n)
    for j in res:
        result.append(j)
result


[['007826.jpg', 500.0, 375.0, 'diningtable', 80.0, 320.0, 217.0, 273.0],
 ['007826.jpg', 500.0, 375.0, 'chair', 197.0, 257.0, 193.0, 326.0],
 ['007826.jpg', 500.0, 375.0, 'chair', 139.0, 185.0, 184.0, 231.0],
 ['007826.jpg', 500.0, 375.0, 'chair', 258.0, 312.0, 180.0, 314.0],
 ['007826.jpg', 500.0, 375.0, 'chair', 10.0, 93.0, 195.0, 358.0],
 ['007826.jpg', 500.0, 375.0, 'chair', 82.0, 243.0, 252.0, 372.0],
 ['007826.jpg', 500.0, 375.0, 'chair', 43.0, 144.0, 319.0, 375.0],
 ['002786.jpg', 500.0, 332.0, 'horse', 80.0, 348.0, 97.0, 272.0],
 ['002786.jpg', 500.0, 332.0, 'person', 201.0, 258.0, 52.0, 202.0],
 ['006286.jpg', 500.0, 375.0, 'person', 80.0, 405.0, 88.0, 375.0],
 ['006286.jpg', 500.0, 375.0, 'person', 436.0, 475.0, 147.0, 209.0],
 ['006286.jpg', 500.0, 375.0, 'person', 381.0, 428.0, 145.0, 191.0],
 ['006286.jpg', 500.0, 375.0, 'diningtable', 402.0, 500.0, 219.0, 375.0],
 ['006286.jpg', 500.0, 375.0, 'diningtable', 347.0, 405.0, 177.0, 216.0],
 ['006286.jpg', 500.0, 375.0, 'dinin

In [155]:
df = pd.DataFrame(result, columns=["filename", "width", "height", "name", "xmin", "xmax", "ymin", "ymax"])
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax
0,007826.jpg,500.0,375.0,diningtable,80.0,320.0,217.0,273.0
1,007826.jpg,500.0,375.0,chair,197.0,257.0,193.0,326.0
2,007826.jpg,500.0,375.0,chair,139.0,185.0,184.0,231.0
3,007826.jpg,500.0,375.0,chair,258.0,312.0,180.0,314.0
4,007826.jpg,500.0,375.0,chair,10.0,93.0,195.0,358.0


In [156]:
df.shape

(15663, 8)

In [157]:
df["name"].value_counts()

name
person         5447
car            1650
chair          1427
bottle          634
pottedplant     625
bird            599
dog             538
sofa            425
bicycle         418
horse           406
boat            398
motorbike       390
cat             389
tvmonitor       367
cow             356
sheep           353
aeroplane       331
train           328
diningtable     310
bus             272
Name: count, dtype: int64

In [158]:
df["center_x"] = ((df["xmax"] + df["xmin"]) / 2) / df["width"]
df["center_y"] = ((df["ymax"] + df["ymin"]) / 2) / df["height"]

df["w"] = (df["xmax"] - df["xmin"]) / df["width"]
df["h"] = (df["ymax"] - df["ymin"]) / df["height"]


In [159]:
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
0,007826.jpg,500.0,375.0,diningtable,80.0,320.0,217.0,273.0,0.4,0.653333,0.48,0.149333
1,007826.jpg,500.0,375.0,chair,197.0,257.0,193.0,326.0,0.454,0.692,0.12,0.354667
2,007826.jpg,500.0,375.0,chair,139.0,185.0,184.0,231.0,0.324,0.553333,0.092,0.125333
3,007826.jpg,500.0,375.0,chair,258.0,312.0,180.0,314.0,0.57,0.658667,0.108,0.357333
4,007826.jpg,500.0,375.0,chair,10.0,93.0,195.0,358.0,0.103,0.737333,0.166,0.434667


In [160]:
images = df["filename"].unique()

In [161]:
len(images)

5012

In [162]:
img_df = pd.DataFrame(images, columns=["filename"])
img_train = tuple(img_df.sample(frac=0.8)['filename'])



In [163]:
img_test = tuple(img_df.query(f'filename not in {img_train}')['filename'])

In [164]:
len(img_train), len(img_test)

(4010, 1002)

In [165]:
train_df = df.query(f'filename in {img_train}')
test_df = df.query(f'filename in {img_test}')

train_df.head()


Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
0,007826.jpg,500.0,375.0,diningtable,80.0,320.0,217.0,273.0,0.4,0.653333,0.48,0.149333
1,007826.jpg,500.0,375.0,chair,197.0,257.0,193.0,326.0,0.454,0.692,0.12,0.354667
2,007826.jpg,500.0,375.0,chair,139.0,185.0,184.0,231.0,0.324,0.553333,0.092,0.125333
3,007826.jpg,500.0,375.0,chair,258.0,312.0,180.0,314.0,0.57,0.658667,0.108,0.357333
4,007826.jpg,500.0,375.0,chair,10.0,93.0,195.0,358.0,0.103,0.737333,0.166,0.434667


In [166]:
def label_encoding(x):
    labels = {
        "person":0,
        "car":1,
        "chair":2,
        "bottle": 3,
        "pottedplant": 4,
        "bird": 5,
        "dog": 6,
        "sofa": 7,
        "bicycle": 8,
        "horse": 9,
        "boat": 10,
        "motorbike": 11,
        "cat": 12,
        "tvmonitor": 13,
        "cow": 14,
        "sheep": 15,
        "aeroplane": 16,
        "train":17,
        "diningtable": 18,
        "bus":19
    }
    return labels[x]

In [167]:
train_df["id"] = train_df["name"].apply(label_encoding)
test_df["id"] = test_df["name"].apply(label_encoding)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df["id"] = train_df["name"].apply(label_encoding)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df["id"] = test_df["name"].apply(label_encoding)


In [168]:
train_df.head(10)

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h,id
0,007826.jpg,500.0,375.0,diningtable,80.0,320.0,217.0,273.0,0.4,0.653333,0.48,0.149333,18
1,007826.jpg,500.0,375.0,chair,197.0,257.0,193.0,326.0,0.454,0.692,0.12,0.354667,2
2,007826.jpg,500.0,375.0,chair,139.0,185.0,184.0,231.0,0.324,0.553333,0.092,0.125333,2
3,007826.jpg,500.0,375.0,chair,258.0,312.0,180.0,314.0,0.57,0.658667,0.108,0.357333,2
4,007826.jpg,500.0,375.0,chair,10.0,93.0,195.0,358.0,0.103,0.737333,0.166,0.434667,2
5,007826.jpg,500.0,375.0,chair,82.0,243.0,252.0,372.0,0.325,0.832,0.322,0.32,2
6,007826.jpg,500.0,375.0,chair,43.0,144.0,319.0,375.0,0.187,0.925333,0.202,0.149333,2
7,002786.jpg,500.0,332.0,horse,80.0,348.0,97.0,272.0,0.428,0.555723,0.536,0.527108,9
8,002786.jpg,500.0,332.0,person,201.0,258.0,52.0,202.0,0.459,0.38253,0.114,0.451807,0
29,002962.jpg,500.0,334.0,tvmonitor,430.0,500.0,121.0,205.0,0.93,0.488024,0.14,0.251497,13


In [169]:
import os
from shutil import move

In [170]:
train_folder = "data_images/train"
test_folder = "data_images/test"
os.mkdir(train_folder)
os.mkdir(test_folder)

In [171]:
# 10.45
cols = ["filename", "id", "center_x", "center_y", "w", "h"]
groupby_obj_train = train_df[cols].groupby("filename")
groupby_obj_test = test_df[cols].groupby("filename")
#11.06


In [174]:
def save_data(filename, folderpath, group_obj):
    src = os.path.join("data_images", filename)
    dst = os.path.join(folderpath, filename)
    move(src, dst)
    
    text_filename = os.path.join(folderpath, os.path.splitext(filename)[0]+".txt")
    group_obj.get_group(filename).set_index("filename").to_csv(text_filename, sep=" ", index=False, header=False)


# To refresh the test/train data: delete test/train folders. Copy the image data to data_images again. Uncomment and run the file_name_series_train block, then comment it and do the same to the filename_series_test block

# file_name_series_train = pd.Series(groupby_obj_train.groups.keys())
# file_name_series_train.apply(save_data,args=(train_folder, groupby_obj_train))

file_name_series_test = pd.Series(groupby_obj_test.groups.keys())
file_name_series_test.apply(save_data,args=(test_folder, groupby_obj_test))

0       None
1       None
2       None
3       None
4       None
        ... 
997     None
998     None
999     None
1000    None
1001    None
Length: 1002, dtype: object