In [1]:
import numpy as np
import pandas as pd
from glob import glob
import xml.etree.ElementTree as xet
import cv2
import os

In [2]:
# base directory -> Get Current Working Directory
BASE_DIR = os.getcwd()

In [3]:
df = pd.read_csv('labels.csv')
df['filepath'] = df['filepath'].apply(lambda x: x.replace('\\','/'))

df.head()

Unnamed: 0,filepath,xmin,xmax,ymin,ymax
0,./images/N1.xml,1093,1396,645,727
1,./images/N100.xml,134,301,312,350
2,./images/N101.xml,31,139,128,161
3,./images/N102.xml,164,316,216,243
4,./images/N103.xml,813,1067,665,724


In [4]:
for i in df.index:
    temp = df['filepath'][i].split("./")[1]
    temp = os.path.join(BASE_DIR, temp)
    temp = temp.replace('\\','/')
    df['filepath'][i] = temp
    
df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['filepath'][i] = temp


Unnamed: 0,filepath,xmin,xmax,ymin,ymax
0,/home/ezra/Desktop/Module_4_Yolo_v5/images/N1.xml,1093,1396,645,727
1,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,134,301,312,350
2,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,31,139,128,161
3,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,164,316,216,243
4,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,813,1067,665,724


In [5]:
df['filepath'][0]

'/home/ezra/Desktop/Module_4_Yolo_v5/images/N1.xml'

In [6]:
# parsing
def parsing(path):
    parser = xet.parse(path).getroot()
    name = parser.find('filename').text
    #print(name)
    filename = f'images/{name}'
    filepath = os.path.join(BASE_DIR,filename)
    filepath = filepath.replace('\\','/')

    # width and height
    parser_size = parser.find('size')
    width = int(parser_size.find('width').text)
    height = int(parser_size.find('height').text)
    
    return filepath, width, height


df[['filename','width','height']] = df['filepath'].apply(parsing).apply(pd.Series)

df.head()

Unnamed: 0,filepath,xmin,xmax,ymin,ymax,filename,width,height
0,/home/ezra/Desktop/Module_4_Yolo_v5/images/N1.xml,1093,1396,645,727,/home/ezra/Desktop/Module_4_Yolo_v5/images/N1....,1920,1080
1,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,134,301,312,350,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,450,417
2,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,31,139,128,161,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,249,239
3,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,164,316,216,243,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,478,395
4,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,813,1067,665,724,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,1800,1200


In [7]:
# center_x, center_y, width , height
df['center_x'] = (df['xmax'] + df['xmin'])/(2*df['width'])
df['center_y'] = (df['ymax'] + df['ymin'])/(2*df['height'])

df['bb_width'] = (df['xmax'] - df['xmin'])/df['width']
df['bb_height'] = (df['ymax'] - df['ymin'])/df['height']

df.head()

Unnamed: 0,filepath,xmin,xmax,ymin,ymax,filename,width,height,center_x,center_y,bb_width,bb_height
0,/home/ezra/Desktop/Module_4_Yolo_v5/images/N1.xml,1093,1396,645,727,/home/ezra/Desktop/Module_4_Yolo_v5/images/N1....,1920,1080,0.648177,0.635185,0.157812,0.075926
1,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,134,301,312,350,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,450,417,0.483333,0.793765,0.371111,0.091127
2,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,31,139,128,161,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,249,239,0.341365,0.604603,0.433735,0.138075
3,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,164,316,216,243,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,478,395,0.502092,0.581013,0.317992,0.068354
4,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,813,1067,665,724,/home/ezra/Desktop/Module_4_Yolo_v5/images/N10...,1800,1200,0.522222,0.57875,0.141111,0.049167


In [8]:
### split the data into train and test
df_train = df.iloc[:200]
df_test = df.iloc[200:]

text.file

    class_id, center_x, center_y, bb_width, bb_height

In [9]:
from shutil import copy

In [10]:
train_folder = './data_images/train'

values = df_train[['filename','center_x','center_y','bb_width','bb_height']].values
for fname, x,y, w, h in values:
    image_name = os.path.split(fname)[-1]
    txt_name = os.path.splitext(image_name)[0]
    
    dst_image_path = os.path.join(train_folder,image_name)
    dst_label_file = os.path.join(train_folder,txt_name+'.txt')
    
    # copy each image into the folder
    copy(fname,dst_image_path)

    # generate .txt which has label info
    label_txt = f'0 {x} {y} {w} {h}'
    with open(dst_label_file,mode='w') as f:
        f.write(label_txt)
        
        f.close()


In [11]:
test_folder = './data_images/test'

values = df_test[['filename','center_x','center_y','bb_width','bb_height']].values
for fname, x,y, w, h in values:
    image_name = os.path.split(fname)[-1]
    txt_name = os.path.splitext(image_name)[0]
    
    dst_image_path = os.path.join(test_folder,image_name)
    dst_label_file = os.path.join(test_folder,txt_name+'.txt')
    
    # copy each image into the folder
    copy(fname,dst_image_path)

    # generate .txt which has label info
    label_txt = f'0 {x} {y} {w} {h}'
    with open(dst_label_file,mode='w') as f:
        f.write(label_txt)
        
        f.close()


---