In [1]:
import shutil
import os, sys, random
import xml.etree.ElementTree as ET
from glob import glob
import pandas as pd
from shutil import copyfile
import pandas as pd
from sklearn import preprocessing, model_selection
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import patches
import numpy as np
import os

In [2]:
annotations = sorted(glob('BCCD/Annotations/*.xml'))

df = []
cnt = 0
for file in annotations:
    prev_filename = file.split('/')[-1].split('.')[0] + '.jpg'
    filename = str(cnt) + '.jpg'
    row = []
    parsedXML = ET.parse(file)
    for node in parsedXML.getroot().iter('object'):
        blood_cells = node.find('name').text
        xmin = int(node.find('bndbox/xmin').text)
        xmax = int(node.find('bndbox/xmax').text)
        ymin = int(node.find('bndbox/ymin').text)
        ymax = int(node.find('bndbox/ymax').text)
        row = [prev_filename, filename, blood_cells, xmin, xmax, ymin, ymax]
        df.append(row)
    cnt += 1

data = pd.DataFrame(df, columns=['prev_filename', 'filename', 'cell_type', 'xmin', 'xmax', 'ymin', 'ymax'])
data[['prev_filename','filename', 'cell_type', 'xmin', 'xmax', 'ymin', 'ymax']].to_csv('blood_cell_detection.csv', index=False)
data

Unnamed: 0,prev_filename,filename,cell_type,xmin,xmax,ymin,ymax
0,Annotations\BloodImage_00000.jpg,0.jpg,WBC,260,491,177,376
1,Annotations\BloodImage_00000.jpg,0.jpg,RBC,78,184,336,435
2,Annotations\BloodImage_00000.jpg,0.jpg,RBC,63,169,237,336
3,Annotations\BloodImage_00000.jpg,0.jpg,RBC,214,320,362,461
4,Annotations\BloodImage_00000.jpg,0.jpg,RBC,414,506,352,445
...,...,...,...,...,...,...,...
4883,Annotations\BloodImage_00410.jpg,363.jpg,Platelets,239,291,275,321
4884,Annotations\BloodImage_00410.jpg,363.jpg,Platelets,121,189,260,320
4885,Annotations\BloodImage_00410.jpg,363.jpg,Platelets,57,104,119,167
4886,Annotations\BloodImage_00410.jpg,363.jpg,Platelets,1,29,286,327


In [3]:
img_width = 640
img_height = 480

def width(df):
    return int(df.xmax - df.xmin)
def height(df):
    return int(df.ymax - df.ymin)
def x_center(df):
    return int(df.xmin + (df.width/2))
def y_center(df):
    return int(df.ymin + (df.height/2))
def w_norm(df):
    return df/img_width
def h_norm(df):
    return df/img_height

df = pd.read_csv('blood_cell_detection.csv')

le = preprocessing.LabelEncoder()
le.fit(df['cell_type'])
print(le.classes_)
labels = le.transform(df['cell_type'])
df['labels'] = labels

df['width'] = df.apply(width, axis=1)
df['height'] = df.apply(height, axis=1)

df['x_center'] = df.apply(x_center, axis=1)
df['y_center'] = df.apply(y_center, axis=1)

df['x_center_norm'] = df['x_center'].apply(w_norm)
df['width_norm'] = df['width'].apply(w_norm)

df['y_center_norm'] = df['y_center'].apply(h_norm)
df['height_norm'] = df['height'].apply(h_norm)

df.head(30)

['Platelets' 'RBC' 'WBC']


Unnamed: 0,prev_filename,filename,cell_type,xmin,xmax,ymin,ymax,labels,width,height,x_center,y_center,x_center_norm,width_norm,y_center_norm,height_norm
0,Annotations\BloodImage_00000.jpg,0.jpg,WBC,260,491,177,376,2,231,199,375,276,0.585938,0.360938,0.575,0.414583
1,Annotations\BloodImage_00000.jpg,0.jpg,RBC,78,184,336,435,1,106,99,131,385,0.204687,0.165625,0.802083,0.20625
2,Annotations\BloodImage_00000.jpg,0.jpg,RBC,63,169,237,336,1,106,99,116,286,0.18125,0.165625,0.595833,0.20625
3,Annotations\BloodImage_00000.jpg,0.jpg,RBC,214,320,362,461,1,106,99,267,411,0.417187,0.165625,0.85625,0.20625
4,Annotations\BloodImage_00000.jpg,0.jpg,RBC,414,506,352,445,1,92,93,460,398,0.71875,0.14375,0.829167,0.19375
5,Annotations\BloodImage_00000.jpg,0.jpg,RBC,555,640,356,455,1,85,99,597,405,0.932813,0.132812,0.84375,0.20625
6,Annotations\BloodImage_00000.jpg,0.jpg,RBC,469,567,412,480,1,98,68,518,446,0.809375,0.153125,0.929167,0.141667
7,Annotations\BloodImage_00000.jpg,0.jpg,RBC,1,87,333,437,1,86,104,44,385,0.06875,0.134375,0.802083,0.216667
8,Annotations\BloodImage_00000.jpg,0.jpg,RBC,4,95,406,480,1,91,74,49,443,0.076563,0.142187,0.922917,0.154167
9,Annotations\BloodImage_00000.jpg,0.jpg,RBC,155,247,74,174,1,92,100,201,124,0.314063,0.14375,0.258333,0.208333


In [4]:
df_train, df_valid = model_selection.train_test_split(df, test_size=0.1, random_state=13, shuffle=True)
print(df_train.shape, df_valid.shape)

(4399, 16) (489, 16)


In [5]:
df_train

Unnamed: 0,prev_filename,filename,cell_type,xmin,xmax,ymin,ymax,labels,width,height,x_center,y_center,x_center_norm,width_norm,y_center_norm,height_norm
2516,Annotations\BloodImage_00206.jpg,176.jpg,RBC,285,379,263,355,1,94,92,332,309,0.518750,0.146875,0.643750,0.191667
2164,Annotations\BloodImage_00172.jpg,148.jpg,RBC,566,640,274,386,1,74,112,603,330,0.942187,0.115625,0.687500,0.233333
2045,Annotations\BloodImage_00164.jpg,140.jpg,RBC,309,415,233,354,1,106,121,362,293,0.565625,0.165625,0.610417,0.252083
688,Annotations\BloodImage_00037.jpg,35.jpg,RBC,48,147,5,81,1,99,76,97,43,0.151562,0.154688,0.089583,0.158333
2116,Annotations\BloodImage_00169.jpg,145.jpg,RBC,330,457,222,341,1,127,119,393,281,0.614062,0.198437,0.585417,0.247917
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,Annotations\BloodImage_00008.jpg,8.jpg,RBC,221,327,18,116,1,106,98,274,67,0.428125,0.165625,0.139583,0.204167
866,Annotations\BloodImage_00046.jpg,44.jpg,RBC,138,251,294,398,1,113,104,194,346,0.303125,0.176563,0.720833,0.216667
2790,Annotations\BloodImage_00228.jpg,197.jpg,RBC,52,150,78,181,1,98,103,101,129,0.157812,0.153125,0.268750,0.214583
74,Annotations\BloodImage_00004.jpg,4.jpg,RBC,510,606,112,195,1,96,83,558,153,0.871875,0.150000,0.318750,0.172917
