In [1]:
import os,cv2
import numpy as np

## Load data

In [2]:
import json

# Load the training annotations. Later cells treat `train` as a list of
# records and read record['image']['pathname'] and record['objects'].
# The encoding is given explicitly because open() otherwise falls back to the
# platform's locale encoding, which is not guaranteed to be UTF-8.
with open('Data/train/train.json', encoding='utf-8') as f:
    train = json.load(f)

## Region Proposal

In [3]:
def get_iou(bb1, bb2):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Args:
        bb1: Mapping with keys 'x1', 'y1', 'x2', 'y2'. The area computation
            treats (x1, y1) as one corner and (x2, y2) as the opposite corner
            with x2 >= x1 and y2 >= y1.
        bb2: Second box, same layout as ``bb1``.

    Returns:
        Intersection area divided by union area. Returns 0.0 when the boxes do
        not overlap or when the union has no area (degenerate boxes).
    """
    x_left = max(bb1['x1'], bb2['x1'])
    y_top = max(bb1['y1'], bb2['y1'])
    x_right = min(bb1['x2'], bb2['x2'])
    y_bottom = min(bb1['y2'], bb2['y2'])

    # No overlap along at least one axis.
    if x_right < x_left or y_bottom < y_top:
        return 0.0

    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    bb1_area = (bb1['x2'] - bb1['x1']) * (bb1['y2'] - bb1['y1'])
    bb2_area = (bb2['x2'] - bb2['x1']) * (bb2['y2'] - bb2['y1'])

    union_area = float(bb1_area + bb2_area - intersection_area)
    # Zero-area boxes give a zero union; report "no overlap" instead of
    # dividing by zero.
    if union_area <= 0:
        return 0.0

    return intersection_area / union_area

|numeric|label|
|------|------|
|0|background|
|1|uninfected|
|2|infected|

In [4]:
# Only the first 200 annotated images are processed.
data = train[0:200]

# Selective-search engine that generates the candidate regions.
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()

# IoU cut-off: the extraction loop keeps proposals with IoU >= threshold as
# positives and proposals with IoU <= (1 - threshold) / 2 as background.
threshold = 0.85

# Accumulators for crops and labels that have not been written to disk yet;
# k is the 1-based index of the next chunk file.
train_images, train_labels = [], []
k = 1

In [5]:
# Build the classifier training set. For every annotated object, keep the
# selective-search proposals that overlap it strongly (labelled 1 or 2) plus an
# equal number of proposals that barely overlap it (labelled 0 = background).
# Crops are resized to 224x224 and written to disk in chunks of 1000 samples.
# Relies on `train_images`, `train_labels` and `k` being initialised by the
# preceding cell; re-run that cell first before re-running this one.
for i in range(len(data)):
    subject = data[i]
    path = 'Data/train'+subject['image']['pathname']
    image = cv2.imread(path)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be read.
        raise FileNotFoundError('Could not read image: '+path)
    rects = subject['objects']
    ss.setBaseImage(image)
    ss.switchToSelectiveSearchFast()
    ssrects = ss.process()
    # Keep at most 2000 proposals, shuffled with a per-image seed so that
    # re-runs select the same ones.
    ssrects = ssrects[np.random.RandomState(seed=i).permutation(len(ssrects))[:2000]]

    for j in range(len(rects)):
        box = rects[j]['bbx']
        # 'infected' -> 2; every other category -> 1.
        label = 2 if rects[j]['category']=='infected' else 1
        iou = np.array([get_iou(box,{"x1":x,"x2":x+w,"y1":y,"y2":y+h}) for x,y,w,h in ssrects])

        ind_t = np.where(iou>=threshold)[0]
        # Use no more negatives than positives so the classes stay balanced.
        # NOTE(review): negatives are chosen from the IoU with this object
        # only, so a "background" proposal may still overlap another object in
        # the same image - consider using the max IoU over all objects.
        ind_f = np.where(iou<=(1-threshold)/2)[0][:len(ind_t)]
        train_labels += [label]*len(ind_t)+[0]*len(ind_f)

        srects = ssrects[np.concatenate([ind_t,ind_f])]
        train_images += [cv2.resize(image[y:y+h,x:x+w], (224,224), interpolation = cv2.INTER_AREA) for x,y,w,h in srects]

        # Flush every complete chunk; `while` covers the case where a single
        # object contributes more than one chunk's worth of samples.
        while len(train_images)>=1000:
            np.save('Data/wrapped_train_data/images/data_'+str(k),np.array(train_images[:1000]))
            np.save('Data/wrapped_train_data/labels/data_'+str(k),np.array(train_labels[:1000]))
            train_images,train_labels = train_images[1000:],train_labels[1000:]
            k=k+1

# Write the final partial chunk once, after all images have been processed.
if len(train_images)>0:
    np.save('Data/wrapped_train_data/images/data_'+str(k),np.array(train_images))
    np.save('Data/wrapped_train_data/labels/data_'+str(k),np.array(train_labels))

In [6]:
# Report how many samples each saved chunk holds, and the overall total.
chunk_dir = "Data/wrapped_train_data/images/"

# Count only chunk files, so stray entries in the directory (checkpoints,
# hidden files) do not inflate the number of chunks.
n_chunks = sum(
    1 for name in os.listdir(chunk_dir)
    if name.startswith("data_") and name.endswith(".npy")
)

total_images = 0
for i in range(1, n_chunks+1):
    # Memory-map the file so its length is available without loading the
    # pixel data into RAM.
    chunk_len = len(np.load(chunk_dir+"data_"+str(i)+".npy", mmap_mode="r"))
    total_images += chunk_len
    print("data_"+str(i)+":"+str(chunk_len))

print("Number of images extracted:",total_images)

data_1:1000
data_2:1000
data_3:1000
data_4:1000
data_5:1000
data_6:1000
data_7:1000
data_8:1000
data_9:1000
data_10:1000
data_11:1000
data_12:1000
data_13:302
Number of images extracted: 12302


## Reassemble data

In [7]:
# Stitch the saved chunks back together into one image array X and one label
# array y. The number of chunks is derived from the directory contents rather
# than hard-coded, so the cell keeps working when the extraction step produces
# a different number of chunks.
images_dir = "Data/wrapped_train_data/images/"
labels_dir = "Data/wrapped_train_data/labels/"

n_chunks = sum(
    1 for name in os.listdir(images_dir)
    if name.startswith("data_") and name.endswith(".npy")
)
if n_chunks == 0:
    raise FileNotFoundError("No data_*.npy chunks found in "+images_dir)

# Chunks are numbered from 1; concatenate them in that order.
X = np.concatenate([np.load(images_dir+"data_"+str(c)+".npy") for c in range(1, n_chunks+1)])
y = np.concatenate([np.load(labels_dir+"data_"+str(c)+".npy") for c in range(1, n_chunks+1)])

assert len(X) == len(y), "image and label chunks have different lengths"

#### One-hot labels

In [8]:
# One row per sample and one column per class (0, 1, 2); each row gets a
# single 1 in the column given by that sample's label.
one_hot = np.zeros((len(y), 3))
for row, class_id in zip(one_hot, y):
    # `row` is a view into one_hot, so this writes through to the matrix.
    row[class_id] = 1

#### Save data

In [10]:
# Persist the assembled images, integer labels and one-hot labels.
# np.save appends the ".npy" extension to each target name.
for target, array in (("Data/X", X), ("Data/y", y), ("Data/y_onehot", one_hot)):
    np.save(target, array)