In [None]:
import pandas as pd
import cv2
import numpy as np
import os.path
import random
from PIL import Image

In [None]:
'''Data augmentation function set'''
# salt and pepper noise
def SaltAndPepper(src,percetage):
    """Return a copy of `src` with salt-and-pepper noise on random channel values.

    Args:
        src: Image array indexed as [row, column, channel]; channels 0..2 are
            the ones that may be modified.
        percetage: Fraction of the pixel count (rows * columns) that determines
            how many random (row, column, channel) positions are overwritten.
            Positions are drawn with replacement, so the number of distinct
            modified values may be smaller.

    Returns:
        A new array with the same shape and dtype as `src`; `src` is untouched.
    """
    SP_NoiseImg = src.copy()
    height, width = src.shape[0], src.shape[1]
    SP_NoiseNum = int(percetage * height * width)
    if SP_NoiseNum <= 0:
        return SP_NoiseImg

    # np.random.randint excludes the upper bound, so pass the full dimension
    # to make the last row / column reachable.
    rows = np.random.randint(0, height, size=SP_NoiseNum)
    cols = np.random.randint(0, width, size=SP_NoiseNum)
    channels = np.random.randint(0, 3, size=SP_NoiseNum)
    # Draw from {0, 1} (upper bound 2 is exclusive) so both pepper (0) and
    # salt (255) actually occur.
    is_pepper = np.random.randint(0, 2, size=SP_NoiseNum) == 0
    SP_NoiseImg[rows, cols, channels] = np.where(is_pepper, 0, 255)
    return SP_NoiseImg

# gaussian noise
def addGaussianNoise(image,percetage,sigma=25.0):
    """Return a copy of `image` with Gaussian noise added at random positions.

    The noise is added to the existing value at each selected position. For
    integer images the result is rounded and clipped to the dtype's range, so
    a sample can never wrap around (e.g. -1 becoming 255 in a uint8 image).

    Args:
        image: Image array indexed as [row, column, channel]; channels 0..2
            are the ones that may be modified.
        percetage: Fraction of the pixel count (rows * columns) that determines
            how many random (row, column, channel) positions receive noise.
            Positions are drawn with replacement.
        sigma: Standard deviation of the zero-mean noise, in pixel-value units.

    Returns:
        A new array with the same shape and dtype as `image`; `image` is
        untouched.
    """
    G_Noiseimg = image.copy()
    h, w = image.shape[0], image.shape[1]
    G_NoiseNum = int(percetage * h * w)
    if G_NoiseNum <= 0:
        return G_Noiseimg

    rows = np.random.randint(0, h, size=G_NoiseNum)
    cols = np.random.randint(0, w, size=G_NoiseNum)
    channels = np.random.randint(0, 3, size=G_NoiseNum)

    # Work in float so the noise is not truncated before it is added.
    noisy_values = (
        G_Noiseimg[rows, cols, channels].astype(np.float64)
        + sigma * np.random.randn(G_NoiseNum)
    )
    if np.issubdtype(G_Noiseimg.dtype, np.integer):
        limits = np.iinfo(G_Noiseimg.dtype)
        noisy_values = np.clip(np.rint(noisy_values), limits.min, limits.max)
    # If a position was drawn more than once, the last draw wins.
    G_Noiseimg[rows, cols, channels] = noisy_values.astype(G_Noiseimg.dtype)
    return G_Noiseimg



# rotate
def rotate(image, angle, center=None, scale=1.0):
    """Rotate `image` about a pivot point with OpenCV's affine warp.

    Args:
        image: Array whose first two dimensions are (height, width).
        angle: Rotation angle handed to `cv2.getRotationMatrix2D`.
        center: (x, y) pivot; when None the image centre (w / 2, h / 2) is used.
        scale: Scale factor handed to `cv2.getRotationMatrix2D`.

    Returns:
        The warped image. The output canvas keeps the input's (width, height),
        so it is not enlarged to fit the rotated content.
    """
    height, width = image.shape[:2]
    # Fall back to the image centre when the caller gives no pivot.
    pivot = (width / 2, height / 2) if center is None else center
    matrix = cv2.getRotationMatrix2D(pivot, angle, scale)
    return cv2.warpAffine(image, matrix, (width, height))

# flip
def flip(image):
    """Return `image` mirrored left-to-right (column order reversed)."""
    return np.fliplr(image)

# center crop
def resizing(img):
    """Crop the central region of `img`, keeping about 2/3 of each side.

    `img` must have exactly three dimensions (height, width, channels). The
    crop extends one third of the side length on either side of the centre,
    with the bounds truncated to integers.
    """
    rows, cols, _ = img.shape
    top, bottom = int(rows/2-rows*2/6), int(rows/2+rows*2/6)
    left, right = int(cols/2-cols*2/6), int(cols/2+cols*2/6)
    return img[top:bottom, left:right]

In [None]:
# Load the table (first CSV column is used as the index on read), then replace
# that index with a fresh 0..n-1 range so rows can be addressed by position.
trdf = (
    pd.read_csv('./HRM+EIpHM+Esophagoscopes.csv', index_col=0)
    .reset_index(drop=True)
)

In [None]:
# data augmentation
# For every row of `trdf`, write the original image plus 19 augmented variants
# to <file_dir>/<4th component of the source path>/<row index><suffix>.jpg.
trau = []  # one [output path, label name, row columns from position 2 on] per saved image
tr = []    # one [output path, label name] per un-augmented original only

file_dir = '../output/'
path = file_dir

for i in range(len(trdf)):
    label_tr = trdf.loc[i, 'label']
    imgpath = trdf.loc[i, 'path']
    # Any label >= 1 counts as 'surgery'; everything else is 'surgery_free'.
    label_name = 'surgery' if label_tr >= 1 else 'surgery_free'

    img = cv2.imread(imgpath)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None;
        # fail here with the offending path instead of deep inside OpenCV.
        raise FileNotFoundError('cannot read image for row %d: %s' % (i, imgpath))

    # The output sub-folder is named after the 4th '/'-separated path component.
    out_dir = path + imgpath.split('/')[3]
    os.makedirs(out_dir, exist_ok=True)
    features = trdf.iloc[i, 2:]

    flipped_img = flip(img)
    img_gauss = addGaussianNoise(img, 0.3)
    blur = cv2.GaussianBlur(img, (3, 3), 0)
    rs = resizing(img)
    rs_flip = flip(rs)

    # (file-name suffix, image) pairs; each rotated variant is derived from the
    # base image its suffix names (noise -> noisy image, blur -> blurred image).
    variants = [
        ('', img),
        ('_r90', rotate(img, 90)),
        ('_r180', rotate(img, 180)),
        ('_fli', flipped_img),
        ('_f_r90', rotate(flipped_img, 90)),
        ('_f_r180', rotate(flipped_img, 180)),
        ('_noise', img_gauss),
        ('_noise_r90', rotate(img_gauss, 90)),
        ('_noise_r180', rotate(img_gauss, 180)),
        ('_blur', blur),
        ('_blur_r90', rotate(blur, 90)),
        ('_blur_r180', rotate(blur, 180)),
        ('_resizing', rs),
        ('_rs_r90', rotate(rs, 90)),
        ('_rs_r180', rotate(rs, 180)),
        ('_rs_fli', rs_flip),
        ('_rs_fli_r90', rotate(rs_flip, 90)),
        ('_rs_fli_r180', rotate(rs_flip, 180)),
    ]

    for suffix, variant in variants:
        out_path = out_dir + '/' + str(i) + suffix + '.jpg'
        cv2.imwrite(out_path, variant)
        trau.append([out_path, label_name, features])

    tr.append([out_dir + '/' + str(i) + '.jpg', label_name])

In [None]:
# save data
# Build the full table in one shot: the first two entries of every `trau`
# record become the 'path' and 'label' columns, and the third entry (a row
# slice of the source table) is expanded into one column per field.
meta = pd.DataFrame([entry[:2] for entry in trau], columns=['path', 'label'])
feature_rows = pd.DataFrame([entry[2] for entry in trau]).reset_index(drop=True)
temp = pd.concat([meta, feature_rows], axis=1)

# Each record must map to exactly one output row.
assert len(temp) == len(trau), 'row count changed while assembling all_data'

os.makedirs('../output/temp_data', exist_ok=True)
temp.to_csv('../output/temp_data/all_data.csv', index=False)

In [None]:
# encode the labels and save the encoded data
label_codes = {'surgery': 1, 'surgery_free': 0}

# `trau` records carry a third element (the feature slice); only the first two
# (path, label) belong in this table, so slice them out before building it.
traudf = pd.DataFrame([entry[:2] for entry in trau], columns=['path', 'label'])
# NOTE: this rebinds `trdf` from the source table to the (path, label) table of
# un-augmented images; re-run the read-data cell before re-running augmentation.
trdf = pd.DataFrame(tr, columns=['path', 'label'])

# Map only the label column; paths are left untouched.
traudf['label'] = traudf['label'].map(label_codes)
trdf['label'] = trdf['label'].map(label_codes)

os.makedirs('../output/temp_data', exist_ok=True)
traudf.to_csv('../output/temp_data/all.csv')