In [116]:
import numpy as np
import pandas as pd
import os, sys
import shutil
import matplotlib.pyplot as plt
import skimage.io
from skimage.transform import resize
import cv2
from imgaug import augmenters as iaa
from tqdm import tqdm
import PIL
from PIL import Image, ImageOps
from sklearn.utils import class_weight, shuffle
import tensorflow as tf
from sklearn.metrics import f1_score, fbeta_score
from sklearn.model_selection import train_test_split
import warnings
import shutil
warnings.filterwarnings("ignore")

# ---- Notebook configuration: tunable constants and data locations ----
WORKERS = 2
CHANNEL = 3
IMG_SIZE = 224      # side length (pixels) used when images are resized
NUM_CLASSES = 5     # diagnosis labels 0..4
SEED = 77           # random_state shared by shuffle / sampling / splitting calls
TRAIN_NUM = 1000 # use 1000 when you just want to explore new idea, use -1 for full train
root_path = './data/pre'
path_from = './data/pre/raw/'               # source images, one '<id_code>.png' per row
test_path_to = './data/pre/test_images/'    # per-class test folders are created here
train_path_to = './data/pre/train_images/'  # per-class train folders are created here
# Scratch output directory used by the preprocessing-experiment cells further down.
# It was referenced there but never defined, so those cells failed on a fresh kernel.
# NOTE(review): the location is a guess -- confirm the intended directory. The trailing
# '/' is required because later code builds sub-paths by string concatenation.
path_to = './data/pre/experiments/'
TRAIN_DF = pd.read_csv('./data/train.csv')

'''0': [1443, 362],
 '1': [295, 75],
 '2': [799, 200],
 '3': [154, 39],
 '4': [235, 60]}'''

"0': [1443, 362],\n '1': [295, 75],\n '2': [799, 200],\n '3': [154, 39],\n '4': [235, 60]}"

In [127]:
def split_train_test(df, t=0.1, random_state=None, num_classes=5):
    """Copy the raw images into per-class train / test folders.

    For every class ``i`` in ``range(num_classes)`` the rows of ``df`` with
    ``diagnosis == i`` are shuffled and split; the matching
    ``<path_from>/<id_code>.png`` files are copied to

    * ``<train_path_to>/<i>/<i>/`` -- the nested layout that ``augment`` and
      ``make_new_train_table`` read the original images from, and
    * ``<test_path_to>/<i>/`` -- the flat layout ``make_new_test_table`` reads.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``id_code`` and ``diagnosis``.
    t : float
        Test fraction. Each non-empty class contributes ``1 + int(t * n)``
        test images (capped at the class size ``n``).
    random_state : int or None
        Seed forwarded to ``DataFrame.sample``. Pass a fixed value (e.g. the
        notebook's SEED) to make the split reproducible.
    num_classes : int
        Number of diagnosis labels to process.

    Returns
    -------
    dict
        ``{str(i): [n_train, n_test]}`` for every class.

    Notes
    -----
    Existing target folders are not emptied. Re-running with a different
    shuffle leaves the previous copies in place, so the same image can end up
    in both train and test -- clear the folders first or reuse the same
    ``random_state``.
    """
    # A single shuffle is already a uniform permutation; repeating it adds nothing.
    shuffled = df.sample(frac=1, random_state=random_state).reset_index(drop=True)
    lengths = dict()
    for i in range(num_classes):
        ids = shuffled.loc[shuffled['diagnosis'] == i, 'id_code'].tolist()
        total = len(ids)
        # Clamp so a large `t` (or an empty class) can never yield a negative train size.
        testsz = min(total, 1 + int(t * total))
        trainsz = total - testsz
        trainv = [os.path.join(path_from, x + '.png') for x in ids[:trainsz]]
        testv = [os.path.join(path_from, x + '.png') for x in ids[trainsz:]]
        # Originals live one level deeper than the class folder: Keras'
        # flow_from_directory (see augment) needs a sub-directory per class.
        target_train = os.path.join(train_path_to, str(i), str(i))
        target_test = os.path.join(test_path_to, str(i))
        os.makedirs(target_train, exist_ok=True)
        os.makedirs(target_test, exist_ok=True)
        for file_name in trainv:
            shutil.copy(file_name, target_train)
        for file_name in testv:
            shutil.copy(file_name, target_test)
        lengths[str(i)] = [len(trainv), len(testv)]
    return lengths
            
def spoil_image(img):
    """Return a noisy copy of ``img`` with values limited to [0, 255].

    A noise level ``sigma`` is drawn uniformly from the integers 0..19, then
    zero-mean Gaussian noise with that standard deviation is added to every
    element. ``sigma == 0`` leaves the values unchanged apart from clipping.

    Parameters
    ----------
    img : numpy.ndarray
        Image array of any shape; it is not modified.

    Returns
    -------
    numpy.ndarray
        float64 array with the same shape as ``img``.
    """
    sigma = np.random.choice(20, 1)[0]
    noise = np.random.normal(0, sigma, img.shape)
    # Plain NumPy addition + clip gives the same values as the former
    # cv2.addWeighted(img, 1, noise, 1, 0) followed by manual masking, without
    # restricting the input to OpenCV-compatible image shapes.
    return np.clip(img.astype(float) + noise, 0, 255)

def augment(maxCnt, num_classes=5):
    """Generate augmented images for every class that has fewer than ``maxCnt`` originals.

    Originals are read from ``<train_path_to>/<i>/<i>/``. Augmented images
    (random rotation of up to 10 degrees plus the noise from ``spoil_image``)
    are written by Keras into ``<train_path_to>/<i>/`` with the file-name
    prefix ``pr_<i>_<j>``, where ``j`` is the augmentation pass.

    Each pass pushes the whole class through the generator once, so
    ``1 + (maxCnt - cnt) // cnt`` passes produce at least ``maxCnt - cnt``
    new images.

    Parameters
    ----------
    maxCnt : int
        Target number of images per class.
    num_classes : int
        Number of class folders (``0 .. num_classes - 1``) to process.
    """
    gen = tf.keras.preprocessing.image.ImageDataGenerator(rotation_range = 10, preprocessing_function = spoil_image)
    for i in range(num_classes):
        targetdir = os.path.join(train_path_to, str(i))
        realimgdir = os.path.join(targetdir, str(i))
        cnt = sum(1 for f in os.listdir(realimgdir) if os.path.isfile(os.path.join(realimgdir, f)))
        # Nothing to do for full classes; an empty class has nothing to augment
        # (and would otherwise divide by zero below).
        if cnt == 0 or cnt >= maxCnt:
            continue
        batch_sz = cnt  # one batch == every original of this class
        epochs = 1 + ((maxCnt - cnt) // batch_sz)
        for j in range(epochs):
            # A fresh iterator per pass so the pass number ends up in the saved file names.
            tr_gen = gen.flow_from_directory(target_size = (IMG_SIZE, IMG_SIZE),
                                             directory = targetdir,
                                             save_to_dir = targetdir,
                                             save_prefix = 'pr_' + str(i)+ '_' + str(j),
                                             batch_size = batch_sz)
            # Pulling one batch is what triggers the writes to save_to_dir.
            next(tr_gen)
                
def make_new_train_table(maxCnt, num_classes=5):
    """Build the training index: originals plus enough augmented files to reach ``maxCnt``.

    For each class ``i`` the original images in ``<train_path_to>/<i>/<i>/``
    are listed first (``type`` 0). If there are fewer than ``maxCnt`` of them,
    the list is topped up with augmented files ``pr_<i>_<j>_...`` found
    directly in ``<train_path_to>/<i>/`` (taken in sorted name order); their
    ``type`` is ``j + 1``, i.e. the augmentation pass that produced them.

    Parameters
    ----------
    maxCnt : int
        Desired number of rows per class. A class contributes fewer rows when
        not enough augmented files exist, and more when it already has more
        than ``maxCnt`` originals.
    num_classes : int
        Number of class folders (``0 .. num_classes - 1``) to scan.

    Returns
    -------
    pandas.DataFrame
        Columns ``filename`` (path relative to ``train_path_to``), ``class``
        and ``type``.
    """
    dictpd = {'filename' : [], 'class' : [], 'type' : []}
    for i in range(num_classes):
        targetdir = os.path.join(train_path_to, str(i))
        subdir = os.path.join(targetdir, str(i))
        filenames = ['./{}/{}/{}'.format(i, i, f) for f in os.listdir(subdir)
                     if os.path.isfile(os.path.join(subdir, f))]
        types = [0] * len(filenames)
        extendLen = maxCnt - len(filenames)
        if extendLen > 0:
            # Only files written by augment() carry the 'pr_' prefix; anything else
            # in the folder has no pass number to parse and is ignored.
            augNames = sorted(f for f in os.listdir(targetdir)
                              if f.startswith('pr_') and os.path.isfile(os.path.join(targetdir, f)))
            chosen = augNames[:extendLen]
            filenames += ['./{}/{}'.format(i, f) for f in chosen]
            # 'pr_<class>_<pass>_<index>_<hash>.png' -> third field is the pass number.
            types += [int(f.split('_')[2]) + 1 for f in chosen]
        dictpd['filename'] += filenames
        # One label per listed file: using maxCnt here produced columns of unequal
        # length whenever a class did not end up with exactly maxCnt files.
        dictpd['class'] += [i] * len(filenames)
        dictpd['type'] += types
    return pd.DataFrame(dictpd)

def make_new_test_table():
    """Index every file found directly in ``<test_path_to>/<label>/`` for labels 0..4.

    Returns a DataFrame with the columns ``filename`` (path relative to
    ``test_path_to``), ``class`` (the folder label) and ``type`` (always 0).
    """
    names = []
    labels = []
    for label in range(5):
        class_dir = os.path.join(test_path_to, str(label))
        found = []
        for entry in os.listdir(class_dir):
            # sub-directories are skipped; only regular files are indexed
            if os.path.isfile(os.path.join(class_dir, entry)):
                found.append('./{}/{}'.format(label, entry))
        names.extend(found)
        labels.extend([label] * len(found))
    return pd.DataFrame({'filename' : names, 'class' : labels, 'type' : [0] * len(names)})

In [128]:
# Build both index tables and store them as CSV files under root_path.
# 1443 equals the class-0 train count recorded in the first cell, so every
# class is topped up to the size of the largest one.
train_df = make_new_train_table(1443)
test_df = make_new_test_table()
for table, csv_name in ((train_df, 'train.csv'), (test_df, 'test.csv')):
    table.to_csv(os.path.join(root_path, csv_name), index = False)

In [129]:
# Display the training index table built in the previous cell (columns: filename, class, type).
train_df

Unnamed: 0,filename,class,type
0,./0/0/002c21358ce6.png,0,0
1,./0/0/005b95c28852.png,0,0
2,./0/0/0097f532ac9f.png,0,0
3,./0/0/00f6c1be5a33.png,0,0
4,./0/0/0125fbd2e791.png,0,0
...,...,...,...
7210,./4/pr_4_5_125_3133204.png,4,6
7211,./4/pr_4_5_126_4102704.png,4,6
7212,./4/pr_4_5_127_5576619.png,4,6
7213,./4/pr_4_5_128_2020103.png,4,6


In [None]:
# Label tables used by the exploration cells below.
df_train = pd.read_csv('./data/train.csv')
df_test = pd.read_csv('./data/test.csv')

# Shuffle image ids and diagnoses together (same permutation, fixed seed).
x, y = shuffle(df_train['id_code'], df_train['diagnosis'], random_state=SEED)

def crop_image_from_gray(img,tol=7):
    """Crop away the rows and columns in which no pixel is brighter than ``tol``.

    Parameters
    ----------
    img : numpy.ndarray
        Either a 2-D grayscale image or a 3-D image whose first three
        channels are in RGB order (brightness is taken from
        ``cv2.COLOR_RGB2GRAY``).
    tol : int
        Brightness threshold; a row/column is kept when at least one of its
        pixels is strictly greater than ``tol``.

    Returns
    -------
    numpy.ndarray
        The cropped image (3-D inputs come back with exactly three channels).
        If nothing is brighter than ``tol`` the original ``img`` is returned
        unchanged instead of an empty array.

    Raises
    ------
    ValueError
        If ``img`` is neither 2-D nor 3-D.
    """
    if img.ndim == 2:
        mask = img > tol
    elif img.ndim == 3:
        mask = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) > tol
    else:
        raise ValueError('expected a 2-D or 3-D image, got ndim={}'.format(img.ndim))

    rows = mask.any(1)
    cols = mask.any(0)
    if not rows.any():
        # image is too dark so that we would crop out everything: keep the original
        return img

    window = np.ix_(rows, cols)  # computed once, reused for every channel
    if img.ndim == 2:
        return img[window]
    return np.stack([img[:, :, channel][window] for channel in range(3)], axis=-1)
    
def load_ben_color(path, sigmaX=10):
    """Load an image, crop its dark border and enhance local contrast.

    The image is read with OpenCV, converted to RGB, cropped with
    ``crop_image_from_gray`` and then combined with its own Gaussian blur as
    ``4 * image - 4 * blur + 128``. This variant keeps the cropped resolution
    (no resize).

    Parameters
    ----------
    path : str
        Image file to read.
    sigmaX : float
        Standard deviation of the Gaussian blur.

    Returns
    -------
    numpy.ndarray
        The processed image in RGB channel order.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read ``path``.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None
        raise FileNotFoundError('Could not read image: {}'.format(path))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = crop_image_from_gray(image)
    blurred = cv2.GaussianBlur(image, (0, 0), sigmaX)
    return cv2.addWeighted(image, 4, blurred, -4, 128)

def circle_crop(img, sigmaX=10):
    """Create a circular crop around the image centre and enhance local contrast.

    Steps: read the file, convert to RGB, crop the dark border, black out
    everything outside the largest centred circle, crop again and finally
    combine the result with its Gaussian blur as ``4 * image - 4 * blur + 128``.

    Parameters
    ----------
    img : str
        Path of the image file to read.
    sigmaX : float
        Standard deviation of the Gaussian blur.

    Returns
    -------
    numpy.ndarray
        The processed image in RGB channel order.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file.
    """
    image = cv2.imread(img)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None
        raise FileNotFoundError('Could not read image: {}'.format(img))
    # Convert before cropping: crop_image_from_gray derives brightness with
    # COLOR_RGB2GRAY, so feeding it BGR data would swap the red/blue weights.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = crop_image_from_gray(image)

    height, width = image.shape[:2]
    center_x = width // 2
    center_y = height // 2
    radius = min(center_x, center_y)

    # Filled disc (value 1) used as the keep-mask for bitwise_and.
    circle_img = np.zeros((height, width), np.uint8)
    cv2.circle(circle_img, (center_x, center_y), radius, 1, thickness=-1)
    image = cv2.bitwise_and(image, image, mask=circle_img)
    image = crop_image_from_gray(image)
    blurred = cv2.GaussianBlur(image, (0, 0), sigmaX)
    return cv2.addWeighted(image, 4, blurred, -4, 128)

In [None]:
# Experiment: vary the sharpening weight of the blur-subtraction enhancement on
# the first shuffled image and save every variant to ./data/ for comparison.
# `path_to` must be set in the configuration cell before this cell runs.
os.makedirs(path_to, exist_ok=True)

for idd in x.values[:1]:  # only the first sample is inspected
    path = path_from + idd + '.png'
    source = cv2.imread(path)
    if source is None:
        raise FileNotFoundError('Could not read image: {}'.format(path))
    # Loading, cropping, resizing and blurring do not depend on the weight,
    # so they are done once instead of once per weight.
    source = cv2.cvtColor(source, cv2.COLOR_BGR2RGB)
    source = crop_image_from_gray(source)
    source = cv2.resize(source, (IMG_SIZE, IMG_SIZE))
    blurred = cv2.GaussianBlur(source, (0, 0), 30)
    for ii in [1,2,3,4,5,6,8,9,10]:
        image = cv2.addWeighted(source, ii, blurred, -ii, 128)
        print("min = {}; max = {}.".format(np.min(image),np.max(image)))
        path1 = './data/' + idd + '_{}'.format(ii) + '.png'
        # `image` is RGB but cv2.imwrite expects BGR; convert so the saved
        # file does not have red and blue swapped.
        cv2.imwrite(path1, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

In [None]:
# Echo the array left in `image` by the previous cell (raw pixel values, not a plot).
image

In [None]:
# Experiment: run load_ben_color with blur sigmas 110, 120, ..., 500 on the first
# 10 shuffled images of diagnosis 0 and of diagnosis 2; each sigma gets its own
# freshly created sub-folder of `path_to`.
# `path_to` must be set in the configuration cell and end with '/'.
os.makedirs(path_to, exist_ok=True)

# The sample selection does not depend on sigma, so it is done once up front.
taken = {0: 0, 2: 0}
selected = []
for idd, diagnos in zip(x.values, y.values):
    if diagnos in taken and taken[diagnos] < 10:
        taken[diagnos] += 1
        selected.append((idd, diagnos))

for sx in range(110,510,10):
    path_to_to = path_to + '{}/'.format(sx)
    if os.path.exists(path_to_to):
        shutil.rmtree(path_to_to)  # start from an empty folder on every run
    os.mkdir(path_to_to)
    for idd, diagnos in selected:
        img = load_ben_color(path_from + idd + '.png', sx)
        path = path_to_to + idd + '_{}'.format(diagnos) + '.png'
        print(path)
        # load_ben_color returns RGB but cv2.imwrite expects BGR; convert so
        # the saved file does not have red and blue swapped.
        cv2.imwrite(path, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
   

In [None]:
# Look up one id_code in the shuffled ids.
# NOTE(review): on an integer-indexed Series `x[1000]` selects by index label, so this
# is presumably the row originally labelled 1000, not the 1000th shuffled element -- confirm.
x[1000]

In [None]:
# Stratified 85/15 split of the shuffled ids, then compare the label
# distributions of both parts.
train_x, valid_x, train_y, valid_y = train_test_split(
    x, y,
    test_size=0.15, stratify=y, random_state=SEED,
)
print(*(part.shape for part in (train_x, train_y, valid_x, valid_y)))
train_y.hist()
valid_y.hist()

In [None]:
# Raw colour images: 5 random samples per class on a 5x5 grid (one row per class).
fig = plt.figure(figsize=(25, 16))
for class_id in sorted(train_y.unique()):
    class_rows = df_train.loc[df_train['diagnosis'] == class_id].sample(5, random_state=SEED)
    # `slot` is the 1-based subplot position; each class starts a new row
    for slot, (idx, row) in enumerate(class_rows.iterrows(), start=class_id * 5 + 1):
        ax = fig.add_subplot(5, 5, slot, xticks=[], yticks=[])
        path=f"./train_images/{row['id_code']}.png"
        image = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))

        plt.imshow(image)
        ax.set_title('Label: %d-%d-%s' % (class_id, idx, row['id_code']) )

In [None]:
%%time
# Grayscale view: 5 random samples per class on a 5x5 grid (one row per class).
fig = plt.figure(figsize=(25, 16))
for class_id in sorted(train_y.unique()):
    class_rows = df_train.loc[df_train['diagnosis'] == class_id].sample(5, random_state=SEED)
    # `slot` is the 1-based subplot position; each class starts a new row
    for slot, (idx, row) in enumerate(class_rows.iterrows(), start=class_id * 5 + 1):
        ax = fig.add_subplot(5, 5, slot, xticks=[], yticks=[])
        path=f"./train_images/{row['id_code']}.png"
        image = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2GRAY)
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))

        plt.imshow(image, cmap='gray')
        ax.set_title('Label: %d-%d-%s' % (class_id, idx, row['id_code']) )

In [None]:
%%time
# Grayscale view with blur-subtraction enhancement: 5 random samples per class.
fig = plt.figure(figsize=(25, 16))
for class_id in sorted(train_y.unique()):
    class_rows = df_train.loc[df_train['diagnosis'] == class_id].sample(5, random_state=SEED)
    # `slot` is the 1-based subplot position; each class starts a new row
    for slot, (idx, row) in enumerate(class_rows.iterrows(), start=class_id * 5 + 1):
        ax = fig.add_subplot(5, 5, slot, xticks=[], yticks=[])
        path=f"./train_images/{row['id_code']}.png"
        image = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2GRAY)
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        # the trick: 4 * image - 4 * blurred + 128, with the blur sigma tied to the image size
        blurred = cv2.GaussianBlur( image , (0,0) , IMG_SIZE/10)
        image = cv2.addWeighted ( image,4, blurred ,-4 ,128)

        plt.imshow(image, cmap='gray')
        ax.set_title('Label: %d-%d-%s' % (class_id, idx, row['id_code']) )

In [None]:
def crop_image_from_gray(img,tol=7):
    """Crop away the rows and columns in which no pixel is brighter than ``tol``.

    Parameters
    ----------
    img : numpy.ndarray
        Either a 2-D grayscale image or a 3-D image whose first three
        channels are in RGB order (brightness is taken from
        ``cv2.COLOR_RGB2GRAY``).
    tol : int
        Brightness threshold; a row/column is kept when at least one of its
        pixels is strictly greater than ``tol``.

    Returns
    -------
    numpy.ndarray
        The cropped image (3-D inputs come back with exactly three channels).
        If nothing is brighter than ``tol`` the original ``img`` is returned
        unchanged instead of an empty array.

    Raises
    ------
    ValueError
        If ``img`` is neither 2-D nor 3-D.
    """
    if img.ndim == 2:
        mask = img > tol
    elif img.ndim == 3:
        mask = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) > tol
    else:
        raise ValueError('expected a 2-D or 3-D image, got ndim={}'.format(img.ndim))

    rows = mask.any(1)
    cols = mask.any(0)
    if not rows.any():
        # image is too dark so that we would crop out everything: keep the original
        return img

    window = np.ix_(rows, cols)  # computed once, reused for every channel
    if img.ndim == 2:
        return img[window]
    return np.stack([img[:, :, channel][window] for channel in range(3)], axis=-1)
    
def load_ben_color(path, sigmaX=10):
    """Load an image, crop its dark border, resize it and enhance local contrast.

    The image is read with OpenCV, converted to RGB, cropped with
    ``crop_image_from_gray``, resized to ``IMG_SIZE x IMG_SIZE`` and then
    combined with its own Gaussian blur as ``4 * image - 4 * blur + 128``.

    Parameters
    ----------
    path : str
        Image file to read.
    sigmaX : float
        Standard deviation of the Gaussian blur.

    Returns
    -------
    numpy.ndarray
        The processed image in RGB channel order.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read ``path``.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None
        raise FileNotFoundError('Could not read image: {}'.format(path))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = crop_image_from_gray(image)
    image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
    blurred = cv2.GaussianBlur(image, (0, 0), sigmaX)
    return cv2.addWeighted(image, 4, blurred, -4, 128)

In [None]:
# load_ben_color preview: NUM_SAMP random samples per class, one row per class.
NUM_SAMP=7
fig = plt.figure(figsize=(25, 16))
for class_id in sorted(train_y.unique()):
    class_rows = df_train.loc[df_train['diagnosis'] == class_id].sample(NUM_SAMP, random_state=SEED)
    # `slot` is the 1-based subplot position; each class starts a new row
    for slot, (idx, row) in enumerate(class_rows.iterrows(), start=class_id * NUM_SAMP + 1):
        ax = fig.add_subplot(5, NUM_SAMP, slot, xticks=[], yticks=[])
        path=f"./train_images/{row['id_code']}.png"
        image = load_ben_color(path,sigmaX=30)

        plt.imshow(image)
        ax.set_title('%d-%d-%s' % (class_id, idx, row['id_code']) )

In [None]:
def circle_crop(img, sigmaX=10):
    """Create a circular crop around the image centre and enhance local contrast.

    Steps: read the file, convert to RGB, crop the dark border, black out
    everything outside the largest centred circle, crop again and finally
    combine the result with its Gaussian blur as ``4 * image - 4 * blur + 128``.

    Parameters
    ----------
    img : str
        Path of the image file to read.
    sigmaX : float
        Standard deviation of the Gaussian blur.

    Returns
    -------
    numpy.ndarray
        The processed image in RGB channel order.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file.
    """
    image = cv2.imread(img)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None
        raise FileNotFoundError('Could not read image: {}'.format(img))
    # Convert before cropping: crop_image_from_gray derives brightness with
    # COLOR_RGB2GRAY, so feeding it BGR data would swap the red/blue weights.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = crop_image_from_gray(image)

    height, width = image.shape[:2]
    center_x = width // 2
    center_y = height // 2
    radius = min(center_x, center_y)

    # Filled disc (value 1) used as the keep-mask for bitwise_and.
    circle_img = np.zeros((height, width), np.uint8)
    cv2.circle(circle_img, (center_x, center_y), radius, 1, thickness=-1)
    image = cv2.bitwise_and(image, image, mask=circle_img)
    image = crop_image_from_gray(image)
    blurred = cv2.GaussianBlur(image, (0, 0), sigmaX)
    return cv2.addWeighted(image, 4, blurred, -4, 128)

In [None]:
%%time
## try circle crop: NUM_SAMP random samples per class, one row per class
NUM_SAMP=7
fig = plt.figure(figsize=(25, 16))
for class_id in sorted(train_y.unique()):
    class_rows = df_train.loc[df_train['diagnosis'] == class_id].sample(NUM_SAMP, random_state=SEED)
    # `slot` is the 1-based subplot position; each class starts a new row
    for slot, (idx, row) in enumerate(class_rows.iterrows(), start=class_id * NUM_SAMP + 1):
        ax = fig.add_subplot(5, NUM_SAMP, slot, xticks=[], yticks=[])
        path=f"./train_images/{row['id_code']}.png"
        image = circle_crop(path,sigmaX=30)

        plt.imshow(image)
        ax.set_title('%d-%d-%s' % (class_id, idx, row['id_code']) )

In [None]:
%%time
# load_ben_color preview on the test table: 5 rows of NUM_SAMP random images,
# each row drawn with its own seed (SEED, SEED+1, ...).
NUM_SAMP=10
fig = plt.figure(figsize=(25, 16))
for jj in range(5):
    row_samples = df_test.sample(NUM_SAMP,random_state=SEED+jj)
    # `slot` is the 1-based subplot position inside the 5 x NUM_SAMP grid
    for slot, (idx, row) in enumerate(row_samples.iterrows(), start=jj * NUM_SAMP + 1):
        ax = fig.add_subplot(5, NUM_SAMP, slot, xticks=[], yticks=[])
        path=f"./data/raw/test_images/{row['id_code']}.png"
        image = load_ben_color(path,sigmaX=30)

        plt.imshow(image)
        ax.set_title('%d-%s' % (idx, row['id_code']) )

In [None]:
# Re-process the image at `path` (whatever the most recently run cell left there) and show it on its own.
image = load_ben_color(path,sigmaX=30)
plt.imshow(image)

In [None]:
# Save the preview image. cv2.imwrite expects BGR channel order, while `image`
# comes from load_ben_color (RGB), so convert back to avoid swapped red/blue.
# NOTE(review): assumes `image` is still the 3-channel RGB array from the previous cell.
cv2.imwrite('./xxx.png', cv2.cvtColor(image, cv2.COLOR_RGB2BGR))