In [1]:
import numpy as np
import sys, os, pickle
import cv2
import matplotlib.pyplot as plt
from preprocessing import text_vanish as tv
from preprocessing import crop_edges as ce
from pathlib import Path

In [2]:
# Side length, in pixels, of the square images stored in the pickles.
# Other sizes that have been used: 224, 299, 331, ...
image_dim = 384

# Input folder (one sub-folder per class) and output root for the
# preprocessed images and the pickles.
dpath_original = 'D:/kvasir_cls/kvasir-dataset-v2'
dpath_preprocess = 'D:/kvasir_cls'

# Names of the class sub-folders expected under dpath_original.
class_list = [
    'esophagitis',
    'dyed-lifted-polyps',
    'dyed-resection-margins',
    'normal-cecum',
    'normal-pylorus',
    'normal-z-line',
    'polyps',
    'ulcerative-colitis',
]

In [3]:
# Make sure the output root exists. makedirs(exist_ok=True) also creates any
# missing parent folders and is a no-op when the directory is already there,
# so the cell can be re-run safely.
os.makedirs(dpath_preprocess, exist_ok=True)

In [5]:
# Preprocess every file of every class folder and write the result as
# <original name>_prsd.png under dpath_preprocess/<class>/.
for c in class_list:

    print(c)

    class_dir = os.path.join(dpath_original, c)
    file_list = os.listdir(class_dir)

    out_dir = os.path.join(dpath_preprocess, c)
    # exist_ok tolerates re-runs and avoids the check-then-create race.
    os.makedirs(out_dir, exist_ok=True)

    for f in file_list:

        impath = os.path.join(class_dir, f)
        # Project helpers from the `preprocessing` package: the text-vanish
        # step receives the file path, the crop step receives its result.
        processed = tv.Vanisher(impath).do_vanish()
        cropped = ce.Crop(processed).do_crop()
        # cv2.resize takes the target size as (width, height).
        final = cv2.resize(cropped, (625, 532))

        # splitext removes the extension whatever its length ('.jpg',
        # '.jpeg', ...) instead of blindly cutting the last four characters.
        stem = os.path.splitext(os.path.basename(impath).strip())[0]
        filename = stem + '_prsd'
        out_path = os.path.join(out_dir, filename + '.png')
        # imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(out_path, final):
            raise IOError('could not write image: ' + out_path)
            

esophagitis
dyed-lifted-polyps
dyed-resection-margins
normal-cecum
normal-pylorus
normal-z-line
polyps
ulcerative-colitis


In [6]:
# Build the classification arrays from the preprocessed PNGs:
#   X -> RGB images resized to (image_dim, image_dim), uint8
#   Y -> one-element label lists, the label being the class position below
X = []
Y = []

# The polyp class is removed here (to calibrate with the segmentation
# dataset). Note that this rebinds the notebook-wide `class_list`.
class_list = ['esophagitis', 'dyed-lifted-polyps', 'dyed-resection-margins',
              'normal-cecum', 'normal-pylorus', 'normal-z-line', 'ulcerative-colitis'] # polyp removed

for i, c in enumerate(class_list):
    class_dir = os.path.join(dpath_preprocess, c)
    file_list = os.listdir(class_dir)

    for f in file_list:
        impath = os.path.join(class_dir, f)

        x = cv2.imread(impath)
        # imread returns None (no exception) for missing or undecodable files.
        if x is None:
            raise IOError('could not read image: ' + impath)
        x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
        x = cv2.resize(x, (image_dim, image_dim))
        # Convert with astype: assigning to `x.dtype` would reinterpret the
        # raw buffer in place rather than convert the values.
        x = np.round(x).astype(np.uint8)
        X.append(x)
        Y.append([i])

        if len(X) % 100 == 0:
            print(len(X), 'done')

X = np.array(X, dtype=np.uint8)
Y = np.array(Y, dtype=np.uint8)

print(X.shape, Y.shape)
print(X.dtype, Y.dtype)
print(np.max(X), np.min(X))

100 done
200 done
300 done
400 done
500 done
600 done
700 done
800 done
900 done
1000 done
1100 done
1200 done
1300 done
1400 done
1500 done
1600 done
1700 done
1800 done
1900 done
2000 done
2100 done
2200 done
2300 done
2400 done
2500 done
2600 done
2700 done
2800 done
2900 done
3000 done
3100 done
3200 done
3300 done
3400 done
3500 done
3600 done
3700 done
3800 done
3900 done
4000 done
4100 done
4200 done
4300 done
4400 done
4500 done
4600 done
4700 done
4800 done
4900 done
5000 done
5100 done
5200 done
5300 done
5400 done
5500 done
5600 done
5700 done
5800 done
5900 done
6000 done
6100 done
6200 done
6300 done
6400 done
6500 done
6600 done
6700 done
6800 done
6900 done
7000 done
(7000, 384, 384, 3) (7000, 1)
uint8 uint8
255 0


In [9]:
# Store the image array and its labels together as a two-element list.
no_polyp_pickle = f'{dpath_preprocess}/kvasir_cls_{image_dim}no_polyp.pickle'
with open(no_polyp_pickle, 'wb') as fh:
    pickle.dump([X, Y], fh)

In [11]:
# Build the arrays for the polyp class only:
#   X -> RGB images resized to (image_dim, image_dim), uint8
#   Y -> one-element label lists, always [7]
X = []
Y = []

# Only the "polyps" folder is read; its class index is fixed to 7, which
# comes after the labels 0-6 produced for the seven no-polyp classes.
class_dir = os.path.join(dpath_preprocess, "polyps")
file_list = os.listdir(class_dir)

for f in file_list:
    impath = os.path.join(class_dir, f)

    x = cv2.imread(impath)
    # imread returns None (no exception) for missing or undecodable files.
    if x is None:
        raise IOError('could not read image: ' + impath)
    x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
    x = cv2.resize(x, (image_dim, image_dim))
    # Convert with astype: assigning to `x.dtype` would reinterpret the raw
    # buffer in place rather than convert the values.
    x = np.round(x).astype(np.uint8)
    X.append(x)
    Y.append([7])

    if len(X) % 100 == 0:
        print(len(X), 'done')

X = np.array(X, dtype=np.uint8)
Y = np.array(Y, dtype=np.uint8)

print(X.shape, Y.shape)
print(X.dtype, Y.dtype)
print(np.max(X), np.min(X))

100 done
200 done
300 done
400 done
500 done
600 done
700 done
800 done
900 done
1000 done
(1000, 384, 384, 3) (1000, 1)
uint8 uint8
255 0


In [13]:
# Store the polyp images and their labels together as a two-element list.
polyp_pickle = f'{dpath_preprocess}/kvasir_cls_{image_dim}polyp.pickle'
with open(polyp_pickle, 'wb') as fh:
    pickle.dump([X, Y], fh)

# Data loading for the segmentation dataset

In [4]:
# Root drive and dataset folder of the segmentation data.
# The trailing slash matters on Windows: os.path.join('D:', 'kvasir') yields
# the drive-relative path 'D:kvasir' (resolved against the current directory
# on drive D), whereas 'D:/' anchors the path at the drive root.
dpath = 'D:/'
data = "kvasir"

# File names found in the images/ and masks/ sub-folders.
image_list = os.listdir(os.path.join(dpath, data, 'images'))
mask_list = os.listdir(os.path.join(dpath, data, 'masks'))


In [5]:
# Quick sanity check: number of images and masks, and the first name of each.
print(len(image_list), len(mask_list), image_list[0], mask_list[0], sep='\n')

1000
1000
cju0qkwl35piu0993l0dewei2.jpg
cju0qkwl35piu0993l0dewei2.jpg


In [8]:
# Build the segmentation arrays and pickle them:
#   X -> RGB images resized to (image_dim, image_dim), uint8
#   Y -> binary masks (0/1) of the same spatial size, uint8
X = []
Y = []


for f in image_list:

    image_path = os.path.join(dpath, data, 'images', f)
    # The mask is looked up under the same file name as its image.
    mask_path = os.path.join(dpath, data, 'masks', f)

    x = cv2.imread(image_path)
    # imread returns None (no exception) for missing or undecodable files.
    if x is None:
        raise IOError('could not read image: ' + image_path)
    x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
    x = cv2.resize(x, (image_dim, image_dim))
    # Convert with astype: assigning to `x.dtype` would reinterpret the raw
    # buffer in place rather than convert the values.
    x = np.round(x).astype(np.uint8)

    # Masks are read as single-channel grayscale.
    y = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if y is None:
        raise IOError('could not read mask: ' + mask_path)
    y = cv2.resize(y, (image_dim, image_dim))

    # Binarise: a pixel is foreground when brighter than 10% of this mask's
    # own maximum. Casting straight to uint8 avoids keeping a wide integer
    # array per mask until the final conversion.
    y = (y > 0.1 * np.max(y)).astype(np.uint8)

    X.append(x)
    Y.append(y)

    if len(X) % 100 == 0:
        print(len(X), 'done')


X = np.array(X, dtype=np.uint8)
Y = np.array(Y, dtype=np.uint8)

print(X.shape, Y.shape)
print(X.dtype, Y.dtype)
print(np.max(X), np.min(X))

with open(dpath+"/"+data+"/"+data+"_"+str(image_dim)+'.pickle', 'wb') as f:
    pickle.dump([X, Y], f)

100 done
200 done
300 done
400 done
500 done
600 done
700 done
800 done
900 done
1000 done
(1000, 384, 384, 3) (1000, 384, 384)
uint8 uint8
255 0
