In [6]:
import pprint
import pickle as pkl
import os
import natsort
import numpy as np
import cv2
import module.data_processing as dpc
import module.image_preprocessing as ipc

TRAIN = 0
VALID = 1
data_type = ["train", "valid"]
dt = data_type[TRAIN]

project_path = "E:/Tukorea/Capstone/"
original_data_path = os.path.join(project_path, "dataset/", "original/", f"{dt}/")
RoI_data_path = os.path.join(project_path, "dataset/", "RoI/", f"{dt}/")
background_RoI_data_path = os.path.join(project_path, "dataset/", "background_RoI/", f"{dt}/")
box_data_path = os.path.join(project_path, "dataset/", "box/256x256/", f"{dt}/")
crop_data_path = os.path.join(project_path, "dataset/", "crop/", f"{dt}/")
RZ_data_path = os.path.join(project_path, "dataset/", "ratio_zero-padding/", f"{dt}/")
RZ_LANCZOS4_data_path = os.path.join(project_path, "dataset/", "RZ_LANCZOS4/", f"{dt}/")

data_path_namelist = ["original", "RoI", "background_RoI", "box", "crop", "ratio_zero-padding", "RZ_LANCZOS4"]
data_path_list = [original_data_path, RoI_data_path, background_RoI_data_path, box_data_path, crop_data_path, RZ_data_path, RZ_LANCZOS4_data_path]

print("DATA TYPE:", dt, end="\n\n")
for i, data_path in enumerate(data_path_list):
    print(data_path_namelist[i] + " data folder")
    folder = natsort.natsorted(os.listdir(data_path))
    print(folder, end="\n\n")

DATA TYPE: train

original data folder
['A1', 'A2', 'A3', 'A4', 'A5', 'A6']

RoI data folder
['A1', 'A2', 'A3', 'A4', 'A5', 'A6']

background_RoI data folder
['A1', 'A2', 'A3', 'A4', 'A5', 'A6']

box data folder
['A1', 'A2', 'A3', 'A4', 'A5', 'A6']

crop data folder
['A1', 'A2', 'A3', 'A4', 'A5', 'A6']

ratio_zero-padding data folder
['A1', 'A2', 'A3', 'A4', 'A5', 'A6']

RZ_LANCZOS4 data folder
['A1', 'A2', 'A3', 'A4', 'A5', 'A6']



In [3]:
dt = data_type[VALID]
original_data_path = os.path.join(project_path, "dataset/", "original/", f"{dt}/")
RoI_data_path = os.path.join(project_path, "dataset/", "RoI/", f"{dt}/")
box_data_path = os.path.join(project_path, "dataset/", "box/224x224/", f"{dt}/")
crop_data_path = os.path.join(project_path, "dataset/", "crop/", f"{dt}/")
RZ_data_path = os.path.join(project_path, "dataset/", "ratio_zero-padding/", f"{dt}/")
RZ_LANCZOS4_data_path = os.path.join(project_path, "dataset/", "RZ_LANCZOS4/", f"{dt}/")

data_path_namelist = ["original", "RoI", "box", "crop", "ratio_zero-padding", "RZ_LANCZOS4"]
data_path_list = [original_data_path, RoI_data_path, box_data_path, crop_data_path, RZ_data_path, RZ_LANCZOS4_data_path]

# Scenario 1, 2

In [21]:
src_folder = os.path.join(project_path, "dataset/", "RoI/", f"392-/", dt) + "/"
dst_folder = os.path.join(project_path, "dataset/", "RZ_LANCZOS4/", "392-/", f"{dt}/")
ipc.ratio_resize_and_zeroPadding(224, src_folder, dst_folder, intpol=cv2.INTER_LANCZOS4)

In [37]:
fname = "392-/"
dst_folder = os.path.join(project_path, "dataset/", "RZ_LANCZOS4/", fname, f"{dt}/")

labels = ["A1", "A2", "A3", "A4", "A5", "A6"]
label_dict = {"A1": 0, "A2": 1, "A3": 2, "A4": 3, "A5": 4, "A6": 5}

x = list()
y = list()
for label in labels:
    label_path = f"{dst_folder}{label}/"
    image_names = os.listdir(label_path)

    for image_name in image_names:
        img = cv2.imread(label_path + image_name)
        x.append(img)
        y.append(label_dict[label])

x = np.array(x)
PIL_x = ipc.convert_numpy_to_PIL(x)
y = np.array(y)

In [38]:
save_path = "/".join(dst_folder.split("/")[:-2]) + "/"
with open(f"{save_path}{fname[:-1]}_{dt}_data.pkl", "wb") as pkl_file:
    pkl.dump((PIL_x, y), pkl_file)

In [None]:
del x, PIL_x

# Scenario 3

In [9]:
import pandas as pd
import shutil

In [10]:
data_label = ["A1", "A2", "A3", "A4", "A5", "A6"]
csv_data_list = list()

for label in data_label:
    csv_path = f"{project_path}{label}.csv"
    csv_data = pd.read_csv(csv_path, encoding="CP949")
    
    # preprocessing
    exist_index = csv_data.loc[csv_data["exist"] == "Y"].index
    csv_data = csv_data.loc[exist_index].copy()
    csv_data.drop(["Unnamed: 0", "exist", "diagnosis", "src_path", "label_path", "type", "fileformat", "copyrighter"], axis=1, inplace=True)
    
    csv_data_list.append(csv_data)

In [13]:
import cv2

low = 10
high = 130
dst_folder = os.path.join(project_path, "dataset/", "RoI/", f"{low}-{high}/", dt) + "/"

for label in data_label:
    dpc.mkadir(dst_folder + label + "/")

for i, csv_data in enumerate(csv_data_list):
    filtered_csv = dpc.get_RoI_range_csv(low, high, csv_data)
    filtered_img_filenames = list(filtered_csv["Raw data ID"])

    src_folder = RoI_data_path + "/" + data_label[i] + "/"
    for filename in filtered_img_filenames:
        filename = "_".join(filename.split("_")[:3]) + "_" + filename.split("_")[-1]
    
        try:    shutil.copy(src_folder + filename, dst_folder + data_label[i] + "/" + filename)
        except:
            for j in range(5):
                new_filename = filename.split(".")[0] + f"T{j}." + filename.split(".")[1]
                temp_img = cv2.imread(src_folder + new_filename)
                
                if isinstance(temp_img, type(None)):
                    continue
                
                width, height, channel = temp_img.shape
                if (width <= 10 or height <= 10):
                    continue

                try:    shutil.copy(src_folder + new_filename, dst_folder + data_label[i] + "/" + new_filename)
                except: pass

# Scenario 5

In [9]:
import pickle as pkl

dt = "valid"
background_RoI_data_path = os.path.join(project_path, "dataset/", "background_RoI/")

In [8]:
background_RoI_data_path

'E:/Tukorea/Capstone/dataset/background_RoI/'

In [20]:
# Load Validation Data
labels = ["A1", "A2", "A3", "A4", "A5"]
label_dict = {"A1": 0, "A2": 1, "A3": 2, "A4": 3, "A5": 4}

x = list()
y = list()
filename = "valid_80p"
for label in labels:
    label_path = f"{background_RoI_data_path}{filename}/{label}/"
    image_names = os.listdir(label_path)

    for image_name in image_names:
        img = cv2.imread(label_path + image_name)
        x.append(img)
        y.append(label_dict[label])

x = np.array(x)
PIL_x = ipc.convert_numpy_to_PIL(x)
y = np.array(y)

  x = np.array(x)


In [21]:
save_path = background_RoI_data_path
with open(f"{save_path}{filename}.pkl", "wb") as pkl_file:
    pkl.dump((PIL_x, y), pkl_file)