In [86]:
import csv
import os

import cv2
import imutils
import numpy as np
from tqdm import tqdm
# import matplotlib.pyplot as plt

In [78]:

# Dataset locations: raw class folders are read from ROOT_PATH and the
# cropped images are written under PROCESSED_DIR.
ROOT_PATH = "./data/bt/raw"
PROCESSED_DIR = './data/bt/processed'

# Raw class-folder name -> integer label stored with each sample.
CLASS_NAMES = {"pituitary_tumor": 3, "no_tumor": 0,
               "meningioma_tumor": 2, "glioma_tumor": 1}

# NOTE(review): not referenced by any code visible in this part of the notebook.
IMG_SIZE = (260,260)

# exist_ok=True keeps the cell idempotent without a separate
# check-then-create step.
os.makedirs(PROCESSED_DIR, exist_ok=True)

In [79]:
def load_data(dir_path):
    """
    Load every image under ``dir_path`` into memory as labelled samples.

    ``dir_path`` is expected to contain one sub-directory per class, named
    after a key of ``CLASS_NAMES``. Images are read with ``cv2.imread`` at
    their original size (no resizing happens here).

    Args:
        dir_path: Directory holding the per-class image folders.

    Returns:
        list[dict]: One dict per readable image with the keys
            ``"image"`` (the array returned by ``cv2.imread``),
            ``"label"`` (the integer looked up in ``CLASS_NAMES``) and
            ``"name"`` (``"<n>.jpg"``, numbered from 1 within each class).
    """

    print("Loading Dataset...")
    data = []

    # sorted() keeps the per-class numbering reproducible between runs;
    # os.listdir returns entries in arbitrary order.
    for cls in tqdm(sorted(os.listdir(dir_path))):
        cls_dir = os.path.join(dir_path, cls)
        # Stray entries (hidden files, unknown folders) would otherwise raise
        # NotADirectoryError or KeyError below.
        if cls not in CLASS_NAMES or not os.path.isdir(cls_dir):
            print(f"Skipping unexpected entry: {cls_dir}")
            continue

        label = CLASS_NAMES[cls]
        index = 1
        for file in sorted(os.listdir(cls_dir)):
            img = cv2.imread(os.path.join(cls_dir, file))
            # cv2.imread signals an unreadable file by returning None
            # instead of raising, so filter those out here.
            if img is None:
                print(f"Skipping unreadable file: {os.path.join(cls_dir, file)}")
                continue

            data.append({"image": img, "label": label, "name": f"{index}.jpg"})
            index += 1

    return data


In [80]:
def crop_imgs(set_name, add_pixels_value=0):
    """
    Crop each image to the bounding box of its largest bright region.

    Every sample is converted to grayscale, blurred, thresholded and cleaned
    up with erosions and dilations. The largest external contour of that mask
    is located and the image is cropped to the contour's extreme points.

    Args:
        set_name: Iterable of sample dicts with the keys ``'image'``,
            ``'name'`` and ``'label'``. Images are assumed to be 3-channel
            BGR arrays as produced by ``cv2.imread`` -- TODO confirm for
            callers that build samples differently.
        add_pixels_value: Margin in pixels added on every side of the
            bounding box; the crop window is clamped at the image border.

    Returns:
        np.ndarray: Array of dicts holding the cropped ``'image'`` together
        with the sample's original ``'name'`` and ``'label'``. An image in
        which no contour is found (or whose crop would be empty) is passed
        through uncropped.
    """
    print("Cropping...")

    set_new = []
    for sample in tqdm(set_name):
        img = sample['image']
        # cv2.imread yields BGR channel order, so BGR2GRAY is the conversion
        # that applies the intended per-channel weights.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (5, 5), 0)

        # threshold the image, then perform a series of erosions +
        # dilations to remove any small regions of noise
        thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
        thresh = cv2.erode(thresh, None, iterations=2)
        thresh = cv2.dilate(thresh, None, iterations=2)

        # find contours in thresholded image, then grab the largest one
        cnts = cv2.findContours(
            thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)

        new_img = None
        if len(cnts) > 0:
            c = max(cnts, key=cv2.contourArea)

            # extreme coordinates of the contour along each axis
            xs = c[:, :, 0]
            ys = c[:, :, 1]

            # Clamp at 0: a negative slice bound would wrap around to the
            # opposite edge and yield an empty or wrong crop.
            top = max(int(ys.min()) - add_pixels_value, 0)
            bottom = max(int(ys.max()) + add_pixels_value, 0)
            left = max(int(xs.min()) - add_pixels_value, 0)
            right = max(int(xs.max()) + add_pixels_value, 0)

            new_img = img[top:bottom, left:right].copy()

        # No contour (e.g. an all-dark image) or a degenerate window: keep
        # the full frame rather than failing or emitting an empty image.
        if new_img is None or new_img.size == 0:
            new_img = img.copy()

        set_new.append(
            {'image': new_img,
                'name': sample['name'], 'label': sample['label']})

    return np.array(set_new)


In [81]:
def save_new_images(dataset, dir_path):
    """
    Write every sample's image to ``<dir_path>/<label>/<name>``.

    Args:
        dataset: Iterable of dicts with the keys ``"image"``, ``"label"``
            and ``"name"``.
        dir_path: Root output directory; one sub-directory per label is
            created on demand.
    """
    print("Saving...")

    failed = []
    for item in tqdm(dataset):
        file_path = f'{dir_path}/{item["label"]}'
        # exist_ok=True replaces the separate existence check.
        os.makedirs(file_path, exist_ok=True)

        target = file_path + '/' + item["name"]
        # cv2.imwrite reports an unsuccessful write through its boolean
        # return value, so it has to be checked explicitly.
        if not cv2.imwrite(target, item['image']):
            failed.append(target)

    if failed:
        print(f"Warning: could not write {len(failed)} image(s): {failed}")

In [83]:
# Crop both raw splits and write them under PROCESSED_DIR.
# The output folder reuses the raw split name, so the processed test images
# land in 'Testing' -- the folder the CSV-generation cell reads from.
for split_index, split_name in enumerate(('Training', 'Testing')):
    if split_index:
        print("\n\n")

    dataset = load_data(f'{ROOT_PATH}/{split_name}')
    cropped_images = crop_imgs(dataset)
    dir_path = f'{PROCESSED_DIR}/{split_name}'
    os.makedirs(dir_path, exist_ok=True)
    save_new_images(cropped_images, dir_path)

print("Done")


Loading Dataset...


100%|██████████| 4/4 [00:06<00:00,  1.56s/it]


Cropping...


100%|██████████| 394/394 [00:00<00:00, 599.61it/s] 


Saving...


100%|██████████| 394/394 [00:01<00:00, 246.42it/s]

Done





In [92]:
def make_dataset_csv(dir_path, csv_file):
    """
    Write a ``filename,label`` CSV index of the images under ``dir_path``.

    Args:
        dir_path: Directory with one sub-directory per class; the
            sub-directory name is written as the label column.
        csv_file: Path of the CSV file to create (overwritten if it
            already exists). No header row is written.
    """
    with open(csv_file, 'w', newline='') as csv_handle:
        writer = csv.writer(csv_handle)
        # sorted() gives a reproducible row order; os.listdir order is arbitrary.
        for cls in tqdm(sorted(os.listdir(dir_path))):
            cls_dir = os.path.join(dir_path, cls)
            # A stray file next to the class folders would make the inner
            # os.listdir raise NotADirectoryError.
            if not os.path.isdir(cls_dir):
                continue
            for file_name in sorted(os.listdir(cls_dir)):
                writer.writerow([file_name, cls])


In [93]:
# Build one CSV index per processed split.
training_dir_path = os.path.join(PROCESSED_DIR, 'Training')
test_dir_path = os.path.join(PROCESSED_DIR, 'Testing')

for split_dir, csv_file_path in (
    (training_dir_path, f"{PROCESSED_DIR}/train_dataset.csv"),
    (test_dir_path, f"{PROCESSED_DIR}/test_dataset.csv"),
):
    make_dataset_csv(split_dir, csv_file_path)

100%|██████████| 4/4 [00:00<00:00, 477.48it/s]
100%|██████████| 4/4 [00:00<00:00, 2872.81it/s]
