# Importing Libraries

In [39]:
import tensorflow as tf
import os
import shutil

# Creating Custom Dataset

In [104]:
def create_dataset(image_dir, output_path='/kaggle/working/'):
    """Copy the images of one dataset split into per-class sub-folders.

    ``image_dir`` must contain an ``images`` folder with ``.jpg`` files and a
    ``labels`` folder holding one ``.txt`` file per image (same base name).
    The first whitespace-separated token on the first line of a label file is
    read as the integer class id of the image.

    The images are copied to ``<output_path>/custom_ds/<split>/<class>/``
    where ``<split>`` is the last component of ``image_dir`` and ``<class>``
    is ``non-tumor`` (id 0), ``tumor`` (id 1) or ``other`` (any other id).

    Images whose label file is missing or whose first line is empty are
    skipped. Files already present in the destination are not copied again,
    so calling the function repeatedly is safe.

    Args:
        image_dir: Path of the split folder (e.g. ``.../train``); a trailing
            path separator is tolerated.
        output_path: Writable root under which ``custom_ds`` is created.
            Defaults to the Kaggle working directory because the input
            datasets are mounted read-only.

    Raises:
        FileNotFoundError: If ``<image_dir>/images`` does not exist.
        ValueError: If the first token of a label file is not an integer.
    """
    # normpath drops a trailing separator, so '.../train/' still yields
    # 'train' instead of the empty string that split('/')[-1] would give.
    folder_name = os.path.basename(os.path.normpath(image_dir))
    path = os.path.join(output_path, 'custom_ds', folder_name)

    # class id -> destination folder; every id not listed goes to 'other'
    class_dirs = {
        0: os.path.join(path, 'non-tumor'),
        1: os.path.join(path, 'tumor'),
    }
    other_dir = os.path.join(path, 'other')

    # makedirs(exist_ok=True) creates the split folder and the class folders
    # in one go and is a no-op when they already exist.
    for class_dir in (*class_dirs.values(), other_dir):
        os.makedirs(class_dir, exist_ok=True)

    images_dir = os.path.join(image_dir, 'images')
    labels_dir = os.path.join(image_dir, 'labels')
    extension = '.jpg'

    for filename in os.listdir(images_dir):
        if not filename.endswith(extension):
            continue

        # Swap only the trailing extension; str.replace would also rewrite a
        # '.jpg' that appears in the middle of the file name.
        label_path = os.path.join(labels_dir, filename[:-len(extension)] + '.txt')
        if not os.path.isfile(label_path):
            # no annotation for this image: treat it like an empty label file
            continue

        with open(label_path, 'r') as label_file:
            line = label_file.readline().strip()
        if not line:
            # some label files are empty
            continue
        label = int(line.split()[0])

        target_path = os.path.join(class_dirs.get(label, other_dir), filename)
        if not os.path.exists(target_path):
            shutil.copy2(os.path.join(images_dir, filename), target_path)

In [105]:
# Read-only Kaggle input folders, one per split of the original dataset.
train_data_dir, valid_data_dir, test_data_dir = (
    f"/kaggle/input/medical-image-dataset-brain-tumor-detection/Brain Tumor Detection/{split}"
    for split in ("train", "valid", "test")
)

In [106]:
# Build the class-sorted copy of every split: train, then valid, then test.
for split_dir in (train_data_dir, valid_data_dir, test_data_dir):
    create_dataset(split_dir)

In [107]:
# Per-split roots of the class-sorted image copies under the working directory.
custom_train_data_dir, custom_valid_data_dir, custom_test_data_dir = (
    f'/kaggle/working/custom_ds/{split}' for split in ('train', 'valid', 'test')
)

In [108]:
# Settings shared by the train / valid / test dataset loaders.
image_size = (224, 224)  # size every image is resized to when loaded
batch_size = 32          # number of images per batch
seed = 123               # random seed handed to each loader

In [109]:
# Training split: class labels are inferred from the sub-folder names.
train_ds = tf.keras.utils.image_dataset_from_directory(
    directory=custom_train_data_dir,
    batch_size=batch_size,
    image_size=image_size,
    seed=seed,
)

Found 6851 files belonging to 3 classes.


In [110]:
# Validation split, loaded with the same settings as the training split.
valid_ds = tf.keras.utils.image_dataset_from_directory(
    directory=custom_valid_data_dir,
    batch_size=batch_size,
    image_size=image_size,
    seed=seed,
)

Found 1963 files belonging to 3 classes.


In [111]:
# Test split, loaded with the same settings as the training split.
test_ds = tf.keras.utils.image_dataset_from_directory(
    directory=custom_test_data_dir,
    batch_size=batch_size,
    image_size=image_size,
    seed=seed,
)

Found 973 files belonging to 3 classes.


In [113]:
# Show the class-name order of each split, one list per line.
print(
    train_ds.class_names,
    valid_ds.class_names,
    test_ds.class_names,
    sep='\n',
)

['non-tumor', 'other', 'tumor']
['non-tumor', 'other', 'tumor']
['non-tumor', 'other', 'tumor']
