In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import shutil
from sklearn.model_selection import train_test_split


In [8]:
# --- Configuration ---
# Root folder that is expected to hold one sub-folder per entry in CLASSES.
BASE_DIR = "brain_tumor_dataset"
# Root folder under which the train/val/test folder tree is created.
SPLIT_DIR = "brain_tumor_split"
# Class labels; each one is used as a sub-folder name under both roots.
CLASSES = ["yes", "no"]
# Fraction of each class's files assigned to the training split.
TRAIN_RATIO = 0.70


In [12]:
def create_split_structure(base_split_dir, classes, splits=('train', 'val', 'test')):
    """Create the ``<base_split_dir>/<split>/<class_name>`` folder tree.

    Parameters
    ----------
    base_split_dir : str
        Root directory under which the split folders are created.
    classes : iterable of str
        Class labels; one sub-folder per label is created inside every split.
    splits : iterable of str, optional
        Names of the split folders. Defaults to ``('train', 'val', 'test')``.

    Returns
    -------
    None

    Notes
    -----
    Folders that already exist are left untouched (``exist_ok=True``), so the
    function can be called repeatedly, e.g. when the cell is re-run.
    """
    for split in splits:
        for class_name in classes:
            path = os.path.join(base_split_dir, split, class_name)
            # makedirs also creates the missing parent folders in one call.
            os.makedirs(path, exist_ok=True)
    

In [13]:
# Build the empty split/class folder tree before any files are copied into it.
create_split_structure(base_split_dir=SPLIT_DIR, classes=CLASSES)

In [14]:
# --- 3. Perform the Split and Copy Files ---
# NOTE: files copied by an earlier run are never removed. After changing a
# ratio or the seed, delete SPLIT_DIR first; otherwise an image can end up in
# more than one split.

# File extensions treated as images; matched case-insensitively so that
# names such as 'scan.JPG' or 'scan.jpeg' are not silently skipped.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

# Share of the non-training files that goes to 'test'; the rest goes to 'val'.
# 0.5 splits the holdout evenly (roughly 15% / 15% of a class when TRAIN_RATIO
# is 0.70). Without an explicit size, train_test_split falls back to
# test_size=0.25, which would make 'val' about three times larger than 'test'.
HOLDOUT_TEST_SHARE = 0.5


def copy_files(file_list, target_split, class_name):
    """Copy every file in ``file_list`` into ``SPLIT_DIR/<target_split>/<class_name>``.

    Parameters
    ----------
    file_list : iterable of str
        Paths of the source files to copy.
    target_split : str
        Name of the split folder, e.g. ``'train'``, ``'val'`` or ``'test'``.
    class_name : str
        Class sub-folder inside the split folder.
    """
    target_dir = os.path.join(SPLIT_DIR, target_split, class_name)
    # If target_dir were missing, shutil.copy would treat it as a file name and
    # write every image to that single path, so make sure the folder exists.
    os.makedirs(target_dir, exist_ok=True)
    for file_path in file_list:
        # The destination is a directory, so the original file name is kept.
        shutil.copy(file_path, target_dir)


for class_name in CLASSES:
    class_path = os.path.join(BASE_DIR, class_name)
    # os.listdir() returns names in arbitrary order; sorting makes the seeded
    # shuffle below produce the same split on every machine.
    all_files = sorted(
        os.path.join(class_path, f)
        for f in os.listdir(class_path)
        if f.lower().endswith(IMAGE_EXTENSIONS)
    )

    # First split: TRAIN_RATIO of the files go to train, the rest to a holdout.
    train_files, holdout_files = train_test_split(
        all_files,
        train_size=TRAIN_RATIO,
        random_state=42,
        shuffle=True,
    )

    # Second split: divide the holdout into validation and test files.
    val_files, test_files = train_test_split(
        holdout_files,
        test_size=HOLDOUT_TEST_SHARE,
        random_state=42,
        shuffle=True,
    )

    # Every file must land in exactly one of the three splits.
    assert len(train_files) + len(val_files) + len(test_files) == len(all_files)

    copy_files(train_files, 'train', class_name)
    copy_files(val_files, 'val', class_name)
    copy_files(test_files, 'test', class_name)

    print(
        f"{class_name}: {len(train_files)} train / "
        f"{len(val_files)} val / {len(test_files)} test"
    )