## Splitting the dataset and copying images to their respective folders

In [None]:
import os
import shutil
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the RSNA breast-cancer metadata table (one row per image) into `df`
metadata_csv = "RSNA_breast_cancer_data.csv"
df = pd.read_csv(metadata_csv)

In [None]:
# Count how many rows fall into each `cancer` class before any splitting
overall_class_counts = df['cancer'].value_counts()
print(overall_class_counts)

cancer
0    1218
1    1156
Name: count, dtype: int64


In [None]:
# Stratified split on the `cancer` label so every subset keeps the class ratio.
# Step 1: hold out 20% of all rows as the test set.
train_val_df, test_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['cancer'],
    random_state=42,
)
# Step 2: take 20% of the remaining rows (16% of the full data) for validation,
# which leaves 64% of the full data for training.
train_df, val_df = train_test_split(
    train_val_df,
    test_size=0.2,
    stratify=train_val_df['cancer'],
    random_state=42,
)

In [None]:
# Report the per-class row counts of each subset to confirm the stratification
split_reports = (
    ("Train set class distribution:\n", train_df),
    ("Validation set class distribution:\n", val_df),
    ("Test set class distribution:\n", test_df),
)
for heading, subset in split_reports:
    print(heading, subset['cancer'].value_counts())

Train set class distribution:
 cancer
0    779
1    740
Name: count, dtype: int64
Validation set class distribution:
 cancer
0    195
1    185
Name: count, dtype: int64
Test set class distribution:
 cancer
0    244
1    231
Name: count, dtype: int64


In [None]:
# Build the output tree <base_dir>/<split>/<category>/, skipping folders that
# already exist
base_dir = '../Sample BCD/images'
split_names = ("train", "val", "test")
category_names = ("cancer", "normal")
for split in split_names:
    split_root = os.path.join(base_dir, split)
    for category in category_names:
        os.makedirs(os.path.join(split_root, category), exist_ok=True)

In [None]:
# Function to copy images into the <root>/<split>/<label>/ folder layout
def move_images(data, split, dest_root=None):
    """Copy every image listed in ``data`` into ``<dest_root>/<split>/<label>/``.

    Despite the name, files are copied with ``shutil.copy`` rather than
    moved, so the source files stay where they are.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide an ``image_path`` column (path of the source file) and
        a ``cancer`` column. Rows with ``cancer == 1`` are placed under
        ``cancer``; every other row is placed under ``normal``.
    split : str
        Name of the subset folder, e.g. ``"train"``, ``"val"`` or ``"test"``.
    dest_root : str, optional
        Root of the output tree. When omitted, the notebook-level
        ``base_dir`` is used.

    Raises
    ------
    ValueError
        If two different source paths would be written to the same
        destination file (same file name and same label). The check runs
        before anything is copied, so no files are written in that case.
    """
    root = base_dir if dest_root is None else dest_root

    # Phase 1: plan every copy and detect file-name collisions up front.
    # Only the basename is kept at the destination, so files coming from
    # different source folders could otherwise overwrite each other silently.
    planned = {}  # destination path -> source path
    for image_path, cancer_flag in zip(data['image_path'], data['cancer']):
        label = 'cancer' if cancer_flag == 1 else 'normal'
        dest_path = os.path.join(root, split, label, os.path.basename(image_path))
        first_source = planned.setdefault(dest_path, image_path)
        if first_source != image_path:
            raise ValueError(
                f"Both {first_source!r} and {image_path!r} would be copied to {dest_path!r}"
            )

    # Phase 2: perform the copies, creating destination folders on demand so
    # the function does not rely on another cell having created them.
    for dest_path, image_path in planned.items():
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
        shutil.copy(image_path, dest_path)  # Copy image to the new location

# Populate each split folder from its matching DataFrame
for split_name, split_frame in (("train", train_df), ("val", val_df), ("test", test_df)):
    move_images(split_frame, split_name)

print("Images have been moved to respective folders.")

Images have been moved to respective folders.
