In [None]:
import os
import pandas as pd
import shutil
from sklearn.model_selection import train_test_split

# --- Locations ---------------------------------------------------------------
# Label table and the folder that holds the source images.
train_csv = r"D:\A\data\train.csv"
data_dir = r"D:\A\data\train_images"
small_data_dir = data_dir  # the image subset to use is currently the whole folder

# Output tree: <data_dir>/dataset/{train,test}
output_dir = os.path.join(data_dir, "dataset")
train_dir, test_dir = (
    os.path.join(output_dir, part) for part in ("train", "test")
)

# Ensure both split folders exist; an already-existing folder is not an error.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Read the label table; it is indexed below through its 'id_code' column.
df = pd.read_csv(train_csv)

# Keep only rows whose image is present as "<id_code>.png" in small_data_dir.
# The raw listing also contains the "dataset" output folder (it lives inside
# the image folder) and may contain files of other types. Only "<id_code>.png"
# is ever copied later, so letting those entries through would place rows in
# the split whose image is then silently never copied.
# The extension is compared case-insensitively so "X.PNG" still counts.
small_data_images = os.listdir(small_data_dir)
id_codes = [
    stem
    for stem, ext in map(os.path.splitext, small_data_images)
    if ext.lower() == ".png"
]
filtered_df = df[df['id_code'].isin(id_codes)]

# Split the data into 90% train and 10% test; the fixed random_state makes the
# partition reproducible between runs.
train_df, test_df = train_test_split(filtered_df, test_size=0.1, random_state=47)

# Diagnosis code -> name of the per-label sub-folder.
# The position of each name in the tuple is the diagnosis code it stands for.
label_mapping = dict(enumerate((
    "0-No DR",
    "1-Mild",
    "2-Moderate",
    "3-Severe",
    "4-Proliferative DR",
)))

# Function to copy images into their corresponding label directories
def move_images(dataframe, source_dir, dest_dir, labels=None):
    """Copy every image listed in *dataframe* into a per-label folder.

    Despite the name, files are copied (``shutil.copy``), not moved: the
    originals stay in *source_dir*.

    Args:
        dataframe: Table with an ``id_code`` column (image file stem) and a
            ``diagnosis`` column (key into the label mapping).
        source_dir: Directory containing the ``<id_code>.png`` files.
        dest_dir: Root directory; one sub-directory per label name is created
            beneath it as needed.
        labels: Optional mapping from diagnosis value to sub-directory name.
            Defaults to the module-level ``label_mapping``.

    Raises:
        KeyError: If a diagnosis value has no entry in the label mapping.
    """
    if labels is None:
        labels = label_mapping

    prepared_dirs = {}  # label name -> directory already created for it
    missing = []  # id_codes whose source file was not found

    for img_id, label in zip(dataframe['id_code'], dataframe['diagnosis']):
        label_name = labels[label]

        # Create each label directory once instead of once per row.
        label_dir = prepared_dirs.get(label_name)
        if label_dir is None:
            label_dir = os.path.join(dest_dir, label_name)
            os.makedirs(label_dir, exist_ok=True)
            prepared_dirs[label_name] = label_dir

        file_name = f"{img_id}.png"
        source_path = os.path.join(source_dir, file_name)
        if not os.path.exists(source_path):
            # Best effort: keep going, but remember what was skipped so the
            # caller is told instead of ending up with a quietly smaller set.
            missing.append(img_id)
            continue
        shutil.copy(source_path, os.path.join(label_dir, file_name))

    if missing:
        print(
            f"Warning: {len(missing)} image(s) not found in {source_dir} "
            f"and skipped (first few: {missing[:10]})"
        )

# Populate the train folder first, then the test folder, from the same source.
for _subset_df, _subset_dir in ((train_df, train_dir), (test_df, test_dir)):
    move_images(_subset_df, small_data_dir, _subset_dir)

print("Dataset split completed successfully.")


Dataset split completed successfully.
