In [5]:
import os
import shutil
from pathlib import Path
import random

# Configuration
SOURCE_DIR = "../data/Rice_Leaf_AUG"  # Your current data folder
OUTPUT_DIR = "../data/rice_split"
TRAIN_RATIO = 0.7
VAL_RATIO = 0.15
TEST_RATIO = 0.15

print("Configuration loaded ✓")
print(f"Source: {SOURCE_DIR}")
print(f"Output: {OUTPUT_DIR}")
print(f"Split: {TRAIN_RATIO*100}% train, {VAL_RATIO*100}% val, {TEST_RATIO*100}% test")

Configuration loaded ✓
Source: ../data/Rice_Leaf_AUG
Output: ../data/rice_split
Split: 70.0% train, 15.0% val, 15.0% test


In [6]:
for split in ['train', 'val', 'test']:
    for class_name in os.listdir(SOURCE_DIR):
        class_path = os.path.join(SOURCE_DIR, class_name)
        if os.path.isdir(class_path):
            output_path = os.path.join(OUTPUT_DIR, split, class_name)
            Path(output_path).mkdir(parents=True, exist_ok=True)

print("✓ Directory structure created")
print(f"✓ Created: {OUTPUT_DIR}/train, val, test")


✓ Directory structure created
✓ Created: ../data/rice_split/train, val, test


In [7]:
random.seed(42)  # For reproducibility

split_summary = []

for class_name in os.listdir(SOURCE_DIR):
    class_path = os.path.join(SOURCE_DIR, class_name)
    if not os.path.isdir(class_path):
        continue
    
    # Get all images
    images = [f for f in os.listdir(class_path) 
              if f.endswith(('.jpg', '.png', '.jpeg'))]
    random.shuffle(images)
    
    # Calculate split points
    n = len(images)
    train_end = int(n * TRAIN_RATIO)
    val_end = train_end + int(n * VAL_RATIO)
    
    # Split images
    train_imgs = images[:train_end]
    val_imgs = images[train_end:val_end]
    test_imgs = images[val_end:]
    
    # Copy files to respective folders
    print(f"\nProcessing {class_name}...")
    
    for img in train_imgs:
        shutil.copy2(
            os.path.join(class_path, img),
            os.path.join(OUTPUT_DIR, 'train', class_name, img)
        )
    
    for img in val_imgs:
        shutil.copy2(
            os.path.join(class_path, img),
            os.path.join(OUTPUT_DIR, 'val', class_name, img)
        )
    
    for img in test_imgs:
        shutil.copy2(
            os.path.join(class_path, img),
            os.path.join(OUTPUT_DIR, 'test', class_name, img)
        )
    
    split_summary.append({
        'class': class_name,
        'train': len(train_imgs),
        'val': len(val_imgs),
        'test': len(test_imgs),
        'total': n
    })
    
    print(f"  ✓ {len(train_imgs)} train, {len(val_imgs)} val, {len(test_imgs)} test")



Processing Sheath Blight...
  ✓ 442 train, 94 val, 96 test

Processing Leaf Blast...
  ✓ 443 train, 95 val, 96 test

Processing Bacterial Leaf Blight...
  ✓ 445 train, 95 val, 96 test

Processing Healthy Rice Leaf...
  ✓ 457 train, 97 val, 99 test

Processing Brown Spot...
  ✓ 452 train, 96 val, 98 test

Processing Leaf scald...
  ✓ 439 train, 94 val, 95 test


In [8]:
print("\n" + "="*60)
print("DATA SPLIT SUMMARY")
print("="*60)
print(f"{'Class':<25} {'Train':<10} {'Val':<10} {'Test':<10} {'Total':<10}")
print("-"*60)

total_train = 0
total_val = 0
total_test = 0

for item in split_summary:
    print(f"{item['class']:<25} {item['train']:<10} {item['val']:<10} {item['test']:<10} {item['total']:<10}")
    total_train += item['train']
    total_val += item['val']
    total_test += item['test']

print("-"*60)
print(f"{'TOTAL':<25} {total_train:<10} {total_val:<10} {total_test:<10} {total_train+total_val+total_test:<10}")

print("\n✅ Data split complete!")
print(f"✅ Files saved to: {OUTPUT_DIR}")


DATA SPLIT SUMMARY
Class                     Train      Val        Test       Total     
------------------------------------------------------------
Sheath Blight             442        94         96         632       
Leaf Blast                443        95         96         634       
Bacterial Leaf Blight     445        95         96         636       
Healthy Rice Leaf         457        97         99         653       
Brown Spot                452        96         98         646       
Leaf scald                439        94         95         628       
------------------------------------------------------------
TOTAL                     2678       571        580        3829      

✅ Data split complete!
✅ Files saved to: ../data/rice_split
