### Crop images

In [None]:
import os
import json
from PIL import Image

# Crop every annotated cell out of the full smear images and store each crop
# as a PNG inside a folder named after its class label.

# Paths
images_path = "../A-Dataset-and-Benchmark-for-Malaria-Life-Cycle-Classification-in-Thin-Blood-Smear-Images/IML_Malaria"
annotation_path = "../A-Dataset-and-Benchmark-for-Malaria-Life-Cycle-Classification-in-Thin-Blood-Smear-Images/annotations.json"
output_dir = "cell_dataset/"

# Create one output folder per class
classes = ["ring", "trophozoite", "schizont", "gametocyte", "red blood cell"]
for cls in classes:
    os.makedirs(os.path.join(output_dir, cls), exist_ok=True)

# Load annotations (JSON is UTF-8 by specification, so do not rely on the
# platform default encoding)
with open(annotation_path, encoding="utf-8") as f:
    ground_truth = json.load(f)

# Iterate through each image and its annotations
for entry in ground_truth:
    image_name = entry["image_name"]
    image_path = os.path.join(images_path, image_name)
    # Strip the extension whatever its length (".jpg", ".jpeg", ".tiff", ...)
    # instead of blindly cutting the last four characters.
    image_stem = os.path.splitext(image_name)[0]

    try:
        image = Image.open(image_path)
    except FileNotFoundError:
        # Message is Norwegian for "Image not found"
        print(f"Bilde ikke funnet: {image_path}")
        continue

    # Image.open keeps the underlying file open; the context manager closes it
    # once all crops of this image are written, so handles do not accumulate.
    with image:
        for i, obj in enumerate(entry["objects"]):
            label = obj["type"]
            if label not in classes:
                print(f"Unknown class: {label} — skipping")
                continue

            # Bounding box, read as top-left corner (x, y) plus width/height
            bbox = obj["bbox"]
            x = int(bbox["x"])
            y = int(bbox["y"])
            w = int(bbox["w"])
            h = int(bbox["h"])

            # Crop and save; an existing crop is never overwritten
            save_path = os.path.join(output_dir, label, f"{image_stem}_{i}.png")
            if os.path.exists(save_path):
                print(f"File {save_path} already exists, skipping save.")
                continue
            image.crop((x, y, x + w, y + h)).save(save_path)
print(" Done with cropping and saving images.")


Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
Unknown class: difficult — skipping
 Done with cropping and saving images.


### Split data into train and validation sets

In [None]:
import random
import shutil 

def split_dataset(input_dir, output_dir, train_ratio=0.8, rng=random):
    """Copy each class folder of ``input_dir`` into train/val subsets.

    For every sub-directory ``<cls>`` of ``input_dir`` the files are shuffled
    and copied to ``<output_dir>/train/<cls>`` and ``<output_dir>/val/<cls>``.
    The first ``int(len(files) * train_ratio)`` shuffled files go to train,
    the remainder to val.

    Args:
        input_dir: Directory containing one sub-directory per class.
        output_dir: Directory in which ``train`` and ``val`` are created.
        train_ratio: Fraction of each class assigned to the training set.
        rng: Object providing ``shuffle`` (the ``random`` module by default,
            or a ``random.Random`` instance).

    Returns:
        The sorted list of class folder names that were processed.

    Raises:
        FileNotFoundError: If ``input_dir`` does not exist.
    """
    train_dir = os.path.join(output_dir, "train")
    val_dir = os.path.join(output_dir, "val")
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort so that a fixed
    # seed really yields the same split on every machine and every run.
    class_names = sorted(
        d for d in os.listdir(input_dir)
        if os.path.isdir(os.path.join(input_dir, d))
    )

    for cls in class_names:
        print(f"Splitting class: {cls}")
        cls_input_path = os.path.join(input_dir, cls)
        # Keep regular files only: shutil.copy2 cannot copy a directory.
        files = sorted(
            f for f in os.listdir(cls_input_path)
            if os.path.isfile(os.path.join(cls_input_path, f))
        )
        rng.shuffle(files)  # Shuffle files before splitting

        train_count = int(len(files) * train_ratio)
        subsets = (
            (train_dir, files[:train_count]),
            (val_dir, files[train_count:]),
        )
        for subset_dir, subset_files in subsets:
            # Create the class folder inside the train/val directory
            cls_output_path = os.path.join(subset_dir, cls)
            os.makedirs(cls_output_path, exist_ok=True)
            for f in subset_files:
                shutil.copy2(
                    os.path.join(cls_input_path, f),
                    os.path.join(cls_output_path, f),
                )

    return class_names


# Paths
input_dir = "cell_dataset"        # Your full dataset with all images in class folders
output_dir = "data_split"         # Where train/val folders will be created
train_ratio = 0.8                 # 80% train, 20% validation split

# For reproducibility
random.seed(42)

# Train and val directories (created by split_dataset)
train_dir = os.path.join(output_dir, "train")
val_dir = os.path.join(output_dir, "val")

if os.path.isdir(input_dir):
    classes = split_dataset(input_dir, output_dir, train_ratio)
    print("Dataset split into train and validation sets!")

    # After splitting, delete the original dataset folder to save space.
    # Reached only if every copy above succeeded (copy errors raise).
    shutil.rmtree(input_dir)
    print(f"'{input_dir}' folder has been deleted to save space.")
else:
    # The source folder is removed after a successful split, so re-running
    # this cell used to crash with FileNotFoundError. Reuse the class folders
    # of an existing split instead, if there is one.
    if os.path.isdir(train_dir):
        classes = sorted(
            d for d in os.listdir(train_dir)
            if os.path.isdir(os.path.join(train_dir, d))
        )
    else:
        classes = []
    print(f"'{input_dir}' not found - nothing to split (already split into '{output_dir}'?).")

FileNotFoundError: [Errno 2] No such file or directory: 'cell_dataset'