This code extracts up to 10 small image patches at random from each dataset and saves them locally, to make it easier to visualise these images overlaid on the PCA of the embeddings. The patches are saved to disk first because albumentations and lightly do not work well together, owing to the pydantic version each of them requires.

The image patches are extracted where there are rumex plants, but for simplicity the bounding boxes are not overlaid.

In [None]:
import pandas as pd
import os
import cv2
import json
import random
from glob import glob
from tqdm import tqdm
import albumentations as A

# Base directory; the image and annotation folders below are resolved relative to it.
darwin_path = "/mnt/Data-Work-RE/26_Agricultural_Engineering-RE/263_DP/00_Darwin/digital-production"

# Annotation JSON files and the images they belong to.
ann_dir = os.path.join(darwin_path, 'lightly/releases/1/annotations')
im_dir = os.path.join(darwin_path, 'lightly/images')

# Table read from the assets folder; its first rows are previewed as the cell output.
df = pd.read_csv('../assets/lightly_totalimages_selectedimages_annotated_with_fields_label.csv')

df.head()

In [None]:
def _rumex_bboxes(ann):
    """Return the 'rumex_plant' boxes of one annotation dict as [x_min, y_min, x_max, y_max] lists."""
    bboxes = []
    for annotation in ann.get('annotations', []):
        # Entries without a bounding box (or with another class name) are ignored.
        if 'bounding_box' not in annotation:
            continue
        if annotation.get('name') != 'rumex_plant':
            continue
        bbox = annotation['bounding_box']
        # The file stores x/y/w/h; the crop transform is configured for corner format.
        bboxes.append([bbox['x'], bbox['y'], bbox['x'] + bbox['w'], bbox['y'] + bbox['h']])
    return bboxes


def save_sample_crops_per_dataset(df, im_dir, ann_dir, crop_height=1000, crop_width=1000, output_dir='./figures/patches', n_samples=10):
    """Save up to ``n_samples`` random crops containing a rumex plant for every dataset in ``df``.

    For each value in ``df['dataset']`` the images in ``im_dir`` whose file name
    starts with that value (``.jpg`` or ``.png``) are visited in random order.
    An image yields a crop when its annotation file ``<ann_dir>/<basename>.json``
    exists, lists at least one ``rumex_plant`` bounding box, the image can be
    read and the crop transform succeeds. Candidates are tried until
    ``n_samples`` crops have been written or the dataset is exhausted, so
    images that cannot be used do not reduce the number of crops.

    Args:
        df: Table with a ``dataset`` column; one pass is made per row.
        im_dir: Directory holding the images.
        ann_dir: Directory holding one JSON annotation file per image. Each file
            is read as a dict with an ``annotations`` list whose entries may
            carry ``name`` and ``bounding_box`` (``x``, ``y``, ``w``, ``h``).
        crop_height: Height of the saved crops in pixels.
        crop_width: Width of the saved crops in pixels.
        output_dir: Directory the crops are written to; created if missing.
        n_samples: Maximum number of crops to save per dataset.

    Returns:
        None. Crops are written as ``<image basename>-CROP<k>.jpg`` in
        ``output_dir``; problems are reported on stdout.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Random crop around annotated boxes (see the Albumentations documentation
    # of AtLeastOneBBoxRandomCrop); boxes are passed as pascal_voc corners.
    transform = A.Compose([
        A.AtLeastOneBBoxRandomCrop(height=crop_height, width=crop_width, p=1.0)
    ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels']))

    for dataset in tqdm(df['dataset'], total=len(df), desc="Processing datasets"):
        # Sorted so that the random order below is reproducible under random.seed().
        img_paths = sorted(
            glob(os.path.join(im_dir, f"{dataset}*.jpg"))
            + glob(os.path.join(im_dir, f"{dataset}*.png"))
        )
        if not img_paths:
            print(f"No images found for dataset: {dataset}")
            continue

        # Shuffled copy of every candidate: unusable images are skipped and the
        # next candidate is tried, instead of shrinking a fixed sample of 10.
        candidates = random.sample(img_paths, len(img_paths))
        saved = 0

        for img_path in candidates:
            if saved >= n_samples:
                break

            # Check the (small) annotation file first so that images without
            # rumex boxes are rejected before the full image is decoded.
            basename = os.path.splitext(os.path.basename(img_path))[0]
            ann_path = os.path.join(ann_dir, f"{basename}.json")
            if not os.path.exists(ann_path):
                print(f"Annotation not found for image: {img_path}")
                continue

            with open(ann_path, 'r') as f:
                ann = json.load(f)

            bboxes = _rumex_bboxes(ann)
            if not bboxes:
                continue
            labels = ['rumex_plant'] * len(bboxes)

            image = cv2.imread(img_path)
            if image is None:
                print(f"Failed to load image: {img_path}")
                continue

            # The crop is applied to the BGR array as loaded: cropping does not
            # depend on channel order, so no RGB round trip is needed.
            # Broad catch is deliberate: one bad image (e.g. smaller than the
            # crop, or with invalid boxes) must not abort the whole run.
            try:
                transformed = transform(image=image, bboxes=bboxes, class_labels=labels)
            except Exception as e:
                print(f"Transform failed for {img_path}: {e}")
                continue

            save_path = os.path.join(output_dir, f"{basename}-CROP{saved + 1}.jpg")
            # cv2.imwrite signals failure through its return value, not an exception.
            if not cv2.imwrite(save_path, transformed['image']):
                print(f"Failed to write crop: {save_path}")
                continue
            saved += 1

        if saved < n_samples:
            print(f"Only {saved}/{n_samples} crops saved for dataset: {dataset}")

    print(f"All crops saved in {output_dir}")


In [4]:
import pandas as pd

# Write 678x678 px crops for every dataset listed in df.
save_sample_crops_per_dataset(
    df,
    im_dir,
    ann_dir,
    crop_height=678,
    crop_width=678,
    output_dir='./figures/patches',
)


Processing datasets: 100%|██████████| 50/50 [04:05<00:00,  4.90s/it]

All crops saved in ./figures/patches



