In [None]:
from roboflow import Roboflow
from PIL import Image, ImageDraw
import os 
import json 
import cv2 
import numpy as np
import pandas as pd

In [None]:
# object detection api documentation: https://docs.roboflow.com/deploy/hosted-api/custom-models/object-detection
# API keys are personal secrets: keep them out of the notebook. Export
# ROBOFLOW_API_KEY in the environment that launches Jupyter instead.
# Any key that was ever committed in this cell should be revoked and re-issued.
api_key = os.environ.get("ROBOFLOW_API_KEY", "")
model_endpoint = "" # model endpoint (after deployment)
version = 0 # version number

# Fail early with a readable message instead of an opaque HTTP/auth error.
if not api_key:
    raise RuntimeError("Set the ROBOFLOW_API_KEY environment variable before running this cell.")
if not model_endpoint:
    raise ValueError("Fill in model_endpoint before running this cell.")

rf = Roboflow(api_key=api_key)
# Use the configured endpoint rather than a placeholder string literal.
project = rf.workspace().project(model_endpoint)
model = project.version(version).model

In [None]:
image_folder_path = "" # Your Image Folder Path Here

# Only real image files are kept: os.listdir also returns hidden files and
# sub-directories (.DS_Store, .ipynb_checkpoints, ...) that cannot be opened.
image_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")

# Sorted so that the image order (and every list derived from it) is the same
# on every run; os.listdir makes no ordering guarantee.
seal_images = sorted(
    os.path.join(image_folder_path, file)
    for file in os.listdir(image_folder_path)
    if file.lower().endswith(image_extensions)
)

assert seal_images, f"No image files found in {image_folder_path!r}"

Image.open(seal_images[0]) # for verification

In [None]:
# adjust for model tendencies
seal_conf = 20
seal_overlap = 30
clump_conf = 20
clump_overlap = 30


def _predictions(result):
    """Return the 'predictions' list from the value of model.predict(...).json().

    A dict is used as-is; a JSON string is decoded first, so either form of
    response works.
    """
    if isinstance(result, (str, bytes, bytearray)):
        result = json.loads(result)
    return result['predictions']


def _to_box(prediction):
    """Convert a centre/size prediction into (x1, y1, x2, y2) corner coordinates."""
    half_width = prediction['width'] / 2
    half_height = prediction['height'] / 2
    return (
        prediction['x'] - half_width,
        prediction['y'] - half_height,
        prediction['x'] + half_width,
        prediction['y'] + half_height,
    )


def _boxes_overlap(first, second):
    """True when two (x1, y1, x2, y2) boxes intersect (touching edges count)."""
    return (
        max(first[0], second[0]) <= min(first[2], second[2])
        and max(first[1], second[1]) <= min(first[3], second[3])
    )


# extracting clumps
clumps = []      # one cropped PIL image per detected clump
clump_size = []  # class label of each clump, aligned index-for-index with `clumps`
num_seals = []   # per image: individual seals that do not overlap any clump

for img in seal_images:
    seal_predictions = _predictions(
        model.predict(img, confidence=seal_conf, overlap=seal_overlap, classes='seals').json()
    )
    # Kept in its own name so the `clumps` accumulator above is never overwritten.
    clump_predictions = _predictions(
        model.predict(img, confidence=clump_conf, overlap=clump_overlap, classes='clump').json()
    )

    # getting individual seals
    seal_pos = [_to_box(seal) for seal in seal_predictions]

    # getting clumps
    with Image.open(img) as image:
        for clump in clump_predictions:
            clump_box = _to_box(clump)

            # Rebuild the list instead of calling remove() while iterating,
            # which would skip the element following each removed one.
            seal_pos = [pos for pos in seal_pos if not _boxes_overlap(clump_box, pos)]

            clumps.append(image.crop(clump_box))
            clump_size.append(clump['class'])

    # Exactly one count per image, taken after seals inside clumps are dropped,
    # so num_seals stays aligned with seal_images.
    num_seals.append(len(seal_pos))

In [None]:
# pad and process clumps to df (ONLY FOR IMAGE MODEL APPROACH)
def _load_bgr(source):
    """Load one image as an (H, W, 3) array in OpenCV's BGR channel order.

    Args:
        source: a file path (read with cv2.imread), a numpy array (taken to be
            BGR already, as cv2 would produce), or a PIL image (converted from
            RGB).

    Returns:
        The pixel array, or None when a path cannot be read (cv2.imread's
        own failure signal).
    """
    if isinstance(source, (str, os.PathLike)):
        return cv2.imread(os.fspath(source))
    if isinstance(source, np.ndarray):
        pixels = source
    else:
        # PIL image: force three RGB channels, then reverse them to BGR.
        pixels = np.asarray(source.convert("RGB"))[:, :, ::-1]
    if pixels.ndim == 2:
        # Grayscale array: replicate the single channel three times.
        pixels = np.stack([pixels] * 3, axis=-1)
    # Drop an alpha channel if one is present.
    return pixels[:, :, :3]


def get_largest_dimensions(image_paths):
    """Return (max_height, max_width) over every readable image.

    Args:
        image_paths: iterable of file paths, numpy arrays or PIL images.

    Returns:
        Tuple of ints; (0, 0) when nothing could be read.
    """
    max_height, max_width = 0, 0
    for source in image_paths:
        img = _load_bgr(source)
        if img is None:
            continue
        h, w = img.shape[:2]
        max_height = max(max_height, h)
        max_width = max(max_width, w)
    return max_height, max_width


def preprocess_image(image_path, max_height, max_width, pad_color=(255, 0, 127)):
    """Centre an image on a (max_height, max_width) canvas and scale to [0, 1].

    Args:
        image_path: file path, numpy array or PIL image.
        max_height: canvas height in pixels.
        max_width: canvas width in pixels.
        pad_color: three channel values (0-255) used to fill the border.

    Returns:
        float32 array of shape (max_height, max_width, 3), or None when the
        image cannot be read.

    Raises:
        ValueError: if the image is larger than the requested canvas.
    """
    img = _load_bgr(image_path)
    if img is None:
        return None

    original_height, original_width = img.shape[:2]
    if original_height > max_height or original_width > max_width:
        raise ValueError(
            f"image of size {original_height}x{original_width} does not fit "
            f"in a {max_height}x{max_width} canvas"
        )

    padded_img = np.full((max_height, max_width, 3), pad_color, dtype=np.uint8)

    # Integer offsets that centre the image; any odd pixel goes to the bottom/right.
    y_offset = (max_height - original_height) // 2
    x_offset = (max_width - original_width) // 2

    padded_img[y_offset:y_offset + original_height, x_offset:x_offset + original_width] = img

    return padded_img.astype("float32") / 255.0


def process_images_to_dataframe(image_paths, label):
    """Flatten padded images into a DataFrame whose last column is the label.

    Args:
        image_paths: iterable of file paths, numpy arrays or PIL images.
        label: either one value applied to every row, or a list / tuple /
            array / Series holding one label per image.

    Returns:
        DataFrame with one row per readable image: float32 pixel columns
        followed by a final label column. Empty when no image is readable.

    Raises:
        ValueError: if a label sequence's length differs from the number of images.
    """
    sources = list(image_paths)

    per_image = isinstance(label, (list, tuple, np.ndarray, pd.Series))
    if per_image and len(label) != len(sources):
        raise ValueError(
            f"got {len(label)} labels for {len(sources)} images; "
            "pass one label per image or a single scalar"
        )
    labels = list(label) if per_image else [label] * len(sources)

    max_height, max_width = get_largest_dimensions(sources)

    rows, kept_labels = [], []
    for source, source_label in zip(sources, labels):
        padded_img = preprocess_image(source, max_height, max_width)
        if padded_img is None:
            # Unreadable image: skip it together with its label to stay aligned.
            continue
        rows.append(padded_img.flatten())
        kept_labels.append(source_label)

    if not rows:
        return pd.DataFrame()

    # Labels go in their own column so string labels cannot coerce the
    # pixel values to strings.
    frame = pd.DataFrame(np.stack(rows))
    frame[frame.shape[1]] = kept_labels
    return frame


# Build the table from the clump crops (`clumps`) and their class labels
# (`clump_size`) collected by the detection cell.
# NOTE(review): confirm process_images_to_dataframe accepts in-memory images
# and a per-image label list, not only file paths and a single label.
df = process_images_to_dataframe(clumps, clump_size)


In [None]:
# The output folder may not exist on a fresh checkout; to_csv does not create it.
os.makedirs('data', exist_ok=True)
df.to_csv('data/clumps_data.csv')