In [None]:
from dotenv import load_dotenv
from roboflow import Roboflow
from PIL import Image
import requests
from io import BytesIO
import numpy as np
import pandas as pd
import os 

In [None]:
load_dotenv()

api_key = os.getenv("132cxQxyrOVmPD63wJrV") # api keys are individual, change to your own 
rf = Roboflow(api_key)
project = rf.workspace().project("elephant-seals-project-mark-1")
model = project.version("6").model

In [None]:
# image paths 
path_to_beach_imgs = ""
beach_imgs_paths = [os.path.join(path_to_beach_imgs, file) for file in os.listdir(path_to_beach_imgs)]

In [None]:
# extracting clumps and getting rid of overlaps 
clump_imgs_dct = {} # dictionary of clumps. image id will be the key and a list of clumps will be its value. 
num_seals = [] # number of individual seals 

# define confidence levels
seal_conf_lvl = 0
clump_conf_lvl = 0 

def intersects(seal, clump):
    """Check if seal and clump overlap"""
    seal_x1 = seal['x'] - seal['width'] / 2
    seal_x2 = seal['x'] + seal['width'] / 2
    seal_y1 = seal['y'] - seal['height'] / 2
    seal_y2 = seal['y'] + seal['height'] / 2

    clump_x1 = clump['x'] - clump['width'] / 2
    clump_x2 = clump['x'] + clump['width'] / 2
    clump_y1 = clump['y'] - clump['height'] / 2
    clump_y2 = clump['y'] + clump['height'] / 2

    return not (
        seal_x2 <= clump_x1 or
        seal_x1 >= clump_x2 or
        seal_y2 <= clump_y1 or
        seal_y1 >= clump_y2
    )


for path in beach_imgs_paths:

    image = Image.open(path)

    result = model.predict(path, confidence=1, overlap=30)

    preds = result.json().get("predictions", [])

    seals = [pred for pred in preds if pred['class'] == 'seals' and pred['confidence'] > seal_conf_lvl]
    clumps = [pred for pred in preds if pred['class'] == 'clump' and pred['confidence'] > clump_conf_lvl]

    # getting individual seals 
    filtered_seals = [seal for seal in seals if not any(intersects(seal, clump) for clump in clumps)]
    num_seals.append(len(filtered_seals))
    
    # getting clumps 
    clump_imgs_dct[path] = [] 
    for clump in clumps:
        clump_x1 = clump['x'] - clump['width'] / 2
        clump_x2 = clump['x'] + clump['width'] / 2
        clump_y1 = clump['y'] - clump['height'] / 2
        clump_y2 = clump['y'] + clump['height'] / 2

        top_left_clump = (clump_x1, clump_y1)
        bottom_right_clump = (clump_x2, clump_y2)

        subimage = image.crop((*top_left_clump, *bottom_right_clump))
        
        clump_imgs_dct[path].append(subimage) 

In [6]:
# Extracting Length, Width and RGB metrics 

keys = []
widths = []
heights = []
avg_r = []
sd_r = []
avg_g = []
sd_g = []
avg_b = []
sd_b = [] 

for key, clump_lst in clump_imgs_dct.items():

    for clump in clump_lst: 

        keys.append(key)
    
        width, height = clump.size

        widths.append(width)
        heights.append(height)

        img_array = np.array(clump)

        avg_r.append(np.mean(img_array[1, :, :]))
        sd_r.append(np.std(img_array[1, :, :]))
        avg_g.append(np.mean(img_array[:, 1, :]))
        sd_g.append(np.std(img_array[:, 1, :]))
        avg_b.append(np.mean(img_array[:, :, 1]))
        sd_b.append(np.std(img_array[:, :, 1]))

heuristics = pd.DataFrame({'key': keys, 
                          'width': widths, 
                          'height': heights,
                          'avg_r': avg_r, 
                          'sd_r': sd_r, 
                          'avg_g': avg_g,
                          'sd_g': sd_g,
                          'avg_b': avg_b,
                          'sd_b': sd_b
                          })

In [7]:
heuristics.head()

Unnamed: 0,key,width,height,avg_r,sd_r,avg_g,sd_g,avg_b,sd_b
0,https://media.discordapp.net/attachments/52214...,42,44,95.333333,14.293411,107.560606,19.470746,122.245671,41.449524
1,https://media.discordapp.net/attachments/52214...,58,33,105.022989,13.818176,99.40404,13.762722,98.541275,20.956531
2,https://media.discordapp.net/attachments/52214...,42,48,101.865079,14.479517,84.208333,19.991274,120.34623,41.788431
3,https://media.discordapp.net/attachments/52214...,32,57,96.489583,15.163,110.479532,24.710331,122.048246,36.630323
4,https://media.discordapp.net/attachments/52214...,42,41,124.793651,26.456408,106.902439,15.592063,114.56446,29.859595


In [75]:
# writing heuristics 

heuristics.to_csv('heuristics.csv') # change to filepath 

In [80]:
# saving clumps as images in a new folder  

output_directory = "clumps" # insert folder to save clumps 

if not os.path.exists(output_directory):
    os.makedirs(output_directory)

for key, clump_lst in clump_imgs_dct.items(): 
    for idx, img in enumerate(clump_lst):
        img_path = os.path.join(output_directory, f"clump_{idx+1:04d}.jpg") # naming scheme needs to change... incorporate key name somehow 
        img.save(img_path)