In [1]:
from roboflow import Roboflow
from PIL import Image
import numpy as np
import pandas as pd
import os 
import re 

In [2]:
rf = Roboflow("132cxQxyrOVmPD63wJrV") # api keys are individual, change to your own
project = rf.workspace().project("elephant-seals-project-mark-1")
model = project.version("14").model

loading Roboflow workspace...
loading Roboflow project...


In [3]:
# image paths 
path_to_beach_imgs = "Beach_Images/LS 2.13.23"
beach_imgs_paths = [os.path.join(path_to_beach_imgs, file) for file in os.listdir(path_to_beach_imgs)]

In [None]:
# extracting clumps and getting rid of overlaps 
clump_imgs_dct = {} # dictionary of clumps. image id will be the key and a list of clumps will be its value. 
indiv_imgs_dct = {} # dictionary of individuals. same schema as clump_imgs_dct 
# num_seals = [] # number of individual seals 

# define confidence levels for seals and clumps
seal_conf_lvl = 20
clump_conf_lvl = 40 
overlap = 20 

def intersects(seal, clump):
    """Check if seal and clump overlap"""
    seal_x1 = seal['x'] - seal['width'] / 2
    seal_x2 = seal['x'] + seal['width'] / 2
    seal_y1 = seal['y'] - seal['height'] / 2
    seal_y2 = seal['y'] + seal['height'] / 2

    clump_x1 = clump['x'] - clump['width'] / 2
    clump_x2 = clump['x'] + clump['width'] / 2
    clump_y1 = clump['y'] - clump['height'] / 2
    clump_y2 = clump['y'] + clump['height'] / 2

    return not (
        seal_x2 <= clump_x1 or
        seal_x1 >= clump_x2 or
        seal_y2 <= clump_y1 or
        seal_y1 >= clump_y2
    )


for path in beach_imgs_paths:

    image = Image.open(path)

    preds = model.predict(path, confidence=min(seal_conf_lvl, clump_conf_lvl), overlap=overlap).json().get('predictions', []) 

    seals = [pred for pred in preds if pred['class'] == 'seals' and pred['confidence'] > seal_conf_lvl / 100]
    clumps = [pred for pred in preds if pred['class'] == 'clump' and pred['confidence'] > clump_conf_lvl / 100]

    # getting individual seals 
    filtered_seals = [seal for seal in seals if not any(intersects(seal, clump) for clump in clumps)]
    # num_seals.append(len(filtered_seals))

    key = re.sub(r'.*/([A-Za-z]+) (\d+)\.(\d+)\.(\d+)\\(DJI_\d+)\.JPG', r'\1\2\3\4_\5', path)
    
    # getting clumps 
    clump_imgs_dct[key] = [] 
    for clump in clumps:
        clump_x1 = clump['x'] - clump['width'] / 2
        clump_x2 = clump['x'] + clump['width'] / 2
        clump_y1 = clump['y'] - clump['height'] / 2
        clump_y2 = clump['y'] + clump['height'] / 2

        top_left_clump = (clump_x1, clump_y1)
        bottom_right_clump = (clump_x2, clump_y2)

        subimage_clump = image.crop((*top_left_clump, *bottom_right_clump))
        
        clump_imgs_dct[key].append(subimage_clump) 

    # getting individual seals 
    indiv_imgs_dct[key] = [] 
    for indiv in filtered_seals: 
        indiv_x1 = indiv['x'] - indiv['width'] / 2
        indiv_x2 = indiv['x'] + indiv['width'] / 2
        indiv_y1 = indiv['y'] - indiv['height'] / 2
        indiv_y2 = indiv['y'] + indiv['height'] / 2

        top_left_indiv = (indiv_x1, indiv_y1)
        bottom_right_indiv = (indiv_x2, indiv_y2)

        subimage_indiv = image.crop((*top_left_indiv, *bottom_right_indiv))
        
        indiv_imgs_dct[key].append(subimage_indiv) 

In [None]:
# Extracting Length, Width and RGB metrics for clumps 

clump_keys = []
clump_widths = []
clump_heights = []
clump_avg_r = []
clump_sd_r = []
clump_avg_g = []
clump_sd_g = []
clump_avg_b = []
clump_sd_b = [] 

for key, clump_lst in clump_imgs_dct.items():

    for idx, clump in enumerate(clump_lst): 

        clump_keys.append(f"{key}_clump_{idx+1:04d}")
    
        width, height = clump.size

        clump_widths.append(width)
        clump_heights.append(height)

        img_array = np.array(clump)

        clump_avg_r.append(np.mean(img_array[1, :, :]))
        clump_sd_r.append(np.std(img_array[1, :, :]))
        clump_avg_g.append(np.mean(img_array[:, 1, :]))
        clump_sd_g.append(np.std(img_array[:, 1, :]))
        clump_avg_b.append(np.mean(img_array[:, :, 1]))
        clump_sd_b.append(np.std(img_array[:, :, 1]))

heuristics_clump = pd.DataFrame({'clump_key': clump_keys, 
                                'width': clump_widths, 
                                'height': clump_heights,
                                'avg_r': clump_avg_r, 
                                'sd_r': clump_sd_r, 
                                'avg_g': clump_avg_g,
                                'sd_g': clump_sd_g,
                                'avg_b': clump_avg_b,
                                'sd_b': clump_sd_b
                                })

In [None]:
heuristics_clump.head()

Unnamed: 0,key,width,height,avg_r,sd_r,avg_g,sd_g,avg_b,sd_b
0,LS21323_DJI_0001_clump_0001,222,160,148.968468,8.861153,150.972917,10.472847,144.783333,35.448822
1,LS21323_DJI_0001_clump_0002,400,244,128.905,16.282485,133.304645,25.476204,125.685543,27.45159
2,LS21323_DJI_0001_clump_0003,324,314,129.433128,18.470493,148.5,10.585889,136.547643,32.810271
3,LS21323_DJI_0001_clump_0004,248,160,138.185484,11.64301,132.254167,17.457388,139.239592,29.105456
4,LS21323_DJI_0001_clump_0005,162,238,127.631687,9.237278,120.865546,9.84168,127.14724,25.30867


In [None]:
# writing heuristics of clump
heuristics_clump.to_csv('heuristics_clump.csv', index=False) 

In [6]:
# Extracting Length, Width and RGB metrics for individuals 

indiv_keys = []
indiv_widths = []
indiv_heights = []
indiv_avg_r = []
indiv_sd_r = []
indiv_avg_g = []
indiv_sd_g = []
indiv_avg_b = []
indiv_sd_b = [] 

for key, indiv_lst in indiv_imgs_dct.items():

    for idx, indiv in enumerate(indiv_lst): 

        indiv_keys.append(f"{key}_indiv_{idx+1:04d}")
    
        width, height = indiv.size

        indiv_widths.append(width)
        indiv_heights.append(height)

        img_array = np.array(indiv)

        indiv_avg_r.append(np.mean(img_array[1, :, :]))
        indiv_sd_r.append(np.std(img_array[1, :, :]))
        indiv_avg_g.append(np.mean(img_array[:, 1, :]))
        indiv_sd_g.append(np.std(img_array[:, 1, :]))
        indiv_avg_b.append(np.mean(img_array[:, :, 1]))
        indiv_sd_b.append(np.std(img_array[:, :, 1]))

heuristics_indiv = pd.DataFrame({'indiv_key': indiv_keys, 
                                'width': indiv_widths, 
                                'height': indiv_heights,
                                'avg_r': indiv_avg_r, 
                                'sd_r': indiv_sd_r, 
                                'avg_g': indiv_avg_g,
                                'sd_g': indiv_sd_g,
                                'avg_b': indiv_avg_b,
                                'sd_b': indiv_sd_b
                                })

In [8]:
heuristics_indiv.head()

Unnamed: 0,indiv_key,width,height,avg_r,sd_r,avg_g,sd_g,avg_b,sd_b
0,LS21323_DJI_0001_indiv_0001,238,266,119.578431,12.377473,115.08396,13.151678,115.035319,21.956118
1,LS21323_DJI_0001_indiv_0002,128,80,140.585938,7.490474,148.341667,11.107052,127.791309,19.386128
2,LS21323_DJI_0001_indiv_0003,94,114,141.120567,15.050259,140.780702,9.503547,119.774076,27.765374
3,LS21323_DJI_0001_indiv_0004,214,238,122.504673,10.984872,126.170868,16.46638,117.020518,21.614128
4,LS21323_DJI_0001_indiv_0005,136,58,171.379902,12.196122,150.741379,28.148964,151.846856,27.028394


In [7]:
# writing heuristics of indiv
heuristics_indiv.to_csv('heuristics_indiv.csv', index=False) 

In [10]:
# empty set for recording 
indiv_count = pd.DataFrame({'individual': indiv_keys, 
                               'Seal Type': None 
                               })

In [11]:
# writing empty set 
indiv_count.to_csv('indiv_count.csv', index=False)

In [14]:
# saving clumps as images in a new folder  

output_directory = "clumps" # insert folder to save clumps 

if not os.path.exists(output_directory):
    os.makedirs(output_directory)

for key, clump_lst in clump_imgs_dct.items(): 
    for idx, img in enumerate(clump_lst):
        img_path = os.path.join(output_directory, f"{key}_clump_{idx+1:04d}.tif") 
        img.save(img_path)

In [12]:
# saving indvidiuals as images in a new folder

output_directory = "individuals" # insert folder to save inds 

if not os.path.exists(output_directory):
    os.makedirs(output_directory)

for key, indiv_lst in indiv_imgs_dct.items(): 
    for idx, img in enumerate(indiv_lst):
        img_path = os.path.join(output_directory, f"{key}_indiv_{idx+1:04d}.tif") 
        img.save(img_path)