In [24]:
""" Merges cropped images (stored locally) with label annotations, performing some data cleaning and producing a resulting dataframe """

' Merges cropped images (stored locally) with label annotations, performing some data cleaning and producing a resulting dataframe '

In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2

In [14]:
# CSV file with the label annotations (read into a dataframe further down).
annotations = "jldp-annotations.csv"
# Directory whose file names are listed to obtain the image ids.
images_path = "images-bboxes/jldp"

In [15]:
# Load the label annotations; later cells read the image_id, species and
# common_name columns from this frame.
df_annot = pd.read_csv(annotations)

In [16]:
# Image ids are the file names found in images_path, cut at the first ".".
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# keep the row order (and therefore the dataframe index) reproducible across
# runs and machines.
img_id = [img.split(".")[0] for img in sorted(os.listdir(images_path))]

# Per-image labels, meant to be filled in parallel with img_id.
species = []
common_name = []
# Not populated by any of the visible cells.
images = []

In [17]:
# Look up the labels of every image id, using the FIRST annotation row that
# carries that image_id (same choice the per-image filter made with [0]).
# Building the lookup table once replaces a full boolean-mask scan of df_annot
# per image, which cost O(len(img_id) * len(df_annot)).
first_annot = df_annot.drop_duplicates(subset="image_id", keep="first").set_index("image_id")
species_by_id = first_annot["species"].to_dict()
common_name_by_id = first_annot["common_name"].to_dict()

# Images without any annotation row get an empty-string label.
# The lists are rebuilt from scratch (not appended to) so that re-running this
# cell cannot accumulate duplicate entries.
species = [species_by_id.get(iid, "") for iid in img_id]
common_name = [common_name_by_id.get(iid, "") for iid in img_id]

In [18]:
# Assemble one row per image: (image_name, species, common_name).
# zip() pairs the three parallel lists positionally and stops at the shortest.
label_rows = list(zip(img_id, species, common_name))
df = pd.DataFrame(label_rows, columns=["image_name", "species", "common_name"])

In [19]:
# 34540 -- presumably the number of rows in df when this was last run
# (TODO confirm against len(df))
df

Unnamed: 0,image_name,species,common_name
0,0005ef15-2aa9-4df3-b843-0329866e57c3,latrans,Coyote
1,000a7ac3-decf-40ad-83c8-eff254efc678,latrans,Coyote
2,000a7ac3-decf-40ad-83c8-eff254efc678_500,,
3,0013dc45-7e2d-4c38-b874-28267110528d,,Mammal
4,0013dc45-7e2d-4c38-b874-28267110528d_500,,
...,...,...,...
34535,fff2f4e7-429f-4571-a4e4-32642ac8776e_500,,
34536,fff38e44-fc40-4a7e-a9e8-64ee14108035,latrans,Coyote
34537,fff38e44-fc40-4a7e-a9e8-64ee14108035_500,,
34538,fffd4420-abf8-49ed-b6d5-35a0b2e43c6e,hemionus,Mule Deer


In [20]:
# Number of images per (species, common_name) pair, most frequent first.
(
    df[["species", "common_name", "image_name"]]
    .groupby(["species", "common_name"], as_index=False)
    .count()
    .sort_values(by="image_name", ascending=False)
)

Unnamed: 0,species,common_name,image_name
0,,,19205
25,latrans,Coyote,5282
37,scrofa,Wild Boar,2530
23,hemionus,Mule Deer,1276
1,No CV Result,No CV Result,780
33,occidentalis,Western fence lizard,467
39,taurus,Domestic Cattle,235
11,californicus,California Gull,196
32,occidentalis,Western Gull,157
5,aura,Turkey Vulture,134


In [21]:
# Keep only rows with a usable species label: drop rows whose species is the
# empty string or the "No CV Result" placeholder, then drop every row that
# still has a missing value in any column.
unusable_species = ["", "No CV Result"]
has_species = ~df["species"].isin(unusable_species)
df_sub = df[has_species].dropna()

In [22]:
# Drop labels that are too rare: a common_name is kept only when it has more
# than min_count images.
min_count = 5

# Images per common_name (count of image_name within each group).
df_group = (
    df_sub[["image_name", "common_name"]]
    .groupby("common_name", as_index=False)
    .count()
)
is_frequent = df_group["image_name"] > min_count
selected_species = df_group.loc[is_frequent, "common_name"].tolist()

keep_row = df_sub["common_name"].isin(selected_species)
df_sub = df_sub[keep_row]

In [23]:
# 10630 -- presumably the number of rows left in df_sub after filtering
# (TODO confirm against len(df_sub))
df_sub

Unnamed: 0,image_name,species,common_name
0,0005ef15-2aa9-4df3-b843-0329866e57c3,latrans,Coyote
1,000a7ac3-decf-40ad-83c8-eff254efc678,latrans,Coyote
6,00162be1-3ae1-4703-992e-c5865e6e870a,scrofa,Wild Boar
8,00168b48-41c5-4c66-bef6-908d9a50175e,scrofa,Wild Boar
15,0030e116-4944-4390-9d56-094779752388,latrans,Coyote
...,...,...,...
34530,ffeff04b-2970-4be5-a966-84a3f082bdf9,latrans,Coyote
34532,fff09fad-a7ec-4f3e-a1a9-5b7c07eb4b57,latrans,Coyote
34534,fff2f4e7-429f-4571-a4e4-32642ac8776e,latrans,Coyote
34536,fff38e44-fc40-4a7e-a9e8-64ee14108035,latrans,Coyote


In [12]:
# Write the filtered labels to df.csv in the working directory. With the
# default arguments the dataframe index is written too, as a first column
# without a header.
# NOTE(review): this cell's execution count (12) is lower than that of the
# cells above it, so the saved file may predate the current df_sub -- re-run
# the notebook top to bottom before relying on df.csv.
df_sub.to_csv("df.csv")