In [1]:
# Load IPython's autoreload extension; mode 2 reloads imported modules before
# each cell executes, so edits to local modules (e.g. config.py) are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import cv2 as cv
import numpy as np
import pprint
import matplotlib.pyplot as plt
import os
import time
# Default matplotlib figure size (width, height) in inches.
plt.rcParams['figure.figsize'] = [15, 10]
# Show every column when displaying a DataFrame instead of truncating wide frames.
pd.options.display.max_columns = None
# Do not impose a fixed character width on DataFrame output.
pd.options.display.width=None

from config import folder_locs, interest_cols, rando_seed

# Instructions:
1. Put all your photos in the input_photos folder
2. Put the annotation file in the input_csvs folder
3. Set the input_csv_filename variable (in the User Input section below) to the name of the annotation file
4. Run all the cells (top menu: Run > Run All Cells)

# User Inputs

enter the name of the csv file. 

if the file is an excel file, please save the file as a csv file by:
+ opening the excel file
+ selecting File from the menu at the top
+ selecting Save As
+ selecting CSV UTF-8 from the drop-down menu
+ selecting Save

In [3]:
# Name of the annotation csv file; it is appended to folder_locs['in_files']
# further down to build the path that gets loaded.
input_csv_filename = 'sample csv.csv'
# Filename extensions that is_file_a_photo treats as photos.
photo_file_types = ['.jpg', '.jpeg']

# Code

creating the relative filepath for the annotation file

In [4]:
# Build the path with os.path.join so it is valid whether or not
# folder_locs['in_files'] ends with a path separator.
input_csv_filepath = os.path.join(folder_locs['in_files'], input_csv_filename)
# print(input_csv_filepath)

./input_csvs/sample csv.csv


loading the annotation file as a dataframe

In [5]:
# Read the annotation file into a DataFrame. Later cells look up its columns
# through the names stored in interest_cols.
csv_df = pd.read_csv(input_csv_filepath)
# csv_df

Unnamed: 0,Track,File Name,3: Unique Frame Identifier,4-7: Img-bbox(TL_x,TL_y,BR_x,BR_y),8: Detection or Length Confidence,9: Target Length (0 or -1 if invalid),10-11+: Repeated Species,Confidence Pairs or Attributes
0,1,2WDC5729-camera_1-20190225-155821Z(StitchedCli...,554,56,328,734,580,1,-1,Swordfish,1
1,412,94a6c30c-23f0-11e9-a7ea-2352e208ce34.jpg,6131,306,423,425,477,1,-1,Albacore,1
2,7,2WDC5729-camera_1-20190225-155821Z(StitchedCli...,2821,813,238,1194,461,1,-1,Bigeyetuna,1
3,27,2WDC5729-camera_1-20190225-155821Z(StitchedCli...,4602,928,243,1246,421,1,-1,Yellowfintuna,1
4,165078,WDC5729-camera_2-20190119-195049Z61807.jpg,65714,752,561,842,718,1,-1,Blueshark,1


getting all the unique labels in the annotation file

In [6]:
# Distinct values of the label column, in order of first appearance.
# This is taken from the full annotation file (csv_df is only subset further
# down), so the label set does not depend on which photos get processed.
unique_labels = csv_df[interest_cols['label']].unique()
# print(unique_labels)

['Swordfish' 'Albacore' 'Bigeyetuna' 'Yellowfintuna' 'Blueshark']


generating colors for the bounding boxes of the unique labels.  
a random seed has been set in the config.py file, under the scripts folder.  
this means that the colors generated for a certain annotation file will always be the same

In [7]:
# Seed numpy's global RNG so the same annotation file always yields the same colors.
np.random.seed(rando_seed)

channel_values = np.arange(start=30, stop=225)  # candidate values for each color channel
n_channel_values = 3 * len(unique_labels)       # one B, G and R value per label

rando_bgrs = np.random.choice(
    a=channel_values,
    size=n_channel_values,
    # Draw without replacement while the pool is large enough (identical result
    # to before). With more than 65 labels that would raise ValueError, so only
    # then fall back to sampling with replacement.
    replace=n_channel_values > len(channel_values),
).reshape(len(unique_labels), 3).tolist()

creating a dictionary with the unique labels as keys, and the bounding box colors as the values

In [8]:
# Pair every label with its own color triple; the i-th label gets the i-th
# row of rando_bgrs, stored as a tuple.
label_bgrs = {
    label: tuple(bgr)
    for label, bgr in zip(unique_labels, rando_bgrs)
}
# pprint.pprint(label_bgrs)

{'Albacore': (126, 98, 183),
 'Bigeyetuna': (85, 45, 142),
 'Blueshark': (112, 39, 194),
 'Swordfish': (168, 46, 185),
 'Yellowfintuna': (141, 214, 48)}


create a function to determine if a filename is a photo image.  
It uses the extensions defined in the photo_file_types variable under the User Inputs section.

In [9]:
def is_file_a_photo(a_filename, file_types=None):
    """Return ``a_filename`` if its extension marks it as a photo, else ``None``.

    The real file extension (the text from the last dot onwards) is compared
    case-insensitively against the allowed extensions. ``IMG.JPG`` is therefore
    accepted, while names that merely contain an extension somewhere, such as
    ``photo.jpg.txt``, are rejected.

    Args:
        a_filename: File name (or path) to check.
        file_types: Optional iterable of extensions including the leading dot
            (e.g. ``['.jpg', '.jpeg']``). When omitted, the notebook-level
            ``photo_file_types`` list is used.

    Returns:
        ``a_filename`` unchanged when it is a photo, otherwise ``None``.
    """
    if file_types is None:
        file_types = photo_file_types
    allowed_extensions = {ext.lower() for ext in file_types}
    extension = os.path.splitext(a_filename)[1].lower()
    return a_filename if extension in allowed_extensions else None

get all the files in the input_photos folder, keep only the images.

In [10]:
# List the input photo folder and keep only the entries that
# is_file_a_photo recognises as photos.
files_in_input_photos = [
    entry
    for entry in os.listdir(folder_locs['in_pics'])
    if is_file_a_photo(entry)
]
# len(files_in_input_photos)

4

get all files in the output_photos folder.  
The script will assume that these photos have already been processed, and will not process them again.

In [11]:
# List the output photo folder and keep only the entries that
# is_file_a_photo recognises as photos.
file_in_output_photos = [
    entry
    for entry in os.listdir(folder_locs['out_pics'])
    if is_file_a_photo(entry)
]
# len(file_in_output_photos)

4

create a dataframe for the results of the script

In [12]:
# Empty results table: one row per photo, with the outcome recorded for it.
results_df = pd.DataFrame(columns=['filename', 'result'])

add notes for files that have already been processed. 

In [13]:
# Build the table from scratch rather than assigning columns onto the existing
# frame. Re-running this cell after later cells have added rows would
# otherwise fail with a length-mismatch error.
results_df = pd.DataFrame({
    'filename': file_in_output_photos,
    'result': 'already exists in output_photos',  # scalar is broadcast to every row
})
# results_df

make a list of the files to process.   
These are all the images in the input_photos folder that do not have a corresponding file in the output_photos folder.

In [14]:
# Photos present in the input folder but not yet in the output folder.
# Sorted so the processing order (and the row order of the results file) is the
# same on every run; plain set iteration order varies between kernel sessions.
files_to_process = sorted(set(files_in_input_photos) - set(file_in_output_photos))
# len(files_to_process)

0

make a list of photos that exist in the csv file, but do NOT exist in either the input_photos or the output_photos folder.

In [15]:
# Every photo found on disk, whether still waiting in the input folder or
# already written to the output folder.
files_that_exists = sorted(set(files_in_input_photos) | set(file_in_output_photos))

# Filenames referenced by the annotation file that are in neither folder.
# Sorted for a reproducible order; key=str keeps the sort from failing if the
# filename column contains a non-string value such as NaN.
annotated_files = set(csv_df[interest_cols['filename']].tolist())
missing_files = sorted(annotated_files - set(files_that_exists), key=str)
# len(missing_files)

1

add the missing files to the results_df

In [16]:
# One result row per annotated photo that could not be found on disk.
missing_rows = [{'filename': x, 'result': 'missing file in input_photos'} for x in missing_files]

# DataFrame.append was removed in pandas 2.0, so concatenate a frame built from
# the rows instead. Skip the concat when there is nothing to add.
if missing_rows:
    results_df = pd.concat([results_df, pd.DataFrame(missing_rows)], ignore_index=True)
# results_df

subset the csv_df to contain only the rows whose files still need to be processed (present in the input_photos folder but not yet in the output_photos folder)

In [17]:
# Keep only the annotation rows that belong to a photo still to be processed.
# A boolean mask is used instead of .loc[list]: .loc raises KeyError as soon as
# one photo in files_to_process has no rows in the annotation file, whereas
# isin simply yields no rows for it.
to_process_mask = csv_df[interest_cols['filename']].isin(files_to_process)
csv_df = csv_df[to_process_mask].reset_index(drop=True)
# csv_df

create the function to annotate and save an image

In [18]:
def create_annotated_image(filename):
    """Draw every annotated bounding box on one photo and save the result.

    The rows of ``csv_df`` belonging to ``filename`` are looked up, the photo is
    read from the input folder, a labelled box is drawn for each row using the
    color assigned to that row's label in ``label_bgrs``, and the image is
    written under the same name to the output folder.

    Args:
        filename: Name of the photo, as it appears in the annotation file and
            in the input photo folder.

    Returns:
        ``{'filename': filename, 'result': 'success'}`` when the annotated image
        was written, otherwise ``{'filename': filename, 'result': 'failed'}``
        (unreadable photo, bad annotation values, unknown label, write error).
    """
    failed = {'filename': filename, 'result': 'failed'}
    txt_color = (255, 255, 255)  # white label text

    # Catch Exception rather than using a bare except, so one bad photo does not
    # stop the batch but KeyboardInterrupt can still interrupt a long run.
    try:
        # Annotation rows for this photo only.
        cur_df = csv_df[csv_df[interest_cols['filename']] == filename]

        cur_img = cv.imread(os.path.join(folder_locs['in_pics'], filename))
        if cur_img is None:
            # cv.imread returns None (it does not raise) when the file is
            # missing or cannot be decoded.
            return failed

        # Line and font sizes grow with the image dimensions; they are the same
        # for every box in the photo, so compute them once.
        line_thickness = max(round(sum(cur_img.shape) / 2 * 0.003), 2)
        font_thickness = max(line_thickness - 1, 1)
        font_scale = line_thickness / 3

        for _, row in cur_df.iterrows():
            cur_label = row[interest_cols['label']]
            cur_col = label_bgrs[cur_label]  # BGR color assigned to this label

            # OpenCV needs plain Python ints for point coordinates.
            top_left = (int(row[interest_cols['x1']]), int(row[interest_cols['y1']]))
            bottom_right = (int(row[interest_cols['x2']]), int(row[interest_cols['y2']]))

            # Bounding box outline.
            cv.rectangle(cur_img,
                         top_left,
                         bottom_right,
                         cur_col,
                         thickness=line_thickness,
                         lineType=cv.LINE_AA,
                        )

            # Filled background sitting on top of the box's top-left corner,
            # sized to fit the label text (font face 0 = cv.FONT_HERSHEY_SIMPLEX).
            w, h = cv.getTextSize(cur_label,
                                  0,
                                  fontScale=font_scale,
                                  thickness=font_thickness)[0]
            label_corner = (top_left[0] + w, top_left[1] - h - 3)
            cv.rectangle(cur_img,
                         top_left,
                         label_corner,
                         cur_col,
                         -1,  # negative thickness fills the rectangle
                         cv.LINE_AA,
                        )
            cv.putText(cur_img,
                       cur_label,
                       (top_left[0], top_left[1] - 2),
                       0,
                       font_scale,
                       txt_color,
                       thickness=font_thickness,
                       lineType=cv.LINE_AA,
                      )

        # cv.imwrite reports failure through its return value.
        out_filepath = os.path.join(folder_locs['out_pics'], filename)
        if not cv.imwrite(out_filepath, cur_img):
            return failed
        return {'filename': filename, 'result': 'success'}

    except Exception:
        return failed
    

create a list of filenames to be processed

In [19]:
# Distinct photo filenames left in csv_df, in order of first appearance;
# each one is annotated once, with all of its rows drawn together.
filenames = csv_df[interest_cols['filename']].unique()
# len(filenames)

iterate through each filename and get the result of the processing (either 'success' or 'failed')

In [20]:
%%time
results = []
for i, filename in enumerate(filenames):
    results.append(create_annotated_image(filename))

Wall time: 0 ns


add the results of the functions to the results_df

In [21]:
# Add the per-photo outcomes of this run to the rows already recorded,
# renumbering the index so it stays continuous.
results_df = pd.concat([results_df, pd.DataFrame(results)], ignore_index=True)
# results_df

save the results_df as a csv file

In [22]:
# The current Unix timestamp in the name gives every run its own results file;
# the file is written to the current working directory.
results_filename = f"results_{time.time()}.csv"
results_df.to_csv(results_filename, index=False)