Blur detection adapted from: https://pyimagesearch.com/2015/09/07/blur-detection-with-opencv/

In [40]:
from imutils import paths
import argparse
import cv2
import pathlib
import matplotlib.pyplot as plt
from PIL import Image

In [41]:
def variance_of_laplacian(image):
    """Return the focus measure of ``image``: the variance of its Laplacian.

    Args:
        image: Image array accepted by ``cv2.Laplacian``.

    Returns:
        The result of ``.var()`` on the Laplacian computed with ``cv2.CV_64F`` depth.
    """
    laplacian = cv2.Laplacian(image, cv2.CV_64F)
    return laplacian.var()

In [42]:
# Root of the cropped image tree; the scan cell walks its immediate sub-directories.
cropped_dir = pathlib.Path('../data/processed/cropped')
# Focus-measure cut-off: an image whose Laplacian variance is below this is flagged as blurred.
blur_threshold = 100

In [43]:
# Collects the paths of the images flagged as blurred by the scan.
blurred_images = []

In [44]:
def check_image_pil(image_path):
    """Tell whether ``image_path`` can be opened and verified by PIL.

    Args:
        image_path: Location of the file to check.

    Returns:
        True when ``Image.open`` and ``verify()`` both succeed, False when either
        raises ``IOError`` or ``SyntaxError``.
    """
    try:
        with Image.open(image_path) as candidate:
            candidate.verify()  # raises if the file is not a valid image
    except (IOError, SyntaxError):
        is_valid = False
    else:
        is_valid = True
    return is_valid

In [45]:
def check_blurred_images(image_path, threshold=None):
    """Return ``image_path`` when the image is blurred, otherwise ``None``.

    An image counts as blurred when the variance of its Laplacian (see
    ``variance_of_laplacian``) is strictly below the threshold.

    Args:
        image_path: Location of the image; passed to ``cv2.imread`` as a string.
        threshold: Focus-measure cut-off. Defaults to the notebook-level
            ``blur_threshold`` when omitted.

    Returns:
        ``image_path`` (truthy) if the image is blurred; ``None`` if it is sharp
        or could not be decoded.
    """
    limit = blur_threshold if threshold is None else threshold

    image = cv2.imread(str(image_path))
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None instead
        # of raising; without this guard cvtColor fails with an opaque cv2.error.
        return None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    if variance_of_laplacian(gray) < limit:
        return image_path
    return None

In [46]:
# Scan every '.JPG' file one level below cropped_dir and keep the ones that are
# valid images and fall under the blur threshold.
# The list is rebuilt from scratch here so that re-running this cell does not
# append the same paths a second time.
blurred_images = [
    image_path
    for cat_dir in cropped_dir.iterdir()
    if cat_dir.is_dir()
    for image_path in cat_dir.iterdir()
    if image_path.is_file() and image_path.suffix == '.JPG'
    # verify with PIL first so OpenCV is only asked to decode valid files
    if check_image_pil(image_path) and check_blurred_images(str(image_path))
]

In [47]:
# Number of cropped images flagged as blurred.
len(blurred_images)

382

In [48]:
def show_image(image_path):
    """Plot the image at ``image_path`` with its blur score written on top.

    The score is ``variance_of_laplacian`` of the grayscale image and is drawn
    in red at pixel position (10, 30); axes are hidden.

    Args:
        image_path: Location of the image; converted to ``str`` for ``cv2.imread``.
    """
    bgr = cv2.imread(str(image_path))
    # OpenCV loads BGR; matplotlib expects RGB.
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    focus_measure = variance_of_laplacian(cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY))

    plt.imshow(rgb)
    plt.text(10, 30, f'Blur: {focus_measure}', color='red')
    plt.axis('off')
    plt.show()

## Copy Blurred Images

In [49]:
# import shutil

# destination_dir = pathlib.Path('/Users/fmb/GitHub/764WildlifeReID/data/processed/blur_augmentations/input')

# # Create the destination directory if it doesn't exist
# destination_dir.mkdir(parents=True, exist_ok=True)

# for image_path in blurred_images:
#     shutil.copy(image_path, destination_dir)

## Refactor Datasets

In [188]:
import pandas as pd
import pathlib
import shutil

In [62]:
# Label table for the cropped images.
# NOTE(review): the merged frames displayed later carry an 'Unnamed: 0' column, which
# suggests this CSV was written with its index — consider index_col=0; confirm first.
cropped_labels = pd.read_csv(
    '/Users/fmb/GitHub/764WildlifeReID/data/labels/cropped_auckland_island_feralcats_df.csv'
)
# Bare file name of each entry: the text after the last '/' in its 'path' value.
cropped_labels_filenames = cropped_labels['path'].map(
    lambda rel_path: rel_path.rsplit('/', 1)[-1]
)

In [88]:
# Preview the file names extracted from the 'path' column.
cropped_labels_filenames

0       20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...
1       20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...
2       20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...
3       20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...
4       20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...
                              ...                        
1900    20190801_nYPb_AucklandIsland_B2_2_1_SD21_20190...
1901    20190801_nYPb_AucklandIsland_B2_2_1_SD21_20190...
1902    20190801_nYPb_AucklandIsland_B2_2_1_SD21_20190...
1903    20190801_nYPb_AucklandIsland_B2_2_1_SD21_20190...
1904    20190801_g1ko_AucklandIsland__D2_3_M_SD131_201...
Name: path, Length: 1905, dtype: object

In [77]:
# File names (final path component only) of the images flagged as blurred.
input_names = [image_path.name for image_path in blurred_images]

In [139]:
# Parent directory of the per-model output folders read in the next cells.
processed_blur_path = pathlib.Path('/Users/fmb/GitHub/764WildlifeReID/data/processed/blur_augmentations/output')

def fetch_augmentations(aug_dir):
    """List the image files directly inside ``aug_dir``.

    Args:
        aug_dir: ``pathlib.Path`` of the directory to scan (not recursive).

    Returns:
        A list of paths for regular files whose suffix is exactly ``'.JPG'`` or
        ``'.png'`` (case-sensitive), in ``iterdir()`` order.
    """
    return [
        entry
        for entry in aug_dir.iterdir()
        if entry.is_file() and entry.suffix in ('.JPG', '.png')
    ]

In [143]:
# Collect the image files found in each model's output folder.
maxim_augs = fetch_augmentations(processed_blur_path / 'maxim_output')
# This folder is converted from PNG to JPG in the next cell.
hidiff_augs_png = fetch_augmentations(processed_blur_path / 'HIdiff_output')
srmnet_augs = fetch_augmentations(processed_blur_path / 'SRMNet_output')

In [145]:
# convert hidiff png to JPG
# Re-save every HIdiff PNG as an RGB '.JPG' file in HIdiff_JPG_output.
hidiff_jpg_dir = processed_blur_path / 'HIdiff_JPG_output'
# Image.save does not create missing folders, so make sure the target exists.
hidiff_jpg_dir.mkdir(parents=True, exist_ok=True)

for png_path in hidiff_augs_png:
    # fetch_augmentations also returns '.JPG' files; only PNGs need converting.
    if png_path.suffix.lower() != '.png':
        continue
    new_jpg_path = hidiff_jpg_dir / png_path.with_suffix('.JPG').name
    # Image.open keeps the file handle open until the image is closed; the
    # context manager releases it after each conversion.
    with Image.open(png_path) as img:
        img.convert('RGB').save(new_jpg_path)

In [146]:
# Re-read the HIdiff outputs from the folder holding the converted JPG copies.
hidiff_augs = fetch_augmentations(processed_blur_path / 'HIdiff_JPG_output')

In [116]:
def augmentations_df(augs):
    """Build a two-column frame describing a collection of augmented image paths.

    Args:
        augs: Path objects (anything exposing ``.name``) of the augmented files.

    Returns:
        DataFrame with ``aug_path`` (the paths as given) and ``img_name``
        (each path's ``.name``).
    """
    aug_frame = pd.DataFrame(augs, columns=['aug_path'])
    return aug_frame.assign(
        img_name=aug_frame['aug_path'].map(lambda aug_path: aug_path.name)
    )

In [147]:
# One (aug_path, img_name) frame per model.
maxim_df = augmentations_df(maxim_augs)
hidiff_df = augmentations_df(hidiff_augs)
srmnet_df = augmentations_df(srmnet_augs)

In [148]:
# Placeholder only: the next cell rebinds cropped_template to a copy of cropped_labels.
cropped_template = pd.DataFrame()

In [149]:
# Label table extended with the bare file name of each image and a flag telling
# whether that file name is among the images flagged as blurred.
cropped_template = cropped_labels.assign(
    cropped_img_name=cropped_labels_filenames,
    in_blurred_images=lambda frame: frame['cropped_img_name'].isin(input_names),
)

In [150]:
def append_to_cropped_template(aug_df, aug_name):
    """Left-join one model's augmentation paths onto ``cropped_template``.

    Rows are matched on ``cropped_img_name`` (template) == ``img_name`` (aug_df).
    Template rows without a match keep a missing value in the new column.

    Args:
        aug_df: Frame with ``aug_path`` and ``img_name`` columns.
        aug_name: Name given to the ``aug_path`` column in the result.

    Returns:
        A new DataFrame; neither ``cropped_template`` nor ``aug_df`` is modified.
    """
    return (
        cropped_template
        .merge(aug_df, how='left', left_on='cropped_img_name', right_on='img_name')
        .drop(columns=['img_name'])
        .rename(columns={'aug_path': aug_name})
    )

In [151]:
# Label template joined with each model's augmentation paths (one frame per model).
maxim_merged = append_to_cropped_template(maxim_df, 'maxim')
hidiff_merged = append_to_cropped_template(hidiff_df, 'hidiff')
srmnet_merged = append_to_cropped_template(srmnet_df, 'srmnet')

## Creating Final Datasets

In [152]:
# Inspect the merged frame: the 'maxim' column is empty where no augmentation matched.
maxim_merged

Unnamed: 0,image_id,identity,path,cropped_img_name,in_blurred_images,maxim
0,3958,matt,collared-interact/20190801_Y4bG_AucklandIsland...,20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...,True,/Users/fmb/GitHub/764WildlifeReID/data/process...
1,3959,matt,collared-interact/20190801_Y4bG_AucklandIsland...,20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...,True,/Users/fmb/GitHub/764WildlifeReID/data/process...
2,3204,matt,collared-ignore/20190801_Y4bG_AucklandIsland_A...,20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...,True,/Users/fmb/GitHub/764WildlifeReID/data/process...
3,3960,matt,collared-interact/20190801_Y4bG_AucklandIsland...,20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...,False,
4,3961,matt,collared-interact/20190801_Y4bG_AucklandIsland...,20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...,True,/Users/fmb/GitHub/764WildlifeReID/data/process...
...,...,...,...,...,...,...
1900,2423,linja,cat-ignore/20190801_nYPb_AucklandIsland_B2_2_1...,20190801_nYPb_AucklandIsland_B2_2_1_SD21_20190...,False,
1901,2424,linja,cat-ignore/20190801_nYPb_AucklandIsland_B2_2_1...,20190801_nYPb_AucklandIsland_B2_2_1_SD21_20190...,False,
1902,2425,linja,cat-ignore/20190801_nYPb_AucklandIsland_B2_2_1...,20190801_nYPb_AucklandIsland_B2_2_1_SD21_20190...,False,
1903,2426,linja,cat-ignore/20190801_nYPb_AucklandIsland_B2_2_1...,20190801_nYPb_AucklandIsland_B2_2_1_SD21_20190...,False,


In [153]:
# NOTE(review): this keeps the original 'path' for rows flagged as blurred and the
# 'maxim' value otherwise — the opposite of the selection in prepare_paths_transfer.
# The NaN rows in the output are therefore the non-blurred images. Looks exploratory;
# confirm the intent before reusing this expression.
maxim_merged.apply(lambda row: row['path'] if row['in_blurred_images'] else row['maxim'], axis=1)

0       collared-interact/20190801_Y4bG_AucklandIsland...
1       collared-interact/20190801_Y4bG_AucklandIsland...
2       collared-ignore/20190801_Y4bG_AucklandIsland_A...
3                                                     NaN
4       collared-interact/20190801_Y4bG_AucklandIsland...
                              ...                        
1900                                                  NaN
1901                                                  NaN
1902                                                  NaN
1903                                                  NaN
1904                                                  NaN
Length: 1905, dtype: object

In [193]:
# choose, for every row, the file that should be copied into the refactored dataset
def prepare_paths_transfer(merged_df, aug_column,
                           cropped_root='/Users/fmb/GitHub/764WildlifeReID/data/processed/cropped'):
    """Pick the source file for each row of ``merged_df``.

    Rows flagged in ``in_blurred_images`` use the value of ``aug_column``;
    all other rows use ``cropped_root / row['path']``.

    Rows are walked positionally, so the frame may carry any index (the previous
    implementation fed index labels to ``.iloc``, which only works for a default
    0..n-1 index).

    Args:
        merged_df: Frame with ``in_blurred_images``, ``path`` and ``aug_column``.
        aug_column: Name of the column holding the augmented image paths.
        cropped_root: Directory that the relative ``path`` values are joined to.

    Returns:
        ``pd.Series`` with a fresh 0..n-1 index, one entry per row of
        ``merged_df``. A flagged row with no augmentation keeps its missing
        value, so ``.isna()`` on the result still exposes it.
    """
    root = pathlib.Path(cropped_root)
    origin_paths = [
        aug_path if is_blurred else root / rel_path
        for is_blurred, aug_path, rel_path in zip(
            merged_df['in_blurred_images'], merged_df[aug_column], merged_df['path']
        )
    ]
    return pd.Series(origin_paths)

In [198]:
# One source path per row of the merged label frame.
prepare_paths_transfer(maxim_merged, 'maxim').shape

(1905,)

In [194]:
# Count missing source paths per model; a non-zero count means a flagged image has no augmentation.
prepare_paths_transfer(maxim_merged, 'maxim').isna().sum(), prepare_paths_transfer(hidiff_merged, 'hidiff').isna().sum(), prepare_paths_transfer(srmnet_merged, 'srmnet').isna().sum()

(0, 0, 0)

In [195]:
def transfer_files(origin_paths, destination_folder,
                   datasets_root='/Users/fmb/GitHub/764WildlifeReID/data/formatted_datasets'):
    """Copy every file in ``origin_paths`` into ``FeralCatsAkl_<destination_folder>``.

    Args:
        origin_paths: Iterable of source file paths.
        destination_folder: Suffix of the dataset folder name.
        datasets_root: Directory under which the dataset folder lives.

    Files keep their own names, so two sources with the same file name overwrite
    each other in the destination.
    """
    destination_dir = pathlib.Path(datasets_root) / f'FeralCatsAkl_{destination_folder}'
    # shutil.copy treats a non-existent destination as a *file* name: without this,
    # every source would be written over one file instead of into a folder.
    destination_dir.mkdir(parents=True, exist_ok=True)
    for origin_path in origin_paths:
        shutil.copy(origin_path, destination_dir)

In [196]:
# Populate the FeralCatsAkl_maxim dataset folder.
transfer_files(prepare_paths_transfer(maxim_merged, 'maxim'), 'maxim')

In [197]:
# Populate the FeralCatsAkl_hidiff and FeralCatsAkl_srmnet dataset folders.
transfer_files(prepare_paths_transfer(hidiff_merged, 'hidiff'), 'hidiff')
transfer_files(prepare_paths_transfer(srmnet_merged, 'srmnet'), 'srmnet')

## Prepare Labels for Refactored Datasets

In [203]:
# read in images from refactored location

def fetch_images(image_dir):
    """Index the '.JPG' files directly inside ``image_dir``.

    Args:
        image_dir: ``pathlib.Path`` of the directory to scan (not recursive).

    Returns:
        DataFrame with ``path`` (full path objects) and ``img_name`` (file names)
        for every regular file whose suffix is exactly ``'.JPG'``.
    """
    jpg_paths = [
        entry
        for entry in image_dir.iterdir()
        if entry.is_file() and entry.suffix == '.JPG'
    ]
    return pd.DataFrame({
        'path': jpg_paths,
        'img_name': [entry.name for entry in jpg_paths],
    })

# Index the files now present in each refactored dataset folder.
fca_maxim_files = fetch_images(pathlib.Path('/Users/fmb/GitHub/764WildlifeReID/data/formatted_datasets/FeralCatsAkl_maxim'))
fca_hidiff_files = fetch_images(pathlib.Path('/Users/fmb/GitHub/764WildlifeReID/data/formatted_datasets/FeralCatsAkl_hidiff'))
fca_srmnet_files = fetch_images(pathlib.Path('/Users/fmb/GitHub/764WildlifeReID/data/formatted_datasets/FeralCatsAkl_srmnet'))

In [204]:
# Reminder of the label columns available for the join below.
maxim_merged.head(5)

Unnamed: 0,image_id,identity,path,cropped_img_name,in_blurred_images,maxim
0,3958,matt,collared-interact/20190801_Y4bG_AucklandIsland...,20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...,True,/Users/fmb/GitHub/764WildlifeReID/data/process...
1,3959,matt,collared-interact/20190801_Y4bG_AucklandIsland...,20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...,True,/Users/fmb/GitHub/764WildlifeReID/data/process...
2,3204,matt,collared-ignore/20190801_Y4bG_AucklandIsland_A...,20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...,True,/Users/fmb/GitHub/764WildlifeReID/data/process...
3,3960,matt,collared-interact/20190801_Y4bG_AucklandIsland...,20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...,False,
4,3961,matt,collared-interact/20190801_Y4bG_AucklandIsland...,20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...,True,/Users/fmb/GitHub/764WildlifeReID/data/process...


In [217]:
# Trial join of the copied maxim files onto the label frame by file name;
# get_labelled_df performs the same merge for all three datasets.
merged_dummy = fca_maxim_files.merge(maxim_merged, left_on='img_name', right_on='cropped_img_name', how='left')

In [230]:
# Both inputs have a 'path' column, hence the path_x / path_y suffixes in the result.
merged_dummy

Unnamed: 0,path_x,img_name,image_id,identity,path_y,cropped_img_name,in_blurred_images,maxim
0,/Users/fmb/GitHub/764WildlifeReID/data/formatt...,20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...,3958,matt,collared-interact/20190801_Y4bG_AucklandIsland...,20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...,True,/Users/fmb/GitHub/764WildlifeReID/data/process...
1,/Users/fmb/GitHub/764WildlifeReID/data/formatt...,20190801_kW8J_AucklandIsland_A2_3_41_SD15_2019...,3150,matt,collared-eat/20190801_kW8J_AucklandIsland_A2_3...,20190801_kW8J_AucklandIsland_A2_3_41_SD15_2019...,False,
2,/Users/fmb/GitHub/764WildlifeReID/data/formatt...,20190801_3vNi_AucklandIsland_B2_5_35_SD47_2019...,3915,matt,collared-ignore/20190801_3vNi_AucklandIsland_B...,20190801_3vNi_AucklandIsland_B2_5_35_SD47_2019...,False,
3,/Users/fmb/GitHub/764WildlifeReID/data/formatt...,20190801_nYPb_AucklandIsland_B2_2_1_SD21_20190...,2356,nuclei,cat-ignore/20190801_nYPb_AucklandIsland_B2_2_1...,20190801_nYPb_AucklandIsland_B2_2_1_SD21_20190...,False,
4,/Users/fmb/GitHub/764WildlifeReID/data/formatt...,20190801_g7RF_AucklandIsland_C1_6_45_SD125_201...,1581,linja,cat-ignore/20190801_g7RF_AucklandIsland_C1_6_4...,20190801_g7RF_AucklandIsland_C1_6_45_SD125_201...,False,
...,...,...,...,...,...,...,...,...
1900,/Users/fmb/GitHub/764WildlifeReID/data/formatt...,20190801_g7RF_AucklandIsland_C1_6_45_SD125_201...,1290,linja,cat-ignore/20190801_g7RF_AucklandIsland_C1_6_4...,20190801_g7RF_AucklandIsland_C1_6_45_SD125_201...,False,
1901,/Users/fmb/GitHub/764WildlifeReID/data/formatt...,20190801_kW8J_AucklandIsland_A2_3_41_SD15_2019...,3073,matt,collared-eat/20190801_kW8J_AucklandIsland_A2_3...,20190801_kW8J_AucklandIsland_A2_3_41_SD15_2019...,False,
1902,/Users/fmb/GitHub/764WildlifeReID/data/formatt...,20190801_kW8J_AucklandIsland_A2_3_41_SD15_2019...,4172,matt,collared-interact/20190801_kW8J_AucklandIsland...,20190801_kW8J_AucklandIsland_A2_3_41_SD15_2019...,False,
1903,/Users/fmb/GitHub/764WildlifeReID/data/formatt...,20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...,2923,matt,collared-eat/20190801_Y4bG_AucklandIsland_A2_2...,20190801_Y4bG_AucklandIsland_A2_2_38_SD38_2019...,False,


In [231]:
def get_labelled_df(fca_df, merged_df):
    """Attach labels to the files of a refactored dataset.

    ``fca_df`` is left-joined to ``merged_df`` on ``img_name`` ==
    ``cropped_img_name``.

    Args:
        fca_df: Frame with an ``img_name`` column (one row per dataset file).
        merged_df: Label frame with ``image_id``, ``identity`` and
            ``cropped_img_name`` columns.

    Returns:
        DataFrame with ``image_id``, ``identity`` and ``path``, where ``path`` is
        the bare file name wrapped in ``pathlib.Path``.
    """
    joined = fca_df.merge(
        merged_df, how='left', left_on='img_name', right_on='cropped_img_name'
    )
    labels = joined[['image_id', 'identity']].copy()
    labels['path'] = joined['img_name'].map(lambda name: pathlib.Path(name))
    return labels

In [232]:
# Write the label file for the maxim dataset (index column omitted).
get_labelled_df(fca_maxim_files, maxim_merged).to_csv('/Users/fmb/GitHub/764WildlifeReID/data/labels/FeralCatsAkl_maxim_labels.csv', index=False)

In [233]:
# Write the label files for the hidiff and srmnet datasets (index column omitted).
get_labelled_df(fca_hidiff_files, hidiff_merged).to_csv('/Users/fmb/GitHub/764WildlifeReID/data/labels/FeralCatsAkl_hidiff_labels.csv', index=False)
get_labelled_df(fca_srmnet_files, srmnet_merged).to_csv('/Users/fmb/GitHub/764WildlifeReID/data/labels/FeralCatsAkl_srmnet_labels.csv', index=False)

#### Proceed to upload to datasets.py (see [README](/Users/fmb/GitHub/764WildlifeReID/data/README))