In [49]:
##### Set the image directory below, then run every cell in order.
# NOTE(review): this is a machine-specific absolute Windows path. Later cells split
# data_dir on "\\" to build output file names, so keep backslash separators here.

data_dir=r"D:\WII_BRP\In_situ\Camera_trapping\codes\trial_images"

In [77]:
import json
import pyfastcopy
from datetime import datetime, timedelta
import tensorflow as tf
import pandas as pd
import numpy as np
from PIL import Image
import cv2
import os
import shutil
import time
import sys
from pathlib import Path
from subprocess import Popen, PIPE 
import re
import subprocess
import concurrent.futures
import threading
from tqdm import tqdm
# Module-level image size constant. resize_img declares its own IMG_SIZE parameter
# (default [224, 224]), so inside that function the parameter, not this global, is used.
IMG_SIZE = (224,224)
    
def check_existing_file(img_dir, output_file_name):
    """Report whether ``<output_file_name>.json`` is present inside ``img_dir``.

    Args:
        img_dir: Directory expected to hold the file.
        output_file_name: File name without the ``.json`` extension.

    Returns:
        1 when the file exists, 0 otherwise.
    """
    candidate = os.path.join(img_dir, output_file_name + ".json")
    return 1 if os.path.exists(candidate) else 0

def megadetector(img_dir, num_images):
    """Run the MegaDetector batch script over ``img_dir`` and return its JSON output path.

    The detector script is launched as a child process with the current
    interpreter (``sys.executable``). Lines of its output that start with a
    percentage are mirrored onto a 0-100 tqdm bar. When the output JSON is
    already present the detector is not run again.

    Args:
        img_dir: Directory of images; passed to the script with ``--recursive``.
        num_images: Accepted for interface compatibility; not used here.

    Returns:
        Path of the JSON file,
        ``<img_dir>/<last three path parts joined by "_">_megadetector_output_.json``.

    Raises:
        RuntimeError: If the detector process exits with a non-zero status.
    """
    print("Megadetector model")

    local_detector_path = os.path.join(os.getcwd(), "cameratraps", "detection", "run_detector_batch.py")
    megadetector_path = os.path.join(os.getcwd(), "md_v5a.0.0.pt")
    # Split on either separator so forward-slash paths produce the same name as
    # backslash paths; backslash-only input gives exactly the previous result.
    output_file_name = "_".join(re.split(r"[\\/]", img_dir)[-3:]) + "_megadetector_output"
    json_dir = os.path.join(img_dir, f"{output_file_name}_.json")

    # Look for the exact file the detector writes. The previous check looked for
    # "<name>.json" (without the trailing underscore) and therefore never matched.
    if os.path.exists(json_dir):
        print("Megadetector output file already exists.. Going for species classification")
        return json_dir

    print(local_detector_path, megadetector_path, json_dir)

    command = [sys.executable,
                local_detector_path,
                megadetector_path,
                img_dir,
                json_dir,
                "--recursive"]

    progress_pattern = re.compile(r"(\d+)%")

    with tqdm(total = 100) as t:
        prev_percentage = 0
        # An argument list is executed directly; shell=True is not needed and, on
        # POSIX, would run only the first list element.
        with Popen(command,
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=1,
                universal_newlines=True) as p:
            for line in p.stdout:

                if line.startswith("Loaded model in"):
                    print(line)
                    continue

                match = progress_pattern.search(line[0:4])
                if match:
                    percentage = min(int(match.group(1)), 100)
                    if percentage > prev_percentage:
                        # Advance by the real delta so jumps of more than 1% are not lost.
                        t.update(percentage - prev_percentage)
                        prev_percentage = percentage

    # Leaving the Popen context waits for the child, so returncode is set here.
    if p.returncode != 0:
        raise RuntimeError(f"MegaDetector exited with status {p.returncode}; no detections were written to {json_dir}")

    print("Bounding Boxes Created")

    return json_dir

def get_detection_df(img_path, json_dir, conf_threshold=0.1, min_area=0.001,
                     small_object_area=0.01, small_object_min_y=0.6):
    """Flatten a MegaDetector JSON file into one row per accepted detection.

    A detection is kept when its confidence is above ``conf_threshold``, its
    normalised box area is above ``min_area``, and it is not both small
    (area <= ``small_object_area``) and low in the frame
    (box y > ``small_object_min_y``). The table is written to
    ``<img_path>/detections.csv`` and returned.

    Args:
        img_path: Image root directory; relative file entries are resolved against it.
        json_dir: Path of the MegaDetector output JSON.
        conf_threshold: Minimum (exclusive) detection confidence.
        min_area: Minimum (exclusive) normalised box area.
        small_object_area: Area at or below which a box counts as small.
        small_object_min_y: Box y above which a small box is discarded.

    Returns:
        DataFrame with columns, in this order: Filepath, Filename,
        Detection_number, Category, Detection_Confidence, Detection_bbox,
        File_directory. It is empty (but has these columns) when nothing passes.
    """
    print("Generating detections.csv...")

    with open(json_dir, 'r') as f:
        data = json.load(f)

    # Column order matters: crop_row reads rows by position.
    columns = [
        "Filepath",
        "Filename",
        "Detection_number",
        "Category",
        "Detection_Confidence",
        "Detection_bbox",
        "File_directory",
    ]
    # Any category id other than '1' or '2' is reported as "Vehicle".
    category_names = {"1": "Animal", "2": "Person"}

    records = []
    for image in data["images"]:
        raw_path = image["file"]
        # Keep the real location and extension of the image instead of assuming
        # "<img_path>\<stem>.jpg", which breaks for sub-folders and non-.jpg files.
        if os.path.isabs(raw_path):
            filepath = os.path.normpath(raw_path)
        else:
            filepath = os.path.normpath(os.path.join(img_path, raw_path))
        filename = os.path.splitext(os.path.basename(filepath))[0]
        file_directory = os.path.dirname(filepath)

        # Entries for images that failed to load can carry no detection list.
        detections = image.get("detections") or []
        for j, detection in enumerate(detections):
            area = detection["bbox"][2] * detection["bbox"][3]
            y_position = detection["bbox"][1]
            if (
                detection["conf"] > conf_threshold
                and area > min_area
                and not (area <= small_object_area and y_position > small_object_min_y)
            ):
                records.append(
                    {
                        "Filepath": filepath,
                        "Filename": filename,
                        "Detection_number": j + 1,
                        "Category": category_names.get(detection["category"], "Vehicle"),
                        "Detection_Confidence": detection["conf"],
                        "Detection_bbox": detection["bbox"],
                        "File_directory": file_directory,
                    }
                )

    # Passing the column list keeps the schema stable even with zero records.
    new_df = pd.DataFrame(records, columns=columns)
    df_path = os.path.join(img_path, "detections.csv")
    new_df.to_csv(df_path, index=False)

    # NOTE(review): nothing above assigns the "Small Object" category, so this
    # export is currently never written; kept for compatibility.
    small_obj_df = new_df[new_df["Category"] == "Small Object"]
    if not small_obj_df.empty:
        small_obj_df_path = os.path.join(img_path, "small_objects.csv")
        small_obj_df.to_csv(small_obj_df_path, index=False)

    return new_df
    
def crop_img(img_dir, bbox):
    """Crop a normalised ``[x, y, w, h]`` box out of the image stored at ``img_dir``.

    Each box value is multiplied by the image width (x, w) or height (y, h)
    taken from ``img.size`` before being handed to ``Image.crop``.

    Args:
        img_dir: Path of the image file to open.
        bbox: Iterable of four numbers ``x, y, w, h``.

    Returns:
        The object returned by ``Image.crop`` for the scaled box.
    """
    image = Image.open(img_dir)
    rel_x, rel_y, rel_w, rel_h = bbox
    width_px, height_px = image.size[0], image.size[1]
    box_w = rel_w * width_px
    box_h = rel_h * height_px
    left = rel_x * width_px
    top = rel_y * height_px
    return image.crop((left, top, left + box_w, top + box_h))

def crop_img_gpu(img_dir, bbox):
    """Decode a JPEG with TensorFlow and return one normalised box as a PIL image.

    Args:
        img_dir: Path of the JPEG file to read.
        bbox: Four numbers ``x, y, w, h``; x and w are scaled by the decoded
            tensor's second dimension, y and h by its first.

    Returns:
        PIL ``Image`` built from the cropped pixel array.
    """
    decoded = tf.image.decode_jpeg(tf.io.read_file(img_dir), channels=3)
    rel_x, rel_y, rel_w, rel_h = bbox

    dims = tf.shape(decoded)
    x_scale = tf.cast(dims[1], tf.float32)
    y_scale = tf.cast(dims[0], tf.float32)

    # Corner coordinates are truncated to integers by the int32 cast.
    left = tf.cast(rel_x * x_scale, tf.int32)
    top = tf.cast(rel_y * y_scale, tf.int32)
    right = tf.cast((rel_x + rel_w) * x_scale, tf.int32)
    bottom = tf.cast((rel_y + rel_h) * y_scale, tf.int32)

    region = decoded[top:bottom, left:right, :]
    # Go through NumPy so PIL can wrap the pixel data.
    return Image.fromarray(region.numpy())

# Module-level lock that crop_row holds while handing back its result.
lock = threading.Lock()


def crop_row(row):
    """Crop one detection and save it under ``<File_directory>/Cropped_images``.

    Args:
        row: Positional record; index 0 is the image path, 1 the file stem,
            2 the detection number, 5 the bounding box and 6 the directory
            that receives the ``Cropped_images`` folder.

    Returns:
        File name of the saved crop, ``<stem>_<detection number>.jpg``.
    """
    source_path, stem, detection_no = row[0], row[1], row[2]
    box, parent_dir = row[5], row[6]

    crop = crop_img_gpu(source_path, box)

    out_dir = os.path.join(parent_dir, r"Cropped_images")
    out_name = f"{stem}_{detection_no}.jpg"
    out_path = os.path.join(out_dir, out_name)

    os.makedirs(out_dir, exist_ok=True)
    crop.save(out_path)
    with lock:
        return out_name


def resize_img(filepath, IMG_SIZE = [224, 224]):
    """Read a JPEG file and resize it to ``IMG_SIZE`` with ``tf.image.resize``.

    Args:
        filepath: Path (or string tensor) of the JPEG file.
        IMG_SIZE: Target size passed straight to ``tf.image.resize``.

    Returns:
        The resized image tensor (no batch dimension is added).
    """
    raw_bytes = tf.io.read_file(filepath)
    return tf.image.resize(tf.image.decode_jpeg(raw_bytes), IMG_SIZE)

def clean_path(path):
    """Return ``path`` normalised by ``os.path.normpath``."""
    normalised = os.path.normpath(path)
    return normalised

def crop_images_batch(df, batch_size):
    """Crop every detection in ``df`` on a thread pool and record the crop names.

    Rows are handed to ``crop_row`` in slices of ``batch_size``. Results are
    collected in submission order, so each name lands on the row that produced
    it. (Collecting with ``as_completed`` does not follow submission order and
    could attach a crop name to the wrong detection.)

    Args:
        df: Detections table; every row is passed to ``crop_row`` as a plain tuple.
        batch_size: Number of rows submitted to the pool at a time; must be >= 1.

    Returns:
        The same ``df`` object, with a ``Cropped_image_name`` column added in place.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    cropped_names = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for start in range(0, len(df), batch_size):
            batch = df[start:start + batch_size]
            rows = batch.itertuples(index=False, name=None)
            # Executor.map yields results in input order and re-raises any
            # exception raised by a worker.
            cropped_names.extend(executor.map(crop_row, rows))

    df["Cropped_image_name"] = cropped_names
    return df
    
def move_images_batch(src_list, dest_list, batch_size=512):
    """Move files on a thread pool, ``batch_size`` files at a time.

    Each source is moved into the *directory* of its paired destination
    (``os.path.dirname(dest)``), so the file keeps its own base name. A batch is
    finished before the next one is submitted; the first failed move re-raises
    its exception.

    Args:
        src_list: Source file paths (any iterable, e.g. a list or pandas Series).
        dest_list: Destination file paths, paired with ``src_list`` by position.
        batch_size: Number of moves submitted per batch.

    Returns:
        None.

    Raises:
        ValueError: If the two inputs differ in length.
    """
    # Plain lists make slicing purely positional, whatever index a Series carries.
    src_files = list(src_list)
    dest_files = list(dest_list)
    if len(src_files) != len(dest_files):
        raise ValueError(
            f"src_list and dest_list differ in length ({len(src_files)} != {len(dest_files)})"
        )

    with concurrent.futures.ThreadPoolExecutor() as exe:
        for i in tqdm(range(0, len(src_files), batch_size)):
            batch_pairs = zip(src_files[i:i + batch_size], dest_files[i:i + batch_size])
            # Track only this batch's tasks; accumulating across batches made
            # every iteration re-wait on all earlier moves.
            batch_tasks = [
                exe.submit(shutil.move, src, os.path.dirname(dest))
                for src, dest in batch_pairs
            ]
            # Wait for the whole batch before proceeding to the next one.
            for task in batch_tasks:
                task.result()
    return


In [None]:
# Turn on memory growth for every GPU TensorFlow reports, or say that none were found.
gpus = tf.config.experimental.list_physical_devices('GPU')
if not gpus:
    print("No GPUs available.")
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [64]:
# Start the run log and count the images under data_dir.
log = {}
now = datetime.now()
log.update({"Run timestamp" : str(now)})

# Count image files only. Counting every file also picked up the JSON/CSV outputs
# and the crops that earlier runs of this notebook leave inside data_dir.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tif", ".tiff")
num_images = 0
for _, dirs, files in os.walk(data_dir):
    # Pruning in place stops os.walk from descending into crop output folders.
    dirs[:] = [d for d in dirs if d != "Cropped_images"]
    num_images += sum(1 for name in files if name.lower().endswith(IMAGE_EXTENSIONS))
log.update({"Num images" : num_images})
print(num_images)

43


In [5]:
#### Load the order-level classifier and the two species-level classifiers

models = []

model_load_start = time.time()
models_root = os.getcwd()

# Order-level model
order_level_model_path = os.path.join(models_root, r"Models\Refined_Hierarchical.ckpt")
order_level_model = tf.keras.models.load_model(order_level_model_path)
models.append(order_level_model)
order_level_class_names = sorted(
    ["GIB", "Goat_Sheep", "Hare", "Human", "Raptor", "Small Bird", "Small Carnivore", "Ungulate", "Vehicle", "Wild Pig"]
)

# Ungulate species model
ungulate_model_path = os.path.join(models_root, r"Models\Efficient_Net_Ungulates_3.ckpt")
ungulate_model = tf.keras.models.load_model(ungulate_model_path)
models.append(ungulate_model)
ungulate_class_names = sorted(["Camel", "Chinkara", "Nilgai", "Cattle"])

# Small-carnivore species model
small_carnivore_model_path = os.path.join(models_root, r"Models\Efficient_Net_Small_Carnivores_1.ckpt")
small_carnivore_model = tf.keras.models.load_model(small_carnivore_model_path)
models.append(small_carnivore_model)
small_carnivores_class_names = sorted(["Dog", "Desert Cat", "Fox"])

# Record how long loading took (rounded to whole seconds).
model_load_end = time.time()
model_load_time = str(timedelta(seconds=round(model_load_end - model_load_start)))
log["Model_load_time"] = model_load_time
print(model_load_time)

0:00:52


In [65]:
# Time the MegaDetector pass, then flatten its JSON output into a detections table.
megadetector_start = time.time()
json_dir = megadetector(data_dir, num_images)
megadetector_end = time.time()

megadetector_elapsed = timedelta(seconds=round(megadetector_end - megadetector_start))
megadetector_time = str(megadetector_elapsed)
log["Megadetector time"] = megadetector_time

df_detections = get_detection_df(data_dir, json_dir)

Megadetector model
D:\WII_BRP\In_situ\Camera_trapping\BRP_AniPredictor\cameratraps\detection\run_detector_batch.py D:\WII_BRP\In_situ\Camera_trapping\BRP_AniPredictor\md_v5a.0.0.pt D:\WII_BRP\In_situ\Camera_trapping\codes\trial_images\Camera_trapping_codes_trial_images_megadetector_output_.json


  0%|                                                                                          | 0/100 [00:00<?, ?it/s]

Loaded model in 4.64 seconds

Loaded model in 4.64 seconds



 10%|████████                                                                         | 10/100 [00:14<02:13,  1.49s/it]

Bounding Boxes Created
Generating detections.csv...





In [75]:
# Crop every detection and record where each crop was written
cropping_start = time.time()
df_crop = crop_images_batch(df_detections, 512)

# Glob pattern for the crops, and the folder that holds them.
cropped_images = os.path.join(data_dir, r"Cropped_images\*")
cropped_dir = clean_path(os.path.join(data_dir, "Cropped_images"))

crop_paths = cropped_dir + "\\" + df_crop["Cropped_image_name"]
df_crop["Cropped_image_path"] = crop_paths.map(clean_path)

cropping_end = time.time()
cropping_time = str(timedelta(seconds=round(cropping_end - cropping_start)))
log["Cropping Time"] = cropping_time
log["Number of Detections"] = len(df_detections)

In [76]:
# # Order-level prediction, then filing of each crop into its predicted folder
order_level_start = time.time()

# Build the input pipeline from the crops on disk, keeping the file names in listing order.
dataset = tf.data.Dataset.list_files(cropped_images, shuffle = False)
cropped_names = [file_path.numpy().decode("utf-8").split("\\")[-1] for file_path in dataset]
dataset = (
    dataset
    .map(resize_img, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size=64)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

#### Run model
order_level_preds = order_level_model.predict(dataset)
order_level_end = time.time()
order_level_time = str(timedelta(seconds=round(order_level_end - order_level_start)))
log["Order Level Pred Time"] = order_level_time
order_level_class_names = sorted(
    ["GIB", "Goat_Sheep", "Hare", "Human", "Raptor", "Small Bird", "Small Carnivore", "Ungulate", "Vehicle", "Wild Pig"]
)

order_pred_classes = []
order_pred_probs = []

# Order Level Cropped Shift
order_shift_start = time.time()
for pred in order_level_preds:
    top_prob = max(pred)
    # Predictions below 0.8 are filed under "Others".
    order = order_level_class_names[np.argmax(pred)] if top_prob >= 0.8 else "Others"
    order_pred_classes.append(order)
    order_pred_probs.append(top_prob)

df_temp = pd.DataFrame({
    'Cropped_image_name': cropped_names,
    'Order_pred': order_pred_classes,
    'Order_pred_prob': order_pred_probs
})
df_order = df_crop.merge(df_temp, on='Cropped_image_name', how='left')

order_dirs = cropped_dir + "\\" + df_order["Order_pred"]
df_order["Order_dir"] = order_dirs.map(clean_path)
order_paths = df_order["Order_dir"] + "\\" + df_order["Cropped_image_name"]
df_order["Order_level_path"] = order_paths.map(clean_path)

# Destination folders must exist before the move.
unique_directories = set(df_order['Order_dir'])
for directory in unique_directories:
    os.makedirs(directory, exist_ok=True)

move_images_batch(df_order["Cropped_image_path"], df_order["Order_level_path"])

order_shift_end = time.time()
order_shift_time = str(timedelta(seconds=round(order_shift_end - order_shift_start)))
order_level_time = str(timedelta(seconds=round(order_shift_end - order_level_start)))
log["Order Shift Imgs Time"] = order_shift_time
log["Order level predict and shift"] = order_level_time

First 1000 images copied at 2023-11-08 20:32:51.694345


In [23]:
# # Species-level prediction for the "Small Carnivore" and "Ungulate" order folders

def _predict_species(model, class_names, group_dir, fallback_label, threshold=0.8):
    """Classify every crop stored in one order-level folder.

    Args:
        model: Keras model used for prediction.
        class_names: Labels indexed by the model's output position.
        group_dir: Folder holding the crops for this order.
        fallback_label: Label used when the top probability is below ``threshold``.
        threshold: Minimum top probability needed to accept a species label.

    Returns:
        DataFrame with columns Cropped_image_name, Species_pred and
        Species_pred_prob. It is empty when the folder is missing or has no
        entries, because tf.data.Dataset.list_files raises when its pattern
        matches nothing.
    """
    if not os.path.isdir(group_dir) or not os.listdir(group_dir):
        # Explicit dtypes keep later string concatenation and merges working.
        return pd.DataFrame({
            'Cropped_image_name': pd.Series(dtype="object"),
            'Species_pred': pd.Series(dtype="object"),
            'Species_pred_prob': pd.Series(dtype="float32"),
        })

    files = tf.data.Dataset.list_files(os.path.join(group_dir, "*"), shuffle = False)
    names = [file_path.numpy().decode("utf-8").split("\\")[-1] for file_path in files]
    batches = files.map(resize_img, num_parallel_calls=tf.data.AUTOTUNE).batch(batch_size=64).prefetch(buffer_size=tf.data.AUTOTUNE)

    pred_classes = []
    pred_probs = []
    for pred in model.predict(batches):
        top_prob = max(pred)
        pred_classes.append(class_names[np.argmax(pred)] if top_prob >= threshold else fallback_label)
        pred_probs.append(top_prob)

    return pd.DataFrame({
        'Cropped_image_name': names,
        'Species_pred': pred_classes,
        'Species_pred_prob': pred_probs
    })


# Small carnivores
small_carnivores_start = time.time()

small_carnivore_images = os.path.join(cropped_dir,r"Small Carnivore\*")
small_carnivore_dir = os.path.join(cropped_dir,r"Small Carnivore")
df_small_carnivore = _predict_species(
    small_carnivore_model, small_carnivores_class_names, small_carnivore_dir, "Small Carnivore"
)

small_carnivores_end = time.time()
small_carnivore_time = str(timedelta(seconds=round(small_carnivores_end - small_carnivores_start)))
log.update({"Small Carnivore Model Pred Time" : small_carnivore_time})

#### Ungulates
ungulate_start = time.time()

ungulate_images = os.path.join(cropped_dir,r"Ungulate\*")
ungulate_dir = os.path.join(cropped_dir,r"Ungulate")
df_ungulate = _predict_species(ungulate_model, ungulate_class_names, ungulate_dir, "Ungulate")

ungulate_end = time.time()
ungulate_time = str(timedelta(seconds=round(ungulate_end - ungulate_start)))
log.update({"Ungulate Model Pred Time" : ungulate_time})

# File each crop under its species folder
species_shift_start = time.time()
df_species = pd.concat([df_small_carnivore,df_ungulate])
df_species["Species_dir"] = (cropped_dir + "\\" + df_species["Species_pred"]).apply(clean_path)
df_species["Species_level_path"] = (df_species["Species_dir"] + "\\" + df_species["Cropped_image_name"]).apply(clean_path)

df_move = pd.merge(df_species, df_order, on='Cropped_image_name', how='left')
# Crops whose species label equals their order label are already in place.
df_move = df_move[df_move["Order_level_path"] != df_move["Species_level_path"]]
unique_directories = set(df_move['Species_dir'])
for directory in unique_directories:
    os.makedirs(directory, exist_ok=True)

move_images_batch(df_move["Order_level_path"], df_move["Species_level_path"])


species_shift_end = time.time()
species_shift_time = str(timedelta(seconds=round(species_shift_end - species_shift_start)))
species_level_time = str(timedelta(seconds=round(species_shift_end - small_carnivores_start)))
log.update({"Species level Shift Imgs Time" : species_shift_time})
log.update({"Species level Predict and Shift" : species_level_time})

First 1000 images copied at 2023-11-08 16:52:50.511847
First 1512 images copied at 2023-11-08 16:52:51.769005
First 2024 images copied at 2023-11-08 16:52:53.200933
First 2536 images copied at 2023-11-08 16:52:54.667669
First 3048 images copied at 2023-11-08 16:52:55.398723
First 1000 images deleted at 2023-11-08 16:53:27.979984
First 1512 images deleted at 2023-11-08 16:53:28.188427
First 2024 images deleted at 2023-11-08 16:53:28.388890
First 2536 images deleted at 2023-11-08 16:53:28.599327
First 3048 images deleted at 2023-11-08 16:53:28.713025


In [24]:
# Join order-level and species-level results and export them without the working path columns.
df_final = (
    df_order
    .merge(df_species, on='Cropped_image_name', how='left')
    .drop(columns=['Order_dir', 'Order_level_path','Cropped_image_path'])
)
df_final_path = os.path.join(data_dir, "predictions.csv")
df_final.to_csv(df_final_path, index=False)

In [27]:
# Write the run log next to the images, named after the last three parts of data_dir.
log_file_name = "_".join(data_dir.split("\\")[-3:])
log_file_path = os.path.join(data_dir, log_file_name + "_log.json")
with open(log_file_path, "w") as f:
    f.write(json.dumps(log, indent=2))

RKVY_KilodiNadi_2


In [None]:
#### End

In [None]:
# Scratch cell: move each crop to its order-level path one at a time, reporting failures.
unique_directories = set(df_order['Order_dir'])
for directory in unique_directories:
    os.makedirs(directory, exist_ok=True)
for i in range(len(df_order)):
    try:
        shutil.move(df_order["Cropped_image_path"].iloc[i],df_order["Order_level_path"].iloc[i])
    except Exception as exc:
        # Still best-effort, but a bare except would also swallow KeyboardInterrupt
        # and hide why the move failed.
        name = df_order["Cropped_image_name"].iloc[i]
        print(f"{name} not moved: {exc}")

RKVY_GIB_20230201_152935(8)_2.jpg not moved
RKVY_GIB_20230201_153000(1)_2.jpg not moved
RKVY_GIB_20230201_152748(4)_7.jpg not moved
RKVY_GIB_20230201_152718(8)_6.jpg not moved
RKVY_GIB_20230201_153000(2)_3.jpg not moved
RKVY_GIB_20230201_152754(12)_3.jpg not moved
RKVY_GIB_20230201_152727(4)_7.jpg not moved
RKVY_GIB_20230201_152755(13)_2.jpg not moved
RKVY_GIB_20230201_152747(2)_3.jpg not moved
RKVY_GIB_20230201_153000(1)_1.jpg not moved
RKVY_GIB_20230201_152747(2)_5.jpg not moved
RKVY_GIB_20230201_152717(7)_8.jpg not moved
RKVY_GIB_20230201_152748(3)_4.jpg not moved
RKVY_GIB_20230201_152749(5)_4.jpg not moved
RKVY_GIB_20230201_152727(4)_8.jpg not moved
RKVY_GIB_20230201_152749(5)_3.jpg not moved
RKVY_GIB_20230201_152934(7)_6.jpg not moved
RKVY_GIB_20230201_152747(2)_4.jpg not moved
RKVY_GIB_20230201_152748(3)_3.jpg not moved
RKVY_GIB_20230201_152753(11)_7.jpg not moved
RKVY_GIB_20230201_152748(4)_6.jpg not moved
RKVY_GIB_20230201_152935(8)_1.jpg not moved
RKVY_GIB_20230201_152746(1)_5

In [126]:
# Scratch check: drops the last backslash-separated part of cropped_dir and joins `order` onto it.
# NOTE(review): relies on `order` and `cropped_dir` left in the kernel by earlier cells.
os.path.join("\\".join(cropped_dir.split("\\")[:-1]), order)

'E:\\Camera_Trapping\\Guzzler_data\\2023\\CameraTrap\\RKVY\\GIB\\3\\Cropped_images\\*'

In [124]:
# Move Order Level Images (row-by-row variant)

order_shift_start = time.time()
for i, row in df_order.iterrows():
    order_class = row["Order_pred"]
    crops_root = os.path.join(row["File_directory"], "Cropped_images")
    cropped_path = os.path.join(crops_root, row["Cropped_image_name"])

    # One sub-folder per predicted order, created on first use.
    animal_dir = os.path.join(crops_root, order_class)
    if not os.path.exists(animal_dir):
        os.makedirs(animal_dir)

    new_path = os.path.join(animal_dir, row["Cropped_image_name"])
    shutil.move(cropped_path, new_path)

order_shift_end = time.time()
order_shift_time = str(timedelta(seconds=round(order_shift_end - order_shift_start)))
log["Order Shift Imgs Time"] = order_shift_time
print(order_shift_time)

PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'E:\\Camera_Trapping\\Guzzler_data\\2023\\CameraTrap\\RKVY\\GIB\\3\\Cropped_images\\RKVY_GIB_20230201_152931(2)_2.jpg'

In [None]:
#Species level classification


In [35]:
#if not os.path.exists(order_dir):
        #os.makedirs(order_dir)
    #old_img_path = os.path.join("\\".join(cropped_dir.split("\\")[:-1]), cropped_name)
    #shutil.move(cropped_img_path, new_img_path)
    #print(max(pred), animal)
#order_dir = os.path.join("\\".join(cropped_dir.split("\\")[:-1]), order)

48265

In [None]:
import os

# Collect the immediate sub-folders of data_dir; fall back to data_dir itself when there are none.
sub_directories = []
for entry in os.listdir(data_dir):
    candidate = os.path.join(data_dir, entry)
    if os.path.isdir(candidate):
        sub_directories.append(candidate)
if not sub_directories:
    sub_directories = [data_dir]
sub_directories