In [1]:
##### Give data directory and run all the rows

data_dir=r"E:\Camera_Trapping\Guzzler_data\2023\CameraTrap\RKVY\GIB\2"

In [2]:
import os
sub_directories=[os.path.join(data_dir, d) for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))]
if not sub_directories:
    sub_directories= [data_dir]
sub_directories

['E:\\Camera_Trapping\\Guzzler_data\\2023\\CameraTrap\\RKVY\\GIB\\2']

In [3]:
import json
from datetime import datetime, timedelta
import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
else:
    print("No GPUs available.")
import pandas as pd
import numpy as np
from PIL import Image
import cv2
import os
import shutil
import time
import sys
from pathlib import Path
from subprocess import Popen, PIPE 
import re
import subprocess
import concurrent.futures
import threading
IMG_SIZE = (224,224)

def check_existing_file(img_dir):
    output_file_path = os.path.join(img_dir, "output.json")
    if os.path.exists(output_file_path):
        return 1
    return 0

def megadetector(img_dir, num_images):
    print("Megadetector model")

    local_detector_path = os.path.join(os.getcwd(), "cameratraps", "detection", "run_detector_batch.py")
    megadetector_path = os.path.join(os.getcwd(), "md_v5a.0.0.pt")
    json_dir = os.path.join(img_dir, "output.json")

    if check_existing_file(img_dir) == 1:
        # self.output_label.configure(text = f"Megadetector output file already exists.. Going for species classification")
        print("Megadetector output file already exists.. Going for species classification")
        return json_dir
    
    print(local_detector_path, megadetector_path, json_dir)

    command = [sys.executable,
                local_detector_path,
                megadetector_path,
                img_dir,
                json_dir,
                "--recursive"]

    with Popen(command,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=1, shell=True,
            universal_newlines=True) as p:
        for line in p.stdout:
            if line.startswith("Loaded model in"):
                print(line)
            
            elif "%" in line[0:4]:
                percentage = re.search("\d*%", line[0:4])[0][:-1]
                
            print(line)
    
    print("Bounding Boxes Created")

    return json_dir

def get_detection_df(img_path, json_dir):
    print("Generating detections.csv...")

    with open(json_dir, 'r') as f:
        data = json.load(f)
        df = pd.DataFrame(data["images"])

    records = []
    for i, row in df.iterrows():
        filepath = row["file"]
        filename = os.path.splitext(os.path.basename(filepath))[0]
        detections = row["detections"]
        for j, detection in enumerate(detections):
            area = detection["bbox"][2] * detection["bbox"][3]
            y_position = detection["bbox"][1]
            if (
                detection["conf"] > 0.1
                and area > 0.001
                and not (area <= 0.01 and y_position > 0.6)
            ):
                if detection["category"] == '1':
                    category = "Animal"
                elif detection["category"] == '2':
                    category = "Person"
                else:
                    category = "Vehicle"

                records.append(
                    {
                        "Filepath": filepath,
                        "Filename": filename,
                        "Detection_number": j + 1,
                        "Category": category,
                        "Detection_Confidence": detection["conf"],
                        "Detection_bbox": detection["bbox"],
                    }
                )

    new_df = pd.DataFrame(records)
    new_df["File_directory"] = new_df["Filepath"].apply(os.path.dirname)
    df_path = os.path.join(img_path, "detections.csv")
    new_df.to_csv(df_path, index=False)
    
    small_obj_df = new_df[new_df["Category"] == "Small Object"]
    if not small_obj_df.empty:
        small_obj_df_path = os.path.join(img_path, "small_objects.csv")
        small_obj_df.to_csv(small_obj_df_path, index=False)

    return new_df
    
def crop_img(img_dir, bbox):
    img = Image.open(img_dir)
    x,y,w,h = tuple(i for i in bbox)
    mul_x = img.size[0]
    mul_y = img.size[1]
    w = w * mul_x
    h = h * mul_y
    x1 = x * mul_x
    x2 = x * mul_x + w
    y1 = y * mul_y
    y2 = y * mul_y + h
    cropped = img.crop((x1,y1,x2, y2))
    return cropped

def run_order_level_model(pred_img, order_level_model):
    pred = order_level_model.predict(pred_img)
    pred = np.squeeze(pred)
    order_level_pred_prob = round(np.max(pred,axis=-1), 2)
    if order_level_pred_prob >= 0.8:
        pred_class = order_level_class_names[np.argmax(pred,axis=-1)]
    else:
        pred_class = "Others" 
    return pred_class, order_level_pred_prob

def run_species_level_model(pred_img, order_pred, ungulate_model, small_carnivore_model):
    if order_pred == "Ungulate":
        species_model = ungulate_model
        species_class_names=ungulate_class_names
    else:
        species_model = small_carnivore_model
        species_class_names=small_carnivores_class_names
    pred = species_model.predict(pred_img)
    pred = np.squeeze(pred)
    species_pred_prob= round(np.max(pred,axis=-1),2)
    if species_pred_prob >= 0.8:
        pred_class = species_class_names[np.argmax(pred,axis=-1)]
    else:
        pred_class = order_pred
    return pred_class,species_pred_prob

def process_images(df,models):
    order_preds = []
    order_pred_probs = []
    species_preds=[]
    species_pred_probs=[]
    
    for i, row in df.iterrows():
        filepath = row["Filepath"]
        filename = row["Filename"]
        detection = row["Detection_number"]
        bbox = row["Detection_bbox"]
        category = row["Category"]
        directory = row["File_directory"]
        
        cropped_img = crop_img(filepath, bbox)
        pred_img = np.array(cropped_img)
        pred_img = tf.image.resize(pred_img, size= IMG_SIZE,method = "area")
        pred_img = tf.expand_dims(pred_img, axis=0)
        
        order_pred = category
        order_pred_prob = np.nan
        species_pred = category
        species_pred_prob = np.nan

        if category not in ["Vehicle", "Person"]:
            order_pred, order_pred_prob = run_order_level_model(pred_img, models[0])
            if order_pred in ["Ungulate", "Small Carnivore"]:
                species_pred, species_pred_prob = run_species_level_model(pred_img, order_pred, models[1], models[2])
            else:
                species_pred = order_pred
            
        order_preds.append(order_pred)
        order_pred_probs.append(order_pred_prob)
        species_preds.append(species_pred)
        species_pred_probs.append(species_pred_prob)
        
        cropped_dir = os.path.join(directory,f"Cropped_images\\{species_pred}")
        os.makedirs(cropped_dir, exist_ok=True)
        cropped_img.save(os.path.join(cropped_dir,f"{filename}_{detection}.JPG"))
        
    df["Order_pred"] = order_preds
    df["Order_pred_prob"] = order_pred_probs
    df["Species_pred"] = species_preds
    df["Species_pred_prob"] = species_pred_probs

    df["Cropped_image_dir"] = df["File_directory"] + "\\Cropped_images\\" + df["Species_pred"]
    df["Detection_number"] = df["Detection_number"].astype(str)
    df["Cropped_image_name"] = df["Filename"] + "_" + df["Detection_number"]
    df["Cropped_image_path"] = df["Cropped_image_dir"] + "\\" + df["Cropped_image_name"] + ".JPG"
    df=df.reset_index(drop=True)
    return df


# Define a lock for thread safety when modifying shared data
lock = threading.Lock()

# Define the function for processing a single image
def process_one_image(row, models):
    filepath = row[0]
    filename = row[1]
    detection = row[2]
    bbox = row[5]
    category = row[3]
    directory = row[6]
    
    order_level_model, ungulate_model, small_carnivore_model = models
    cropped_img = crop_img(filepath, bbox)
    pred_img = np.array(cropped_img)
    pred_img = tf.image.resize(pred_img, size=IMG_SIZE, method="area")
    pred_img = np.expand_dims(pred_img, axis=0)

    order_pred = category
    order_pred_prob = np.nan
    species_pred = category
    species_pred_prob = np.nan

    if category not in ["Vehicle", "Person"]:
        order_pred, order_pred_prob = run_order_level_model(pred_img, models[0])
        if order_pred in ["Ungulate", "Small Carnivore"]:
            species_pred, species_pred_prob = run_species_level_model(pred_img, order_pred, models[1], models[2])
        else:
            species_pred = order_pred

    cropped_dir = os.path.join(directory, f"Cropped_images\\{species_pred}")
    os.makedirs(cropped_dir, exist_ok=True)
    cropped_img.save(os.path.join(cropped_dir, f"{filename}_{detection}.JPG"))

    # Ensure thread safety when modifying shared data
    with lock:
        return order_pred, order_pred_prob, species_pred, species_pred_prob

def process_images_batch(df, batch_size, models):
    order_preds = []
    order_pred_probs = []
    species_preds = []
    species_pred_probs = []
    
    results = []
    
    # Split the DataFrame into batches
    num_batches, remainder = divmod(len(df), batch_size)
    
    for i in range(num_batches):
        batch = df[i * batch_size: (i + 1) * batch_size]
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [executor.submit(process_one_image, row, models) for row in batch.itertuples(index=False, name=None)]
        for future in concurrent.futures.as_completed(futures):
            result = future.result()
            results.append(result)

    if remainder > 0:
        remaining_data = df[num_batches * batch_size:]
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [executor.submit(process_one_image, row, models) for row in remaining_data.itertuples(index=False, name=None)]
        for future in concurrent.futures.as_completed(futures):
            result = future.result()
            results.append(result)
        
        #executor = ThreadPoolExecutor(max_workers=4)  # Adjust max_workers as needed
        # List of inputs (assuming 'remaining_data' is an iterable)
        #list_of_inputs = [(row, models) for row in remaining_data.itertuples(index=False, name=None)]
        # Use executor.map to apply the function to inputs in parallel
        #results = list(executor.map(process_one_image, list_of_inputs))
        # Close the executor when done
        #executor.shutdown()

    for result in results:
        order_pred, order_pred_prob, species_pred, species_pred_prob = result
        order_preds.append(order_pred)
        order_pred_probs.append(order_pred_prob)
        species_preds.append(species_pred)
        species_pred_probs.append(species_pred_prob)

    df["Order_pred"] = order_preds
    df["Order_pred_prob"] = order_pred_probs
    df["Species_pred"] = species_preds
    df["Species_pred_prob"] = species_pred_probs

    df["Cropped_image_dir"] = df["File_directory"] + "\\Cropped_images\\" + df["Species_pred"]
    df["Detection_number"] = df["Detection_number"].astype(str)
    df["Cropped_image_name"] = df["Filename"] + "_" + df["Detection_number"]
    df["Cropped_image_path"] = df["Cropped_image_dir"] + "\\" + df["Cropped_image_name"] + ".JPG"
    return df

log = {}
now = datetime.now()
log.update({"Run timestamp" : str(now)})
num_images = 0
for _,_,files in os.walk(data_dir):
    num_images += len(files) 
log.update({"Num images" : num_images})

In [4]:
#### Load models

models = []

model_load_start=time.time()
order_level_model_path = os.path.join(os.getcwd(), r"Models\Refined_Hierarchical.ckpt")
order_level_model = tf.keras.models.load_model(order_level_model_path)
models.append(order_level_model)
order_level_class_names = ["GIB", "Goat_Sheep", "Hare", "Human", "Raptor", "Small Bird", "Small Carnivore", "Ungulate", "Vehicle", "Wild Pig"]
order_level_class_names.sort()

ungulate_model_path = os.path.join(os.getcwd(), r"Models\Efficient_Net_Ungulates_3.ckpt")
ungulate_model = tf.keras.models.load_model(ungulate_model_path)
models.append(ungulate_model)
ungulate_class_names = ["Camel", "Chinkara", "Nilgai", "Cattle"]
ungulate_class_names.sort()     

small_carnivore_model_path = os.path.join(os.getcwd(), r"Models\Efficient_Net_Small_Carnivores_1.ckpt")
small_carnivore_model= tf.keras.models.load_model(small_carnivore_model_path)
models.append(small_carnivore_model)
small_carnivores_class_names = ["Dog", "Desert Cat", "Fox"]
small_carnivores_class_names.sort()

model_load_end = time.time()
model_load_time = str(timedelta(seconds=round(model_load_end - model_load_start)))
log.update({"Model_load_time" : model_load_time})
print(model_load_time)

0:00:59


In [6]:
##### Common command for running all models

def detect_and_classify(data_dir, models):
    ##### Code for running megadetector
    megadetector_start = time.time()
    json_dir = megadetector(data_dir, num_images)
    df_detections = get_detection_df(data_dir, json_dir)
    megadetector_end = time.time()
    megadetector_time = str(timedelta(seconds=round(megadetector_end - megadetector_start)))
    log.update({"Megadetector_time" : megadetector_time})
    
    #### Code for animal classification on top of detections
    image_classification_start = time.time()
    df_final = process_images(df_detections, models)
    image_classification_end = time.time()
    image_classification_time = str(timedelta(seconds=round(image_classification_end - image_classification_start)))
    log.update({"Image_classification_time" : image_classification_time})
    df_path = os.path.join(data_dir, "predictions.csv")
    df_final.to_csv(df_path, index=False)    
    return df_final

In [None]:
start = time.time()
for d in sub_directories:
    df_final = detect_and_classify(d, models)
end = time.time()
duration = str(timedelta(seconds=round(end - start)))

In [11]:
print(len(df_final))#duration)

80906


3.745648148148148

In [None]:
##### Code for running megadetector

megadetector_start = time.time()
json_dir = megadetector(data_dir, num_images)
df_detections = get_detection_df(data_dir, json_dir)
megadetector_end = time.time()
megadetector_time = str(timedelta(seconds=round(megadetector_end - megadetector_start)))
log.update({"Megadetector_time" : megadetector_time})
print(megadetector_time)

In [None]:
#### Code for animal classification on top of detections

image_classification_start = time.time()
df_final = process_images_batch(df_detections,32)
image_classification_end = time.time()
image_classification_time = str(timedelta(seconds=round(image_classification_end - image_classification_start)))
log.update({"Image_classification_time" : image_classification_time})
print(image_classification_time)

In [10]:
print(model_load_time)
print(megadetector_time)
print(image_classification_time)
print(len(df_final))

0:00:59


NameError: name 'megadetector_time' is not defined

In [None]:
df_final2['area'] = df_final['Detection bbox'].apply(lambda x: x[2] * x[3])
df_final2['y_position'] = df_final['Detection bbox'].apply(lambda x: x[1])