# Setup the workspace

In [None]:
import os
import numpy as np
import torch
from PIL import Image

from torchvision import transforms as tr
# from torchvision.transfroms import Compose

import albumentations as A
import cv2
import random

import matplotlib.pyplot as plt
%matplotlib inline


# output text file with list of paths to the leopards
import pprint
import sys
import glob
from collections import namedtuple

In [None]:
# Load the drive to access the images and annotations.
# The mount point /content/gdrive is the prefix of the dataset paths used further down.
from google.colab import drive
drive.mount('/content/gdrive')

# reduce the path of the drive
def driveSymboLink(target='/content/gdrive/MyDrive', link='/mydrive'):
  """Create a short symbolic link to the mounted Drive folder.

  With the defaults, /mydrive becomes an alias for /content/gdrive/MyDrive,
  which is the spelling every other Drive path in this notebook uses.

  Args:
    target: Existing folder the link should point to.
    link: Path of the symbolic link to create.

  Raises:
    FileExistsError: If ``link`` already exists and is not a symbolic link.
  """
  if os.path.islink(link):
    # Re-running the cell must not fail: keep a correct link, replace a stale one.
    if os.readlink(link) == target:
      return
    os.remove(link)
  os.symlink(target, link)

# Create the link right after mounting the drive.
driveSymboLink()

Mounted at /content/gdrive


In [None]:
# Work from /content so the repository ends up in /content/yolov5.
%cd /content/
!git clone https://github.com/ultralytics/yolov5  # clone
%cd yolov5
# -q: quiet output, -r: read the package list from the repository's requirements file.
%pip install -qr requirements.txt  # install

/content
Cloning into 'yolov5'...
remote: Enumerating objects: 13071, done.[K
remote: Counting objects: 100% (246/246), done.[K
remote: Compressing objects: 100% (123/123), done.[K
remote: Total 13071 (delta 154), reused 205 (delta 123), pack-reused 12825[K
Receiving objects: 100% (13071/13071), 12.47 MiB | 14.22 MiB/s, done.
Resolving deltas: 100% (8980/8980), done.
/content/yolov5
[K     |████████████████████████████████| 596 kB 15.6 MB/s 
[?25h

In [None]:
def visualize(image):
    """Draw ``image`` on a new 10x10 matplotlib figure with the axes hidden."""
    canvas_size = (10, 10)
    plt.figure(figsize=canvas_size)
    # Hide ticks and frame before drawing so only the picture is visible.
    plt.axis('off')
    plt.imshow(image)

# Find sets
There are three possible sets: the images that appear in both our ideal set and the inference output of our trained model; the ideal set, which includes 64 carefully detected classes; and the inference set, which applies the constraints of the ideal set:
- There must be 7 or more images for flank, face and full body.
- They must be detected at high confidence (0.90).

We create these three sets and compare the false negatives (leopards in the ideal set that our model did not detect) with the false positives (leopards our model detected that do not appear in the ideal set).

In [None]:
# https://drive.google.com/drive/folders/12t0C5P3KWnj0uiqthSA-OTiJcWDpZ8gA
# https://drive.google.com/drive/folders/1UVcB_AMN8XYhGGZzTfUITMffyxYMyNud
#  - ideal -> resize -> (test, train) -> (leop_N) -> (face, flank, full) -> (leop_N_XXXXXXX)


# /content/gdrive/MyDrive/MIDS/W210/ideal
# /content/gdrive/MyDrive/MIDS/W210/inference

# https://drive.google.com/drive/folders/1-ja3xff3KaiZ0Y3lPQw1qxLLXSPoAnzl
#  inference -> test -> images -> (leop_X_YYYYYYY)
#  inference -> train -> (cropped_images_AAA) -> (leop_N) -> (face, flank, full) -> (leop_N_XXXXXXX)

# compare 
#  ideal.test -> inference.test                                 --> find the missing leopards
#  ideal.train -> inference.train.cropped_images_(90, 90x, 95)  --> find the missing leopards


In [None]:

# Root folders on the mounted drive for the two datasets described in the comments above.
ideal_root = "/content/gdrive/MyDrive/MIDS/W210/ideal/resize"
inference_root = "/content/gdrive/MyDrive/MIDS/W210/inference"

def folder_is_leop_folder(folder_name, parent_path = "", min_images = 7):
  """Tell whether ``folder_name`` is a usable leopard folder.

  A folder qualifies when its name starts with ``leop_`` and its ``full``
  sub-folder exists and holds at least ``min_images`` entries.

  Args:
    folder_name: Name of the candidate folder (not a full path).
    parent_path: Folder that contains ``folder_name``; may be given with or
      without a trailing slash. Empty means relative to the working directory.
    min_images: Minimum number of entries required in the ``full`` sub-folder.

  Returns:
    True if the folder qualifies, False otherwise. A missing ``full``
    sub-folder is reported on stdout.
  """
  if not folder_name.startswith("leop_"):
    return False

  # os.path.join inserts the separator only when needed, so a parent path
  # without a trailing slash no longer produces a wrong location.
  full_dir = os.path.join(parent_path, folder_name, "full")

  if not os.path.isdir(full_dir):
    print(f"missing path - {full_dir}")
    return False

  # NOTE(review): only the "full" view is counted here; the notebook text also
  # mentions face and flank - confirm whether those should be checked as well.
  return len(os.listdir(full_dir)) >= min_images



def leop_id_from_folder_name(folder_name):
  """Return the leopard id: the second "_"-separated field of the name, as an int."""
  fields = folder_name.split("_")
  id_field = fields[1]
  return int(id_field)

def leop_id_folders(folder_path):
  """Collect the ids of all entries in ``folder_path`` accepted by folder_is_leop_folder.

  Returns:
    A set of integer leopard ids.
  """
  leop_ids = set()
  for entry in os.listdir(folder_path):
    if folder_is_leop_folder(entry, folder_path):
      leop_ids.add(leop_id_from_folder_name(entry))
  return leop_ids

def leop_id_from_images_in_folder(folder_path):
  """Collect the leopard ids found in the image file names of ``folder_path``.

  Entries are expected to be named ``leop_<id>_<number>.<ext>``; anything not
  starting with ``leop_`` is ignored.

  The previous version filtered with the folder check, which looks for a
  ``<name>/full`` directory; image files never have one, so the result was
  always empty.

  Returns:
    A set of integer leopard ids.
  """
  leop_ids = set()
  for file_name in os.listdir(folder_path):
    if not file_name.startswith("leop_"):
      continue
    # The id is the second "_"-separated field, e.g. leop_12_000315.jpg -> 12.
    leop_ids.add(int(file_name.split("_")[1]))
  return leop_ids


def missing_leops(ideal_folder, inference_folder):
  """Compare the leopard ids present in the ideal and the inference folders.

  Returns:
    A dict with three sets of ids:
      "both"      - ids present in both folders,
      "inference" - ids only present in the inference folder,
      "ideal"     - ids only present in the ideal folder.
  """
  detected = leop_id_folders(inference_folder)
  expected = leop_id_folders(ideal_folder)
  return {
      "both": detected & expected,
      "inference": detected - expected,
      "ideal": expected - detected,
  }


# Dataset split to compare; the same sub-folder name is used under both roots.
folders='train'
# Placeholder for per-split results (currently only one split is processed).
ideal_missing={}

# Build both folders from the shared roots defined at the top of this cell so
# the locations cannot drift apart. The trailing slash is kept so the strings
# can be used directly as path prefixes.
ideal_test_leop_folder = f"{ideal_root}/{folders}/"
inference_test_leop_folder = f"{inference_root}/leopard_NI_1/{folders}/_resized_95/"

missing=missing_leops(ideal_test_leop_folder, inference_test_leop_folder)

# Ids that only show up in the inference output.
missing["inference"]

missing path - /content/gdrive/MyDrive/MIDS/W210/inference/leopard_NI_1/train/_resized_95/leop_101/full
missing path - /content/gdrive/MyDrive/MIDS/W210/inference/leopard_NI_1/train/_resized_95/leop_105/full
missing path - /content/gdrive/MyDrive/MIDS/W210/inference/leopard_NI_1/train/_resized_95/leop_110/full
missing path - /content/gdrive/MyDrive/MIDS/W210/inference/leopard_NI_1/train/_resized_95/leop_124/full
missing path - /content/gdrive/MyDrive/MIDS/W210/inference/leopard_NI_1/train/_resized_95/leop_134/full
missing path - /content/gdrive/MyDrive/MIDS/W210/inference/leopard_NI_1/train/_resized_95/leop_136/full
missing path - /content/gdrive/MyDrive/MIDS/W210/inference/leopard_NI_1/train/_resized_95/leop_140/full
missing path - /content/gdrive/MyDrive/MIDS/W210/inference/leopard_NI_1/train/_resized_95/leop_145/full
missing path - /content/gdrive/MyDrive/MIDS/W210/inference/leopard_NI_1/train/_resized_95/leop_150/full
missing path - /content/gdrive/MyDrive/MIDS/W210/inference/leopa

{48,
 248,
 252,
 254,
 257,
 258,
 259,
 260,
 263,
 265,
 266,
 267,
 269,
 271,
 272,
 273,
 278,
 281,
 284,
 285,
 286,
 288,
 289,
 294,
 296}

In [None]:
# Number of leopard ids that appear only in the inference set.
len(missing["inference"])

25

In [None]:

# Folder holding the raw test images; file names follow leop_<id>_<number>.jpg.
test_root='/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/test/images'
# All entries of the folder, in the order the file system returns them.
img_test = os.listdir(test_root)

['leop_1_000008.jpg',
 'leop_1_000012.jpg',
 'leop_1_000013.jpg',
 'leop_1_000034.jpg',
 'leop_1_000035.jpg',
 'leop_1_000036.jpg',
 'leop_1_000044.jpg',
 'leop_1_000056.jpg',
 'leop_1_000063.jpg',
 'leop_1_000070.jpg',
 'leop_1_000074.jpg',
 'leop_1_000089.jpg',
 'leop_1_000105.jpg',
 'leop_1_000114.jpg',
 'leop_1_000117.jpg',
 'leop_1_000119.jpg',
 'leop_1_000121.jpg',
 'leop_0_000127.jpg',
 'leop_3_000128.jpg',
 'leop_4_000140.jpg',
 'leop_4_000151.jpg',
 'leop_6_000183.jpg',
 'leop_7_000187.jpg',
 'leop_7_000198.jpg',
 'leop_7_000199.jpg',
 'leop_7_000220.jpg',
 'leop_7_000227.jpg',
 'leop_3_000229.jpg',
 'leop_6_000243.jpg',
 'leop_8_000248.jpg',
 'leop_8_000249.jpg',
 'leop_8_000250.jpg',
 'leop_8_000253.jpg',
 'leop_9_000257.jpg',
 'leop_9_000265.jpg',
 'leop_10_000273.jpg',
 'leop_10_000274.jpg',
 'leop_10_000281.jpg',
 'leop_11_000293.jpg',
 'leop_12_000315.jpg',
 'leop_12_000379.jpg',
 'leop_12_000390.jpg',
 'leop_12_000396.jpg',
 'leop_12_000397.jpg',
 'leop_12_000406.jpg',


In [None]:
# Same count as the earlier cell: size of the set of ids found only in the inference set.
len(missing['inference'])

190

# Images not found in ideal
We must run the detection on the images not found in the ideal set, using a lower confidence threshold, to see what the model finds, if anything. For this analysis we chose a confidence of 0.5.

In [None]:
import re

raw_train='/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images'
WEIGHTS='/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/detection/610-91-V3-RF/det-610-91-def-aug-ni-rf-v3-m66/weights/best.pt'

# List the raw training folder once (it does not change while looping) and in
# sorted order so the resulting list is the same on every run.
raw_images=sorted(os.listdir(raw_train))
missing_ids=set(missing['inference'])

# Keep the full path of every raw image whose leopard id is only in the inference set.
imgs_missing_ideal=[]
for img in raw_images:
  # File names look like leop_<id>_<number>.jpg; anchor at the start of the name.
  id_match=re.match(r"leop_(\d+)_", img)
  if id_match and int(id_match.group(1)) in missing_ids:
    imgs_missing_ideal.append(os.path.join(raw_train, img))



In [None]:
# Persist the collected image paths, one per line.
missing_file='/content/missing.txt'
with open(missing_file, 'w') as f:
  f.writelines(f'{file_name}\n' for file_name in imgs_missing_ideal)

In [None]:
# Read the stored paths back; every element keeps its trailing newline.
missing_file='/content/missing.txt'
with open(missing_file,'r') as f:
  data=list(f)
data

['/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003510.jpg\n',
 '/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003511.jpg\n',
 '/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003512.jpg\n',
 '/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003517.jpg\n',
 '/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003518.jpg\n',
 '/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003519.jpg\n',
 '/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003520.jpg\n',
 '/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003521.jpg\n',
 '/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003522.

In [None]:
%cd /content/
!mkdir /content/leop_missing


/content


In [None]:
# First stored path with its last character (the newline written after each path) removed.
data[0][:-1]

'/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_106_002296.jpg'

In [None]:
def find_img_name(text):
  """Extract the trailing ``leop_<id>_<rest>`` image name from a path.

  Args:
    text: A path or file name ending in an image name such as
      ``leop_257_003510.jpg``.

  Returns:
    The matched image name.

  Raises:
    ValueError: If ``text`` does not end in a leopard image name.
  """
  # Same pattern as before, with non-capturing groups since only the whole match is used.
  match = re.search(r"(?:leop)+_[^_]+_[^_]+$", text)
  if match is None:
    raise ValueError(f"no leopard image name found in {text!r}")
  return match.group(0)

# Quick check of the helper on the first stored path (newline stripped).
print(find_img_name(data[0][:-1]))

/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003510.jpg
leop_257_003510.jpg


In [None]:
import shutil

# Copy every listed image into the local folder that detection reads from.
# A shell `cp` never raises a Python exception, so failures used to go
# unnoticed; shutil.copy raises, which lets us actually record them.
not_found=[]
print(f"There are a total of {len(data)} images")
for img in data:
    # Strip only the line terminator; the last line may not have one.
    source=img.rstrip("\n")
    name=find_img_name(source)
    destination=f"/content/leop_missing/{name}"
    try:
      shutil.copy(source, destination)
    except OSError:
      # Source not present (or not readable) in the train folder.
      not_found.append(img)
print(f"There are {len(not_found)} images missing in the train folder")

There are a total of 1553 images
/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003510.jpg
/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003511.jpg
/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003512.jpg
/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003517.jpg
/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003518.jpg
/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003519.jpg
/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003520.jpg
/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003521.jpg
/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/datasets/leopard/train/images/leop_257_003522.jpg
/content/gdri

In [None]:
# first=imgs_missing_ideal[0]
# print(first)
%cd yolov5
!python detect.py --weights $WEIGHTS --img 640 --conf 0.90 --source /content/leop_missing
# display.Image(filename='runs/detect/exp/zidane.jpg', width=600)

/content/yolov5
[34m[1mdetect: [0mweights=['/content/gdrive/MyDrive/MIDS/W210/Animal_Identification/detection/610-91-V3-RF/det-610-91-def-aug-ni-rf-v3-m66/weights/best.pt'], source=/content/leop_missing, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.9, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False
YOLOv5 🚀 v6.1-316-g916bdb1 Python-3.7.13 torch-1.12.0+cu113 CPU

Fusing layers... 
Model summary: 378 layers, 35266236 parameters, 0 gradients, 48.9 GFLOPs
image 1/1553 /content/leop_missing/leop_248_003328.jpg: 512x640 Done. (0.984s)
image 2/1553 /content/leop_missing/leop_248_003329.jpg: 512x640 Done. (0.978s)
image 3/1553 /content/leop_missing/leop_248_003330.jpg: 512x640 Done. (0.980s)
image 4/1553 /conten

In [None]:
detect_dir = '/content/yolov5/runs/detect/exp'

# Preview every 20th result. Sorting makes the sample the same on every run,
# since os.listdir returns entries in arbitrary order.
for img in sorted(os.listdir(detect_dir))[::20]:
    image = cv2.imread(f'{detect_dir}/{img}')
    if image is None:
      # cv2.imread returns None for anything it cannot decode (e.g. a sub-folder).
      continue
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    visualize(image)

Output hidden; open in https://colab.research.google.com to view.