In [54]:
!pip install opencv-python


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [55]:
import os 
import json
import time
import requests
import requests
from zipfile import ZipFile
import tarfile
from shutil import copyfile
from dataclasses import dataclass
import yaml
import glob
import random
import numpy as np
import pandas as pd
import cv2
from ultralytics import YOLO
import matplotlib.pyplot as plt


In [56]:
def download_unzip(url,save_path):
    """Download ``url`` to ``save_path`` and unpack it when it is a ``.tar`` file.

    Args:
        url: Address of the asset to fetch.
        save_path: Local path the downloaded bytes are written to. If it ends
            with ``.tar`` the archive is extracted into the directory that
            contains ``save_path``; any other file is only saved.

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            error page is never stored on disk as if it were the asset.

    Problems while extracting are reported by printing "Invalid file" instead
    of being raised.
    """
    print("Downloading and extracting assets...",end="")

    # Stream the body to disk in chunks instead of holding it all in memory.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        with open(save_path,"wb") as out_file:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                out_file.write(chunk)

    try:
        #Extract files
        if save_path.endswith(".tar"):
            # A bare file name has an empty directory part; use the cwd then.
            extract_dir = os.path.dirname(save_path) or "."
            with tarfile.open(save_path,"r") as tar:
                # NOTE(review): extractall trusts the member paths stored in
                # the archive; only use this with archives from trusted sources.
                tar.extractall(extract_dir)

        print("Done")
    except (tarfile.TarError, OSError):
        print("Invalid file")


In [57]:
# Remote location of the image archive and where it lands locally.
IMAGES_URL = r"http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar"
IMAGES_DIR = "Images"
IMAGES_TAR_PATH = os.path.join(os.getcwd(), IMAGES_DIR + ".tar")

# Remote location and local file name of the keypoint definition table.
ANNS_METADATA_URL = r"https://github.com/benjiebob/StanfordExtra/raw/master/keypoint_definitions.csv"
ANNS_METADATA = "keypoint_definitions.csv"

# Download the images only if the dataset folder does not exist yet; the
# tarball is removed again once download_unzip has returned.
if not os.path.exists(IMAGES_DIR):
    download_unzip(IMAGES_URL, IMAGES_TAR_PATH)
    os.remove(IMAGES_TAR_PATH)

# The CSV is fetched through the same helper; it is not a .tar, so it is only saved.
if not os.path.isfile(ANNS_METADATA):
    download_unzip(ANNS_METADATA_URL, ANNS_METADATA)


In [58]:
ANN_PATH = "StanfordExtra_V12"
JSON_PATH = os.path.join(ANN_PATH, "StanfordExtra_v12.json")

# Parse the annotation file once; later cells index json_data by position.
with open(JSON_PATH) as file:
    json_data = json.loads(file.read())

In [59]:
# Index arrays selecting the train / validation entries of the annotation file.
train_ids = np.load(os.path.join(ANN_PATH, "train_stanford_StanfordExtra_v12.npy"))
val_ids = np.load(os.path.join(ANN_PATH, "test_stanford_StanfordExtra_v12.npy"))

print(f"Train Samples: {len(train_ids)}")
print(f"Validation Samples: {len(val_ids)}")

Train Samples: 6773
Validation Samples: 1703


In [60]:
print(type(train_ids))
print(train_ids)

# Pick the annotation record behind every id of each split.
train_json_data = [json_data[train_id] for train_id in train_ids]
val_json_data = [json_data[val_id] for val_id in val_ids]


<class 'numpy.ndarray'>
[ 910  917  920 ... 8977 9745 9742]


In [61]:
# Root of the generated dataset; each split gets an images/ and a labels/ folder.
DATA_DIR = "animal-pose-data"

TRAIN_DIR = "train"
TRAIN_FOLDER_IMG = "images"
TRAIN_FOLDER_LABELS = "labels"

TRAIN_IMG_PATH = os.path.join(DATA_DIR, TRAIN_DIR, TRAIN_FOLDER_IMG)
TRAIN_LABEL_PATH = os.path.join(DATA_DIR, TRAIN_DIR, TRAIN_FOLDER_LABELS)

VALID_DIR = "valid"
VALID_FOLDER_IMG = "images"
VALID_FOLDER_LABELS = "labels"

VALID_IMG_PATH = os.path.join(DATA_DIR, VALID_DIR, VALID_FOLDER_IMG)
VALID_LABEL_PATH = os.path.join(DATA_DIR, VALID_DIR, VALID_FOLDER_LABELS)

# Create the four target folders; folders that already exist are left alone.
for _split_path in (TRAIN_IMG_PATH, TRAIN_LABEL_PATH, VALID_IMG_PATH, VALID_LABEL_PATH):
    os.makedirs(_split_path, exist_ok=True)


In [62]:
# Annotation records of each split, looked up by the ids loaded from the .npy files.
train_json_data = [json_data[train_id] for train_id in train_ids]
val_json_data = [json_data[val_id] for val_id in val_ids]

In [63]:
# Copy every annotated image of each split into that split's image folder,
# keeping only the last path component as the destination file name.
for split_records, split_img_dir in (
    (train_json_data, TRAIN_IMG_PATH),
    (val_json_data, VALID_IMG_PATH),
):
    for data in split_records:
        img_file = data["img_path"]
        filename = img_file.rsplit("/", 1)[-1]
        copyfile(os.path.join(IMAGES_DIR, img_file),
                 os.path.join(split_img_dir, filename))

In [64]:
# Class index written as the first field of every generated YOLO label line.
CLASS_ID = 0

In [65]:
def create_yolo_boxes_kpts(img_size,boxes,lm_kpts):
    """Normalise one bounding box and its keypoints by the image size.

    Args:
        img_size: ``(width, height)`` of the image in pixels.
        boxes: Array of four values; the first two are offset by half of the
            last two to obtain the centre, i.e. it is treated as
            ``[x, y, width, height]`` in pixels.
        lm_kpts: 2-D array with one row per keypoint. The first two columns
            are divided by ``(width, height)`` and the last column is a
            visibility flag (presumably rows are ``[x, y, visibility]`` --
            confirm against the annotation file). The array is not modified.

    Returns:
        ``(yolo_boxes, norm_kpts)``: the box as normalised
        ``[x_center, y_center, width, height]`` and a copy of the keypoints
        with normalised coordinates and visibility ``1`` rewritten as ``2``.
    """
    IMG_W,IMG_H  = img_size

    # Work on a floating-point copy so the caller's array stays untouched and
    # integer input is not truncated by the in-place division below.
    norm_kpts = np.asarray(lm_kpts)
    norm_kpts = norm_kpts.astype(np.result_type(norm_kpts.dtype, np.float32))

    # Visibility flags of 1 are rewritten to 2; all other values are kept.
    is_visible = norm_kpts[:, -1] == 1.
    norm_kpts[is_visible, -1] = 2.

    # Scale keypoint coordinates (every column but the last) into [0, 1].
    norm_kpts[:, :-1] = norm_kpts[:, :-1] / np.array([IMG_W, IMG_H])

    # Scale the box into [0, 1], then move its first two values to the centre.
    norm_bbox = np.asarray(boxes) / np.array([IMG_W, IMG_H, IMG_W, IMG_H])
    yolo_boxes = norm_bbox.copy()
    yolo_boxes[:2] = norm_bbox[:2] + norm_bbox[2:] / 2

    # Return the keypoints (the normalised box was returned here by mistake).
    return yolo_boxes, norm_kpts

In [66]:
def create_yolo_txt_files(json_data, LABEL_PATH):
    """Write one YOLO label ``.txt`` file per annotation record.

    Args:
        json_data: Iterable of annotation dicts providing ``"img_path"``,
            ``"img_width"``, ``"img_height"``, ``"joints"`` and ``"img_bbox"``.
        LABEL_PATH: Existing directory the label files are written into.

    Each file is named after the image file (extension removed) and holds a
    single line: the class id, the four normalised box values and the
    flattened keypoint values, all separated by single spaces and rounded to
    five decimals.
    """
    for data in json_data:
        # Drop only the final extension so the label stem matches the image
        # file name even when that name contains further dots.
        image_name = data["img_path"].split("/")[-1]
        IMAGE_ID = os.path.splitext(image_name)[0]
        IMG_WIDTH, IMG_HEIGHT = data["img_width"], data["img_height"]

        # NaN entries in the joints are replaced with 0 before conversion.
        landmarks_kpts = np.nan_to_num(np.array(data["joints"], dtype=np.float32))
        landmarks_bboxes = np.array(data["img_bbox"], dtype=np.float32)

        bboxes_yolo, kpts_yolo = create_yolo_boxes_kpts(
            (IMG_WIDTH,IMG_HEIGHT),
            landmarks_bboxes,
            landmarks_kpts)

        box_fields = [round(value, 5) for value in bboxes_yolo.tolist()]
        kpt_fields = [round(value, 5) for value in kpts_yolo.flatten().tolist()]

        # Join every field with a single space; previously the last box value
        # and the first keypoint value were glued together without one.
        line = " ".join(map(str, [CLASS_ID, *box_fields, *kpt_fields]))

        TXT_FILE = IMAGE_ID+".txt"
        with open(os.path.join(LABEL_PATH, TXT_FILE),"w") as f:
            f.write(line)


In [67]:
# Generate the label files for the training split first, then for validation.
for _records, _label_dir in (
    (train_json_data, TRAIN_LABEL_PATH),
    (val_json_data, VALID_LABEL_PATH),
):
    create_yolo_txt_files(_records, _label_dir)

Data Visualization

In [68]:
ann_meta_data = pd.read_csv("keypoint_definitions.csv")
COLORS = ann_meta_data["Hex colour"].values.tolist()

# One single-entry dict per colour: hex string -> (R, G, B), each channel
# parsed from two hexadecimal digits.
COLORS_RGB_MAP = [
    {hex_code: (int(hex_code[:2], 16), int(hex_code[2:4], 16), int(hex_code[4:], 16))}
    for hex_code in COLORS
]
    

In [69]:
# File names currently present in each split's image folder.
train_images, valid_images = os.listdir(TRAIN_IMG_PATH), os.listdir(VALID_IMG_PATH)

print(f"Training images: {len(train_images)}, Validation Images: {len(valid_images)}")

Training images: 6773, Validation Images: 1703
