In [None]:
!pip install -U insightface onnxruntime opencv-python-headless matplotlib patool
!apt-get install -y unrar rar         # tools for .zip/.rar handling


In [None]:
import shutil, os

# Reset the scratch folders named by WORK_DIR / OUTPUT_DIR.
# Neither name is assigned by any earlier cell visible here, so a plain
# reference stops a fresh "Restart & Run All" with NameError. Look the names
# up instead and skip the ones that do not exist (or are empty).
for dir_var in ("WORK_DIR", "OUTPUT_DIR"):
    d = globals().get(dir_var)
    if not d:
        print(f"skipped {dir_var} (not defined)")
        continue
    shutil.rmtree(d, ignore_errors=True)   # delete folder & contents
    os.makedirs(d, exist_ok=True)          # recreate it empty
    print(f"cleared {d}")


In [None]:
import collections
import concurrent.futures
import math
import os
import shutil
import subprocess

import cv2
import numpy as np
import patoolib
from google.colab import drive
from insightface.app import FaceAnalysis

# Mount Google Drive
drive.mount('/content/drive')

# EDIT THESE FOUR PATHS
ARCHIVE_INPUT_PATH    = "" # Input Path
RAW_EXTRACTION_FOLDER = "/content/crop test_raw"         # extraction target
EYE_CROP_OUTPUT_FOLDER= "/content/crop test_ocular"      # eye-crop folder
ARCHIVE_OUTPUT_PATH   = "" # Output Path

# Validate the user-edited paths first: the working folders are wiped right
# below, and the output path is only used after the whole run has finished.
if not os.path.isfile(ARCHIVE_INPUT_PATH):
    raise FileNotFoundError(
        f"ARCHIVE_INPUT_PATH does not point to an existing file: {ARCHIVE_INPUT_PATH!r}"
    )
if not ARCHIVE_OUTPUT_PATH:
    raise ValueError("ARCHIVE_OUTPUT_PATH is empty – set it before running this cell")

# Fresh start – clear & extract
for folder_path in (RAW_EXTRACTION_FOLDER, EYE_CROP_OUTPUT_FOLDER):
    shutil.rmtree(folder_path, ignore_errors=True)
    os.makedirs(folder_path, exist_ok=True)

print("Extracting archive …")
patoolib.extract_archive(ARCHIVE_INPUT_PATH, outdir=RAW_EXTRACTION_FOLDER, verbosity=-1)

# Dive into the single top-level folder if present, so that relative paths
# computed later start below the archive's wrapper directory.
top_level_entries = os.listdir(RAW_EXTRACTION_FOLDER)
if len(top_level_entries) == 1:
    single_entry_path = os.path.join(RAW_EXTRACTION_FOLDER, top_level_entries[0])
    if os.path.isdir(single_entry_path):
        RAW_EXTRACTION_FOLDER = single_entry_path

# Initialise InsightFace (GPU ctx_id=0); only the detection module is loaded
face_app = FaceAnalysis(name='buffalo_l', allowed_modules=['detection'])
face_app.prepare(ctx_id=0, det_size=(640, 640))

# Parameters
NUM_IO_THREADS        = 4
VALID_IMAGE_EXTENSIONS= ('.jpg', '.jpeg', '.png')
MIN_FRONTALITY_RATIO  = 0.40   # frontal-ness filter threshold
MAX_TILT_ANGLE_DEG    = 5      # degrees before in-plane rotation
EYE_CROP_SCALE_FACTOR = 1.0    # eye-crop size factor
INITIAL_CROP_SIZE     = 224    # intermediate resize size (not read by the code visible here)
FINAL_CROP_SIZE       = 112    # final resize requested
# Eye‐crop + save helper
def save_eye_patches(face, original_image, output_subfolder, filename):
    """Crop both eyes of one detected face and write them as image files.

    The face is skipped when it is too far from frontal, i.e. when the ratio
    of the smaller to the larger horizontal nose-to-eye distance is below
    MIN_FRONTALITY_RATIO. If the line through the eyes is tilted by more than
    MAX_TILT_ANGLE_DEG, the image and the keypoints are rotated about the eye
    midpoint so that the eyes end up level before cropping.

    Args:
        face: detection result exposing ``kps``; its first three rows are
            read as left eye, right eye and nose (x, y).
        original_image: image array the keypoints refer to.
        output_subfolder: directory that receives the crops (must exist).
        filename: base file name; crops are written as ``left_<filename>``
            and ``right_<filename>``.

    Returns:
        Number of eye crops actually written to disk (0, 1 or 2).
    """
    # Frontalness test using keypoints: left eye, right eye, nose
    left_eye, right_eye, nose = face.kps[:3]
    dist_left  = abs(nose[0] - left_eye[0])
    dist_right = abs(right_eye[0] - nose[0])
    max_dist   = max(dist_left, dist_right)
    min_dist   = min(dist_left, dist_right)
    if max_dist == 0:
        # nose and both eyes share one x coordinate – ratio is undefined
        return 0

    frontal_ratio = min_dist / max_dist
    if frontal_ratio < MIN_FRONTALITY_RATIO:
        return 0

    # Check tilt angle of eyes and align if needed
    dx = right_eye[0] - left_eye[0]
    dy = right_eye[1] - left_eye[1]
    tilt_angle = math.degrees(math.atan2(dy, dx))
    eyes_center = ((left_eye[0] + right_eye[0]) * 0.5,
                   (left_eye[1] + right_eye[1]) * 0.5)

    if abs(tilt_angle) > MAX_TILT_ANGLE_DEG:
        rotation_matrix = cv2.getRotationMatrix2D(eyes_center, tilt_angle, 1.0)
        h, w = original_image.shape[:2]
        working_image = cv2.warpAffine(original_image, rotation_matrix, (w, h))
        # move the keypoints with the same matrix so they still match the pixels
        transformed = cv2.transform(np.expand_dims(face.kps[:, :2], 0), rotation_matrix)[0]
        left_eye, right_eye = transformed[:2]
    else:
        working_image = original_image

    # Dynamic crop around each eye: half-width scales with the eye distance,
    # with a floor of 5 px so the patch never collapses.
    eye_distance = np.linalg.norm(right_eye - left_eye)
    extent      = max(int(eye_distance * EYE_CROP_SCALE_FACTOR) // 2, 5)
    h, w        = working_image.shape[:2]

    def crop_and_save(center, tag):
        """Write one square-ish patch around ``center``; return 1 if saved, else 0."""
        cx, cy = map(int, center)
        # clamp the window to the image, so patches near a border get smaller
        x1 = max(cx - extent, 0)
        y1 = max(cy - extent, 0)
        x2 = min(cx + extent, w)
        y2 = min(cy + extent, h)
        roi = working_image[y1:y2, x1:x2]
        if roi.size == 0:
            return 0

        # single resize straight to the final size
        final = cv2.resize(roi, (FINAL_CROP_SIZE, FINAL_CROP_SIZE), interpolation=cv2.INTER_AREA)

        out_name = f"{tag}_{filename}"
        # cv2.imwrite reports failure through its return value instead of
        # raising, so only count the patch when the write succeeded
        written = cv2.imwrite(os.path.join(output_subfolder, out_name), final)
        return 1 if written else 0

    # save both left and right eye crops
    return crop_and_save(left_eye, "left") + crop_and_save(right_eye, "right")


# Threaded disk reader
# Shared pool of NUM_IO_THREADS worker threads. iterate_images_in_folder hands
# read_image_from_disk calls to it so image files are loaded off the main thread.
io_thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=NUM_IO_THREADS)

def read_image_from_disk(file_path):
    """Load one image with OpenCV and pair it with the path it came from.

    Returns a ``(file_path, image)`` tuple, where ``image`` is whatever
    ``cv2.imread`` returned for that path.
    """
    loaded_image = cv2.imread(file_path)
    return (file_path, loaded_image)

def iterate_images_in_folder(folder_path):
    """Yield ``(basename, image)`` for every readable image file in a folder.

    Files are selected by extension (VALID_IMAGE_EXTENSIONS, case-insensitive)
    and visited in sorted path order. Reads run on ``io_thread_pool``, but only
    a small window of them is in flight at any time: ``Executor.map`` would
    submit every file at once and keep all decoded images in memory while the
    consumer catches up.

    Args:
        folder_path: directory to scan (not recursive).

    Yields:
        Tuples of the file's base name and the loaded image; files for which
        the reader returned ``None`` are skipped.
    """
    image_paths = sorted(
        os.path.join(folder_path, entry)
        for entry in os.listdir(folder_path)
        if entry.lower().endswith(VALID_IMAGE_EXTENSIONS)
    )
    remaining_paths = iter(image_paths)
    # enough queued reads to keep every worker busy without unbounded buffering
    max_in_flight = max(2 * NUM_IO_THREADS, 1)
    in_flight = collections.deque()

    def submit_next():
        """Queue the next unread path, if there is one."""
        next_path = next(remaining_paths, None)
        if next_path is not None:
            in_flight.append(io_thread_pool.submit(read_image_from_disk, next_path))

    for _ in range(max_in_flight):
        submit_next()

    while in_flight:
        # futures are consumed in submission order, so output order is stable
        file_path, image = in_flight.popleft().result()
        submit_next()
        if image is not None:
            yield os.path.basename(file_path), image

# Main loop – walk only leaf dirs, detect & crop
total_saved_patches = 0

for root_folder, subfolders, _ in os.walk(RAW_EXTRACTION_FOLDER):
    # only process leaf directories (no subfolders)
    if subfolders:
        continue

    relative_folder = os.path.relpath(root_folder, RAW_EXTRACTION_FOLDER)
    if relative_folder == os.curdir:
        # Images sit directly in the extraction root: relpath is ".", which
        # would name the output folder "._eyes" (a hidden directory). Use the
        # folder's own name instead.
        relative_folder = os.path.basename(os.path.normpath(root_folder))
    output_subfolder = os.path.join(EYE_CROP_OUTPUT_FOLDER, relative_folder + "_eyes")
    os.makedirs(output_subfolder, exist_ok=True)

    saved_in_folder = 0
    for filename, image in iterate_images_in_folder(root_folder):
        detected_faces = face_app.get(image)
        if detected_faces:
            # only the first returned face of each image is cropped
            saved_in_folder += save_eye_patches(detected_faces[0], image,
                                                output_subfolder, filename)

    total_saved_patches += saved_in_folder
    print(f"{relative_folder}: {saved_in_folder} eye images")

print(f"\nALL DONE – total eye files saved: {total_saved_patches}")

# Final compression to RAR
print("\nCompressing results …")
# Run rar through subprocess with an argument list: no shell quoting is
# involved, and the exit status can be checked before reporting success.
rar_result = subprocess.run(
    ["rar", "a", "-r", "-ep1", ARCHIVE_OUTPUT_PATH, EYE_CROP_OUTPUT_FOLDER],
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
    errors="replace",
)
if rar_result.returncode != 0:
    # show the end of rar's own output so the cause is visible in the notebook
    print(rar_result.stdout[-2000:])
    raise RuntimeError(f"rar failed with exit code {rar_result.returncode}")
print(f"RAR saved to:\n{ARCHIVE_OUTPUT_PATH}")
