Once the cropped hand gesture images are saved, they all have different sizes. We want to resize them to a single target size that is:
1. Big enough to capture important hand-shape details
2. Small enough to train fast
3. Consistent across the dataset
    
So we look at the distribution of our image dimensions and pick a size close to the *median* or *mean*, then round it to a CNN-friendly size (such as 64, 96 or 128). 
- *The motivation for this is:* powers of 2 and divisibility - many of these sizes are powers of 2 (64, 128, 256) or, like 96 and 224, evenly divisible by 32. This is crucial because standard CNN architectures use multiple layers of pooling operations that typically reduce the image dimensions by half at each stage.

In [3]:
import os
import cv2
import numpy as np

# Folder of cropped gesture images whose dimensions are summarised below.
input_folder = "./mediapipe_cropper/gestures_processed/train_point_up/"

widths = []
heights = []
count = 0  # number of images that were successfully read

for filename in os.listdir(input_folder):
    img = cv2.imread(os.path.join(input_folder, filename))
    # cv2.imread returns None (it does not raise) when an entry cannot be
    # decoded as an image, e.g. a sub-folder or a non-image file.
    if img is None:
        print("Skipping unreadable file:", filename)
        continue
    h, w = img.shape[:2]
    widths.append(w)
    heights.append(h)
    count += 1

if count == 0:
    # min()/max() raise on empty lists and the mean/median would be NaN.
    print("No readable images found in", input_folder)
else:
    print("Mean width: ", np.mean(widths))
    print("Mean height: ", np.mean(heights))
    print("Median width: ", np.median(widths))
    print("Median height: ", np.median(heights))
    print("Count: ", count)

    print("Min size:", min(widths), min(heights))
    print("Max size:", max(widths), max(heights))

Mean width:  75.06502242152466
Mean height:  126.26083707025411
Median width:  74.0
Median height:  124.0
Count:  1338
Min size: 44 60
Max size: 161 304


In [None]:


import argparse 
import sys

# A notebook has no real command line, so fake the arguments the script
# version of this cell would receive.
sys.argv = [
    'script.py',
    '-height', '128',
    '-width', '128',
    '-input', './mediapipe_cropper/gestures_processed/train_point_up/',
    '-output', './resizer/output/',
]

parser = argparse.ArgumentParser()

# Options: -height HEIGHT -width WIDTH -input INPUT -output OUTPUT
parser.add_argument(
    "-height", "--height",
    dest="height",
    default=128,
    help="Output image height",
    type=int,
)
parser.add_argument(
    "-width", "--width",
    dest="width",
    default=128,
    help="Output image width",
    type=int,
)
parser.add_argument(
    "-input", "--input",
    dest="input",
    default="./input/",
    help="Path to input folder",
)
parser.add_argument(
    "-output", "--output",
    dest="output",
    default="./output/",
    help="Path to output folder",
)

args = parser.parse_args()

# Target size and folders used by the resizing code further down.
input_path, output_path = args.input, args.output
NEW_HEIGHT, NEW_WIDTH = args.height, args.width


def resize_and_save(image, save_file_path):
    """Resize ``image`` to NEW_WIDTH x NEW_HEIGHT and write it to disk.

    The image is resized to exactly the module-level target size, so the
    original aspect ratio is not preserved.

    Args:
        image: Image array as returned by ``cv2.imread``.
        save_file_path: Destination file path for the resized image.

    Returns:
        True if the resized image was written, False if there was no image
        data or the write failed.
    """
    if image is None:
        # cv2.imread yields None for unreadable files; resizing it would
        # only fail with an opaque cv2.error.
        print("no image data to resize for ", save_file_path)
        return False

    # cv2.resize takes the target size as (width, height).
    resized_img = cv2.resize(image, (NEW_WIDTH, NEW_HEIGHT))

    # cv2.imwrite signals a failed write (e.g. missing output folder) through
    # its return value, so surface it instead of dropping it silently.
    saved = cv2.imwrite(save_file_path, resized_img)
    if not saved:
        print("failed to write ", save_file_path)
    return saved


# Walk the input tree and write a resized copy of every image to the same
# relative location under output_path.
for subdir, dirs, files in os.walk(input_path, topdown=True):
    # Mirror the current folder (including the root itself and nested
    # sub-folders) under output_path; os.walk reports `dirs` relative to
    # `subdir`, so the relative location must be rebuilt from `subdir`.
    rel_dir = os.path.relpath(subdir, input_path)
    target_dir = os.path.normpath(os.path.join(output_path, rel_dir))
    os.makedirs(target_dir, exist_ok=True)

    for filename in files:
        if not filename.endswith((".jpg", ".jpeg", ".png")):
            continue
        rel_path = os.path.relpath(os.path.join(subdir, filename), input_path)

        # Load the input image.
        input_file_path = os.path.join(input_path, rel_path)
        image = cv2.imread(input_file_path)
        if image is None:
            # cv2.imread returns None for files it cannot decode.
            print("skipping unreadable image ", rel_path)
            continue

        # Resize and save image
        output_file_path = os.path.join(output_path, rel_path)
        print("processing ", rel_path, "...")
        resize_and_save(image, output_file_path)
