In [1]:
import os
import numpy as np

# Define directories.
# The defaults are absolute paths under one user's home directory. Set the
# ASL_KEYPOINTS_DIR / ASL_NORMALIZED_KEYPOINTS_DIR environment variables to
# point the notebook at another location without editing this cell; when the
# variables are unset the original paths are used unchanged.
TRAINING_DIR = os.environ.get(
    "ASL_KEYPOINTS_DIR",
    "/home/haggenmueller/asl_detection/machine_learning/datasets/own_dataset/keypoints",
)
NORMALIZED_DIR = os.environ.get(
    "ASL_NORMALIZED_KEYPOINTS_DIR",
    "/home/haggenmueller/asl_detection/machine_learning/datasets/own_dataset/normalized_keypoints",
)

# Ensure the target directory exists (no error if it is already there)
os.makedirs(NORMALIZED_DIR, exist_ok=True)

In [None]:
# List all npy files in the training directory. os.listdir returns entries in
# arbitrary order, so sort them to make the file list (reused by the
# normalization cell) and the row order of all_keypoints reproducible.
npy_files = sorted(f for f in os.listdir(TRAINING_DIR) if f.endswith(".npy"))

# Fail early with a readable message: np.vstack on an empty list would
# otherwise abort with an error that does not mention the directory.
if not npy_files:
    raise FileNotFoundError(f"No .npy keypoint files found in {TRAINING_DIR}")

# Load every keypoint file into a list of arrays
keypoint_arrays = [np.load(os.path.join(TRAINING_DIR, file)) for file in npy_files]

# Stack all files along the first axis into one large NumPy array
all_keypoints = np.vstack(keypoint_arrays)  # If keypoints are stored sequentially
del keypoint_arrays  # the stacked copy is all that is needed from here on

# Compute min and max values for scaling (reduced over the stacked first axis)
global_min = np.min(all_keypoints, axis=0)
global_max = np.max(all_keypoints, axis=0)

print("Global Min:", global_min)
print("Global Max:", global_max)

# Save the min/max values for later use
np.save(os.path.join(NORMALIZED_DIR, "keypoint_min.npy"), global_min)
np.save(os.path.join(NORMALIZED_DIR, "keypoint_max.npy"), global_max)

Global Min: [ 0.00000000e+00  0.00000000e+00 -1.93358839e+00  0.00000000e+00
  0.00000000e+00 -1.85553241e+00  0.00000000e+00  0.00000000e+00
 -1.85579896e+00  0.00000000e+00  0.00000000e+00 -1.85605848e+00
  0.00000000e+00  0.00000000e+00 -1.85301745e+00  0.00000000e+00
  0.00000000e+00 -1.85346699e+00  0.00000000e+00  0.00000000e+00
 -1.85385895e+00  0.00000000e+00  0.00000000e+00 -1.38894296e+00
  0.00000000e+00  0.00000000e+00 -1.39636683e+00  0.00000000e+00
  0.00000000e+00 -1.76036477e+00  0.00000000e+00  0.00000000e+00
 -1.75435925e+00  0.00000000e+00  0.00000000e+00 -1.03126025e+00
  0.00000000e+00  0.00000000e+00 -1.02654994e+00  0.00000000e+00
 -4.73416038e-02 -1.47295272e+00 -1.73933059e-01  0.00000000e+00
 -1.53240013e+00  0.00000000e+00 -1.65291071e-01 -2.52853894e+00
 -3.76049340e-01  0.00000000e+00 -2.90126061e+00  0.00000000e+00
 -2.48940125e-01 -2.82061911e+00 -4.94438618e-01  0.00000000e+00
 -3.17750216e+00  0.00000000e+00 -2.36646578e-01 -2.74749422e+00
 -5.08548617e

In [3]:
def normalize_keypoints(keypoints, min_vals, max_vals, eps=1e-8):
    """Min-max scale keypoints with the given lower and upper bounds.

    Computes ``(keypoints - min_vals) / (max_vals - min_vals + eps)``
    element-wise; the operands follow normal NumPy broadcasting.

    Args:
        keypoints: Array of keypoint values to scale.
        min_vals: Lower bound(s), broadcastable against ``keypoints``.
        max_vals: Upper bound(s), broadcastable against ``keypoints``.
        eps: Constant added to the range so that a feature with
            ``max_vals == min_vals`` does not divide by zero. Defaults to
            ``1e-8``, the value that was previously hard-coded.

    Returns:
        The scaled values. Inputs equal to ``min_vals`` map to 0. Because
        ``eps`` is added to the denominator, inputs equal to ``max_vals`` map
        to a value marginally below 1 (exactly 1 only when ``eps == 0``).
    """
    value_range = max_vals - min_vals + eps  # eps avoids division by zero
    return (keypoints - min_vals) / value_range

# Scale every keypoint file with the global bounds and write the result under
# the same file name into the normalized directory.
for file in npy_files:
    source_path = os.path.join(TRAINING_DIR, file)
    target_path = os.path.join(NORMALIZED_DIR, file)

    # Load -> normalize -> save in one pass per file
    scaled = normalize_keypoints(np.load(source_path), global_min, global_max)
    np.save(target_path, scaled)

print(f"All {len(npy_files)} files successfully normalized and saved.")

All 8360 files successfully normalized and saved.
