<a href="https://colab.research.google.com/github/alimomennasab/ASL-Translator/blob/main/CS4200ASLDatasetPreprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so later cells can read paths under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import cv2
import numpy as np
from moviepy.editor import VideoFileClip, vfx
from tqdm import tqdm
import random

# Dataset root on the mounted Google Drive.
# NOTE(review): the batch-augmentation cell further down assigns base_dir again,
# so this value only applies to cells executed before that one.
base_dir = '/content/drive/MyDrive/ASL_Word_Letter_Labeled_Files/dataset/SL'


In [None]:
# augmentation functions

def mirror_video(input_path, output_path):
    """Write a horizontally mirrored copy of a video.

    Every frame read from ``input_path`` is flipped left-to-right and written
    to ``output_path`` using the 'mp4v' codec, at the frame rate and frame
    size reported by the source.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the mirrored video to write.
    """
    cap = cv2.VideoCapture(input_path)
    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            out.write(cv2.flip(frame, 1))  # flipCode=1 -> horizontal flip
    finally:
        # Release both handles even if the loop is interrupted (e.g. a
        # KeyboardInterrupt in the notebook), so the kernel does not keep the
        # files open and the writer gets a chance to finalize its output.
        cap.release()
        if out is not None:
            out.release()

def change_brightness(input_path, output_path, factor=1.3):
    """Write a copy of a video with every pixel value scaled by ``factor``.

    Frames are multiplied in float32, clipped to the 0-255 range and converted
    back to uint8 before being written with the 'mp4v' codec at the source's
    frame rate and frame size.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the adjusted video to write.
        factor: Multiplier applied to pixel values; values above 1 brighten,
            values below 1 darken.
    """
    cap = cv2.VideoCapture(input_path)
    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Scale in float32 so the product can exceed 255 before clipping;
            # multiplying the uint8 frame directly would wrap around.
            bright = np.clip(frame.astype(np.float32) * factor, 0, 255).astype(np.uint8)
            out.write(bright)
    finally:
        # Release both handles even if the loop is interrupted, so the kernel
        # does not keep the files open between cell runs.
        cap.release()
        if out is not None:
            out.release()

def grayscale_video(input_path, output_path):
    """Write a grayscale copy of a video.

    Each frame is converted from BGR to single-channel gray and written with
    the 'mp4v' codec through a writer opened with ``isColor=False``, at the
    source's frame rate and frame size.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the grayscale video to write.
    """
    cap = cv2.VideoCapture(input_path)
    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        # isColor=False tells the writer to expect single-channel frames.
        out = cv2.VideoWriter(output_path, fourcc, fps, (w, h), isColor=False)

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            out.write(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    finally:
        # Release both handles even if the loop is interrupted, so the kernel
        # does not keep the files open between cell runs.
        cap.release()
        if out is not None:
            out.release()

def zoom_video(input_path, output_path, zoom_factor=1.2):
    """Write a centre-zoomed copy of a video.

    A centred window of size ``(w / zoom_factor, h / zoom_factor)`` is cropped
    from every frame and resized back to the original ``(w, h)``, then written
    with the 'mp4v' codec at the source's frame rate.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the zoomed video to write.
        zoom_factor: Magnification, must be >= 1 (1 leaves the framing
            unchanged).

    Raises:
        ValueError: If ``zoom_factor`` is below 1. Such a value would make the
            crop window larger than the frame, and the negative crop origin
            would silently select the wrong region instead of zooming out.
    """
    if zoom_factor < 1:
        raise ValueError(f"zoom_factor must be >= 1, got {zoom_factor}")

    cap = cv2.VideoCapture(input_path)
    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))

        # Keep at least one pixel so an extreme zoom_factor cannot produce an
        # empty crop, which cv2.resize rejects.
        new_w = max(1, int(w / zoom_factor))
        new_h = max(1, int(h / zoom_factor))
        x1 = (w - new_w) // 2
        y1 = (h - new_h) // 2

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            cropped = frame[y1:y1 + new_h, x1:x1 + new_w]
            out.write(cv2.resize(cropped, (w, h)))
    finally:
        # Release both handles even if the loop is interrupted, so the kernel
        # does not keep the files open between cell runs.
        cap.release()
        if out is not None:
            out.release()

def speed_video(input_path, output_path, factor=1.5):
    """Write a sped-up or slowed-down copy of a video by resampling frames.

    The output keeps the source frame rate; playback speed is changed purely
    by dropping frames (``factor > 1``) or repeating frames (``factor < 1``).
    Output frame ``k`` shows the source frame at index ``floor(k * factor)``,
    so non-integer factors such as 1.5 or 0.7 are honoured. ``factor == 1``
    copies every frame once.

    Frames are processed one at a time, so the whole clip is never held in
    memory.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the retimed video to write.
        factor: Speed multiplier; > 1 is faster, < 1 is slower. Must be > 0.

    Raises:
        ValueError: If ``factor`` is not strictly positive.
    """
    if factor <= 0:
        raise ValueError(f"factor must be positive, got {factor}")

    cap = cv2.VideoCapture(input_path)
    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        # The frame rate is left untouched: changing it *and* resampling
        # frames would apply the speed change twice.
        out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))

        written = 0    # number of output frames emitted so far
        src_index = 0  # index of the source frame currently held in `frame`
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Emit this source frame once for every output index k whose
            # sample position k * factor falls inside [src_index, src_index + 1).
            # That is zero times when skipping, several times when slowing.
            while written * factor < src_index + 1:
                out.write(frame)
                written += 1
            src_index += 1
    finally:
        # Release both handles even if the loop is interrupted, so the kernel
        # does not keep the files open between cell runs.
        cap.release()
        if out is not None:
            out.release()


def crop_border(input_path, output_path, crop_amount=75):
    """Write a copy of a video with columns trimmed from both sides.

    ``crop_amount`` pixel columns are removed from the left and from the right
    edge of every frame; the height is unchanged. The result is written with
    the 'mp4v' codec at the source's frame rate.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the cropped video to write.
        crop_amount: Number of pixel columns to remove from each side.
            0 keeps the full frame.

    Raises:
        ValueError: If ``crop_amount`` is negative, or if it would remove the
            entire width of the opened video.
    """
    if crop_amount < 0:
        raise ValueError(f"crop_amount must be non-negative, got {crop_amount}")

    cap = cv2.VideoCapture(input_path)
    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')

        new_w = w - 2 * crop_amount
        # Only checked when a width was actually reported (w > 0).
        if w > 0 and new_w <= 0:
            raise ValueError(
                f"crop_amount={crop_amount} removes the whole {w}px frame width"
            )
        out = cv2.VideoWriter(output_path, fourcc, fps, (new_w, h))

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Explicit end index: a `-crop_amount` end would be `-0` for
            # crop_amount == 0 and select an empty slice.
            cropped = frame[:, crop_amount:w - crop_amount]
            out.write(cropped)
    finally:
        # Release both handles even if the loop is interrupted, so the kernel
        # does not keep the files open between cell runs.
        cap.release()
        if out is not None:
            out.release()



In [None]:
# test_crop_vid = base_dir + "/a/01610.mp4"
# out_dir = base_dir + "/a/test_crop.mp4"
# crop_border(test_crop_vid, out_dir)

In [None]:
# Registry of augmentation name -> callable(input_path, output_path).
# The random factors are drawn each time a lambda is called.
augmentations = {
    "mirror": mirror_video,
    "bright": lambda inp, out: change_brightness(inp, out, factor=random.uniform(0.5, 1.5)),
    "gray": grayscale_video,
    "zoom": zoom_video,
    "fast": lambda inp, out: speed_video(inp, out, factor=random.uniform(1.1, 1.75)),
    "slow": lambda inp, out: speed_video(inp, out, factor=random.uniform(0.25, 0.9)),
    "crop": crop_border
}

# Augmentations grouped so that one copy never gets two of the same family
# (e.g. both "fast" and "slow"). "crop" is applied to every video and is
# therefore not part of any bucket.
BUCKETS = {
    "mirror": ["mirror"],
    "color": ["bright", "gray"],
    "speed": ["fast", "slow"],
    "zoom": ["zoom"]
}

# Input/output roots. The defaults are the original local paths; set the
# environment variables to run on another machine without editing the cell.
base_dir = os.environ.get('ASL_DATASET_DIR', '/Users/alimomennasab/Downloads/dataset/SL')
out_dir = os.environ.get('ASL_AUGMENTED_DIR', '/Users/alimomennasab/Desktop/CS4200AugmentedDataset')

# Fixed seed + sorted file order make a re-run (e.g. after an interrupt)
# regenerate the same file names instead of piling up differently named copies.
SEED = 42
random.seed(SEED)

sign_folders = sorted(
    [d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))]
)
# One listing per sign folder, reused for both the total and the main loop so
# the progress counter and the set of processed files cannot disagree.
videos_by_sign = {
    sign: sorted(
        f for f in os.listdir(os.path.join(base_dir, sign)) if f.endswith(".mp4")
    )
    for sign in sign_folders
}
total_files = sum(len(videos) for videos in videos_by_sign.values())

count = 0  # number of source videos started so far
for sign_label in sign_folders:
    files = videos_by_sign[sign_label]
    if not files:
        continue

    root = os.path.join(base_dir, sign_label)
    sign_out_dir = os.path.join(out_dir, sign_label)
    os.makedirs(sign_out_dir, exist_ok=True)

    for file in tqdm(files, desc=f"Processing {sign_label}"):
        count += 1
        input_path = os.path.join(root, file)
        filename = os.path.splitext(file)[0]

        # 1. make cropped temp file
        cropped_path = os.path.join(sign_out_dir, f"{filename}_temp_crop.mp4")
        try:
            augmentations["crop"](input_path, cropped_path)

            # 2. create 3 augmented copies (A, B, C)
            for label in ["A", "B", "C"]:
                # Pick 2 augs from the 4 option buckets
                chosen_buckets = random.sample(list(BUCKETS.keys()), 2)
                chosen_augs = [random.choice(BUCKETS[b]) for b in chosen_buckets]
                random.shuffle(chosen_augs)

                aug_tag = "_".join(chosen_augs)
                output_stem = f"{filename}_{label}_{aug_tag}"
                output_path = os.path.join(sign_out_dir, f"{output_stem}.mp4")
                # Built from the stem rather than str.replace so a ".mp4"
                # elsewhere in the path is never rewritten.
                temp_path = os.path.join(sign_out_dir, f"{output_stem}_temp.mp4")

                # Apply augmentations sequentially: cropped -> temp -> output
                src = cropped_path
                try:
                    for i, aug_name in enumerate(chosen_augs):
                        dest = output_path if i == len(chosen_augs) - 1 else temp_path
                        augmentations[aug_name](src, dest)
                        src = dest
                finally:
                    # The intermediate file is only needed inside the chain;
                    # remove it even if an augmentation fails or is interrupted.
                    if os.path.exists(temp_path):
                        os.remove(temp_path)

                # tqdm.write keeps the progress bar intact, unlike print().
                tqdm.write(f"{count}/{total_files} | {file} -> {label}: {' + '.join(chosen_augs)}")
        finally:
            # 3. delete cropped temp file after all augmentations are done
            if os.path.exists(cropped_path):
                os.remove(cropped_path)


Sign label:  SL
Sign label:  lazy


Processing lazy:   2%|▏         | 1/56 [00:00<00:38,  1.44it/s]

1/18671 | 32514.mp4 -> A: zoom + fast
1/18671 | 32514.mp4 -> B: zoom + mirror
1/18671 | 32514.mp4 -> C: mirror + slow
2/18671 | 32511.mp4 -> A: bright + zoom
2/18671 | 32511.mp4 -> B: gray + fast


Processing lazy:   4%|▎         | 2/56 [00:08<04:38,  5.15s/it]

2/18671 | 32511.mp4 -> C: zoom + gray
3/18671 | 32512.mp4 -> A: zoom + gray
3/18671 | 32512.mp4 -> B: bright + mirror


Processing lazy:   5%|▌         | 3/56 [00:12<03:44,  4.24s/it]

3/18671 | 32512.mp4 -> C: gray + fast
4/18671 | 32510.mp4 -> A: bright + zoom
4/18671 | 32510.mp4 -> B: gray + fast


Processing lazy:   7%|▋         | 4/56 [00:13<02:34,  2.98s/it]

4/18671 | 32510.mp4 -> C: bright + zoom
5/18671 | 66019.mp4 -> A: bright + mirror
5/18671 | 66019.mp4 -> B: zoom + mirror


Processing lazy:   9%|▉         | 5/56 [00:14<02:10,  2.55s/it]

5/18671 | 66019.mp4 -> C: zoom + mirror
6/18671 | 32513.mp4 -> A: zoom + bright
6/18671 | 32513.mp4 -> B: gray + fast


Processing lazy:  11%|█         | 6/56 [00:15<01:36,  1.93s/it]

6/18671 | 32513.mp4 -> C: gray + mirror
7/18671 | 32518.mp4 -> A: slow + zoom
7/18671 | 32518.mp4 -> B: zoom + mirror


Processing lazy:  12%|█▎        | 7/56 [00:18<01:48,  2.21s/it]

7/18671 | 32518.mp4 -> C: fast + zoom


Processing lazy:  14%|█▍        | 8/56 [00:18<01:20,  1.67s/it]

8/18671 | 32514_mirror.mp4 -> A: bright + fast
8/18671 | 32514_mirror.mp4 -> B: zoom + bright
8/18671 | 32514_mirror.mp4 -> C: fast + mirror


Processing lazy:  16%|█▌        | 9/56 [00:19<01:01,  1.31s/it]

9/18671 | 32514_bright.mp4 -> A: zoom + fast
9/18671 | 32514_bright.mp4 -> B: mirror + fast
9/18671 | 32514_bright.mp4 -> C: gray + fast


Processing lazy:  18%|█▊        | 10/56 [00:19<00:46,  1.01s/it]

10/18671 | 32514_gray.mp4 -> A: zoom + mirror
10/18671 | 32514_gray.mp4 -> B: slow + mirror
10/18671 | 32514_gray.mp4 -> C: bright + mirror


Processing lazy:  20%|█▉        | 11/56 [00:20<00:37,  1.20it/s]

11/18671 | 32514_zoom.mp4 -> A: bright + slow
11/18671 | 32514_zoom.mp4 -> B: bright + mirror
11/18671 | 32514_zoom.mp4 -> C: fast + mirror


Processing lazy:  21%|██▏       | 12/56 [00:20<00:31,  1.38it/s]

12/18671 | 32514_fast.mp4 -> A: zoom + mirror
12/18671 | 32514_fast.mp4 -> B: zoom + slow
12/18671 | 32514_fast.mp4 -> C: zoom + mirror


Processing lazy:  23%|██▎       | 13/56 [00:21<00:27,  1.54it/s]

13/18671 | 32514_slow.mp4 -> A: gray + mirror
13/18671 | 32514_slow.mp4 -> B: slow + zoom
13/18671 | 32514_slow.mp4 -> C: slow + bright


Processing lazy:  25%|██▌       | 14/56 [00:21<00:25,  1.68it/s]

14/18671 | 32514_crop.mp4 -> A: fast + bright
14/18671 | 32514_crop.mp4 -> B: bright + mirror
14/18671 | 32514_crop.mp4 -> C: gray + fast
15/18671 | 32511_mirror.mp4 -> A: zoom + slow
15/18671 | 32511_mirror.mp4 -> B: fast + gray


Processing lazy:  27%|██▋       | 15/56 [00:27<01:34,  2.31s/it]

15/18671 | 32511_mirror.mp4 -> C: zoom + gray
16/18671 | 32511_bright.mp4 -> A: gray + fast
16/18671 | 32511_bright.mp4 -> B: zoom + mirror


Processing lazy:  29%|██▊       | 16/56 [00:36<02:48,  4.20s/it]

16/18671 | 32511_bright.mp4 -> C: slow + mirror
17/18671 | 32511_gray.mp4 -> A: zoom + mirror
17/18671 | 32511_gray.mp4 -> B: zoom + bright


Processing lazy:  30%|███       | 17/56 [00:39<02:26,  3.77s/it]

17/18671 | 32511_gray.mp4 -> C: zoom + mirror


Processing lazy:  30%|███       | 17/56 [00:42<01:38,  2.52s/it]


KeyboardInterrupt: 