In [None]:
!pip install mediapipe
import os
import glob
import torch
import torchaudio
import cv2
import numpy as np
from torchvision.transforms import ToTensor
from torchaudio.transforms import Resample
from moviepy.editor import VideoFileClip
from PIL import Image
import mediapipe as mp

# Sample rate (Hz) that extracted audio is written at / resampled to.
AUDIO_SR = 16000

# Name of the torch device to prefer: CUDA when available, otherwise CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Shared MediaPipe Face Mesh instance used by the lip-region extractor.
# static_image_mode=False is passed because frames are fed in sequence.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp.solutions.face_mesh.FaceMesh(static_image_mode=False)

# Extract lip region using MediaPipe Face Mesh
def extract_lip_region(frame):
    """Crop the lip area of the first detected face and return it as a tensor.

    Args:
        frame: Image array as produced by ``cv2.VideoCapture.read`` (indexed
            as ``[row, col, channel]``; converted with ``COLOR_BGR2RGB`` for
            detection, so it is treated as BGR).

    Returns:
        The result of ``ToTensor()`` applied to the 96x96 resized crop, or
        ``None`` when no face is detected or the padded bounding box does not
        overlap the frame.
        NOTE(review): the crop is taken from the original ``frame`` (not the
        RGB copy), so the channel order of the output follows the input
        frame — confirm this is what downstream consumers expect.
    """
    img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(img_rgb)

    if not results.multi_face_landmarks:
        return None

    # Only the first detected face is used.
    landmarks = results.multi_face_landmarks[0].landmark
    h, w, _ = frame.shape

    # Lip landmark indices (based on MediaPipe documentation)
    lip_indices = (
        61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 308,
        78, 95, 88, 178, 87, 14, 317, 402, 318, 324,
    )

    # Landmarks are normalized; scale them to pixel coordinates. Indexing the
    # landmark list directly avoids scanning every landmark on each frame.
    x_coords = [int(landmarks[i].x * w) for i in lip_indices]
    y_coords = [int(landmarks[i].y * h) for i in lip_indices]

    # Pad the bounding box by a few pixels and clamp BOTH ends into the frame.
    # Without the lower clamp on x2/y2, landmarks lying off the top/left of
    # the frame would yield a negative slice end, which Python interprets as
    # "count from the end" and would return an unrelated crop.
    pad = 5
    x1 = min(max(min(x_coords) - pad, 0), w)
    x2 = min(max(max(x_coords) + pad, 0), w)
    y1 = min(max(min(y_coords) - pad, 0), h)
    y2 = min(max(max(y_coords) + pad, 0), h)

    cropped = frame[y1:y2, x1:x2]
    if cropped.size == 0:
        return None

    cropped_resized = cv2.resize(cropped, (96, 96))
    return ToTensor()(cropped_resized)

# Extract video frames and lip regions
def extract_video_frames(video_path):
    """Read a video and collect the lip crop of every frame that has one.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.

    Returns:
        ``torch.stack`` of the per-frame lip tensors (frames for which
        ``extract_lip_region`` returns ``None`` are dropped), or ``None`` when
        no frame produced a lip crop (including when the video cannot be
        opened).
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure.
                break

            lip = extract_lip_region(frame)
            if lip is not None:
                frames.append(lip)
    finally:
        # Always free the capture handle, even if lip extraction raises.
        cap.release()

    return torch.stack(frames) if frames else None

# Lazily created Wav2Vec2 model, shared by every call so the weights are
# loaded once instead of once per video.
_wav2vec2_model = None


def _get_wav2vec2_model():
    """Return the cached WAV2VEC2_BASE model, creating it on first use."""
    global _wav2vec2_model
    if _wav2vec2_model is None:
        _wav2vec2_model = torchaudio.pipelines.WAV2VEC2_BASE.get_model()
    return _wav2vec2_model


# Extract audio features using Wav2Vec2
def extract_audio_tokens(video_path):
    """Extract Wav2Vec2 features from the audio track of a video.

    The audio is written to a WAV file inside a temporary directory (removed
    afterwards), loaded with torchaudio, resampled to ``AUDIO_SR`` if needed,
    and the first channel is passed through ``model.extract_features``.

    Args:
        video_path: Path of the video file opened with ``VideoFileClip``.

    Returns:
        ``features[0].squeeze(0)`` where ``features`` is the list returned by
        ``extract_features`` for a batch of one waveform.

    Raises:
        ValueError: If the video has no audio track.
    """
    import tempfile

    model = _get_wav2vec2_model()

    # A private temporary directory avoids clobbering/leaving behind a fixed
    # "temp_audio.wav" in the working directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_audio = os.path.join(tmp_dir, "temp_audio.wav")

        clip = VideoFileClip(video_path)
        try:
            if clip.audio is None:
                raise ValueError(f"No audio track found in {video_path}")
            clip.audio.write_audiofile(
                temp_audio, fps=AUDIO_SR, verbose=False, logger=None
            )
        finally:
            # Release the reader resources held by the clip.
            clip.close()

        # The waveform is fully loaded here, so the file can be deleted after.
        waveform, sr = torchaudio.load(temp_audio)

    if sr != AUDIO_SR:
        waveform = Resample(sr, AUDIO_SR)(waveform)

    with torch.inference_mode():
        # Only the first channel is used, as a batch of size one.
        features, _ = model.extract_features(waveform[0].unsqueeze(0))

    return features[0].squeeze(0)  # [T, D]

# Process single sample
def process_sample(folder, vid_id):
    """Build the feature dict for one ``<vid_id>.mp4`` / ``<vid_id>.txt`` pair.

    Args:
        folder: Directory containing both files.
        vid_id: File stem shared by the video and its transcript.

    Returns:
        ``{"video": ..., "audio": ..., "label": ...}`` where ``label`` is the
        stripped content of the text file, or ``None`` when either file is
        missing or no lip region could be extracted from the video.
    """
    mp4_file = os.path.join(folder, f"{vid_id}.mp4")
    transcript_file = os.path.join(folder, f"{vid_id}.txt")

    # Both the video and its transcript are required.
    both_present = os.path.exists(mp4_file) and os.path.exists(transcript_file)
    if not both_present:
        return None

    with open(transcript_file, 'r') as handle:
        label = handle.read().strip()

    lip_frames = extract_video_frames(mp4_file)
    if lip_frames is None:
        print(f"Skipping {vid_id}: no valid lip region.")
        return None

    # Audio is only extracted once the video side is known to be usable.
    audio_features = extract_audio_tokens(mp4_file)
    return {"video": lip_frames, "audio": audio_features, "label": label}

# Process entire folder of videos
def process_folder(folder_path, save_path):
    """Preprocess every ``*.mp4`` in a folder and save one ``.pt`` per sample.

    Args:
        folder_path: Directory searched (non-recursively) for ``*.mp4`` files.
        save_path: Output directory; created if it does not exist.

    Samples for which ``process_sample`` returns ``None`` are skipped.
    """
    os.makedirs(save_path, exist_ok=True)

    # Escape the folder so glob metacharacters in its name are taken
    # literally; sort for a deterministic processing order.
    pattern = os.path.join(glob.escape(folder_path), "*.mp4")
    for vid_file in sorted(glob.glob(pattern)):
        # splitext removes only the final extension, so stems that contain
        # dots (e.g. "a.b.mp4") keep their full name.
        vid_id, _ = os.path.splitext(os.path.basename(vid_file))
        data = process_sample(folder_path, vid_id)
        if data is not None:
            torch.save(data, os.path.join(save_path, f"{vid_id}.pt"))

In [None]:
if __name__ == "__main__":
    # Dataset root: each immediate sub-directory is processed as one folder
    # of samples. Replace with the actual path, e.g. "/kaggle/input/lrs2/main".
    main_root = "/kaggle/input/lets-do-it-once-and-for-all-dm-10-gb/main_subset_10gb"
    # Outputs mirror the input layout: one sub-directory per input folder.
    save_root = "preprocessed/lrs2_subset_all2"

    subfolders = sorted([
        os.path.join(main_root, name)
        for name in os.listdir(main_root)
        if os.path.isdir(os.path.join(main_root, name))
    ])

    # Number folders from 1 so the printed progress matches the folder's
    # position in the sorted list.
    for counter, subfolder in enumerate(subfolders, start=1):
        folder_name = os.path.basename(subfolder)
        print(counter, end=" ")
        # Print a header line at the start of every batch of 40 folders.
        if counter % 40 == 1:
            print(f"\n📦 Processing folder: {folder_name}")
        process_folder(
            folder_path=subfolder,
            save_path=os.path.join(save_root, folder_name)
        )