Alignment checking and EDA

In [None]:
# Colab Starter — Set base, validate data, and run lightweight EDA

%matplotlib inline
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

def add_src_to_sys_path(start: Path | None = None) -> Path:
    """Find the nearest folder containing 'src' and add it to sys.path."""
    here = Path(start or Path.cwd()).resolve()
    for parent in (here, *here.parents):
        src = parent / "src"
        if src.is_dir():
            if str(src) not in sys.path:
                sys.path.insert(0, str(src))
            print("✅ Added to sys.path:", src)
            return src
    raise FileNotFoundError("Could not find a 'src' directory above this notebook.")

# Search upward from the current working directory for a 'src' folder, put it
# on sys.path, and keep the resolved location for later reference.
SRC_DIR = add_src_to_sys_path()


In [None]:
import os, math, random, warnings, json
from glob import glob
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import correlate
from scipy.io import wavfile, loadmat

from utils.rvtall_io import (
    find_rvtall_base, list_sets, list_corpora_dirs, corpus_type,
    resolve_corpus_dir, kinect_pair_index_paths, kinect_first_pair,
    load_audio_env, load_lip_aperture_csv, first_npy, load_npy, zscore
)

# Suppress every warning for the rest of the session (keeps cell output tidy).
warnings.filterwarnings("ignore")
# Print arrays with three decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=3)
# Fixed seed so anything drawn from the stdlib `random` module is repeatable.
random.seed(123)

# Dataset base directory as reported by find_rvtall_base, which is handed the
# current working directory (presumably as the search start — confirm in
# utils.rvtall_io).
BASE = find_rvtall_base(Path.cwd())
print("RVTALL base:", BASE)

# fast sanity counts (doesn't traverse all frames)
def fast_counts(BASE: Path):
    """Count files under ``BASE`` that match the expected per-modality layouts.

    Only file names are globbed; no file is opened. Audio and mouth images are
    each looked up in two layouts (inside an ``audios`` / ``videos/video_*``
    sub-folder, or directly in the two-level folder) and the two hits are
    summed.

    Returns:
        A 5-tuple ``(audio_wavs, mouth_pngs, radar_npys, uwb_npys, laser_npys)``
        of match counts.
    """
    base_dir = str(BASE)

    def n_matches(*parts):
        # Number of paths matching the glob pattern built from `parts`.
        return len(glob(os.path.join(base_dir, *parts)))

    n_audio = (
        n_matches("kinect_processed", "*", "*", "audios", "audio_proc_*.wav")
        + n_matches("kinect_processed", "*", "*", "audio_proc_*.wav")
    )
    n_mouth = (
        n_matches("kinect_processed", "*", "*", "videos", "video_*", "mouth", "*.png")
        + n_matches("kinect_processed", "*", "*", "mouth", "*.png")
    )
    # The three sensor folders share one layout: <sensor>_processed/*/*/sample_*.npy
    n_radar, n_uwb, n_laser = (
        n_matches(folder, "*", "*", "sample_*.npy")
        for folder in ("radar_processed", "uwb_processed", "laser_processed")
    )
    return n_audio, n_mouth, n_radar, n_uwb, n_laser

# Run the quick file census once and report one line per modality.
audio_wavs, mouth_pngs, radar_npys, uwb_npys, laser_npys = fast_counts(BASE)
print(
    f"audio wavs : {audio_wavs}",
    f"mouth pngs : {mouth_pngs}",
    f"radar npy  : {radar_npys}",
    f"uwb npy    : {uwb_npys}",
    f"laser npy  : {laser_npys}",
    sep="\n",
)


In [None]:
def rms_envelope(x, win):
    """Return the moving-RMS envelope of a 1-D signal.

    Parameters
    ----------
    x : array-like
        Input samples. Integer and boolean input is promoted to float64 before
        any arithmetic, because squaring (or taking ``abs`` of) fixed-width
        integers such as int16 PCM wraps around silently.
    win : int
        Window length in samples; coerced with ``int()``. A window of one
        sample or less degenerates to the absolute value of ``x``.

    Returns
    -------
    numpy.ndarray
        Envelope with the same number of samples as ``x``, including when
        ``win`` is longer than the signal.
    """
    x = np.asarray(x)
    if x.dtype.kind in "iub":
        # Fixed-width integers overflow on x**2 (e.g. 300**2 does not fit int16).
        x = x.astype(np.float64)

    win = int(win)
    if win <= 1:
        return np.abs(x)
    if x.size == 0:
        # np.convolve rejects empty input; an empty signal has an empty envelope.
        return x

    kernel = np.ones(win) / win
    # mode="same" yields max(len(x), win) samples, so take the full convolution
    # and cut the window that is centred on x. For win <= len(x) this is the
    # very slice mode="same" would return.
    smoothed = np.convolve(x**2, kernel, mode="full")
    start = (win - 1) // 2
    return np.sqrt(smoothed[start:start + len(x)])

def find_lag(a, b):
    """Estimate the sample offset at which ``a`` best matches ``b``.

    Both inputs are passed through ``zscore`` and then fully cross-correlated;
    the offset of the correlation peak is returned. A positive result means
    ``a`` lags ``b``.
    """
    a_norm = zscore(a)
    b_norm = zscore(b)
    xcorr = correlate(a_norm, b_norm, mode="full")
    # In "full" mode, index len(b) - 1 of the output corresponds to zero lag.
    zero_lag_index = len(b_norm) - 1
    return np.argmax(xcorr) - zero_lag_index

def resample_to_len(x, L):
    """Linearly interpolate ``x`` onto ``L`` evenly spaced points.

    Source and target samples are both placed on a normalised [0, 1] axis, so
    the first and last samples of ``x`` map to the first and last outputs.
    """
    source_grid = np.linspace(0, 1, len(x))
    target_grid = np.linspace(0, 1, L)
    return np.interp(target_grid, source_grid, x)

def lip_aperture(csv_path, upper_idx=51, lower_idx=57):
    """Load a lip-aperture series from ``csv_path``.

    Thin convenience wrapper: forwards all three arguments unchanged to
    ``load_lip_aperture_csv`` and returns its result as-is.
    NOTE(review): ``upper_idx`` / ``lower_idx`` are presumably the upper- and
    lower-lip landmark indices — confirm against ``load_lip_aperture_csv``.
    """
    return load_lip_aperture_csv(csv_path, upper_idx, lower_idx)

def audio_envelope(wav_path, rms_ms=20):
    """Load the audio envelope for ``wav_path`` via ``load_audio_env``.

    Forwards ``wav_path`` and ``rms_ms`` to ``load_audio_env`` and returns the
    three values it yields as a tuple ``(ae, t, fs)``.
    NOTE(review): presumably envelope, time axis and sample rate, with
    ``rms_ms`` an RMS window in milliseconds — confirm in utils.rvtall_io.
    """
    # Unpacking into exactly three names keeps the "three results" contract explicit.
    envelope, time_axis, sample_rate = load_audio_env(wav_path, rms_ms)
    return envelope, time_axis, sample_rate
