In [1]:
import glob
import io
import os
import random
import typing as tp

import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.signal
import seaborn as sns
import soundfile as sf
import torch
import torchaudio
from pydub import AudioSegment
from tqdm import tqdm

In [2]:
# Load metadata and audio samples from CREMA-D dataset
def load_crema_d_metadata(metadata_path):
    """Read the metadata CSV and return it as a DataFrame.

    Args:
        metadata_path: Location of the CSV file; handed unchanged to
            ``pd.read_csv``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    return pd.read_csv(metadata_path)

def load_audio_samples(actor_id, base_path):
    """Load every ``.wav`` file in ``base_path`` whose name starts with ``actor_id``.

    Files that ``load_audio_sample`` fails to read (it returns ``None`` after
    printing the error) are skipped instead of aborting the whole batch.

    Args:
        actor_id: Filename prefix to match; formatted into the glob pattern
            ``{base_path}/{actor_id}*.wav``.
        base_path: Directory that is searched (non-recursively).

    Returns:
        A list of ``(audio, sample_rate, file_path)`` tuples, ordered by file
        path. The list is empty when nothing matched or nothing could be loaded.
    """
    # glob returns matches in arbitrary filesystem order; sort so repeated
    # runs see the samples in the same order.
    file_paths = sorted(glob.glob(f"{base_path}/{actor_id}*.wav"))

    audio_samples = []
    for file_path in file_paths:
        loaded = load_audio_sample(file_path)
        if loaded is None:
            # Unpacking None would raise TypeError; the loader has already
            # reported the failure, so just move on to the next file.
            continue
        audio, sr = loaded
        audio_samples.append((audio, sr, file_path))

    return audio_samples
    



def load_audio_sample(
    file_path: str
) -> tp.Optional[tp.Tuple[torch.Tensor, int]]:
    """Best-effort load of a single audio file via ``torchaudio.load``.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        ``(wav, sample_rate)`` as returned by ``torchaudio.load``, or ``None``
        when loading raised; in that case the error is printed, not re-raised.
    """
    try:
        waveform, rate = torchaudio.load(file_path)
    except Exception as err:
        # Deliberately broad: any failure is reported and mapped to None.
        print(f"Error while loading audio: {err}")
        outcome = None
    else:
        outcome = (waveform, rate)
    return outcome

In [21]:

def resample_audio(audio, original_sr, target_sr):
    """Resample ``audio`` from ``original_sr`` to ``target_sr`` with librosa.

    Args:
        audio: Signal handed to ``librosa.resample`` as its first argument
            (presumably a NumPy array of samples; verify against caller).
        original_sr: Sample rate of ``audio``; passed as ``orig_sr``.
        target_sr: Desired sample rate; passed as ``target_sr``.

    Returns:
        Whatever ``librosa.resample`` returns for these arguments.
    """
    resampled = librosa.resample(
        audio,
        orig_sr=original_sr,
        target_sr=target_sr,
    )
    return resampled


In [27]:
# Pick one random actor, upsample one of their clips and write it to a fresh
# output directory.

# --- Configuration ---
metadata_path = '../crema-d/VideoDemographics.csv'  # Placeholder path
base_audio_path = '../crema-d'  # Placeholder path
output_dir = 'random-test-output'
UPSAMPLE_FACTOR = 2  # output sample rate = input sample rate * this factor
RANDOM_SEED = None  # set to an int to make the actor/clip choice reproducible

rng = random.Random(RANDOM_SEED)

# --- Choose an actor ---
# The DataFrame and the chosen ID get separate names, so the metadata is not
# clobbered by a scalar and stays inspectable after the cell runs.
crema_d_metadata = load_crema_d_metadata(metadata_path)
actor_id = rng.choice(crema_d_metadata['ActorID'].values)

# --- Reset the output directory ---
os.makedirs(output_dir, exist_ok=True)
for stale_path in glob.glob(f"{output_dir}/*"):
    # os.remove() raises on directories; only clear regular files.
    if os.path.isfile(stale_path):
        os.remove(stale_path)

# --- Load, resample and write one clip ---
audio_sr = load_audio_samples(actor_id, base_audio_path)
if not audio_sr:
    # random.sample() would fail with an opaque "sample larger than
    # population" error; say what actually went wrong instead.
    raise ValueError(
        f"No audio samples available for actor {actor_id} in {base_audio_path!r}"
    )

for audio, sr, fp in rng.sample(audio_sr, 1):
    target_sr = sr * UPSAMPLE_FACTOR
    # flatten() yields a 1-D array; for multi-channel input this would
    # concatenate the channels, so single-channel audio is assumed — TODO confirm
    upsampled = resample_audio(audio.numpy().flatten(), sr, target_sr)
    sf.write(os.path.join(output_dir, os.path.basename(fp)), upsampled, target_sr)

    

