In [6]:
import librosa
import os
import pandas as pd

# Show where the script is running from and what is in that directory, so the
# relative dataset path used below can be checked against the real location.
print("Current working directory:", os.getcwd())
print("Files in the directory:", os.listdir())

# Load metadata
metadata_path = '../../cv-corpus-20.0-delta-2024-12-06/en/validated.tsv'
print(f"Loading metadata from: {metadata_path}")
try:
    # Keep the try body to the one call that can raise FileNotFoundError.
    metadata = pd.read_csv(metadata_path, sep='\t')
except FileNotFoundError as e:
    print(f"Error: {e}")
    # Nothing below can run without the metadata table, so stop here.
    # SystemExit is raised directly instead of calling exit(): exit() is a
    # helper installed by the `site` module for interactive sessions and is
    # not guaranteed to be defined, and it reported success (status 0) even
    # though loading failed.
    raise SystemExit(1) from e
else:
    print("Metadata loaded successfully.")

# Preprocess audio
def preprocess_audio(file_path, target_sr=16000):
    """Load an audio file and resample it to ``target_sr``.

    Args:
        file_path: Path of the audio file to decode.
        target_sr: Sampling rate, in Hz, that the returned samples should
            have. Defaults to 16000.

    Returns:
        A ``(samples, target_sr)`` tuple on success, where ``samples`` is the
        array produced by ``librosa.load``. If loading or resampling fails
        for any reason, the error is printed and ``(None, None)`` is
        returned so the caller can skip the file.
    """
    try:
        # Passing the target rate to librosa.load makes it resample as part
        # of loading, so no separate librosa.resample call is needed.
        y_resampled, _ = librosa.load(file_path, sr=target_sr)
        duration = len(y_resampled) / target_sr
        print(f"Processed audio file: {file_path}, Duration: {duration:.2f}s")
        return y_resampled, target_sr
    except Exception as e:
        # Deliberately broad: this is a best-effort batch step, and one
        # unreadable clip should be reported and skipped, not abort the run.
        print(f"Error processing file {file_path}: {e}")
        return None, None

# Process all files
# The clips are read from a "clips" folder that sits next to the metadata
# file; deriving it from metadata_path keeps the two locations in sync.
clips_dir = os.path.join(os.path.dirname(metadata_path), 'clips')
total_files = len(metadata)

audio_data = []
# Iterate over just the two columns that are needed, and count progress by
# position (starting at 1). The DataFrame index is a label, not a position,
# so it is not a reliable progress counter once rows are filtered or the
# index is not the default 0..n-1 range.
for position, (clip_name, transcription) in enumerate(
    zip(metadata['path'], metadata['sentence']), start=1
):
    file_path = os.path.join(clips_dir, clip_name)
    print(f"Processing file {position}/{total_files}: {file_path}")
    y, sr = preprocess_audio(file_path)
    if y is None:
        # preprocess_audio already printed the reason for the failure.
        print(f"Skipping file: {file_path}")
        continue
    audio_data.append({'audio': y, 'sampling_rate': sr, 'transcription': transcription})

# Summary
print(f"Processed {len(audio_data)} audio files successfully.")


Current working directory: /Users/gabriel/Desktop/SPRING2025/CST 440/Code/Speech-Text
Files in the directory: ['Labs', 'SpeechRecognition.ipynb', 'README.md', '.gitignore', '.git']
Loading metadata from: ../../cv-corpus-20.0-delta-2024-12-06/en/validated.tsv
Metadata loaded successfully.
Processing file 1/250: ../../cv-corpus-20.0-delta-2024-12-06/en/clips/common_voice_en_41383256.mp3
Processed audio file: ../../cv-corpus-20.0-delta-2024-12-06/en/clips/common_voice_en_41383256.mp3, Duration: 4.82s
Processing file 2/250: ../../cv-corpus-20.0-delta-2024-12-06/en/clips/common_voice_en_41823983.mp3
Processed audio file: ../../cv-corpus-20.0-delta-2024-12-06/en/clips/common_voice_en_41823983.mp3, Duration: 4.72s
Processing file 3/250: ../../cv-corpus-20.0-delta-2024-12-06/en/clips/common_voice_en_41881685.mp3
Processed audio file: ../../cv-corpus-20.0-delta-2024-12-06/en/clips/common_voice_en_41881685.mp3, Duration: 8.06s
Processing file 4/250: ../../cv-corpus-20.0-delta-2024-12-06/en/clips

KeyboardInterrupt: 