In [None]:
import os
import datetime
from pydub import AudioSegment
import speech_recognition as sr

In [None]:
def ensure_dir(directory: str) -> None:
    """
    Make sure a directory exists, creating it (and any missing parents) if needed.
    Params:
      :directory: The path of the directory to check/create.
    """
    try:
        os.makedirs(directory)
    except OSError:
        # An already-existing directory is fine; anything else (for example a
        # regular file sitting at that path) is a real error and is re-raised.
        if not os.path.isdir(directory):
            raise

def convert_m4a_to_wav(input_file: str, output_file: str) -> None:
    """
    Load an m4a file with pydub and write it back out as a wav file.
    Params:
      :input_file: Path of the .m4a file to read.
      :output_file: Path the .wav result is exported to.
    """
    # Decode the source as m4a and export it straight away in wav format.
    # NOTE(review): export is expected to overwrite an existing output file.
    AudioSegment.from_file(input_file, format="m4a").export(output_file, format="wav")
    print(f"Conversion complete: {input_file} -> {output_file}")

def batch_convert_m4a_to_wav(uploaded_files: list, output_dir: str) -> list:
    """
    Convert every m4a file in a list to wav inside a subfolder named after today's date,
    deleting each original once its conversion has returned.
    Params:
      :uploaded_files: List of paths to the .m4a files to convert.
      :output_dir: Parent directory; the wav files go into <output_dir>/<YYYY-MM-DD>/.
    Returns:
      A list with the path of every wav file written, in input order.
    """
    # Output lands in a per-day subfolder, e.g. "<output_dir>/2025-06-05"
    date_stamp = datetime.datetime.now().strftime("%Y-%m-%d")
    target_dir = os.path.join(output_dir, date_stamp)
    ensure_dir(target_dir)

    wav_paths = []
    for source_path in uploaded_files:
        # "some/dir/recording.m4a" -> "recording"
        stem, _extension = os.path.splitext(os.path.basename(source_path))
        wav_path = os.path.join(target_dir, f"{stem}_converted.wav")

        convert_m4a_to_wav(source_path, wav_path)
        wav_paths.append(wav_path)

        # The source file is only deleted after the conversion call returned
        os.remove(source_path)

    return wav_paths

def transcribe_audio_file(audio_file: str, recogniser: sr.Recognizer) -> str:
    """
    Read a whole audio file and transcribe it with the recogniser's Google backend.
    Params:
      :audio_file: Path to the audio file.
      :recogniser: An instance of the Recognizer class.
    Returns:
      The transcription text. On failure a bracketed placeholder message is
      returned instead of raising: one for unintelligible audio, one for API errors.
    """
    # Load the full contents of the file into memory as audio data
    with sr.AudioFile(audio_file) as wav_source:
        recorded = recogniser.record(wav_source)

    try:
        transcript = recogniser.recognize_google(recorded)
    except sr.UnknownValueError:
        transcript = "[Could not understand audio.]"
    except sr.RequestError as api_error:
        transcript = f"[API error: {api_error}]"
    return transcript

def batch_transcribe_wav_files(wav_files: list, output_dir: str) -> dict:
    """
    Transcribe a list of wav files and save transcriptions to text files in a date-specific subfolder.
    Params:
      :wav_files: List of converted wav filenames (with paths).
      :output_dir: Directory where transcription text files will be saved.
    Returns:
      A dictionary containing {filename: transcription text}, keyed by the
      wav path exactly as it was passed in.
    """
    # Create subfolder with today's date
    today_date = datetime.datetime.now().strftime("%Y-%m-%d")
    dated_output_dir = os.path.join(output_dir, today_date)
    ensure_dir(dated_output_dir)

    # One recogniser instance is shared by every file in the batch
    recogniser = sr.Recognizer()
    transcriptions = {}

    for audio_file in wav_files:
        # Transcribe the audio file
        text = transcribe_audio_file(audio_file, recogniser)
        transcriptions[audio_file] = text

        # Save transcription to a .txt file in the dated subfolder
        base_name = os.path.splitext(os.path.basename(audio_file))[0]
        txt_filename = os.path.join(dated_output_dir, f"{base_name}.txt")
        # Write as UTF-8 explicitly: transcribed speech can contain non-ASCII
        # characters, and the platform default encoding may be unable to
        # represent them (or may differ from machine to machine).
        with open(txt_filename, "w", encoding="utf-8") as f:
            f.write(text)
        print(f"Saved transcription to {txt_filename}")

    return transcriptions

# Collect the names of all entries in the working directory ending in ".m4a"
m4a_files = list(filter(lambda name: name.endswith(".m4a"), os.listdir()))

# Stage 1: convert to wav under "processed_audio" (the .m4a originals are removed)
converted_files = batch_convert_m4a_to_wav(m4a_files, "processed_audio")
# Stage 2: transcribe the wav files and store the text under "transcriptions"
transcriptions = batch_transcribe_wav_files(converted_files, "transcriptions")

Conversion complete: cricket_season.m4a -> processed_audio/2025-06-05/cricket_season_converted.wav
Conversion complete: job_market.m4a -> processed_audio/2025-06-05/job_market_converted.wav
Saved transcription to transcriptions/2025-06-05/cricket_season_converted.txt
Saved transcription to transcriptions/2025-06-05/job_market_converted.txt
