In [None]:
import os
import jiwer
from typing import Dict, List, Tuple
import requests
import time
import assemblyai as aai


def transcribe_audio(audio_filepath: str) -> str:
    """
    Transcribes an audio file with AssemblyAI and stores the text on disk.

    The transcript is written to ``assembly_ai/<name>.txt`` (relative to the
    current working directory), where ``<name>`` is the base name of the audio
    file without its extension. The wall-clock duration of the transcription
    call is printed.

    Requires the `assemblyai` package (``pip install -U assemblyai``).
    The API key is taken from the ``ASSEMBLYAI_API_KEY`` environment variable;
    if it is not set, the placeholder literal below is used and must be
    replaced with a real key.

    Args:
        audio_filepath: The path to the audio file to transcribe.

    Returns:
        The transcribed text.  Returns an empty string if transcription fails.
    """
    # Prefer the environment so a real key never has to be committed.
    aai.settings.api_key = os.environ.get("ASSEMBLYAI_API_KEY", "assembly_api_key")

    output_folder = "assembly_ai"
    os.makedirs(output_folder, exist_ok=True)

    transcriber = aai.Transcriber()
    start_time = time.time()
    transcript = transcriber.transcribe(audio_filepath)
    elapsed = time.time() - start_time
    print(f"Run took {elapsed} seconds.")

    if transcript.status == aai.TranscriptStatus.error:
        # Report the failure but never hand the error message back as if it
        # were a transcription: callers would score WER against it.
        print(f"Transcription failed for {audio_filepath}: {transcript.error}")
        return ""

    # NOTE(review): the SDK appears to type `text` as optional; fall back to ""
    # so the write below cannot fail on None - confirm against the SDK docs.
    transcribed_text = transcript.text or ""

    # Name the output after the audio file, minus directory and extension.
    file_name = os.path.splitext(os.path.basename(audio_filepath))[0]
    output_file = os.path.join(output_folder, f"{file_name}.txt")

    # Save transcript to file
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(transcribed_text)

    print(f"Transcript saved to {output_file}")
    return transcribed_text


def create_file_subtitle_map(directory: str) -> Dict[str, str]:
    """
    Creates a dictionary mapping filenames (without extension) to their corresponding subtitles.

    Every ``<name>.mp3`` in the directory is paired with ``<name>.txt`` from the
    same directory. Audio files without a matching text file are reported with
    a warning and left out. Entries are visited in sorted filename order, so
    the resulting dictionary has a reproducible key order.

    Args:
        directory: The directory containing the MP3 and TXT files.

    Returns:
        A dictionary where keys are filenames (without extension) and values are
        the subtitle text with leading/trailing whitespace stripped.
        Returns an empty dictionary if the directory is missing or any other
        error occurs while reading.
    """
    file_map = {}
    try:
        # os.listdir() yields entries in arbitrary order; sort for stable results.
        for filename in sorted(os.listdir(directory)):
            if not filename.endswith(".mp3"):
                continue

            base_name = os.path.splitext(filename)[0]  # Remove extension
            txt_filepath = os.path.join(directory, base_name + ".txt")

            # isfile() rather than exists(): a directory that happens to be
            # named "<name>.txt" must not be opened as a subtitle file.
            if not os.path.isfile(txt_filepath):
                print(f"Warning: No corresponding .txt file found for {filename}")
                continue

            with open(txt_filepath, "r", encoding="utf-8") as f:
                file_map[base_name] = f.read().strip()
    except FileNotFoundError:
        print(f"Error: Directory not found: {directory}")
        return {}
    except Exception as e:
        # Best-effort contract: any other failure is reported and yields {}.
        print(f"An unexpected error occurred: {e}")
        return {}

    return file_map


def calculate_wer(ground_truth: str, hypothesis: str) -> float:
    """
    Computes the Word Error Rate (WER) of a hypothesis against a reference.

    This is a thin wrapper that delegates to ``jiwer.wer``.

    Args:
        ground_truth: The reference text.
        hypothesis: The transcribed text to be scored.

    Returns:
        The WER as a float.
    """
    error_rate = jiwer.wer(ground_truth, hypothesis)
    return error_rate


def process_audio_files(
    directory: str, name_filter: str = "kurz"
) -> List[Tuple[str, float]]:
    """
    Processes audio files in a directory, transcribes them, and calculates WER.

    Only files whose base name contains ``name_filter`` are transcribed; all
    other files are skipped without a message. A failure on one file is
    printed and does not stop the remaining files from being processed.

    Args:
        directory: The directory containing the MP3 and TXT files.
        name_filter: Substring a file's base name must contain to be processed.
            Defaults to "kurz". Pass an empty string to process every file.

    Returns:
      A list of tuples where first element is the filename and the second is the WER.
      Returns an empty list if no audio/subtitle pairs were found.
    """
    results = []

    file_map = create_file_subtitle_map(directory)
    if not file_map:
        print("Error: Could not create file map.")
        return []  # Return empty list on error

    for base_name, subtitles in file_map.items():
        if name_filter not in base_name:
            continue

        audio_filepath = os.path.join(directory, base_name + ".mp3")
        try:
            transcription = transcribe_audio(audio_filepath)

            # transcribe_audio documents "" as its failure value; also accept
            # None so either convention results in a skip instead of a score.
            if not transcription:
                print(f"Skipping {base_name} due to transcription error.")
                continue

            wer = calculate_wer(subtitles, transcription)
        except Exception as e:
            # Deliberately broad: one bad file must not abort the whole batch.
            print(e)
            print(audio_filepath)
            continue

        results.append((base_name, wer))
        print(f"File: {base_name}, WER: {wer:.4f}")
        # Short previews only, to keep the log readable.
        print("Subtitles:", subtitles[:10])
        print("Transcription:", transcription[:10])

    return results

In [None]:
"""Main function to demonstrate the process."""

audio_directory = "audio"  # Replace with the actual path!

# Demo setup: when the directory does not exist yet, create it and seed it with
# placeholder reference texts plus zero-byte stand-ins for the audio files.
# NOTE(review): the stand-ins are named *.wav and do not contain "kurz", while
# create_file_subtitle_map() only scans for *.mp3 and process_audio_files()
# only transcribes names containing "kurz" - so these dummies are never
# processed. Confirm whether that is intended.
if not os.path.exists(audio_directory):
    os.makedirs(audio_directory)
    dummy_references = {
        "audio1": "This is the ground truth for audio 1.",
        "audio2": "Another ground truth example.",
    }
    for stem, reference_text in dummy_references.items():
        with open(os.path.join(audio_directory, stem + ".txt"), "w") as txt_file:
            txt_file.write(reference_text)
        # create a fake wav (empty file)
        with open(os.path.join(audio_directory, stem + ".wav"), "w"):
            pass
# End of dummy file creation

wer_results = process_audio_files(audio_directory)

# Per-file WER report.
print("\n--- Summary ---")
for filename, wer in wer_results:
    print(f"{filename}: {wer:.4f}")