# In [None]:
import os
from google.cloud.speech_v2 import SpeechClient as speech
from google.cloud.speech_v2.types import cloud_speech
from docx import Document

# Google Cloud project to run against. It must be the project that holds the
# service account and the storage bucket used by this script.
PROJECT_ID = "verdant-branch-457906-a2"

# Resource IDs for the custom PhraseSet and the custom Recognizer; they are
# combined with PROJECT_ID to form the full Google Cloud resource names.
PHRASE_SET_ID = "my-custom-phrases-set"
RECOGNIZER_ID = "my-custom-multilingual-recognizer"

# Phrases stored in the custom PhraseSet so that recognition is biased toward
# them. English and Farsi entries may be mixed freely.
CUSTOM_PHRASES = [
    # English terms.
    "Gemini AI",
    "Google Cloud Speech API",
    "Verdant Branch",
    "Cloud Storage Bucket",
    "Speech to Text",
    # Farsi examples; the trailing comment gives the English meaning.
    "فارسی صحبت می‌کنم",  # "I speak Farsi"
    "سلام و درود",  # "Hello and greetings"
    "چطور هستید؟",  # "How are you?"
]

def _is_not_found(exc: Exception) -> bool:
    """Return True when *exc* indicates that the requested resource does not exist."""
    # google-api-core "not found" errors expose the HTTP status as `code`
    # and the gRPC status as `grpc_status_code`; checking those is more
    # reliable than matching on the rendered message text.
    if getattr(exc, "code", None) == 404:
        return True
    grpc_status = getattr(exc, "grpc_status_code", None)
    if getattr(grpc_status, "name", None) == "NOT_FOUND":
        return True
    # Fallback kept from the original implementation.
    return "NOT_FOUND" in str(exc)


def create_or_get_custom_recognizer(project_id: str, recognizer_id: str, phrase_set_id: str, phrases: list[str], language_codes: list[str]) -> str:
    """
    Creates or retrieves a custom PhraseSet and a custom Recognizer that uses it.

    Both resources live in the "global" location of the given project. A
    resource that already exists is returned as-is: an existing PhraseSet is
    not updated with ``phrases`` and an existing Recognizer is not checked or
    updated to reference the PhraseSet.

    Args:
        project_id (str): Your Google Cloud Project ID.
        recognizer_id (str): A unique ID for the custom recognizer.
        phrase_set_id (str): A unique ID for the phrase set.
        phrases (list[str]): A list of custom phrases to boost recognition for.
            Only used when the PhraseSet has to be created.
        language_codes (list[str]): A list of BCP-47 language codes for recognition.
            Only used when the Recognizer has to be created.

    Returns:
        str: The full resource name of the custom recognizer (e.g., "projects/.../recognizers/...").

    Raises:
        Exception: Any error from the Speech client other than "not found"
            on the lookup calls is propagated unchanged.
    """
    # The module-level name `speech` is an alias of the SpeechClient class
    # itself, so `speech.SpeechClient()` raises AttributeError. Import the
    # class directly to construct the client.
    from google.cloud.speech_v2 import SpeechClient

    client = SpeechClient()
    parent = f"projects/{project_id}/locations/global"

    # --- 1. Create or Get the PhraseSet ---
    # The PhraseSet will contain your custom phrases.
    phrase_set_name = f"{parent}/phraseSets/{phrase_set_id}"
    try:
        # Attempt to retrieve the phrase set if it already exists.
        phrase_set = client.get_phrase_set(name=phrase_set_name)
    except Exception as exc:
        if not _is_not_found(exc):
            # Re-raise any other unexpected exceptions.
            raise
        print(f"PhraseSet '{phrase_set_id}' not found, creating...")
        phrase_set_request = cloud_speech.CreatePhraseSetRequest(
            parent=parent,
            phrase_set_id=phrase_set_id,
            phrase_set=cloud_speech.PhraseSet(
                # Convert string phrases into PhraseSet.Phrase objects.
                phrases=[cloud_speech.PhraseSet.Phrase(value=p) for p in phrases],
            ),
        )
        # In the v2 API, creating a PhraseSet is a long-running operation:
        # wait for it so that `phrase_set` is the PhraseSet resource (with a
        # `.name`) rather than the operation wrapper.
        phrase_set_operation = client.create_phrase_set(request=phrase_set_request)
        phrase_set = phrase_set_operation.result()
        print(f"PhraseSet '{phrase_set_id}' created.")
    else:
        print(f"PhraseSet '{phrase_set_id}' already exists.")

    # --- 2. Create or Get the Recognizer ---
    # The Recognizer uses the PhraseSet and defines recognition settings.
    recognizer_name = f"{parent}/recognizers/{recognizer_id}"
    try:
        # Attempt to retrieve the recognizer if it already exists.
        recognizer = client.get_recognizer(name=recognizer_name)
    except Exception as exc:
        if not _is_not_found(exc):
            # Re-raise any other unexpected exceptions.
            raise
        print(f"Recognizer '{recognizer_id}' not found, creating...")
        # Default recognition settings: auto-detected audio encoding, the
        # requested languages and the "long" model (long-form audio).
        # The v2 RecognitionConfig has no `phrase_set_references` field; a
        # PhraseSet is attached by resource name through `adaptation`.
        recognizer_config = cloud_speech.RecognitionConfig(
            auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
            language_codes=language_codes,
            model="long",
            adaptation=cloud_speech.SpeechAdaptation(
                phrase_sets=[
                    cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
                        phrase_set=phrase_set.name,
                    ),
                ],
            ),
        )
        recognizer_request = cloud_speech.CreateRecognizerRequest(
            parent=parent,
            recognizer_id=recognizer_id,
            recognizer=cloud_speech.Recognizer(default_recognition_config=recognizer_config),
        )
        # This is a long-running operation, so we wait for its result.
        recognizer_operation = client.create_recognizer(request=recognizer_request)
        recognizer = recognizer_operation.result()
        print(f"Recognizer '{recognizer_id}' created.")
    else:
        print(f"Recognizer '{recognizer_id}' already exists.")

    # Return the full resource name of the custom recognizer.
    return recognizer.name

def transcribe_and_save_docx(gcs_uri: str, project_id: str, recognizer_name: str):
    """
    Transcribes audio from a Google Cloud Storage URI using a specified custom recognizer
    and saves the transcription results to a DOCX file.

    The output file is named after the audio file (its base name with the
    extension replaced by ".docx") and is written using a relative path.
    Errors from the Speech client, from waiting on the batch operation
    (which is given up to 5400 seconds), or from saving the document are not
    caught here and propagate to the caller.

    Args:
        gcs_uri (str): The Google Cloud Storage URI of the audio file to transcribe.
        project_id (str): Your Google Cloud Project ID. Currently unused by this
            function (the recognizer name already identifies the project); kept
            so existing callers keep working.
        recognizer_name (str): The full resource name of the custom recognizer to use.
    """
    # The module-level name `speech` is an alias of the SpeechClient class
    # itself, so `speech.SpeechClient()` raises AttributeError. Import the
    # class directly to construct the client.
    from google.cloud.speech_v2 import SpeechClient

    client = SpeechClient()

    # Define the audio file metadata (GCS URI).
    audio = cloud_speech.BatchRecognizeFileMetadata(uri=gcs_uri)

    # Create the batch recognition request.
    request = cloud_speech.BatchRecognizeRequest(
        recognizer=recognizer_name,  # Use the previously created custom recognizer.
        files=[audio],
        recognition_output_config=cloud_speech.RecognitionOutputConfig(
            # Get results directly in the response.
            inline_response_config=cloud_speech.InlineOutputConfig(),
        ),
    )

    print("Waiting for batch recognition operation to complete (timeout: up to 90 minutes)...")
    # Execute the batch recognition operation.
    operation = client.batch_recognize(request=request)
    # Wait for the operation to complete, with a maximum timeout.
    response = operation.result(timeout=5400)  # 90 minutes * 60 seconds/minute

    audio_basename = os.path.basename(gcs_uri)

    # Create a new Word document.
    document = Document()
    document.add_heading(f'Transcription Results for {audio_basename}', level=0)

    # Results are keyed by the input URI; a missing entry is treated the same
    # as an empty transcript.
    transcript_results = response.results.get(gcs_uri)

    if not transcript_results or not transcript_results.transcript.results:
        document.add_paragraph("No transcription results found for this audio file.")
    else:
        # Iterate through the transcription results and add them to the document.
        for result in transcript_results.transcript.results:
            for alternative in result.alternatives:
                document.add_paragraph(alternative.transcript)
                document.add_paragraph()  # Add an empty paragraph for spacing between segments.

    # Determine the output filename based on the GCS URI.
    base_filename = os.path.splitext(audio_basename)[0]
    output_filename = f'{base_filename}.docx'

    # Save the Word document.
    document.save(output_filename)
    print(f"Transcription saved to {output_filename}")

if __name__ == "__main__":
    # Audio file to transcribe, given as a Google Cloud Storage URI of the
    # form "gs://your-bucket-name/your-audio-file.wav". Replace with your own.
    gcs_uri = "gs://bdav42/Spch2txt/AudioInput/Shelly.wav"

    print("--- Starting Custom Speech-to-Text Process ---")

    # Step 1: make sure the custom PhraseSet and Recognizer exist in Google
    # Cloud, creating them when they are missing.
    print("\nSetting up custom recognizer...")
    custom_recognizer_full_name = create_or_get_custom_recognizer(
        project_id=PROJECT_ID,
        recognizer_id=RECOGNIZER_ID,
        phrase_set_id=PHRASE_SET_ID,
        phrases=CUSTOM_PHRASES,
        # Languages the recognizer is configured with.
        language_codes=["en-US", "fa-IR"],
    )
    print(f"Using custom recognizer: {custom_recognizer_full_name}")

    # Step 2: transcribe the audio with that recognizer and write the DOCX.
    print(f"\nTranscribing audio from: {gcs_uri}")
    transcribe_and_save_docx(
        gcs_uri=gcs_uri,
        project_id=PROJECT_ID,
        recognizer_name=custom_recognizer_full_name,
    )

    print("\n--- Speech-to-Text Process Completed ---")