In [None]:
# --- User configuration: edit these values for your own run ---

# Name of the audio file to transcribe (replace with your audio file name).
FILE_NAME = "S56.wav"

# Google Cloud project ID (replace with your project ID).
PROJECT_ID = "glassy-groove-461110-u0"

# Phrases to bias recognition towards, one dict per phrase with the keys
# "value" (the phrase text) and "boost" (its weight).
# Boost needs to be under 20 and should be a float.
CUSTOM_PHRASES = [
    dict(value=phrase_text, boost=phrase_boost)
    for phrase_text, phrase_boost in (
        ("Warrior", 10.0),
        ("تولتک", 8.0),
        ("دون خوان", 12.0),
        ("دون خنارو", 12.0),
        ("کاستاندا", 12.0),
    )
]


In [59]:
from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech
from google.protobuf.field_mask_pb2 import FieldMask


# Create the custom PhraseSet, or merge CUSTOM_PHRASES into it when it already exists.
# Imported here so this cell does not depend on a later cell having been run first.
from google.api_core.exceptions import AlreadyExists

PHRASE_SET_ID = "my-custom-phrases-set"
PHRASE_SET_NAME = f"projects/{PROJECT_ID}/locations/global/phraseSets/{PHRASE_SET_ID}"


client = SpeechClient()

# Convert the plain-dict configuration into API phrase objects.
phrase_set_phrases = [
    cloud_speech.PhraseSet.Phrase(value=p["value"], boost=p.get("boost", 0.0))
    for p in CUSTOM_PHRASES
]

phrase_set_request = cloud_speech.CreatePhraseSetRequest(
    parent=f"projects/{PROJECT_ID}/locations/global",
    phrase_set_id=PHRASE_SET_ID,
    phrase_set=cloud_speech.PhraseSet(phrases=phrase_set_phrases),
)

try:
    operation = client.create_phrase_set(request=phrase_set_request)
    # create_phrase_set returns a long-running operation; block until it finishes.
    phrase_set = operation.result()
    print(f"PhraseSet '{PHRASE_SET_ID}' created: {phrase_set.name}")
except AlreadyExists:
    # Only the "already exists" case falls back to an update. Any other failure
    # (permissions, invalid boost, network, ...) now propagates instead of being
    # mistaken for an existing phrase set.
    existing_phrase_set = client.get_phrase_set(name=PHRASE_SET_NAME)

    # Merge by phrase text so re-running this cell is idempotent: phrases that are
    # only on the server are kept, and configured phrases replace same-valued
    # entries rather than being appended as duplicates on every run.
    merged_phrases = {phrase.value: phrase for phrase in existing_phrase_set.phrases}
    merged_phrases.update({phrase.value: phrase for phrase in phrase_set_phrases})

    updated_phrase_set = cloud_speech.PhraseSet(
        name=existing_phrase_set.name,
        phrases=list(merged_phrases.values()),
    )

    # Restrict the update to the `phrases` field.
    request = cloud_speech.UpdatePhraseSetRequest(
        phrase_set=updated_phrase_set,
        update_mask=FieldMask(paths=["phrases"]),
    )

    operation = client.update_phrase_set(request=request)
    phrase_set = operation.result()
    print(f"PhraseSet '{PHRASE_SET_ID}' updated: {phrase_set.name}")

In [60]:
from google.api_core.exceptions import AlreadyExists

# Fetch the custom recognizer, creating it first if it does not exist yet.
# NotFound is used in the `except` clause below but was never imported, so a missing
# recognizer raised NameError instead of being created.
from google.api_core.exceptions import NotFound

# RECOGNIZER_ID was not defined in any cell of this notebook (it only existed as
# leftover kernel state). The value matches the ID shown in this cell's recorded output.
RECOGNIZER_ID = "my-custom-multilingual-recognizer"
RECOGNIZER_NAME = f"projects/{PROJECT_ID}/locations/global/recognizers/{RECOGNIZER_ID}"

try:
    # Attempt to retrieve the recognizer if it already exists.
    recognizer = client.get_recognizer(name=RECOGNIZER_NAME)
    print(f"Recognizer '{RECOGNIZER_ID}' already exists.")
except NotFound:
    # NOTE(review): the phrase set built earlier is not attached here (the
    # `adaptation` argument is commented out), so CUSTOM_PHRASES currently has no
    # effect on recognition. Confirm whether that is intentional.
    recognizer_config = cloud_speech.RecognitionConfig(
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
        language_codes=["fa-IR", "en-US"],  # Or your specific languages
        model="long",  # Use the 'long' model here
        # adaptation=cloud_speech.SpeechAdaptation(
        #     phrase_set_references=[PHRASE_SET_NAME]
        # ),
        features=cloud_speech.RecognitionFeatures(
            # enable_automatic_punctuation=True,
            # speaker_diarization_config=cloud_speech.SpeakerDiarizationConfig(...)
        ),
    )

    recognizer_request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{PROJECT_ID}/locations/global",
        recognizer_id=RECOGNIZER_ID,
        recognizer=cloud_speech.Recognizer(default_recognition_config=recognizer_config),
    )

    try:
        operation = client.create_recognizer(request=recognizer_request)
        # create_recognizer returns a long-running operation; block until it finishes.
        recognizer = operation.result()
        print(f"Recognizer '{RECOGNIZER_ID}' created: {recognizer.name}")
    except AlreadyExists:
        # The recognizer appeared between the lookup and the create call; reuse it.
        print(f"Recognizer '{RECOGNIZER_ID}' already exists (caught AlreadyExists error).")
        recognizer = client.get_recognizer(name=RECOGNIZER_NAME)
except Exception as e:
    # Best-effort: report any other failure without stopping the notebook.
    # `recognizer` is left unbound in this case.
    print(f"Error occurred: {e}")

Recognizer 'my-custom-multilingual-recognizer' already exists.


In [61]:

from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech

def transcribe_and_save_docx(gcs_uri: str, project_id: str, recognizer_name: str):
    """
    Transcribe an audio file stored in Google Cloud Storage and save the text as DOCX.

    Runs a batch recognition request against the given recognizer with inline
    results, waits for it to finish, and writes every transcript alternative as a
    paragraph of a Word document. The document is saved in the current working
    directory as ``<audio file base name>.docx``.

    Args:
        gcs_uri (str): The Google Cloud Storage URI of the audio file to transcribe.
        project_id (str): Your Google Cloud Project ID. Currently unused by this
            function; kept so existing callers keep working.
        recognizer_name (str): The full resource name of the custom recognizer to use.

    Returns:
        None. The result is the DOCX file written to disk.

    Errors from the Speech client, from waiting on the operation, or from saving the
    document are not caught here and propagate to the caller.
    """
    # Imported locally because no cell of the notebook imports these names, so the
    # function previously failed with NameError on a fresh kernel.
    import os

    # NOTE(review): `Document` is assumed to be python-docx's `docx.Document`, based
    # on the add_heading / add_paragraph / save calls below.
    from docx import Document

    client = SpeechClient()

    # Define the audio file metadata (GCS URI).
    audio = cloud_speech.BatchRecognizeFileMetadata(uri=gcs_uri)

    # Create the batch recognition request.
    request = cloud_speech.BatchRecognizeRequest(
        recognizer=recognizer_name,  # Use the previously created custom recognizer.
        files=[audio],
        recognition_output_config=cloud_speech.RecognitionOutputConfig(
            inline_response_config=cloud_speech.InlineOutputConfig(),  # Get results directly in the response.
        ),
    )

    print("Waiting for batch recognition operation to complete (timeout: up to 90 minutes)...")
    # Execute the batch recognition operation.
    operation = client.batch_recognize(request=request)
    # Wait for the operation to complete, with a maximum timeout.
    response = operation.result(timeout=5400)  # 90 minutes * 60 seconds/minute

    audio_basename = os.path.basename(gcs_uri)

    # Create a new Word document.
    document = Document()
    document.add_heading(f'Transcription Results for {audio_basename}', level=0)

    # Results are looked up by the URI that was submitted.
    transcript_results = response.results.get(gcs_uri)

    if not transcript_results or not transcript_results.transcript.results:
        document.add_paragraph("No transcription results found for this audio file.")
    else:
        # One paragraph per alternative of every result segment.
        for result in transcript_results.transcript.results:
            for alternative in result.alternatives:
                document.add_paragraph(alternative.transcript)

    # Output filename is the audio file's base name with a .docx extension.
    base_filename = os.path.splitext(audio_basename)[0]
    output_filename = f'{base_filename}.docx'

    # Save the Word document.
    document.save(output_filename)
    print(f"Transcription saved to {output_filename}")


In [None]:

if __name__ == "__main__":
    # Entry point: transcribe the configured audio file with the custom recognizer.

    # Build the URI from FILE_NAME so the configuration cell is the single place to
    # change the input file. Previously the file name was hard-coded here and
    # editing FILE_NAME had no effect.
    # Replace the bucket/prefix with your own. Example: "gs://your-bucket-name/your-audio-file.wav"
    gcs_uri = f"gs://blob_speech/Spch2txt/AudioInput/{FILE_NAME}"

    print("--- Starting Custom Speech-to-Text Process ---")
    print("\nSetting up custom recognizer...")
    try:
        # The recognizer itself is created/retrieved in an earlier cell; this step
        # only transcribes the audio using its resource name.
        print(f"\nTranscribing audio from: {gcs_uri}")
        transcribe_and_save_docx(gcs_uri, PROJECT_ID, RECOGNIZER_NAME)

        print("\n--- Speech-to-Text Process Completed ---")

    except Exception as e:
        # Best-effort: report the failure instead of raising.
        print(f"\nFailed to set up recognizer or transcribe: {e}")
        

--- Starting Custom Speech-to-Text Process ---

Setting up custom recognizer...

Transcribing audio from: gs://bdav42/Spch2txt/AudioInput/S56.wav
Waiting for batch recognition operation to complete (timeout: up to 90 minutes)...
Transcription saved to S56.docx

--- Speech-to-Text Process Completed ---
