In [3]:
# Speech-to-Text with Google Cloud Speech API       ****    Basic Model    ****
# This script transcribes audio from a Google Cloud Storage (GCS) URI and saves the transcription to a DOCX file.       
# It uses the Google Cloud Speech-to-Text v2 API (batch recognition); speaker diarization is not configured in this version.


import os

from docx import Document
from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech
# GCP project ID; transcribe_and_save_docx interpolates it into the recognizer
# path ("projects/<PROJECT_ID>/locations/global/recognizers/_").
# Replace with your own project ID.
PROJECT_ID = "verdant-branch-457906-a2"


def transcribe_and_save_docx(gcs_uri):
    """Transcribe one audio file stored in GCS and save the text to a DOCX file.

    Submits a Speech-to-Text v2 batch recognition request for ``gcs_uri``
    (auto-detected decoding, language codes ``fa-IR`` and ``en-US``, model
    ``long``, inline results) and waits up to 90 minutes for it to finish.
    Every alternative of every result is written as its own paragraph,
    followed by an empty spacer paragraph. The document is saved under a
    relative file name made of the audio object's base name plus ``.docx``
    (``.../Russi.wav`` -> ``Russi.docx``, ``.../clip.flac`` -> ``clip.docx``).

    Args:
        gcs_uri: URI of the audio object, e.g. ``gs://bucket/path/audio.wav``.

    Raises:
        RuntimeError: If the API reports an error for this file instead of a
            transcript; no document is written in that case.
    """
    # Last path segment of the URI, e.g. "Russi.wav"; computed once and reused
    # for the heading, the output file name and the final log line.
    audio_name = gcs_uri.split("/")[-1]
    # splitext removes whatever extension is present, not a fixed 4 characters.
    docx_name = f"{os.path.splitext(audio_name)[0]}.docx"

    # Set up the Google Cloud Speech client
    # Make sure to set the GOOGLE_APPLICATION_CREDENTIALS environment variable to your service account key file
    client = SpeechClient()

    # Configure the audio file and recognition settings
    # The audio file must be in a format supported by the API (e.g., FLAC, WAV, MP3)
    audio = cloud_speech.BatchRecognizeFileMetadata(uri=gcs_uri)
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
        language_codes=["fa-IR", "en-US"],
        model="long",
    )
    request = cloud_speech.BatchRecognizeRequest(
        recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
        config=config,
        files=[audio],
        recognition_output_config=cloud_speech.RecognitionOutputConfig(
            inline_response_config=cloud_speech.InlineOutputConfig(),
        ),
    )

    # Start the batch operation and block until it completes or times out.
    operation = client.batch_recognize(request=request)
    print("Waiting for operation to complete (timeout: up to 90 minutes)...")
    response = operation.result(timeout=5400)  # 90 minutes * 60 seconds/minute

    # Results are keyed by the input URI. Fail loudly on a per-file error so a
    # failed run does not produce a heading-only document and a success message.
    file_result = response.results[gcs_uri]
    if file_result.error.code:
        raise RuntimeError(
            f"Transcription failed for {gcs_uri}: {file_result.error.message}"
        )

    document = Document()
    # NOTE(review): the heading says "Translation" although this is a
    # transcription; text kept unchanged so generated documents stay the same.
    document.add_heading(f'Persian Translation Results {audio_name}', level=0)

    for result in file_result.transcript.results:
        for alternative in result.alternatives:
            document.add_paragraph(alternative.transcript)
            document.add_paragraph()  # Add an empty paragraph for spacing

    document.save(docx_name)
    print(f"Transcription saved to {docx_name}")
# 
if __name__ == "__main__":
    # Sample recording in GCS; replace with your GCS URI.
    gcs_uri = "gs://bdav42/Spch2txt/AudioInput/Russi.wav"
    transcribe_and_save_docx(gcs_uri=gcs_uri)

Waiting for operation to complete (timeout: up to 90 minutes)...
Transcription saved to Russi.docx


In [None]:
# import os

# from google.cloud.speech_v2 import SpeechClient
# from google.cloud.speech_v2.types import cloud_speech

# PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")  # os.getenv takes the env var NAME; the project ID itself is "verdant-branch-457906-a2"


# def transcribe_batch_gcs_input_inline_output_v2(
#     audio_uri: str,
# ) -> cloud_speech.BatchRecognizeResults:
#     """Transcribes audio from a Google Cloud Storage URI using the Google Cloud Speech-to-Text API.
#         The transcription results are returned inline in the response.
#     Args:
#         audio_uri (str): The Google Cloud Storage URI of the input audio file.
#             E.g., gs://[BUCKET]/[FILE]
#     Returns:
#         cloud_speech.BatchRecognizeResults: The response containing the transcription results.
#     """
#     # Instantiates a client
#     client = SpeechClient()

#     config = cloud_speech.RecognitionConfig(
#         auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
#         language_codes=["fa-IR","en-US"],
#         model="long",
#     )
#     # audio_uri = audio_uri.strip()
#     # if not audio_uri.startswith("gs://"):
#     #     raise ValueError("The audio URI must start with 'gs://'.")
#     audio_uri = f"{BUCKET_URI}/Spch2txt/AudioInput/{FILE_NAME}"
#     file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=audio_uri)

#     request = cloud_speech.BatchRecognizeRequest(
#         recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
#         config=config,
#         files=[file_metadata],
#         recognition_output_config=cloud_speech.RecognitionOutputConfig(
#             inline_response_config=cloud_speech.InlineOutputConfig(),
#         ),
#     )



In [None]:
    # # The request contains the configuration and the audio file metadata
    # # Transcribes the audio into text
    # operation = client.batch_recognize(request=request)

    # print("Waiting for operation to complete...")
    # response = operation.result(timeout=120)

    # for result in response.results[audio_uri].transcript.results:
    #     print(f"Transcript: {result.alternatives[0].transcript}")

    # return response.results[audio_uri].transcript

In [None]:
# import os

# from google.cloud.speech_v2 import SpeechClient
# from google.cloud.speech_v2.types import cloud_speech

# PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")  # os.getenv takes the env var NAME; the project ID itself is "verdant-branch-457906-a2"


# def transcribe_batch_gcs_input_inline_output_v2(
#     audio_uri: str,
# ) -> cloud_speech.BatchRecognizeResults:
#     """Transcribes audio from a Google Cloud Storage URI using the Google Cloud Speech-to-Text API.
#         The transcription results are returned inline in the response.
#     Args:
#         audio_uri (str): The Google Cloud Storage URI of the input audio file.
#             E.g., gs://[BUCKET]/[FILE]
#     Returns:
#         cloud_speech.BatchRecognizeResults: The response containing the transcription results.
#     """
#     # Instantiates a client
#     client = SpeechClient()

#     config = cloud_speech.RecognitionConfig(
#         auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
#         language_codes=["en-US"],
#         model="long",
#     )

#     file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=audio_uri)

#     request = cloud_speech.BatchRecognizeRequest(
#         recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
#         config=config,
#         files=[file_metadata],
#         recognition_output_config=cloud_speech.RecognitionOutputConfig(
#             inline_response_config=cloud_speech.InlineOutputConfig(),
#         ),
#     )

#     # Transcribes the audio into text
#     operation = client.batch_recognize(request=request)

#     print("Waiting for operation to complete...")
#     response = operation.result(timeout=120)

#     for result in response.results[audio_uri].transcript.results:
#         print(f"Transcript: {result.alternatives[0].transcript}")

#     return response.results[audio_uri].transcript


In [None]:
    # print("Waiting for operation to complete (timeout: up to 90 minutes)...")
    # response = operation.result(timeout=5400)  # 90 minutes * 60 seconds/minute
    # # Print the response for debugging
    # document = Document()
    # transcript_results = response.results[gcs_uri].transcript.results
    # for result in transcript_results:
    #     for alternative in result.alternatives:
    #         document.add_paragraph(alternative.transcript)
    #         if hasattr(alternative, "words"):
    #             for word_info in alternative.words:
    #                 document.add_paragraph(f"Word: {word_info.word}, Speaker: {getattr(word_info, 'speaker_tag', 'N/A')}")
    #         document.add_paragraph("-" * 20)
    # # Save the transcription to a DOCX file
    # document.save("transcription_1hour.docx")
    # print("Transcription saved to transcription_1hour.docx")