In [None]:
# Speech-to-Text with Google Cloud Speech API       ****    Basic Model    ****
# This script transcribes audio from a Google Cloud Storage (GCS) URI and saves the transcription to a DOCX file.       
# It uses the Google Cloud Speech-to-Text API with speaker diarization enabled.


from google.cloud import speech_v1p1beta1 as speech
from docx import Document

def transcribe_and_save_docx(gcs_uri, output_path="transcription_1hour.docx", timeout=5400):
    """Transcribe audio stored in GCS and save the result to a DOCX file.

    Submits a long-running recognition job (language ``fa-IR``, speaker
    diarization for 2 to 6 speakers), waits for it to finish, and writes every
    alternative's transcript followed by one "Word / Speaker" line per
    recognized word into a Word document.

    Authentication is taken from the environment (for example the
    GOOGLE_APPLICATION_CREDENTIALS variable pointing at a service account key).

    Args:
        gcs_uri: Cloud Storage URI of the audio file, in the form
            ``gs://bucket/object``. Local paths and bare file names are rejected.
        output_path: Where the DOCX file is written. Defaults to the file name
            the original script used.
        timeout: Maximum number of seconds to wait for the recognition job
            (default 5400 = 90 minutes). ``None`` waits without a limit.

    Raises:
        ValueError: If ``gcs_uri`` is not a ``gs://bucket/object`` URI.
    """
    # Fail fast on anything that is not a full gs:// URI. Otherwise the mistake
    # only surfaces after a round trip to the API, as
    # "400 Audio URI ... is an invalid GCS path".
    bucket, object_name = "", ""
    if isinstance(gcs_uri, str) and gcs_uri.startswith("gs://"):
        bucket, _, object_name = gcs_uri[len("gs://"):].partition("/")
    if not bucket or not object_name:
        raise ValueError(
            f"gcs_uri must look like 'gs://bucket/object', got {gcs_uri!r}"
        )

    # Set up the Google Cloud Speech client (credentials come from the environment).
    client = speech.SpeechClient()

    # Configure the audio source and recognition settings.
    audio = speech.RecognitionAudio(uri=gcs_uri)
    config = speech.RecognitionConfig(
        language_code="fa-IR",
        # The top-level `enable_speaker_diarization` field is deprecated; the
        # flag belongs inside the diarization config next to the speaker counts.
        diarization_config=speech.SpeakerDiarizationConfig(
            enable_speaker_diarization=True,
            min_speaker_count=2,
            max_speaker_count=6,
        ),
    )

    # Start the long-running recognition job and block until it completes.
    operation = client.long_running_recognize(config=config, audio=audio)
    if timeout is None:
        wait_note = "no timeout"
    else:
        wait_note = f"timeout: up to {timeout / 60:g} minutes"
    print(f"Waiting for operation to complete ({wait_note})...")
    response = operation.result(timeout=timeout)

    # Build the document: transcript, then per-word speaker tags, then a
    # separator line, for every alternative of every result.
    document = Document()
    for result in response.results:
        for alternative in result.alternatives:
            document.add_paragraph(alternative.transcript)
            for word_info in alternative.words:
                document.add_paragraph(f"Word: {word_info.word}, Speaker: {word_info.speaker_tag}")
            document.add_paragraph("-" * 20)

    # Save the transcription to a DOCX file.
    document.save(output_path)
    print(f"Transcription saved to {output_path}")
# 
if __name__ == "__main__":
    # GCS location of the audio to transcribe; replace with your own GCS URI.
    gcs_uri = "gs://blob_speech/Spch2txt/AudioInput/file.wav"
    transcribe_and_save_docx(gcs_uri=gcs_uri)

InvalidArgument: 400 Audio URI `Arvin_converted.wav` is an invalid GCS path.

In [None]:
from google.cloud import speech_v2
from google.cloud.speech_v2.types import cloud_speech

def transcribe_audio(audio_file, project_id=None, location="us-central1"):
    """Transcribes a short audio file using the chirp_2 model.

    Sends one synchronous Speech-to-Text v2 ``Recognize`` request and prints
    the transcript and confidence of every returned alternative. Errors raised
    by the recognize call are caught and printed rather than propagated.

    Args:
        audio_file: Either a Cloud Storage URI (``gs://bucket/object``), which
            is sent to the API by reference, or a local file path, whose bytes
            are read and sent inline.
        project_id: Google Cloud project that owns the recognizer. When
            omitted, the GOOGLE_CLOUD_PROJECT environment variable is used.
        location: Speech-to-Text location used for both the API endpoint and
            the recognizer path. ``"global"`` selects the default endpoint.

    Raises:
        ValueError: If no project ID is given and GOOGLE_CLOUD_PROJECT is unset.
        OSError: If ``audio_file`` is a local path that cannot be read.
    """
    import os  # local import so this cell does not depend on another cell's imports

    project_id = project_id or os.environ.get("GOOGLE_CLOUD_PROJECT")
    if not project_id:
        raise ValueError(
            "A Google Cloud project ID is required: pass project_id or set "
            "the GOOGLE_CLOUD_PROJECT environment variable."
        )

    # Speech-to-Text v2 has no RecognitionAudio message; the audio is attached
    # directly to the request, either by reference (`uri`) or inline (`content`).
    if isinstance(audio_file, str) and audio_file.startswith("gs://"):
        audio_kwargs = {"uri": audio_file}
    else:
        with open(audio_file, "rb") as f:
            audio_kwargs = {"content": f.read()}

    # Chirp models are served from regional endpoints, so the client must
    # target the endpoint that matches the recognizer's location.
    if location == "global":
        client = speech_v2.SpeechClient()
    else:
        client = speech_v2.SpeechClient(
            client_options={"api_endpoint": f"{location}-speech.googleapis.com"}
        )

    config = cloud_speech.RecognitionConfig(
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
        model="chirp_2",
        language_codes=["en-US"],  # Adjust language code as needed
    )

    # "_" is the implicit default recognizer, so no recognizer resource has to
    # be created beforehand; the `recognizer` field itself is mandatory in v2.
    request = cloud_speech.RecognizeRequest(
        recognizer=f"projects/{project_id}/locations/{location}/recognizers/_",
        config=config,
        **audio_kwargs,
    )

    try:
        response = client.recognize(request=request)
        for result in response.results:
            for alternative in result.alternatives:
                print(f"Transcript: {alternative.transcript}")
                print(f"Confidence: {alternative.confidence}")
    except Exception as e:
        # Deliberate best-effort: report the API failure and keep the notebook running.
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    # Audio location handed to transcribe_audio; replace with your own file.
    audio_file = "gs://blob_speech/Spch2txt/AudioInput/file.wav"
    transcribe_audio(audio_file=audio_file)

AttributeError: module 'google.cloud.speech_v2.types.cloud_speech' has no attribute 'RecognitionAudio'