In [1]:
import pandas as pd

In [46]:
import os
from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech
from google.api_core.client_options import ClientOptions

# Set your project ID, region, and GCS URI
PROJECT_ID = "etundrant"
REGION = "asia-southeast1"
GCS_URI = "gs://pdf_list/23_07_2024/Call_Recordings/bagcfdpckn_varsha_mayndraguti_olivaclinic_com_2024-05-28-17-17-45.wav"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:\\All_Files\\Chirp_Google\\etundrant-ab7487fb20ac.json"

def transcribe_batch_gcs_input_inline_output_v2(audio_uri: str):
    """Transcribes audio from a Google Cloud Storage URI using Chirp model."""
    
    # Instantiates the client with the correct regional endpoint
    print("Initializing SpeechClient...")
    client = SpeechClient(client_options=ClientOptions(api_endpoint=f"{REGION}-speech.googleapis.com"))
    print("SpeechClient initialized.")

    config = cloud_speech.RecognitionConfig(
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
        language_codes=["en-US"],  # English (US)
        model="chirp",  # Chirp model
    )
    print("Recognition config created.")

    file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=audio_uri)
    print("File metadata set.")

    request = cloud_speech.BatchRecognizeRequest(
        recognizer=f"projects/{PROJECT_ID}/locations/{REGION}/recognizers/_",
        config=config,
        files=[file_metadata],
        recognition_output_config=cloud_speech.RecognitionOutputConfig(
            inline_response_config=cloud_speech.InlineOutputConfig(),
        ),
    )
    print("Batch recognize request created.")

    # Transcribes the audio into text
    print("Sending batch recognize request...")
    operation = client.batch_recognize(request=request)
    print("Waiting for operation to complete...")
    response = operation.result(timeout=120)  # Adjust timeout as necessary
    print("Operation completed.")

    # Access the transcription results from the inline_result
    if response.results:
        print("Transcription Results found:")
        for result_key, result_value in response.results.items():
            if result_value.inline_result and result_value.inline_result.transcript.results:
                for res in result_value.inline_result.transcript.results:
                    for alternative in res.alternatives:
                        print(f"Transcript: {alternative.transcript}")
            else:
                print("No alternatives found.")
    else:
        print("No results found.")

# Call the function
print("Starting transcription process...")
transcribe_batch_gcs_input_inline_output_v2(GCS_URI)


Starting transcription process...
Initializing SpeechClient...
SpeechClient initialized.
Recognition config created.
File metadata set.
Batch recognize request created.
Sending batch recognize request...
Waiting for operation to complete...
Operation completed.
Transcription Results found:
Transcript:  good evening thank you for calling oliva skin hair and weight loss clinic this is varsha home for the day maine appointment pick book kiya tha py ke naam se ok aapka number boliye 706115 706115 4492 okay be online pay right okay yes kal ke liye book kiye the naam
Transcript:  5:30 fir aaj ke liye schedule karaya fir se aaj ke liye ha ha reschedule kare the kya ha reschedule aaj ke liye phle aaj ke liye karwaya tha fir kal ke liye karwaya because of emergency schedule karwa diya tha aaj ke liye hi hona fir theek hai line pe rahiye ma batangi aapko line pe hai ok actually problem ye hai ki wo appointment
Transcript:  google map pe mariye mam google map me search kariye oliva skin hair and 

In [49]:
import os
from google.cloud import speech_v2
from google.cloud import translate_v2 as translate
from google.api_core.client_options import ClientOptions

# Set your project ID, region, and GCS URI
PROJECT_ID = "etundrant"
REGION = "asia-southeast1"
GCS_URI = "gs://pdf_list/23_07_2024/Call_Recordings/bagcfdpckn_varsha_mayndraguti_olivaclinic_com_2024-05-28-17-17-45.wav"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:\\All_Files\\Chirp_Google\\etundrant-ab7487fb20ac.json"

def translate_text(text, target_language="en"):
    """Translates text to the target language."""
    translate_client = translate.Client()
    result = translate_client.translate(text, target_language=target_language)
    return result["translatedText"]

def transcribe_and_translate_batch_gcs_input_inline_output_v2(audio_uri: str):
    """Transcribes audio from a Google Cloud Storage URI using Chirp model and translates to English."""
    
    print("Initializing SpeechClient...")
    client = speech_v2.SpeechClient(client_options=ClientOptions(api_endpoint=f"{REGION}-speech.googleapis.com"))
    print("SpeechClient initialized.")

    config = speech_v2.RecognitionConfig(
        auto_decoding_config=speech_v2.AutoDetectDecodingConfig(),
        language_codes=["hi-IN"],  # Hindi
        model="chirp",  # Chirp model
    )
    print("Recognition config created.")

    file_metadata = speech_v2.BatchRecognizeFileMetadata(uri=audio_uri)
    print("File metadata set.")

    request = speech_v2.BatchRecognizeRequest(
        recognizer=f"projects/{PROJECT_ID}/locations/{REGION}/recognizers/_",
        config=config,
        files=[file_metadata],
        recognition_output_config=speech_v2.RecognitionOutputConfig(
            inline_response_config=speech_v2.InlineOutputConfig(),
        ),
    )
    print("Batch recognize request created.")

    print("Sending batch recognize request...")
    operation = client.batch_recognize(request=request)
    print("Waiting for operation to complete...")
    response = operation.result(timeout=120)  # Adjust timeout as necessary
    print("Operation completed.")

    if response.results:
        print("Transcription and Translation Results:")
        for result_key, result_value in response.results.items():
            if result_value.inline_result and result_value.inline_result.transcript.results:
                for res in result_value.inline_result.transcript.results:
                    for alternative in res.alternatives:
                        hindi_transcript = alternative.transcript
                        english_translation = translate_text(hindi_transcript)
                        print(f"Hindi Transcript: {hindi_transcript}")
                        print(f"English Translation: {english_translation}")
                        print("---")
            else:
                print("No alternatives found.")
    else:
        print("No results found.")

# Call the function
print("Starting transcription and translation process...")
transcribe_and_translate_batch_gcs_input_inline_output_v2(GCS_URI)

Starting transcription and translation process...
Initializing SpeechClient...
SpeechClient initialized.
Recognition config created.
File metadata set.
Batch recognize request created.
Sending batch recognize request...
Waiting for operation to complete...
Operation completed.
Transcription and Translation Results:
Hindi Transcript:  गुड इवनिंग थैंक यू फॉलिंग एंड वेट लॉस क्लिनिक दिस इस वर्षा फॉर द डे बोलिए मैंने अपॉइंटमेंट पिक बुक किया था पायल सिंह के नाम से ओके आपका नंबर बोलिए 706150615 4492 ओके बी ऑनलाइन पायल सिंह राइट ओके यस कल के लिए बुक किए थे ना
English Translation:  good evening thank you falling and weight loss clinic this is varsa for the day tell me i had booked an appointment pick in the name of payal singh ok tell me your number 706150615 4492 ok be online payal singh right ok yes i had booked it for tomorrow
---
Hindi Transcript:  में 5:30 फिर आज के लिए शेड्यूल करवाया फिर से आज के लिए हां हां रिस्केड्यूल के लिए करे थे क्या हां रिश्केड्यूल आज के लिए पहले आज के लिए करवाया था

In [48]:
pip install google-cloud-translate

Collecting google-cloud-translate
  Downloading google_cloud_translate-3.16.0-py2.py3-none-any.whl.metadata (5.3 kB)
Downloading google_cloud_translate-3.16.0-py2.py3-none-any.whl (175 kB)
   ---------------------------------------- 0.0/175.8 kB ? eta -:--:--
   ---------------------------------------- 0.0/175.8 kB ? eta -:--:--
   -- ------------------------------------- 10.2/175.8 kB ? eta -:--:--
   --------- ----------------------------- 41.0/175.8 kB 393.8 kB/s eta 0:00:01
   ------------------------------------- -- 163.8/175.8 kB 1.2 MB/s eta 0:00:01
   ---------------------------------------- 175.8/175.8 kB 1.1 MB/s eta 0:00:00
Installing collected packages: google-cloud-translate
Successfully installed google-cloud-translate-3.16.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [42]:
import os
from google.cloud import storage
from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech
from google.api_core.client_options import ClientOptions

# Set project ID
PROJECT_ID = "etundrant"
REGION = "asia-southeast1"
BUCKET_NAME = "pdf_list"
FOLDER_PATH = "23_07_2024/Call_Recordings/"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:\\All_Files\\Chirp_Google\\etundrant-ab7487fb20ac.json"

def list_wav_files(bucket_name: str, folder_path: str):
    """Lists all .wav files in the specified Google Cloud Storage folder."""
    storage_client = storage.Client()
    blobs = storage_client.list_blobs(bucket_name, prefix=folder_path)
    wav_files = [f"gs://{bucket_name}/{blob.name}" for blob in blobs if blob.name.endswith('.wav')]
    return wav_files

def transcribe_batch_multiple_files_v2(audio_uris, gcs_output_path):
    """Transcribes audio from multiple Google Cloud Storage URIs using the Chirp model."""
    
    # Initialize SpeechClient with correct regional endpoint
    print("Initializing SpeechClient...")
    client = SpeechClient(client_options=ClientOptions(api_endpoint=f"{REGION}-speech.googleapis.com"))
    print("SpeechClient initialized.")

    # Recognition configuration
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
        language_codes=["en-US"],  # Set to English (US)
        model="chirp",  # Chirp model
    )

    # Process files in batches of 15
    for i in range(0, len(audio_uris), 15):
        batch_files = audio_uris[i:i+15]
        files = [cloud_speech.BatchRecognizeFileMetadata(uri=uri) for uri in batch_files]
        
        request = cloud_speech.BatchRecognizeRequest(
            recognizer=f"projects/{PROJECT_ID}/locations/{REGION}/recognizers/_",
            config=config,
            files=files,
            recognition_output_config=cloud_speech.RecognitionOutputConfig(
                gcs_output_config=cloud_speech.GcsOutputConfig(uri=gcs_output_path),
            ),
        )

        # Send the batch recognize request
        print(f"Sending batch recognize request for files {i+1} to {i+len(batch_files)}...")
        operation = client.batch_recognize(request=request)
        print("Waiting for operation to complete...")
        response = operation.result(timeout=1200)  # Adjust timeout for long audio files
        print("Operation completed.")

        # Fetching and printing transcription results
        print("Fetching transcription results:")
        for uri in batch_files:
            if uri in response.results:
                result_value = response.results[uri]
                if result_value.inline_result and result_value.inline_result.transcript.results:
                    for res in result_value.inline_result.transcript.results:
                        for alternative in res.alternatives:
                            print(f"Transcript for {uri}: {alternative.transcript}")
                else:
                    print(f"No alternatives found for {uri}")
            else:
                print(f"No results found for {uri}")

# Get list of all .wav files from the specified folder
audio_uris = list_wav_files(BUCKET_NAME, FOLDER_PATH)
gcs_output_path = f"gs://{BUCKET_NAME}/transcriptions/"
transcribe_batch_multiple_files_v2(audio_uris, gcs_output_path)

Initializing SpeechClient...
SpeechClient initialized.
Transcribing gs://pdf_list/23_07_2024/Call_Recordings/bagcfdoaln_gangadhara_gb_olivaclinic_com_2024-07-09-18-38-36.wav...
Operation completed.
Transcript:  very good evening thank you for calling oliva skin hair weight loss cleaning gangadhary i help you yes hello hi i'm actually calling from ashok vihar and i was wondering if yeah i was wondering if you treat issues regarding premature brain of hair premature gray hair you are talking about premature gray hair i'm so sorry for this particular concern we don't have treatment at v like if it is any air loss
Detected Language: en
Transcript:  for alopia we have the treatment sorry you have treatment for hair loss dandruff and alopatia okay okay okay gray hair we don't have okay okay no problem thank you thank you for okay in future if you need any skin or hair related concern please give a call
Detected Language: en
Initializing SpeechClient...
SpeechClient initialized.
Transcribing 

InvalidArgument: 400 Audio can be of a maximum of 60 seconds.

In [45]:
import os
from google.cloud import storage
from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech
from google.api_core.client_options import ClientOptions

# Set project ID and environment variables
PROJECT_ID = "etundrant"
REGION = "asia-southeast1"
BUCKET_NAME = "pdf_list"
FOLDER_PATH = "23_07_2024/Call_Recordings/"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:\\All_Files\\Chirp_Google\\etundrant-ab7487fb20ac.json"

def list_wav_files(bucket_name: str, folder_path: str):
    """Lists all .wav files in the specified Google Cloud Storage folder."""
    storage_client = storage.Client()
    blobs = storage_client.list_blobs(bucket_name, prefix=folder_path)
    wav_files = [f"gs://{bucket_name}/{blob.name}" for blob in blobs if blob.name.endswith('.wav')]
    return wav_files

def transcribe_batch_multiple_files_v2(audio_uris, gcs_output_path):
    """Transcribes long audio files using auto language detection and Chirp model."""
    
    # Initialize SpeechClient with correct regional endpoint
    print("Initializing SpeechClient...")
    client = SpeechClient(client_options=ClientOptions(api_endpoint=f"{REGION}-speech.googleapis.com"))
    print("SpeechClient initialized.")

    # Recognition configuration with auto language detection
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
        language_codes=["auto"],  # Auto-detect language
        model="chirp",  # Chirp model for large files
    )

    # Process files in batches of 15
    for i in range(0, len(audio_uris), 15):
        batch_files = audio_uris[i:i+15]
        files = [cloud_speech.BatchRecognizeFileMetadata(uri=uri) for uri in batch_files]
        
        request = cloud_speech.BatchRecognizeRequest(
            recognizer=f"projects/{PROJECT_ID}/locations/{REGION}/recognizers/_",
            config=config,
            files=files,
            recognition_output_config=cloud_speech.RecognitionOutputConfig(
                gcs_output_config=cloud_speech.GcsOutputConfig(uri=gcs_output_path),
            ),
        )

        # Send the batch recognize request
        print(f"Sending batch recognize request for files {i+1} to {i+len(batch_files)}...")
        operation = client.batch_recognize(request=request)
        print("Waiting for operation to complete...")
        response = operation.result(timeout=1800)  # Adjust timeout for long audio files
        print("Operation completed.")

        # Fetching and printing transcription results
        print("Fetching transcription results:")
        for uri in batch_files:
            if uri in response.results:
                result_value = response.results[uri]
                if result_value.inline_result and result_value.inline_result.transcript.results:
                    for res in result_value.inline_result.transcript.results:
                        for alternative in res.alternatives:
                            print(f"Transcript for {uri}: {alternative.transcript}")
                else:
                    print(f"No alternatives found for {uri}")
            else:
                print(f"No results found for {uri}")

# Get list of all .wav files from the specified folder
audio_uris = list_wav_files(BUCKET_NAME, FOLDER_PATH)
gcs_output_path = f"gs://{BUCKET_NAME}/transcriptions/"
transcribe_batch_multiple_files_v2(audio_uris, gcs_output_path)

Initializing SpeechClient...
SpeechClient initialized.
Sending batch recognize request for files 1 to 15...
Waiting for operation to complete...
Operation completed.
Fetching transcription results:
No alternatives found for gs://pdf_list/23_07_2024/Call_Recordings/bagcfdoaln_gangadhara_gb_olivaclinic_com_2024-07-09-18-38-36.wav
No alternatives found for gs://pdf_list/23_07_2024/Call_Recordings/bagcfdpckn_sameer_mohammed_olivaclinic_com_2024-07-09-18-45-00.wav
No alternatives found for gs://pdf_list/23_07_2024/Call_Recordings/bagcfmfoio_pradeep_mr_olivaclinic_com_2024-07-09-17-34-00.wav
No alternatives found for gs://pdf_list/23_07_2024/Call_Recordings/bdchmaddnj_102297_2024-07-09-11-16-32.wav
No alternatives found for gs://pdf_list/23_07_2024/Call_Recordings/bdchmaddnj_102506_2024-07-09-10-57-21.wav
No alternatives found for gs://pdf_list/23_07_2024/Call_Recordings/bdchmaddnj_102506_2024-07-09-13-17-16.wav
No alternatives found for gs://pdf_list/23_07_2024/Call_Recordings/bdchmaddnj_10

In [50]:
import os
import json
from google.cloud import speech_v2
from google.cloud import translate_v2 as translate
from google.api_core.client_options import ClientOptions
from google.cloud import storage

# Set your project ID, region, and GCS URI
PROJECT_ID = "etundrant"
REGION = "asia-southeast1"
GCS_FOLDER_URI = "gs://pdf_list/23_07_2024/Call_Recordings/"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:\\All_Files\\Chirp_Google\\etundrant-ab7487fb20ac.json"

def list_audio_files(gcs_folder_uri: str):
    """List all files in a GCS folder."""
    storage_client = storage.Client()
    bucket_name = gcs_folder_uri.replace("gs://", "").split("/")[0]
    prefix = "/".join(gcs_folder_uri.replace("gs://", "").split("/")[1:])
    bucket = storage_client.bucket(bucket_name)
    blobs = bucket.list_blobs(prefix=prefix)
    audio_files = [f"gs://{bucket_name}/{blob.name}" for blob in blobs if blob.name.endswith(".wav")]
    return audio_files

def translate_text(text, target_language="en"):
    """Translates text to the target language."""
    translate_client = translate.Client()
    result = translate_client.translate(text, target_language=target_language)
    return result["translatedText"]

def transcribe_and_translate_audio(client, audio_uri: str):
    """Transcribes and translates an individual audio file."""
    config = speech_v2.RecognitionConfig(
        auto_decoding_config=speech_v2.AutoDetectDecodingConfig(),
        language_codes=["hi-IN"],  # Hindi
        model="chirp",  # Chirp model
    )

    file_metadata = speech_v2.BatchRecognizeFileMetadata(uri=audio_uri)
    request = speech_v2.BatchRecognizeRequest(
        recognizer=f"projects/{PROJECT_ID}/locations/{REGION}/recognizers/_",
        config=config,
        files=[file_metadata],
        recognition_output_config=speech_v2.RecognitionOutputConfig(
            inline_response_config=speech_v2.InlineOutputConfig(),
        ),
    )

    operation = client.batch_recognize(request=request)
    response = operation.result(timeout=120)

    transcriptions = []
    if response.results:
        for result_key, result_value in response.results.items():
            if result_value.inline_result and result_value.inline_result.transcript.results:
                for res in result_value.inline_result.transcript.results:
                    for alternative in res.alternatives:
                        hindi_transcript = alternative.transcript
                        english_translation = translate_text(hindi_transcript)
                        transcriptions.append({
                            "hindi_transcript": hindi_transcript,
                            "english_translation": english_translation
                        })
    return transcriptions

def transcribe_and_translate_batch_gcs_folder(gcs_folder_uri: str):
    """Transcribes and translates all audio files in a GCS folder and saves to a JSON file."""
    # List all audio files in the folder
    audio_files = list_audio_files(gcs_folder_uri)

    if not audio_files:
        print("No audio files found in the folder.")
        return

    print(f"Found {len(audio_files)} audio files.")

    # Initialize SpeechClient
    client = speech_v2.SpeechClient(client_options=ClientOptions(api_endpoint=f"{REGION}-speech.googleapis.com"))

    # Prepare output data
    output_data = []

    # Process each audio file
    for audio_file in audio_files:
        print(f"Processing file: {audio_file}")
        transcriptions = transcribe_and_translate_audio(client, audio_file)
        output_data.append({
            "audio_file": audio_file,
            "transcriptions": transcriptions
        })

    # Save output to a JSON file
    output_file = "transcriptions_and_translations.json"
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(output_data, f, ensure_ascii=False, indent=4)

    print(f"All transcriptions and translations have been saved to {output_file}.")

# Call the function
print("Starting transcription and translation process for all files...")
transcribe_and_translate_batch_gcs_folder(GCS_FOLDER_URI)


Starting transcription and translation process for all files...
Found 17 audio files.
Processing file: gs://pdf_list/23_07_2024/Call_Recordings/bagcfdoaln_gangadhara_gb_olivaclinic_com_2024-07-09-18-38-36.wav
Processing file: gs://pdf_list/23_07_2024/Call_Recordings/bagcfdpckn_sameer_mohammed_olivaclinic_com_2024-07-09-18-45-00.wav
Processing file: gs://pdf_list/23_07_2024/Call_Recordings/bagcfdpckn_varsha_mayndraguti_olivaclinic_com_2024-05-28-17-17-45.wav
Processing file: gs://pdf_list/23_07_2024/Call_Recordings/bagcfmfoio_pradeep_mr_olivaclinic_com_2024-07-09-17-34-00.wav
Processing file: gs://pdf_list/23_07_2024/Call_Recordings/bdchmaddnj_102297_2024-07-09-11-16-32.wav
Processing file: gs://pdf_list/23_07_2024/Call_Recordings/bdchmaddnj_102506_2024-07-09-10-57-21.wav
Processing file: gs://pdf_list/23_07_2024/Call_Recordings/bdchmaddnj_102506_2024-07-09-13-17-16.wav
Processing file: gs://pdf_list/23_07_2024/Call_Recordings/bdchmaddnj_102521_2024-07-09-16-21-15.wav
Processing file: g

In [None]:
transcriptions_and_translations.json

NameError: name 'transcriptions_and_translations' is not defined