In [5]:
from google.cloud import speech

client = speech.SpeechClient()

gcs_uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"

audio = speech.RecognitionAudio(uri=gcs_uri)

config = speech.RecognitionConfig(
    encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz = 16000,
    language_code="en-US",
)

response = client.recognize(config = config, audio = audio)

for result in response.results:
    print("Transcript: {}".format(result.alternatives[0].transcript))

I0000 00:00:1723354289.821774    2075 check_gcp_environment.cc:61] BIOS data file does not exist or cannot be opened.


Transcript: how old is the Brooklyn Bridge


In [7]:
len(response.results)

1

## Using LangChain

In [2]:
from langchain_core.messages import HumanMessage
from langchain_google_genai import ChatGoogleGenerativeAI
from google.generativeai.types.safety_types import HarmBlockThreshold, HarmCategory
import json

## Using VertexAI

In [4]:
import vertexai
from vertexai.generative_models import GenerativeModel, Part
import json
from google.cloud import speech

In [24]:
with open("environments/env.json") as f:
    env = json.load(f)

client = speech.SpeechClient()
gcs_uri = env["sample_audio_file"]
audio = speech.RecognitionAudio(uri=gcs_uri)

config = speech.RecognitionConfig(
    encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz = 16000,
    language_code="en-US",
)

s2t_response = client.recognize(config = config, audio = audio)

vertexai.init(project = env["project_id"], location=env["location"])
multimodal_model = GenerativeModel("gemini-1.5-flash-001")
for result in s2t_response.results:
    response = multimodal_model.generate_content(
        [
            result.alternatives[0].transcript
        ]
    )
    print("Transcript: {}".format(result.alternatives[0].transcript))
    print("Answer: {}".format(response.candidates[0].content.parts[0]))

Transcript: how old is the Brooklyn Bridge
Answer: text: "The Brooklyn Bridge was completed in **1883**, so it\'s currently **140 years old** (as of 2023). \n"



In [21]:
print(response.candidates[0].content.parts[0])

text: "The Brooklyn Bridge opened to the public on **May 24, 1883**. \n\nTherefore, as of today (October 27, 2023), the Brooklyn Bridge is **140 years old**. \n"



'ksst-project-one'