# <font color="blue">AI 應用開發</font> <font size=4>by Enos Chou</font>

<b>Focus</b>
<ol>
    <li>Google Cloud Speech-to-Text API</li>
    <li>Google Cloud Vision API</li>
    <li>Gemini API</li>
    <li>Google Cloud Firestore for Vector</li>
    <li>AI Agent ft. LangGraph + Gemini</li>
    <li>Google Cloud Text-to-Speech API ft. Gemini</li>
    <li>OpenAI API ft. Azure gpt-4o-mini-tts</li>
</ol>

<b>Language</b>
<ul>
    <li type="None">Python 3.12</li>
</ul>

## Google Cloud Speech-to-Text API

<b>GCP Role</b>
<ul>
    <li type="None"><font color="gray">(optional) Storage 物件使用者</font></li>
</ul>

<b>Dependencies</b>
<ul>
    <li type="None">google-cloud-speech</li>
    <li type="None"><font color="gray">(optional) google-cloud-storage</font></li>
</ul>

In [None]:
!pip install google-cloud-speech

### 初始化

In [None]:
# case 1: init by assignment
# Build the Speech-to-Text client directly from a service-account key file.

from google.cloud import speech

# Placeholder: replace with the path to your service-account JSON key file.
YOUR_SERVICE = 'YOUR_SERVICE'

speech_client = speech.SpeechClient.from_service_account_json(YOUR_SERVICE)

In [None]:
# case 2: init by environment
# Export the key-file path through GOOGLE_APPLICATION_CREDENTIALS, then build
# the client without passing credentials explicitly.

import os

from google.cloud import speech

# Placeholder: replace with the path to your service-account JSON key file.
YOUR_SERVICE = 'YOUR_SERVICE'

# The variable is set before the client is constructed.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = YOUR_SERVICE
speech_client = speech.SpeechClient()

### 準備音檔

In [None]:
# case 1: one-shot upload
# Read the whole audio file and send its bytes inline with the request.

# Placeholder: replace with the path to a local audio file.
YOUR_AUDIO = 'YOUR_AUDIO'

# prepare audio content
with open(YOUR_AUDIO, 'rb') as f:
    content = f.read()
audio = speech.RecognitionAudio(content=content)

In [None]:
# case 2: through GCS
# Upload the local audio file to a bucket and reference it by its gs:// URI.
# Uses YOUR_SERVICE and `speech` from the initialization cell.

from google.cloud import storage

# Placeholders: target bucket name and local audio file path.
YOUR_BUCKET = 'YOUR_BUCKET'
YOUR_AUDIO = 'YOUR_AUDIO'

# prepare audio content
storage_client = storage.Client.from_service_account_json(YOUR_SERVICE)
bucket = storage_client.bucket(YOUR_BUCKET)
# The local path is also used as the object name inside the bucket.
bucket.blob(YOUR_AUDIO).upload_from_filename(YOUR_AUDIO)
uri = f'gs://{YOUR_BUCKET}/{YOUR_AUDIO}'
audio = speech.RecognitionAudio(uri=uri)

### recognize

In [None]:
# Transcribe the prepared audio and print the first alternative of every result.
recognition_settings = dict(
    encoding=speech.RecognitionConfig.AudioEncoding.MP3,  # omit this entry if WAV
    sample_rate_hertz=44100,
    audio_channel_count=2,  # take care, default is 1
    language_code="zh-TW",
    max_alternatives=10,
)
config = speech.RecognitionConfig(**recognition_settings)
response = speech_client.recognize(config=config, audio=audio)

# Up to 10 alternatives are requested, but only the first one is shown.
for result in response.results:
    best = result.alternatives[0]
    print(f'{best.transcript}, {best.confidence:.3f}')

## Google Cloud Vision API

<b>GCP Role</b>
<ul>
    <li type="None"><font color="gray">(optional) Storage 物件使用者</font></li>
</ul>

<b>Dependencies</b>
<ul>
    <li type="None">google-cloud-vision</li>
    <li type="None"><font color="gray">(optional) google-cloud-storage</font></li>
    <li type="None"><font color="gray">(optional) matplotlib</font></li>
    <li type="None"><font color="gray">(optional) pillow</font></li>
</ul>

In [None]:
!pip install google-cloud-vision matplotlib pillow

### 初始化

In [None]:
# case 1: init by assignment
# Build the Vision client directly from a service-account key file.

from google.cloud import vision

# Placeholder: replace with the path to your service-account JSON key file.
YOUR_SERVICE = 'YOUR_SERVICE'

client = vision.ImageAnnotatorClient.from_service_account_json(YOUR_SERVICE)

In [None]:
# case 2: init by environment
# Export the key-file path through GOOGLE_APPLICATION_CREDENTIALS, then build
# the client without passing credentials explicitly.

import os

from google.cloud import vision

# Placeholder: replace with the path to your service-account JSON key file.
YOUR_SERVICE = 'YOUR_SERVICE'

# The variable is set before the client is constructed.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = YOUR_SERVICE
client = vision.ImageAnnotatorClient()

### 準備圖檔

In [None]:
# case 1: one-shot upload
# Read the whole picture and send its bytes inline with the request.

# Placeholder: replace with the path to a local image file.
YOUR_PIC = 'YOUR_PIC'

with open(YOUR_PIC, 'rb') as image_file:
    content = image_file.read()
image = vision.Image(content=content)

In [None]:
# case 2: through GCS
# Upload the local picture to a bucket and reference it by its gs:// URI.
# Uses YOUR_SERVICE and `vision` from the initialization cell.

from google.cloud import storage

# Placeholders: target bucket name and local image file path.
YOUR_BUCKET = 'YOUR_BUCKET'
YOUR_PIC = 'YOUR_PIC'

# Authenticate with the same key file as the Vision client so this cell works
# after either initialization case. A bare storage.Client() falls back to
# Application Default Credentials, which initialization case 1 never sets up.
storage_client = storage.Client.from_service_account_json(YOUR_SERVICE)
bucket = storage_client.bucket(YOUR_BUCKET)
# The local path is also used as the object name inside the bucket.
bucket.blob(YOUR_PIC).upload_from_filename(YOUR_PIC)
image_uri = f'gs://{YOUR_BUCKET}/{YOUR_PIC}'
source = vision.ImageSource(image_uri=image_uri)
image = vision.Image(source=source)
# equivalent alternative:
#image = vision.Image()
#image.source.image_uri = image_uri

### label_detection

In [None]:
# Request label annotations for the prepared image.
response = client.label_detection(image=image)

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

# Show the picture first, then list every label with its score.
im = Image.open(YOUR_PIC)
plt.imshow(im)
plt.show()

label_lines = [f'{found.description}, {found.score:.3f}' for found in response.label_annotations]
for line in label_lines:
    print(line)

### face_detection

In [None]:
# Request face annotations for the prepared image.
response = client.face_detection(image=image)

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

im = Image.open(YOUR_PIC)

plt.imshow(im)

# Outline two boxes per face: bounding_poly in red, fd_bounding_poly in blue.
for face in response.face_annotations:
    outlines = (
        (face.bounding_poly.vertices, 'red'),
        (face.fd_bounding_poly.vertices, 'blue'),
    )
    for vertices, colour in outlines:
        corners = [(pt.x, pt.y) for pt in vertices]
        corners.append(corners[0])  # repeat the first corner to close the outline
        xs, ys = zip(*corners)
        plt.plot(xs, ys, color=colour)

plt.show()

# Print the name of every likelihood attribute, face by face.
likelihood_fields = ('joy', 'sorrow', 'anger', 'surprise', 'under_exposed', 'blurred', 'headwear')
for face in response.face_annotations:
    for field in likelihood_fields:
        likelihood = getattr(face, field + '_likelihood')
        print(f'{field}: {likelihood.name}')

### text_detection

In [None]:
# Request text annotations for the prepared image.
response = client.text_detection(image=image)

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

im = Image.open(YOUR_PIC)

plt.imshow(im)

# Print each detected text and outline its bounding polygon in blue.
for annotation in response.text_annotations:
    print(annotation.description)
    corners = [(pt.x, pt.y) for pt in annotation.bounding_poly.vertices]
    corners.append(corners[0])  # repeat the first corner to close the outline
    xs = [cx for cx, _ in corners]
    ys = [cy for _, cy in corners]
    plt.plot(xs, ys, color='blue')

plt.show()

### document_text_detection

In [None]:
# Request document-text annotations for the prepared image.
response = client.document_text_detection(image=image)

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

im = Image.open(YOUR_PIC)

plt.imshow(im)

# Same rendering as for text_detection: print the text, draw its polygon in blue.
for block in response.text_annotations:
    print(block.description)
    outline = [(vertex.x, vertex.y) for vertex in block.bounding_poly.vertices]
    outline.append(outline[0])  # repeat the first corner to close the outline
    xs, ys = zip(*outline)
    plt.plot(xs, ys, color='blue')

plt.show()

### object_localization

In [None]:
# Request localized object annotations for the prepared image.
response = client.object_localization(image=image)

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

im = Image.open(YOUR_PIC)

plt.imshow(im)

# The polygons are given as normalized vertices, so they are scaled by the
# image size before drawing.
width, height = im.size
for found in response.localized_object_annotations:
    caption = f'{found.name}: {found.score:.2f}'
    outline = [(pt.x * width, pt.y * height) for pt in found.bounding_poly.normalized_vertices]
    outline.append(outline[0])  # repeat the first corner to close the outline
    xs, ys = zip(*outline)
    plt.plot(xs, ys, color='blue')
    # label the box at its first corner
    plt.text(xs[0], ys[0], caption, color='yellow')

plt.show()

### web_detection

In [None]:
# Request web detection results for the prepared image.
response = client.web_detection(image=image)

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

im = Image.open(YOUR_PIC)
plt.imshow(im)
plt.show()

# Print entity descriptions, then similar-image URLs, then best-guess labels.
web = response.web_detection
for entity in web.web_entities:
    print(entity.description)
for similar in web.visually_similar_images:
    print(similar.url)
for guess in web.best_guess_labels:
    print(guess)

### landmark_detection

In [None]:
# one-shot upload
# Load the picture for this section and send its bytes inline.

# Placeholder: replace with the path to a local image file.
YOUR_PIC = 'YOUR_PIC'

with open(YOUR_PIC, 'rb') as image_file:
    content = image_file.read()
image = vision.Image(content=content)

In [None]:
# Request landmark annotations for the prepared image.
response = client.landmark_detection(image=image)

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

im = Image.open(YOUR_PIC)

plt.imshow(im)

# Print each landmark with the coordinates of its first location and draw
# its bounding polygon with the same caption.
for landmark in response.landmark_annotations:
    where = landmark.locations[0].lat_lng
    caption = f'{landmark.description} @ ({where.latitude}, {where.longitude})'
    print(caption)
    outline = [(pt.x, pt.y) for pt in landmark.bounding_poly.vertices]
    outline.append(outline[0])  # repeat the first corner to close the outline
    xs, ys = zip(*outline)
    plt.plot(xs, ys, color='blue')
    plt.text(xs[0], ys[0], caption, color='yellow')

plt.show()

### logo_detection

In [None]:
# one-shot upload
# Load the picture for this section and send its bytes inline.

# Placeholder: replace with the path to a local image file.
YOUR_PIC = 'YOUR_PIC'

with open(YOUR_PIC, 'rb') as image_file:
    content = image_file.read()
image = vision.Image(content=content)

In [None]:
# Request logo annotations for the prepared image.
response = client.logo_detection(image=image)

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

im = Image.open(YOUR_PIC)

plt.imshow(im)

# Print each logo description and draw its bounding polygon with that caption.
for found in response.logo_annotations:
    caption = found.description
    print(caption)
    outline = [(pt.x, pt.y) for pt in found.bounding_poly.vertices]
    outline.append(outline[0])  # repeat the first corner to close the outline
    xs, ys = zip(*outline)
    plt.plot(xs, ys, color='blue')
    plt.text(xs[0], ys[0], caption, color='yellow')

plt.show()

### safe_search_detection

In [None]:
# one-shot upload
# Load the picture for this section and send its bytes inline.

# Placeholder: replace with the path to a local image file.
YOUR_PIC = 'YOUR_PIC'

with open(YOUR_PIC, 'rb') as image_file:
    content = image_file.read()
image = vision.Image(content=content)

In [None]:
# Request the safe-search annotation for the prepared image.
response = client.safe_search_detection(image=image)

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

im = Image.open(YOUR_PIC)
plt.imshow(im)
plt.show()

# Print the likelihood name reported for each safe-search category.
verdict = response.safe_search_annotation
for category in ('adult', 'spoof', 'medical', 'violence', 'racy'):
    print(f'{category}:', getattr(verdict, category).name)

## Gemini API

<b>GCP Role</b>
<ul>
    <li type="None">Vertex AI 使用者</li>
</ul>

<b>Dependency</b>
<ul>
    <li type="None">google-genai</li>
</ul>

<b>Available MIME Types</b>

<i>Document</i> <a href="https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/document-understanding#document-requirements">[ref]</a>
<ul><tt>
    <li type="None">application/pdf</li>
    <li type="None">text/plain</li>
</tt></ul>

<i>Audio</i> <a href="https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/audio-understanding#audio-requirements">[ref]</a>
<ul><tt>
    <li type="None">audio/aac</li>
    <li type="None">audio/flac</li>
    <li type="None">audio/mp3</li>
    <li type="None">audio/m4a</li>
    <li type="None">audio/mpeg</li>
    <li type="None">audio/mpga</li>
    <li type="None">audio/mp4</li>
    <li type="None">audio/opus</li>
    <li type="None">audio/pcm</li>
    <li type="None">audio/wav</li>
    <li type="None">audio/webm</li>
</tt></ul>

<i>Image</i> <a href="https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements">[ref]</a>
<ul><tt>
    <li type="None">image/png</li>
    <li type="None">image/jpeg</li>
    <li type="None">image/webp</li>
</tt></ul>

<i>Video</i> <a href="https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/video-understanding#video-requirements">[ref]</a>
<ul><tt>
    <li type="None">video/x-flv</li>
    <li type="None">video/quicktime</li>
    <li type="None">video/mpeg</li>
    <li type="None">video/mpegps</li>
    <li type="None">video/mpg</li>
    <li type="None">video/mp4</li>
    <li type="None">video/webm</li>
    <li type="None">video/wmv</li>
    <li type="None">video/3gpp</li>
</tt></ul>

In [None]:
!pip install google-genai

### 1. 車牌辨識

In [None]:
# Send one JPEG plus a text prompt to the model and print the text reply.

import os
from time import time

from google import genai
from google.genai.types import Part, Content, GenerateContentConfig


# Placeholders: replace every value before running.
YOUR_JPG = 'YOUR_JPG'  # local JPEG file to send
YOUR_SERVICE = 'YOUR_SERVICE'  # exported as GOOGLE_APPLICATION_CREDENTIALS
YOUR_MODEL = 'YOUR_MODEL'  # model name passed to generate_content
YOUR_REGION = 'YOUR_REGION'  # passed as the client location
YOUR_PROMPT = 'YOUR_PROMPT'  # text part placed before the image

# initialization (vertexai=True selects the Vertex AI backend)
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = YOUR_SERVICE
genai_client = genai.Client(vertexai=True, location=YOUR_REGION)

# parameters
config = GenerateContentConfig(
    max_output_tokens=100,
    temperature=0
)

# prompt
prompt = Part.from_text(text=YOUR_PROMPT)

# data: the image bytes are sent inline as an image/jpeg part
with open(YOUR_JPG, 'rb') as f:
    data = f.read()
data = Part.from_bytes(data=data, mime_type='image/jpeg')
contents = [Content(role='user', parts=[prompt, data])]

# call model (the request is timed with wall-clock time)
start = time()

r = genai_client.models.generate_content(model=YOUR_MODEL, contents=contents, config=config)

# output: elapsed seconds, reply text, token usage
print(f'{time()-start:.3f}s elapsed')
print(r.text)
print(r.usage_metadata)

### 2. 健檢報告分析

In [None]:
# Send one JPEG plus a text prompt and parse the reply as JSON.

import os
from time import time
import json
from pprint import pprint

from google import genai
from google.genai.types import Part, Content, GenerateContentConfig


# Placeholders: replace every value before running.
YOUR_JPG = 'YOUR_JPG'  # local JPEG file to send
YOUR_SERVICE = 'YOUR_SERVICE'  # exported as GOOGLE_APPLICATION_CREDENTIALS
YOUR_MODEL = 'YOUR_MODEL'  # model name passed to generate_content
YOUR_REGION = 'YOUR_REGION'  # passed as the client location
YOUR_PROMPT = 'YOUR_PROMPT'  # text part placed before the image

# initialization (vertexai=True selects the Vertex AI backend)
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = YOUR_SERVICE
genai_client = genai.Client(vertexai=True, location=YOUR_REGION)

# parameters: a JSON response is requested, which the output step relies on
config = GenerateContentConfig(
    max_output_tokens=2000,
    temperature=0,
    response_mime_type='application/json'
)

# prompt
prompt = Part.from_text(text=YOUR_PROMPT)

# data: the image bytes are sent inline as an image/jpeg part
with open(YOUR_JPG, 'rb') as f:
    data = f.read()
data = Part.from_bytes(data=data, mime_type='image/jpeg')
contents = [Content(role='user', parts=[prompt, data])]

# call model (the request is timed with wall-clock time)
start = time()

r = genai_client.models.generate_content(model=YOUR_MODEL, contents=contents, config=config)

# output: elapsed seconds, the reply decoded with json.loads, token usage
print(f'{time()-start:.3f}s elapsed')
pprint(json.loads(r.text))
print(r.usage_metadata)

### 3. 文件摘要

In [None]:
# Send one PDF plus a text prompt to the model and print the text reply.

import os
from time import time

from google import genai
from google.genai.types import Part, Content, GenerateContentConfig


# Placeholders: replace every value before running.
YOUR_PDF = 'YOUR_PDF'  # local PDF file to send
YOUR_SERVICE = 'YOUR_SERVICE'  # exported as GOOGLE_APPLICATION_CREDENTIALS
YOUR_MODEL = 'YOUR_MODEL'  # model name passed to generate_content
YOUR_REGION = 'YOUR_REGION'  # passed as the client location
YOUR_PROMPT = 'YOUR_PROMPT'  # text part placed before the document

# initialization (vertexai=True selects the Vertex AI backend)
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = YOUR_SERVICE
genai_client = genai.Client(vertexai=True, location=YOUR_REGION)

# parameters: note the higher temperature than in the recognition examples
config = GenerateContentConfig(
    max_output_tokens=2000,
    temperature=1.4
)

# prompt
prompt = Part.from_text(text=YOUR_PROMPT)

# data: the file bytes are sent inline as an application/pdf part
with open(YOUR_PDF, 'rb') as f:
    data = f.read()
data = Part.from_bytes(data=data, mime_type='application/pdf')
contents = [Content(role='user', parts=[prompt, data])]

# call model (the request is timed with wall-clock time)
start = time()

r = genai_client.models.generate_content(model=YOUR_MODEL, contents=contents, config=config)

# output: elapsed seconds, reply text, token usage
print(f'{time()-start:.3f}s elapsed')
print(r.text)
print(r.usage_metadata)

### 4. 影評歸納

In [None]:
# Summarize a typed-in movie review; the prompt is expected to ask for JSON,
# but no response_mime_type is set, so the reply is cleaned up by hand.

import os
import re
from time import time
import json
from pprint import pprint

from google import genai
from google.genai.types import Part, Content, GenerateContentConfig


# Placeholders: replace every value before running.
YOUR_SERVICE = 'YOUR_SERVICE'  # exported as GOOGLE_APPLICATION_CREDENTIALS
YOUR_MODEL = 'YOUR_MODEL'  # model name passed to generate_content
YOUR_REGION = 'YOUR_REGION'  # passed as the client location
YOUR_PROMPT = 'YOUR_PROMPT'  # text part placed before the review

# initialization (vertexai=True selects the Vertex AI backend)
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = YOUR_SERVICE
genai_client = genai.Client(vertexai=True, location=YOUR_REGION)

# parameters
config = GenerateContentConfig(
    max_output_tokens=1024,
    temperature=0
)

# prompt
prompt = Part.from_text(text=YOUR_PROMPT)

# data: the review is typed in interactively
data = input('請輸入影評：')
data = Part.from_text(text=data)
contents = [Content(role='user', parts=[prompt, data])]

# call model (the request is timed with wall-clock time)
start = time()

r = genai_client.models.generate_content(model=YOUR_MODEL, contents=contents, config=config)

# output
print(f'{time()-start:.3f}s elapsed')
# The reply may be wrapped in a Markdown code fence (```json ... ```).
# Remove exactly one surrounding fence if present. str.strip('json`\n') was
# not used because it treats its argument as a set of characters and can
# also eat leading/trailing j, s, o, n characters of the payload itself.
raw = (r.text or '').strip()
fenced = re.fullmatch(r'```(?:json)?\s*(.*?)\s*```', raw, flags=re.DOTALL | re.IGNORECASE)
payload = fenced.group(1) if fenced else raw
pprint(json.loads(payload))
print(r.usage_metadata)

In [None]:
# Summarize a typed-in movie review using a JSON response schema.

import os
from time import time
from pprint import pprint

from google import genai
from google.genai.types import Part, Content, GenerateContentConfig


# Placeholders: replace every value before running.
YOUR_SERVICE = 'YOUR_SERVICE'  # exported as GOOGLE_APPLICATION_CREDENTIALS
YOUR_MODEL = 'YOUR_MODEL'  # model name passed to generate_content
YOUR_REGION = 'YOUR_REGION'  # passed as the client location
YOUR_PROMPT = 'YOUR_PROMPT'  # text part placed before the review
# Shape of the requested reply: three required string fields, with "review"
# restricted to one of three labels.
YOUR_RESPONSE_SCHEMA = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "abstract": {"type": "string"},
        "review": {"type": "string", "enum": ["正評", "中立", "負評"]}
    },
    "required": ["name", "abstract", "review"]
}

# initialization (vertexai=True selects the Vertex AI backend)
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = YOUR_SERVICE
genai_client = genai.Client(vertexai=True, location=YOUR_REGION)

# parameters: JSON output is requested together with the schema above
config = GenerateContentConfig(
    max_output_tokens=1024,
    temperature=0,
    response_mime_type='application/json',
    response_schema=YOUR_RESPONSE_SCHEMA
)

# prompt
prompt = Part.from_text(text=YOUR_PROMPT)

# data: the review is typed in interactively
data = input('請輸入影評：')
data = Part.from_text(text=data)
contents = [Content(role='user', parts=[prompt, data])]

# call model (the request is timed with wall-clock time)
start = time()

r = genai_client.models.generate_content(model=YOUR_MODEL, contents=contents, config=config)

# output: r.parsed is printed instead of decoding r.text by hand
print(f'{time()-start:.3f}s elapsed')
pprint(r.parsed)
print(r.usage_metadata)

### 5. 文字聊天

In [None]:
# First chat turn: one user message with a system instruction.

import os
from time import time

from google import genai
from google.genai.types import Part, Content, GenerateContentConfig


# Placeholders: replace every value before running.
YOUR_SERVICE = 'YOUR_SERVICE'  # exported as GOOGLE_APPLICATION_CREDENTIALS
YOUR_MODEL = 'YOUR_MODEL'  # model name passed to generate_content
YOUR_REGION = 'YOUR_REGION'  # passed as the client location
YOUR_SYSTEM_INSTRUCTION = 'YOUR_SYSTEM_INSTRUCTION'
# Every listed harm category is given the BLOCK_NONE threshold.
YOUR_SAFETY_SETTINGS = [
    {'category': 'HARM_CATEGORY_HATE_SPEECH', 'threshold': 'BLOCK_NONE'},
    {'category': 'HARM_CATEGORY_HARASSMENT', 'threshold': 'BLOCK_NONE'},
    {'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'threshold': 'BLOCK_NONE'},
    {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'threshold': 'BLOCK_NONE'}
]
YOUR_CHAT = 'YOUR_CHAT'  # the user message

# initialization (vertexai=True selects the Vertex AI backend)
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = YOUR_SERVICE
genai_client = genai.Client(vertexai=True, location=YOUR_REGION)

# parameters
config = GenerateContentConfig(
    max_output_tokens=1024,
    temperature=1.5,
    system_instruction=YOUR_SYSTEM_INSTRUCTION,
    safety_settings=YOUR_SAFETY_SETTINGS
)

# prompt
chat = Part.from_text(text=YOUR_CHAT)

# data: a single user turn
contents = [Content(role='user', parts=[chat])]

# call model (the request is timed with wall-clock time)
start = time()

r = genai_client.models.generate_content(model=YOUR_MODEL, contents=contents, config=config)

# output: elapsed seconds, reply text, token usage
print(f'{time()-start:.3f}s elapsed')
print(r.text)
print(r.usage_metadata)

In [None]:
# Follow-up message sent on its own: `contents` holds only this turn, so the
# previous exchange is not part of the request.
# Reuses genai_client, config, YOUR_MODEL and the imports from the previous cell.
YOUR_CHAT = 'YOUR_CHAT'

# prompt
chat = Part.from_text(text=YOUR_CHAT)

# data: a single user turn, no history
contents = [Content(role='user', parts=[chat])]

# call model (the request is timed with wall-clock time)
start = time()

r = genai_client.models.generate_content(model=YOUR_MODEL, contents=contents, config=config)

# output: elapsed seconds, reply text, token usage
print(f'{time()-start:.3f}s elapsed')
print(r.text)
print(r.usage_metadata)

In [None]:
# First chat turn again; the next cell continues it with the history replayed.

import os
from time import time

from google import genai
from google.genai.types import Part, Content, GenerateContentConfig


# Placeholders: replace every value before running.
YOUR_SERVICE = 'YOUR_SERVICE'  # exported as GOOGLE_APPLICATION_CREDENTIALS
YOUR_MODEL = 'YOUR_MODEL'  # model name passed to generate_content
YOUR_REGION = 'YOUR_REGION'  # passed as the client location
# Triple-quoted so the instruction may span several lines.
YOUR_SYSTEM_INSTRUCTION = '''YOUR_SYSTEM_INSTRUCTION'''
# Every listed harm category is given the BLOCK_NONE threshold.
YOUR_SAFETY_SETTINGS = [
    {'category': 'HARM_CATEGORY_HATE_SPEECH', 'threshold': 'BLOCK_NONE'},
    {'category': 'HARM_CATEGORY_HARASSMENT', 'threshold': 'BLOCK_NONE'},
    {'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'threshold': 'BLOCK_NONE'},
    {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'threshold': 'BLOCK_NONE'}
]
YOUR_CHAT = 'YOUR_CHAT'  # the user message

# initialization (vertexai=True selects the Vertex AI backend)
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = YOUR_SERVICE
genai_client = genai.Client(vertexai=True, location=YOUR_REGION)

# parameters
config = GenerateContentConfig(
    max_output_tokens=1024,
    temperature=1.5,
    system_instruction=YOUR_SYSTEM_INSTRUCTION,
    safety_settings=YOUR_SAFETY_SETTINGS
)

# prompt
chat = Part.from_text(text=YOUR_CHAT)

# data: a single user turn
contents = [Content(role='user', parts=[chat])]

# call model (the request is timed with wall-clock time)
start = time()

r = genai_client.models.generate_content(model=YOUR_MODEL, contents=contents, config=config)

# output: elapsed seconds, reply text, token usage
print(f'{time()-start:.3f}s elapsed')
print(r.text)
print(r.usage_metadata)

In [None]:
# Follow-up message with history: the previous user message and the model's
# reply are replayed before the new message.
# Reuses genai_client, config, chat and r from the previous cell.
YOUR_CHAT = 'YOUR_CHAT'

# prompt: capture the previous turn before `chat` is overwritten
previous_chat, previous_res = chat, Part.from_text(text=r.text)
chat = Part.from_text(text=YOUR_CHAT)

# data: user -> model -> user, in conversation order
contents = [Content(role='user', parts=[previous_chat]),
            Content(role='model', parts=[previous_res]),
            Content(role='user', parts=[chat])
           ]

# call model (the request is timed with wall-clock time)
start = time()

r = genai_client.models.generate_content(model=YOUR_MODEL, contents=contents, config=config)

# output: elapsed seconds, reply text, token usage
print(f'{time()-start:.3f}s elapsed')
print(r.text)
print(r.usage_metadata)

### 6. 聊天查詢真實天氣
ref 1: https://ai.google.dev/gemini-api/docs/function-calling?hl=zh-tw<br>
ref 2: https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/function-calling/intro_function_calling.ipynb

In [None]:
# Step One
# Offer the model one callable function and see whether it asks to call it.

import os
from time import time

from google import genai
from google.genai.types import Part, Content, GenerateContentConfig, FunctionDeclaration, Tool
import wea  # weather helper used in Step Two; not installed by the pip cell above


# Placeholders: replace every value before running.
YOUR_SERVICE = 'YOUR_SERVICE'  # exported as GOOGLE_APPLICATION_CREDENTIALS
YOUR_MODEL = 'YOUR_MODEL'  # model name passed to generate_content
YOUR_REGION = 'YOUR_REGION'  # passed as the client location
YOUR_SYSTEM_INSTRUCTION = '你專注於查詢並回報台灣各地天氣，總是引導客戶詢問正確的地理位置，藉由工具計算的結果回覆天氣資訊'
# Every listed harm category is given the BLOCK_NONE threshold.
YOUR_SAFETY_SETTINGS = [
    {'category': 'HARM_CATEGORY_HATE_SPEECH', 'threshold': 'BLOCK_NONE'},
    {'category': 'HARM_CATEGORY_HARASSMENT', 'threshold': 'BLOCK_NONE'},
    {'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'threshold': 'BLOCK_NONE'},
    {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'threshold': 'BLOCK_NONE'}
]
YOUR_CHAT = 'YOUR_CHAT'  # the user message

# initialization (vertexai=True selects the Vertex AI backend)
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = YOUR_SERVICE
genai_client = genai.Client(vertexai=True, location=YOUR_REGION)

# function declaration: one required string argument "site", limited to the
# two listed values
wea_fndec = FunctionDeclaration(
    name='get_taiwan_weather',
    description="Get current weather of a given location in Taiwan",
    parameters={
        "type": "object",
        "properties": {"site": {"type": "string", "description": "Location of Taiwan", "enum": ["臺北", "苗栗"]}},
        "required": ["site"]
    }
)

# tools
tools = Tool(function_declarations=[wea_fndec])

# parameters
config = GenerateContentConfig(
    max_output_tokens=1024,
    temperature=1.5,
    system_instruction=YOUR_SYSTEM_INSTRUCTION,
    safety_settings=YOUR_SAFETY_SETTINGS,
    tools=[tools]
)

# prompt
chat = Part.from_text(text=YOUR_CHAT)

# data: a single user turn
contents = [Content(role='user', parts=[chat])]

# call model (the request is timed with wall-clock time)
start = time()

r = genai_client.models.generate_content(model=YOUR_MODEL, contents=contents, config=config)

# output: print the requested function calls if there are any, else the text
print(f'{time()-start:.3f}s elapsed')
if r.function_calls:
    for f in r.function_calls:
        print(f)
else:
    print(r.text)
print(r.usage_metadata)

In [None]:
# Step Two

# Run the requested function locally, append the call and its result to the
# conversation, and ask the model again. Nothing happens when Step One
# returned plain text.
if r.function_calls:
    # data
    for fn in r.function_calls:
        # Calls with any other name are skipped without a response.
        if fn.name == 'get_taiwan_weather':
            # The model-supplied arguments are passed as keyword arguments.
            wea_info = wea.tostr(wea.grab(**fn.args))
            fn_response = Part.from_function_response(
                name=fn.name,
                response={'result': wea_info}
            )
            # NOTE(review): the model turn is rebuilt from the parsed call, so
            # any other fields of the original response part are not carried
            # over - confirm the model in use does not need them.
            contents.append(Content(role="model", parts=[Part(function_call=fn)]))
            contents.append(Content(role="user", parts=[fn_response]))

    # call model (the request is timed with wall-clock time)
    start = time()

    r = genai_client.models.generate_content(model=YOUR_MODEL, contents=contents, config=config)

    # output: elapsed seconds, reply text, token usage
    print(f'{time()-start:.3f}s elapsed')
    print(r.text)
    print(r.usage_metadata)

## Google Cloud Firestore for Vector

<b>GCP Role</b>
<ul>
    <li type="None">Vertex AI 使用者</li>
    <li type="None">Cloud Datastore 使用者</li>
</ul>

<b>Dependencies</b>
<ul>
    <li type="None">google-genai</li>
    <li type="None">google-cloud-firestore</li>
</ul>

In [None]:
!pip install google-genai google-cloud-firestore

### 2. prepare the data and embedding 

In [None]:
# Create the client used to compute the embeddings.

import os

from google import genai
from google.genai.types import EmbedContentConfig

# Placeholders: replace YOUR_SERVICE, YOUR_REGION and YOUR_EMBEDDING_KEY.
YOUR_SERVICE = 'YOUR_SERVICE'
YOUR_REGION = 'YOUR_REGION'
YOUR_MODEL = 'gemini-embedding-001'  # dimension limit 3072
YOUR_DIM = 768  # Firestore Vector dimension limit 2048
YOUR_EMBEDDING_KEY = 'YOUR_EMBEDDING_KEY'  # document field that holds the vector

# vertexai=True selects the Vertex AI backend
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = YOUR_SERVICE
genai_client = genai.Client(vertexai=True, location=YOUR_REGION)

In [None]:
data = [
    {'title': '植物辨識',
     'text': '用 YOLO 與 CNN 技術訓練植物辨識 AI 模型，並且整合到 LINE Bot 以及 Android APP',
     'author': ['John Wang', 'Jack Chang'],
     'date': '20230603'},
    {'title': '點名系統',
     'text': '運用 dlib 人臉辨識模組實作點名系統，以 Windows Desktop 呈現',
     'author': ['Jerry Lin', 'John Wang', 'Jim Hsiao'],
     'date': '20230730'},
    {'title': '跳舞機',
     'text': '利用 MediaPipe 搭配 Tkinter 實作跨平台跳舞機，並以邊緣運算裝置進行螢幕輸出',
     'author': ['Jackson Lu', 'James Chao'],
     'date': '20240426'},
    {'title': '理賠計算機',
     'text': '搭配 Gemini 與 LINE Bot 實作保險理賠計算機',
     'author': ['Jenny Wu', 'Jacob Chiu', 'Jim Hsiao'],
     'date': '20250712'},
    {'title': '資安檢測機器人',
     'text': '混和 AI Agent 與 RAG 技術，實作全自動資安檢測機器人',
     'author': ['Joe Li', 'Jim Hsiao'],
     'date': '20251011'}
]

In [None]:
# Embed the 'text' field of every record in a single request.
contents = [record['text'] for record in data]
document_embedding_config = EmbedContentConfig(
    taskType='RETRIEVAL_DOCUMENT',
    outputDimensionality=YOUR_DIM,
)
r = genai_client.models.embed_content(
    model=YOUR_MODEL,
    contents=contents,
    config=document_embedding_config,
)

In [None]:
print(r.embeddings)

In [None]:
# Vector is first imported in the "Vector Search" section further down, so it
# is imported here too; otherwise a fresh top-to-bottom run fails with NameError.
from google.cloud.firestore_v1.vector import Vector

# Add a zero-padded 'id' and the embedding (wrapped as a Firestore Vector)
# to a copy of each record.
docs = [d | {'id': f'{i:03}', YOUR_EMBEDDING_KEY: Vector(e.values)} for i, (d, e) in enumerate(zip(data, r.embeddings))]

### 3. store the data into Firestore

In [None]:
# Open the target collection with a client built from the key file.
from google.cloud import firestore

# Placeholders: replace every value before running.
YOUR_SERVICE = 'YOUR_SERVICE'
YOUR_DATABASE = 'YOUR_DATABASE'  # None selects the default DB
YOUR_COLLECTION = 'YOUR_COLLECTION'

firestore_client = firestore.Client.from_service_account_json(YOUR_SERVICE, database=YOUR_DATABASE)
collection = firestore_client.collection(YOUR_COLLECTION)

In [None]:
# Store every record as a document named after its 'id' field.
for d in docs:
    collection.document(d['id']).set(d)

### 5. Vector Search

In [None]:
# Create the client used to embed the search text (same settings as when
# the documents were embedded).

import os

from google import genai
from google.genai.types import EmbedContentConfig

# Placeholders: replace YOUR_SERVICE, YOUR_REGION and YOUR_EMBEDDING_KEY.
YOUR_SERVICE = 'YOUR_SERVICE'
YOUR_REGION = 'YOUR_REGION'
YOUR_MODEL = 'gemini-embedding-001'  # dimension limit 3072
YOUR_DIM = 768  # Firestore Vector dimension limit 2048
YOUR_EMBEDDING_KEY = 'YOUR_EMBEDDING_KEY'  # document field that holds the vector

# vertexai=True selects the Vertex AI backend
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = YOUR_SERVICE
genai_client = genai.Client(vertexai=True, location=YOUR_REGION)

In [None]:
# Open the collection to search and import the vector-search helpers.
from google.cloud import firestore
from google.cloud.firestore_v1.vector import Vector
from google.cloud.firestore_v1.base_vector_query import DistanceMeasure

# Placeholders: replace every value before running.
YOUR_SERVICE = 'YOUR_SERVICE'
YOUR_DATABASE = 'YOUR_DATABASE'  # None selects the default DB
YOUR_COLLECTION = 'YOUR_COLLECTION'
YOUR_EMBEDDING_KEY = 'YOUR_EMBEDDING_KEY'  # document field that holds the vector

firestore_client = firestore.Client.from_service_account_json(YOUR_SERVICE, database=YOUR_DATABASE)
collection = firestore_client.collection(YOUR_COLLECTION)

In [None]:
# Placeholder: replace with the text to search for.
YOUR_SEARCH = 'YOUR_SEARCH'

# Embed the search text as a query. The stored documents were embedded with
# RETRIEVAL_DOCUMENT; the text being searched for uses the matching
# RETRIEVAL_QUERY task type, with the same model and dimensionality.
r = genai_client.models.embed_content(
    model=YOUR_MODEL,
    contents=[YOUR_SEARCH],
    config=EmbedContentConfig(taskType='RETRIEVAL_QUERY', outputDimensionality=YOUR_DIM)
)

In [None]:
# vector search: nearest documents to the query embedding
vector_search = collection.find_nearest(
    vector_field=YOUR_EMBEDDING_KEY,
    query_vector=Vector(r.embeddings[0].values),
    #distance_measure=DistanceMeasure.EUCLIDEAN,  # smaller means closer
    #distance_measure=DistanceMeasure.COSINE,  # smaller means closer
    distance_measure=DistanceMeasure.DOT_PRODUCT,  # larger means closer
    limit=5,
    distance_result_field='d'  # return the distance in field 'd'
)

In [None]:
from google.cloud.firestore_v1.base_query import FieldFilter

# hybrid search: restrict to documents whose 'author' array contains the
# given name, then take the nearest ones to the query embedding.
# The filter is passed by keyword as a FieldFilter; positional where()
# arguments are the legacy form and emit a warning in recent client versions.
vector_search = collection.where(
    filter=FieldFilter("author", "array_contains", "Jim Hsiao")
).find_nearest(
    vector_field=YOUR_EMBEDDING_KEY,
    query_vector=Vector(r.embeddings[0].values),
    #distance_measure=DistanceMeasure.EUCLIDEAN,  # smaller means closer
    #distance_measure=DistanceMeasure.COSINE,  # smaller means closer
    distance_measure=DistanceMeasure.DOT_PRODUCT,  # larger means closer
    limit=5,
    distance_result_field='d'  # return the distance in field 'd'
)

In [None]:
# Run whichever query was built last and convert each result to a dict.
nearest = [v.to_dict() for v in vector_search.get()]

In [None]:
from pprint import pprint

# Show each hit's text together with the distance stored in field 'd'.
pprint([(a['text'], a['d']) for a in nearest])

## AI Agent ft. LangGraph + Gemini

<b>GCP 啟用 API</b>
<ul>
    <li type="None">Generative Language API or Gemini API</li>
</ul>

<b>GCP Role</b>
<ul>
    <li type="None">Vertex AI 使用者</li>
</ul>

<b>Dependencies</b>
<ul>
    <li type="None">langgraph</li>
    <li type="None">langchain-google-genai</li>
    <li type="None">geopy</li>
    <li type="None">wea</li>
</ul>

In [None]:
!pip install langgraph langchain-google-genai geopy

In [None]:
import os

from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.prebuilt import create_react_agent
from langchain_core.tools import tool
from geopy.geocoders import Nominatim
import wea


YOUR_SERVICE = 'YOUR_SERVICE'
YOUR_MODEL = 'YOUR_MODEL'
YOUR_PROMPT = 'YOUR_PROMPT'

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = YOUR_SERVICE
llm = ChatGoogleGenerativeAI(model=YOUR_MODEL, temperature=0)
geolocator = Nominatim(user_agent='user_agent')


# The docstrings of the @tool functions are kept verbatim: they are presumably
# what the decorator exposes to the model as the tool description.

@tool
def get_coordinates(location: str) -> dict[str, float] | str:
    '''輸入地點名稱, 取得地點座標'''
    loc = geolocator.geocode(location)
    if not loc:
        # nothing found: answer with a message instead of coordinates
        return f"找不到 {location}"
    return {'latitude': loc.latitude, 'longitude': loc.longitude}


@tool
def get_weather_by_location_coordinates(latitude: float, longitude: float) -> str:
    '''輸入地點座標, 取得天氣資訊'''
    coordinates = (latitude, longitude)
    return wea.tostr(wea.grab(coordinates))


tools = [get_coordinates, get_weather_by_location_coordinates]
agent = create_react_agent(model=llm, tools=tools, prompt=YOUR_PROMPT)


def ask(something: str) -> str:
    '''Send one user message to the agent and return its final answer.

    something - the question to ask
    return    - content of the last message in the agent's result
    '''
    outcome = agent.invoke({"messages": [{"role": "user", "content": something}]})
    final_message = outcome["messages"][-1]
    return final_message.content


if __name__ == '__main__':
    something = '擎天崗天氣如何？'
    print(ask(something))

In [None]:
import os

from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.prebuilt import create_react_agent
from langchain_core.tools import tool
from geopy.geocoders import Nominatim
import wea


YOUR_SERVICE = 'YOUR_SERVICE'
YOUR_MODEL = 'YOUR_MODEL'
YOUR_PROMPT = 'YOUR_PROMPT'

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = YOUR_SERVICE
llm = ChatGoogleGenerativeAI(model=YOUR_MODEL, temperature=0)
geolocator = Nominatim(user_agent='user_agent')


# The docstrings of the @tool functions are kept verbatim: they are presumably
# what the decorator exposes to the model as the tool description.

@tool
def get_coordinates(location: str) -> dict[str, float] | str:
    '''輸入地點名稱, 取得地點座標'''
    loc = geolocator.geocode(location)
    if not loc:
        # nothing found: answer with a message instead of coordinates
        return f"找不到 {location}"
    return {'latitude': loc.latitude, 'longitude': loc.longitude}


@tool
def get_weather_by_location_coordinates(latitude: float, longitude: float) -> str:
    '''輸入地點座標, 取得天氣資訊'''
    coordinates = (latitude, longitude)
    return wea.tostr(wea.grab(coordinates))


# Compared with the previous cell, the agent can also look up the weather
# directly by place name.
@tool
def get_weather_by_location_name(name: str) -> str:
    '''輸入地點名稱, 取得天氣資訊'''
    report = wea.grab(name)
    return wea.tostr(report)


tools = [
    get_coordinates,
    get_weather_by_location_coordinates,
    get_weather_by_location_name,
]
agent = create_react_agent(model=llm, tools=tools, prompt=YOUR_PROMPT)


def ask(something: str) -> str:
    '''Send one user message to the agent and return its final answer.

    something - the question to ask
    return    - content of the last message in the agent's result
    '''
    outcome = agent.invoke({"messages": [{"role": "user", "content": something}]})
    final_message = outcome["messages"][-1]
    return final_message.content


if __name__ == '__main__':
    something = '擎天崗天氣如何？'
    print(ask(something))

## Google Cloud Text-to-Speech API ft. Gemini

<b>GCP 啟用 API</b>
<ul>
    <li type="None">Cloud Text-to-Speech API</li>
    <li type="None">Generative Language API or Gemini API</li>
</ul>

<b>GCP Role</b>
<ul>
    <li type="None">Vertex AI 使用者</li>
</ul>

<b>Dependencies</b>
<ul>
    <li type="None">google-cloud-texttospeech</li>
</ul>

In [None]:
!pip install google-cloud-texttospecch

In [None]:
# Synthesize speech from text with a style prompt and save it as an MP3 file.
from google.cloud import texttospeech


# Placeholders: replace every value before running.
YOUR_SERVICE = 'YOUR_SERVICE'  # service-account JSON key file
YOUR_MODEL = 'YOUR_MODEL'  # ex: gemini-2.5-pro-tts (better), gemini-2.5-flash-tts
YOUR_AUDIO = 'YOUR_AUDIO'  # output file path
YOUR_INPUT = 'YOUR_INPUT'  # text to speak
YOUR_PROMPT = 'YOUR_PROMPT'  # passed as the prompt of the synthesis input
YOUR_VOICE = 'YOUR_VOICE'  # ex: Sulafat


tts_client = texttospeech.TextToSpeechClient.from_service_account_json(
    YOUR_SERVICE
)

synthesis_input = texttospeech.SynthesisInput(text=YOUR_INPUT, prompt=YOUR_PROMPT)

voice = texttospeech.VoiceSelectionParams(
    language_code="cmn-tw",  # cmn-tw: Taiwanese Mandarin / ja-JP: Japanese / en-US: English / cmn-cn: Mainland Mandarin
    name=YOUR_VOICE,
    model_name=YOUR_MODEL
)

audio_config = texttospeech.AudioConfig(
    audio_encoding=texttospeech.AudioEncoding.MP3  # output format
)

response = tts_client.synthesize_speech(
    input=synthesis_input,
    voice=voice,
    audio_config=audio_config
)

# The returned audio bytes are written out unchanged.
with open(YOUR_AUDIO, 'wb') as f:
    f.write(response.audio_content)

## OpenAI API ft. Azure gpt-4o-mini-tts

<b>Azure AI Foundry 部署</b>
<ul>
    <li type="None">gpt-4o-mini-tts</li>
</ul>

<b>Dependencies</b>
<ul>
    <li type="None">openai</li>
</ul>

In [None]:
!pip install openai

In [None]:
# Synthesize speech with an Azure-hosted TTS deployment and save it to a file.
from openai import AzureOpenAI


# Placeholders: replace every value before running.
YOUR_AZURE_ENDPOINT = 'YOUR_AZURE_ENDPOINT'  # ex: https://xxx.openai.azure.com/
YOUR_API_KEY = 'YOUR_API_KEY'
YOUR_API_VERSION = 'YOUR_API_VERSION'  # ex: 2025-03-01-preview
YOUR_MODEL = 'YOUR_MODEL'  # ex: gpt-4o-mini-tts
YOUR_INPUT = 'YOUR_INPUT'  # ex: "It has been raining all day, so annoying"
YOUR_INSTRUCTIONS = 'YOUR_INSTRUCTIONS'  # ex: "speak in a sad tone"
YOUR_VOICE = 'YOUR_VOICE'  # ex: nova
YOUR_FORMAT = 'YOUR_FORMAT'  # ex: mp3
YOUR_AUDIO = 'YOUR_FILE'  # output file path, ex: tts.mp3


model = AzureOpenAI(
    api_key=YOUR_API_KEY,
    azure_endpoint=YOUR_AZURE_ENDPOINT,
    api_version=YOUR_API_VERSION
)

r = model.audio.speech.create(
    model=YOUR_MODEL,
    input=YOUR_INPUT,
    instructions=YOUR_INSTRUCTIONS,
    voice=YOUR_VOICE,
    response_format=YOUR_FORMAT
)

# write_to_file() accepts only the destination path and opens it in binary
# mode itself; passing an extra 'wb' argument raises TypeError.
r.write_to_file(YOUR_AUDIO)