In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# 開始使用 Vertex AI Gemini API & Python SDK

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/doggy8088/generative-ai/blob/main/search/search_data_blending_with_gemini_summarization.zh.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory 標誌"><br> 在 Colab 中執行
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2Fdoggy8088%2Fgenerative-ai%2Fmain%2Fsearch%2Fsearch_data_blending_with_gemini_summarization.zh.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise 標誌"><br> 在 Colab Enterprise 中執行
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://github.com/doggy8088/generative-ai/blob/main/search/search_data_blending_with_gemini_summarization.zh.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub 標誌"><br> 在 GitHub 上檢視
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/doggy8088/generative-ai/main/search/search_data_blending_with_gemini_summarization.zh.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI 標誌"><br> 在 Vertex AI Workbench 中開啟
    </a>
  </td>
</table>


| | |
|-|-|
|作者 | [Shantam Gupta](https://github.com/ShantamGupta)


## 概觀

### 搜尋

Vertex AI Search 結合深度資訊檢索、最先進的自然語言處理，以及大型語言模型 (LLM) 處理的最新技術，以了解使用者意圖並為使用者傳回最相關的結果。

使用 Vertex AI Search，你可以建立用於搜尋和提供建議的應用程式。Vertex AI Search 對於某些產業，例如媒體、醫療保健和零售，也具備了特殊功能。



### Gemini

Gemini 是由 Google DeepMind 開發的生成式 AI 模型家族，專為多模態使用案例而設計。Gemini API 讓你能夠存取 Gemini Pro 和 Gemini Pro Vision 模型。

### Vertex AI Gemini API

Vertex AI Gemini API 提供統一介面，供與 Gemini 模型互動。Gemini API 中提供了下列 Gemini 1.0 Pro 模型：

- **Gemini 1.0 Pro 模型** (`gemini-1.0-pro`)：旨在處理自然語言任務、多輪次文字和程式碼聊天及程式碼產生。

你可以使用下列方法與 Gemini API 互動：

- 使用 [Vertex AI Studio](https://cloud.google.com/generative-ai-studio) 進行快速測試和指令產生
- 使用 cURL 指令
- 使用 Vertex AI SDK

有關更多資訊，請參閱 [Vertex AI 上的生成式 AI](https://cloud.google.com/vertex-ai/docs/generative-ai/learn/overview) 文件。

本教學說明如何使用混合資料儲存空間呼叫搜尋應用程式、取得搜尋摘要並使用 Gemini Pro 摘要回應。


### 使用混合資料庫建立搜尋應用程式


1. 請依照下列步驟建立搜尋 App
   - https://cloud.google.com/generative-ai-app-builder/docs/create-engine-es
2. 建立相關資料儲存區 (GCS、BQ、網站) 
   - https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es
3. 將資料儲存區連結到搜尋 App
   - https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es#multi-data-stores


### 安裝相關套件


In [None]:
# Install or upgrade the Vertex AI SDK and the Discovery Engine client into the
# user site-packages (--user); -q keeps pip's output quiet.
%pip install --upgrade --user -q google-cloud-aiplatform google-cloud-discoveryengine

### 重新啟動目前的執行階段

要在此 Jupyter 執行階段中使用新安裝的套件，你必須重新啟動執行階段。你可以執行下列單元格來執行此項操作，這會重新啟動目前的 Kernel。


In [None]:
# Newly installed packages only become visible to the environment after the
# kernel restarts, so shut the running kernel down and request a restart.
import IPython

# The positional True is the restart flag passed to do_shutdown.
IPython.Application.instance().kernel.do_shutdown(True)

### 驗證你的筆記本環境 (僅限 Colab) 

如果你在 Google Colab 上執行此筆記本，請執行下列單元格來驗證你的環境。如果你使用 [Vertex AI Workbench](https://cloud.google.com/vertex-ai-workbench)，則不需要這個步驟。


In [None]:
import sys

# Additional authentication is required for Google Colab. The check looks for
# the `google.colab` module among the already-loaded modules, so this cell does
# nothing when the notebook runs anywhere else.
if "google.colab" in sys.modules:
    # Authenticate the current user to Google Cloud; imported lazily because
    # the module is only used inside this branch.
    from google.colab import auth

    auth.authenticate_user()

### 定義 Google Cloud 專案資訊


In [None]:
# Notebook-wide settings; the `@param` markers render as Colab form fields.
# Replace the placeholder values with your own project and search engine IDs.
PROJECT_ID = "PROJECT_ID"  # @param {type:"string"}
SEARCH_APP_LOCATION = "global"  # @param {type:"string"}
SEARCH_ENGINE_ID = "VERTEX_SEARCH_ENGINE_ID"  # @param {type:"string"}
# Google Cloud region IDs are lower-case; keep this in canonical form because
# the value is passed as-is to vertexai.init().
LOCATION_GEMINI_MODEL = "northamerica-northeast1"  # @param {type:"string"}

### 初始化 Vertex AI SDK


In [None]:
import vertexai

# Initialise the Vertex AI SDK with the PROJECT_ID and LOCATION_GEMINI_MODEL
# settings so later GenerativeModel calls use that project and region.
vertexai.init(project=PROJECT_ID, location=LOCATION_GEMINI_MODEL)

### 引入相關套件


In [None]:
from vertexai.generative_models import (
    GenerativeModel,
    GenerationConfig,
    HarmCategory,
    HarmBlockThreshold,
)
import requests
import subprocess
import re

### 使用資料混合 (混合 Datastore) 向 Vertex AI Search App 發送要求

- https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es#multi-data-stores


In [None]:
# Obtain an OAuth access token from the active gcloud credentials
access_token = (
    subprocess.check_output(["gcloud", "auth", "print-access-token"])
    .decode("utf-8")
    .strip()
)

# Resource name of the serving config. It is needed twice (in the URL and in
# the request body), so build it once to keep the two in sync.
serving_config = (
    f"projects/{PROJECT_ID}"
    f"/locations/{SEARCH_APP_LOCATION}"
    f"/collections/default_collection/engines/{SEARCH_ENGINE_ID}"
    "/servingConfigs/default_search"
)

# Construct the API endpoint URL
url = f"https://discoveryengine.googleapis.com/v1beta/{serving_config}:search"

# Headers for the request
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json",
}

# Data payload for the POST request
data = {
    "servingConfig": serving_config,
    "query": "How many 10-ks are there in the datastore ",  # <- insert your search prompt/query
    "pageSize": 10,  # integer, as in the REST schema
}

# Send the POST request; the timeout stops the cell from hanging forever if
# the service is unreachable.
response = requests.post(url, headers=headers, json=data, timeout=60)

# Check for successful response. Raise instead of calling exit(): in a notebook
# exit() shuts the kernel down and hides the reason, whereas an exception stops
# the cell (and "Run All") with the status code and error body visible.
if response.status_code != 200:
    raise RuntimeError(
        f"Request failed with status code: {response.status_code}\n{response.text}"
    )

output = response.text
print(output)

# Store the parsed JSON response in a variable called 'response_data'
response_data = response.json()

### 從搜尋結果擷取文字片段 (snippets)


In [None]:
# Collect the raw snippet strings from every search result.
snippets_list = []

# "results" may be missing from the response (presumably when nothing matched),
# so default to an empty list instead of raising KeyError.
for idx, result in enumerate(response_data.get("results", []), start=1):
    document = result.get("document", {})
    if "derivedStructData" not in document:
        continue
    print(f"\n--- Snippets from Document {idx} ---")
    for snippet_item in document["derivedStructData"].get("snippets", []):
        # Skip entries that carry no snippet text
        if "snippet" not in snippet_item:
            continue
        print(snippet_item["snippet"])
        snippets_list.append(snippet_item["snippet"])

# clean up snippets: strip anything that looks like a markup tag (<...>) and
# keep the cleaned strings so later cells can reuse them
clean_snippets_list = [re.sub("<[^>]*>", "", item) for item in snippets_list]
for clean_text in clean_snippets_list:
    print(clean_text)
    print("\n")

### 將搜尋結果片段提供給 Gemini Pro 模型，並根據你的原始提示制定摘要/回應


#### 模型參數

你對模型所使用的每一個提示，都包含用於控制模型如何產生回應之參數值。

模型可以針對不同參數值產生不同的結果。

你可以透過實驗不同的模型參數，觀察結果如何產生變化。


In [None]:
# Generation parameters; this object is handed to model.generate_content() as
# its generation_config argument.
# NOTE(review): 0.9 looks like a high temperature for a summarisation task;
# consider a lower value if more repeatable output is wanted. Confirm against
# the Vertex AI parameter documentation.
generation_config = GenerationConfig(
    temperature=0.9,
    top_p=1.0,
    max_output_tokens=2048,  # cap on the number of tokens in the response
)

In [None]:
# Prompt for Gemini Pro model: the string form of the whole search response,
# followed by an instruction and the original search query.
PROMPT_GEMINI = (
    str(response_data)
    + " organize the json results based on the question :"
    + data["query"]
)

model = GenerativeModel("gemini-1.0-pro")  # specify the gemini model version


def generate(PROMPT_GEMINI: str) -> str:
    """Stream a Gemini response for the prompt and return the full text.

    Each streamed chunk is printed as soon as it arrives, and the chunks are
    also collected so the complete response is returned to the caller.

    Args:
        PROMPT_GEMINI: The full prompt text sent to the model.

    Returns:
        The concatenated text of all streamed chunks; an empty string when no
        chunk contained text.
    """
    responses = model.generate_content(
        PROMPT_GEMINI,
        generation_config=generation_config,
        safety_settings={
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        },
        stream=True,
    )

    text_chunks = []
    for chunk in responses:
        try:
            chunk_text = chunk.text
        except (ValueError, AttributeError):
            # `.text` raises when a chunk carries no text (for example when the
            # candidate was blocked by a safety filter). Report it and keep
            # consuming the stream instead of aborting mid-response.
            print("\n[chunk skipped: no text returned]")
            continue
        print(chunk_text, end="")
        text_chunks.append(chunk_text)

    return "".join(text_chunks)


# Assign the result so the cell does not echo the already-printed text again.
summary = generate(PROMPT_GEMINI)