In [None]:
"Davlan/xlm-roberta-base-ner-hrl"

In [11]:
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
from transformers import MT5ForConditionalGeneration, T5Tokenizer
import torch

def load_nlu_models():
    """載入 NER（XLM-RoBERTa）與 Query 分類（mT5）模型"""
    # NER 模型（XLM-RoBERTa，使用新模型 Davlan/xlm-roberta-base-ner-hrl）
    ner_model_name = "Davlan/xlm-roberta-base-ner-hrl"
    ner_tokenizer = AutoTokenizer.from_pretrained(ner_model_name)
    ner_model = AutoModelForTokenClassification.from_pretrained(ner_model_name)
    ner_pipeline = pipeline("ner", model=ner_model, tokenizer=ner_tokenizer, device=0 if torch.cuda.is_available() else -1)
    
    # Query 類別分類（mT5）
    query_model_name = "google/mt5-small"
    query_tokenizer = T5Tokenizer.from_pretrained(query_model_name, model_max_length=512)
    query_model = MT5ForConditionalGeneration.from_pretrained(query_model_name, device_map="auto")
    
    return ner_pipeline, query_model, query_tokenizer

def extract_entities(query, ner_pipeline):
    """使用 XLM-RoBERTa NER 模型來提取 LOCATION、POI_TYPE、NEED、ADDRESS，並合併子詞"""
    entities = ner_pipeline(query)
    
    # 初始化結果
    result = {"LOCATION": [], "ADDRESS": [], "POI_TYPE": [], "NEED": []}
    label_map = {"LOC": "LOCATION", "ORG": "POI_TYPE", "MISC": "NEED"}
    
    prev_label, prev_word = None, ""
    for entity in entities:
        word = entity['word'].replace("▁", "").strip()  # 去掉特殊符號並去除空格
        label = entity['entity']  # BIO 格式標籤
        main_label = label[2:] if "-" in label else label  # 移除 B- 或 I-，保留類別
        mapped_label = label_map.get(main_label, None)  # 轉換成我們的分類
        
        if not mapped_label or not word:
            continue  # 如果標籤不在我們的分類中或是空字串，跳過
        
        # BIO 轉換，將 B- 和 I- 合併處理
        if label.startswith("B-"):
            if prev_label and prev_word.strip():
                result[prev_label].append(prev_word)  # 存入前一個詞
            prev_word = word
            prev_label = mapped_label  # 存儲對應的標籤
        elif label.startswith("I-") and prev_label == mapped_label:
            prev_word += word  # 合併詞
        else:
            if prev_label and prev_word.strip():
                result[prev_label].append(prev_word)  # 存入前一個詞
            prev_label, prev_word = None, ""
    
    if prev_label and prev_word.strip():
        result[prev_label].append(prev_word)  # 存入最後一個詞
    
    # 如果 LOCATION 是縣市，就移到 ADDRESS
    location_as_address = {"台北市", "新北市", "台中市", "台南市", "高雄市", "桃園市", "宜蘭縣", "新竹市"}
    for loc in result["LOCATION"][:]:
        if loc in location_as_address:
            result["LOCATION"].remove(loc)
            result["ADDRESS"].append(loc)
    
    return result

def classify_query_type(query, query_model, query_tokenizer):
    """使用 mT5 模型來分類 Query 類型（geo_filter / geo_distance）"""
    input_ids = query_tokenizer(query, return_tensors="pt").input_ids.to(query_model.device)
    outputs = query_model.generate(input_ids, max_length=10, num_return_sequences=1, do_sample=False)
    result = query_tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    if result not in ["geo_filter", "geo_distance"]:
        result = "geo_filter"  # 預設為 geo_filter
    
    return {"search_type": [result]}

def nlu_pipeline(query):
    """完整 NLU 流程：NER + Query 類型分類"""
    ner_pipeline, query_model, query_tokenizer = load_nlu_models()
    
    entities = extract_entities(query, ner_pipeline)
    query_type = classify_query_type(query, query_model, query_tokenizer)
    
    return {**entities, **query_type}

# 測試
query = "阿里山有哪些咖啡館？"
result = nlu_pipeline(query)
print(result)


Device set to use cuda:0


{'LOCATION': ['阿里山'], 'ADDRESS': [], 'POI_TYPE': [], 'NEED': [], 'search_type': ['geo_filter']}


#### 效果尚可，還有進步空間

In [58]:
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
from transformers import MT5ForConditionalGeneration, T5Tokenizer
from sentence_transformers import SentenceTransformer
import faiss
import torch
import numpy as np

def load_nlu_models():
    """載入 NER（XLM-RoBERTa）與 Query 分類（mT5）模型"""
    # NER 模型（XLM-RoBERTa，使用新模型 Davlan/xlm-roberta-base-ner-hrl）
    ner_model_name = "Davlan/xlm-roberta-base-ner-hrl"
    ner_tokenizer = AutoTokenizer.from_pretrained(ner_model_name)
    ner_model = AutoModelForTokenClassification.from_pretrained(ner_model_name)
    ner_pipeline = pipeline("ner", model=ner_model, tokenizer=ner_tokenizer, device=0 if torch.cuda.is_available() else -1)
    
    # Query 類別分類（mT5）
    query_model_name = "google/mt5-small"
    query_tokenizer = T5Tokenizer.from_pretrained(query_model_name, model_max_length=512)
    query_model = MT5ForConditionalGeneration.from_pretrained(query_model_name, device_map="auto")
    
    return ner_pipeline, query_model, query_tokenizer

# 初始化語意向量模型
embedding_model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

# 定義 POI 類型關鍵字
POI_KEYWORDS = [
    "餐廳", "中餐館", "亞洲菜餐廳", "火鍋餐廳", "夜市", "甜品店", "冰品飲料店", "美食廣場",
    "咖啡館", "茶葉店", "茶市", "茶批發商", "茶製造商", "酒品專賣店", "釀酒廠", "酒樓", "飯盒供應商",
    "酒店", "賓館", "旅館", "民宿", "渡假村", "長期住宿酒店", "宗教住宿場所",
    "旅遊景點", "觀光牧場", "觀光農場", "自然保護區", "國家公園", "國家森林", "水上樂園", "海濱長廊", "沙灘",
    "湖泊", "河流", "島嶼", "山峰", "遺址博物館", "歷史景點", "文化地標", "紀念公園", "紀念碑",
    "登山纜車", "行山徑", "溫泉", "露營地點", "釣魚池","公園",
    "博物館", "歷史博物館", "科學館", "藝術博物館", "手工藝博物館", "動物學博物館", "海事博物館",
    "音樂廳", "歌劇院", "演藝劇場", "展覽場地", "展覽貿易中心", "藝術中心", "文化中心",
    "百貨公司", "購物中心", "市場", "農產品市場", "海鮮市場", "紀念品商店", "禮品店", "書店", "古董店",
    "寺廟", "天主教教堂", "神社", "道觀", "宗教聖地", "神壇",
    "火車站", "渡輪碼頭", "機票代理公司", "橋樑", "隧道", "鐵路公司", "鐵道服務", "停車場",
    "遊樂場", "主題公園", "摩天輪", "動物園", "野生動物園", "體育館", "漆彈射擊場", "潛水中心", "單車徑"
]

# 預計算 POI 關鍵字向量
poi_embeddings = embedding_model.encode(POI_KEYWORDS, convert_to_numpy=True)
faiss_index = faiss.IndexFlatL2(poi_embeddings.shape[1])
faiss_index.add(poi_embeddings)

def detect_poi_type(query):
    """使用語意向量檢索 POI_TYPE，選擇最符合的類別"""
    query_embedding = embedding_model.encode([query], convert_to_numpy=True)
    D, I = faiss_index.search(query_embedding, 3)  # 取最相近的 3 個 POI 類型
    
    # 過濾：優先選擇較廣泛的 POI 類型
    preferred_types = {"餐廳", "美食廣場", "夜市", "旅遊景點", "市場"}
    for idx in I[0]:
        if idx >= 0 and POI_KEYWORDS[idx] in preferred_types:
            return [POI_KEYWORDS[idx]]
    
    # 若找不到適合的廣泛類型，則回傳最相似的結果
    return [POI_KEYWORDS[I[0][0]]] if I[0][0] >= 0 else []

def nlu_pipeline(query):
    """完整 NLU 流程：NER + Query 類型分類 + POI_TYPE 語意檢索"""
    ner_pipeline, query_model, query_tokenizer = load_nlu_models()
    
    entities = extract_entities(query, ner_pipeline)
    query_type = classify_query_type(query, query_model, query_tokenizer)
    
    # **使用語意檢索 POI_TYPE**
    if not entities["POI_TYPE"]:  
        entities["POI_TYPE"] = detect_poi_type(query)
    
    return {**entities, **query_type}

# 測試
query = "I'm looking for a takeaway restaurant or snack near Sun Moon Lake"
result = nlu_pipeline(query)
print(result)


Device set to use cuda:0


{'LOCATION': ['SunMoonLake'], 'ADDRESS': [], 'POI_TYPE': ['餐廳'], 'NEED': [], 'search_type': ['unknown']}


####  嘗試訓練

In [41]:
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
from transformers import MT5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments, DataCollatorForSeq2Seq
from sentence_transformers import SentenceTransformer
import faiss
import torch
import numpy as np
import datasets
import pandas as pd

def load_nlu_models():
    """載入 NER（XLM-RoBERTa）與 Query 分類（Distil-mT5）模型"""
    # NER 模型（XLM-RoBERTa）
    ner_model_name = "Davlan/xlm-roberta-base-ner-hrl"
    ner_tokenizer = AutoTokenizer.from_pretrained(ner_model_name)
    ner_model = AutoModelForTokenClassification.from_pretrained(ner_model_name)
    ner_pipeline = pipeline("ner", model=ner_model, tokenizer=ner_tokenizer, device=0 if torch.cuda.is_available() else -1)
    
    # Query 類別分類（Distil-mT5）
    query_model_name = "google/mt5-small"
    query_tokenizer = T5Tokenizer.from_pretrained(query_model_name, model_max_length=128)
    query_model = MT5ForConditionalGeneration.from_pretrained(query_model_name, device_map="auto")
    
    return ner_pipeline, query_model, query_tokenizer

# 初始化語意向量模型
embedding_model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

def load_training_data(file_path, tokenizer):
    """從 CSV 文件載入訓練數據，並對 Query 進行 Tokenization"""
    df = pd.read_csv(file_path)
    
    # 轉換文字標籤（geo_filter / geo_distance）為 Token 格式
    label_texts = df["label"].tolist()
    tokenized_labels = tokenizer(label_texts, padding=True, truncation=True, max_length=10, return_tensors="pt")
    
    # 對 Query 進行 Tokenization
    tokenized_data = tokenizer(df["query"].tolist(), padding=True, truncation=True, max_length=128, return_tensors="pt")
    
    # 建立 Dataset 格式
    dataset = datasets.Dataset.from_dict({
        "input_ids": tokenized_data["input_ids"].tolist(),
        "attention_mask": tokenized_data["attention_mask"].tolist(),
        "labels": tokenized_labels["input_ids"].tolist()
    })
    
    return dataset.train_test_split(test_size=0.1)

training_file_path = "geo_training_data.csv"
tokenizer = T5Tokenizer.from_pretrained("google/mt5-small")
data_splits = load_training_data(training_file_path, tokenizer)
train_data = data_splits["train"]
eval_data = data_splits["test"]

def train_distilled_mt5():
    """訓練 Distil-mT5 進行 `geo_filter` vs `geo_distance` 分類"""
    train_args = TrainingArguments(
        output_dir="./distilled_mt5", per_device_train_batch_size=8, num_train_epochs=5,
        logging_dir="./logs", save_steps=500, evaluation_strategy="epoch",
        learning_rate=5e-5, weight_decay=0.01, save_total_limit=2
    )
    
    data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=MT5ForConditionalGeneration.from_pretrained("google/mt5-small"))
    trainer = Trainer(
        model=MT5ForConditionalGeneration.from_pretrained("google/mt5-small"),
        args=train_args,
        train_dataset=train_data,
        eval_dataset=eval_data,
        data_collator=data_collator
    )
    
    trainer.train()
    trainer.save_model("./distilled_mt5")
    print("Distilled mT5 訓練完成")

train_distilled_mt5()

# 測試
query = "I'm looking for a takeaway restaurant or snack near Sun Moon Lake"
result = nlu_pipeline(query)
print(result)

Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch,Training Loss,Validation Loss
1,No log,11.342694
2,No log,5.695917
3,No log,2.318912
4,No log,1.22249
5,10.493400,0.885306


Distilled mT5 訓練完成


Device set to use cuda:0


{'LOCATION': ['SunMoonLake'], 'ADDRESS': [], 'POI_TYPE': ['餐廳'], 'NEED': [], 'search_type': ['geo_filter']}


In [None]:
from transformers import T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("google/mt5-small")
tokenizer.save_pretrained("./distilled_mt5")


In [None]:
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
from transformers import MT5ForConditionalGeneration, T5Tokenizer
import torch

def classify_query_type(query, query_model, query_tokenizer):
    """使用 `distilled_mt5` 進行查詢類別分類"""
    input_ids = query_tokenizer(query, return_tensors="pt", padding=True, truncation=True, max_length=128).input_ids.to(query_model.device)
    
    
    outputs = query_model.generate(
        input_ids, 
        max_length=20, 
        num_return_sequences=1,
        do_sample=False,   
        num_beams=5        
    )
    
    result = query_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    
    
    if result.startswith("<extra_id_") or result == "":
        result = "unknown"
    
    return {"search_type": [result]}

# 測試 `distilled_mt5`
query = "I'm looking for a takeaway restaurant or snack near Sun Moon Lake"
result = classify_query_type(query, query_model, query_tokenizer)
print(result)


{'search_type': ['unknown']}


In [None]:
from transformers import MT5ForConditionalGeneration, T5Tokenizer
import torch

def load_trained_mt5():
    """載入已訓練的 Distilled mT5"""
    query_model_name = "./distilled_mt5"  
    query_tokenizer = T5Tokenizer.from_pretrained(query_model_name)
    query_model = MT5ForConditionalGeneration.from_pretrained(query_model_name, device_map="auto")
    return query_model, query_tokenizer

def generate_es_query(query_model, query_tokenizer, location, poi_type, need, distance):
    """用 mT5 產生 Elasticsearch Query DSL"""
    prompt = f"""
    請根據以下條件生成 Elasticsearch Query：
    地點：「{location}」
    類別：「{poi_type}」
    需求：「{need}」
    搜尋範圍：「{distance} 內」
    請輸出符合 Elasticsearch DSL 格式的 JSON 查詢語句：
    """
    
    input_ids = query_tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512).input_ids.to(query_model.device)
    outputs = query_model.generate(input_ids, max_length=256)
    result = query_tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    return {"es_query": result}

if __name__ == "__main__":
    #  載入模型
    query_model, query_tokenizer = load_trained_mt5()
    
    #  測試 Elasticsearch Query 生成
    test_query = generate_es_query(query_model, query_tokenizer, "台北101", "餐廳", "景觀", "5km")
    print(test_query)


{'es_query': '<extra_id_0>_distance'}


In [None]:
from transformers import MT5ForConditionalGeneration, T5Tokenizer
import torch

def generate_es_query(query_model, query_tokenizer, location, poi_type, need, distance):
    """用 mT5 產生 Elasticsearch Query DSL"""
    
    # 讓 mT5 學會 JSON 結構
    example_query = """
    請根據以下條件生成 Elasticsearch Query：
    地點：「阿里山」
    類別：「咖啡館」
    需求：「景觀」
    搜尋範圍：「5km 內」
    輸出：
    {
      "query": {
        "bool": {
          "must": [
            { "match": { "gmap_location": "阿里山" } },
            { "match": { "class": "咖啡館" } }
          ],
          "filter": [
            { "geo_distance": { "distance": "5km", "gmap_coordinates": { "lat": 23.508, "lon": 120.802 } } }
          ]
        }
      }
    }
    """

    prompt = f"""
    {example_query}
    現在請根據以下條件生成 Elasticsearch Query：
    地點：「{location}」
    類別：「{poi_type}」
    需求：「{need}」
    搜尋範圍：「{distance} 內」
    輸出：
    """
    
    input_ids = query_tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512).input_ids.to(query_model.device)
    outputs = query_model.generate(input_ids, max_length=256)
    result = query_tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    return {"es_query": result}


if __name__ == "__main__":
    #  載入模型（改用 `google/mt5-small`）
    query_model, query_tokenizer = load_mt5()
    
    #  測試 Elasticsearch Query 生成
    test_query = generate_es_query(query_model, query_tokenizer, "台北101", "餐廳", "景觀", "5km")
    print(test_query)


{'es_query': '<extra_id_0>:'}


### 嘗試Gemini API

In [None]:
pip install google-generativeai


In [None]:
import google.generativeai as genai
import json

# 設定你的 Gemini API Key
genai.configure(api_key="")

def generate_es_query_gemini(query):
    """使用 Gemini 產生符合 Elasticsearch Mapping 的 Query，確保 `filter` 是陣列"""
    schema_info = """
    你的 Elasticsearch 資料結構如下：
    - `gmap_location`: 地點名稱 (如 "台北101")
    - `location_ID`: Google 地點 ID
    - `class`: 地點類型 (如 "餐廳", "咖啡館", "旅遊景點")
    - `address`: 地址
    - `summary_1`: 簡介
    - `tags`: 標籤 (如 "適合兒童", "景觀優美", "無障礙設施")
    - `gmap_coordinates`: 經緯度座標 (lat, lon)

    **請確保生成的 Elasticsearch Query 只包含這些欄位，且 `filter` 必須是 `list` 格式。**
    """

    prompt = f"""
    {schema_info}
    請將以下自然語言查詢轉換為符合 Elasticsearch JSON Query DSL：
    查詢：「{query}」
    生成符合 JSON 格式的 Elasticsearch 查詢語句，**請確保 `filter` 是 `list` 格式**：
    """

    model = genai.GenerativeModel("gemini-1.5-pro")  # 使用 Gemini 1.5 Pro
    response = model.generate_content(prompt)

    return response.text

# 測試
query = "我想找阿里山附近的咖啡廳？"
es_query = generate_es_query_gemini(query)
print("🔍 產生的 Elasticsearch Query:\n", es_query)


🔍 產生的 Elasticsearch Query:
 ```json
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "class": "咖啡廳"
          }
        }
      ],
      "filter": [
        {
          "geo_distance": {
            "distance": "5km",  // 可調整距離
            "gmap_coordinates": {
              "lat": 23.500754,  // 阿里山的緯度，需要實際查詢
              "lon": 120.802632   // 阿里山的經度，需要實際查詢
            }
          }
        }
      ]
    }
  }
}
```

**說明:**

1. **`query` - `bool` - `must`**:  使用 `match` 查詢 `class` 欄位，確保結果包含 "咖啡廳"。
2. **`query` - `bool` - `filter`**: 使用 `geo_distance` 篩選器，根據提供的阿里山經緯度座標，查找附近的結果。`distance` 設定為 "5km"，可以根據需要調整搜索半徑。**注意：阿里山的經緯度需要替換成實際的經緯度值。**  使用 `filter` context 確保效能，因為 `filter` 不計算分數。
3. **`filter` 為 list 格式**: 符合題目要求，即使只有一個 filter 也使用 list 包裹。


**使用方法:**

1. 將阿里山的經緯度替換成正確的值。
2. 將 JSON 查詢語句複製到 Elasticsearch 的搜尋 API 中執行。
3. 調整 `distance` 參數以控制搜索半徑。


**額外說明:**

* 如果需要更精確的「附近」定義，可以考慮使用更小的 `distance` 值。
* 可以根據需求添加其他 `must` 或 `filter` 條件，例如根據 `tags` 

In [None]:
import google.generativeai as genai
import json
import numpy as np
from elasticsearch import Elasticsearch

# 設定Gemini API Key
genai.configure(api_key="")

# 設定全域變數，統一使用 Gemini 模型
GEMINI_MODEL = genai.GenerativeModel("gemini-1.5-pro")

# 連接 Elasticsearch
es = Elasticsearch("http://localhost:9200")

INDEX_NAME = "gmap_location"  

def find_location_in_database(query):
    """查詢地點是否存在於 Elasticsearch，返回經緯度或 None"""
    search_body = {
        "query": {
            "match_phrase": {"gmap_location": query}
        }
    }
    response = es.search(index=INDEX_NAME, body=search_body)
    
    if response["hits"]["total"]["value"] > 0:
        location_data = response["hits"]["hits"][0]["_source"]
        return {
            "gmap_location": location_data["gmap_location"],
            "gmap_coordinates": location_data["gmap_coordinates"]
        }
    return "None"

def generate_es_query_gemini(query):
    """綜合流程，產生 Elasticsearch Query，並執行查詢"""
    location_data = find_location_in_database(query)
    
    if location_data == "None":
        raise ValueError("無法找到地點，請檢查查詢內容！")
    
    center_point = location_data.get("gmap_coordinates", None)
    if center_point is None:
        raise ValueError("回傳的地點缺少 `gmap_coordinates`，檢查 API 回應")
    
    search_type = "geo_distance" 
    
    schema_info = """
    Elasticsearch Index 結構如下：
    - `gmap_location`: 地點名稱 (如 "台北101")
    - `class`: 地點類型 (如 "餐廳", "咖啡館", "旅遊景點")
    - `gmap_coordinates`: 經緯度座標 (lat, lon)
    **請確保生成的 Elasticsearch Query 只包含這些欄位，並符合 JSON 格式。**
    """
    
    prompt = f"""
    {schema_info}
    請根據以下條件生成 Elasticsearch Query：
    - 地點：「{query}」
    - 查詢類型：「{search_type}」
    - 參考經緯度：「{center_point}」
    - 搜尋範圍：「1km」
    **請輸出完整的 JSON 查詢語句，不要額外解釋。**
    """
    response = GEMINI_MODEL.generate_content(prompt)
    es_query = response.text.strip()
    
    # 執行 Elasticsearch 查詢
    try:
        es_response = es.search(index=INDEX_NAME, body=json.loads(es_query))
        return es_response["hits"]["hits"]
    except json.JSONDecodeError:
        raise ValueError(f"JSON 解析錯誤，Gemini 回傳內容無法解析:\n{es_query}")

if __name__ == "__main__":
    query = "阿里山有哪些咖啡館？"
    try:
        results = generate_es_query_gemini(query)
        print("Elasticsearch 查詢結果:")
        for result in results:
            print(result["_source"])
    except ValueError as e:
        print(e)


In [None]:
import google.generativeai as genai
import json
import time
import random

# Gemini API Key
genai.configure(api_key="")

# 設定全域變數，使用 Gemini 模型
GEMINI_MODEL = genai.GenerativeModel("gemini-1.5-pro")


def parse_query_with_gemini(query, chat):
    """讓 Gemini 解析 Query 並輸出結構化 JSON"""
    prompt = f"""    
    做為NLP解析器，以及台灣的旅遊專家，解析以下使用者查詢，並輸出標籤與內容：
    
    **標籤**：
    - `gmap_location`: 確切地點名稱
    - `address`: 地址
    - `class`: google map上的地點類型
    - `opening_hours`: 營業時間
    - `entrance_fee`: 門票、免費等
    - `tags`: google map的地點標籤，如"適合兒童"、"無障礙停車場"、"洗手間"、Wi-Fi"
    - `geo_distance`: 地理距離搜尋的半徑，如"1公里"、"附近"
    - `semantic_keywords`: 語意關鍵詞(用於語意檢索)、其他不屬於上述資料結構的詞，或是任何可能在google map評論出現的詞，如"安靜的"、"海邊看夕陽"
      
     
    
    以下是一些解析範例：

    query：「南港適合小孩子的博物館，有無障礙停車場」 
    輸出：
        "address": "南港區",        
        "class": "博物館",
        "tags": ["無障礙停車場","適合兒童"]
    
    query：「推薦國父紀念館5公里內的咖啡店，週二下午有開的」
    輸出：
        "gmap_location": "國父紀念館",        
        "class": "咖啡館",
        "geo_distance": "5km",
        "opening_hours":["週二","下午"]
    
    query：「大安和信義區有哪些免費的美術館，要有現代設計的展覽內容」
    輸出：
        "address": "大安區","信義區"        
        "class": "藝術博物館","現代藝術博物館"
        "entrance_fee": "free",
        "semantic_keywords":["現代","設計"]

    query：「推薦一間台東靠海邊安靜的餐廳」
    輸出：
        "address": "台東縣","台東市"        
        "class": "餐廳",        
        "semantic_keywords":["安靜","靠海","海邊"]
    
    **台灣所有的縣市都要解析為address，如台中=台中市、新竹="新竹縣"、"新竹市"**
    **"gmap_location"和"address"不同時出現在同一輸出，沒有明確地點才選擇"address"
    **class要對應google map上的地點類型**
    **如果有對距離的描述，如"附近"、"周邊"請使用"geo_distance": "5km"**
    *輸入外文要轉換成中文，除了"semantic_keywords"保留原文*

    請將以下查詢解析
    「{query}」

    ```
    """
    # response = GEMINI_MODEL.generate_content(prompt)
    response = chat.send_message(prompt)
    
   # 判斷是否成功回傳
    if response.parts:
        try:
            parsed_query = json.loads(response.text.strip())
            return parsed_query
        except json.JSONDecodeError:
            raise ValueError(f"\n{response.text.strip()}")
    else:
        raise ValueError(f"回傳失敗")

if __name__ == "__main__":
    
    # 建立新的對話
    chat = GEMINI_MODEL.start_chat()

    # 測試不同的查詢語句
    queries = [        
        "I would like find the nearest museum of Shilin Night Market"
    ]
    
    for query in queries:
        print(f"\nQuery: {query}")
        try:
            result = parse_query_with_gemini(query, chat)
            print("解析結果:")
            print(json.dumps(result, ensure_ascii=False, indent=2))
        except ValueError as e:
            print(e)
        time.sleep(1.5)



Query: I would like find the nearest museum of Shilin Night Market

```json
{
  "gmap_location": "士林夜市",
  "class": "博物館",
  "geo_distance": "附近"
}
```


Query: 台北市有哪些免費的博物館？
{"address": "台北市","class": "博物館","entrance_fee": "免費"}

Query: 哪裡有適合親子的公園？
{"class": "公園","tags": "適合兒童"}

Query: 我想找陽明山的咖啡館
{"location": "陽明山","type":"gmap_location","class": "咖啡館"}

Query: 我想找信義區 1 公里內的夜市
{"location": "信義區","type": "address","class": "夜市","geo_distance": 1km"}

Query: 推薦一個安靜的餐廳
{"class": "餐廳","semantic_keywords":['安靜']}

