# LLMs
- https://python.langchain.com/docs/introduction/

In [None]:
# LLM model setup
# https://build.nvidia.com/deepseek-ai/deepseek-r1
# nvapi-xxx  (presumably the expected shape of the API key -- confirm)
import getpass
import os

# Prompt interactively only when the environment holds no non-empty key,
# so an already exported NVIDIA_API_KEY is left untouched.
if not os.getenv("NVIDIA_API_KEY"):
    os.environ["NVIDIA_API_KEY"] = getpass.getpass("Enter API key for NVIDIA: ")

## Model sources
# https://build.nvidia.com/deepseek-ai
# https://build.nvidia.com/meta
# https://build.nvidia.com/google
# https://build.nvidia.com/qwen

# OPENAI

In [None]:
# Install (or upgrade) the `openai` client package through uv's pip interface.
!uv pip install -qU "openai"

In [None]:
##  Using General OpenAI Client
# Sends one chat request to NVIDIA's OpenAI-compatible endpoint and prints the
# reply incrementally as streamed chunks arrive.
import os  # imported here so the cell does not depend on an earlier cell for `os`

from openai import OpenAI

# client = OpenAI()  ## Assumes OPENAI_API_KEY is set
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    # Falls back to an empty key when NVIDIA_API_KEY is not set.
    api_key=os.environ.get("NVIDIA_API_KEY", ""),
)

completion = client.chat.completions.create(
    model="meta/llama-4-maverick-17b-128e-instruct",
    # model="gpt-4-turbo-2024-04-09",
    messages=[
        {"role": "system", "content": "Translate the following from English into Italian"},
        {"role": "user", "content": "Hi"},
    ],
    temperature=1,
    top_p=1,
    max_tokens=1024,
    stream=True,
)

## Streaming with Generator: Results come out as they're generated
for chunk in completion:
    # A streamed chunk can arrive with an empty `choices` list; indexing it
    # unconditionally would raise IndexError, so check before reading the delta.
    if chunk.choices and chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")

In [None]:
## Non-Streaming: the server returns the whole reply once it is complete
##  Using General OpenAI Client
from openai import OpenAI

# client = OpenAI()  ## Assumes OPENAI_API_KEY is set
client = OpenAI(
    api_key=os.getenv("NVIDIA_API_KEY", ""),
    base_url="https://integrate.api.nvidia.com/v1",
)
completion = client.chat.completions.create(
    stream=False,
    model="meta/llama-4-maverick-17b-128e-instruct",
    # model="gpt-4-turbo-2024-04-09",
    max_tokens=1024,
    temperature=1,
    top_p=1,
    messages=[
        {"role": "system", "content": "Translate the following from English into Italian"},
        {"role": "user", "content": "Hi"},
    ],
)

# completion
# Report a missing reply first; otherwise print the first choice's text.
if not completion.choices or not completion.choices[0].message:
    print("No response content.")
else:
    print(completion.choices[0].message.content)

# Langchain

In [None]:
!uv pip install -qU "langchain-nvidia-ai-endpoints"

In [None]:
from langchain.chat_models import init_chat_model

# Build the chat model handle used by the following cells, served via the NVIDIA provider.
llm = init_chat_model(
    model="meta/llama-4-maverick-17b-128e-instruct",
    model_provider="nvidia",
)

In [None]:
from langchain_core.messages import HumanMessage, SystemMessage

# One system instruction followed by the user's text to translate.
messages = [
    SystemMessage(content="Translate the following from English into Italian"),
    HumanMessage(content="hi!"),
]

response = llm.invoke(messages)
# Show the full response object, a separator line, then only the reply text.
print(response, "---------------", response.content, sep="\n")

## 作業
**作業：**

1.  **模型選擇：**
    * 選擇兩個不同的 LLM 模型來完成以下任務。
    * 請勿使用 notebook 中已使用的 `meta/llama-4-maverick-17b-128e-instruct` 模型。

2.  **聊天任務：**
    * 設計一個模擬客戶服務諮詢的多輪對話，目標是解決客戶關於產品退貨的問題。
    * 對話至少包含 5 輪，並包含以下元素：
        * 客戶提出退貨原因
        * 客服詢問詳細資訊 (例如：訂單編號、產品狀況)
        * 客服說明退貨流程與注意事項
        * 客戶追問相關問題
        * 客服總結並提供協助

3.  **模型比較與分析：**
    * 使用選擇的兩個模型分別執行上述多輪對話。
    * 比較兩個模型的輸出，並分析其在以下方面的差異：
        * **準確性：** 是否正確理解客戶問題並提供正確資訊？
        * **流暢度：** 語言是否自然、符合客服人員的說話方式？
        * **相關性：** 回答是否切題、符合對話上下文？
        * **效率：** 回應速度如何？
    * 使用表格或簡短報告總結比較結果，並說明哪個模型更適合用於客戶服務情境，以及原因。

**評分標準：**

* 模型選擇與設定 (20%)
* 多輪對話設計 (30%)
* 模型輸出與比較 (30%)
* 分析與結論 (20%)