In [None]:
# Load environment variables from the .env file located by find_dotenv().
import os
from dotenv import find_dotenv, load_dotenv
_ = load_dotenv(find_dotenv())

# Perplexity API key read from the environment; os.getenv returns None when it is unset.
PPLX_API_KEY = os.getenv("PPLX_API_KEY")

from llama_index.llms.perplexity import Perplexity

# Chat client configured for the "sonar-pro" model with temperature 0.0.
llm = Perplexity(api_key=PPLX_API_KEY, model="sonar-pro", temperature=0.0)

In [None]:
# IPython help lookup for the Perplexity class (development aid; not needed for the run).
Perplexity?

In [None]:
# Import the ChatMessage class from the llama_index library.
from llama_index.core.llms import ChatMessage

# The conversation is first written as plain role/content mappings:
# a system instruction followed by the user's question.
messages_dict = [
    dict(role="system", content="使用繁體中文回復使用者"),
    dict(role="user", content="可以告訴我特斯拉的股價嗎?"),
]

# Each mapping is then promoted to a ChatMessage built from its role and content.
messages = [
    ChatMessage(role=entry["role"], content=entry["content"])
    for entry in messages_dict
]

# Show the converted messages to confirm the conversion.
print(messages)

In [None]:
# Send the conversation to the model, then print the response's type and its printed form.
response = llm.chat(messages)
print(f"response type: {type(response)}")
print(response)

In [None]:
def get_response_message(response):
    """Summarize the message of a chat response as plain Python data.

    Args:
        response: Object exposing ``message.role`` and ``message.blocks``
            (in this notebook, the value returned by ``llm.chat``).

    Returns:
        dict: ``{'role': <message role>, 'blocks': [...]}`` where each entry
        of ``blocks`` is ``{'block_type': ..., 'text': ...}``, in the same
        order as ``message.blocks``. ``text`` is ``None`` for a block that
        has no ``text`` attribute.
    """
    message = response.message
    return {
        'role': message.role,
        'blocks': [
            {
                'block_type': block.block_type,
                # A block without a ``text`` attribute yields None here
                # instead of aborting the whole summary with AttributeError.
                'text': getattr(block, 'text', None),
            }
            for block in message.blocks
        ],
    }

def get_response_raw(response):
    """Extract model and source metadata from a chat response's raw payload.

    Args:
        response: Object whose ``raw`` attribute is a dictionary payload
            (in this notebook, the value returned by ``llm.chat``).

    Returns:
        dict with the keys, in this order:
            ``model``: ``raw['model']``, or ``None`` if absent.
            ``num_urls``: number of entries in ``raw['citations']``.
            ``search_results``: ``raw['search_results']``, or ``[]`` if absent.
            ``urls``: ``raw['citations']``, or ``[]`` if absent.
    """
    # Treat a missing payload as empty so a summary can still be produced.
    raw = response.raw or {}

    # Fall back to empty lists rather than raising KeyError when the payload
    # carries no citations or search results.
    citations = raw.get('citations') or []
    # Per the original author's note, each search result is expected to carry
    # 'title', 'url', 'date', 'last_updated' and 'snippet' - TODO confirm.
    search_results = raw.get('search_results') or []

    return {
        'model': raw.get('model'),
        'num_urls': len(citations),
        'search_results': search_results,
        'urls': citations,
    }
    

In [None]:
# Display the role and content blocks extracted from the latest response.
get_response_message(response)

In [None]:
# Take the text of the first block, drop newlines, and split it on the
# full-width full stop '。' so the answer is shown as a list of sentences.
rv = get_response_message(response)
rv['blocks'][0]['text'].replace('\n', '').split('。')

In [None]:
# Display the model name, citation URLs and search results from the raw payload.
get_response_raw(response)

# unstructured fact check

In [None]:
# System instruction that casts the model as a fact-checker; the adjacent
# string literals are concatenated into a single prompt string.
system_prompt = (
    "You are a professional fact-checker with extensive research capabilities. "
    "Your task is to evaluate claims or articles for factual accuracy. "
    "Focus on identifying false, misleading, or unsubstantiated claims."
)

In [None]:
# Display the assembled system prompt.
system_prompt

In [None]:
# Sample claim to fact-check. Still fake news as of now; who knows about five years from now.
text = (
    "特斯拉已經漲到1700美元，手握 280 股的投資人財富自由啦！"
)

In [None]:
# User turn: the fact-check instruction followed by a blank line and the claim in `text`.
user_prompt = f"Fact check the following text and identify any false or misleading claims:\n\n{text}"

In [None]:
# Display the assembled user prompt.
user_prompt

In [None]:
# Fact-check conversation: the system prompt sets the fact-checker role, and
# the user turn reuses `user_prompt` (built earlier from `text`) rather than
# repeating the same f-string a second time.
messages_dict = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt},
]

# Convert each role/content dictionary to a ChatMessage via keyword unpacking.
messages = [ChatMessage(**msg) for msg in messages_dict]

# Print the list of ChatMessage objects to verify the conversion.
print(messages)

In [None]:
# Run the unstructured fact check, then print the response's type and its printed form.
response = llm.chat(messages)
print(f"response type: {type(response)}")
print(response)

In [None]:
# Display the model name, citation URLs and search results behind the fact check.
get_response_raw(response)

# structured fact check

In [None]:
from pydantic import BaseModel, Field
from typing import Dict, List, Optional, Any

# One fact-checked claim; nested inside FactCheckResult.claims.
# NOTE: the docstring and Field descriptions below are emitted as "description"
# entries by pydantic's model_json_schema(), which this notebook sends to the
# model as the response format - editing that text changes the schema.
# `rating` is a free-form str: the allowed values are only described, not enforced.
class Claim(BaseModel):
    """Model for representing a single claim and its fact check."""
    claim: str = Field(description="The specific claim extracted from the text")
    rating: str = Field(description="Rating of the claim: TRUE, FALSE, MISLEADING, or UNVERIFIABLE")
    explanation: str = Field(description="Detailed explanation with supporting evidence")
    sources: List[str] = Field(description="List of sources used to verify the claim")


# Top-level structured output: an overall verdict, a summary, and per-claim checks.
# NOTE: the docstring and Field descriptions below are emitted as "description"
# entries by pydantic's model_json_schema(), which this notebook sends to the
# model as the response format - editing that text changes the schema.
# `overall_rating` is a free-form str: the allowed values are only described, not enforced.
class FactCheckResult(BaseModel):
    """Model for the complete fact check result."""
    overall_rating: str = Field(description="Overall rating: MOSTLY_TRUE, MIXED, or MOSTLY_FALSE")
    summary: str = Field(description="Brief summary of the overall findings")
    claims: List[Claim] = Field(description="List of specific claims and their fact checks")

In [None]:
# Rebinds `llm` (replacing the plain client created earlier) to a client whose
# additional_kwargs carry a "json_schema" response_format built from
# FactCheckResult's JSON schema, so replies are requested in that structure.
llm = Perplexity(
    api_key=PPLX_API_KEY,
    model="sonar-pro",
    temperature=0.0,
    additional_kwargs={
        "response_format": {
            "type": "json_schema",
            "json_schema": {"schema": FactCheckResult.model_json_schema()},
        }
    },
)

In [None]:
# Conversation for the structured check, written as role/content mappings:
# a JSON-only system instruction and a single sentence to verify.
messages_dict = [
    dict(role="system", content="你是專業事實查核助手，只能輸出 JSON。"),
    dict(role="user", content="請檢查以下句子：地球是平的"),
]

# Promote each mapping to a ChatMessage built from its role and content.
messages = [
    ChatMessage(role=entry["role"], content=entry["content"])
    for entry in messages_dict
]

# Show the converted messages to confirm the conversion.
print(messages)

In [None]:
# Send the conversation through the structured-output client and print the reply.
response = llm.chat(
    messages=messages,
)

print(response)

In [None]:
# Display the role and content blocks of the structured reply.
get_response_message(response)

In [None]:
# Display the model name, citation URLs and search results behind the structured reply.
get_response_raw(response)

In [None]:
# Same fact-check conversation as the unstructured run, now destined for the
# structured-output client. The user turn reuses `user_prompt` (built earlier
# from `text`) rather than repeating the same f-string again.
messages_dict = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt},
]

# Convert each role/content dictionary to a ChatMessage via keyword unpacking.
messages = [ChatMessage(**msg) for msg in messages_dict]

# Print the list of ChatMessage objects to verify the conversion.
print(messages)

In [None]:
# Run the fact check and display the response object as the cell's output.
response = llm.chat(messages)
response

In [None]:
# Pretty-print the text of the first content block of the latest response.
# NOTE(review): with the json_schema response format this text is presumably a
# JSON document; parsing it (e.g. FactCheckResult.model_validate_json) would
# confirm it matches the schema - TODO confirm.
from pprint import pprint
pprint(response.message.blocks[0].text)