In [2]:
import os
import time
import psycopg2
from psycopg2.extras import execute_values
import openai
from datetime import datetime, timezone
import json

# 1. 設定環境 / 建立連線
OPENAI_API_KEY =  "OpenAIAPIKEY"
openai.api_key = OPENAI_API_KEY


DB_CONFIG = {
    'dbname': 'booklyn',
    'user': 'postgres',
    'password': 'postgres',
    'host': 'localhost',
    'port': 5433  # 你的 Docker 是 5433
}

conn = psycopg2.connect(**DB_CONFIG)
conn.autocommit = True
cur = conn.cursor()

# 2. 取出所有還沒被分析過的 review
cur.execute("""
    SELECT r.review_id,
           r.content     AS review_text
      FROM reviews r
 LEFT JOIN review_ai ra ON ra.review_id = r.review_id
     WHERE ra.review_id IS NULL
""")
pending = cur.fetchall()  # List of (review_id, review_text)

# 3. 定義呼叫 OpenAI 的函式
def analyze_with_openai(text: str) -> dict:
    prompt = f"""
1. Distinguish whether the following user review is generated by AI, and return a score between 0 and 1 indicating the "AI generation probability".
2. Also, summarize the main point of this review in one or two sentences.
return with a JSON object containing only the following:
- "credibility_score": a float between 0 and 1
- "summary_ai": a string summarizing the review
""" + text

    resp = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    content = resp.choices[0].message.content.strip()
    print(content)

    # 假設回傳內容長這樣（JSON 格式）：
    # {
    #   "credibility_score": 0.23,
    #   "summary_ai": "這段評論主要在抱怨送餐速度太慢。",
    #   "model_version": "gpt-4o-mini"
    # }
    # 如果 API 回的是純文字，就要自己解析。這裡示範簡單用 eval (生產上請用 json.loads)
    print("OpenAI 回應內容:", content)
    # find first {
    try:
        start = content.index('{')
        content = content[start:]
    except ValueError:
        print("No JSON object found in response:", content)
        raise
    # find last }
    try:
        end = content.rindex('}') + 1
        content = content[:end]
    except ValueError:
        print("No closing brace found in response:", content)
        raise
    try:
        result = json.loads(content)
    except json.JSONDecodeError:
        print("Failed to parse JSON response:", content)
        raise
    result["model_version"] = resp.model
    result["analyzed_at"] = datetime.now(timezone.utc).isoformat()  # ISO 格式的時間戳

    return result

# 4. 逐筆呼叫、並批次寫入
batch = []
print(f"共有 {len(pending)} 條評論需要分析")
cnt = 0
for review_id, review_text in pending:
    try:
        res = analyze_with_openai(review_text)
        print(f"分析結果: {res}")   
        batch.append((
            review_id,
            res["credibility_score"],
            res["summary_ai"],
            res["model_version"],
            res["analyzed_at"],
        ))
        cnt += 1
        print(f"已處理 {cnt} 條評論")
    except Exception as e:
        print(f"Error processing {review_id}: {e}")
        continue
    # 為了避免 API rate limit，可以稍微 sleep
    time.sleep(0.5)

# 5. 把結果插入 REVIEW_AI
sql = """
INSERT INTO review_ai
  (review_id, credibility_score, summary_ai, model_version, analyzed_at)
VALUES %s
ON CONFLICT (review_id) DO UPDATE
  SET credibility_score = EXCLUDED.credibility_score,
      summary_ai      = EXCLUDED.summary_ai,
      model_version   = EXCLUDED.model_version,
      analyzed_at     = EXCLUDED.analyzed_at
"""
execute_values(cur, sql, batch)

cur.close()
conn.close()
print("所有評論分析完畢並已存入 REVIEW_AI")


{
  "credibility_score": 0.2,
  "summary_ai": "The reviewer bought a book as a gift for Secret Santa, which was well-received despite their inability to read German, and noted that it arrived on time and in great condition."
}
OpenAI 回應內容: {
  "credibility_score": 0.2,
  "summary_ai": "The reviewer bought a book as a gift for Secret Santa, which was well-received despite their inability to read German, and noted that it arrived on time and in great condition."
}
分析結果: {'credibility_score': 0.2, 'summary_ai': 'The reviewer bought a book as a gift for Secret Santa, which was well-received despite their inability to read German, and noted that it arrived on time and in great condition.', 'model_version': 'gpt-4o-mini-2024-07-18', 'analyzed_at': '2025-05-30T06:00:05.182673+00:00'}
{
  "credibility_score": 0.2,
  "summary_ai": "The reviewer enjoys the book for its engaging story and finds it suitable for German language learners, noting that they can read it with high fluency while occasion

KeyboardInterrupt: 

In [None]:
# 5. 把結果插入 REVIEW_AI
sql = """
INSERT INTO review_ai
  (review_id, credibility_score, summary_ai, model_version, analyzed_at)
VALUES %s
ON CONFLICT (review_id) DO UPDATE
  SET credibility_score = EXCLUDED.credibility_score,
      summary_ai      = EXCLUDED.summary_ai,
      model_version   = EXCLUDED.model_version,
      analyzed_at     = EXCLUDED.analyzed_at
"""
execute_values(cur, sql, batch)

cur.close()
conn.close()
print("所有評論分析完畢並已存入 REVIEW_AI")


所有評論分析完畢並已存入 REVIEW_AI


1730
共有 623910 條評論需要分析
