<a href="https://colab.research.google.com/github/chenkaiwen111811/PL-Repo/blob/main/HW4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip -q install gspread gspread_dataframe google-auth google-auth-oauthlib google-auth-httplib2 \
               gradio pandas beautifulsoup4 google-generativeai python-dateutil \
               jieba scikit-learn

In [2]:
import os, time, uuid, re, json, datetime
from datetime import datetime as dt, timedelta
from dateutil.tz import gettz
import pandas as pd
import gradio as gr
import requests
from bs4 import BeautifulSoup
from collections import Counter, defaultdict # <-- 確保 Counter 和 defaultdict 都已匯入

import google.generativeai as genai

# Google Auth & Sheets
from google.colab import auth
import gspread
from gspread_dataframe import set_with_dataframe, get_as_dataframe
from google.auth.transport.requests import Request
from google.oauth2 import service_account
from google.auth import default

In [3]:
# Authenticate the Colab user and build a gspread client from the default
# Google credentials. `gc` is the module-level client used by the sheet helpers.
from google.colab import auth
auth.authenticate_user()

import gspread
from google.auth import default
creds, _ = default()

gc = gspread.authorize(creds)

from google.colab import userdata

# Read the Gemini API key from Colab Secrets (secret name: 'gemini').
api_key = userdata.get('gemini')
model = None # stays None when no key is found or both constructors fail

if api_key:
    genai.configure(api_key=api_key)
    # Build the model object right away so later cells can use the global `model`.
    try:
        model = genai.GenerativeModel('gemini-2.5-pro')
        print("Gemini 'gemini-2.5-pro' 初始化成功。")
    except Exception as e:
        # Fall back to a second model name if constructing the first one raised.
        print(f"⚠️ Gemini 'gemini-2.5-pro' 初始化失敗: {e}。嘗試 'gemini-1.5-flash'...")
        try:
            model = genai.GenerativeModel('gemini-1.5-flash')
            print("Gemini 'gemini-1.5-flash' 初始化成功。")
        except Exception as e2:
            print(f"⚠️ Gemini 'gemini-1.5-flash' 初始化失敗: {e2}。AI Plan 功能將無法使用。")
else:
    print("⚠️ 未在 Colab Secrets 中找到 'gemini' API 金鑰。AI Plan 功能將無法使用。")

Gemini 'gemini-2.5-pro' 初始化成功。


In [4]:
# NOTE(review): SHEET_URL is not referenced by any of the visible cells; the
# spreadsheet is opened by title via WORKSHEET_NAME instead.
SHEET_URL = "https://docs.google.com/spreadsheets/d/1jR3qRQr2ZvWYKNuv8wen_-eTZWdc5a-LLvH7iymn2zw/edit?usp=sharing"
WORKSHEET_NAME = "工作表4" # despite the name, this is passed to ensure_spreadsheet() as the spreadsheet title
TIMEZONE = "Asia/Taipei"

# Column order of the sheet that stores crawled PTT posts.
PTT_HEADER = [
    "post_id","title","url","date","author","nrec","created_at",
    "fetched_at","content"
]
# Column order of the keyword-statistics sheet (TF-IDF version).
TERMS_HEADER = ["term","freq","df_count","tfidf_mean","examples"]

def ensure_spreadsheet(name):
    """Open the spreadsheet titled `name`, creating it if it does not exist.

    Uses the module-level gspread client `gc` and returns the spreadsheet
    object.
    """
    try:
        return gc.open(name)
    except gspread.SpreadsheetNotFound:
        return gc.create(name)

sh = ensure_spreadsheet(WORKSHEET_NAME)

def ensure_worksheet(sh, title, header):
    """Return the worksheet `title` of `sh`, guaranteeing its first row is `header`.

    A missing worksheet is created with 1000 rows and len(header)+5 columns.
    If the sheet is empty or its first row differs from `header`, the sheet is
    cleared and the header row is written again.
    """
    try:
        worksheet = sh.worksheet(title)
    except gspread.WorksheetNotFound:
        column_count = str(len(header) + 5)
        worksheet = sh.add_worksheet(title=title, rows="1000", cols=column_count)
        worksheet.update([header])

    # Rebuild when there is no header row or it does not match.
    existing_rows = worksheet.get_all_values()
    header_matches = bool(existing_rows) and existing_rows[0] == header
    if header_matches:
        return worksheet

    print(f"工作表 '{title}' 表頭不符或為空，正在重建...")
    worksheet.clear()
    worksheet.update([header])
    return worksheet

In [5]:
# ==============
# PTT 八卦版爬蟲
# ==============
# Newest index page of the board to crawl; paging walks backwards from here.
PTT_BOARD_INDEX = "https://www.ptt.cc/bbs/Gossiping/index.html" # board URL (was changed to Gossiping)
# Cookie sent with every request made by _get_soup().
PTT_COOKIES = {"over18": "1"}

def tznow():
    """Return the current time as a timezone-aware datetime in TIMEZONE.

    Defined early because later cells call it.
    """
    local_tz = gettz(TIMEZONE)
    return dt.now(local_tz)

def _get_soup(url):
    """Fetch `url` with the PTT cookies and return it parsed as BeautifulSoup.

    Raises whatever `requests` raises, including the HTTP error from
    `raise_for_status()` on a non-success response.
    """
    response = requests.get(
        url,
        timeout=15,
        headers={"User-Agent":"Mozilla/5.0"},
        cookies=PTT_COOKIES,
    )
    response.raise_for_status()
    html = response.text
    return BeautifulSoup(html, "html.parser")

def _get_prev_index_url(soup):
    btns = soup.select("div.btn-group-paging a.btn.wide")
    for a in btns:
        if "上頁" in a.get_text(strip=True):
            href = a.get("href")
            if href:
                return "https://www.ptt.cc" + href
    return None

def _parse_nrec(nrec_span):
    if not nrec_span: return 0
    txt = nrec_span.get_text(strip=True)
    if txt == "爆": return 100
    if txt.startswith("X"):
        try: return -int(txt[1:])
        except: return -10
    try: return int(txt)
    except: return 0

def _extract_post_list(soup):
    """Collect the posts listed on one board index page.

    Entries without a title link (or whose link has no href) are skipped.
    A missing author or date node yields an empty string instead of aborting
    the whole page.

    Returns:
        A list of dicts with keys "title", "url", "author", "date", "nrec".
    """
    def _text_of(node):
        # Tolerate entries where the expected node is absent.
        return node.get_text(strip=True) if node else ""

    posts = []
    for entry in soup.select("div.r-ent"):
        link = entry.select_one("div.title a")
        if not link:
            continue
        href = link.get("href")
        if not href:
            continue
        posts.append({
            "title": link.get_text(strip=True),
            "url": "https://www.ptt.cc" + href,
            "author": _text_of(entry.select_one("div.author")),
            "date": _text_of(entry.select_one("div.date")),
            "nrec": _parse_nrec(entry.select_one("div.nrec span")),
        })
    return posts

def _clean_ptt_content(soup):
    for p in soup.select("div.push"): p.decompose()
    main = soup.select_one("#main-content")
    if not main: return "", ""
    metas = main.select("div.article-metaline, div.article-metaline-right")
    for m in metas: m.decompose()
    text = main.get_text("\n", strip=True)
    if "--" in text:
        text = text.split("--")[0].strip()
    title_tag = soup.select_one("span.article-meta-value")
    meta_title = title_tag.get_text(strip=True) if title_tag else ""
    return text, meta_title

# Worksheets that store the crawled posts and the keyword statistics
# (worksheet names were changed for the Gossiping board).
ws_ptt_posts = ensure_worksheet(sh, "ptt_gossiping_posts", PTT_HEADER)
ws_ptt_terms = ensure_worksheet(sh, "ptt_gossiping_terms", TERMS_HEADER)

# 更改函式名稱
def crawl_ptt_board(index_pages=3, min_push=0, keyword=""):
    """Crawl the board backwards from the newest index page and store new posts.

    Args:
        index_pages: number of index pages to walk; invalid input falls back to 1.
        min_push: minimum push score a post needs; invalid input falls back to 0.
        keyword: if non-empty, only titles containing it are kept. None is
            treated as empty and surrounding whitespace is ignored.

    Returns:
        (status message, full posts DataFrame). New rows are appended to the
        global `ptt_posts_df` and written to the posts worksheet. Posts whose
        URL is already stored are skipped.
    """
    global ptt_posts_df

    # Gradio Number inputs may arrive as float or None; never let that crash the crawl.
    try:
        index_pages = int(index_pages)
    except (TypeError, ValueError, OverflowError):
        index_pages = 1
    try:
        min_push = int(min_push)
    except (TypeError, ValueError, OverflowError):
        min_push = 0
    keyword = (keyword or "").strip()

    url = PTT_BOARD_INDEX
    all_rows = []
    seen_urls = set(ptt_posts_df["url"].tolist()) if not ptt_posts_df.empty else set()

    for i in range(index_pages):
        print(f"正在爬取第 {i+1}/{index_pages} 頁...")
        try:
            soup = _get_soup(url)
        except Exception as e:
            print(f"抓取索引頁失敗: {e}")
            break

        for p in _extract_post_list(soup):
            if p["nrec"] < min_push:
                continue
            if keyword and keyword not in p["title"]:
                continue
            if p["url"] in seen_urls:
                continue

            # Best effort: a failed article fetch still records the list entry
            # with empty content.
            try:
                art_soup = _get_soup(p["url"])
                content, meta_title = _clean_ptt_content(art_soup)
            except Exception as e:
                print(f"抓取內文失敗 ({p['url']}): {e}")
                content, meta_title = "", ""

            final_title = p["title"] if p["title"] else (meta_title or "（無標題）")
            # One timestamp per row so created_at and fetched_at agree.
            stamp = tznow().isoformat()

            all_rows.append({
                "post_id": str(uuid.uuid4())[:8],
                "title": final_title[:200],
                "url": p["url"], "date": p["date"], "author": p["author"],
                "nrec": str(p["nrec"]),
                "created_at": stamp,
                "fetched_at": stamp,
                "content": content
            })
            seen_urls.add(p["url"])

        prev = _get_prev_index_url(soup)
        if not prev:
            break
        url = prev
        time.sleep(0.2)  # be polite to the server between index pages

    if not all_rows:
        return "ℹ️ 沒有新文章符合條件（或內容已在 Sheet）", ptt_posts_df

    new_df = pd.DataFrame(all_rows, columns=PTT_HEADER)
    ptt_posts_df = pd.concat([ptt_posts_df, new_df], ignore_index=True)
    write_df(ws_ptt_posts, ptt_posts_df, PTT_HEADER)
    return f"✅ 取得 {len(all_rows)} 篇新文章（已寫入 Sheet）", ptt_posts_df

In [6]:
# ==============
# 文本分析（jieba + TF/IDF + bigram + Gemini 摘要）
# ==============
import re

# 【停用詞列表】
# Tokens dropped by _tokenize_zh() after segmentation.
# NOTE(review): _tokenize_zh() strips non-CJK characters and drops tokens of
# length 1, so the ASCII and single-character entries below look unreachable;
# they are kept so the set's contents are unchanged.
STOP_WORDS = {
    # --- URLs / images / alphanumerics ---
    "www", "com", "http", "https", "jpg", "png", "gif", "imgur", "jpeg",
    "Ptt", "ptt", "CC", "cc", "ip", "ios", "apk", "net",

    # --- Common PTT Gossiping terms (domain stop words) ---
    "新聞", "標題", "作者", "時間", "看板", "八卦", "問卦", "有沒有", "是不是",
    "爆", "Fw", "Re", "Live", "R", "G", "B", "https://",

    # --- Common Chinese stop words ---
    "的", "是", "在", "了", "我", "你", "他", "她", "它", "們", "人", "也",
    "這個", "那個", "一個", "中", "上", "下", "啦", "吧", "嗎", "啊", "喔",
    "表示", "覺得", "認為", "知道", "可能", "真的", "怎麼", "什麼", "所以",
    "就是", "今天", "明天", "昨天", "到底", "剛剛", "如果", "這樣", "那樣",
    "一堆", "一堆人", "為何",
}


# jieba is optional: without it _tokenize_zh() falls back to whitespace splitting.
# Only a failed import is tolerated; other errors (e.g. KeyboardInterrupt) propagate.
try:
    import jieba
except ImportError:
    jieba = None

# 【強化版斷詞器】
def _tokenize_zh(text):
    """Split `text` into Chinese tokens, dropping stop words.

    ASCII runs (letters, digits, URL punctuation) and then every remaining
    non-CJK character are replaced by spaces. The result is segmented with
    jieba when available, otherwise split on whitespace. Tokens of length 1
    and tokens listed in STOP_WORDS are discarded.
    """
    cleaned = re.sub(r"[a-zA-Z0-9\.@\-_/:?=&%#]+", " ", text)
    cleaned = re.sub(r"[^\u4e00-\u9fff]+", " ", cleaned)

    if jieba:
        stripped = (piece.strip() for piece in jieba.lcut(cleaned))
        candidates = [piece for piece in stripped if len(piece) > 1]
    else:
        candidates = [piece for piece in cleaned.split() if len(piece) > 1]

    return [token for token in candidates if token not in STOP_WORDS]


# 【已升級】加入 Gemini AI 摘要的 analyze_ptt_texts 函式
def analyze_ptt_texts(topk=5, min_df=2):
    """Run keyword analysis over all stored posts and summarise it with Gemini.

    Steps: tokenise each post (title + content), count term and document
    frequencies, compute mean TF-IDF per term, count adjacent-token bigrams,
    keep the top `topk` terms (TF-IDF first, raw frequency second), write
    them to the terms worksheet, and ask the global Gemini `model` for a
    short insight summary when it is available.

    Args:
        topk: number of keywords to keep; invalid input falls back to 5 and
            negative values are treated as 0.
        min_df: minimum document frequency for TF-IDF; invalid input falls
            back to 2. It is clamped to [1, number of posts] because
            scikit-learn rejects a min_df larger than the corpus.

    Returns:
        (status message, terms DataFrame, Markdown summary).
    """
    global ptt_posts_df, terms_df, model
    if ptt_posts_df.empty:
        return "📭 尚無已抓取的文章，請先在『Crawler』分頁取得文章。", pd.DataFrame(columns=TERMS_HEADER), ""

    # Normalise the UI inputs up front.
    try:
        topk = max(int(topk), 0)
    except (TypeError, ValueError):
        topk = 5
    try:
        min_df = int(min_df)
    except (TypeError, ValueError):
        min_df = 2

    # Cells read back from the sheet may be numbers or NaN; coerce to text.
    docs = [
        "\n".join("" if pd.isna(row[col]) else str(row[col]) for col in ("title", "content"))
        for _, row in ptt_posts_df.iterrows()
    ]
    min_df = max(1, min(min_df, len(docs)))

    # --- 1. Term frequency (freq) and document frequency (df_cnt) ---
    freq = Counter()
    df_cnt = defaultdict(int)
    token_docs = []

    print(f"正在分析 {len(docs)} 篇文章...")
    for doc in docs:
        toks = _tokenize_zh(doc)
        token_docs.append(toks)
        freq.update(toks)
        for t in set(toks):
            df_cnt[t] += 1

    # --- 2. TF-IDF ---
    try:
        from sklearn.feature_extraction.text import TfidfVectorizer

        vec = TfidfVectorizer(tokenizer=_tokenize_zh, lowercase=False, min_df=min_df)
        X = vec.fit_transform(docs)
        terms = vec.get_feature_names_out()
        tfidf_mean = X.mean(axis=0).A1
        tfidf_map = dict(zip(terms, tfidf_mean))
    except Exception as e:
        print(f"TF-IDF 失敗: {e}")
        if 'empty vocabulary' in str(e):
            return "⚠️ TF-IDF 分析失敗: 所有詞彙似乎都被停用詞過濾掉了。", terms_df, ""
        return f"⚠️ TF-IDF 分析失敗: {e}", terms_df, ""

    # --- 3. Bigrams: adjacent token pairs within each post ---
    bigram_freq = Counter()
    for toks in token_docs:
        bigram_freq.update(" ".join(pair) for pair in zip(toks, toks[1:]))
    top_bigrams = bigram_freq.most_common(20)

    # --- 4. Rank: TF-IDF first, raw frequency second ---
    candidates = list(freq.keys())
    candidates.sort(key=lambda t: (round(tfidf_map.get(t, 0.0), 6), freq[t]), reverse=True)
    top_terms = candidates[:topk]

    # --- 5. One example snippet per term (15 characters of context each side) ---
    examples = {}
    for term in top_terms:
        ex = ""
        for doc in docs:
            i = doc.find(term)
            if i != -1:
                s = max(0, i-15)
                e = min(len(doc), i+len(term)+15)
                ex = doc[s:e].replace("\n"," ")
                break
        examples[term] = ex

    # --- 6. Write the result table back to the sheet ---
    rows = [
        {
            "term": t,
            "freq": str(freq[t]),
            "df_count": str(df_cnt[t]),
            "tfidf_mean": f"{tfidf_map.get(t,0.0):.6f}",
            "examples": examples.get(t, "")
        }
        for t in top_terms
    ]
    terms_df = pd.DataFrame(rows, columns=TERMS_HEADER)
    write_df(ws_ptt_terms, terms_df, TERMS_HEADER)

    # --- 7. Gemini insight summary (only with a model and at least one keyword) ---
    gemini_summary = ""
    if model and top_terms:
        try:
            print("正在呼叫 Gemini 產生洞察摘要...")

            prompt_keywords = ", ".join(top_terms)
            prompt_bigrams = ", ".join([f"{bg} ({c}次)" for bg, c in top_bigrams])

            system_prompt = (
                "你是一位專業的 PTT 八卦版（Gossiping）輿情分析師。"
                "使用者剛剛爬取了 PTT 八卦版並進行了文本分析。"
                "請你根據以下提供的「Top K 關鍵詞」和「常見雙詞搭配」結果，"
                "用**繁體中文**產出：\n"
                "1. 五句條列式的「洞察摘要」。\n"
                "2. 一段約 120 字的「總結」。\n\n"
                "請不要提及「根據你提供的資料」，要聽起來像是你自己的分析。\n"
                "你的目標是總結目前 PTT 上的熱議焦點是什麼。\n"
                "---"
            )

            # The separator needs its own line; it used to run straight into
            # the closing instruction.
            user_content = (
                f"分析來源：PTT 八卦版\n"
                f"Top {len(top_terms)} 關鍵詞 (依 TF-IDF): {prompt_keywords}\n\n"
                f"Top 20 常見雙詞搭配: {prompt_bigrams}\n\n"
                "---\n"
                "請開始你的分析 (五句洞察 + 120 字總結)："
            )

            resp = model.generate_content([system_prompt, user_content])

            gemini_summary = f"### 🤖 Gemini AI 輿情洞察\n\n{resp.text}\n\n---\n"
            print("Gemini 洞察產出成功。")

        except Exception as e:
            print(f"⚠️ 呼叫 Gemini 失敗: {e}")
            gemini_summary = f"### 🤖 Gemini AI 輿情洞察\n\n(呼叫失敗: {e})\n\n---\n"
    elif not model:
        gemini_summary = "### 🤖 Gemini AI 輿情洞察\n\n(Gemini 模型未成功初始化，略過分析)\n\n---\n"
    else:
        gemini_summary = "### 🤖 Gemini AI 輿情洞察\n\n(沒有找到關鍵詞，略過分析)\n\n---\n"

    # --- 8. Markdown summary, Gemini section first ---
    md_lines = [gemini_summary]

    md_lines.append(f"### 關鍵詞 Top {len(top_terms)} (依 TF-IDF 優先，詞頻次之)")
    if not top_terms:
        md_lines.append("(沒有找到符合條件的關鍵詞)")
    for i, t in enumerate(top_terms, 1):
        md_lines.append(f"{i}. **{t}** — tfidf≈{float(tfidf_map.get(t,0.0)):.4f}；freq={freq[t]}；df={df_cnt[t]}")

    md_lines.append("\n### 常見雙詞搭配 (前 20)")
    if not top_bigrams:
        md_lines.append("(沒有找到雙詞搭配)")
    for i, (bg, c) in enumerate(top_bigrams, 1):
        md_lines.append(f"{i}. {bg} — {c}")

    return f"✅ 已完成 TF-IDF 與 Gemini 分析 (已過濾停用詞)，共 {len(docs)} 篇文章。", terms_df, "\n".join(md_lines)

In [7]:
# Column order of the "tasks" worksheet.
TASKS_HEADER = [
    "id","task","status","priority","est_min","start_time","end_time",
    "actual_min","pomodoros","due_date","labels","notes",
    "created_at","updated_at","completed_at","planned_for"
]
# Column order of the "pomodoro_logs" worksheet.
LOGS_HEADER = [
    "log_id","task_id","phase","start_ts","end_ts","minutes","cycles","note"
]
# The CLIPS header was removed.

# Open (or create) the two worksheets and make sure their header rows match.
ws_tasks = ensure_worksheet(sh, "tasks", TASKS_HEADER)
ws_logs  = ensure_worksheet(sh, "pomodoro_logs", LOGS_HEADER)
# ws_clips was removed.

def read_df(ws, header):
    """Load worksheet `ws` into a DataFrame with exactly the columns in `header`.

    A read failure is printed and yields an empty frame with those columns,
    as does an empty sheet. Missing cells become "", absent header columns
    are added as "", and the task counters (est_min, actual_min, pomodoros)
    are coerced to int with 0 for unparsable values.
    """
    try:
        frame = get_as_dataframe(ws, evaluate_formulas=True, header=0)
    except Exception as e:
        print(f"讀取 Sheet '{ws.title}' 失敗: {e}")
        return pd.DataFrame(columns=header)

    if frame is None or frame.empty:
        return pd.DataFrame(columns=header)

    frame = frame.fillna("")

    # Make sure every expected column exists.
    for column in header:
        if column not in frame.columns:
            frame[column] = ""

    # Numeric columns come back as text/float; normalise them to int.
    for column in ("est_min", "actual_min", "pomodoros"):
        if column in frame.columns:
            frame[column] = pd.to_numeric(frame[column], errors="coerce").fillna(0).astype(int)

    return frame[header]

def write_df(ws, df, header):
    """Replace the contents of worksheet `ws` with `df`, one column per `header` entry.

    The sheet is cleared and rewritten with the full `header` row followed by
    the data rendered as strings. Columns of `df` not in `header` are
    dropped; header columns missing from `df` are written as empty strings,
    so the header row always matches what ensure_worksheet() expects (a
    shortened header row would make it wipe the sheet on the next run).
    An empty `df` writes only the header row.
    """
    if df.empty:
        ws.clear()
        ws.update([header])
        return

    # reindex returns a new frame, so nothing is assigned into a slice of `df`.
    table = df.reindex(columns=header, fill_value="").astype(str)

    ws.clear()
    ws.update([list(header)] + table.values.tolist())

# refresh_all 已更新
def refresh_all():
    """Reload every worksheet and return (tasks, logs, ptt_posts, terms) DataFrames."""
    print("正在從 Google Sheet 重新整理所有資料...")
    sources = (
        (ws_tasks, TASKS_HEADER),
        (ws_logs, LOGS_HEADER),
        (ws_ptt_posts, PTT_HEADER),
        (ws_ptt_terms, TERMS_HEADER),
    )
    frames = tuple(read_df(worksheet, header).copy() for worksheet, header in sources)
    print("重新整理完成。")
    return frames

In [8]:
def add_task(task, priority, est_min, due_date, labels, notes, planned_for):
    """Append a new "todo" task to the global `tasks_df` and persist it.

    A blank (or None) task name is rejected with a warning instead of
    creating an unnamed row. A missing or non-numeric estimate falls back to
    25 minutes; a missing priority falls back to "M".

    Returns:
        (status message, tasks DataFrame, task choices for the dropdowns).
    """
    global tasks_df

    name = (task or "").strip()
    if not name:
        return "⚠️ 請輸入任務名稱", tasks_df, list_task_choices()

    try:
        minutes = int(est_min) if est_min else 25
    except (TypeError, ValueError):
        minutes = 25

    _now = tznow().isoformat()
    new = pd.DataFrame([{
        "id": str(uuid.uuid4())[:8], "task": name, "status": "todo",
        "priority": priority or "M",
        "est_min": minutes,
        "start_time": "", "end_time": "", "actual_min": 0, "pomodoros": 0,
        "due_date": due_date or "", "labels": labels or "", "notes": notes or "",
        "created_at": _now, "updated_at": _now, "completed_at": "",
        "planned_for": planned_for or ""
    }])
    tasks_df = pd.concat([tasks_df, new], ignore_index=True)
    write_df(ws_tasks, tasks_df, TASKS_HEADER)
    return "✅ 已新增任務", tasks_df, list_task_choices()

def update_task_status(task_id, new_status):
    """Set the status of task `task_id` in the global `tasks_df` and persist it.

    `completed_at` is stamped the first time a task becomes "done".

    Returns:
        (status message, tasks DataFrame, task choices for the dropdowns).
    """
    global tasks_df
    matches = tasks_df.index[tasks_df["id"] == task_id]
    if len(matches) == 0:
        return "⚠️ 找不到任務", tasks_df, list_task_choices()

    row = matches[0]
    tasks_df.loc[row, "status"] = new_status
    tasks_df.loc[row, "updated_at"] = tznow().isoformat()

    already_completed = bool(tasks_df.loc[row, "completed_at"])
    if new_status == "done" and not already_completed:
        tasks_df.loc[row, "completed_at"] = tznow().isoformat()

    write_df(ws_tasks, tasks_df, TASKS_HEADER)
    return "✅ 狀態已更新", tasks_df, list_task_choices()

def mark_done(task_id):
    """Shortcut for update_task_status(task_id, "done"); returns its result."""
    result = update_task_status(task_id, "done")
    return result

def recalc_task_actuals(task_id, pomodoro_min=25.0):
    """Recompute `actual_min` and `pomodoros` of a task from its work logs.

    Sums the "minutes" of every "work" log of `task_id` in the global
    `logs_df` and stores the total (truncated to int) plus the rounded
    number of pomodoros in the global `tasks_df`. Does nothing when the task
    is unknown. The caller is responsible for persisting `tasks_df`.

    Args:
        task_id: id of the task to update.
        pomodoro_min: length of one pomodoro in minutes (default 25).
    """
    global tasks_df, logs_df
    idx = tasks_df.index[tasks_df["id"] == task_id]
    if len(idx) == 0:
        return
    i = idx[0]

    is_work = (logs_df["task_id"] == task_id) & (logs_df["phase"] == "work")
    # Minutes read back from the sheet may be text or blank; treat bad cells as 0
    # instead of raising.
    minutes = pd.to_numeric(logs_df.loc[is_work, "minutes"], errors="coerce").fillna(0)
    total_min = float(minutes.sum())

    tasks_df.loc[i, "actual_min"] = int(total_min)
    tasks_df.loc[i, "pomodoros"] = int(round(total_min / float(pomodoro_min)))
    tasks_df.loc[i, "updated_at"] = tznow().isoformat()

def list_task_choices():
    """Return (label, id) pairs for the task dropdowns.

    Unfinished tasks come first, then finished ones; each group is ordered
    by `created_at`, newest first. Returns [] when there are no tasks.
    """
    global tasks_df
    if tasks_df.empty:
        return []

    pending = tasks_df[tasks_df['status'] != 'done'].sort_values("created_at", ascending=False)
    finished = tasks_df[tasks_df['status'] == 'done'].sort_values("created_at", ascending=False)
    ordered = pd.concat([pending, finished], ignore_index=True)

    choices = []
    for _, row in ordered.iterrows():
        label = f"[{row['status']}] (P:{row['priority']}) {row['task']} — {row['id']}"
        choices.append((label, row["id"]))
    return choices

In [9]:
_active_sessions = {}

def start_phase(task_id, phase, cycles):
    if not task_id: return "⚠️ 請先選擇任務"
    _active_sessions[task_id] = {
        "phase": phase,
        "start_ts": tznow().isoformat(),
        "cycles": int(cycles) if cycles else 1
    }
    return f"▶️ 已開始：{phase}（task: {task_id}）"

def end_phase(task_id, note):
    """Stop the running phase of `task_id`, log it, and return a status message.

    The elapsed minutes (rounded to 2 decimals) are appended to the global
    `logs_df` and written to the logs worksheet. After a "work" phase the
    task's actuals are recomputed and the tasks worksheet is rewritten.
    """
    global logs_df, tasks_df
    if task_id not in _active_sessions:
        return "⚠️ 尚未開始任何階段"

    session = _active_sessions.pop(task_id)
    phase = session["phase"]
    started = pd.to_datetime(session["start_ts"])
    finished = tznow()
    minutes = round((finished - started).total_seconds() / 60.0, 2)

    entry = {
        "log_id": str(uuid.uuid4())[:8],
        "task_id": task_id,
        "phase": phase,
        "start_ts": started.isoformat(),
        "end_ts": finished.isoformat(),
        "minutes": minutes,
        "cycles": int(session["cycles"]),
        "note": note or "",
    }
    logs_df = pd.concat([logs_df, pd.DataFrame([entry])], ignore_index=True)
    write_df(ws_logs, logs_df, LOGS_HEADER)

    # Only work phases count towards the task's actual time.
    if phase == "work":
        recalc_task_actuals(task_id)
        write_df(ws_tasks, tasks_df, TASKS_HEADER)

    return f"⏹️ 已結束：{phase}，紀錄 {minutes} 分鐘"

In [10]:
def generate_today_plan():
    """Build today's plan as Markdown, using Gemini when available.

    Candidate tasks are those due today or with planned_for == "today" that
    are not done, ordered by priority (H, M, L, other) and then by estimate.
    With a working global `model` the Gemini answer is returned as is.
    Without a model, or when the Gemini call raises, a notice followed by a
    rule-based plan (tasks dealt round-robin into morning / afternoon /
    evening) is returned.

    The fallback decision is tracked explicitly rather than by searching the
    output text, so a Gemini answer that happens to contain the notice
    wording is no longer mistaken for a failure.

    NOTE(review): this function is defined again, identically, in a later
    cell of this notebook; that later definition is the one in effect.
    """
    global tasks_df, model

    today = tznow().date().isoformat()
    cand = tasks_df[
        ((tasks_df["due_date"]==today) | (tasks_df["planned_for"].str.lower()=="today")) &
        (tasks_df["status"]!="done")
    ].copy()
    if cand.empty:
        return "📭 今天沒有標記的任務。請在 Tasks 分頁把任務的 due_date 設為今天或 planned_for 設為 today。"

    pr_order = {"H":0, "M":1, "L":2}
    cand["p_ord"] = cand["priority"].map(pr_order).fillna(3)
    cand = cand.sort_values(["p_ord","est_min"], ascending=[True, True])

    if model:
        try:
            # A newline now separates the Morning example from the Afternoon
            # heading; the two used to run together on one line.
            sys_prompt = (
                "你是一位任務規劃助理。請把輸入的任務（含估時與優先級）排成三段：morning、afternoon、evening，"
                "並給出每段的重點、順序、每項的時間預估與備註。總時數請大致符合任務估時總和。"
                "回傳以 Markdown 條列，格式：\n"
                "### Morning\n- [任務ID] 任務名稱（預估 xx 分）— 備註\n...\n"
                "### Afternoon\n...\n### Evening\n...\n"
            )
            items = [
                {
                    "id": r["id"], "task": r["task"], "est_min": int(r["est_min"]),
                    "priority": r["priority"]
                }
                for _, r in cand.iterrows()
            ]
            user_content = json.dumps({"today": today, "tasks": items}, ensure_ascii=False)

            resp = model.generate_content(sys_prompt + "\n\n" + user_content)
            return resp.text
        except Exception as e:
            notice = f"⚠️ Gemini 失敗：{e}\n\n改用規則式規劃。"
    else:
        notice = "🔧 未設定 Gemini API 金鑰或模型初始化失敗，使用規則式規劃。\n\n"

    # Rule-based fallback: deal the ordered tasks round-robin into three slots.
    slots = ("morning", "afternoon", "evening")
    buckets = {slot: [] for slot in slots}
    for i, (_, r) in enumerate(cand.iterrows()):
        buckets[slots[i % 3]].append(r)

    def sec_md(name, rows):
        if not rows:
            return f"### {name.title()}\n（無）\n"
        lines = [f"### {name.title()}"]
        for r in rows:
            lines.append(f"- [{r['id']}] {r['task']}（預估 {int(r['est_min'])} 分，P:{r['priority']}）")
        return "\n".join(lines) + "\n"

    rule_md = "\n".join(sec_md(slot, buckets[slot]) for slot in slots)
    return (notice + "\n---\n" + rule_md).strip()

def today_summary():
    """Return a one-line summary of today's planned tasks and completion rate.

    A task counts as planned for today when its due_date equals today's date
    or its planned_for is "today" (case-insensitive).
    """
    global tasks_df
    today = tznow().date().isoformat()

    due_today = tasks_df["due_date"] == today
    flagged_today = tasks_df["planned_for"].str.lower() == "today"
    planned = tasks_df[due_today | flagged_today]

    total = len(planned)
    done_n = len(planned[planned["status"] == "done"])
    rate = (done_n / total * 100) if total > 0 else 0
    return f"📅 今日計畫任務：{total}；✅ 完成：{done_n}；📈 完成率：{rate:.1f}%"

In [11]:
def generate_today_plan():
    """Build today's plan as Markdown, using Gemini when available.

    Candidate tasks are those due today or with planned_for == "today" that
    are not done, ordered by priority (H, M, L, other) and then by estimate.
    With a working global `model` the Gemini answer is returned as is.
    Without a model, or when the Gemini call raises, a notice followed by a
    rule-based plan (tasks dealt round-robin into morning / afternoon /
    evening) is returned.

    The fallback decision is tracked explicitly rather than by searching the
    output text, so a Gemini answer that happens to contain the notice
    wording is no longer mistaken for a failure.

    NOTE(review): an identical definition of this function appears in the
    previous cell of this notebook; this later one is the definition in
    effect, so one of the two cells can be removed.
    """
    global tasks_df, model

    today = tznow().date().isoformat()
    cand = tasks_df[
        ((tasks_df["due_date"]==today) | (tasks_df["planned_for"].str.lower()=="today")) &
        (tasks_df["status"]!="done")
    ].copy()
    if cand.empty:
        return "📭 今天沒有標記的任務。請在 Tasks 分頁把任務的 due_date 設為今天或 planned_for 設為 today。"

    pr_order = {"H":0, "M":1, "L":2}
    cand["p_ord"] = cand["priority"].map(pr_order).fillna(3)
    cand = cand.sort_values(["p_ord","est_min"], ascending=[True, True])

    if model:
        try:
            # A newline now separates the Morning example from the Afternoon
            # heading; the two used to run together on one line.
            sys_prompt = (
                "你是一位任務規劃助理。請把輸入的任務（含估時與優先級）排成三段：morning、afternoon、evening，"
                "並給出每段的重點、順序、每項的時間預估與備註。總時數請大致符合任務估時總和。"
                "回傳以 Markdown 條列，格式：\n"
                "### Morning\n- [任務ID] 任務名稱（預估 xx 分）— 備註\n...\n"
                "### Afternoon\n...\n### Evening\n...\n"
            )
            items = [
                {
                    "id": r["id"], "task": r["task"], "est_min": int(r["est_min"]),
                    "priority": r["priority"]
                }
                for _, r in cand.iterrows()
            ]
            user_content = json.dumps({"today": today, "tasks": items}, ensure_ascii=False)

            resp = model.generate_content(sys_prompt + "\n\n" + user_content)
            return resp.text
        except Exception as e:
            notice = f"⚠️ Gemini 失敗：{e}\n\n改用規則式規劃。"
    else:
        notice = "🔧 未設定 Gemini API 金鑰或模型初始化失敗，使用規則式規劃。\n\n"

    # Rule-based fallback: deal the ordered tasks round-robin into three slots.
    slots = ("morning", "afternoon", "evening")
    buckets = {slot: [] for slot in slots}
    for i, (_, r) in enumerate(cand.iterrows()):
        buckets[slots[i % 3]].append(r)

    def sec_md(name, rows):
        if not rows:
            return f"### {name.title()}\n（無）\n"
        lines = [f"### {name.title()}"]
        for r in rows:
            lines.append(f"- [{r['id']}] {r['task']}（預估 {int(r['est_min'])} 分，P:{r['priority']}）")
        return "\n".join(lines) + "\n"

    rule_md = "\n".join(sec_md(slot, buckets[slot]) for slot in slots)
    return (notice + "\n---\n" + rule_md).strip()

def today_summary():
    """Return a one-line summary of today's planned tasks and completion rate.

    A task counts as planned for today when its due_date equals today's date
    or its planned_for is "today" (case-insensitive).
    """
    global tasks_df
    today = tznow().date().isoformat()

    due_today = tasks_df["due_date"] == today
    flagged_today = tasks_df["planned_for"].str.lower() == "today"
    planned = tasks_df[due_today | flagged_today]

    total = len(planned)
    done_n = len(planned[planned["status"] == "done"])
    rate = (done_n / total * 100) if total > 0 else 0
    return f"📅 今日計畫任務：{total}；✅ 完成：{done_n}；📈 完成率：{rate:.1f}%"

In [12]:
# 讀取所有資料
# Populate the module-level DataFrames that the handler functions read and
# update through `global` declarations.
tasks_df, logs_df, ptt_posts_df, terms_df = refresh_all()
print("初始資料載入完畢。")

正在從 Google Sheet 重新整理所有資料...
重新整理完成。
初始資料載入完畢。


In [13]:
def _refresh():
    """Reload all data from the sheet and return values for the refresh outputs.

    Output order: tasks grid, logs grid, task dropdown, summary line, posts
    grid, terms grid, pomodoro task dropdown.

    The two dropdown slots receive `gr.update(choices=...)`. Returning the
    bare list would be interpreted as the dropdown's selected value and would
    leave its available choices unchanged.
    """
    global tasks_df, logs_df, ptt_posts_df, terms_df
    tasks_df, logs_df, ptt_posts_df, terms_df = refresh_all()
    choices = list_task_choices()
    return (
        tasks_df, logs_df, gr.update(choices=choices), today_summary(),
        ptt_posts_df, terms_df, gr.update(choices=choices)
    )

def _with_dropdown_refresh(handler):
    """Adapt a task handler returning (message, table, choices) to four outputs.

    The task events below are wired to [message, tasks grid, task dropdown,
    pomodoro dropdown], but the handlers return three values, and a bare
    choice list would be taken as a dropdown value. The wrapper fans the
    choices out to both dropdowns as `gr.update(choices=...)`.
    """
    def _wrapped(*args):
        message, table, choices = handler(*args)
        return message, table, gr.update(choices=choices), gr.update(choices=choices)
    return _wrapped


with gr.Blocks(title="待辦清單＋番茄鐘＋AI 計畫") as demo:
    gr.Markdown("# ✅ 待辦清單與番茄鐘（Google Sheet＋Gradio＋PTT分析＋AI 計畫）")
    with gr.Row():
        btn_refresh = gr.Button("🔄 重新整理（從 Sheet 載入所有資料）")
        out_summary = gr.Markdown(today_summary())

    with gr.Tab("Tasks"):
        with gr.Row():
            with gr.Column(scale=2):
                task = gr.Textbox(label="任務名稱", placeholder="寫 HW3 報告 / 修正 SQL / …")
                priority = gr.Dropdown(["H","M","L"], value="M", label="優先級")
                est_min = gr.Number(value=25, label="預估時間（分鐘）", precision=0)
                due_date = gr.Textbox(label="到期日（YYYY-MM-DD，可空白）")
                labels = gr.Textbox(label="標籤（逗號分隔，可空白）")
                notes = gr.Textbox(label="備註（可空白）")
                planned_for = gr.Dropdown(["","today","tomorrow"], value="", label="規劃歸屬")
                btn_add = gr.Button("➕ 新增任務")
                msg_add = gr.Markdown()
            with gr.Column(scale=3):
                grid_tasks = gr.Dataframe(value=tasks_df, label="任務清單（直接從 Sheet 來）", interactive=False)

        with gr.Row():
            task_choice = gr.Dropdown(choices=list_task_choices(), label="選取任務（用於更新）")
            new_status = gr.Dropdown(["todo","in-progress","done"], value="in-progress", label="更新狀態")
            btn_update = gr.Button("✏️ 更新狀態")
            btn_done = gr.Button("✅ 直接標記完成")
            msg_update = gr.Markdown()

    with gr.Tab("Pomodoro"):
        with gr.Row():
            sel_task = gr.Dropdown(choices=list_task_choices(), label="選擇任務")
            cycles = gr.Number(value=1, precision=0, label="番茄數（僅作紀錄）")
        with gr.Row():
            btn_start_work = gr.Button("▶️ 開始工作")
            note_work = gr.Textbox(label="工作備註（可空白）")
            btn_end_work = gr.Button("⏹️ 結束工作並記錄")
        with gr.Row():
            btn_start_break = gr.Button("🍵 開始休息")
            note_break = gr.Textbox(label="休息備註（可空白）")
            btn_end_break = gr.Button("⏹️ 結束休息並記錄")
        msg_pomo = gr.Markdown()
        grid_logs = gr.Dataframe(value=logs_df, label="番茄鐘紀錄", interactive=False)

    with gr.Tab("AI Plan"):
        gr.Markdown("把**今天的任務**排成 **morning / afternoon / evening** 三段行動計畫。若未設 GEMINI_API_KEY，會用規則式。")
        btn_plan = gr.Button("🧠 產生今日計畫")
        out_plan = gr.Markdown()

    with gr.Tab("Crawler"):
        gr.Markdown("# PTT 八卦版分析器")
        gr.Markdown("從 PTT 八卦版爬取最新文章，並進行關鍵詞分析。")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 1. 爬取文章")
                ptt_pages = gr.Number(value=3, label="爬取頁數", precision=0)
                ptt_push = gr.Number(value=20, label="最少推文數", precision=0)
                ptt_keyword = gr.Textbox(label="標題關鍵詞 (可空白)")
                btn_crawl_ptt = gr.Button("🕷️ 開始爬取 PTT")
                msg_ptt_crawl = gr.Markdown()

            with gr.Column(scale=1):
                gr.Markdown("### 2. 分析文本")
                gr.Markdown("（會自動分析下方「爬蟲原始文章」列表中的所有內容）")
                ptt_topk = gr.Number(value=5, label="Top K 關鍵詞", precision=0)
                ptt_mindf = gr.Number(value=2, label="最小文件頻率 (min_df)", precision=0)
                btn_analyze_ptt = gr.Button("📊 開始分析文本")
                msg_ptt_analyze = gr.Markdown()

        gr.Markdown("---")
        gr.Markdown("### 分析摘要 (Top K 與 Bigrams)")
        out_ptt_summary = gr.Markdown(" (點擊「開始分析文本」後顯示)")

        gr.Markdown("### 關鍵詞統計表 (Terms)")
        grid_ptt_terms = gr.Dataframe(value=terms_df, label="關鍵詞統計", interactive=False)

        gr.Markdown("### 爬蟲原始文章 (Posts)")
        grid_ptt_posts = gr.Dataframe(value=ptt_posts_df, label="爬取文章列表", interactive=False)

    with gr.Tab("Summary"):
        btn_summary = gr.Button("📊 重新計算今日完成率")
        out_summary2 = gr.Markdown()

    # === Event wiring ===

    btn_refresh.click(
        _refresh,
        outputs=[
            grid_tasks, grid_logs, task_choice, out_summary,
            grid_ptt_posts, grid_ptt_terms, sel_task
        ]
    )

    # The three task handlers return 3 values; the wrapper supplies the 4 outputs.
    btn_add.click(
        _with_dropdown_refresh(add_task),
        inputs=[task, priority, est_min, due_date, labels, notes, planned_for],
        outputs=[msg_add, grid_tasks, task_choice, sel_task]
    )

    btn_update.click(
        _with_dropdown_refresh(update_task_status),
        inputs=[task_choice, new_status],
        outputs=[msg_update, grid_tasks, task_choice, sel_task]
    )

    btn_done.click(
        _with_dropdown_refresh(mark_done),
        inputs=[task_choice],
        outputs=[msg_update, grid_tasks, task_choice, sel_task]
    )

    btn_start_work.click(
        start_phase, inputs=[sel_task, gr.State("work"), cycles], outputs=[msg_pomo]
    )
    btn_end_work.click(
        end_phase, inputs=[sel_task, note_work], outputs=[msg_pomo]
    )
    btn_start_break.click(
        start_phase, inputs=[sel_task, gr.State("break"), cycles], outputs=[msg_pomo]
    )
    btn_end_break.click(
        end_phase, inputs=[sel_task, note_break], outputs=[msg_pomo]
    )

    btn_plan.click(generate_today_plan, outputs=[out_plan])

    btn_crawl_ptt.click(
        crawl_ptt_board,
        inputs=[ptt_pages, ptt_push, ptt_keyword],
        outputs=[msg_ptt_crawl, grid_ptt_posts]
    )

    btn_analyze_ptt.click(
        analyze_ptt_texts,
        inputs=[ptt_topk, ptt_mindf],
        outputs=[msg_ptt_analyze, grid_ptt_terms, out_ptt_summary]
    )

    btn_summary.click(today_summary, outputs=[out_summary2])

In [None]:
# Start the Gradio app; per the captured output below, debug=True keeps this
# cell running in Colab so errors and logs stay visible.
demo.launch(debug=True)

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://10f33ab9b84858dfca.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


正在爬取第 1/3 頁...
正在爬取第 2/3 頁...
正在爬取第 3/3 頁...


Building prefix dict from the default dictionary ...
DEBUG:jieba:Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
DEBUG:jieba:Loading model from cache /tmp/jieba.cache


正在分析 6 篇文章...


Loading model cost 1.084 seconds.
DEBUG:jieba:Loading model cost 1.084 seconds.
Prefix dict has been built successfully.
DEBUG:jieba:Prefix dict has been built successfully.


正在呼叫 Gemini 產生洞察摘要...
Gemini 洞察產出成功。
