In [None]:
# ===== 1セル実行でOK（Colab/ローカル両対応） =====
!pip -q install feedparser trafilatura gradio

import feedparser, trafilatura, textwrap, re, hashlib, html
from datetime import datetime as dt, timezone, timedelta
import gradio as gr

# ---- Colab detection ----
# IS_COLAB is True only when the google.colab package can be imported,
# i.e. when this cell is running inside a Google Colab runtime.
try:
    import google.colab  # type: ignore  # noqa: F401
except Exception:
    IS_COLAB = False
else:
    IS_COLAB = True

# ---- Shared constants ----
# Fixed UTC+9 offset used when stamping the digest date and file name.
JST = timezone(timedelta(hours=9))

# Marker appended to summaries that had to be cut short.
ELLIPSIS = "…"

# Topic slugs offered as check boxes in the UI; each slug is turned into
# one Zenn topic feed and one Qiita tag feed by build_feeds().
DEFAULT_TOPICS = [
    "python",
    "csharp",
    "wpf",
    "unity",
    "dotnet",
    "security",
    "react",
    "nextjs",
    "ai",
    "llm",
    "aws",
    "java",
    "c",
    "docker",
]

def normalize_list_csv(csv_text):
    """Split a comma-separated string into trimmed, lower-cased tokens.

    Besides the ASCII comma, the full-width comma (U+FF0C) and the Japanese
    ideographic comma (U+3001) are accepted as separators, because keywords
    typed with a Japanese input method usually contain those instead.

    Args:
        csv_text: Raw user input; may be ``None`` or empty.

    Returns:
        The non-empty tokens in input order, stripped of surrounding
        whitespace and lower-cased. An empty list for empty input.
    """
    if not csv_text:
        return []
    pieces = (piece.strip() for piece in re.split(r"[,，、]", csv_text))
    return [piece.lower() for piece in pieces if piece]

def build_feeds(topics):
    """Build the Zenn and Qiita feed URLs for the given topics.

    Each topic yields two URLs: the Zenn topic feed (topic lower-cased) and
    the Qiita tag feed (topic as given). The topic is percent-encoded as a
    single path segment so that characters such as ``#``, ``+``, spaces or
    non-ASCII text cannot break the URL (a raw ``#`` would otherwise start
    a fragment and cut the path short).

    Args:
        topics: Iterable of topic/tag names; may be ``None`` or empty.
            Blank entries are ignored.

    Returns:
        A sorted list of unique feed URLs (empty when there are no topics).
    """
    # Local import keeps this block independent of the file's import header.
    from urllib.parse import quote

    feeds = set()
    for topic in topics or ():
        name = topic.strip()
        if not name:
            continue
        zenn_slug = quote(name.lower(), safe="")
        qiita_tag = quote(name, safe="")
        feeds.add(f"https://zenn.dev/topics/{zenn_slug}/feed")
        feeds.add(f"https://qiita.com/tags/{qiita_tag}/feed")
    return sorted(feeds)

def hit_by_keywords_or_topics(title, body, keywords, topics):
    """Report whether any keyword or topic occurs in the title or body.

    The title and body are joined and lower-cased; the needles are used as
    given, so callers are expected to pass them already lower-cased.
    Matching is a plain substring test, which means a very short needle
    (for example ``"c"``) matches almost any text.

    Args:
        title: Article title.
        body: Article text or feed summary.
        keywords: Iterable of keyword needles, or a falsy value for none.
        topics: Iterable of topic needles, or a falsy value for none.

    Returns:
        ``True`` when at least one needle is found, otherwise ``False``.
    """
    haystack = f"{title} {body}".lower()
    for needles in (keywords, topics):
        if needles and any(needle in haystack for needle in needles):
            return True
    return False

def item_key(url):
    """Return the hexadecimal MD5 digest of *url* (UTF-8 encoded).

    The digest is only used as a compact de-duplication key, not for any
    security purpose.
    """
    digest = hashlib.md5()
    digest.update(url.encode("utf-8"))
    return digest.hexdigest()

def shorten(text, width=260):
    """Collapse whitespace in *text* and truncate it to at most *width* chars.

    Truncation is character based. ``textwrap.shorten`` drops whole
    whitespace-delimited words, so Japanese text (which has few or no
    spaces) longer than *width* would be reduced to just the placeholder.
    When a space lies close to the cut point the cut is moved back to it so
    that space-separated text is still not broken in the middle of a word.

    Args:
        text: Text to shorten; ``None`` is treated as an empty string.
        width: Maximum length of the result, including the ellipsis.

    Returns:
        The normalised text if it fits, otherwise a prefix followed by
        ``ELLIPSIS``; never longer than *width*.
    """
    flat = re.sub(r"\s+", " ", (text or "")).strip()
    if len(flat) <= width:
        return flat

    budget = width - len(ELLIPSIS)
    if budget <= 0:
        # No room for any text: return as much of the marker as fits.
        return ELLIPSIS[:max(width, 0)]

    clipped = flat[:budget]
    if flat[budget] != " ":
        # The cut landed inside a run of non-space characters. Back up to
        # the previous space only if that costs at most 20 characters, so
        # mostly space-free (e.g. Japanese) text keeps its full budget.
        last_space = clipped.rfind(" ")
        if last_space > 0 and last_space >= budget - 20:
            clipped = clipped[:last_space]
    return clipped.rstrip() + ELLIPSIS

def _fetch_body(entry, link):
    """Return the article text behind *link*, or the feed summary as fallback."""
    fallback = getattr(entry, "summary", "") or ""
    try:
        raw = trafilatura.fetch_url(link)
        return trafilatura.extract(raw, favor_recall=True) or fallback
    except Exception:
        # Best effort by design: a failed download or extraction must not
        # abort the whole digest, so fall back to the feed's own summary.
        return fallback


def fetch_and_summarize(selected_topics, keywords_csv, max_per_feed, max_total):
    """Collect matching articles from the Zenn/Qiita feeds and summarise them.

    Feeds are built from the selected topics. When only keywords are given,
    the feeds of ``DEFAULT_TOPICS`` are searched instead, so a keyword-only
    query can still return results (feeds exist only per topic).

    Args:
        selected_topics: Topic names chosen in the UI; may be ``None``.
        keywords_csv: Comma-separated keywords; may be ``None`` or empty.
        max_per_feed: Maximum number of articles picked from one feed.
        max_total: Maximum number of articles picked overall.

    Returns:
        A list of dicts with the keys ``title``, ``link``, ``summary`` and
        ``source`` (the feed URL). Empty when neither topics nor keywords
        were supplied or nothing matched.
    """
    topics = [t.lower() for t in (selected_topics or [])]
    keywords = normalize_list_csv(keywords_csv)

    if not topics and not keywords:
        return []

    # Keywords without topics: search the default topic feeds rather than
    # returning nothing. Matching below still uses only the user's input.
    feeds = build_feeds(topics or DEFAULT_TOPICS)
    if not feeds:
        return []

    seen = set()
    picked = []
    for feed_url in feeds:
        if len(picked) >= max_total:
            break
        taken = 0
        for entry in feedparser.parse(feed_url).entries:
            if taken >= max_per_feed or len(picked) >= max_total:
                break

            link = getattr(entry, "link", "")
            # "or ''" guards against a title that is present but None,
            # which html.unescape would reject.
            title = html.unescape(getattr(entry, "title", "(no title)") or "")
            if not link or not title:
                continue

            # De-duplicate BEFORE downloading: the same article commonly
            # appears in several topic feeds, and fetching is the slow part.
            key = item_key(link)
            if key in seen:
                continue
            seen.add(key)

            body = _fetch_body(entry, link)
            if not hit_by_keywords_or_topics(title, body, keywords, topics):
                continue

            picked.append({
                "title": title.strip(),
                "link": link,
                "summary": shorten(body, 260),
                "source": feed_url,
            })
            taken += 1
    return picked

def to_markdown(items, date_str):
    """Render the collected articles as a Markdown digest.

    Articles are ordered by feed URL (``source``) and then by
    case-insensitive title. Each article becomes a level-2 heading, a link
    line whose text is the URL itself, and its summary paragraph.

    Args:
        items: Iterable of dicts with ``title``, ``link``, ``summary`` and
            ``source`` keys.
        date_str: Date text embedded in the document heading.

    Returns:
        The Markdown document as a single newline-joined string.
    """
    lines = [f"# Zenn / Qiita 技術記事ダイジェスト（{date_str}）", ""]
    ordered = sorted(items, key=lambda art: (art["source"], art["title"].lower()))
    for art in ordered:
        url = art["link"]
        lines.extend((f"## {art['title']}", f"[{url}]({url})", "", art["summary"], ""))
    return "\n".join(lines)

def run_md(selected_topics, keywords_csv, max_per_feed, max_total):
    """Generate the digest, save it as a Markdown file and report the result.

    This is the click handler for the generate button. It returns two
    values: the Markdown text for the preview and a ``gr.update`` for the
    second output component (a path text box on Colab, a download button
    elsewhere).

    Args:
        selected_topics: Topic names chosen in the UI.
        keywords_csv: Comma-separated keywords from the text box.
        max_per_feed: Per-feed article limit.
        max_total: Overall article limit.

    Returns:
        ``(markdown_text, component_update)``. When nothing matched, the
        text is a "no articles" notice and no file is written.
    """
    # Local import keeps this block independent of the file's import header.
    import os

    # Read the clock once so the heading date and the file-name stamp can
    # never disagree (two reads could straddle a minute or midnight).
    now = dt.now(JST)
    today = now.strftime("%Y-%m-%d")
    ts = now.strftime("%Y%m%d_%H%M")
    items = fetch_and_summarize(selected_topics, keywords_csv, max_per_feed, max_total)

    if not items:
        if IS_COLAB:
            return "（該当記事なし）", gr.update(value="/content/（保存なし）", visible=True)
        return "（該当記事なし）", gr.update(visible=False)

    md = to_markdown(items, today)
    md_name = f"技術記事まとめ_{ts}.md"
    with open(md_name, "w", encoding="utf-8") as f:
        f.write(md)

    # Report where the file was actually written instead of assuming the
    # working directory is /content (the Colab default, but not guaranteed).
    saved_path = os.path.abspath(md_name)
    return md, gr.update(value=saved_path, visible=True)

# ---- Theme & CSS (light look, indigo/violet buttons, emphasized label fonts) ----
# NOTE(review): the original note says the UI is "fixed to light"; no explicit
# light/dark switch is visible in this block -- confirm if that matters.
# Gradio "Soft" theme with an indigo / violet / slate palette.
theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="violet", neutral_hue="slate")
# Custom CSS passed to gr.Blocks:
#   * caps the container width at 880px and centers it;
#   * gives buttons an indigo-to-violet gradient (the bare `button` selector
#     applies to every <button>, not only the primary one);
#   * bolds/enlarges labels selected via label[for="tags"] / label[for="keywords"]
#     NOTE(review): these rely on a <label for=...> matching the elem_id values
#     "tags"/"keywords" -- verify the selectors match the rendered DOM;
#   * sets a 15px font on .gr-checkbox / .gr-textbox.
css = """
.gradio-container { max-width: 880px !important; margin: 0 auto; font-size: 15px; }
button, .gr-button--primary {
  background: linear-gradient(90deg, #6366F1, #8B5CF6) !important;
  color: #fff !important; border: none !important;
  box-shadow: 0 6px 18px rgba(99,102,241,.35) !important;
}
label[for="tags"], label[for="keywords"] {
  font-weight: bold !important;
  font-size: 16px !important;
  color: #111827 !important;
}
.gr-checkbox, .gr-textbox { font-size: 15px !important; }
"""

# ---- UI ----
# Layout, top to bottom: intro text, topic check boxes, keyword box, the two
# limit sliders, the generate button, the file output (path text box on
# Colab, download button elsewhere) and finally the Markdown preview.
with gr.Blocks(title="技術記事ダイジェスト（Zenn/Qiita）", theme=theme, css=css) as demo:
    gr.Markdown("### 📰 技術記事ダイジェスト（Zenn / Qiita）\n指定した **タグ or キーワード一致** の記事だけを収集します。")

    topics = gr.CheckboxGroup(
        choices=DEFAULT_TOPICS,
        value=["ai", "llm"],  # pre-select the AI and LLM topics
        label="タグ（複数選択）",
        elem_id="tags",
    )
    keywords = gr.Textbox(
        label="キーワード（カンマ区切り・任意）",
        placeholder="例: C#, Unity, セキュリティ",
        elem_id="keywords",
    )
    with gr.Row():
        max_per_feed = gr.Slider(1, 20, value=8, step=1, label="各フィード上限")
        max_total = gr.Slider(5, 100, value=40, step=5, label="全体上限")

    # Generate button
    btn_md = gr.Button("▶ 生成（MDのみ）", variant="primary")

    # File output sits directly under the button. Colab shows the saved
    # path; other environments get a download button that stays hidden
    # until a file exists.
    if IS_COLAB:
        md_path = gr.Textbox(label="MD保存先（Colab）", interactive=False, visible=True)
        file_output = md_path
    else:
        md_dl = gr.DownloadButton(label="⬇ MDをダウンロード", visible=False)
        file_output = md_dl

    # Preview of the generated Markdown
    md_preview = gr.Markdown(label="プレビュー")

    # One wiring for both environments: only the second output differs.
    btn_md.click(
        fn=run_md,
        inputs=[topics, keywords, max_per_feed, max_total],
        outputs=[md_preview, file_output],
    )

demo.launch(share=False)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.
* To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

