In [None]:
!pip install newsdataapi newspaper3k lxml_html_clean

In [None]:
import json
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime

from newsdataapi import NewsDataApiClient
from newspaper import Article

# 1. Configure the API client.
# NOTE(review): an API key is committed in source here. Set NEWSDATA_API_KEY in
# the environment to override it; the literal is kept only as a
# backward-compatible fallback and should be rotated/removed.
api = NewsDataApiClient(
    apikey=os.environ.get("NEWSDATA_API_KEY") or "pub_70b63fee186742658db1dfb76d4a52c7"
)

# 2. Keywords to search for.
keywords = ["Nvidia", "AMD", "Intel"]

# 3. One (initially empty) list of article records per keyword.
all_articles = {key: [] for key in keywords}

# ฟังก์ชันโหลด article
def fetch_article(news: dict) -> dict:
    """Build one article record from a single API result item.

    Tries to download and parse the full text behind ``news['link']`` with
    newspaper's ``Article``. The API-supplied ``title`` / ``description`` are
    used instead when there is no link, when downloading or parsing fails,
    when the extracted text is empty, or when the text contains
    "Copyright" / "All rights reserved".

    Args:
        news: Mapping for one result item. The keys read are ``link``,
            ``source_id``, ``pubDate``, ``title`` and ``description``; each
            may be missing or ``None``.

    Returns:
        A dict with the keys ``headline``, ``content``, ``source``, ``url``
        and ``timestamp``. ``headline``, ``content`` and ``source`` are
        always strings (``''`` when unavailable). ``timestamp`` is an ISO
        date string when ``pubDate`` matches ``%Y-%m-%d %H:%M:%S``, the raw
        ``pubDate`` value when it does not, and ``None`` when it is absent.
    """
    url = news.get('link')
    timestamp = _parse_pub_date(news.get('pubDate'))

    # Without a link there is nothing to download; use the API data directly
    # instead of provoking an exception inside the downloader.
    if not url:
        return _api_record(news, url, timestamp)

    try:
        # Load the full text with newspaper.
        article = Article(url)
        article.download()
        article.parse()
        content = (article.text or '').strip()
        headline = article.title
    except Exception as e:
        # Best-effort: any download/parse failure falls back to the API data.
        print(f"Error downloading {url}: {e}")
        return _api_record(news, url, timestamp)

    # Empty text or a copyright notice is treated as an unusable extraction.
    if not content or "Copyright" in content or "All rights reserved" in content:
        return _api_record(news, url, timestamp)

    return {
        # An empty parsed title falls back to the API title.
        "headline": headline or (news.get('title') or ''),
        "content": content,
        "source": news.get('source_id') or '',
        "url": url,
        "timestamp": timestamp,
    }


def _parse_pub_date(pub_date_str):
    """Return pubDate as an ISO date, the raw value if unparseable, or None if empty."""
    if not pub_date_str:
        return None
    try:
        return datetime.strptime(pub_date_str, "%Y-%m-%d %H:%M:%S").date().isoformat()
    except (ValueError, TypeError):
        # Unknown format (or non-string value): keep whatever the API sent.
        return pub_date_str


def _api_record(news, url, timestamp):
    """Build a record from the API's own title/description, mapping None to ''."""
    return {
        "headline": news.get('title') or '',
        "content": news.get('description') or '',
        "source": news.get('source_id') or '',
        "url": url,
        "timestamp": timestamp,
    }

# 4. Query the API once per keyword and collect a record for every hit.
for keyword in keywords:
    print(f"Fetching news for {keyword}...")
    response = api.news_api(
        q=keyword,
        category="business,lifestyle,technology,top,science",
        language="en",
        country="us",
    )
    articles = response.get('results', [])

    # Download this keyword's articles concurrently. Records are stored in
    # completion order, which may differ from the order the API returned.
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(fetch_article, item) for item in articles]
        for future in as_completed(futures):
            article_data = future.result()
            all_articles[keyword].append(article_data)
            print(f"Added: {article_data['headline']}")

# 5. Name the output file after today's date (DDMMYYYY).
today_str = f"{datetime.now():%d%m%Y}"
filename = f"{today_str}.json"

# 6. Write every keyword's records to a single UTF-8 JSON file.
with open(filename, "w", encoding="utf-8") as f:
    f.write(json.dumps(all_articles, ensure_ascii=False, indent=2))

print(f"Saved to {filename}")
