In [12]:
import feedparser
import requests
import time
from datetime import datetime
from urllib.parse import urlparse, parse_qs, unquote, quote_plus
import hashlib
import re
import json

In [2]:
def get_bing_news_rss(topic, max_articles=5):
    """Fetch up to ``max_articles`` unique news items for ``topic`` from Bing News RSS.

    Each feed entry's link is expected to carry the publisher address in a
    percent-encoded ``url`` query parameter. Entries whose link has no such
    parameter (or whose target is not http/https) are skipped.

    Args:
        topic: Free-text search query; it is URL-encoded before being sent.
        max_articles: Maximum number of articles to return. Zero or a
            negative value yields an empty list.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``published_at``,
        ``source`` (always ``"Bing"``) and ``topic``, in feed order and with
        duplicate target URLs removed.
    """
    # The size check in the loop only runs after an append, so a non-positive
    # limit has to be rejected up front or one article would still be returned.
    if max_articles <= 0:
        return []

    query = quote_plus(topic)
    rss_url = f"https://www.bing.com/news/search?q={query}&format=rss"
    feed = feedparser.parse(rss_url)

    articles = []
    seen = set()

    for entry in feed.entries:
        # parse_qs isolates the exact ``url`` parameter (not e.g. ``imgurl``)
        # and decodes *every* percent-escape, so targets that contain their
        # own query string ("?", "=", "&") come back intact.
        link = entry.get("link") or ""
        url = parse_qs(urlparse(link).query).get("url", [""])[0]
        if not url.lower().startswith(("http://", "https://")):
            continue
        if url in seen:
            continue
        seen.add(url)
        articles.append({
            "title": entry.get("title", ""),
            "url": url,
            "published_at": entry.get("published", ""),
            "source": "Bing",
            "topic": topic
        })
        if len(articles) >= max_articles:
            break
    return articles

In [3]:
def get_yahoo_news_rss(topic, max_articles=5):
    """Fetch up to ``max_articles`` unique news items for ``topic`` from Yahoo News RSS.

    Args:
        topic: Free-text search query; it is URL-encoded before being sent.
        max_articles: Maximum number of articles to return. Zero or a
            negative value yields an empty list.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``published_at``,
        ``source`` (always ``"Yahoo"``) and ``topic``, in feed order and with
        duplicate links removed. The entry link is used as-is for ``url``.
    """
    # The size check in the loop only runs after an append, so a non-positive
    # limit has to be rejected up front or one article would still be returned.
    if max_articles <= 0:
        return []

    query = quote_plus(topic)
    rss_url = f"https://news.search.yahoo.com/rss?p={query}"
    feed = feedparser.parse(rss_url)

    articles = []
    seen = set()

    for entry in feed.entries:
        # An entry without a link cannot be stored or de-duplicated; skip it
        # instead of raising on attribute access.
        url = entry.get("link")
        if not url or url in seen:
            continue
        seen.add(url)
        articles.append({
            "title": entry.get("title", ""),
            "url": url,
            "published_at": entry.get("published", ""),
            "source": "Yahoo",
            "topic": topic
        })
        if len(articles) >= max_articles:
            break
    return articles

In [4]:
def fetch_articles_for_topic(topic, max_articles=10, verbose=True):
    """Collect up to ``max_articles`` unique articles for ``topic``, Bing first.

    Bing results are taken first; Yahoo is only queried when Bing did not
    fill the quota. Articles are de-duplicated by URL across both sources.

    Args:
        topic: Free-text search query passed to both fetchers.
        max_articles: Upper bound on the number of articles returned. Zero or
            a negative value yields an empty list without querying anything.
        verbose: When true (the default), print a summary and a numbered
            listing of the collected articles.

    Returns:
        A list of article dicts (as produced by the per-source fetchers),
        Bing entries first, with no URL appearing twice.
    """
    seen_urls = set()
    deduped = []

    # Prefer Bing first. Each source is asked for the full quota rather than
    # only the remaining slots: a Yahoo item that duplicates a Bing URL would
    # otherwise use up a slot and leave the result short even though more
    # unique Yahoo items were available.
    for fetch in (get_bing_news_rss, get_yahoo_news_rss):
        if len(deduped) >= max_articles:
            break
        for art in fetch(topic, max_articles):
            if len(deduped) >= max_articles:
                break
            # Deduplicate based on URL
            if art["url"] not in seen_urls:
                seen_urls.add(art["url"])
                deduped.append(art)

    if verbose:
        print(f"\n🔍 Topic: {topic}")
        print(f"✅ Articles fetched (deduplicated): {len(deduped)}\n")
        for i, art in enumerate(deduped, 1):
            print(f"{i}. {art['title']}\n🔗 {art['url']}\n📰 Source: {art['source']} | 📅 {art['published_at']} | 📌 Topic: {art['topic']}\n")

    return deduped

In [9]:
# Example usage: fetch articles for every topic and pool them in one list.
# `all_articles` is reset to an empty list each time this cell runs, so
# re-running it replaces (rather than appends to) earlier results.
topics = ["tech"]
all_articles = []
for t in topics:
    # fetch_articles_for_topic also prints a numbered listing of what it found.
    arts = fetch_articles_for_topic(t, max_articles=10)
    all_articles.extend(arts)


🔍 Topic: tech
✅ Articles fetched (deduplicated): 10

1. Five Keys To Helping Traditional Businesses Embrace New Tech
🔗 https://www.forbes.com/councils/forbestechcouncil/2025/06/25/five-keys-to-helping-traditional-businesses-embrace-new-tech/
📰 Source: Bing | 📅 Wed, 25 Jun 2025 04:30:00 GMT | 📌 Topic: tech

2. Feds wasted millions on tech to detect fentanyl at the border, report finds
🔗 https://www.usatoday.com/story/news/politics/2025/06/25/feds-wasted-money-border-technology-fentanyl/84287735007/
📰 Source: Bing | 📅 Wed, 25 Jun 2025 15:36:00 GMT | 📌 Topic: tech

3. This Experimental Tech Allows Surgeons to See Through Blood
🔗 https://gizmodo.com/this-experimental-tech-allows-surgeons-to-see-through-blood-2000620162
📰 Source: Bing | 📅 Wed, 25 Jun 2025 08:40:00 GMT | 📌 Topic: tech

4. Texas Tech Shooting: What We Know
🔗 https://www.msn.com/en-us/news/crime/texas-tech-shooting-what-we-know/ar-AA1HkjAR
📰 Source: Bing | 📅 Tue, 24 Jun 2025 08:59:00 GMT | 📌 Topic: tech

5. Tech stocks power 

In [13]:
# Save the collected articles to resolved_articles.json in the working directory.
# Requires `all_articles` from the example-usage cell; an existing file is overwritten.
with open("resolved_articles.json", "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps non-ASCII characters in titles readable in the file.
    json.dump(all_articles, f, indent=2, ensure_ascii=False)

print("✅ Saved to resolved_articles.json")

✅ Saved to resolved_articles.json


In [16]:
# Load resolved_articles.json back from disk and preview it as a sanity check
# that the saved file round-trips.
try:
    with open("resolved_articles.json", "r", encoding="utf-8") as f:
        data = json.load(f)

    print(f"✅ Loaded {len(data)} articles from JSON.")

    # Preview first few articles; .get() with a placeholder keeps the preview
    # going even if an item is missing one of the expected keys.
    for i, item in enumerate(data[:5]):
        print(f"\n{i+1}. {item.get('title', '[No Title]')}")
        print(f"🔗 {item.get('url', '[No URL]')}")
        print(f"📅 {item.get('published_at', '[No Date]')}")
        print(f"📌 Topic: {item.get('topic', '[No Topic]')} | 📰 Source: {item.get('source', '[No Source]')}")

# Only the two expected failure modes are reported as messages; any other
# exception still propagates.
except FileNotFoundError:
    print("❌ File 'resolved_articles.json' not found.")
except json.JSONDecodeError:
    print("❌ Error decoding JSON. Check file format.")


✅ Loaded 10 articles from JSON.

1. Five Keys To Helping Traditional Businesses Embrace New Tech
🔗 https://www.forbes.com/councils/forbestechcouncil/2025/06/25/five-keys-to-helping-traditional-businesses-embrace-new-tech/
📅 Wed, 25 Jun 2025 04:30:00 GMT
📌 Topic: tech | 📰 Source: Bing

2. Feds wasted millions on tech to detect fentanyl at the border, report finds
🔗 https://www.usatoday.com/story/news/politics/2025/06/25/feds-wasted-money-border-technology-fentanyl/84287735007/
📅 Wed, 25 Jun 2025 15:36:00 GMT
📌 Topic: tech | 📰 Source: Bing

3. This Experimental Tech Allows Surgeons to See Through Blood
🔗 https://gizmodo.com/this-experimental-tech-allows-surgeons-to-see-through-blood-2000620162
📅 Wed, 25 Jun 2025 08:40:00 GMT
📌 Topic: tech | 📰 Source: Bing

4. Texas Tech Shooting: What We Know
🔗 https://www.msn.com/en-us/news/crime/texas-tech-shooting-what-we-know/ar-AA1HkjAR
📅 Tue, 24 Jun 2025 08:59:00 GMT
📌 Topic: tech | 📰 Source: Bing

5. Tech stocks power Nasdaq 100 to a record high a