<a href="https://colab.research.google.com/github/dibend/Colab/blob/main/Investing_News.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install necessary libraries
!pip install feedparser gradio

Collecting feedparser
  Downloading feedparser-6.0.11-py3-none-any.whl.metadata (2.4 kB)
Collecting gradio
  Downloading gradio-4.40.0-py3-none-any.whl.metadata (15 kB)
Collecting sgmllib3k (from feedparser)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.112.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.2.0 (from gradio)
  Downloading gradio_client-1.2.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Import libraries
import feedparser
import gradio as gr
from datetime import datetime

# Feed catalogue, grouped by publisher. Each mapping goes from the category
# name shown to the user to the URL of the RSS feed for that category.
_MARKETWATCH_FEEDS = {
    "Top Stories": "https://feeds.content.dowjones.io/public/rss/mw_topstories",
    "Real-Time Headlines": "https://feeds.content.dowjones.io/public/rss/mw_realtimeheadlines",
    "Bulletins": "http://feeds.marketwatch.com/marketwatch/bulletins",
    "Market Pulse": "https://feeds.content.dowjones.io/public/rss/mw_marketpulse",
}

_NASDAQ_FEEDS = {
    "Nasdaq Original": "https://www.nasdaq.com/feed/nasdaq-original/rss.xml",
    "Commodities": "https://www.nasdaq.com/feed/rssoutbound?category=Commodities",
    "Cryptocurrencies": "https://www.nasdaq.com/feed/rssoutbound?category=Cryptocurrencies",
    "Dividends": "https://www.nasdaq.com/feed/rssoutbound?category=Dividends",
    "Earnings": "https://www.nasdaq.com/feed/rssoutbound?category=Earnings",
    "ETFs": "https://www.nasdaq.com/feed/rssoutbound?category=ETFs",
    "IPOs": "https://www.nasdaq.com/feed/rssoutbound?category=IPOs",
    "Markets": "https://www.nasdaq.com/feed/rssoutbound?category=Markets",
    "Options": "https://www.nasdaq.com/feed/rssoutbound?category=Options",
    "Stocks": "https://www.nasdaq.com/feed/rssoutbound?category=Stocks",
}

# Merged catalogue; insertion order (MarketWatch feeds first, then Nasdaq)
# is the order in which the categories are iterated.
rss_feeds = {**_MARKETWATCH_FEEDS, **_NASDAQ_FEEDS}

# User-Agent string identifying the client as Safari on an iPhone.
user_agent = "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1"

def parse_feed(url):
    """Download and parse a single RSS feed.

    Args:
        url: Address of the feed to fetch.

    Returns:
        The ``entries`` attribute of the object returned by
        ``feedparser.parse``.
    """
    # The module-level ``user_agent`` is sent as the User-Agent request header.
    parsed = feedparser.parse(url, request_headers={'User-Agent': user_agent})
    return parsed.entries

def _published_sort_key(entry):
    """Return a sortable key for an entry's parsed publication date.

    Entries with no ``published_parsed`` value (attribute missing or None)
    get an empty tuple, which compares lower than any real date tuple.
    """
    parsed = getattr(entry, "published_parsed", None)
    return tuple(parsed) if parsed else ()


def get_latest_headlines():
    """Fetch every feed in ``rss_feeds`` and group the entries by category.

    A feed that raises while being fetched, or that yields no entries, is
    reported with ``print`` and recorded with an empty list, so every
    category of ``rss_feeds`` is always present in the result.

    Returns:
        dict: category name -> list of entries, plus an ``"All"`` key holding
        the entries of all feeds sorted newest first. Entries without a
        parsed publication date are placed at the end of ``"All"``.
    """
    headlines = {}
    all_entries = []
    for category, url in rss_feeds.items():
        try:
            print(f"Parsing feed: {url}")
            entries = parse_feed(url)
        except Exception as e:
            # Best effort: one broken feed must not stop the others.
            print(f"Error parsing feed {url}: {e}")
            entries = []
        else:
            if entries:
                print(f"Successfully parsed {len(entries)} entries from feed: {url}")
            else:
                print(f"No entries found in feed: {url}")
                entries = []
        headlines[category] = entries  # All entries for this category
        all_entries.extend(entries)
    # Sort by publication date, newest first. The key tolerates undated
    # entries; sorting on ``published_parsed`` directly would raise for them.
    headlines["All"] = sorted(all_entries, key=_published_sort_key, reverse=True)
    return headlines

def _format_entry(entry):
    """Render one feed entry as a ``Title/Link/Published`` text block.

    A field the entry does not provide is shown as ``N/A`` instead of raising.
    """
    title = getattr(entry, "title", "N/A")
    link = getattr(entry, "link", "N/A")
    published = getattr(entry, "published", "N/A")
    return f"Title: {title}\nLink: {link}\nPublished: {published}\n\n"


def format_headlines():
    """Fetch the latest headlines and render each category as display text.

    Returns:
        dict: category name -> text. There is one key per key of
        ``rss_feeds`` plus ``"All"``; a category with no entries maps to an
        empty string.
    """
    headlines = get_latest_headlines()
    formatted_headlines = {category: "" for category in rss_feeds.keys()}
    formatted_headlines["All"] = ""
    for category, entries in headlines.items():
        # Each category, "All" included, is rendered only from its own entry
        # list. Appending every entry to "All" here as well would repeat each
        # headline in the "All" text and lose the order of headlines["All"].
        formatted_headlines[category] = "".join(_format_entry(entry) for entry in entries)
    return formatted_headlines

def create_interface():
    """Build the Gradio app: a refresh button and one tab per feed category.

    Each tab holds a Markdown component showing that category's formatted
    headlines; the last tab is "All". Clicking "Refresh Feeds" calls
    ``format_headlines`` again and sends the new text to those same Markdown
    components.

    Returns:
        The ``gr.Blocks`` app, not yet launched.
    """
    # Fixed tab order, shared by the first render and every refresh, so the
    # texts returned by the handler line up with the Markdown components.
    categories = [*rss_feeds, "All"]

    def _load_tab_texts():
        """Re-fetch the feeds and return one text per tab, in tab order."""
        formatted = format_headlines()
        return [formatted.get(category, "") for category in categories]

    with gr.Blocks() as iface:
        with gr.Row():
            refresh_button = gr.Button("Refresh Feeds")

        panels = []
        with gr.Tabs():
            for category, text in zip(categories, _load_tab_texts()):
                with gr.TabItem(category):
                    panels.append(gr.Markdown(text))

        # The click handler returns one value per output component. Using
        # create_interface itself as the handler would build a new Blocks app
        # on every click instead of updating the displayed text.
        refresh_button.click(fn=_load_tab_texts, inputs=[], outputs=panels)
    return iface

# Build the app once; this also performs the first fetch of the feeds
iface = create_interface()

# Start serving the app, with the share and debug options both enabled
iface.launch(debug=True, share=True)

Parsing feed: https://feeds.content.dowjones.io/public/rss/mw_topstories
Successfully parsed 10 entries from feed: https://feeds.content.dowjones.io/public/rss/mw_topstories
Parsing feed: https://feeds.content.dowjones.io/public/rss/mw_realtimeheadlines
Successfully parsed 10 entries from feed: https://feeds.content.dowjones.io/public/rss/mw_realtimeheadlines
Parsing feed: http://feeds.marketwatch.com/marketwatch/bulletins
Successfully parsed 10 entries from feed: http://feeds.marketwatch.com/marketwatch/bulletins
Parsing feed: https://feeds.content.dowjones.io/public/rss/mw_marketpulse
Successfully parsed 30 entries from feed: https://feeds.content.dowjones.io/public/rss/mw_marketpulse
Parsing feed: https://www.nasdaq.com/feed/nasdaq-original/rss.xml
Successfully parsed 15 entries from feed: https://www.nasdaq.com/feed/nasdaq-original/rss.xml
Parsing feed: https://www.nasdaq.com/feed/rssoutbound?category=Commodities
Successfully parsed 15 entries from feed: https://www.nasdaq.com/feed