In [1]:
pip install requests pandas


Note: you may need to restart the kernel to use updated packages.


In [2]:
import requests
import pandas as pd
import time
import os

# RapidAPI credentials
RAPIDAPI_KEY = "8d4c87454fmsh34632789d84864ap1c1b12jsn9b500e1e3ebb"  # Replace with your API key
RAPIDAPI_HOST = "google-news13.p.rapidapi.com"

# API Endpoint
URL = "https://google-news13.p.rapidapi.com"

# Query Parameters (Modify as needed)
PARAMS = {
    "country": "us",  # Change for other countries (e.g., 'in', 'gb')
    "category": "technology",  # News category (e.g., 'sports', 'health')
}

# File to store news dataset
CSV_FILE = "real_time_news.csv"

def fetch_news():
    """Fetch latest news articles from the API."""
    headers = {
        "X-RapidAPI-Key": RAPIDAPI_KEY,
        "X-RapidAPI-Host": RAPIDAPI_HOST
    }
    
    response = requests.get(URL, headers=headers, params=PARAMS)
    
    if response.status_code == 200:
        return response.json().get("articles", [])
    else:
        print(f"Error fetching news: {response.status_code}")
        return []

def update_dataset():
    """Fetch latest news and update the dataset."""
    print("Fetching latest news...")
    articles = fetch_news()

    if not articles:
        print("No new articles found.")
        return
    
    # Convert API response to DataFrame
    df_new = pd.DataFrame(articles)[["title", "description", "url", "publishedAt", "source"]]
    df_new["source"] = df_new["source"].apply(lambda x: x["name"])  # Extract source name
    
    # Check if CSV file exists
    if os.path.exists(CSV_FILE):
        df_existing = pd.read_csv(CSV_FILE)
        
        # Avoid duplicates by removing already saved articles
        df_new = df_new[~df_new["title"].isin(df_existing["title"])]

        if df_new.empty:
            print("No new updates. Dataset is up to date.")
            return
        
        # Append new articles to existing dataset
        df_new.to_csv(CSV_FILE, mode='a', header=False, index=False)
    else:
        # Create new dataset file
        df_new.to_csv(CSV_FILE, index=False)

    print(f"Dataset updated! {len(df_new)} new articles added.")

# Run script continuously every 60 seconds
if __name__ == "__main__":
    while True:
        update_dataset()
        time.sleep(60)  # Wait 60 seconds before fetching again


Fetching latest news...
Error fetching news: 404
No new articles found.
Fetching latest news...
Error fetching news: 404
No new articles found.
Fetching latest news...
Error fetching news: 404
No new articles found.


KeyboardInterrupt: 

📡 Fetching latest news...
❌ Error fetching news: 403
ℹ️ No new articles found.


KeyboardInterrupt: 

In [13]:
import requests
import pandas as pd
import time

# API Configuration
API_KEY = "37eaf5fe6f85b63687fd531a31428cd3"  # Replace with your NewsAPI key
URL = "http://api.mediastack.com"
CSV_FILENAME = "realtime_news_data.csv"
INTERVAL = 300  # Fetch data every 5 minutes

params = {
    "country": "us",
    "apiKey": API_KEY
}

def fetch_and_store_news():
    """Fetch real-time news and append it to a CSV file."""
    response = requests.get(URL, params=params)

    print("Response Status Code:", response.status_code)

    if response.status_code == 200:
        try:
            # Check if response is empty
            if not response.text.strip():
                print("Warning: API returned an empty response.")
                return

            # Try to parse JSON
            data = response.json()

            # Check if "articles" exist
            if "articles" not in data:
                print("Error: No 'articles' key in response.")
                return
            
            articles = data["articles"]

            if not articles:
                print("No new articles found.")
                return

            # Convert "source" dictionary into a name column
            for article in articles:
                article["source"] = article["source"]["name"]

            df = pd.DataFrame(articles, columns=["title", "description", "url", "publishedAt", "source"])

            # Append new data to the CSV file
            df.to_csv(CSV_FILENAME, mode="a", index=False, encoding="utf-8", header=not pd.io.common.file_exists(CSV_FILENAME))

            print(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] Data updated successfully.")

        except requests.exceptions.JSONDecodeError:
            print("Error: API returned invalid JSON.")

    elif response.status_code == 401:
        print("Error: Unauthorized. Check if your API key is valid.")
    elif response.status_code == 403:
        print("Error: Access Forbidden. Your API key might be blocked.")
    elif response.status_code == 429:
        print("Error: Rate limit exceeded. Reduce request frequency.")
    else:
        print(f"Error: {response.status_code}")

# Run the fetch function in a loop for real-time updates
while True:
    fetch_and_store_news()
    time.sleep(INTERVAL)


Response Status Code: 200
Error: API returned invalid JSON.


KeyboardInterrupt: 

In [15]:


import requests
import feedparser
import pandas as pd
import datetime

# Set up your API key (Get it from https://newsapi.org/)
NEWS_API_KEY = "ff190a48a285413e921b5a91343dd0cd"

# Function to fetch news from NewsAPI
def fetch_newsapi_news(query="latest", sources="bbc-news,cnn", language="en"):
    url = f"https://newsapi.org/v2/everything?q={query}&sources={sources}&language={language}&apiKey={NEWS_API_KEY}"
    response = requests.get(url)
    data = response.json()
    
    articles = []
    if data.get("articles"):
        for article in data["articles"]:
            articles.append({
                "source": article["source"]["name"],
                "title": article["title"],
                "description": article["description"],
                "url": article["url"],
                "published_at": article["publishedAt"]
            })
    return articles

# Function to fetch news from RSS Feeds
def fetch_rss_news(feed_url="https://rss.cnn.com/rss/edition.rss"):
    feed = feedparser.parse(feed_url)
    articles = []
    for entry in feed.entries:
        articles.append({
            "source": "CNN RSS",
            "title": entry.title,
            "description": entry.summary,
            "url": entry.link,
            "published_at": entry.published
        })
    return articles

# Combine news from both sources
def fetch_all_news():
    news_data = []
    
    # Fetch from NewsAPI
    news_data.extend(fetch_newsapi_news(query="breaking news"))

    # Fetch from RSS feed
    news_data.extend(fetch_rss_news())

    return news_data

# Save news to CSV
def save_news_to_csv(news_data, filename="news_data.csv"):
    df = pd.DataFrame(news_data)
    df.to_csv(filename, index=False)
    print(f"✅ News saved to {filename}")

# Run the script
if __name__ == "__main__":
    news = fetch_all_news()
    if news:
        save_news_to_csv(news)
    else:
        print("No news found.")


SyntaxError: invalid syntax (1676247699.py, line 1)