In [51]:
import requests
import json 
from datetime import datetime, timedelta
import os
import math

In [52]:
# Configuration for the fetch run.
# The NewsAPI key is read from the environment so the secret is never stored in
# the notebook. NOTE(review): a key used to be hardcoded on this line; treat it
# as leaked and rotate it.
API_KEY = os.environ.get("NEWS_API_KEY", "")
if not API_KEY:
    print("Warning: NEWS_API_KEY is not set; NewsAPI requests will not be authenticated.")
# QUERY = ["NVIDIA", "AMD", "Intel"]
QUERY = ["NVIDIA"]
# DATE_FROM = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')
DATE_FROM = '2025-09-20'
# End of the search window: today's date, formatted like DATE_FROM.
DATE_NOW = datetime.now().strftime('%Y-%m-%d')

In [53]:
def fetch_news(api_key, query, date_from, date_to):
    """
    Fetch the first page of NewsAPI "everything" results for one query.

    Args:
        api_key: NewsAPI key, sent as the ``apiKey`` query parameter.
        query: Search expression, sent as ``q`` (URL-encoded by requests).
        date_from: Start of the window, in a format accepted by ``is_valid_date``.
        date_to: End of the window, in a format accepted by ``is_valid_date``.

    Returns:
        ``(articles, total_pages, total_articles)`` when the API answers with
        HTTP 200, otherwise the dict ``{"error": "Failed to fetch news"}``.

    Raises:
        ValueError: If either date fails ``is_valid_date``, or if ``date_from``
            sorts after ``date_to``.
    """
    # Check the format first so malformed input is reported as a format error
    # rather than as an ordering error.
    if not is_valid_date(date_from) or not is_valid_date(date_to):
        raise ValueError("Dates must be in YYYY-MM-DD format")
    # Plain string comparison: chronological only for zero-padded ISO-style
    # strings in the same format.
    if date_from > date_to:
        raise ValueError("date_from must be earlier than date_to")

    # Let requests build the query string so special characters in the query
    # (spaces, '&', quotes, ...) are encoded instead of corrupting the URL.
    response = requests.get(
        "https://newsapi.org/v2/everything",
        params={
            "q": query,
            "from": date_from,
            "to": date_to,
            "language": "en",
            "apiKey": api_key,
        },
        timeout=30,  # without a timeout a stalled connection blocks forever
    )

    # Bail out before touching the body: an error response is not guaranteed
    # to contain JSON.
    if response.status_code != 200:
        return {"error": "Failed to fetch news"}

    payload = response.json()
    total_articles = payload.get('totalResults', 0)
    # Assumes the API serves 100 articles per page (matches the first-page
    # count in this notebook's output) -- TODO confirm against NewsAPI docs.
    total_pages = int(math.ceil(total_articles / 100))
    articles = payload.get('articles', [])
    return (articles, total_pages, total_articles)

def fetch_news_all_pages(url, total_pages):
    """
    Fetch pages 2 through ``total_pages`` of a result set.

    Page 1 is not requested here; the caller in this file obtains it from
    ``fetch_news``.

    Args:
        url: Request URL that already carries a query string; ``&page=<n>`` is
            appended for each page.
        total_pages: Number of the last page to request (inclusive). Values
            below 2 result in no requests.

    Returns:
        A list with the articles of every page fetched successfully. Fetching
        stops at the first page that fails (network error or non-200 status),
        and the articles collected up to that point are returned.
    """
    all_articles = []
    for page in range(2, total_pages + 1):
        paged_url = f"{url}&page={page}"
        try:
            # The timeout keeps a stalled connection from blocking forever.
            response = requests.get(paged_url, timeout=30)
        except requests.RequestException as exc:
            # Same best-effort policy as for HTTP failures: keep earlier pages.
            print(f"Failed to fetch page {page}, {exc}")
            break

        if response.status_code != 200:
            print(f"Failed to fetch page {page}, {response.text}")
            break

        articles = response.json().get('articles', [])
        # Progress output: number of articles on this page.
        print(len(articles))
        all_articles.extend(articles)
    return all_articles

def is_valid_date(date_string):
    """
    Report whether a value parses as one of the supported date formats.

    Accepted formats are ``%Y-%m-%d`` and ``%Y-%m-%dT%H:%M:%S``.

    Args:
        date_string: The value to check.

    Returns:
        True if ``date_string`` is a string matching one of the formats,
        otherwise False. Non-string values (e.g. ``None``) yield False
        instead of raising.
    """
    # strptime raises TypeError (not ValueError) for non-strings; a validity
    # check should answer False for those rather than crash.
    if not isinstance(date_string, str):
        return False

    for fmt in ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"):
        try:
            datetime.strptime(date_string, fmt)
        except ValueError:
            continue  # not this format; try the next one
        return True
    return False

In [54]:
def main():
    """
    Fetch and save articles for every entry in ``QUERY``.

    For each company the first page is requested with ``fetch_news``; when more
    pages exist they are pulled with ``fetch_news_all_pages``. The combined
    list is written as JSON to ``../data/raw/news_api/<timestamp>_<company>.json``.
    A company whose first request fails is reported and skipped.

    Reads the module-level ``QUERY``, ``API_KEY``, ``DATE_FROM`` and
    ``DATE_NOW``. Returns nothing.
    """
    # Imported locally: only the pagination URL built below needs it.
    from urllib.parse import urlencode

    output_dir = r'../data/raw/news_api'

    for company in QUERY:
        result = fetch_news(API_KEY, query=company, date_from=DATE_FROM, date_to=DATE_NOW)
        # fetch_news returns an error dict instead of the 3-tuple when the
        # request fails; unpacking it blindly would crash with an opaque error.
        if isinstance(result, dict):
            print(f"Skipping {company}: {result.get('error', 'Failed to fetch news')}")
            continue
        articles, total_pages, total_articles = result

        print(f"Total Articles for {company}: {total_articles}")
        print(f"Total Pages for {company}: {total_pages}")
        print(f"Articles on First Page for {company}: {len(articles)}")
        if total_pages >= 2:
            print(f"Fetching additional {total_pages - 1} pages for {company}...")
            # URL-encode the parameters so queries containing spaces or '&'
            # do not corrupt the query string.
            query_string = urlencode({
                "q": company,
                "from": DATE_FROM,
                "to": DATE_NOW,
                "language": "en",
                "apiKey": API_KEY,
            })
            url = f"https://newsapi.org/v2/everything?{query_string}"
            additional_articles = fetch_news_all_pages(url, total_pages)
            print(f"Total Articles fetched for {company}: {len(articles) + len(additional_articles)}")

            company_articles = articles + additional_articles
        else:
            company_articles = articles

        file_name = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{company}.json"

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_dir, exist_ok=True)
        path = os.path.join(output_dir, file_name)

        with open(path, "w", encoding="utf-8") as f:
            json.dump(company_articles, f, ensure_ascii=False, indent=4)
        print(f"Saved {len(company_articles)} articles to {path}")

In [55]:
# Show the configured run, optionally let the user override the date window,
# then start the fetch.
print(f"Fetching news from {DATE_FROM} to {DATE_NOW}")
print(f"Queries: {', '.join(QUERY)}")

changes_made = input("Change the date range? (y/n): ")
# The answer never changes inside the loop, so this runs only for a "y" answer
# and repeats until a valid range has been entered.
while changes_made.lower() == 'y':
    new_date_from = input("Enter new start date (YYYY-MM-DD): ")
    new_date_to = input("Enter new end date (YYYY-MM-DD): ")

    if not (is_valid_date(new_date_from) and is_valid_date(new_date_to)):
        print("Error: Invalid date format. Please use YYYY-MM-DD.")
        continue

    if new_date_from > new_date_to:
        print("Error: Start date must be earlier than end date.")
        continue

    # Both dates are well-formed and ordered: replace the configured window.
    DATE_FROM, DATE_NOW = new_date_from, new_date_to
    break

main()

Fetching news from 2025-09-20 to 2025-09-22
Queries: NVIDIA
Total Articles for NVIDIA: 159
Total Pages for NVIDIA: 2
Articles on First Page for NVIDIA: 100
Fetching additional 1 pages for NVIDIA...
57
Total Articles fetched for NVIDIA: 157
Saved 157 articles to ../data/raw/news_api\20250922_145607_NVIDIA.json
