In [60]:
import requests
import pandas as pd
import os
from dotenv import load_dotenv
from time import sleep

In [61]:
# Load API key
load_dotenv()
api_key = os.getenv("ALPHA_VANTAGE_KEY")

In [62]:
# Debug: Print the API key to verify it's loaded correctly
print(f"Loaded API key: {api_key}")

Loaded API key: UX22I8KUVTLLRXFQ


In [63]:
# Function to fetch news articles
def fetch_news(api_key, keywords, from_date, to_date, page=1):
    url = "https://www.alphavantage.co/query"
    params = {
        "function": "NEWS_SENTIMENT",
        "apikey": api_key,
        "tickers": keywords,
        "topics": "economy,financial_markets",
        "time_from": from_date,
        "time_to": to_date,
        "sort": "LATEST",
        "limit": 200,
        "page": page
    }
    response = requests.get(url, params=params)
    try:
        data = response.json()
    except ValueError:
        print(f"Error decoding JSON for date range {from_date} to {to_date} on page {page}")
        return {}
    
    return data

In [69]:
# Function to collect a sample of articles within a date range for given keywords
def collect_sample_articles(keywords, from_date, to_date, max_articles=5):
    all_articles = []
    requests_made = 0
    articles_per_request = 10  # Maximum articles per request

    page = 1

    while requests_made < max_articles // articles_per_request:
        news_data = fetch_news(api_key, keywords, from_date, to_date, page)
        
        # Debug: Print the raw response for debugging
        print(f"Response for {from_date} to {to_date} on page {page}: {news_data}")

        # Check if there are articles in the response
        if "feed" in news_data and news_data["feed"]:
            all_articles.extend(news_data["feed"])
            page += 1
            requests_made += 1
            sleep(12)  
            
            # Check if the sample limit is reached
            if len(all_articles) >= max_articles:
                return all_articles
        else:
            break

    return all_articles


In [70]:
# Define date range for a smaller sample
from_date = "2023-01-01"
to_date = "2023-12-31"

In [66]:
# Collect a smaller sample of articles about economic factors
economic_keywords = "Inflation"
economic_articles = collect_sample_articles(economic_keywords, from_date, to_date)

In [71]:
# Collect a smaller sample of articles about the S&P 500
sp_keywords = "SPY"
sp_articles = collect_sample_articles(sp_keywords, from_date, to_date)

In [72]:
# Convert articles to DataFrames
economic_df = pd.DataFrame(economic_articles)
sp_df = pd.DataFrame(sp_articles)

In [73]:
# Display the first few rows of the DataFrames
print("Economic Articles DataFrame:")
print(economic_df.head())
print(f"Total economic articles fetched: {len(economic_df)}")

Economic Articles DataFrame:
Empty DataFrame
Columns: []
Index: []
Total economic articles fetched: 0


In [53]:
print("\nS&P 500 Articles DataFrame:")
print(sp_df.head())
print(f"Total S&P 500 articles fetched: {len(sp_df)}")


S&P 500 Articles DataFrame:
Empty DataFrame
Columns: []
Index: []
Total S&P 500 articles fetched: 0
