In [92]:
import requests
import pandas as pd
import os
from dotenv import load_dotenv
from time import sleep

In [116]:
# Load API key
load_dotenv()
api_key = os.getenv("ALPHA_VANTAGE_KEY")

In [117]:
# Debug: Print the API key to verify it's loaded correctly
print(f"Loaded API key: {api_key}")

Loaded API key: None


In [111]:
# Define function to fetch news articles
def fetch_news(api_key, keywords, from_date, to_date, page=1):
    url = "https://www.alphavantage.co/query"
    params = {
        "function": "NEWS_SENTIMENT",
        "apikey": api_key,
        "tickers": keywords,
        "topics": "technology,financial_markets",
        "time_from": from_date,
        "time_to": to_date,
        "sort": "LATEST",
        "limit": 200,
        "page": page
    }
    response = requests.get(url, params=params)
    try:
        data = response.json()
    except ValueError:
        print(f"Error decoding JSON for date range {from_date} to {to_date} on page {page}")
        return {}
    
    return data


In [112]:
# Function to collect articles within a date range for given keywords
def collect_articles(keywords, from_year, to_year):
    all_articles = []
    for year in range(from_year, to_year + 1):
        from_date = f"{year}-01-01"
        to_date = f"{year}-12-31"
        page = 1

        while True:
            news_data = fetch_news(api_key, keywords, from_date, to_date, page)
            
            # Print the raw response for debugging
            print(f"Response for {from_date} to {to_date} on page {page}: {news_data}")

            # Check if there are articles in the response
            if "feed" in news_data and news_data["feed"]:
                all_articles.extend(news_data["feed"])
                page += 1
                sleep(12)  # Respect API rate limit
            else:
                break

    return all_articles


In [113]:
# Define date range (past 10 years)
start_year = 2014
end_year = 2024

In [114]:
# Collect articles about economic factors
economic_keywords = "economy,gdp,inflation,unemployment,interest rates"
economic_articles = collect_articles(economic_keywords, start_year, end_year)

Response for 2014-01-01 to 2014-12-31 on page 1: {'Error Message': 'the parameter apikey is invalid or missing. Please claim your free API key on (https://www.alphavantage.co/support/#api-key). It should take less than 20 seconds.'}
Response for 2015-01-01 to 2015-12-31 on page 1: {'Error Message': 'the parameter apikey is invalid or missing. Please claim your free API key on (https://www.alphavantage.co/support/#api-key). It should take less than 20 seconds.'}
Response for 2016-01-01 to 2016-12-31 on page 1: {'Error Message': 'the parameter apikey is invalid or missing. Please claim your free API key on (https://www.alphavantage.co/support/#api-key). It should take less than 20 seconds.'}
Response for 2017-01-01 to 2017-12-31 on page 1: {'Error Message': 'the parameter apikey is invalid or missing. Please claim your free API key on (https://www.alphavantage.co/support/#api-key). It should take less than 20 seconds.'}
Response for 2018-01-01 to 2018-12-31 on page 1: {'Error Message': '

In [104]:
# Convert articles to DataFrame
articles_df = pd.DataFrame(all_articles)

In [105]:
# Display the first few rows of the DataFrame
print(articles_df.head())
print(f"Total articles fetched: {len(articles_df)}")

Empty DataFrame
Columns: []
Index: []
Total articles fetched: 0


Index(['title', 'url', 'time_published', 'authors', 'summary', 'banner_image',
       'source', 'category_within_source', 'source_domain', 'topics',
       'overall_sentiment_score', 'overall_sentiment_label',
       'ticker_sentiment'],
      dtype='object')
                                                title  \
0   Magnificent 7 Earnings Preview: Can Investors ...   
1   Arm Holdings  ( ARM )  Stock: Buy or Sell Befo...   
2   Should Investors Buy Microsoft's Stock Ahead o...   
3   Perion Network  ( PERI )  to Report Q2 Earning...   
4   onsemi  ( ON )  Q2 Earnings Beat Estimates, Re...   
5   Should You Buy the Dip in "Mag 7" and Big Tech...   
6   Small Caps Beat Nvidia, Amazon, And Tesla; Pro...   
7   ETSY Gears Up to Report Q2 Earnings: What's in...   
8   What's in Store for Booking Holdings  ( BKNG )...   
9   Fair Isaac  ( FICO )  to Report Q3 Earnings: W...   
10  Unexpected Shift In Apple's Strategy As 'Apple...   
11  Gen Digital  ( GEN )  to Report Q1 Earnings: W...   

In [75]:
# Rename the date column
news_agg.rename(columns={'time_published': 'date'}, inplace=True)

In [76]:
# Convert 'date' back to datetime for merging
news_agg['date'] = pd.to_datetime(news_agg['date'])

In [77]:
# Merge the SMA and CPI DataFrames first
combined_df = pd.merge(sma_df, cpi_df, on='date', how='inner')

In [78]:
# Now merge the aggregated news sentiment data with the combined SMA and CPI DataFrame
combined_df = pd.merge(combined_df, news_agg, on='date', how='left')

In [79]:
# Display the final combined DataFrame
print(combined_df)

        date 2024-07-29.SMA 2024-07-26.SMA 2024-07-25.SMA 2024-07-24.SMA  \
0 1970-01-01       225.0145       224.6335       224.4405       224.2285   

  2024-07-23.SMA 2024-07-22.SMA 2024-07-19.SMA 2024-07-18.SMA 2024-07-17.SMA  \
0       223.7550       222.9115       222.0880       221.3565       220.8620   

   ... 1999-12-07.SMA 1999-12-06.SMA 1999-12-03.SMA 1999-12-02.SMA  \
0  ...         0.7339         0.7258         0.7153         0.7035   

  1999-12-01.SMA 1999-11-30.SMA 1999-11-29.SMA cpi_value avg_sentiment_score  \
0         0.6927         0.6840         0.6764      37.8                 NaN   

  article_count  
0           NaN  

[1 rows x 6209 columns]
