In [None]:
import requests
import json
# Build the list of S&P 500 tickers: download the index fundamentals, pull the
# 'Code' of every entry under 'Components', and write one ticker per line to
# sp500_tickers.txt.
api_key = 'faraz_key'  # set to an alternative here. I used the one I purchased

# API endpoint for S&P 500 constituents
url = f'https://eodhistoricaldata.com/api/fundamentals/GSPC.INDX?api_token={api_key}'

try:
    # Make the API request. requests has no default timeout, so without one a
    # stalled connection would block this script indefinitely.
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # Raise an exception for bad status codes

    # Parse the JSON response
    data = json.loads(response.text)

    # Only a JSON object with an object-valued 'Components' field can hold
    # constituents; any other shape (list, null, string, ...) is treated as
    # "no components" instead of crashing on a missing .get()/.values().
    components = data.get('Components') if isinstance(data, dict) else None
    if not isinstance(components, dict):
        components = {}

    # Extract ticker symbols, skipping entries that are not dicts or that
    # carry no 'Code' key.
    tickers = [
        component_data['Code']
        for component_data in components.values()
        if isinstance(component_data, dict) and 'Code' in component_data
    ]

    if not tickers:
        print("No tickers found in the API response.")
    else:
        # Write tickers to a .txt file, one per line
        with open('sp500_tickers.txt', 'w') as f:
            f.writelines(f"{ticker}\n" for ticker in tickers)

        print(f"{len(tickers)} S&P 500 tickers have been saved to 'sp500_tickers.txt'")

# RequestException must be listed before IOError: it is a subclass of it, and
# network failures should get the more specific message.
except requests.exceptions.RequestException as e:
    print(f"An error occurred while making the API request: {e}")
except json.JSONDecodeError:
    print("Failed to parse the API response as JSON")
except KeyError:
    print("The API response doesn't contain the expected data structure")
except IOError:
    print("An error occurred while writing to the file")

In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta

# API token and endpoint
api_url = 'https://eodhd.com/api/news'
api_token = 'faraz_key'  # set to an alternative here

# Read tickers from the sp500_tickers.txt file (one symbol per line).
# Blank or whitespace-only lines are dropped so that an empty symbol is never
# used as a ticker in the news requests below.
with open('sp500_tickers.txt', 'r') as f:
    ticker_list = [symbol for symbol in (line.strip() for line in f) if symbol]

# Function to generate date ranges
def generate_date_ranges(start_date, end_date, interval_months):
    """Split [start_date, end_date] into consecutive (from, to) string pairs.

    Args:
        start_date: datetime marking the beginning of the overall period.
        end_date: datetime marking the end of the overall period.
        interval_months: Length of each range in months. A month is
            approximated as 30 days, so ranges drift from calendar months.

    Returns:
        A list of ('YYYY-MM-DD', 'YYYY-MM-DD') tuples. Each range starts on
        the date the previous one ended, so neighbouring ranges share their
        boundary date. The last range is clipped to end_date. The list is
        empty when start_date is not before end_date.

    Raises:
        ValueError: If interval_months is not positive. A zero or negative
            step would never advance and the loop would not terminate.
    """
    if interval_months <= 0:
        raise ValueError("interval_months must be a positive number")

    step = timedelta(days=interval_months * 30)  # Approximate month duration
    date_ranges = []
    start = start_date
    while start < end_date:
        # Clip the final range so it never runs past the requested end date.
        end = min(start + step, end_date)
        date_ranges.append((start.strftime('%Y-%m-%d'), end.strftime('%Y-%m-%d')))
        start = end  # Move to next interval
    return date_ranges

# Download news for every ticker over every date range and save it to
# news_data.csv.

# Define the start and end dates
start_date = datetime(2018, 1, 1)
end_date = datetime(2024, 6, 30)

# Generate date ranges
interval_months = 6
date_ranges = generate_date_ranges(start_date, end_date, interval_months)

# Column order of the output CSV
news_columns = ['date', 'ticker', 'title', 'content', 'link']

# Initialize a list to store all news data
news_data = []

# Fetch news for each ticker and each date range.
# The loop variables are deliberately NOT called start_date/end_date: reusing
# those names would overwrite the datetime objects above with strings.
for range_start, range_end in date_ranges:
    for ticker in ticker_list:
        params = {
            's': ticker,
            'from': range_start,
            'to': range_end,
            'api_token': api_token,
            'limit': 1000,
            'fmt': 'json',
            'language': 'en'
        }

        # A single network failure must not abort the whole run and discard
        # everything collected so far, so report it and move on.
        try:
            response = requests.get(api_url, params=params, timeout=60)
        except requests.exceptions.RequestException as e:
            print(f"Failed to retrieve data for {ticker} from {range_start} to {range_end}: {e}")
            continue

        if response.status_code != 200:
            print(f"Failed to retrieve data for {ticker} from {range_start} to {range_end}: {response.status_code}")
            continue

        # response.json() raises a ValueError subclass on a malformed body.
        try:
            data = response.json()
        except ValueError:
            print(f"Failed to retrieve data for {ticker} from {range_start} to {range_end}: invalid JSON")
            continue

        # Anything other than a list of articles is treated as a failed fetch.
        if not isinstance(data, list):
            print(f"Failed to retrieve data for {ticker} from {range_start} to {range_end}: unexpected response format")
            continue

        for news in data:
            if not isinstance(news, dict):
                continue
            # .get() keeps an article even when one of its fields is missing;
            # the missing field becomes an empty cell in the CSV.
            news_data.append({
                'date': news.get('date'),
                'ticker': ticker,
                'title': news.get('title'),
                'content': news.get('content'),
                'link': news.get('link')
            })
        print(f"Fetched news for {ticker} from {range_start} to {range_end}")

# Create a DataFrame and save to CSV.
# Passing columns= to the constructor keeps the frame well-formed even when no
# news was collected; otherwise to_csv would fail on the missing columns.
news_df = pd.DataFrame(news_data, columns=news_columns)

# Consecutive date ranges share their boundary date, so an article published
# on a boundary may have been fetched twice; drop the exact duplicates.
news_df = news_df.drop_duplicates()

news_df.to_csv('news_data.csv', index=False, columns=news_columns)

print("News data saved to news_data.csv")