# Target only specific tickers to get financial news

In [19]:
from eodhd import APIClient
import json
import time
from datetime import datetime
import os

# EODHD API client. KEY is not defined in this cell and must already exist
# in the surrounding scope.
api = APIClient(KEY)

# ---------------------------------------------------------------------------
# Query settings
# ---------------------------------------------------------------------------
FROM_DATE = "2019-01-01"  # start date passed to every news request
LIMIT = 1000              # items requested per call (maximum allowed per request)
MAX_ITERATIONS = 100      # upper bound on paginated requests per ticker

# Tickers whose news is collected, in processing order
TARGET_TICKERS = ["AAPL.US", "TSLA.US", "GOOGL.US", "NVDA.US", "AMZN.US", "MSFT.US"]

# ---------------------------------------------------------------------------
# API quotas enforced locally by check_rate_limits()
# ---------------------------------------------------------------------------
CALLS_PER_MINUTE_LIMIT = 1_000
CALLS_PER_DAY_LIMIT = 100_000

# ---------------------------------------------------------------------------
# Output location (created on first run, reused afterwards)
# ---------------------------------------------------------------------------
OUTPUT_DIR = "sp500_news_data"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ---------------------------------------------------------------------------
# Run-wide bookkeeping shared by the functions below
# ---------------------------------------------------------------------------
# Counters all start at zero: total API calls, calls in the current
# one-minute window, tickers finished, and news items collected.
api_calls = minute_calls = processed_companies = total_news_items = 0

# The run clock and the first rate-limit window start at the same instant.
start_time = minute_start_time = time.time()

# Function to enforce rate limits
def check_rate_limits():
    """Throttle against the per-minute quota and report on the daily quota.

    Works on the module-level counters ``minute_calls``,
    ``minute_start_time`` and ``api_calls``:

    * If 60 seconds or more have passed since ``minute_start_time``, the
      per-minute window is restarted (counter zeroed, start set to now).
    * If ``minute_calls`` is within 5 calls of ``CALLS_PER_MINUTE_LIMIT``,
      the function blocks for the remainder of the window plus one second
      and then restarts the window.
    * If ``api_calls`` is within 100 calls of ``CALLS_PER_DAY_LIMIT``, a
      warning is printed.

    Returns:
        bool: ``False`` when the daily budget (minus the 100-call buffer) is
        used up, ``True`` otherwise.
    """
    global minute_calls, minute_start_time, api_calls

    now = time.time()
    window_age = now - minute_start_time

    # An expired window is replaced by a fresh one starting right now.
    if window_age >= 60:
        minute_calls, minute_start_time = 0, now

    # Keep a 5-call safety margin below the per-minute quota.
    minute_threshold = CALLS_PER_MINUTE_LIMIT - 5
    if minute_calls >= minute_threshold:
        wait_time = 60 - window_age
        print(f"Approaching per-minute rate limit. Waiting {wait_time:.2f} seconds...")
        time.sleep(wait_time + 1)  # one extra second as a buffer
        # The window restarts only after the sleep has finished.
        minute_calls, minute_start_time = 0, time.time()

    # Keep a 100-call safety margin below the daily quota.
    daily_budget_left = api_calls < CALLS_PER_DAY_LIMIT - 100
    if not daily_budget_left:
        print("WARNING: Approaching daily API call limit!")
    return daily_budget_left

# Function to fetch news for a single company
def fetch_company_news(symbol):
    """Collect paginated news for one ticker via ``api.financial_news``.

    Requests batches of ``LIMIT`` items dated from ``FROM_DATE`` onwards,
    advancing the offset by ``LIMIT`` each time, for at most
    ``MAX_ITERATIONS`` batches. Pagination stops early when
    ``check_rate_limits()`` returns ``False``, when a batch comes back
    empty, or when a batch holds fewer than ``LIMIT`` items.

    A request that raises is reported and NOT retried: after a pause (60 s
    if the error text mentions "rate" or "limit", otherwise 5 s) the loop
    moves on to the next offset.

    Side effects: updates the module-level counters ``api_calls``,
    ``minute_calls`` and ``total_news_items``; after a rate-limit pause it
    also restarts the per-minute window (``minute_calls`` and
    ``minute_start_time``).

    Args:
        symbol: Ticker passed to the API as ``s`` (e.g. ``"AAPL.US"``).

    Returns:
        list: Every item received for ``symbol``, in the order received.
    """
    # minute_start_time must be declared global as well: it is assigned in
    # the rate-limit handler below, and without the declaration that
    # assignment would only create a local, leaving the shared window start
    # used by check_rate_limits() untouched.
    global api_calls, minute_calls, minute_start_time, total_news_items

    company_news = []

    print(f"\nStarting data collection for {symbol} from {FROM_DATE}")
    print(f"Using max iterations: {MAX_ITERATIONS}")

    # Main loop for pagination
    for i in range(MAX_ITERATIONS):
        offset = i * LIMIT

        # Check rate limits (may block until the next one-minute window)
        if not check_rate_limits():
            print(f"Stopping further requests for {symbol} due to API limits.")
            break

        try:
            print(f"Company {symbol} - Batch {i+1}/{MAX_ITERATIONS}: Fetching news with offset {offset}...")

            # Make the API call; offset and limit are passed as strings
            news_batch = api.financial_news(
                s=symbol,
                from_date=FROM_DATE,
                offset=str(offset),
                limit=str(LIMIT)
            )

            # Update counters (only reached when the call did not raise)
            api_calls += 1
            minute_calls += 1

            # An empty batch means there is nothing left to page through
            if not news_batch:
                print(f"No more news items found for {symbol} after offset {offset}.")
                break

            # Add the batch to our collection
            company_news.extend(news_batch)
            total_news_items += len(news_batch)

            # Display progress
            print(f"Retrieved {len(news_batch)} news items for {symbol}. Total for this company: {len(company_news)}")

            # A short batch means this was the last page
            if len(news_batch) < LIMIT:
                print(f"Reached the end of available news for {symbol} at offset {offset}.")
                break

        except Exception as e:
            error_text = str(e)
            print(f"Error for {symbol} at offset {offset}: {error_text}")

            # Heuristic: treat any error mentioning "rate" or "limit" as throttling
            lowered = error_text.lower()
            if "rate" in lowered or "limit" in lowered:
                print("Possible rate limit reached. Pausing for 60 seconds...")
                time.sleep(60)
                # Start a fresh per-minute window after the pause
                minute_calls = 0
                minute_start_time = time.time()
            else:
                # For other errors, add a small delay before continuing
                print("Continuing to next batch after a short delay...")
                time.sleep(5)

        # Add a small delay between requests to be courteous
        time.sleep(0.5)

    return company_news

# Main execution
#
# For each ticker in TARGET_TICKERS: fetch its news, write it to
# "<TICKER with '.' replaced by '_'>_news_extended.json" inside OUTPUT_DIR,
# and rewrite the run summary JSON so progress survives an interruption.
# A final report is always printed, even after an error.

# Resolved up front (not inside the ``try``) so the report in ``finally``
# can always refer to them.
total_companies = len(TARGET_TICKERS)
summary_file = os.path.join(OUTPUT_DIR, "high_interest_news_summary.json")
company_summaries = []
summary_written = False  # flips to True the first time the summary JSON is written

try:
    # Process each of the specified tickers
    for ticker in TARGET_TICKERS:
        # Check if we're approaching API limits before starting a new company
        if not check_rate_limits():
            print(f"Approaching API limits. Stopping after processing {processed_companies} companies.")
            break

        print(f"\n{'='*80}")
        print(f"Processing company {processed_companies+1}/{total_companies}: {ticker}")
        print(f"Using {MAX_ITERATIONS} max iterations for deep news collection")
        print(f"{'='*80}")

        # Fetch news for this company
        company_news = fetch_company_news(ticker)

        # Stays None when there is nothing to save for this ticker
        company_filename = None

        if company_news:
            company_filename = f"{ticker.replace('.', '_')}_news_extended.json"
            company_filepath = os.path.join(OUTPUT_DIR, company_filename)

            with open(company_filepath, 'w', encoding='utf-8') as f:
                json.dump(company_news, f, ensure_ascii=False, indent=4)

            print(f"Saved {len(company_news)} news items for {ticker} to {company_filepath}")
        else:
            print(f"No news found for {ticker}. Skipping file creation.")

        # One summary record per ticker, whether or not a file was written
        company_summaries.append({
            'symbol': ticker,
            'news_count': len(company_news),
            'file': company_filename
        })

        processed_companies += 1

        # Save the summary after each company to keep track of progress
        summary_data = {
            'timestamp': datetime.now().isoformat(),
            'companies_processed': processed_companies,
            'total_companies': total_companies,
            'total_news_items': total_news_items,
            'api_calls': api_calls,
            'target_tickers': TARGET_TICKERS,
            'max_iterations': MAX_ITERATIONS,
            'company_summaries': company_summaries
        }
        with open(summary_file, 'w', encoding='utf-8') as f:
            json.dump(summary_data, f, ensure_ascii=False, indent=4)
        summary_written = True

        # Brief pause between companies
        time.sleep(1)

except Exception as e:
    # Top-level boundary: report the failure, then fall through to the final report
    print(f"Critical error in main execution: {str(e)}")

finally:
    # Final summary
    total_time = time.time() - start_time
    print(f"\n{'='*80}")
    print("Data collection complete:")
    print(f"- Made {api_calls} API calls")
    print(f"- Processed {processed_companies} of {total_companies} high-interest companies")
    print(f"- Collected {total_news_items} total news items")
    print(f"- Used {MAX_ITERATIONS} iterations per company for deep news collection")
    print(f"- Total runtime: {total_time:.2f} seconds ({total_time/3600:.2f} hours)")
    # Only claim a saved summary if one was actually written during this run
    if summary_written:
        print(f"- Summary saved to {summary_file}")
    else:
        print("- No summary file was written (no company finished processing)")
    print(f"{'='*80}")


Processing company 1/6: AAPL.US
Using 100 max iterations for deep news collection

Starting data collection for AAPL.US from 2019-01-01
Using max iterations: 100
Company AAPL.US - Batch 1/100: Fetching news with offset 0...
Retrieved 1000 news items for AAPL.US. Total for this company: 1000
Company AAPL.US - Batch 2/100: Fetching news with offset 1000...
Retrieved 1000 news items for AAPL.US. Total for this company: 2000
Company AAPL.US - Batch 3/100: Fetching news with offset 2000...
Retrieved 1000 news items for AAPL.US. Total for this company: 3000
Company AAPL.US - Batch 4/100: Fetching news with offset 3000...
Retrieved 1000 news items for AAPL.US. Total for this company: 4000
Company AAPL.US - Batch 5/100: Fetching news with offset 4000...
Retrieved 1000 news items for AAPL.US. Total for this company: 5000
Company AAPL.US - Batch 6/100: Fetching news with offset 5000...
Retrieved 1000 news items for AAPL.US. Total for this company: 6000
Company AAPL.US - Batch 7/100: Fetching ne