In [2]:
import requests
import pandas as pd
import logging
from datetime import datetime, timedelta
from textblob import TextBlob
import time

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# NewsAPI key
api_key = 'afc3fe9ac08745439bf521cb5b974fbc'

# List of tickers to search news for
tickers = ['AAPL']

# Date range for news query
today = datetime.now().strftime('%Y-%m-%d')
seven_days_ago = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')

# Function for TextBlob sentiment analysis
def textblob_sentiment(text):
    if text:
        return TextBlob(text).sentiment.polarity  # Sentiment polarity from -1 to 1
    return 0

# Function to fetch market news
def get_market_news(ticker):
    url = (
        f'https://newsapi.org/v2/everything?q={ticker}&from={seven_days_ago}&to={today}&sortBy=publishedAt&apiKey={api_key}'
    )
    try:
        logger.info(f"Requesting URL: {url}")
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise an exception for bad status codes
        articles = response.json().get('articles', [])
        if not articles:
            logger.warning(f"No articles returned for {ticker}. Full response: {response.json()}")
        return articles
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching data for {ticker}: {e}")
        return []

# Main function to fetch news and save to CSV
def fetch_and_save_news():
    all_news = []
    for ticker in tickers:
        logger.info(f"Fetching news for {ticker}...")
        articles = get_market_news(ticker)

        for article in articles:
            title = article.get('title', '')
            summary = article.get('description', '')

            # Sentiment analysis using TextBlob
            summary_textblob_sentiment = textblob_sentiment(summary)

            # Article schema
            news_entry = {
                'ticker': ticker,
                'title': title,
                'summary': summary,
                'summary_textblob_sentiment': summary_textblob_sentiment,
                'publisher': article.get('source', {}).get('name', ''),
                'link': article.get('url', ''),
                'publish_date': article.get('publishedAt', ''),
                'source': 'NewsAPI',
            }
            all_news.append(news_entry)

        # Avoid rate limiting
        time.sleep(1)

    # Convert data to a DataFrame
    df = pd.DataFrame(all_news)

    if not df.empty:
        # Save DataFrame to CSV
        output_file = 'market_news.csv'
        df.to_csv(output_file, index=False)
        logger.info(f"Data successfully saved to {output_file}")
    else:
        logger.warning("No news articles found within the specified date range.")

if __name__ == "__main__":
    fetch_and_save_news()


2024-12-13 09:06:09,539 - INFO - Fetching news for AAPL...
2024-12-13 09:06:09,543 - INFO - Requesting URL: https://newsapi.org/v2/everything?q=AAPL&from=2024-12-06&to=2024-12-13&sortBy=publishedAt&apiKey=afc3fe9ac08745439bf521cb5b974fbc
2024-12-13 09:06:11,413 - INFO - Data successfully saved to market_news.csv
