In [None]:
import requests
import pandas as pd
import time
from datetime import datetime
from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
from transformers import pipeline

# Replace with your NewsAPI key
api_key = 'afc3fe9ac08745439bf521cb5b974fbc'

# Initialize sentiment analysis tools
vader_analyzer = SentimentIntensityAnalyzer()
bert_sentiment = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")

# List of tickers to search news for
tickers = [
    'AAPL', 'GOOGL', 'MSFT', 'ASTS', 'PTON', 'GSAT', 'PLTR', 'SMR', 'ACHR',
    'BWXT', 'ARBK', 'AMD', 'NVDA', 'GME', 'MU', 'TSLA', 'NFLX', 'ZG',
    'AVGO', 'SMCI', 'GLW', 'HAL', 'LMT', 'AMZN', 'CRM', 'NOW', 'CHTR', 'TDS', 'META'
]

# Get today's date in ISO format
today = datetime.utcnow().strftime('%Y-%m-%d')

# Functions for sentiment analysis
def vader_sentiment(text):
    if text:
        return vader_analyzer.polarity_scores(text)['compound']
    return 0

def textblob_sentiment(text):
    if text:
        return TextBlob(text).sentiment.polarity
    return 0

def bert_sentiment_analysis(text):
    if text:
        result = bert_sentiment(text)[0]
        return result['label'], result['score']  # Returns sentiment label and confidence
    return "NEUTRAL", 0.0

def bert_to_vader_scale(label, confidence):
    label_to_score = {
        "1 star": -1.0,
        "2 stars": -0.5,
        "3 stars": 0.0,
        "4 stars": 0.5,
        "5 stars": 1.0
    }
    return label_to_score.get(label, 0.0) * confidence

# Function to fetch market news for the current day
def get_market_news(ticker):
    url = (
        f'https://newsapi.org/v2/everything?q={ticker}&from={today}&to={today}&sortBy=publishedAt&apiKey={api_key}'
    )
    response = requests.get(url)
    if response.status_code == 200:
        return response.json().get('articles', [])
    elif response.status_code == 429:
        print(f"Rate limit exceeded for {ticker}, retrying after delay...")
        time.sleep(5)
        return []
    else:
        print(f"Error fetching data for {ticker}: {response.status_code}")
        return []

# Save data in the required schema
def save_to_csv(news_data, filename="news_data_today.csv"):
    df = pd.DataFrame(news_data)
    df.to_csv(filename, index=False)
    print(f"Data saved to {filename}")

# Fetch and process news for all tickers
all_news = []
for ticker in tickers:
    print(f"Fetching news for {ticker}...")
    articles = get_market_news(ticker)
    
    for article in articles:
        title = article.get('title', '')
        summary = article.get('description', '')
        
        # Sentiment analysis
        headline_vader_sentiment = vader_sentiment(title)
        summary_textblob_sentiment = textblob_sentiment(summary)
        summary_vader_sentiment = vader_sentiment(summary)
        summary_bert_sentiment, bert_confidence = bert_sentiment_analysis(summary)
        summary_bert_vader_scaled = bert_to_vader_scale(summary_bert_sentiment, bert_confidence)
        
        # Article schema
        news_entry = {
            'ticker': ticker,
            'title': title,
            'headline_vader_sentiment': headline_vader_sentiment,
            'summary': summary,
            'summary_textblob_sentiment': summary_textblob_sentiment,
            'summary_vader_sentiment': summary_vader_sentiment,
            'summary_bert_sentiment': summary_bert_sentiment,
            'bert_confidence': bert_confidence,
            'summary_bert_vader_scaled': summary_bert_vader_scaled,
            'publisher': article.get('source', {}).get('name', ''),
            'link': article.get('url', ''),
            'publish_date': article.get('publishedAt', ''),
            'type': 'general',  # Default value
            'related_tickers': '',  # Default empty
            'source': 'NewsAPI',  # Identify source
        }
        all_news.append(news_entry)
    
    # Avoid rate limiting
    time.sleep(1)

# Save the formatted data to a CSV file
if all_news:
    save_to_csv(all_news)
else:
    print("No news data available.")








In [19]:
import pandas as pd
import time
from datetime import datetime, timedelta
from textblob import TextBlob  # For sentiment analysis
from newsapi.newsapi_client import NewsApiClient  # Correct import

# Replace this with your NewsAPI key
NEWS_API_KEY = 'afc3fe9ac08745439bf521cb5b974fbc'

# Initialize NewsAPI client correctly
newsapi = NewsApiClient(api_key=NEWS_API_KEY)

# List of tickers to fetch news for
tickers = [
    'AAPL'
]

# Get today's and yesterday's dates
today = datetime.now()
yesterday = today - timedelta(days=1)

# Format dates for NewsAPI
today_str = today.strftime('%Y-%m-%d')
yesterday_str = yesterday.strftime('%Y-%m-%d')

def get_market_news(tickers, from_date, to_date):
    """
    Fetch market news for specified tickers and date range.
    """
    all_articles = []
    for ticker in tickers:
        try:
            articles = newsapi.get_everything(
                q=ticker,
                from_param=from_date,
                to=to_date,
                language='en',
                sort_by='relevancy',
                page_size=100
            )
            print(f"Total articles found for {ticker}: {len(articles['articles'])}")
            
            for article in articles['articles']:
                all_articles.append({
                    'Ticker': ticker,
                    'Source': article['source']['name'],
                    'Author': article.get('author', 'N/A'),
                    'Title': article['title'],
                    'Description': article.get('description', article.get('title', '')),
                    'URL': article['url'],
                    'Published At': article['publishedAt']
                })
            # To avoid hitting API rate limits
            time.sleep(1)
        except Exception as e:
            print(f"Error fetching news for {ticker}: {e}")
    return all_articles

def textblob_sentiment(text):
    """
    Analyze sentiment using TextBlob.
    """
    if text:
        return TextBlob(text).sentiment.polarity
    return 0  # Return 0 instead of None

# Fetch news articles
print("Fetching news articles...")
articles = get_market_news(tickers, yesterday_str, today_str)

# Check if articles were fetched
if not articles:
    print("No articles found. Check your API key, date range, or network connection.")
    exit()

# Convert articles to DataFrame
print("Processing articles...")
df = pd.DataFrame(articles)

# Print columns to verify
print("DataFrame columns:", list(df.columns))

# Add sentiment analysis
print("Performing sentiment analysis...")
df['Sentiment'] = df['Description'].apply(textblob_sentiment)

# Save to CSV
output_file = 'market_news.csv'
df.to_csv(output_file, index=False)
print(f"Saved news data to {output_file}")

Fetching news articles...
Error fetching news for AAPL: {'status': 'error', 'code': 'rateLimited', 'message': 'You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.'}
No articles found. Check your API key, date range, or network connection.
Processing articles...
DataFrame columns: []
Performing sentiment analysis...


KeyError: 'Description'

In [17]:
!pip install newsapi-python




[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip
