In [None]:
# Notebook shell commands (IPython "!" syntax), run before the imports below.
# Install the news packages quietly; gnews is pinned to 0.3.5, newspaper3k is
# left unpinned.
!pip install -q gnews==0.3.5 newspaper3k
# Upgrade the HTTP and character-encoding packages to their latest releases.
!pip install --upgrade requests urllib3 chardet charset_normalizer

import os
import time
import warnings
from datetime import datetime

import pandas as pd
import requests
from gnews import GNews

# Suppress warnings
warnings.filterwarnings("ignore")

# API Configuration
API_URL = "https://api-inference.huggingface.co/models/mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"

# The access token is read from the HF_TOKEN environment variable so the
# secret never lives in the notebook or its version history. Any token that
# was ever committed in plain text should be treated as leaked and revoked.
_HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()

# Request headers sent with every inference call; empty when no token is set.
HEADERS = {"Authorization": f"Bearer {_HF_TOKEN}"} if _HF_TOKEN else {}

if not _HF_TOKEN:
    # print() rather than warnings.warn(): warnings are silenced just above.
    print("HF_TOKEN is not set; Hugging Face requests will be sent without authentication.")

# Query Hugging Face Sentiment API
def query(payload, timeout=30):
    """POST ``payload`` to the inference endpoint and return the decoded JSON.

    Args:
        payload: The JSON request body. A bare string is wrapped as
            ``{"inputs": payload}``, the request shape described in the
            Inference API documentation; any other value is sent unchanged.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        The decoded JSON response. When the request fails or the body is not
        valid JSON, ``{"error": "<description>"}`` is returned instead of
        raising, so callers that test for a list treat it as a failed attempt.
    """
    if isinstance(payload, str):
        payload = {"inputs": payload}

    try:
        response = requests.post(
            API_URL, headers=HEADERS, json=payload, timeout=timeout
        )
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers JSON decoding failures on older requests versions.
        return {"error": str(exc)}

# Save Sentiment Data
def save_sentiment(start, end, topic, filename):
    """Collect per-day average news sentiment for ``topic`` and save it as CSV.

    Fetches Google News results for ``topic``, downloads each article, scores
    its title and its first 300 words with ``query_with_retries``, averages
    the scores per publication date and writes them to ``<filename>.csv``.

    Args:
        start: Start of the search window, assigned to ``GNews.start_date``
            (the caller in this file passes a ``(year, month, day)`` tuple).
        end: End of the search window, assigned to ``GNews.end_date``.
        topic: Search keyword passed to ``GNews.get_news``.
        filename: Output path without the ``.csv`` extension.

    Returns:
        None. When no article could be scored a notice is printed and no
        file is written.
    """
    # Initialize Google News API
    google_news = GNews()
    google_news.exclude_websites = [
        'costar.com', 'investorplace', 'teslaoracle', 'barrons', 'bizjournals',
        'forbes', 'pymnts', 'thestreet', 'investing', 'mining', 'statecollege',
        'realmoney.thestreet', 'fastcompany', 'crainsnewyork', 'investors'
    ]
    google_news.country = 'United States'
    google_news.language = 'english'
    google_news.start_date = start
    google_news.end_date = end

    # Fetch news articles; tolerate a None result from the client.
    articles = google_news.get_news(topic) or []
    sentiments = []

    for article in articles:
        full_article = google_news.get_full_article(article['url'])
        if not full_article or not full_article.publish_date:
            continue

        title = full_article.title
        # Only the first 300 words of the body are scored.
        content = " ".join((full_article.text or "").split()[:300])
        if not title or not content:
            # Nothing meaningful to score; avoid wasting API calls.
            continue

        # Query title and text sentiment; skip the second request when the
        # first one already failed, since both are required for a row.
        title_sentiment = query_with_retries(title)
        if not title_sentiment:
            continue
        text_sentiment = query_with_retries(content)
        if not text_sentiment:
            continue

        sentiments.append({
            "Date": full_article.publish_date.date().isoformat(),
            "NegTitle": extract_score(title_sentiment, 'negative'),
            "NeuTitle": extract_score(title_sentiment, 'neutral'),
            "PosTitle": extract_score(title_sentiment, 'positive'),
            "NegText": extract_score(text_sentiment, 'negative'),
            "NeuText": extract_score(text_sentiment, 'neutral'),
            "PosText": extract_score(text_sentiment, 'positive')
        })

    # Check if sentiments were collected
    if not sentiments:
        print(f"No sentiments collected for {topic} between {start} and {end}.")
        return

    # Every row carries a "Date" key, so the column is always present here.
    df = pd.DataFrame(sentiments)
    df = df.groupby('Date').mean().reset_index()
    df.to_csv(f"{filename}.csv", index=False)

# Helper function for retries
def query_with_retries(payload, max_retries=3, backoff=1.0, max_wait=20.0):
    """Call ``query`` until it returns a list, pausing between attempts.

    Args:
        payload: Value forwarded unchanged to ``query``.
        max_retries: Maximum number of attempts.
        backoff: Base delay in seconds; it doubles after every failed attempt.
        max_wait: Upper bound in seconds for any single pause.

    Returns:
        The first list response received, or ``None`` when every attempt
        failed (an exception, or a non-list response such as an error dict).
    """
    for attempt in range(max_retries):
        try:
            response = query(payload)
        except (requests.RequestException, ValueError):
            # A transport or decoding failure counts as a failed attempt
            # instead of aborting the caller's loop.
            response = None

        if isinstance(response, list):
            return response

        if attempt + 1 < max_retries:  # no pause after the final attempt
            wait = backoff * (2 ** attempt)
            if isinstance(response, dict):
                # Per the Inference API docs, an error body may carry an
                # "estimated_time" hint while the model is still loading.
                estimated = response.get("estimated_time")
                if isinstance(estimated, (int, float)):
                    wait = estimated
            time.sleep(min(max(wait, 0), max_wait))
    return None

# Helper function to extract sentiment score
def extract_score(sentiment_data, label):
    """Return the score reported for ``label`` in a classification response.

    Args:
        sentiment_data: Either a list whose first element is a list of
            ``{"label": ..., "score": ...}`` dicts (only that first element
            is inspected), or a flat list of such dicts.
        label: Label to look up, e.g. ``'negative'``.

    Returns:
        The matching score, or ``0`` when the response is empty, the label
        is absent, or the matching entry has no score.
    """
    if not sentiment_data:
        return 0

    first = sentiment_data[0]
    # Accept a flat list of dicts as well as the nested form.
    candidates = sentiment_data if isinstance(first, dict) else (first or [])

    for item in candidates:
        if isinstance(item, dict) and item.get('label') == label:
            return item.get('score', 0)
    return 0

In [None]:
# Month labels; each one becomes the suffix of an output file name.
sent = ['Jan2022', 'Feb2022', 'Mar2022']

# One (start, end) pair of (year, month, day) tuples per label, running from
# the first day of the month to the first day of the following month.
dates = [
    ((2022, month, 1), (2022, month + 1, 1))
    for month in (1, 2, 3)
]

# Score TSLA news for every month and write one CSV per month.
for label, (start_date, end_date) in zip(sent, dates):
    save_sentiment(start_date, end_date, 'TSLA', f"TSLA{label}")

