In [1]:
import os                    #A
import openai                #B
import requests              #C
import pandas as pd          #D
import logging               #E
from datetime import datetime, timedelta  #F
from dotenv import load_dotenv            #G

# Load environment variables from .env file so the os.getenv() calls below can read them
load_dotenv()                #H

# Configure logging: INFO and above, each record printed as "LEVEL: message"
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')  #I

# Retrieve API keys from environment.
# os.getenv() returns None when a variable is not set, so a missing key is
# not reported at this point.
NEWS_API_KEY = os.getenv("NEWS_API_KEY")          #J
openai.api_key = os.getenv("OPENAI_API_KEY")      #K

# Step 1: Extract articles using NewsAPI
# Both dates are computed once, when this cell runs, from the local clock.
# extract_articles() uses them as its query window (yesterday through today).
today = datetime.now().date()                     #L
yesterday = today - timedelta(days=1)             #M

def extract_articles(query, from_date=yesterday, api_key=NEWS_API_KEY):       #N
    """Fetch articles matching *query* from the NewsAPI ``everything`` endpoint.

    Args:
        query: Search expression sent as the ``q`` parameter. It is
            URL-encoded by ``requests``, so spaces, ``&`` and similar
            characters are safe.
        from_date: Start of the search window. Defaults to the module-level
            ``yesterday``; the window always ends at the module-level ``today``.
        api_key: NewsAPI key. Defaults to the module-level ``NEWS_API_KEY``.

    Returns:
        The list of article dicts from the response, or an empty list when
        the request fails or the server answers with a non-200 status.
    """
    # Pass the query string as a dict so requests percent-encodes each value
    # instead of splicing raw text into the URL.
    params = {
        'q': query,
        'from': str(from_date),
        'to': str(today),
        'apiKey': api_key,
    }

    try:
        # A timeout keeps an unresponsive server from hanging the pipeline.
        response = requests.get(
            'https://newsapi.org/v2/everything', params=params, timeout=30
        )
    except requests.RequestException as exc:
        # Log only the exception class: the full message can embed the request
        # URL, which carries the API key.
        logging.error(f"Failed to fetch articles. Request error: {type(exc).__name__}")
        return []

    if response.status_code != 200:
        logging.error(f"Failed to fetch articles. Status code: {response.status_code}")
        return []

    # `or []` also covers a payload whose "articles" field is null.
    articles = response.json().get('articles') or []
    logging.info(f"Successfully extracted {len(articles)} articles.")
    return articles

# Step 2: Preprocess the articles
def preprocess_articles(articles, limit=5):                #O
    """Turn raw article dicts into a DataFrame ready for sentiment scoring.

    Args:
        articles: Sequence of dicts; the ``title``, ``description`` and
            ``content`` keys are read, and each may be missing or ``None``.
        limit: Maximum number of articles to keep, taken from the front of
            the list. Defaults to 5 (a testing cap); pass ``None`` to keep
            every article.

    Returns:
        A DataFrame with the columns ``title``, ``description`` and
        ``content``, where ``content`` is the three fields joined by single
        spaces with newlines flattened. The columns are present even when
        there are no rows.
    """
    columns = ['title', 'description', 'content']
    data = []
    for article in articles[:limit]:
        # `.get(key, '')` still yields None when the key exists with a null
        # value, which would put the text "None" into the combined content.
        title = article.get('title') or ''
        description = article.get('description') or ''
        content = article.get('content') or ''

        # Join only the non-empty parts so absent fields leave no stray spaces.
        parts = [part for part in (title, description, content) if part]
        full_text = ' '.join(parts).replace('\n', ' ').strip()

        data.append({
            'title': title,
            'description': description,
            'content': full_text,
        })

    # Explicit columns keep df['content'] valid when no articles were given.
    df = pd.DataFrame(data, columns=columns)
    logging.info(f"Preprocessed {len(df)} articles.")
    return df

# Step 3: Perform sentiment analysis with normalized output
def perform_sentiment_analysis(article_content):  #P
    """Ask the chat model for a sentiment score for one article.

    Args:
        article_content: Article text appended to the scoring prompt.

    Returns:
        The model's reply as a stripped string (for example ``"-0.5"``) when
        it parses as a number between -1 and 1 inclusive. Returns ``None``
        when the API call fails or the reply is not such a number.
    """
    prompt = f"Analyze the sentiment of the following article content and return a numerical sentiment score from -1 (very negative) to 1 (very positive). Return only the number: {article_content}"

    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=50,
            temperature=0.5
        )
        sentiment = response.choices[0].message.content.strip()
    except Exception as e:
        # Best-effort boundary: one failed call must not abort the whole batch.
        logging.error(f"Error performing sentiment analysis: {e}")
        return None

    # The prompt asks for a bare number, but nothing forces the model to
    # comply, so check the reply before it reaches the sentiment column.
    try:
        score = float(sentiment)
    except ValueError:
        score = None

    # The range test also rejects NaN, for which every comparison is False.
    if score is None or not -1 <= score <= 1:
        logging.warning(f"Discarding unusable sentiment reply: {sentiment!r}")
        return None

    return sentiment

# Step 4: Update the DataFrame with sentiment results
def update_with_sentiment(df):                    #Q
    """Score every row's ``content`` and store the results in ``sentiment``.

    The column is added to the DataFrame that was passed in, and that same
    DataFrame is returned. One INFO line is logged per article as it is
    processed.
    """
    total = len(df)
    scores = []

    for position, text in enumerate(df['content'], start=1):
        score = perform_sentiment_analysis(text)
        scores.append(score)
        logging.info(f"Processed article {position}/{total}: Sentiment = {score}")

    df['sentiment'] = scores
    return df

# Run the pipeline: fetch -> preprocess -> score.
# These statements run at module level, so executing the cell performs live
# NewsAPI and OpenAI requests.
articles = extract_articles('Tesla')              #R
df_articles = preprocess_articles(articles)       #S
# update_with_sentiment() adds the 'sentiment' column to df_articles itself and
# returns that same object, so both names refer to one DataFrame.
df_with_sentiment = update_with_sentiment(df_articles)  #T

# Display the final DataFrame
# (a bare expression is rendered only as the last statement of a notebook cell)
df_with_sentiment[['title', 'sentiment']]         #U


INFO: Successfully extracted 99 articles.
INFO: Preprocessed 5 articles.
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 1/5: Sentiment = 0.2
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 2/5: Sentiment = -0.5
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 3/5: Sentiment = -0.8
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 4/5: Sentiment = -0.3
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 5/5: Sentiment = 0


Unnamed: 0,title,sentiment
0,Tesla’s latest Cybertruck has longer range and...,0.2
1,Tesla Halts Sales of Model S and X in China as...,-0.5
2,"Elon Musk Is Annoying, Unfunny, and Should Pro...",-0.8
3,A record number of used Teslas have flooded th...,-0.3
4,"Tesla, Apple, Jamie Dimon comments: Trending T...",0.0


In [None]:
#A–#G: Standard imports, including dotenv for secure key management.

#H: Loads the variables defined in a .env file (if one is found) into the process environment, so that os.getenv() can read the secrets stored there.

#I: Logging config for visibility during notebook execution.

#J–#K: API keys pulled from environment variables.

#L–#M: Date handling to keep queries dynamic and recent.

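#N–#O: Extract articles from NewsAPI and preprocess them into a clean DataFrame. Preprocessing keeps only the first five articles (a limit for testing), which is why the log shows 99 articles extracted but 5 preprocessed.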

#P: Modified sentiment prompt—asks the model to return only a numerical score from -1 (very negative) to 1 (very positive) for consistent downstream use.

#Q: Iterates through article text and appends sentiment results.

#R–#T: Executes each step of the pipeline in sequence.

#U: Displays output showing article title alongside AI-determined sentiment.

