# Chapter 5 Lab

1. Change part of the working code in the companion notebook to replace Tesla with another popular company (e.g., Microsoft, Disney, or NVIDIA). Run the pipeline and note any changes in sentiment across the returned articles.

In [3]:
import os                    #A
import openai                #B
import requests              #C
import pandas as pd          #D
import logging               #E
from datetime import datetime, timedelta  #F
from dotenv import load_dotenv            #G

# Load environment variables from .env file (must run before the os.getenv calls below)
load_dotenv()                #H

# Configure logging: show INFO and above, formatted as "LEVEL: message"
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')  #I

# Retrieve API keys from environment (os.getenv returns None when a variable is unset)
NEWS_API_KEY = os.getenv("NEWS_API_KEY")          #J
openai.api_key = os.getenv("OPENAI_API_KEY")      #K

# Step 1: Extract articles using NewsAPI
# Date window for the query. Both values are computed once, when this cell runs;
# extract_articles below uses `yesterday` as its default start date and `today`
# as the end date.
today = datetime.now().date()                     #L
yesterday = today - timedelta(days=1)             #M

def extract_articles(query, from_date=yesterday, api_key=NEWS_API_KEY):       #N
    """Fetch articles matching ``query`` from the NewsAPI ``/v2/everything`` endpoint.

    Args:
        query: Search term sent as the ``q`` parameter.
        from_date: Start of the date window; defaults to the module-level
            ``yesterday``. The window always ends at the module-level ``today``.
        api_key: NewsAPI key; defaults to the module-level ``NEWS_API_KEY``.

    Returns:
        The list stored under the response's ``articles`` key, or ``[]`` when
        the request fails or the status code is not 200.
    """
    # Hand the query string to requests via ``params`` so it is URL-encoded;
    # interpolating the raw query into the URL breaks on spaces, '&', '#', etc.
    params = {
        'q': query,
        'from': str(from_date),
        'to': str(today),
        'apiKey': api_key,
    }
    try:
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get('https://newsapi.org/v2/everything', params=params, timeout=30)
    except requests.RequestException as e:
        # Network-level failures follow the same "log and return []" contract
        # as a non-200 status.
        logging.error(f"Failed to fetch articles. Request error: {e}")
        return []

    if response.status_code == 200:
        articles = response.json().get('articles', [])
        logging.info(f"Successfully extracted {len(articles)} articles.")
        return articles

    logging.error(f"Failed to fetch articles. Status code: {response.status_code}")
    return []

# Step 2: Preprocess the articles
def preprocess_articles(articles, limit=5):                #O
    """Turn raw article dicts into a DataFrame of cleaned text.

    Args:
        articles: List of article dicts; the 'title', 'description' and
            'content' keys are read from each one.
        limit: Maximum number of articles taken from the front of the list.

    Returns:
        DataFrame with 'title', 'description' and 'content' columns, where
        'content' is the three fields joined into one newline-free string.
        The columns are present even when ``articles`` is empty.
    """
    columns = ['title', 'description', 'content']
    data = []
    for article in articles[:limit]:
        # ``.get(key, '')`` only covers a missing key; a key that is present
        # with a None value would otherwise be rendered as the text "None".
        title = article.get('title') or ''
        description = article.get('description') or ''
        content = article.get('content') or ''

        # Join only the non-empty fields so blanks leave no stray spaces.
        parts = [str(part) for part in (title, description, content) if part]
        full_text = ' '.join(parts).replace('\n', ' ').strip()

        data.append({
            'title': title,
            'description': description,
            'content': full_text,
        })

    # Naming the columns keeps df['content'] valid for an empty input.
    df = pd.DataFrame(data, columns=columns)
    logging.info(f"Preprocessed {len(df)} articles.")
    return df

# Step 3: Perform sentiment analysis with normalized output
def perform_sentiment_analysis(article_content):  #P
    """Classify the sentiment of ``article_content`` via the Chat Completions API.

    Args:
        article_content: Article text embedded in the prompt.

    Returns:
        'Positive', 'Neutral' or 'Negative' when the reply matches one of those
        labels (ignoring case, surrounding quotes and punctuation); otherwise
        the stripped reply text unchanged. ``None`` if the request or reading
        the reply raises.
    """
    prompt = f"Analyze the sentiment of the following article content and return 'Positive', 'Neutral', or 'Negative' only: {article_content}"

    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=50,
            temperature=0.5
        )
        sentiment = response.choices[0].message.content.strip()
    except Exception as e:
        # Boundary with an external service: log and signal failure with None
        # so one bad article does not abort the whole run.
        logging.error(f"Error performing sentiment analysis: {e}")
        return None

    # Normalize variants such as "positive." or "'Positive'" so identical
    # labels are not counted as separate categories downstream.
    label = sentiment.strip(" .!'\"`").capitalize()
    if label in ('Positive', 'Neutral', 'Negative'):
        return label

    logging.warning(f"Unexpected sentiment label: {sentiment!r}")
    return sentiment

# Step 4: Update the DataFrame with sentiment results
def update_with_sentiment(df):                    #Q
    """Score each entry of ``df['content']`` and store the results in a 'sentiment' column.

    The DataFrame is modified in place and also returned.
    """
    def _score(index, content):
        # Analyze one article and log progress as "i/total".
        sentiment = perform_sentiment_analysis(content)
        logging.info(f"Processed article {index + 1}/{len(df)}: Sentiment = {sentiment}")
        return sentiment

    df['sentiment'] = [_score(index, content) for index, content in enumerate(df['content'])]
    return df

# Run the pipeline for the query 'NVIDIA': fetch -> preprocess -> score sentiment
articles = extract_articles('NVIDIA')              #R
df_articles = preprocess_articles(articles)       #S
df_with_sentiment = update_with_sentiment(df_articles)  #T

# Display the final DataFrame (only the title and sentiment columns)
df_with_sentiment[['title', 'sentiment']]         #U


INFO: Successfully extracted 100 articles.
INFO: Preprocessed 5 articles.
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 1/5: Sentiment = Positive
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 2/5: Sentiment = Neutral
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 3/5: Sentiment = Neutral
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 4/5: Sentiment = Neutral
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 5/5: Sentiment = Negative


Unnamed: 0,title,sentiment
0,El milagro de NVIDIA: va a poder seguir vendie...,Positive
1,Nvidia on NixOS WSL – Ollama up 24/7 on your g...,Neutral
2,How's Apple going to get out of its China jam?,Neutral
3,Actualité : Top produit – La carte graphique N...,Neutral
4,GPUs and tariffs — Why I recommend buying a ne...,Negative


2. Modify the prompt sent to the Chat Completions endpoint so it returns a numerical sentiment score from -1 (very negative) to 1 (very positive), instead of a categorical label. Run the pipeline and verify that the new responses are numeric and well-formed.

In [4]:
import os                    #A
import openai                #B
import requests              #C
import pandas as pd          #D
import logging               #E
from datetime import datetime, timedelta  #F
from dotenv import load_dotenv            #G

# Load environment variables from .env file (must run before the os.getenv calls below)
load_dotenv()                #H

# Configure logging: show INFO and above, formatted as "LEVEL: message"
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')  #I

# Retrieve API keys from environment (os.getenv returns None when a variable is unset)
NEWS_API_KEY = os.getenv("NEWS_API_KEY")          #J
openai.api_key = os.getenv("OPENAI_API_KEY")      #K

# Step 1: Extract articles using NewsAPI
# Date window for the query. Both values are computed once, when this cell runs;
# extract_articles below uses `yesterday` as its default start date and `today`
# as the end date.
today = datetime.now().date()                     #L
yesterday = today - timedelta(days=1)             #M

def extract_articles(query, from_date=yesterday, api_key=NEWS_API_KEY):       #N
    """Fetch articles matching ``query`` from the NewsAPI ``/v2/everything`` endpoint.

    Args:
        query: Search term sent as the ``q`` parameter.
        from_date: Start of the date window; defaults to the module-level
            ``yesterday``. The window always ends at the module-level ``today``.
        api_key: NewsAPI key; defaults to the module-level ``NEWS_API_KEY``.

    Returns:
        The list stored under the response's ``articles`` key, or ``[]`` when
        the request fails or the status code is not 200.
    """
    # Hand the query string to requests via ``params`` so it is URL-encoded;
    # interpolating the raw query into the URL breaks on spaces, '&', '#', etc.
    params = {
        'q': query,
        'from': str(from_date),
        'to': str(today),
        'apiKey': api_key,
    }
    try:
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get('https://newsapi.org/v2/everything', params=params, timeout=30)
    except requests.RequestException as e:
        # Network-level failures follow the same "log and return []" contract
        # as a non-200 status.
        logging.error(f"Failed to fetch articles. Request error: {e}")
        return []

    if response.status_code == 200:
        articles = response.json().get('articles', [])
        logging.info(f"Successfully extracted {len(articles)} articles.")
        return articles

    logging.error(f"Failed to fetch articles. Status code: {response.status_code}")
    return []

# Step 2: Preprocess the articles
def preprocess_articles(articles, limit=5):                #O
    """Turn raw article dicts into a DataFrame of cleaned text.

    Args:
        articles: List of article dicts; the 'title', 'description' and
            'content' keys are read from each one.
        limit: Maximum number of articles taken from the front of the list.

    Returns:
        DataFrame with 'title', 'description' and 'content' columns, where
        'content' is the three fields joined into one newline-free string.
        The columns are present even when ``articles`` is empty.
    """
    columns = ['title', 'description', 'content']
    data = []
    for article in articles[:limit]:
        # ``.get(key, '')`` only covers a missing key; a key that is present
        # with a None value would otherwise be rendered as the text "None".
        title = article.get('title') or ''
        description = article.get('description') or ''
        content = article.get('content') or ''

        # Join only the non-empty fields so blanks leave no stray spaces.
        parts = [str(part) for part in (title, description, content) if part]
        full_text = ' '.join(parts).replace('\n', ' ').strip()

        data.append({
            'title': title,
            'description': description,
            'content': full_text,
        })

    # Naming the columns keeps df['content'] valid for an empty input.
    df = pd.DataFrame(data, columns=columns)
    logging.info(f"Preprocessed {len(df)} articles.")
    return df

# Step 3: Perform sentiment analysis with normalized output
def perform_sentiment_analysis(article_content):  #P
    """Request a numeric sentiment score for ``article_content`` via the Chat Completions API.

    Args:
        article_content: Article text embedded in the prompt.

    Returns:
        The reply as a string (surrounding quotes/whitespace and trailing
        periods removed) when it parses as a number between -1 and 1.
        ``None`` when the reply is not such a number, or when the request or
        reading the reply raises.
    """
    prompt = f"Analyze the sentiment of the following article content and return a numerical sentiment score from -1 (very negative) to 1 (very positive). Return only the number: {article_content}"

    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=50,
            temperature=0.5
        )
        sentiment = response.choices[0].message.content.strip()
    except Exception as e:
        # Boundary with an external service: log and signal failure with None
        # so one bad article does not abort the whole run.
        logging.error(f"Error performing sentiment analysis: {e}")
        return None

    # Tolerate decorations such as "0.5." or "'0.5'" around the number.
    # '.' is only stripped on the right so a reply like ".5" stays intact.
    cleaned = sentiment.rstrip(" .'\"`").lstrip(" '\"`")
    try:
        score = float(cleaned)
    except ValueError:
        logging.warning(f"Sentiment reply is not numeric: {sentiment!r}")
        return None

    # The range check also rejects nan/inf, which float() accepts.
    if not -1.0 <= score <= 1.0:
        logging.warning(f"Sentiment score out of range [-1, 1]: {sentiment!r}")
        return None

    return cleaned

# Step 4: Update the DataFrame with sentiment results
def update_with_sentiment(df):                    #Q
    """Score each entry of ``df['content']`` and store the results in a 'sentiment' column.

    The DataFrame is modified in place and also returned.
    """
    def _score(index, content):
        # Analyze one article and log progress as "i/total".
        sentiment = perform_sentiment_analysis(content)
        logging.info(f"Processed article {index + 1}/{len(df)}: Sentiment = {sentiment}")
        return sentiment

    df['sentiment'] = [_score(index, content) for index, content in enumerate(df['content'])]
    return df

# Run the pipeline for the query 'NVIDIA': fetch -> preprocess -> score sentiment
articles = extract_articles('NVIDIA')              #R
df_articles = preprocess_articles(articles)       #S
df_with_sentiment = update_with_sentiment(df_articles)  #T

# Display the final DataFrame (only the title and sentiment columns)
df_with_sentiment[['title', 'sentiment']]         #U


INFO: Successfully extracted 100 articles.
INFO: Preprocessed 5 articles.
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 1/5: Sentiment = 0.2
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 2/5: Sentiment = 0.5
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 3/5: Sentiment = -0.3
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 4/5: Sentiment = 0.5
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 5/5: Sentiment = 0.1


Unnamed: 0,title,sentiment
0,El milagro de NVIDIA: va a poder seguir vendie...,0.2
1,Nvidia on NixOS WSL – Ollama up 24/7 on your g...,0.5
2,How's Apple going to get out of its China jam?,-0.3
3,Actualité : Top produit – La carte graphique N...,0.5
4,GPUs and tariffs — Why I recommend buying a ne...,0.1


3. Update the article preprocessing function to process up to 50 articles instead of 5. Aggregate the results using value_counts() or any simple groupby method to summarize sentiment across all articles.

In [7]:
import os                    #A
import openai                #B
import requests              #C
import pandas as pd          #D
import logging               #E
from datetime import datetime, timedelta  #F
from dotenv import load_dotenv            #G

# Load environment variables from .env file (must run before the os.getenv calls below)
load_dotenv()                #H

# Configure logging: show INFO and above, formatted as "LEVEL: message"
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')  #I

# Retrieve API keys from environment (os.getenv returns None when a variable is unset)
NEWS_API_KEY = os.getenv("NEWS_API_KEY")          #J
openai.api_key = os.getenv("OPENAI_API_KEY")      #K

# Step 1: Extract articles using NewsAPI
# Date window for the query. Both values are computed once, when this cell runs;
# extract_articles below uses `yesterday` as its default start date and `today`
# as the end date.
today = datetime.now().date()                     #L
yesterday = today - timedelta(days=1)             #M

def extract_articles(query, from_date=yesterday, api_key=NEWS_API_KEY):       #N
    """Fetch articles matching ``query`` from the NewsAPI ``/v2/everything`` endpoint.

    Args:
        query: Search term sent as the ``q`` parameter.
        from_date: Start of the date window; defaults to the module-level
            ``yesterday``. The window always ends at the module-level ``today``.
        api_key: NewsAPI key; defaults to the module-level ``NEWS_API_KEY``.

    Returns:
        The list stored under the response's ``articles`` key, or ``[]`` when
        the request fails or the status code is not 200.
    """
    # Hand the query string to requests via ``params`` so it is URL-encoded;
    # interpolating the raw query into the URL breaks on spaces, '&', '#', etc.
    params = {
        'q': query,
        'from': str(from_date),
        'to': str(today),
        'apiKey': api_key,
    }
    try:
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get('https://newsapi.org/v2/everything', params=params, timeout=30)
    except requests.RequestException as e:
        # Network-level failures follow the same "log and return []" contract
        # as a non-200 status.
        logging.error(f"Failed to fetch articles. Request error: {e}")
        return []

    if response.status_code == 200:
        articles = response.json().get('articles', [])
        logging.info(f"Successfully extracted {len(articles)} articles.")
        return articles

    logging.error(f"Failed to fetch articles. Status code: {response.status_code}")
    return []

# Step 2: Preprocess the articles
def preprocess_articles(articles, limit=50):                #O
    """Turn raw article dicts into a DataFrame of cleaned text.

    Args:
        articles: List of article dicts; the 'title', 'description' and
            'content' keys are read from each one.
        limit: Maximum number of articles taken from the front of the list.

    Returns:
        DataFrame with 'title', 'description' and 'content' columns, where
        'content' is the three fields joined into one newline-free string.
        The columns are present even when ``articles`` is empty.
    """
    columns = ['title', 'description', 'content']
    data = []
    for article in articles[:limit]:
        # ``.get(key, '')`` only covers a missing key; a key that is present
        # with a None value would otherwise be rendered as the text "None".
        title = article.get('title') or ''
        description = article.get('description') or ''
        content = article.get('content') or ''

        # Join only the non-empty fields so blanks leave no stray spaces.
        parts = [str(part) for part in (title, description, content) if part]
        full_text = ' '.join(parts).replace('\n', ' ').strip()

        data.append({
            'title': title,
            'description': description,
            'content': full_text,
        })

    # Naming the columns keeps df['content'] valid for an empty input.
    df = pd.DataFrame(data, columns=columns)
    logging.info(f"Preprocessed {len(df)} articles.")
    return df

# Step 3: Perform sentiment analysis with normalized output
def perform_sentiment_analysis(article_content):  #P
    """Classify the sentiment of ``article_content`` via the Chat Completions API.

    Args:
        article_content: Article text embedded in the prompt.

    Returns:
        'Positive', 'Neutral' or 'Negative' when the reply matches one of those
        labels (ignoring case, surrounding quotes and punctuation); otherwise
        the stripped reply text unchanged. ``None`` if the request or reading
        the reply raises.
    """
    prompt = f"Analyze the sentiment of the following article content and return 'Positive', 'Neutral', or 'Negative' only: {article_content}"

    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=50,
            temperature=0.5
        )
        sentiment = response.choices[0].message.content.strip()
    except Exception as e:
        # Boundary with an external service: log and signal failure with None
        # so one bad article does not abort the whole run.
        logging.error(f"Error performing sentiment analysis: {e}")
        return None

    # Normalize variants such as "positive." or "'Positive'" so identical
    # labels are not split into separate groups when the column is counted.
    label = sentiment.strip(" .!'\"`").capitalize()
    if label in ('Positive', 'Neutral', 'Negative'):
        return label

    logging.warning(f"Unexpected sentiment label: {sentiment!r}")
    return sentiment

# Step 4: Update the DataFrame with sentiment results
def update_with_sentiment(df):                    #Q
    """Score each entry of ``df['content']`` and store the results in a 'sentiment' column.

    The DataFrame is modified in place and also returned.
    """
    def _score(index, content):
        # Analyze one article and log progress as "i/total".
        sentiment = perform_sentiment_analysis(content)
        logging.info(f"Processed article {index + 1}/{len(df)}: Sentiment = {sentiment}")
        return sentiment

    df['sentiment'] = [_score(index, content) for index, content in enumerate(df['content'])]
    return df

# Run the pipeline for the query 'NVIDIA': fetch -> preprocess -> score sentiment
articles = extract_articles('NVIDIA')              #R
df_articles = preprocess_articles(articles)       #S
df_with_sentiment = update_with_sentiment(df_articles)  #T
# Summarize: count how many articles received each distinct sentiment value
df_with_sentiment['sentiment'].value_counts()

INFO: Successfully extracted 100 articles.
INFO: Preprocessed 50 articles.
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 1/50: Sentiment = Positive
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 2/50: Sentiment = Neutral
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 3/50: Sentiment = Negative
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 4/50: Sentiment = Neutral
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 5/50: Sentiment = Negative
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 6/50: Sentiment = Positive
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed art

sentiment
Neutral     22
Positive    14
Negative    14
Name: count, dtype: int64

In [24]:
import os                    #A
import openai                #B
import requests              #C
import pandas as pd          #D
import logging               #E
from datetime import datetime, timedelta  #F
from dotenv import load_dotenv            #G

# Load environment variables from .env file (must run before the os.getenv calls below)
load_dotenv()                #H

# Configure logging: show INFO and above, formatted as "LEVEL: message"
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')  #I

# Retrieve API keys from environment (os.getenv returns None when a variable is unset)
NEWS_API_KEY = os.getenv("NEWS_API_KEY")          #J
openai.api_key = os.getenv("OPENAI_API_KEY")      #K

# Step 1: Extract articles using NewsAPI
# Date window for the query. Both values are computed once, when this cell runs;
# extract_articles below uses `yesterday` as its default start date and `today`
# as the end date.
today = datetime.now().date()                     #L
yesterday = today - timedelta(days=1)             #M

def extract_articles(query, from_date=yesterday, api_key=NEWS_API_KEY):       #N
    """Fetch articles matching ``query`` from the NewsAPI ``/v2/everything`` endpoint.

    Args:
        query: Search term sent as the ``q`` parameter.
        from_date: Start of the date window; defaults to the module-level
            ``yesterday``. The window always ends at the module-level ``today``.
        api_key: NewsAPI key; defaults to the module-level ``NEWS_API_KEY``.

    Returns:
        The list stored under the response's ``articles`` key, or ``[]`` when
        the request fails or the status code is not 200.
    """
    # Hand the query string to requests via ``params`` so it is URL-encoded;
    # interpolating the raw query into the URL breaks on spaces, '&', '#', etc.
    params = {
        'q': query,
        'from': str(from_date),
        'to': str(today),
        'apiKey': api_key,
    }
    try:
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get('https://newsapi.org/v2/everything', params=params, timeout=30)
    except requests.RequestException as e:
        # Network-level failures follow the same "log and return []" contract
        # as a non-200 status.
        logging.error(f"Failed to fetch articles. Request error: {e}")
        return []

    if response.status_code == 200:
        articles = response.json().get('articles', [])
        logging.info(f"Successfully extracted {len(articles)} articles.")
        return articles

    logging.error(f"Failed to fetch articles. Status code: {response.status_code}")
    return []

# Step 2: Preprocess the articles
def preprocess_articles(articles, limit=50):                #O
    """Turn raw article dicts into a DataFrame of cleaned text.

    Args:
        articles: List of article dicts; the 'title', 'description' and
            'content' keys are read from each one.
        limit: Maximum number of articles taken from the front of the list.

    Returns:
        DataFrame with 'title', 'description' and 'content' columns, where
        'content' is the three fields joined into one newline-free string.
        The columns are present even when ``articles`` is empty.
    """
    columns = ['title', 'description', 'content']
    data = []
    for article in articles[:limit]:
        # ``.get(key, '')`` only covers a missing key; a key that is present
        # with a None value would otherwise be rendered as the text "None".
        title = article.get('title') or ''
        description = article.get('description') or ''
        content = article.get('content') or ''

        # Join only the non-empty fields so blanks leave no stray spaces.
        parts = [str(part) for part in (title, description, content) if part]
        full_text = ' '.join(parts).replace('\n', ' ').strip()

        data.append({
            'title': title,
            'description': description,
            'content': full_text,
        })

    # Naming the columns keeps df['content'] valid for an empty input.
    df = pd.DataFrame(data, columns=columns)
    logging.info(f"Preprocessed {len(df)} articles.")
    return df

# Step 3: Perform sentiment analysis with normalized output
def perform_sentiment_analysis(article_content):  #P
    """Request a numeric sentiment score for ``article_content`` via the Chat Completions API.

    Args:
        article_content: Article text embedded in the prompt.

    Returns:
        The reply as a string (surrounding quotes/whitespace and trailing
        periods removed) when it parses as a number between -1 and 1.
        ``None`` when the reply is not such a number, or when the request or
        reading the reply raises.
    """
    prompt = f"Analyze the sentiment of the following article content and return a numerical sentiment score from -1 (very negative) to 1 (very positive). Return only the number: {article_content}"

    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=50,
            temperature=0.5
        )
        sentiment = response.choices[0].message.content.strip()
    except Exception as e:
        # Boundary with an external service: log and signal failure with None
        # so one bad article does not abort the whole run.
        logging.error(f"Error performing sentiment analysis: {e}")
        return None

    # Tolerate decorations such as "0.5." or "'0.5'" around the number.
    # '.' is only stripped on the right so a reply like ".5" stays intact.
    cleaned = sentiment.rstrip(" .'\"`").lstrip(" '\"`")
    try:
        score = float(cleaned)
    except ValueError:
        logging.warning(f"Sentiment reply is not numeric: {sentiment!r}")
        return None

    # The range check also rejects nan/inf, which float() accepts.
    if not -1.0 <= score <= 1.0:
        logging.warning(f"Sentiment score out of range [-1, 1]: {sentiment!r}")
        return None

    return cleaned

# Step 4: Update the DataFrame with sentiment results
def update_with_sentiment(df):                    #Q
    """Score each entry of ``df['content']`` and store the results in a 'sentiment' column.

    The DataFrame is modified in place and also returned.
    """
    def _score(index, content):
        # Analyze one article and log progress as "i/total".
        sentiment = perform_sentiment_analysis(content)
        logging.info(f"Processed article {index + 1}/{len(df)}: Sentiment = {sentiment}")
        return sentiment

    df['sentiment'] = [_score(index, content) for index, content in enumerate(df['content'])]
    return df

# Run the pipeline for the query 'NVIDIA': fetch -> preprocess -> score sentiment
articles = extract_articles('NVIDIA')              #R
df_articles = preprocess_articles(articles)       #S
df_with_sentiment = update_with_sentiment(df_articles)  #T
# Convert the sentiment column to numbers; values that cannot be parsed become NaN
df_with_sentiment["sentiment"] = pd.to_numeric(df_with_sentiment["sentiment"], errors="coerce")
# Summarize: average sentiment score across the articles
df_with_sentiment['sentiment'].mean()

INFO: Successfully extracted 100 articles.
INFO: Preprocessed 50 articles.
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 1/50: Sentiment = 0.2
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 2/50: Sentiment = 0.5
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 3/50: Sentiment = 0.0
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 4/50: Sentiment = 0.5
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 5/50: Sentiment = -0.2
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 6/50: Sentiment = 0.8
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: Processed article 7/50: Sentiment = -0.3

0.21199999999999997