# Chapter 5 Guide

#### Note: The headings in this notebook correspond to the listing numbers in the text.

## 5.1

In [1]:
import requests  #A
import pandas as pd  
import logging  
import os
from dotenv import load_dotenv
from datetime import datetime, timedelta  

# Load variables from a local .env file into the process environment.
load_dotenv()

# os.getenv returns None when the variable is not set.
NEWS_API_KEY = os.getenv("NEWS_API_KEY")  #B

# Dynamic date calculation: today minus one day  
# Both dates are computed once, when this cell runs. extract_articles below takes
# `yesterday` as the default for from_date and reads `today` for the upper bound.
today = datetime.now().date()  #C  
yesterday = today - timedelta(days=1)  

# Function to extract articles from NewsAPI  
def extract_articles(query, from_date=yesterday, api_key=NEWS_API_KEY):
    """Fetch articles matching ``query`` from the NewsAPI ``/v2/everything`` endpoint.

    Args:
        query: Search term, sent as the ``q`` parameter. It is URL-encoded by
            requests, so spaces and characters such as ``&`` are safe.
        from_date: Oldest publication date to include. Defaults to the
            module-level ``yesterday`` as it was when this function was defined.
        api_key: NewsAPI key. Defaults to the module-level ``NEWS_API_KEY``.

    Returns:
        The list stored under the response's ``articles`` key. An empty list is
        returned when the request fails, the status code is not 200, or the
        body is not valid JSON, so callers can always slice or iterate it.
    """
    url = 'https://newsapi.org/v2/everything'
    # Passing the values as params lets requests handle the URL encoding.
    params = {'q': query, 'from': str(from_date), 'to': str(today), 'apiKey': api_key}  #D

    try:
        # Without a timeout a stalled connection would block the cell indefinitely.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException as exc:
        logging.error(f"Failed to fetch articles. Request error: {exc}")
        return []

    if response.status_code != 200:
        logging.error(f"Failed to fetch articles. Status code: {response.status_code}")
        return []

    try:
        payload = response.json()
    except ValueError as exc:
        logging.error(f"Failed to fetch articles. Invalid JSON in response: {exc}")
        return []

    # `or []` also covers a response whose 'articles' value is null.
    articles = payload.get('articles') or []  #E
    logging.info(f"Successfully extracted {len(articles)} articles.")
    return articles

# Example use case  
articles = extract_articles('Tesla')  #F
# The request can fail or match nothing, so only index into the result when
# there is at least one article to show.
if articles:
    print(articles[0])
else:
    print("No articles were returned for this query.")
#A Import the required libraries for requests, data handling, and logging.
#B Store your News API key as a constant for easy access.
#C Calculate today’s date and set yesterday for the date range.
#D Construct the URL for the NewsAPI request using the query, date range, and API key.
#E If the API call succeeds, extract and log the number of articles received.
#F Call the extract_articles function with a sample query for "Tesla"


{'source': {'id': 'the-verge', 'name': 'The Verge'}, 'author': 'Umar Shakir', 'title': 'Tesla’s latest Cybertruck has longer range and the cheapest price yet', 'description': 'Tesla has a new entry-level Cybertruck with longer range, and it’s the cheapest model yet at $69,990 before the federal $7,500 credit. Tesla pulled some significant features to achieve the price cut.\xa0 The new “long range” Cybertruck has just a single RWD moto…', 'url': 'https://www.theverge.com/news/647060/tesla-cybertruck-rwd-long-lange-model-launch-entry', 'urlToImage': 'https://platform.theverge.com/wp-content/uploads/sites/2/2025/04/2056219-00-A_01_2000.png.jpeg?quality=90&strip=all&crop=0%2C10.723165084465%2C100%2C78.55366983107&w=1200', 'publishedAt': '2025-04-11T14:22:45Z', 'content': 'This Cybertruck has no powered tonneau cover.\r\nThis Cybertruck has no powered tonneau cover.\r\nTesla has a new entry-level Cybertruck with longer range, and its the cheapest model yet at $69,990 befor… [+1861 chars]'}


## 5.2

In [2]:
# Function to preprocess articles  
def preprocess_articles(articles, limit=5):
    """Turn raw article dicts into a DataFrame with one cleaned text column.

    Args:
        articles: Iterable of article dicts with optional 'title',
            'description' and 'content' keys. ``None`` is treated as empty.
        limit: Maximum number of articles to process (5 by default, to keep
            test runs small). Pass ``None`` to process every article.

    Returns:
        A DataFrame with columns 'title', 'description' and 'content', where
        'content' is the three fields joined into one whitespace-normalized
        string. The columns are present even when there are no rows.
    """
    data = []  #A

    for article in list(articles or [])[:limit]:  # Limiting to 5 articles by default for testing  #B
        # A key that is present with a None value bypasses the .get() default,
        # which would put the literal text "None" into the combined string.
        title = article.get('title') or ''
        description = article.get('description') or ''
        content = article.get('content') or ''

        # split()/join collapses every whitespace run (including \r\n) into a
        # single space and trims both ends.
        clean_text = ' '.join(f"{title} {description} {content}".split())  #C
        data.append({
            'title': title,
            'description': description,
            'content': clean_text
        })

    # Explicit columns keep df['content'] valid even when no rows were built.
    df = pd.DataFrame(data, columns=['title', 'description', 'content'])  #D
    logging.info(f"Preprocessed {len(df)} articles.")
    return df

# Preprocess the extracted articles  
df_articles = preprocess_articles(articles)  #E
# display() is supplied by IPython/Jupyter and renders the DataFrame as a rich table.
display(df_articles)
#A Initialize an empty list to store preprocessed article data.
#B Loop through the first 5 articles, limiting the scope for testing.
#C Clean and format the combined text fields by removing line breaks.
#D Convert the list of dictionaries into a DataFrame for structured analysis.
#E Call preprocess_articles on the extracted articles, storing the result in df_articles.


Unnamed: 0,title,description,content
0,Tesla’s latest Cybertruck has longer range and...,Tesla has a new entry-level Cybertruck with lo...,Tesla’s latest Cybertruck has longer range and...
1,Tesla Halts Sales of Model S and X in China as...,Tesla's factory in Shanghai only produces the ...,Tesla Halts Sales of Model S and X in China as...
2,"Elon Musk Is Annoying, Unfunny, and Should Pro...",A senior official describes the billionaire as...,"Elon Musk Is Annoying, Unfunny, and Should Pro..."
3,"Tesla, Apple, Jamie Dimon comments: Trending T...",,"Tesla, Apple, Jamie Dimon comments: Trending T..."
4,"Tesla Introduces New Cybertruck at $70,000 Ami...",,"Tesla Introduces New Cybertruck at $70,000 Ami..."


## 5.3

In [3]:
import openai             #A
import os                 #B
from dotenv import load_dotenv

load_dotenv()

openai.api_key = os.getenv("OPENAI_API_KEY")  #C

prompt = "How are you today?"  #D

# The conversation sent to the model: a system instruction, then the user's prompt.
chat_messages = [                             #G
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt},
]

response = openai.chat.completions.create(    #E
    model="gpt-4o",                           #F
    messages=chat_messages,
    max_tokens=100,                           #H
    temperature=0.5,                          #I
)

print(response)  #J

#A Import the OpenAI SDK so you can interact with the Chat Completions endpoint.
#B Import the os module to access environment variables securely.
#C Set your API key by retrieving it from the OPENAI_API_KEY environment variable.
#D Define a simple test prompt to send to the model.
#E Call the chat.completions.create() method to start the chat request.
#F Specify the model—here we're using gpt-4o.
#G Provide the message sequence: a system message and a user input prompt.
#H Limit the length of the model’s response using max_tokens.
#I Control the randomness of the output with temperature (lower is more focused).
#J Print the full response object to verify output and confirm the setup is working.

ChatCompletion(id='chatcmpl-BLYOFM56pNu2OfQHrvvgEp8MDYPfY', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Thank you for asking! As an AI, I don't have feelings or moods, but I'm here and ready to help you with whatever you need. How can I assist you today?", refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1744476783, model='gpt-4o-2024-08-06', object='chat.completion', service_tier='default', system_fingerprint='fp_432e014d75', usage=CompletionUsage(completion_tokens=37, prompt_tokens=22, total_tokens=59, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))


## 5.4

In [4]:
import openai
import os
from dotenv import load_dotenv

load_dotenv()


# Set your OpenAI API key  
openai.api_key = os.getenv("OPENAI_API_KEY") #A

# Function to perform sentiment analysis using ChatGPT  
def perform_sentiment_analysis(article_content):
    """Ask the chat model to describe the sentiment of an article.

    Args:
        article_content: Article text to embed in the prompt.

    Returns:
        The model's reply with surrounding whitespace removed, or None when
        sending the request or reading the reply raises an exception.
    """
    prompt = f"Analyze the sentiment of the following article content: {article_content}. Is the sentiment positive, negative, or neutral?"  #B
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]

    try:
        completion = openai.chat.completions.create(
            model="gpt-4o",
            messages=conversation,
            max_tokens=100,
            temperature=0.5,
        )
        # The reply text lives on the message of the first returned choice.
        return completion.choices[0].message.content.strip()  #C
    except Exception as e:
        # Best effort: report the problem and let the caller carry on with None.
        logging.error(f"Error performing sentiment analysis: {e}")
        return None

# Example: Perform sentiment analysis on the first article  
example_article_content = df_articles['content'].iloc[0]  #D
sentiment = perform_sentiment_analysis(example_article_content)  #E
print(f"Sentiment: {sentiment}")  #F


#A Set the OpenAI API key required to authenticate requests to the API.
#B Create a prompt that instructs ChatGPT to analyze the sentiment of the article content.
#C Extract and clean the response to obtain the sentiment result.
#D Select the content of the first article for a sample analysis.
#E Call perform_sentiment_analysis on the example article content.
#F Print the sentiment result to verify the output.


Sentiment: The sentiment of the article content appears to be neutral. The article provides factual information about the new entry-level Cybertruck, highlighting its longer range and lower price. While it mentions that some features, such as the powered tonneau cover, have been removed to achieve the price cut, it does not express any strong positive or negative opinions about these changes. The focus is on delivering information rather than conveying a particular sentiment.


In [5]:
import openai
import os
from dotenv import load_dotenv

load_dotenv()


# Set your OpenAI API key  
openai.api_key = os.getenv("OPENAI_API_KEY") #A

# Function to perform sentiment analysis using ChatGPT  
def perform_sentiment_analysis(article_content):
    """Classify an article's sentiment as 'Positive', 'Neutral', or 'Negative'.

    Args:
        article_content: Article text to embed in the prompt.

    Returns:
        One of 'Positive', 'Neutral' or 'Negative' when the model's reply is
        one of those labels (ignoring case, quotes and trailing punctuation).
        Any other reply is returned as-is, stripped of surrounding whitespace.
        None is returned when the content is empty or the API call fails.
    """
    # Nothing to classify: skip the API call instead of sending an empty prompt.
    if article_content is None or not str(article_content).strip():
        logging.warning("Skipping sentiment analysis: article content is empty.")
        return None

    prompt = f"Analyze the sentiment of the following article content and return 'Positive', 'Neutral', or 'Negative' only: {article_content}"  #B

    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=100,
            temperature=0.5
        )

        # The reply text lives on the message of the first returned choice.
        sentiment = response.choices[0].message.content.strip()  #C
    except Exception as e:
        # Best effort: report the problem and let the caller carry on with None.
        logging.error(f"Error performing sentiment analysis: {e}")
        return None

    # The prompt asks for a bare label, but the reply may still differ in case or
    # carry quotes or a trailing period. Map those variants onto the three labels
    # so the resulting column holds consistent values.
    label = sentiment.strip(" \t\r\n.!'\"`*").capitalize()
    if label in ('Positive', 'Neutral', 'Negative'):
        return label
    return sentiment

# Example: Perform sentiment analysis on the first article  
example_article_content = df_articles['content'].iloc[0]  #D
sentiment = perform_sentiment_analysis(example_article_content)  #E
print(f"Sentiment: {sentiment}")  #F


#A Set the OpenAI API key required to authenticate requests to the API.
#B Create a prompt that instructs ChatGPT to analyze the sentiment of the article content.
#C Extract and clean the response to obtain the sentiment result.
#D Select the content of the first article for a sample analysis.
#E Call perform_sentiment_analysis on the example article content.
#F Print the sentiment result to verify the output.


Sentiment: Neutral


## 5.5

In [6]:
import logging  #A

#A Set up logging (if not already configured earlier)
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')  

#B Function to apply sentiment analysis to all articles and log results
def update_with_sentiment(df):
    """Add a 'sentiment' column to ``df`` by analysing each 'content' entry.

    Args:
        df: DataFrame with a 'content' column of article text.

    Returns:
        The same DataFrame object, with the new 'sentiment' column written
        onto it in place.
    """
    total = len(df)
    sentiments = []  #C

    for position, text in enumerate(df['content'], start=1):  #D
        label = perform_sentiment_analysis(text)  #E
        logging.info(f"Processed article {position}/{total} — Sentiment: {label}")  #F
        sentiments.append(label)

    df['sentiment'] = sentiments  #G
    return df


#A Sets up logging to track pipeline progress.
#B Defines the final pipeline step—enriching the DataFrame with sentiment labels.
#C Initializes an empty list to store each article’s sentiment result.
#D Loops through the article text in the content column.
#E Applies the AI sentiment function to each entry.
#F Logs the result of each sentiment analysis with a progress counter.
#G Appends the sentiment column to the existing DataFrame.


## Full pipeline

In [9]:
import os                    #A
import openai                #B
import requests              #C
import pandas as pd          #D
import logging               #E
from datetime import datetime, timedelta  #F
from dotenv import load_dotenv            #G

# Load environment variables from .env file
load_dotenv()                #H

# Configure logging
# Note: basicConfig leaves the root logger untouched if it already has handlers,
# e.g. when an earlier cell in the same kernel has configured logging.
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')  #I

# Retrieve API keys from environment
# os.getenv returns None for a variable that is not set.
NEWS_API_KEY = os.getenv("NEWS_API_KEY")          #J
openai.api_key = os.getenv("OPENAI_API_KEY")      #K

# Step 1: Extract articles using NewsAPI
# Both dates are computed once, when this cell runs. extract_articles takes
# `yesterday` as the default for from_date and reads `today` for the upper bound.
today = datetime.now().date()                     #L
yesterday = today - timedelta(days=1)             #M

def extract_articles(query, from_date=yesterday, api_key=NEWS_API_KEY):       #N
    """Fetch articles matching ``query`` from the NewsAPI ``/v2/everything`` endpoint.

    Args:
        query: Search term, sent as the ``q`` parameter. It is URL-encoded by
            requests, so spaces and characters such as ``&`` are safe.
        from_date: Oldest publication date to include. Defaults to the
            module-level ``yesterday`` as it was when this function was defined.
        api_key: NewsAPI key. Defaults to the module-level ``NEWS_API_KEY``.

    Returns:
        The list stored under the response's ``articles`` key. An empty list is
        returned when the request fails, the status code is not 200, or the
        body is not valid JSON.
    """
    url = 'https://newsapi.org/v2/everything'
    # Passing the values as params lets requests handle the URL encoding.
    params = {'q': query, 'from': str(from_date), 'to': str(today), 'apiKey': api_key}

    try:
        # Without a timeout a stalled connection would block the pipeline indefinitely.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException as exc:
        logging.error(f"Failed to fetch articles. Request error: {exc}")
        return []

    if response.status_code != 200:
        logging.error(f"Failed to fetch articles. Status code: {response.status_code}")
        return []

    try:
        payload = response.json()
    except ValueError as exc:
        logging.error(f"Failed to fetch articles. Invalid JSON in response: {exc}")
        return []

    # `or []` also covers a response whose 'articles' value is null.
    articles = payload.get('articles') or []
    logging.info(f"Successfully extracted {len(articles)} articles.")
    return articles

# Step 2: Preprocess the articles
def preprocess_articles(articles, limit=5):                #O
    """Turn raw article dicts into a DataFrame with one cleaned text column.

    Args:
        articles: Iterable of article dicts with optional 'title',
            'description' and 'content' keys. ``None`` is treated as empty.
        limit: Maximum number of articles to process (5 by default, to keep
            test runs small). Pass ``None`` to process every article.

    Returns:
        A DataFrame with columns 'title', 'description' and 'content', where
        'content' is the three fields joined into one whitespace-normalized
        string. The columns are present even when there are no rows, so
        downstream access to df['content'] does not fail on an empty result.
    """
    data = []
    for article in list(articles or [])[:limit]:  # Limit for testing
        # A key that is present with a None value bypasses the .get() default,
        # which would put the literal text "None" into the combined string.
        title = article.get('title') or ''
        description = article.get('description') or ''
        content = article.get('content') or ''
        # split()/join collapses every whitespace run (including \r\n) into a
        # single space and trims both ends.
        full_text = ' '.join(f"{title} {description} {content}".split())

        data.append({
            'title': title,
            'description': description,
            'content': full_text
        })

    df = pd.DataFrame(data, columns=['title', 'description', 'content'])
    logging.info(f"Preprocessed {len(df)} articles.")
    return df

# Step 3: Perform sentiment analysis with normalized output
def perform_sentiment_analysis(article_content):  #P
    """Classify an article's sentiment as 'Positive', 'Neutral', or 'Negative'.

    Args:
        article_content: Article text to embed in the prompt.

    Returns:
        One of 'Positive', 'Neutral' or 'Negative' when the model's reply is
        one of those labels (ignoring case, quotes and trailing punctuation).
        Any other reply is returned as-is, stripped of surrounding whitespace.
        None is returned when the content is empty or the API call fails.
    """
    # Nothing to classify: skip the API call instead of sending an empty prompt.
    if article_content is None or not str(article_content).strip():
        logging.warning("Skipping sentiment analysis: article content is empty.")
        return None

    prompt = f"Analyze the sentiment of the following article content and return 'Positive', 'Neutral', or 'Negative' only: {article_content}"

    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=50,
            temperature=0.5
        )
        sentiment = response.choices[0].message.content.strip()
    except Exception as e:
        # Best effort: report the problem and let the pipeline carry on with None.
        logging.error(f"Error performing sentiment analysis: {e}")
        return None

    # The prompt asks for a bare label, but the reply may still differ in case or
    # carry quotes or a trailing period. Map those variants onto the three labels
    # so the 'sentiment' column holds consistent values.
    label = sentiment.strip(" \t\r\n.!'\"`*").capitalize()
    if label in ('Positive', 'Neutral', 'Negative'):
        return label
    return sentiment

# Step 4: Update the DataFrame with sentiment results
def update_with_sentiment(df):                    #Q
    """Add a 'sentiment' column to ``df`` by analysing each 'content' entry.

    Args:
        df: DataFrame with a 'content' column of article text.

    Returns:
        The same DataFrame object, with the new 'sentiment' column written
        onto it in place.
    """
    total = len(df)
    sentiments = []
    for position, text in enumerate(df['content'], start=1):
        label = perform_sentiment_analysis(text)
        logging.info(f"Processed article {position}/{total}: Sentiment = {label}")
        sentiments.append(label)

    df['sentiment'] = sentiments
    return df

# Run the pipeline
articles = extract_articles('Tesla')              #R
df_articles = preprocess_articles(articles)       #S
# update_with_sentiment writes the 'sentiment' column onto df_articles itself and
# returns that same object, so both names refer to the enriched DataFrame.
df_with_sentiment = update_with_sentiment(df_articles)  #T

# Display the final DataFrame
# (as the last expression in the cell, this selection becomes the cell's output)
df_with_sentiment[['title', 'sentiment']]         #U


Unnamed: 0,title,sentiment
0,Tesla’s latest Cybertruck has longer range and...,Neutral
1,Tesla Halts Sales of Model S and X in China as...,Negative
2,"Elon Musk Is Annoying, Unfunny, and Should Pro...",Negative
3,"Tesla, Apple, Jamie Dimon comments: Trending T...",Neutral
4,"Tesla Introduces New Cybertruck at $70,000 Ami...",Negative
