# Sentiment Analysis with LangChain

This notebook demonstrates how to use LangChain with OpenAI to perform sentiment analysis on news articles from the `news-sentiment-data.csv` dataset.

## Setup

Make sure you have:
1. Created a `.env` file in the project root with your `OPENAI_API_KEY`
2. Installed the required dependencies: `pip install -r requirements.txt`


In [1]:
# Cell 1: Import dependencies and load environment variables
import os
import pandas as pd
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML

# Read the project's .env file into the process environment
load_dotenv(override=True)

# Report whether the OpenAI key is visible to this kernel
api_key = os.environ.get("OPENAI_API_KEY")
key_status = (
    "‚úì OpenAI API key loaded successfully!"
    if api_key
    else "‚úó Error: OPENAI_API_KEY not found in .env file"
)
print(key_status)


  from pydantic.v1.fields import FieldInfo as FieldInfoV1


‚úì OpenAI API key loaded successfully!


In [2]:
# Cell 2: Load the news sentiment dataset
df = pd.read_csv("../datasets/news-sentiment-data.csv")

# Summarise the frame: dimensions first, then the column names
print(f"Dataset Shape: {df.shape}")
print(f"\nColumns: {df.columns.tolist()}")
print("\nSample data (excluding sentiment column for analysis):")

# Preview three rows; the sentiment label is left out here and used later for comparison
preview_columns = ['news_title', 'reddit_title', 'text', 'url']
display(df[preview_columns].head(3))


Dataset Shape: (848, 5)

Columns: ['news_title', 'reddit_title', 'sentiment', 'text', 'url']

Sample data (excluding sentiment column for analysis):


Unnamed: 0,news_title,reddit_title,text,url
0,Mark Cuban launches generic drug company,Billionaire Mark Cuban just launched a drug co...,Billionaire investor and Shark Tank star Mark ...,https://www.beckershospitalreview.com/pharmacy...
1,From Defendant to Defender: One Wrongfully Con...,"Man falsely imprisoned for 10 years, uses pris...",Attorney Jarrett Adams recently helped overtur...,https://www.nbcnews.com/news/us-news/defendant...
2,"Amazon Tribe Wins Lawsuit Against Big Oil, Sav...",Amazon tribe wins legal battle against oil com...,The Amazon Rainforest is well known across the...,https://www.disclose.tv/amazon-tribe-wins-laws...


In [3]:
# Cell 3: LangChain Setup - Define output schema and prompt template

# Define the output structure using Pydantic
class SentimentAnalysis(BaseModel):
    # Structured reply expected from the LLM: a label, a score and a rationale.
    # NOTE(review): documented with `#` comments rather than a class docstring,
    # because pydantic may surface a model docstring in the generated schema
    # and thereby change the format instructions - confirm before converting.

    # Classification label; all three fields are required (`...` = no default).
    sentiment: str = Field(
        ...,
        description="The sentiment classification: 'positive', 'negative', or 'neutral'",
    )
    # Model's self-reported certainty; the range is described, not enforced.
    confidence: float = Field(
        ...,
        description="Confidence score between 0 and 1",
    )
    # Short free-text justification for the chosen label.
    reasoning: str = Field(
        ...,
        description="Brief explanation for the sentiment classification",
    )

# Parser that turns the model's reply into a SentimentAnalysis instance
parser = PydanticOutputParser(pydantic_object=SentimentAnalysis)

# Chat model client used for every classification call
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    api_key=os.environ.get("OPENAI_API_KEY"),
)

# Prompt wording, kept as named strings so each role's text is easy to find.
# Placeholders: {format_instructions}, {title}, {text}.
system_message = """You are an expert sentiment analyst. Analyze the sentiment of news articles.
Classify the sentiment as 'positive', 'negative', or 'neutral'.
Provide a confidence score between 0 and 1, and a brief reasoning.

{format_instructions}"""

human_message = """Analyze the sentiment of this news article:

Title: {title}

Content: {text}

Provide your sentiment analysis."""

prompt_template = ChatPromptTemplate.from_messages([
    ("system", system_message),
    ("human", human_message),
])

# Compose the pipeline: fill the prompt -> call the model -> parse the reply
chain = prompt_template | llm | parser

print("‚úì LangChain sentiment analysis chain configured successfully!")


‚úì LangChain sentiment analysis chain configured successfully!


In [4]:
# Cell 4: Sentiment Analysis Function

def _as_text(value) -> str:
    """Return `value` as a string, mapping None/NaN (missing CSV cells) to ''."""
    # `value != value` is only true for NaN, which is how pandas represents an
    # empty cell in an object column.
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return str(value)


def analyze_sentiment(title: str, text: str, max_text_length: int = 3000) -> SentimentAnalysis:
    """
    Analyze the sentiment of a news article using LangChain and OpenAI.

    Args:
        title: The news article title. Missing values (None/NaN) are treated
            as an empty string.
        text: The full text content of the article. Missing values (None/NaN)
            are treated as an empty string instead of raising on `len()`.
        max_text_length: Maximum number of characters of `text` sent to the
            model; longer text is cut at this length and suffixed with "...".

    Returns:
        SentimentAnalysis object with sentiment, confidence, and reasoning

    Raises:
        Whatever `chain.invoke` raises (e.g. API or output-parsing errors);
        nothing is caught here so callers can decide how to report failures.
    """
    title = _as_text(title)
    text = _as_text(text)

    # Truncate text if too long (to manage token limits)
    if len(text) > max_text_length:
        text = text[:max_text_length] + "..."

    return chain.invoke({
        "title": title,
        "text": text,
        "format_instructions": parser.get_format_instructions()
    })

def convert_sentiment_to_numeric(sentiment: str) -> float:
    """
    Convert a sentiment label to a numeric value for comparison.

    Matching ignores case and surrounding whitespace, so labels such as
    "Positive" or " negative\n" resolve correctly.

    Args:
        sentiment: Label such as 'positive', 'neutral' or 'negative'.

    Returns:
        1.0 for positive, -1.0 for negative, and 0.0 for neutral, for any
        unrecognised label, and for non-string input (e.g. None).
    """
    mapping = {
        "positive": 1.0,
        "neutral": 0.0,
        "negative": -1.0
    }
    # Non-string input falls back to the same default as an unknown label
    # rather than raising AttributeError on `.lower()`.
    if not isinstance(sentiment, str):
        return 0.0
    return mapping.get(sentiment.strip().lower(), 0.0)

print("‚úì Sentiment analysis function defined!")


‚úì Sentiment analysis function defined!


In [5]:
# Cell 5: Interactive Sentiment Analysis

# Controls for the interactive demo: a row picker, a trigger button and an
# output pane that the click handler renders into.
slider_options = dict(
    value=0,
    min=0,
    max=len(df) - 1,  # last valid positional index into df
    step=1,
    description='Article #:',
    continuous_update=False,
    style={'description_width': '80px'},
    layout=widgets.Layout(width='400px'),
)
article_slider = widgets.IntSlider(**slider_options)

analyze_button = widgets.Button(description='Analyze Sentiment', button_style='primary', icon='search')

output_area = widgets.Output()

def display_article(index):
    """
    Build an HTML card for one article, without its sentiment label.

    All values come from the dataset and are HTML-escaped before being
    interpolated, so characters such as `<`, `&` or `"` in a title, body or
    URL cannot break or inject markup.

    Args:
        index: Positional row index into `df`.

    Returns:
        HTML string containing the news title, Reddit title, the first 500
        characters of the text and a link showing the first 60 characters of
        the URL.
    """
    from html import escape  # local import: only this renderer needs it

    row = df.iloc[index]

    # str() first so a missing cell (NaN) renders as text instead of raising
    # on slicing; slice before escaping so an entity is never cut in half.
    news_title = escape(str(row['news_title']))
    reddit_title = escape(str(row['reddit_title']))
    text_preview = escape(str(row['text'])[:500])
    url = str(row['url'])
    url_href = escape(url, quote=True)
    url_label = escape(url[:60])

    return f"""
    <div style="padding: 15px; background: #f5f5f5; border-radius: 8px; margin: 10px 0;">
        <h3 style="color: #333;">üì∞ {news_title}</h3>
        <p><strong>Reddit Title:</strong> {reddit_title}</p>
        <p><strong>Text Preview:</strong> {text_preview}...</p>
        <p><strong>URL:</strong> <a href="{url_href}" target="_blank">{url_label}...</a></p>
    </div>
    """

def on_analyze_click(button):
    """
    Handle a click on the analyze button.

    Clears the output pane, shows the article selected by the slider, runs
    the sentiment analysis on it and renders the result next to the
    dataset's own sentiment value. Any exception raised during analysis or
    rendering is reported as a message in the output pane.

    Args:
        button: The widget that triggered the callback (unused).
    """
    from html import escape  # local import: model output is escaped below

    with output_area:
        clear_output(wait=True)
        index = article_slider.value
        row = df.iloc[index]

        # Display article info
        display(HTML(display_article(index)))

        print("üîÑ Analyzing sentiment with OpenAI...")

        try:
            # Perform sentiment analysis
            result = analyze_sentiment(row['news_title'], row['text'])

            # Border/heading colour per label; grey for anything unrecognised
            sentiment_color = {
                'positive': '#28a745',
                'negative': '#dc3545',
                'neutral': '#6c757d'
            }.get(result.sentiment.strip().lower(), '#6c757d')

            # Model-generated text is untrusted: escape it before it goes
            # into the HTML so stray `<` or `&` cannot alter the markup.
            sentiment_label = escape(result.sentiment.upper())
            reasoning = escape(result.reasoning)

            # The dataset stores sentiment as a signed number
            original_score = row['sentiment']
            if original_score > 0:
                original_label = 'Positive'
            elif original_score < 0:
                original_label = 'Negative'
            else:
                original_label = 'Neutral'

            result_html = f"""
            <div style="padding: 15px; background: white; border: 2px solid {sentiment_color}; border-radius: 8px; margin: 10px 0;">
                <h3 style="color: {sentiment_color};">üéØ AI Sentiment Analysis Result</h3>
                <p><strong>Sentiment:</strong> <span style="color: {sentiment_color}; font-size: 1.2em; font-weight: bold;">{sentiment_label}</span></p>
                <p><strong>Confidence:</strong> {result.confidence:.2%}</p>
                <p><strong>Reasoning:</strong> {reasoning}</p>
                <hr>
                <p><strong>Original Dataset Sentiment:</strong> {original_score} ({original_label})</p>
            </div>
            """
            display(HTML(result_html))

        except Exception as e:
            # Deliberately broad: a failed call should show a message in the
            # UI rather than leave the output pane empty.
            print(f"‚ùå Error during analysis: {str(e)}")

# Wire the button to its click handler
analyze_button.on_click(on_analyze_click)

# Render the UI: heading text, then the controls on one row with the output pane beneath
print("üìä Interactive Sentiment Analysis")
print("Use the slider to select an article, then click 'Analyze Sentiment' to get AI analysis.")
controls_row = widgets.HBox([article_slider, analyze_button])
interface = widgets.VBox([controls_row, output_area])
display(interface)


üìä Interactive Sentiment Analysis
Use the slider to select an article, then click 'Analyze Sentiment' to get AI analysis.


VBox(children=(HBox(children=(IntSlider(value=0, continuous_update=False, description='Article #:', layout=Lay‚Ä¶

In [6]:
# Cell 6: Batch Analysis (Optional)
# Analyze multiple articles and compare with original sentiment values

def batch_analyze(start_idx: int = 0, num_articles: int = 5):
    """
    Perform batch sentiment analysis on multiple articles.

    Each article is analyzed independently; a failure on one article is
    recorded as an 'ERROR' row and the batch continues.

    Args:
        start_idx: Starting index in the dataframe
        num_articles: Number of articles to analyze; the range is clamped to
            the end of the dataframe.

    Returns:
        DataFrame with one row per analyzed article and the columns
        index, title, ai_sentiment, ai_confidence, original_sentiment,
        ai_numeric and reasoning (present even when no rows were analyzed).
    """
    columns = [
        'index', 'title', 'ai_sentiment', 'ai_confidence',
        'original_sentiment', 'ai_numeric', 'reasoning',
    ]
    results = []

    # Clamp once so the loop bound and the progress denominator agree.
    end_idx = min(start_idx + num_articles, len(df))

    for i in range(start_idx, end_idx):
        row = df.iloc[i]
        # str() so a missing title (NaN) cannot abort the batch on slicing.
        title_preview = str(row['news_title'])[:50]
        print(f"Analyzing article {i + 1}/{end_idx}: {title_preview}...")

        try:
            analysis = analyze_sentiment(row['news_title'], row['text'])
            results.append({
                'index': i,
                'title': title_preview + '...',
                'ai_sentiment': analysis.sentiment,
                'ai_confidence': analysis.confidence,
                'original_sentiment': row['sentiment'],
                'ai_numeric': convert_sentiment_to_numeric(analysis.sentiment),
                'reasoning': analysis.reasoning[:100] + '...'
            })
        except Exception as e:
            # Best-effort batch: keep a placeholder row with the error text
            # so the failure stays visible in the returned frame.
            print(f"  ‚ùå Error: {str(e)}")
            results.append({
                'index': i,
                'title': title_preview + '...',
                'ai_sentiment': 'ERROR',
                'ai_confidence': 0,
                'original_sentiment': row['sentiment'],
                'ai_numeric': None,
                'reasoning': str(e)
            })

    return pd.DataFrame(results, columns=columns)

# Example: Analyze first 5 articles (uncomment to run)
# batch_results = batch_analyze(start_idx=0, num_articles=5)
# display(batch_results)

# Emit the usage hint as one write; the text is the same four lines
usage_lines = [
    "‚úì Batch analysis function ready!",
    "To run batch analysis, uncomment and execute:",
    "  batch_results = batch_analyze(start_idx=0, num_articles=5)",
    "  display(batch_results)",
]
print("\n".join(usage_lines))


‚úì Batch analysis function ready!
To run batch analysis, uncomment and execute:
  batch_results = batch_analyze(start_idx=0, num_articles=5)
  display(batch_results)
