<a href="https://colab.research.google.com/github/anishasingh23/sentimentAnalysis/blob/main/sentimentAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
pip install streamlit pandas numpy matplotlib seaborn requests nltk plotly wordcloud pyngrok



In [14]:
%%writefile sentiment_app.py

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import re
from datetime import datetime, timedelta
import time
import plotly.express as px
import plotly.graph_objects as go
from wordcloud import WordCloud
import json

# Initial setup
st.set_page_config(
    page_title="Sentiment Analysis Dashboard",
    page_icon="😊",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Download NLTK resources
@st.cache_resource
def download_nltk_resources():
    """Make sure the NLTK data packages requested by this app are installed.

    Each package is looked up first and downloaded only when the lookup
    raises ``LookupError``.
    """
    # Downloader package id -> path of that package inside an nltk_data
    # directory. nltk.data.find() expects the category-qualified path; the
    # bare package id does not match where the downloader stores the data,
    # so the previous lookups failed even when the data was present.
    required = {
        'vader_lexicon': 'sentiment/vader_lexicon.zip',
        'punkt': 'tokenizers/punkt',
    }
    for package, resource_path in required.items():
        try:
            nltk.data.find(resource_path)
        except LookupError:
            nltk.download(package)

download_nltk_resources()

# Initialize the VADER sentiment analyzer
@st.cache_resource
def get_analyzer():
    """Construct the VADER ``SentimentIntensityAnalyzer`` (wrapped by ``st.cache_resource``)."""
    vader = SentimentIntensityAnalyzer()
    return vader

# Module-level analyzer handle read by analyze_sentiment().
analyzer = get_analyzer()

# Initialize session state variables if they don't exist
# Seed each session-state key once; keys that already exist are left alone.
# Factories are used so a default object is only built when it is needed.
_history_columns = ['text', 'pos_score', 'neu_score', 'neg_score', 'compound', 'sentiment', 'confidence', 'timestamp']
_session_defaults = {
    'analyzed_data': lambda: pd.DataFrame(columns=_history_columns),
    'api_results': list,
    'api_type': lambda: "news",
    'show_history': lambda: False,
}
for _state_key, _make_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()

# Custom sentiment analysis function using VADER and custom logic
def analyze_sentiment(text):
    """Score *text* with VADER and derive a sentiment label and confidence.

    Args:
        text: The string to analyze.

    Returns:
        ``None`` when *text* is ``None``, empty, or whitespace-only.
        Otherwise a dict with the keys ``pos_score``, ``neu_score``,
        ``neg_score``, ``compound``, ``sentiment`` ("Positive", "Neutral" or
        "Negative"), ``confidence`` (0.0-1.0) and ``timestamp`` (local time
        formatted as ``%Y-%m-%d %H:%M:%S``).
    """
    # Whitespace-only input carries no content; without this guard it would
    # be reported as a fully confident "Neutral" result.
    if not text or not text.strip():
        return None

    # Get VADER scores
    scores = analyzer.polarity_scores(text)
    compound = scores['compound']

    if compound >= 0.05:
        sentiment = "Positive"
        # Larger magnitude -> higher confidence, capped at 1.0
        confidence = min(abs(compound) * 2, 1.0)
    elif compound <= -0.05:
        sentiment = "Negative"
        confidence = min(abs(compound) * 2, 1.0)
    else:
        sentiment = "Neutral"
        # For neutral, confidence is highest when the score is close to 0
        confidence = 1 - min(abs(compound) * 5, 1.0)

    return {
        'pos_score': scores['pos'],
        'neu_score': scores['neu'],
        'neg_score': scores['neg'],
        'compound': compound,
        'sentiment': sentiment,
        'confidence': confidence,
        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    }

# Function to fetch news from News API
def fetch_news(query, api_key, days=7, language="en", timeout=10):
    """Query the News API "everything" endpoint for recent articles.

    Args:
        query: Search expression sent as the ``q`` parameter.
        api_key: News API key sent as the ``apiKey`` parameter.
        days: Size of the look-back window, in days, ending today.
        language: Language code sent as the ``language`` parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response on HTTP 200, otherwise ``None``. Failures
        are reported to the user through ``st.error`` rather than raised.
    """
    base_url = "https://newsapi.org/v2/everything"

    # Date window: from `days` ago up to now, formatted as YYYY-MM-DD
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)

    params = {
        'q': query,
        'language': language,
        'from': start_date.strftime("%Y-%m-%d"),
        'to': end_date.strftime("%Y-%m-%d"),
        'sortBy': 'publishedAt',
        'apiKey': api_key
    }

    try:
        # Without a timeout a stalled connection would block the app run.
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as e:
        st.error(f"Error fetching news: {str(e)}")
        return None

    if response.status_code == 200:
        try:
            return response.json()
        except ValueError as e:
            # Body was not valid JSON despite the 200 status
            st.error(f"Error fetching news: {str(e)}")
            return None

    error_message = f"Error: {response.status_code} - {response.reason}"
    try:
        error_data = response.json()
    except ValueError:
        # Error body is not JSON; fall back to the status line only
        error_data = None
    if isinstance(error_data, dict) and 'message' in error_data:
        error_message += f". {error_data['message']}"
    st.error(error_message)
    return None

# Function to add data to history
def add_to_history(text, analysis_result):
    """Append one analysis result to ``st.session_state.analyzed_data``.

    Args:
        text: The text (or article title) stored in the ``text`` column.
        analysis_result: Dict as returned by ``analyze_sentiment``; only the
            score, sentiment, confidence and timestamp keys are stored, any
            extra keys are ignored.
    """
    history_fields = (
        'pos_score', 'neu_score', 'neg_score', 'compound',
        'sentiment', 'confidence', 'timestamp',
    )
    row = {'text': [text]}
    row.update({field: [analysis_result[field]] for field in history_fields})
    new_data = pd.DataFrame(row)

    history = st.session_state.analyzed_data
    if history.empty:
        # Concatenating onto the empty placeholder frame is deprecated in
        # recent pandas releases (FutureWarning; the empty object-typed
        # columns would take part in dtype inference). The new row has the
        # same columns, so it can simply become the history.
        st.session_state.analyzed_data = new_data
    else:
        st.session_state.analyzed_data = pd.concat([history, new_data], ignore_index=True)

# Function to extract keywords from text
def extract_keywords(text, min_length=4):
    """Return up to ten ``(word, count)`` pairs, most frequent first.

    Words are lower-cased ``\\w+`` runs of at least *min_length* characters
    that are not in the built-in stop-word set. Words with equal counts keep
    their first-appearance order.
    """
    stop_words = {'the', 'and', 'are', 'this', 'that', 'with', 'from', 'have', 'has', 'been', 'were', 'was', 'will', 'would', 'should', 'could', 'they', 'their', 'what', 'when', 'where', 'which', 'these', 'those', 'there', 'here', 'some'}

    # Tokenize, then apply the length and stop-word filters in one pass
    tokens = [
        token
        for token in re.findall(r'\b\w+\b', text.lower())
        if len(token) >= min_length and token not in stop_words
    ]

    # Tally occurrences; dict insertion order records first appearance
    tally = {}
    for token in tokens:
        tally[token] = tally.get(token, 0) + 1

    # Stable ascending sort on the negated count == descending by count
    ranked = sorted(tally.items(), key=lambda pair: -pair[1])
    return ranked[:10]

# Dashboard UI
# Page heading
st.title("📊 Sentiment Analysis Dashboard")
st.markdown("Analyze the sentiment of text using VADER and custom logic")

# Sidebar: input-mode selector followed by a divider
settings_panel = st.sidebar
settings_panel.title("Settings")
input_modes = ["Manual Input", "News API"]
analysis_mode = settings_panel.radio("Select Input Mode", input_modes)
settings_panel.markdown("---")

# Badge colour per sentiment label, shared by both input modes.
sentiment_color = {
    "Positive": "green",
    "Neutral": "blue",
    "Negative": "red"
}

if analysis_mode == "Manual Input":
    # Free-text input analysed on demand
    text_input = st.text_area("Enter text to analyze", height=150)

    if st.button("Analyze Sentiment"):
        if text_input:
            with st.spinner("Analyzing sentiment..."):
                analysis_result = analyze_sentiment(text_input)

                if analysis_result:
                    # Record the result before rendering it
                    add_to_history(text_input, analysis_result)

                    st.markdown("### Sentiment Analysis Results")
                    col1, col2 = st.columns(2)

                    with col1:
                        # Sentiment badge with confidence
                        st.markdown(f"""
                        <div style="display: flex; align-items: center; margin-bottom: 15px;">
                            <div style="background-color: {sentiment_color[analysis_result['sentiment']]};
                                 color: white; padding: 8px 16px; border-radius: 20px; font-weight: bold;">
                                {analysis_result['sentiment']}
                            </div>
                            <div style="margin-left: 15px; font-size: 16px;">
                                Confidence: {analysis_result['confidence']:.2f}
                            </div>
                        </div>
                        """, unsafe_allow_html=True)

                        # Bar chart of the four VADER scores
                        st.markdown("#### Score Breakdown")
                        score_df = pd.DataFrame({
                            'Score Type': ['Positive', 'Neutral', 'Negative', 'Compound'],
                            'Value': [
                                analysis_result['pos_score'],
                                analysis_result['neu_score'],
                                analysis_result['neg_score'],
                                analysis_result['compound']
                            ]
                        })

                        score_fig = px.bar(
                            score_df,
                            x='Score Type',
                            y='Value',
                            color='Score Type',
                            color_discrete_map={
                                'Positive': 'green',
                                'Neutral': 'blue',
                                'Negative': 'red',
                                'Compound': 'purple'
                            }
                        )
                        score_fig.update_layout(height=300)
                        st.plotly_chart(score_fig, use_container_width=True)

                    with col2:
                        keywords = extract_keywords(text_input)

                        if keywords:
                            st.markdown("#### Key Terms")

                            wordcloud = WordCloud(
                                width=400,
                                height=200,
                                background_color='white',
                                colormap='viridis',
                                max_words=50
                            ).generate_from_frequencies(dict(keywords))

                            # Draw on an explicit Figure and close it after
                            # rendering; handing the pyplot module to
                            # st.pyplot relies on pyplot's global state and
                            # leaves a figure open on every run.
                            cloud_fig, cloud_ax = plt.subplots(figsize=(10, 5))
                            cloud_ax.imshow(wordcloud, interpolation='bilinear')
                            cloud_ax.axis('off')
                            st.pyplot(cloud_fig)
                            plt.close(cloud_fig)

                            # Keywords table
                            keyword_df = pd.DataFrame(keywords, columns=['Term', 'Frequency'])
                            st.table(keyword_df)
                        else:
                            st.info("No significant keywords found in the text.")
                else:
                    st.error("Could not analyze the text. Please try again.")
        else:
            st.warning("Please enter some text to analyze.")

elif analysis_mode == "News API":
    st.sidebar.markdown("### News API Settings")

    # News API configuration
    query = st.sidebar.text_input("Search Query", "technology")
    days = st.sidebar.slider("Days to look back", min_value=1, max_value=30, value=7)
    api_key = st.sidebar.text_input("News API Key", type="password")

    if st.sidebar.button("Fetch News"):
        if api_key and query:
            with st.spinner("Fetching news..."):
                news_data = fetch_news(query, api_key, days)

                if news_data and 'articles' in news_data:
                    articles = news_data['articles']

                    if articles:
                        st.session_state.api_results = articles
                        st.session_state.api_type = "news"
                        st.success(f"Found {len(articles)} articles about '{query}'")
                    else:
                        st.warning(f"No articles found for query: {query}")
                        st.session_state.api_results = []
                else:
                    st.error("Failed to fetch news. Please check your API key and try again.")
        else:
            st.warning("Please enter a search query and API key.")

    # Display and analyze fetched articles
    if st.session_state.api_results:
        total_articles = len(st.session_state.api_results)
        st.markdown(f"### News Articles: {total_articles} Results")

        # NOTE: article fields are read with `.get(key) or fallback` rather
        # than `.get(key, fallback)`: a key that is present with a null value
        # would otherwise yield None, which ends up as the literal text
        # "None" in the analysed string (or raises on a null `source`).

        # Analyze all articles
        if st.button("Analyze All Articles"):
            with st.spinner("Analyzing articles..."):
                progress_bar = st.progress(0)

                for i, article in enumerate(st.session_state.api_results):
                    title = article.get('title') or ''
                    description = article.get('description') or ''
                    content = article.get('content') or ''

                    # Combine text for analysis
                    text = f"{title}. {description} {content}"

                    analysis_result = analyze_sentiment(text)

                    if analysis_result:
                        # Attach article metadata to the result
                        analysis_result['source'] = (article.get('source') or {}).get('name') or 'Unknown'
                        analysis_result['title'] = title
                        analysis_result['url'] = article.get('url') or ''
                        analysis_result['publishedAt'] = article.get('publishedAt') or ''

                        add_to_history(title, analysis_result)

                    progress_bar.progress((i + 1) / total_articles)
                    # Small delay so progress bar updates are visible
                    time.sleep(0.05)

                st.success("Analysis complete!")

        # Display articles in expandable sections
        for i, article in enumerate(st.session_state.api_results):
            with st.expander(f"{i+1}. {article.get('title') or 'Untitled'}"):
                source_name = (article.get('source') or {}).get('name') or 'Unknown'
                st.markdown(f"**Source:** {source_name}")
                st.markdown(f"**Published:** {article.get('publishedAt') or 'Unknown'}")
                st.markdown(f"**Description:** {article.get('description') or 'No description'}")

                # Quick analyze button for this article only
                if st.button(f"Analyze Article {i+1}"):
                    title = article.get('title') or ''
                    description = article.get('description') or ''
                    content = article.get('content') or ''

                    # Combine text for analysis
                    text = f"{title}. {description} {content}"

                    analysis_result = analyze_sentiment(text)

                    if analysis_result:
                        # Sentiment badge with confidence
                        st.markdown(f"""
                        <div style="display: flex; align-items: center; margin: 10px 0;">
                            <div style="background-color: {sentiment_color[analysis_result['sentiment']]};
                                color: white; padding: 8px 16px; border-radius: 20px; font-weight: bold;">
                                {analysis_result['sentiment']}
                            </div>
                            <div style="margin-left: 15px; font-size: 16px;">
                                Confidence: {analysis_result['confidence']:.2f}
                            </div>
                        </div>
                        """, unsafe_allow_html=True)

                        add_to_history(title, analysis_result)

                # Link to the article
                st.markdown(f"[Read full article]({article.get('url') or '#'})")

# Tabs for different visualizations of the history data
# Sidebar toggle for the history section; its state is kept in the session
st.sidebar.markdown("---")
st.sidebar.markdown("### History & Visualization")
st.session_state.show_history = st.sidebar.checkbox(
    "Show Analysis History",
    value=st.session_state.show_history,
)
show_history = st.session_state.show_history

if show_history and not st.session_state.analyzed_data.empty:
    st.markdown("## Analysis History")

    # One tab per view of the accumulated results
    history_tabs = st.tabs(["Summary", "Trends", "Details", "Raw Data"])
    tab1, tab2, tab3, tab4 = history_tabs

    with tab1:
        st.markdown("### Sentiment Distribution")

        summary_df = st.session_state.analyzed_data
        label_palette = {
            'Positive': 'green',
            'Neutral': 'blue',
            'Negative': 'red'
        }

        left_panel, right_panel = st.columns(2)

        with left_panel:
            # Share of each sentiment label among analysed items
            label_counts = summary_df['sentiment'].value_counts().reset_index()
            label_counts.columns = ['Sentiment', 'Count']

            pie_chart = px.pie(
                label_counts,
                values='Count',
                names='Sentiment',
                color='Sentiment',
                color_discrete_map=label_palette,
                title="Sentiment Distribution"
            )
            st.plotly_chart(pie_chart, use_container_width=True)

        with right_panel:
            # Mean confidence within each sentiment label
            confidence_by_label = summary_df.groupby('sentiment')['confidence'].mean().reset_index()
            confidence_by_label.columns = ['Sentiment', 'Avg Confidence']

            confidence_chart = px.bar(
                confidence_by_label,
                x='Sentiment',
                y='Avg Confidence',
                color='Sentiment',
                color_discrete_map=label_palette,
                title="Average Confidence by Sentiment"
            )
            st.plotly_chart(confidence_chart, use_container_width=True)

        # Headline numbers across the whole history
        st.markdown("### Summary Statistics")

        count_slot, compound_slot, confidence_slot = st.columns(3)

        with count_slot:
            st.metric("Total Items Analyzed", len(summary_df))

        with compound_slot:
            if 'compound' in summary_df.columns:
                mean_compound = summary_df['compound'].mean()
                st.metric("Average Compound Score", f"{mean_compound:.2f}")

        with confidence_slot:
            if 'confidence' in summary_df.columns:
                mean_confidence = summary_df['confidence'].mean()
                st.metric("Average Confidence", f"{mean_confidence:.2f}")

    with tab2:
        st.markdown("### Sentiment Trends Over Time")

        if 'timestamp' in st.session_state.analyzed_data.columns:
            try:
                # Parse timestamps on a copy: converting the column inside
                # st.session_state would change the stored history as a side
                # effect of merely viewing this tab.
                trend_data = st.session_state.analyzed_data.copy()
                trend_data['timestamp'] = pd.to_datetime(trend_data['timestamp'])
                trend_data = trend_data.sort_values('timestamp')

                # Line chart for compound score over time
                fig = px.line(
                    trend_data,
                    x='timestamp',
                    y='compound',
                    title="Sentiment Compound Score Over Time",
                    labels={'compound': 'Compound Score', 'timestamp': 'Time'}
                )

                # Dashed reference lines and labels at the +/-0.05 cut-offs
                # used by analyze_sentiment
                first_ts = trend_data['timestamp'].min()
                last_ts = trend_data['timestamp'].max()
                thresholds = (
                    (0.05, "green", "Positive Threshold", 10),
                    (-0.05, "red", "Negative Threshold", -10),
                )

                for level, colour, _label, _shift in thresholds:
                    fig.add_shape(
                        type="line",
                        x0=first_ts,
                        y0=level,
                        x1=last_ts,
                        y1=level,
                        line=dict(color=colour, width=1, dash="dash"),
                    )

                for level, colour, label, shift in thresholds:
                    fig.add_annotation(
                        x=first_ts,
                        y=level,
                        text=label,
                        showarrow=False,
                        yshift=shift,
                        font=dict(size=10, color=colour)
                    )

                st.plotly_chart(fig, use_container_width=True)

                # Moving average of sentiment (needs more than 5 points, so
                # a fixed window of 5 always fits)
                if len(trend_data) > 5:
                    st.markdown("### Moving Average Sentiment")

                    trend_data['compound_ma'] = trend_data['compound'].rolling(window=5).mean()

                    fig = px.line(
                        trend_data.dropna(),
                        x='timestamp',
                        y=['compound', 'compound_ma'],
                        title="Raw and Moving Average Sentiment",
                        labels={
                            'compound': 'Compound Score',
                            'compound_ma': 'Moving Avg (5)',
                            'timestamp': 'Time'
                        }
                    )
                    st.plotly_chart(fig, use_container_width=True)
            except Exception as e:
                # UI boundary: report the problem instead of aborting the page
                st.error(f"Error processing time data: {str(e)}")

    with tab3:
        st.markdown("### Sentiment Details")

        details_df = st.session_state.analyzed_data
        label_palette = {
            'Positive': 'green',
            'Neutral': 'blue',
            'Negative': 'red'
        }

        # Overlaid histograms of the three component scores
        st.markdown("#### Score Distributions")

        score_cols = ['pos_score', 'neu_score', 'neg_score']
        if set(score_cols).issubset(details_df.columns):
            # Long format: one row per (score type, value) pair
            long_scores = details_df[score_cols].melt(var_name='Score Type', value_name='Value')

            # Map column names to display names
            display_names = {
                'pos_score': 'Positive',
                'neu_score': 'Neutral',
                'neg_score': 'Negative'
            }
            long_scores['Score Type'] = long_scores['Score Type'].map(display_names)

            histogram = px.histogram(
                long_scores,
                x='Value',
                color='Score Type',
                barmode='overlay',
                opacity=0.7,
                color_discrete_map=label_palette,
                title="Distribution of Sentiment Scores"
            )
            st.plotly_chart(histogram, use_container_width=True)

        # Scatter plot of compound vs confidence
        if {'compound', 'confidence'}.issubset(details_df.columns):
            st.markdown("#### Compound Score vs. Confidence")

            scatter = px.scatter(
                details_df,
                x='compound',
                y='confidence',
                color='sentiment',
                color_discrete_map=label_palette,
                hover_data=['text'],
                title="Compound Score vs. Confidence"
            )

            # Dashed vertical lines at the positive / negative cut-offs
            for boundary, colour in ((0.05, "green"), (-0.05, "red")):
                scatter.add_shape(
                    type="line",
                    x0=boundary,
                    y0=0,
                    x1=boundary,
                    y1=1,
                    line=dict(color=colour, width=1, dash="dash"),
                )

            st.plotly_chart(scatter, use_container_width=True)

    with tab4:
        st.markdown("### Raw Data")

        # Full history table plus a CSV export of the same frame
        raw_history = st.session_state.analyzed_data
        st.dataframe(raw_history)

        st.download_button(
            label="Download Data as CSV",
            data=raw_history.to_csv(index=False),
            file_name="sentiment_analysis_data.csv",
            mime="text/csv",
        )

# About section
# Static help text shown in the collapsible "About" panel of the sidebar
about_text = """
    ### Sentiment Analysis Dashboard

    This dashboard analyzes sentiment in text using VADER Sentiment Analysis with custom logic.

    #### Features:
    - Manual text analysis
    - News API integration
    - Custom confidence scoring
    - Visualization of sentiment data
    - Historical trend analysis

    #### How It Works:
    1. Enter text or fetch from News API
    2. VADER Sentiment Analysis is applied
    3. Custom logic enhances the analysis
    4. Results are visualized in the dashboard

    #### Sentiment Categories:
    - Positive: Compound score >= 0.05
    - Neutral: Compound score between -0.05 and 0.05
    - Negative: Compound score <= -0.05
    """
st.sidebar.markdown("---")
st.sidebar.expander("About This Dashboard").markdown(about_text)

# Clear history button
# The clear button is only offered while there is history to clear
history_present = not st.session_state.analyzed_data.empty
if history_present and st.sidebar.button("Clear Analysis History"):
    blank_columns = ['text', 'pos_score', 'neu_score', 'neg_score', 'compound', 'sentiment', 'confidence', 'timestamp']
    st.session_state.analyzed_data = pd.DataFrame(columns=blank_columns)
    st.sidebar.success("History cleared!")

Writing sentiment_app.py


In [17]:
import getpass
import os
import subprocess

from pyngrok import ngrok

# Tear down any tunnel/agent left over from a previous run of this cell.
ngrok.kill()
os.system('pkill ngrok')

# SECURITY: the auth token must not be hard-coded in the notebook. Read it
# from the NGROK_AUTH_TOKEN environment variable, or prompt for it.
# NOTE(review): a token was previously committed in this cell and should be
# revoked in the ngrok dashboard.
auth_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass.getpass("ngrok auth token: ")
ngrok.set_auth_token(auth_token)

public_url = ngrok.connect(addr='8501')
print(f"🚀 Fresh tunnel: {public_url}")

# Start the app in the background with its output discarded. The script name
# must match the file produced by the %%writefile cell (sentiment_app.py).
subprocess.Popen(
    ["streamlit", "run", "sentiment_app.py", "--server.port", "8501"],
    stdout=subprocess.DEVNULL,
    stderr=subprocess.DEVNULL,
)

🚀 Fresh tunnel: NgrokTunnel: "https://b179-34-86-25-252.ngrok-free.app" -> "http://localhost:8501"
