In [1]:
# Install the schedule, pyngrok and streamlit packages into the notebook runtime.
# NOTE(review): nothing in the visible cells imports `schedule` - confirm it is still needed.
!pip install schedule pyngrok streamlit



In [None]:
# Destination of the generated Streamlit script (app.py) under /content/drive.
# The launcher cell reads the same path when it copies the app to /content.
APP_PATH = '/content/drive/MyDrive/Colab Notebooks/Sentiment Analysis/app.py'

# Correct app.py content (Streamlit application code only)
app_code = '''import streamlit as st
import pandas as pd
import pymongo
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from textblob import TextBlob
import re
import os
import time
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np

# Page configuration: browser tab title and icon, plus the wide (full-width) layout
st.set_page_config(
    page_title="Social Media Sentiment Analysis",
    page_icon="📊",
    layout="wide"
)

# Custom CSS injected as raw HTML: forces a white background with black text on
# the main containers and headings, and styles buttons as full-width dark blue
# with a lighter blue hover state.
# NOTE(review): the class selectors below (.main, .sidebar .sidebar-content,
# .block-container) depend on Streamlit's generated markup - confirm they still
# match the installed Streamlit version.
st.markdown("""
    <style>
    .main {
        padding: 1rem;
        background-color: white;
        color: black;
    }
    .sidebar .sidebar-content {
        background-color: white;
        color: black;
    }
    .block-container {
        padding-top: 1rem;
        padding-bottom: 1rem;
        background-color: white;
    }
    h1, h2, h3 {
        color: black;
    }
    p, div, span {
        color: black;
    }
    .stButton>button {
        width: 100%;
        background-color: #1E3A8A;
        color: white;
    }
    .stButton>button:hover {
        background-color: #2E4A9A;
    }
    </style>
    """, unsafe_allow_html=True)

# MongoDB Connection
@st.cache_resource
def get_mongo_client():
    """Create the shared MongoDB Atlas client.

    The username and password are looked up under the names
    ``Database_Username`` and ``Database_Password``: first in the process
    environment, then in the Colab secret store. Both values are
    percent-escaped before being placed in the connection URI.

    Returns:
        A ``pymongo.MongoClient`` on success, or ``None`` after the problem
        has been reported with ``st.error``.
    """
    from urllib.parse import quote_plus

    def _secret(name):
        """Resolve one credential, preferring the environment over Colab secrets."""
        value = os.environ.get(name)
        if not value:
            # Imported lazily: this module only exists inside a Colab runtime.
            from google.colab import userdata
            value = userdata.get(name)
        if not value:
            raise ValueError(f"Missing MongoDB credential: {name}")
        return value

    try:
        # Reserved characters such as "@" or ":" in a credential would
        # otherwise corrupt the URI, hence the escaping.
        username = quote_plus(_secret("Database_Username"))
        password = quote_plus(_secret("Database_Password"))
        cluster_url = "cluster0.8ad48r1.mongodb.net"
        MONGO_CONNECTION_STRING = f"mongodb+srv://{username}:{password}@{cluster_url}/?retryWrites=true&w=majority&appName=Cluster0"
        return pymongo.MongoClient(MONGO_CONNECTION_STRING)
    except Exception as e:
        # UI boundary: report the failure and let callers fall back to empty data.
        st.error(f"MongoDB Connection Error: {e}")
        return None

# Function to load RoBERTa sentiment model
@st.cache_resource
def load_sentiment_model():
    """Load the pre-trained RoBERTa sentiment checkpoint.

    Returns:
        A ``(tokenizer, model)`` pair, both loaded from the
        ``cardiffnlp/twitter-roberta-base-sentiment`` checkpoint.
    """
    checkpoint = "cardiffnlp/twitter-roberta-base-sentiment"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSequenceClassification.from_pretrained(checkpoint),
    )

# Function to analyze sentiment using RoBERTa
def analyze_roberta_sentiment(text, tokenizer, model):
    """Classify text with the RoBERTa sentiment model.

    Args:
        text: The text to score; the tokenizer truncates it to 512 tokens.
        tokenizer: Callable tokenizer that returns PyTorch tensors.
        model: Sequence-classification model whose output exposes ``logits``
            with the classes ordered negative, neutral, positive.

    Returns:
        A ``(label, score)`` tuple. ``label`` is "positive", "negative" or
        "neutral" (any tie falls back to "neutral"). ``score`` is
        ``P(positive) - P(negative)`` as a plain float in [-1, 1].
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(**inputs).logits
    negative, neutral, positive = torch.softmax(logits, dim=1)[0].tolist()

    # The probability difference already spans [-1, 1]; rescaling it again
    # would push strongly negative texts down to -3.
    sentiment_score = positive - negative

    if positive > negative and positive > neutral:
        return "positive", sentiment_score
    if negative > neutral and negative > positive:
        return "negative", sentiment_score
    return "neutral", sentiment_score

# Function to analyze sentiment using TextBlob
def analyze_textblob_sentiment(text):
    """Classify text by its TextBlob polarity.

    Returns:
        A ``(label, polarity)`` tuple. The label is "positive" when the
        polarity is above 0.1, "negative" when it is below -0.1, and
        "neutral" otherwise.
    """
    polarity = TextBlob(text).sentiment.polarity

    if polarity > 0.1:
        label = "positive"
    elif polarity < -0.1:
        label = "negative"
    else:
        label = "neutral"
    return label, polarity

# Function to get trending hashtags
def get_trending_hashtags(platform=None, limit=10):
    """Return the most frequent hashtags from the newest stored batch.

    Args:
        platform: Optional platform name; when given, only entries whose
            platform matches it (case-insensitively, ignoring surrounding
            whitespace) are kept.
        limit: Maximum number of entries to return.

    Returns:
        A list of dicts with the keys platform, tag, sentiment, count and
        timestamp, ordered by count from highest to lowest. The list is
        empty when no client or no batch with a "data" field is available.
    """
    mongo = get_mongo_client()
    if not mongo:
        return []

    batches = mongo["social_media_analytics_new"]["batch_tag_sentiment"]

    # The newest batch is the one with the greatest timestamp.
    newest = batches.find_one(sort=[("timestamp", -1)])
    if not (newest and "data" in newest):
        return []

    rows = [
        {
            "platform": entry["platform"].strip(),
            "tag": entry["tag"],
            "sentiment": entry["sentiment"],
            "count": entry["count"],
            "timestamp": entry["timestamp"],
        }
        for entry in newest["data"]
        if not platform or entry["platform"].strip().lower() == platform.lower()
    ]

    # Most used first, then cut to the requested size.
    rows.sort(key=lambda row: row["count"], reverse=True)
    return rows[:limit]

# Function to get Mastodon sentiment data
def get_mastodon_sentiment_data(limit=100):
    """Fetch stored Mastodon posts, sorted by created_at in descending order.

    Args:
        limit: Maximum number of documents to return.

    Returns:
        A list of documents from the mastodon_sentiment_data collection, or
        an empty list when no MongoDB client is available.
    """
    mongo = get_mongo_client()
    if not mongo:
        return []

    posts = mongo["social_media_analytics_new"]["mastodon_sentiment_data"]
    newest_first = posts.find().sort("created_at", -1).limit(limit)
    return list(newest_first)

# Function to get YouTube sentiment data
def get_youtube_sentiment_data(limit=100):
    """Fetch stored YouTube comments, sorted by published_at in descending order.

    Args:
        limit: Maximum number of documents to return.

    Returns:
        A list of documents from the youtube_sentiment_data collection, or
        an empty list when no MongoDB client is available.
    """
    mongo = get_mongo_client()
    if not mongo:
        return []

    comments = mongo["social_media_analytics_new"]["youtube_sentiment_data"]
    newest_first = comments.find().sort("published_at", -1).limit(limit)
    return list(newest_first)

# Function to create a gauge chart for sentiment score
def create_gauge_chart(score, title):
    """Build a Plotly gauge showing a sentiment score on a -1 to 1 axis.

    The dial has three coloured bands: red from -1 to -0.1, amber from
    -0.1 to 0.1 and green from 0.1 to 1.

    Args:
        score: The value the gauge displays.
        title: Caption shown on the gauge.

    Returns:
        The configured ``go.Figure``.
    """
    black = {'color': 'black'}
    bands = [
        {'range': [-1, -0.1], 'color': "#F44336"},
        {'range': [-0.1, 0.1], 'color': "#FFC107"},
        {'range': [0.1, 1], 'color': "#4CAF50"},
    ]

    indicator = go.Indicator(
        mode="gauge+number",
        value=score,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': title, 'font': dict(black)},
        gauge={
            'axis': {'range': [-1, 1], 'tickfont': dict(black)},
            'bar': {'color': "#1E3A8A"},
            'steps': bands,
        },
    )

    chart = go.Figure(indicator)
    chart.update_layout(paper_bgcolor='white', font=dict(black), height=300)
    return chart

# Main App
# Feed behaviour: how many entries one script run cycles through, and the pause
# (in seconds) between two consecutive entries.
FEED_ITEM_LIMIT = 5
FEED_ROTATION_SECONDS = 2


def _sentiment_style(sentiment):
    """Return the (emoji, background colour) pair used to present a sentiment label."""
    if sentiment == "positive":
        return "😃", "#e6f7e6"
    if sentiment == "neutral":
        return "😐", "#f7f7e6"
    return "😞", "#f7e6e6"


def _as_text(value):
    """Return value as a string, mapping None to the empty string."""
    return "" if value is None else str(value)


def _as_naive_utc(value):
    """Coerce a stored timestamp to a naive UTC datetime, or None if unusable.

    Accepts datetime objects and ISO-8601 strings (a trailing "Z" is allowed).
    Timezone-aware values are shifted to UTC and stripped of their tzinfo so
    that every result can be compared with the others when sorting.
    """
    if isinstance(value, str):
        try:
            value = datetime.fromisoformat(value.strip().replace("Z", "+00:00"))
        except ValueError:
            return None
    if not isinstance(value, datetime):
        return None
    offset = value.utcoffset()
    if offset is not None:
        value = (value - offset).replace(tzinfo=None)
    return value


def _build_feed_items(mastodon_posts, youtube_comments):
    """Normalise Mastodon posts and YouTube comments into one newest-first list."""
    items = []
    for post in mastodon_posts:
        items.append({
            "platform": "Mastodon",
            "text": _as_text(post.get("text")),
            "tag": _as_text(post.get("tag")),
            "timestamp": _as_naive_utc(post.get("created_at")),
            "url": _as_text(post.get("post_url")),
        })
    for comment in youtube_comments:
        items.append({
            "platform": "YouTube",
            "text": _as_text(comment.get("text")),
            "tag": _as_text(comment.get("tag")),
            "timestamp": _as_naive_utc(comment.get("published_at")),
            "author": _as_text(comment.get("author")),
            "video_title": _as_text(comment.get("video_title")),
        })

    # Entries without a usable timestamp sort last instead of being stamped "now".
    items.sort(key=lambda entry: entry["timestamp"] or datetime.min, reverse=True)
    return items


def _feed_card_html(item, sentiment, score):
    """Build the HTML card for one feed entry.

    Every value that originates from a social-media post is HTML-escaped,
    because the card is rendered with unsafe_allow_html=True.
    """
    from html import escape

    emoji, background = _sentiment_style(sentiment)
    platform = item["platform"]
    stamp = item["timestamp"].strftime("%Y-%m-%d %H:%M") if item["timestamp"] else ""
    tag = escape(item["tag"])
    # Escape first, then turn line breaks into <br> so multi-line posts stay in the card.
    body = "<br>".join(escape(item["text"]).splitlines())

    if platform == "YouTube":
        icon = "📺"
        author = escape(item["author"])
        title = escape(item["video_title"])
        footer = f'{author} on "{title}" • {stamp}'
    else:
        icon = "🐘"
        url = item["url"]
        # Link only to web URLs; anything else (e.g. a javascript: URL) becomes "#".
        href = escape(url, quote=True) if url.startswith(("http://", "https://")) else "#"
        footer = f'{stamp} • <a href="{href}" target="_blank" rel="noopener noreferrer">View Post</a>'

    return (
        f'<div style="background-color: {background}; padding: 15px; '
        f'border-radius: 8px; margin-bottom: 15px; color: black;">'
        f'<span style="font-size: 1.2em;"><b>{icon} {platform}</b> - Tag: <i>#{tag}</i></span>'
        f'<br><span>"{body}"</span>'
        f'<br><span>{emoji} Sentiment: <b>{sentiment.capitalize()}</b> (Score: {score:.2f})</span>'
        f'<br><span style="font-size: 0.8em;">{footer}</span>'
        f'</div>'
    )


def _hashtag_card_html(entry):
    """Build the HTML card for one trending-hashtag entry (stored values are escaped)."""
    from html import escape

    sentiment = _as_text(entry["sentiment"])
    emoji, background = _sentiment_style(sentiment)
    icon = "📺" if entry["platform"].lower() == "youtube" else "🐘"
    tag = escape(_as_text(entry["tag"]))
    count = escape(_as_text(entry["count"]))

    return (
        f'<div style="background-color: {background}; padding: 10px; '
        f'border-radius: 5px; margin-bottom: 10px; color: black;">'
        f'<b>{icon} {tag}</b> <br>'
        f'{emoji} {escape(sentiment.capitalize())} <br>'
        f'Count: {count}'
        f'</div>'
    )


def _run_feed_rotation(placeholder, items, tokenizer, model):
    """Show up to FEED_ITEM_LIMIT entries one after another inside placeholder.

    Each entry replaces the previous one and stays visible for
    FEED_ROTATION_SECONDS before the next is drawn; the last one remains.
    """
    visible = items[:FEED_ITEM_LIMIT]
    if not visible:
        placeholder.info("No posts found for the selected platform.")
        return

    for position, item in enumerate(visible):
        sentiment, score = analyze_roberta_sentiment(item["text"], tokenizer, model)
        with placeholder.container():
            st.markdown(_feed_card_html(item, sentiment, score), unsafe_allow_html=True)
        if position < len(visible) - 1:
            time.sleep(FEED_ROTATION_SECONDS)


def _render_model_result(heading, score_caption, gauge_title, sentiment, score):
    """Show one model verdict: a coloured result card next to a gauge chart."""
    emoji, background = _sentiment_style(sentiment)

    st.subheader(heading)
    card_column, gauge_column = st.columns(2)

    with card_column:
        st.markdown(
            f'<div style="background-color: {background}; padding: 20px; '
            f'border-radius: 10px; text-align: center; color: black;">'
            f'<h3>{emoji} {sentiment.capitalize()}</h3>'
            f'<p>{score_caption}: {score:.2f}</p>'
            f'<p>(-1: Very Negative, +1: Very Positive)</p>'
            f'</div>',
            unsafe_allow_html=True,
        )

    with gauge_column:
        st.plotly_chart(create_gauge_chart(score, gauge_title), use_container_width=True)


def _render_custom_analysis(tokenizer, model):
    """Render the free-text form and, on submit, both models' results."""
    st.header("Custom Text Sentiment Analysis")

    custom_text = st.text_area(
        "Enter text for sentiment analysis",
        height=150,
        placeholder="Type or paste text here to analyze its sentiment..."
    )

    if not st.button("Analyze Sentiment"):
        return

    # Whitespace-only input counts as empty.
    if not custom_text or not custom_text.strip():
        st.warning("Please enter some text to analyze.")
        return

    textblob_sentiment, textblob_score = analyze_textblob_sentiment(custom_text)
    roberta_sentiment, roberta_score = analyze_roberta_sentiment(custom_text, tokenizer, model)

    _render_model_result(
        "TextBlob Analysis",
        "TextBlob Score",
        "TextBlob Sentiment Score",
        textblob_sentiment,
        textblob_score,
    )
    _render_model_result(
        "RoBERTa Analysis (cardiffnlp/twitter-roberta-base-sentiment)",
        "RoBERTa Score",
        "RoBERTa Sentiment Score",
        roberta_sentiment,
        roberta_score,
    )

    st.subheader("Comparison")
    if textblob_sentiment == roberta_sentiment:
        st.success(f"Both models agree on the sentiment: {textblob_sentiment.capitalize()}")
    else:
        st.warning(f"The models disagree: TextBlob says {textblob_sentiment.capitalize()} while RoBERTa says {roberta_sentiment.capitalize()}")
        st.info("This disagreement may indicate nuanced sentiment or context that one model captures better than the other.")


def main():
    """Render the dashboard: dashboard link, trending hashtags and the two tabs.

    Layout is three columns. The left column links to the externally hosted
    batch dashboard, the right column lists trending hashtags with a platform
    filter, and the centre column holds the "Real-time Sentiment Feed" and
    "Custom Text Analysis" tabs.

    The feed cycles through the newest entries with a pause between them, so
    it blocks the script while it runs. The custom-analysis tab is therefore
    filled first, which keeps its form and results from waiting on the feed.
    """
    tokenizer, model = load_sentiment_model()

    st.title("📊 Social Media Sentiment Analysis Platform")

    left_sidebar, main_content, right_sidebar = st.columns([1, 3, 1])

    # LEFT COLUMN: link to the batch dashboard hosted on Tableau Public
    with left_sidebar:
        st.header("Actions")

        st.markdown("### Batch Analysis Dashboard")
        st.markdown("""
            [📈 Open Batch Analysis Dashboard](https://public.tableau.com/app/profile/jayasha.lakshani/viz/SocialMediaSentimentAnalysis_17460481514910/Dashboard1)
        """)

    # RIGHT COLUMN: trending hashtags with a platform filter
    with right_sidebar:
        st.header("Trending Hashtags")

        selected_platform = st.selectbox(
            "Filter by Platform",
            options=["All", "YouTube", "Mastodon"],
            index=0
        )

        trending_tags = get_trending_hashtags(
            platform=None if selected_platform == "All" else selected_platform
        )

        if trending_tags:
            for entry in trending_tags:
                st.markdown(_hashtag_card_html(entry), unsafe_allow_html=True)
        else:
            st.info("No trending hashtags found. Try changing the platform filter.")

    # CENTRE COLUMN: the two feature tabs
    with main_content:
        feed_tab, custom_tab = st.tabs(["Real-time Sentiment Feed", "Custom Text Analysis"])

        # Tab containers can be filled in any order; the non-blocking one goes first.
        with custom_tab:
            _render_custom_analysis(tokenizer, model)

        with feed_tab:
            st.header("Real-time Social Media Sentiment")

            feed_platform = st.radio(
                "Select Platform",
                options=["Mastodon", "YouTube", "Both"],
                horizontal=True
            )

            # Single slot whose content is replaced by each feed entry in turn.
            feed_placeholder = st.empty()

            mastodon_data = get_mastodon_sentiment_data() if feed_platform in ("Mastodon", "Both") else []
            youtube_data = get_youtube_sentiment_data() if feed_platform in ("YouTube", "Both") else []

            _run_feed_rotation(
                feed_placeholder,
                _build_feed_items(mastodon_data, youtube_data),
                tokenizer,
                model,
            )

if __name__ == "__main__":
    main()
'''

# Write app.py to the correct location
import os
os.makedirs(os.path.dirname(APP_PATH), exist_ok=True)
# app_code contains emoji, so pin UTF-8 rather than relying on the locale default.
with open(APP_PATH, 'w', encoding='utf-8') as f:
    f.write(app_code)
print(f"Saved app.py to {APP_PATH}")

Saved app.py to /content/drive/MyDrive/Colab Notebooks/Sentiment Analysis/app.py


In [3]:
# 1. First, ensure Google Drive is properly mounted
# from google.colab import drive
# drive.mount('/content/drive')

# 2. Install dependencies with pip (explicitly with --no-cache-dir to force fresh install)
!pip install --no-cache-dir streamlit pymongo pandas plotly nltk pyngrok textblob numpy matplotlib

# 3. Verify installations
import sys
print("\n--- Checking installed modules ---")
modules_to_check = ['streamlit', 'pymongo', 'pandas', 'plotly', 'nltk', 'pyngrok', 'textblob', 'numpy', 'matplotlib']
for module in modules_to_check:
    try:
        __import__(module)
        print(f"✓ {module} is installed")
    except ImportError:
        print(f"✗ {module} is NOT installed")
        !pip install --no-cache-dir {module}

# 4. Copy the app to local filesystem
APP_PATH = '/content/drive/MyDrive/Colab Notebooks/Sentiment Analysis/app.py'
LOCAL_APP_PATH = '/content/app.py'

import os
import shutil
if os.path.exists(APP_PATH):
    print(f"\n✓ Found app at {APP_PATH}")
    shutil.copy(APP_PATH, LOCAL_APP_PATH)
    print(f"✓ Copied to {LOCAL_APP_PATH}")
else:
    print(f"\n✗ App not found at {APP_PATH}")
    print("Looking for app.py files in your Drive:")
    !find /content/drive -name "app.py" -type f | head -10

# 5. Check the content of the app file
print("\n--- App Content Preview ---")
!head -20 "{LOCAL_APP_PATH}"

# 6. Kill any existing processes
!pkill -f streamlit || true
!sleep 2

# 7. Configure ngrok
from pyngrok import ngrok
NGROK_AUTH_TOKEN = "2wQfhX9LMm3GlO5MlvjUvu8t43I_58wGhuT7YUacnAR5iLijQ"
!ngrok authtoken {NGROK_AUTH_TOKEN}

# 8. Start Streamlit with explicit host and port binding
print("\n--- Starting Streamlit ---")
!nohup streamlit run "{LOCAL_APP_PATH}" --server.address=0.0.0.0 --server.port=8501 --server.headless=true > streamlit.log 2>&1 &

# 9. Give Streamlit time to start
import time
print("Waiting for Streamlit to start...")
time.sleep(10)

# 10. Check if Streamlit is running
print("\n--- Streamlit Status ---")
!grep -i error streamlit.log
!ps aux | grep streamlit
!cat streamlit.log | tail -20

# 11. Create ngrok tunnel
try:
    print("\n--- Creating ngrok tunnel ---")
    public_url = ngrok.connect(addr="8501", proto="http", bind_tls=True)
    print(f"\n🚀 Your app is running at: {public_url}")
except Exception as e:
    print(f"\nError creating ngrok tunnel: {e}")
    print("\nDebug information:")
    !cat streamlit.log


--- Checking installed modules ---
✓ streamlit is installed
✓ pymongo is installed
✓ pandas is installed
✓ plotly is installed
✓ nltk is installed
✓ pyngrok is installed
✓ textblob is installed
✓ numpy is installed
✓ matplotlib is installed

✓ Found app at /content/drive/MyDrive/Colab Notebooks/Sentiment Analysis/app.py
✓ Copied to /content/app.py

--- App Content Preview ---
import streamlit as st
import pandas as pd
import pymongo
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from textblob import TextBlob
import re
import os
import time
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np

# Page configuration
st.set_page_config(
    page_title="Social Media Sentiment Analysis",
    page_icon="📊",
^C
Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml

--- Starting Streamlit ---
Waiting for Streamlit to 