In [1]:
import sys
if 'google.colab' in sys.modules:
  !pip install streamlit pyngrok google-api-python-client emoji vaderSentiment

# Import necessary libraries
import streamlit as st
from pyngrok import ngrok
from IPython.display import display, Markdown

Collecting streamlit
  Downloading streamlit-1.54.0-py3-none-any.whl.metadata (9.8 kB)
Collecting pyngrok
  Downloading pyngrok-7.5.0-py3-none-any.whl.metadata (8.1 kB)
Collecting emoji
  Downloading emoji-2.15.0-py3-none-any.whl.metadata (5.7 kB)
Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Collecting cachetools<7,>=5.5 (from streamlit)
  Downloading cachetools-6.2.6-py3-none-any.whl.metadata (5.6 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.54.0-py3-none-any.whl (9.1 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m9.1/9.1 MB[0m [31m41.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.5.0-py3-none-any.whl (24 kB)
Downloading emoji-2.15.0-py3-none-any.whl (608 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ

In [2]:
%%writefile app.py
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
# from pyngrok import ngrok # ngrok is used to tunnel, not directly in app.py

# New imports for YouTube API and sentiment analysis
from googleapiclient.discovery import build
import re
import emoji
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

#  Page Config
st.set_page_config(page_title="YouTube Sentiment Analyzer", page_icon="üéØ", layout="wide")

#  Step 2: Header Section
st.title(" YouTube Sentiment Analyzer")
st.subheader("Analyze what viewers really feel about your video")

# Input field for YouTube URL
youtube_url = st.text_input("Paste YouTube Video URL here ")
analyze_btn = st.button("Analyze Comments")

# --- Function to fetch and analyze comments ---
def fetch_and_analyze_comments(youtube_url):
    API_KEY = 'AIzaSyBmtsVe0jxC7jzirb7z_SPQctTIwkE5aGc' # Using the same key from mZD7kbb_rZUu
    youtube = build('youtube', 'v3', developerKey=API_KEY)

    match = re.search(r'(?<=v=)[a-zA-Z0-9_-]+', youtube_url)
    x = match.group(0) if match else None

    if not x:
        st.error("Error: Invalid YouTube URL. Could not extract video ID.")
        return pd.DataFrame()

    # Getting the channelId of the video uploader
    video_response = youtube.videos().list(
        part='snippet',
        id=x
    ).execute()

    uploader_channel_id = None
    if video_response and 'items' in video_response and len(video_response['items']) > 0:
        video_snippet = video_response['items'][0]['snippet']
        uploader_channel_id = video_snippet['channelId']
    else:
        st.error(f"Error: No video data found for ID '{x}'. It might be invalid, private, or deleted.")
        return pd.DataFrame()

    comments = []
    nextPageToken = None
    progress_bar = st.progress(0)
    status_text = st.empty()
    total_comments_to_fetch = 600 # Max comments to fetch

    status_text.text("Fetching Comments...")
    fetched_count = 0
    while fetched_count < total_comments_to_fetch:
        request = youtube.commentThreads().list(
            part='snippet',
            videoId=x,
            maxResults=100,
            pageToken=nextPageToken
        )
        response = request.execute()
        for item in response['items']:
            comment = item['snippet']['topLevelComment']['snippet']
            if comment['authorChannelId']['value'] != uploader_channel_id:
                comments.append(comment['textDisplay'])
                fetched_count += 1
                if fetched_count >= total_comments_to_fetch:
                    break
        nextPageToken = response.get('nextPageToken')

        progress_bar.progress(min(fetched_count, total_comments_to_fetch) / total_comments_to_fetch)
        status_text.text(f"Fetched {fetched_count} comments...")

        if not nextPageToken or fetched_count >= total_comments_to_fetch:
            break
    status_text.text(f"Finished fetching {fetched_count} comments.")

    # Filter relevant comments
    hyperlink_pattern = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    threshold_ratio = 0.65
    relevant_comments = []

    for comment_text in comments:
        comment_text = comment_text.lower().strip()
        emojis_count = emoji.emoji_count(comment_text)
        text_characters = len(re.sub(r'\s', '', comment_text))

        if (any(char.isalnum() for char in comment_text)) and not hyperlink_pattern.search(comment_text):
            if emojis_count == 0 or (text_characters / (text_characters + emojis_count)) > threshold_ratio:
                relevant_comments.append(comment_text)

    # Sentiment Analysis
    analyzer = SentimentIntensityAnalyzer()
    sentiment_data = []

    status_text.text("Analyzing Comments...")
    for i, comment_text in enumerate(relevant_comments):
        vs = analyzer.polarity_scores(comment_text)
        compound_score = vs['compound']

        sentiment_label = ""
        if compound_score >= 0.05:
            sentiment_label = "positive"
        elif compound_score <= -0.05:
            sentiment_label = "negative"
        else:
            sentiment_label = "neutral"
        sentiment_data.append({'comment': comment_text, 'sentiment': sentiment_label, 'compound_score': compound_score})
        progress_bar.progress((i + 1) / len(relevant_comments))
    status_text.text("Finished analyzing comments.")


    return pd.DataFrame(sentiment_data)


if analyze_btn and youtube_url:
    with st.spinner('Analyzing YouTube comments...'):
        df = fetch_and_analyze_comments(youtube_url)

    if not df.empty:
        # Step 3: Overview Section
        total_comments = len(df)
        positive_count = (df['sentiment'] == 'positive').sum()
        neutral_count = (df['sentiment'] == 'neutral').sum()
        negative_count = (df['sentiment'] == 'negative').sum()

        st.markdown(" Overview Summary")
        col1, col2, col3 = st.columns(3)
        col1.metric("Total Comments", total_comments)
        col2.metric("Positive", positive_count)
        col3.metric("Negative", negative_count)

        #  Generate a basic summary
        overall_sentiment = "Mixed"
        if positive_count > negative_count and positive_count > neutral_count:
            overall_sentiment = "Mostly Positive"
        elif negative_count > positive_count and negative_count > neutral_count:
            overall_sentiment = "Mostly Negative"
        elif neutral_count > positive_count and neutral_count > negative_count:
            overall_sentiment = "Mostly Neutral"


        summary_text = f"Overall viewers felt **{overall_sentiment}** about this video. "

        st.write(summary_text)

        #  Step 4: Sentiment Distribution Charts
        st.markdown(" Sentiment Distribution")
        sentiment_counts = df['sentiment'].value_counts()

        # Define custom colors for the pie chart
        colors = {'positive': 'green', 'negative': 'red', 'neutral': 'gray'}
        # Ensure the order of colors matches the order of sentiments in sentiment_counts.index
        ordered_colors = [colors[s] for s in sentiment_counts.index]

        fig, ax = plt.subplots()
        wedges, texts, autotexts = ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=ordered_colors, textprops={'color': 'white'})

        # Make the percentage labels more visible
        for autotext in autotexts:
            autotext.set_color('white')
            autotext.set_fontsize(12)
        for text in texts:
            text.set_fontsize(10)

        ax.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
        st.pyplot(fig)

        #  Step 5: Word Cloud / Keywords
        st.markdown("# Common Keywords")
        # Ensure there are comments to generate a word cloud
        if not df['comment'].empty:
            text = " ".join(df['comment'])
            if text: # Check if text is not empty after joining
                wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
                fig_wc, ax_wc = plt.subplots(figsize=(10,5)) # Use separate fig, ax for wordcloud
                ax_wc.imshow(wordcloud, interpolation='bilinear')
                ax_wc.axis("off")
                st.pyplot(fig_wc)
            else:
                st.info("No relevant comments to generate a word cloud.")
        else:
            st.info("No comments available to generate a word cloud.")


        #  Step 6: Sample Comments
        st.markdown(" Sample Comments")
        for i, row in df.head(10).iterrows(): # Display top 10 sample comments
            sentiment_color = {"positive":"üü¢", "negative":"üî¥", "neutral":"üü°"}.get(row['sentiment'], "‚ö™") # Default to white circle if sentiment not found
            st.write(f"{sentiment_color} {row['comment']}")

        #  Download Section
        st.download_button(
            label="üíæ Download Results as CSV",
            data=df[['comment', 'sentiment', 'compound_score']].to_csv(index=False).encode('utf-8'),
            file_name='sentiment_results.csv',
            mime='text/csv'
        )
    else:
        st.warning("No comments were fetched or analyzed for the provided URL.")

Writing app.py


In [3]:
import os
from pyngrok import ngrok
from IPython.display import display, Markdown

# Start ngrok tunnel and launch Streamlit

# Terminate any existing ngrok tunnels
ngrok.kill()

# Get ngrok authentication token (replace 'YOUR_NGROK_AUTH_TOKEN' with your actual token)
# You can get an auth token from https://ngrok.com/signup
# It's recommended to store your ngrok token in Colab secrets.
# Click the 'üîë' icon on the left panel, add a new secret named 'NGROK_AUTH_TOKEN' and paste your token.

NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "36KSz5D47d8OFL50NyJAGErrlY7_6Ssx7WvAgSNd2GhtbdaKK")

if NGROK_AUTH_TOKEN =="YOUR_NGROK_AUTH_TOKEN":
    display(Markdown("**Please set your ngrok auth token in Colab secrets or replace 'YOUR_NGROK_AUTH_TOKEN' in the code.**"))
else:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)

    # Start a ngrok tunnel for Streamlit (default port is 8501)
    public_url = ngrok.connect(8501)
    display(Markdown(f"Your Streamlit app is live at: {public_url}"))

    # Run the Streamlit app in the background using 'python -m streamlit'
    !python -m streamlit run app.py &



Your Streamlit app is live at: NgrokTunnel: "https://egal-overstoutly-jayla.ngrok-free.dev" -> "http://localhost:8501"


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.185.168.191:8501[0m
[0m
[34m  Stopping...[0m
