In [1]:
!pip install gradio transformers google-api-python-client --quiet

In [2]:
import gradio as gr
from transformers import pipeline
from googleapiclient.discovery import build
from google.colab import userdata
import re
import time

# The YouTube Data API key is read from Colab's secret store (never hard-coded here)
API_KEY = userdata.get('YOUTUBE_API_KEY')

# Zero-shot classification pipeline; built once at import time and reused for
# every comment that gets classified
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)

# Candidate labels handed to the classifier for each comment
LABELS = [
    "Question",
    "Appreciation",
    "Complaint",
    "Suggestion",
    "Joke",
    "Spam",
    "Toxic",
    "Story",
]

# Extract YouTube video ID from URL
# An ID is exactly 11 characters from [0-9A-Za-z_-], introduced by "v=" or "/".
# The negative lookahead rejects longer tokens (channel IDs, path words, ...)
# instead of silently truncating them to their first 11 characters.
_VIDEO_ID_PATTERN = re.compile(r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])")


def get_video_id(url):
    """Return the 11-character YouTube video ID found in *url*, or None.

    Handles the common URL shapes in which the ID follows ``v=`` (watch
    URLs) or a ``/`` (short links, embed/shorts paths).

    Args:
        url: The URL text to inspect. Non-string values (e.g. ``None``)
            are treated as "no ID" rather than raising.

    Returns:
        The video ID string, or ``None`` when no well-formed ID is present.
    """
    if not isinstance(url, str):
        return None

    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None

# Fetch top-level YouTube comments
def get_comments(video_id, max_results=20):
    """Collect up to *max_results* top-level comment texts for a video.

    Pages through the ``commentThreads`` endpoint of the YouTube Data API
    until enough comments have been gathered or no further page exists.

    Args:
        video_id: ID of the video whose comment threads are listed.
        max_results: Upper bound on the number of comments returned. The
            per-request ``maxResults`` is additionally capped at 100.

    Returns:
        A list of comment strings (plain-text format), in the order the
        API returned them.
    """
    service = build("youtube", "v3", developerKey=API_KEY)
    page_size = min(max_results, 100)

    page_request = service.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=page_size,
        textFormat="plainText",
    )

    collected = []
    while page_request and len(collected) < max_results:
        page = page_request.execute()

        # Take only as many threads from this page as are still needed.
        still_needed = max_results - len(collected)
        collected.extend(
            thread["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for thread in page["items"][:still_needed]
        )

        # list_next yields a falsy value once the last page was consumed.
        page_request = service.commentThreads().list_next(page_request, page)

    return collected

# Generator function to yield results dynamically
def classify_comments_live(youtube_url):
    """Classify a video's comments one by one, yielding progress as it goes.

    Each yield is a ``(status_message, rows)`` pair. ``rows`` is a list of
    ``[comment, top_label, confidence]`` entries that grows by one row per
    classified comment; the same list object is yielded every time.

    Args:
        youtube_url: URL of the video whose comments should be classified.

    Yields:
        Tuples of a status string and the rows classified so far. On an
        invalid URL or on any exception, a single error status with an
        empty row list is yielded.
    """
    video_id = get_video_id(youtube_url)
    if not video_id:
        yield "❌ Invalid YouTube URL", []
        return

    try:
        comments = get_comments(video_id)
        total = len(comments)
        rows = []  # data rows only; the header lives on the Dataframe component
        yield f"📥 Fetched {total} comments. Starting classification...", rows

        for position, text in enumerate(comments, start=1):
            prediction = classifier(text, candidate_labels=LABELS, multi_label=True)
            # Index 0 of labels/scores is taken as the top label and its score.
            best_label = prediction["labels"][0]
            best_score = round(prediction["scores"][0], 2)
            rows.append([text, best_label, best_score])
            yield f"✅ Classified comment {position} / {total}", rows
            time.sleep(0.1)

        yield "🎉 All comments classified!", rows

    except Exception as e:
        # Report the failure through the status output; the table is cleared.
        yield f"❌ Error: {e!s}", []

# Gradio App
# Builds the UI: a URL input and a button on top, a read-only status box and a
# three-column results table below. Components appear in creation order.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🧠 YouTube Comment Classifier with Zero-Shot Learning (Live)")
    # "top 20" matches the default max_results=20 of get_comments
    gr.Markdown("Paste a YouTube video URL. We'll classify the top 20 comments dynamically into intent-style labels.")

    youtube_url = gr.Textbox(label="🔗 YouTube Video URL")
    classify_btn = gr.Button("🚀 Classify Comments")

    # Output-only widgets: progress text and the growing table of results
    status = gr.Textbox(label="Status", interactive=False)
    result_table = gr.Dataframe(
        headers=["Comment", "Top Label", "Confidence"],
        row_count=(1, "dynamic"),
        col_count=(3, "fixed"),
        wrap=True
    )

    # classify_comments_live is a generator; each (status, rows) pair it yields
    # is mapped onto the two outputs listed here, in that order.
    classify_btn.click(fn=classify_comments_live, inputs=[youtube_url], outputs=[status, result_table])

# Start the web server for the app (runs when this cell executes)
app.launch()


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f0206223bd102e14cd.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


