In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
from tabulate import tabulate

In [2]:
# Checkpoint identifiers for the two classifiers being compared; each one is
# handed to `from_pretrained` when the tokenizers and models are loaded.
base_model_name, finetuned_model_name = (
    "AmaanP314/youtube-xlm-roberta-base-sentiment-multilingual",
    "gajula21/youtube-sentiment-model-telugu",
)

In [3]:
def _load_checkpoint(checkpoint_name):
    """Return `(tokenizer, model)` for one checkpoint, loading the tokenizer first."""
    return (
        AutoTokenizer.from_pretrained(checkpoint_name),
        AutoModelForSequenceClassification.from_pretrained(checkpoint_name),
    )


# Base checkpoint is loaded before the fine-tuned one.
base_tokenizer, base_model = _load_checkpoint(base_model_name)
finetuned_tokenizer, finetuned_model = _load_checkpoint(finetuned_model_name)

In [9]:
# Class index -> sentiment name, used to turn argmax indices into readable labels.
# NOTE(review): assumes both checkpoints order their classes as
# Negative/Neutral/Positive — confirm against each model's config.id2label.
labels = dict(enumerate(("Negative", "Neutral", "Positive")))

# Sample comments for the Telugu half of the comparison. The trailing comment
# on each entry records the sentiment the author expects; those expected labels
# are annotations only and are not read by any code in the visible cells.
# NOTE(review): these literals look mis-encoded (UTF-8 bytes rendered through a
# legacy single-byte codec, giving "‡∞…" runs instead of Telugu script). If the
# notebook on disk really contains this text, the models are being fed garbage
# — confirm and restore the original strings.
comments_telugu = [
    "‡∞ö‡∞æ‡∞≤‡∞æ ‡∞Æ‡∞Ç‡∞ö‡∞ø ‡∞µ‡±Ä‡∞°‡∞ø‡∞Ø‡±ã! ‡∞á‡∞®‡±ç‡∞´‡∞∞‡±ç‡∞Æ‡±á‡∞ü‡∞ø‡∞µ‡±ç ‡∞ó‡∞æ ‡∞â‡∞Ç‡∞¶‡∞ø.",  # Positive
    "‡∞∏‡±Ç‡∞™‡∞∞‡±ç ‡∞ï‡∞Ç‡∞ü‡±Ü‡∞Ç‡∞ü‡±ç! ‡∞á‡∞≤‡∞æ‡∞Ç‡∞ü‡∞ø ‡∞µ‡±Ä‡∞°‡∞ø‡∞Ø‡±ã‡∞≤‡±Å ‡∞Æ‡∞∞‡∞ø‡∞®‡±ç‡∞®‡∞ø ‡∞ö‡±á‡∞Ø‡∞Ç‡∞°‡∞ø.",  # Positive
    "‡∞Æ‡±Ä‡∞∞‡±Å ‡∞ö‡±Ü‡∞™‡±ç‡∞™‡∞ø‡∞® ‡∞µ‡∞ø‡∞ß‡∞æ‡∞®‡∞Ç ‡∞ö‡∞æ‡∞≤‡∞æ ‡∞¨‡∞æ‡∞ó‡±Å‡∞Ç‡∞¶‡∞ø, ‡∞Ö‡∞∞‡±ç‡∞•‡∞Ç ‡∞ö‡±á‡∞∏‡±Å‡∞ï‡±ã‡∞µ‡∞°‡∞æ‡∞®‡∞ø‡∞ï‡∞ø ‡∞∏‡±Å‡∞≤‡∞≠‡∞Ç‡∞ó‡∞æ ‡∞â‡∞Ç‡∞¶‡∞ø.",  # Positive
    "‡∞®‡∞µ‡±ç‡∞µ‡±Å ‡∞§‡±Ü‡∞™‡±ç‡∞™‡∞ø‡∞Ç‡∞ö‡∞ø‡∞Ç‡∞¶‡∞ø ‡∞à ‡∞µ‡±Ä‡∞°‡∞ø‡∞Ø‡±ã üòÇ",  # Positive
    "‡∞∏‡∞Ç‡∞ó‡±Ä‡∞§‡∞Ç ‡∞Ö‡∞¶‡±ç‡∞≠‡±Å‡∞§‡∞Ç‡∞ó‡∞æ ‡∞â‡∞Ç‡∞¶‡∞ø!",  # Positive
    "‡∞ß‡∞®‡±ç‡∞Ø‡∞µ‡∞æ‡∞¶‡∞æ‡∞≤‡±Å.",  # Neutral
    "‡∞ì‡∞ï‡±á ‡∞µ‡±Ä‡∞°‡∞ø‡∞Ø‡±ã.",  # Neutral
    "‡∞®‡±á‡∞®‡±Å ‡∞§‡∞∞‡±ç‡∞µ‡∞æ‡∞§ ‡∞ö‡±Ç‡∞∏‡±ç‡∞§‡∞æ‡∞®‡±Å.",  # Neutral
    "‡∞á‡∞¶‡∞ø ‡∞é‡∞≤‡∞æ ‡∞™‡∞®‡∞ø ‡∞ö‡±á‡∞∏‡±ç‡∞§‡±Å‡∞Ç‡∞¶‡∞ø?",  # Neutral
    "‡∞∏‡∞Æ‡∞Ø‡∞Ç ‡∞é‡∞Ç‡∞§ ‡∞™‡∞ü‡±ç‡∞ü‡∞ø‡∞Ç‡∞¶‡∞ø?",  # Neutral
    "‡∞á‡∞¶‡∞ø ‡∞ö‡∞æ‡∞≤‡∞æ ‡∞®‡∞ø‡∞¶‡∞æ‡∞®‡∞Ç‡∞ó‡∞æ ‡∞â‡∞Ç‡∞¶‡∞ø.",  # Negative
    "‡∞®‡∞æ‡∞ï‡±Å ‡∞®‡∞ö‡±ç‡∞ö‡∞≤‡±á‡∞¶‡±Å.",  # Negative
    "‡∞∏‡±å‡∞Ç‡∞°‡±ç ‡∞∏‡∞∞‡∞ø‡∞ó‡∞æ ‡∞≤‡±á‡∞¶‡±Å.",  # Negative
    "‡∞ö‡∞æ‡∞≤‡∞æ ‡∞§‡∞™‡±ç‡∞™‡±Å‡∞≤‡±Å ‡∞â‡∞®‡±ç‡∞®‡∞æ‡∞Ø‡∞ø.",  # Negative
    "‡∞á‡∞¶‡∞ø ‡∞∏‡∞Æ‡∞Ø‡∞Ç ‡∞µ‡±É‡∞ß‡∞æ.",  # Negative
    "‡∞ö‡±Ü‡∞§‡±ç‡∞§ ‡∞µ‡±Ä‡∞°‡∞ø‡∞Ø‡±ã!",  # Negative
]

# Sample comments for the English half of the comparison. The trailing comment
# on each entry records the sentiment the author expects; those expected labels
# are annotations only and are not read by any code in the visible cells.
# NOTE(review): the emoji in "Made my day! ..." looks mis-encoded — confirm
# against the original notebook.
comments_english = [
    "Absolutely loved this! Thanks for sharing.",  # Positive
    "This is so helpful, I learned a lot.",  # Positive
    "The editing is fantastic!",  # Positive
    "Made my day! üòä",  # Positive
    "Awesome content, keep it up!",  # Positive
    "Thanks.",  # Neutral
    "Interesting video.",  # Neutral
    "I'll check this out later.",  # Neutral
    "How long did this take to make?",  # Neutral
    "What software did you use?",  # Neutral
    "This was boring.",  # Negative
    "The quality is really bad.",  # Negative
    "I didn't understand anything.",  # Negative
    "Too long and repetitive.",  # Negative
    "Don't waste your time watching this.",  # Negative
    "This is terrible!",  # Negative
    "Why is it so laggy?"  # Negative (implies a problem)
]

In [10]:
def predict_sentiment(model, tokenizer, texts, label_map, batch_size=32):
    """Classify each text and return its label name.

    Args:
        model: Callable taking the tokenizer's output as keyword arguments and
            returning an object with a `.logits` tensor of shape
            (batch, num_classes). If it exposes `.device`, inputs are moved
            there; if it exposes `.training`, it is switched to eval mode for
            the duration of the call.
        tokenizer: Callable accepting a list of strings plus
            `return_tensors`, `padding` and `truncation`, returning a mapping
            of tensors.
        texts: A single string or an iterable of strings.
        label_map: Mapping from class index (int) to label name.
        batch_size: Maximum number of texts per forward pass (must be >= 1).

    Returns:
        A list with one label per input text, in input order. Empty input
        yields an empty list.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
        KeyError: If the model predicts an index missing from `label_map`.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    # A bare string is one comment, not an iterable of characters.
    texts = [texts] if isinstance(texts, str) else list(texts)
    if not texts:
        # Never hand an empty batch to the tokenizer/model.
        return []

    # Inputs must live on the same device as the model's weights.
    device = getattr(model, "device", None)

    # Dropout must be off for deterministic predictions; restore the caller's
    # mode afterwards so this helper has no lasting side effect.
    was_training = getattr(model, "training", False)
    if was_training:
        model.eval()

    predictions = []
    try:
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True)
            if device is not None:
                inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
            with torch.no_grad():
                logits = model(**inputs).logits
            class_ids = torch.argmax(logits, dim=1).tolist()
            predictions.extend(label_map[class_id] for class_id in class_ids)
    finally:
        if was_training:
            model.train()
    return predictions

In [11]:
# Base model first, fine-tuned model second — the unpacking below relies on it.
model_pairs = (
    (base_model, base_tokenizer),
    (finetuned_model, finetuned_tokenizer),
)

base_preds_telugu, finetuned_preds_telugu = [
    predict_sentiment(model, tokenizer, comments_telugu, labels)
    for model, tokenizer in model_pairs
]

base_preds_english, finetuned_preds_english = [
    predict_sentiment(model, tokenizer, comments_english, labels)
    for model, tokenizer in model_pairs
]

In [12]:
def build_comparison_df(comments, base_preds, finetuned_preds):
    """Return a three-column DataFrame pairing each comment with both models' predictions.

    The three arguments become the columns "Comment", "Base Model Prediction"
    and "Fine-Tuned Model Prediction", in that order.
    """
    headers = ("Comment", "Base Model Prediction", "Fine-Tuned Model Prediction")
    columns = (comments, base_preds, finetuned_preds)
    return pd.DataFrame(dict(zip(headers, columns)))

# One comparison table per language, Telugu first.
df_telugu, df_english = [
    build_comparison_df(*columns)
    for columns in (
        (comments_telugu, base_preds_telugu, finetuned_preds_telugu),
        (comments_english, base_preds_english, finetuned_preds_english),
    )
]

In [None]:
# Render each comparison table as a plain-text grid under its heading.
report_sections = (
    ("\nTelugu Comments Sentiment Comparison:\n", df_telugu),
    ("\nEnglish Comments Sentiment Comparison:\n", df_english),
)

for heading, frame in report_sections:
    print(heading)
    print(tabulate(frame, headers="keys", tablefmt="grid", showindex=False))


Telugu Comments Sentiment Comparison:

+------------------------------------------+-------------------------+-------------------------------+
| Comment                                  | Base Model Prediction   | Fine-Tuned Model Prediction   |
| ‡∞ö‡∞æ‡∞≤‡∞æ ‡∞Æ‡∞Ç‡∞ö‡∞ø ‡∞µ‡±Ä‡∞°‡∞ø‡∞Ø‡±ã! ‡∞á‡∞®‡±ç‡∞´‡∞∞‡±ç‡∞Æ‡±á‡∞ü‡∞ø‡∞µ‡±ç ‡∞ó‡∞æ ‡∞â‡∞Ç‡∞¶‡∞ø.                 | Positive                | Positive                      |
+------------------------------------------+-------------------------+-------------------------------+
| ‡∞∏‡±Ç‡∞™‡∞∞‡±ç ‡∞ï‡∞Ç‡∞ü‡±Ü‡∞Ç‡∞ü‡±ç! ‡∞á‡∞≤‡∞æ‡∞Ç‡∞ü‡∞ø ‡∞µ‡±Ä‡∞°‡∞ø‡∞Ø‡±ã‡∞≤‡±Å ‡∞Æ‡∞∞‡∞ø‡∞®‡±ç‡∞®‡∞ø ‡∞ö‡±á‡∞Ø‡∞Ç‡∞°‡∞ø.              | Positive                | Positive                      |
+------------------------------------------+-------------------------+-------------------------------+
| ‡∞Æ‡±Ä‡∞∞‡±Å ‡∞ö‡±Ü‡∞™‡±ç‡∞™‡∞ø‡∞® ‡∞µ‡∞ø‡∞ß‡∞æ‡∞®‡∞Ç ‡∞ö‡∞æ‡∞≤‡∞æ ‡∞¨‡∞æ‡∞ó‡±Å‡∞Ç‡∞¶‡∞ø, ‡∞Ö‡∞∞‡±ç‡∞•‡∞Ç ‡∞ö‡±á‡∞∏‡±Å‡∞ï‡±ã‡∞µ‡∞°‡∞æ‡∞®‡∞ø‡∞ï‡∞ø ‡∞∏‡±Å‡∞≤‡∞≠‡∞Ç‡∞ó