In [None]:
!pip install transformers datasets torch
!pip install huggingface_hub


In [None]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from google.colab import drive
import os

# Mount Google Drive at /content/drive so that save_directory below resolves
# to a folder on the mounted Drive.
drive.mount('/content/drive')

# Hugging Face model identifier passed to from_pretrained() in setup_sentiment_model().
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
# Directory in which setup_sentiment_model() writes its "model" and "tokenizer" sub-directories.
save_directory = "/content/drive/MyDrive/sentiment_model/"

In [19]:
def setup_sentiment_model():
    """
    Download the pre-trained sentiment model and tokenizer and save them to disk.

    Reads the module-level ``model_name`` (model identifier handed to
    ``from_pretrained``) and ``save_directory`` (target directory). The model is
    written to ``<save_directory>/model`` and the tokenizer to
    ``<save_directory>/tokenizer``.

    Returns:
        tuple: ``(model_path, tokenizer_path)`` - absolute paths of the two
        directories that were written.
    """
    # Build the sub-paths with os.path.join so they are correct whether or not
    # save_directory ends with a path separator (plain string concatenation
    # would silently produce e.g. ".../sentiment_modelmodel").
    model_path = os.path.abspath(os.path.join(save_directory, "model"))
    tokenizer_path = os.path.abspath(os.path.join(save_directory, "tokenizer"))

    # exist_ok=True removes the check-then-create race and keeps re-runs idempotent.
    os.makedirs(save_directory, exist_ok=True)

    # Load pre-trained model and tokenizer
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Save model and tokenizer into their own sub-directories
    model.save_pretrained(model_path)
    tokenizer.save_pretrained(tokenizer_path)

    return model_path, tokenizer_path

In [27]:
def batch_test_sentiment(texts, model, tokenizer):
    """
    Analyze sentiment for multiple texts with confidence scores.

    Each text is tokenized on its own (truncated to at most 512 tokens) and run
    through the model in a separate forward pass with gradients disabled. The
    class with the highest softmax probability is converted to a 1-5 rating
    (class index + 1) and mapped to a sentiment label.

    Args:
        texts: Iterable of strings to analyze. A single string is treated as
            one text instead of being iterated character by character.
        model: Loaded sentiment model. It is called as ``model(**inputs)`` and
            must return an object with a ``logits`` attribute. If the model
            exposes a ``device`` attribute, the inputs are moved to that device
            before the forward pass.
        tokenizer: Loaded tokenizer, called with ``return_tensors="pt"``; must
            return a mapping of tensors.

    Returns:
        List of dictionaries, one per text, with keys:
            'text': the original input string
            'sentiment': label from "Very Negative" to "Very Positive"
            'confidence': highest class probability as a percentage,
                rounded to 2 decimals
            'rating': integer rating derived from the predicted class index

    Raises:
        KeyError: if the predicted class index falls outside 0-4, i.e. the
            model has more than five output classes.
    """
    # Map ratings to sentiments (built once; it does not change between texts)
    sentiment_map = {
        1: "Very Negative",
        2: "Negative",
        3: "Neutral",
        4: "Positive",
        5: "Very Positive"
    }

    # Guard against a bare string, which would otherwise be scored per character.
    if isinstance(texts, str):
        texts = [texts]

    # Models that report their device (e.g. after model.to("cuda")) need the
    # inputs on the same device; anything without the attribute is left as-is.
    device = getattr(model, "device", None)

    results = []

    with torch.no_grad():
        for text in texts:
            # Tokenize and get prediction
            inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
            if device is not None:
                inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

            outputs = model(**inputs)
            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)

            # Get highest probability and its index
            confidence, prediction = torch.max(probabilities, dim=1)
            rating = prediction.item() + 1
            confidence_score = confidence.item() * 100

            results.append({
                'text': text,
                'sentiment': sentiment_map[rating],
                'confidence': round(confidence_score, 2),
                'rating': rating
            })

    return results


In [28]:
# Load the pre-trained model and tokenizer here so this cell runs on a fresh
# kernel instead of relying on `model` / `tokenizer` left over from earlier,
# no-longer-present cells.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Sample inputs covering mixed, strongly positive, strongly negative and
# balanced statements.
test_texts = [
    "I hated the food but loved the ambience",
    "i really really loved your posts, i fell in love with your vibe cuz u speak very lovingly. lots of love love love",
    "This is the worst experience I've ever had.",
    "The product has some good features but also some drawbacks.",
]


results = batch_test_sentiment(test_texts, model, tokenizer)

# Print one short report per text.
for result in results:
    print("\nText:", result['text'])
    print(f"Sentiment: {result['sentiment']} (Rating: {result['rating']}/5)")
    print(f"Confidence: {result['confidence']}%")


Text: I hated the food but loved the ambience
Sentiment: Positive (Rating: 4/5)
Confidence: 34.08%

Text: i really really loved your posts, i fell in love with your vibe cuz u speak very lovingly. lots of love love love
Sentiment: Very Positive (Rating: 5/5)
Confidence: 86.09%

Text: This is the worst experience I've ever had.
Sentiment: Very Negative (Rating: 1/5)
Confidence: 93.8%

Text: The product has some good features but also some drawbacks.
Sentiment: Neutral (Rating: 3/5)
Confidence: 66.9%


In [None]:
# Download the model and tokenizer, save them under save_directory, and show
# the absolute paths returned by setup_sentiment_model().
model_path, tokenizer_path = setup_sentiment_model()
print(f"Model path: {model_path}")
print(f"Tokenizer path: {tokenizer_path}")