<a href="https://colab.research.google.com/github/Uttumon/Big_Data_Analytics/blob/main/BDA2_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
!pip install transformers pandas

from transformers import pipeline
import pandas as pd
import time

# Build the sentiment classifier from the DistilBERT SST-2 checkpoint.
# Per the label mapping used in this notebook, it emits only
# 'POSITIVE' / 'NEGATIVE' labels.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Tiny hand-labelled sample set. Labels are restricted to
# POSITIVE (1) and NEGATIVE (-1) because no NEUTRAL class is predicted.
data = dict(
    review=[
        "I love this product! It's amazing.",
        "The food was terrible and overpriced.",
        "Fantastic movie! Highly recommend.",
        "The service was slow and unprofessional.",
    ],
    label=[1, -1, 1, -1],
)

# Persist the samples so the evaluation step can reload them from disk.
df = pd.DataFrame(data)
df.to_csv("big_data_reviews.csv", index=False)
print("Dataset saved as big_data_reviews.csv")

# Evaluation Function
def evaluate_huggingface(data, sample_size=1000, classifier=None):
    """Measure classification accuracy and wall-clock time on labelled reviews.

    Args:
        data: DataFrame with a ``review`` column (text) and a ``label``
            column (1 = positive, -1 = negative).
        sample_size: Maximum number of leading rows to evaluate. Negative
            values are treated as zero.
        classifier: Optional callable that takes a review string and returns
            a sequence whose first item is a dict with a ``'label'`` key of
            ``'POSITIVE'`` or ``'NEGATIVE'``. Defaults to the module-level
            ``sentiment_pipeline``.

    Returns:
        A dict with ``'accuracy'`` (fraction of evaluated rows predicted
        correctly; 0.0 when no rows are evaluated) and ``'execution_time'``
        (elapsed seconds).
    """
    if classifier is None:
        classifier = sentiment_pipeline
    label_to_value = {'NEGATIVE': -1, 'POSITIVE': 1}

    start_time = time.time()
    # Clamp so an empty frame or a non-positive sample_size cannot produce a
    # zero or negative denominator below.
    total = max(0, min(sample_size, len(data)))
    subset = data.iloc[:total]  # explicit positional slice of the first rows
    correct = 0

    for idx, text, expected_sentiment in zip(
        subset.index, subset['review'], subset['label']
    ):
        try:
            response = classifier(text)
            # Unknown labels map to None and therefore count as incorrect.
            predicted_value = label_to_value.get(response[0]['label'])
        except Exception as e:
            # Best-effort evaluation: report the failing row, count it as
            # incorrect, and keep going.
            print(f"Error processing review [{idx}]: {e}")
            continue

        if predicted_value == expected_sentiment:
            correct += 1

    accuracy = correct / total if total else 0.0
    execution_time = time.time() - start_time
    return {'accuracy': accuracy, 'execution_time': execution_time}

# Reload the reviews that were written to disk earlier in this cell.
data = pd.read_csv("big_data_reviews.csv")

# Score the classifier on the reloaded reviews and report the metrics.
results = evaluate_huggingface(data)
print(
    f"Hugging Face Model Accuracy: {results['accuracy']:.2f}, "
    f"Execution Time: {results['execution_time']:.2f} seconds"
)




Device set to use cpu


Dataset saved as big_data_reviews.csv
Hugging Face Model Accuracy: 1.00, Execution Time: 1.39 seconds
