# Text Sentiment Analysis (DistilBERT)

In [1]:
from transformers import pipeline
import pandas as pd
from tqdm import tqdm  # For progress bar

# Load CSV data
df = pd.read_csv("cleaned_reviews.csv")

# Initialize Hugging Face sentiment pipeline (on CPU)
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    device=-1  # Use CPU to avoid GPU issues
)

# Prepare text column.
# Missing values must be mapped to '' BEFORE the string cast: casting first
# turns NaN into the literal text "nan", which would then be scored by the
# model instead of being treated as an empty review below.
texts = ['' if pd.isna(value) else str(value) for value in df['full_review']]

# Analyze in batches with progress bar
batch_size = 32
labels = []
scores = []

for i in tqdm(range(0, len(texts), batch_size), desc="Analyzing Sentiments"):
    batch = texts[i:i+batch_size]
    # truncation=True clips reviews longer than the model's maximum input
    # length; without it a single over-long review aborts the whole run.
    predictions = sentiment_pipeline(batch, truncation=True)

    for pred, review in zip(predictions, batch):
        if review.strip():
            labels.append(pred['label'])
            scores.append(round(pred['score'], 4))  # Keep 4 decimals for confidence
        else:
            # Empty / whitespace-only / missing review: the model output is
            # meaningless here, so record a neutral label with no confidence.
            labels.append('NEUTRAL')
            scores.append(None)

# Add predictions to the DataFrame (one label and score per input row, in order)
df['bert_sentiment'] = labels
df['confidence'] = scores

# Save the updated data to CSV
df.to_csv("reviews_with_sentiment.csv", index=False)


2025-04-23 16:32:02.593245: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-23 16:32:02.630171: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9373] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-23 16:32:02.630204: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-23 16:32:02.631145: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1534] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-23 16:32:02.636875: I tensorflow/core/platform/cpu_feature_guar