In [1]:
!pip install transformers torch datasets scikit-learn matplotlib seaborn wordcloud

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curan

In [2]:
!pip install --upgrade transformers

Collecting transformers
  Downloading transformers-4.53.1-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.9/40.9 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.53.1-py3-none-any.whl (10.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m83.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.52.4
    Uninstalling transformers-4.52.4:
      Successfully uninstalled transformers-4.52.4
Successfully installed transformers-4.53.1


In [3]:
# ✅ Enhanced and Cleaned-Up BERT Sentiment Analysis Script

import os
os.environ["WANDB_DISABLED"] = "true"  # Disable Weights & Biases logging

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import torch
from torch.utils.data import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    pipeline
)
from datasets import Dataset as HFDataset
import warnings
warnings.filterwarnings('ignore')


class SentimentDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'labels': torch.tensor(label, dtype=torch.long)
        }


class BERTSentimentAnalyzer:
    def __init__(self, model_name='bert-base-uncased'):
        self.model_name = model_name
        self.tokenizer = None
        self.model = None
        self.label_map = {0: 'negative', 1: 'neutral', 2: 'positive'}
        self.reverse_label_map = {v: k for k, v in self.label_map.items()}

    def load_model_and_tokenizer(self, num_labels=3):
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            self.model_name,
            num_labels=num_labels,
            problem_type="single_label_classification"
        )
        return self.model, self.tokenizer

    def create_sample_dataset(self, size=1500):
        positive_texts = [
            "I absolutely love this product!", "Fantastic experience overall.",
            "Great service and super quality!", "Exceeded all my expectations.",
            "Highly recommend this to everyone!", "The best I’ve ever used.",
            "Wonderful product, totally worth it!", "Customer support was amazing.",
            "Top-notch quality and fast shipping!", "This changed my life for the better."
        ]
        neutral_texts = [
            "It’s okay, nothing special.", "Average product, works fine.",
            "The experience was neutral.", "It's just what I expected.",
            "Meh. Not bad, not great.", "Fine for the price.",
            "Satisfactory, nothing more.", "It does the job.",
            "Meets basic expectations.", "Neither good nor bad."
        ]
        negative_texts = [
            "Terrible quality, very disappointed.", "I hate it, worst ever.",
            "Awful experience, waste of money.", "Broken right out of the box.",
            "Customer service was unhelpful.", "Would not buy again.",
            "Extremely dissatisfied.", "Not worth the price.",
            "Poor design and performance.", "Horrible, absolutely avoid this."
        ]

        texts, labels = [], []
        samples_per_class = size // 3
        for i in range(samples_per_class):
            texts.append(positive_texts[i % len(positive_texts)])
            labels.append(2)
            texts.append(neutral_texts[i % len(neutral_texts)])
            labels.append(1)
            texts.append(negative_texts[i % len(negative_texts)])
            labels.append(0)

        return pd.DataFrame({
            'text': texts,
            'label': labels,
            'sentiment': [self.label_map[label] for label in labels]
        })

    def preprocess_data(self, df):
        df['text'] = df['text'].str.lower().str.replace(r'[^\w\s]', '', regex=True)
        train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42, stratify=df['label'])
        val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42, stratify=temp_df['label'])
        return train_df, val_df, test_df

    def tokenize_data(self, df):
        tokenized = self.tokenizer(
            df['text'].tolist(),
            truncation=True,
            padding=True,
            max_length=128,
            return_tensors='pt'
        )
        return HFDataset.from_dict({
            'input_ids': tokenized['input_ids'],
            'attention_mask': tokenized['attention_mask'],
            'labels': df['label'].tolist()
        })

    def setup_training(self, train_dataset, val_dataset, output_dir='./bert_sentiment_model'):
        training_args = TrainingArguments(
            output_dir=output_dir,
            run_name="bert-sentiment-run",
            num_train_epochs=3,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=16,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=100,
            do_eval=True,
            save_steps=500,
            save_total_limit=1,
            remove_unused_columns=False
        )
        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            tokenizer=self.tokenizer,
        )
        return trainer

    def train_model(self, trainer):
        trainer.train()
        trainer.save_model()
        return trainer

    def evaluate_model(self, trainer, test_dataset, test_df):
        predictions = trainer.predict(test_dataset)
        y_pred = np.argmax(predictions.predictions, axis=1)
        y_true = test_df['label'].values
        accuracy = accuracy_score(y_true, y_pred)
        report = classification_report(y_true, y_pred, target_names=['negative', 'neutral', 'positive'], output_dict=True)
        print("\nEvaluation Results")
        print(f"Accuracy: {accuracy:.4f}")
        print(f"Macro F1: {report['macro avg']['f1-score']:.4f}")
        print(f"Weighted F1: {report['weighted avg']['f1-score']:.4f}")
        return accuracy, report, y_pred, y_true

    def create_inference_pipeline(self):
        return pipeline(
            "sentiment-analysis",
            model=self.model,
            tokenizer=self.tokenizer,
            return_all_scores=True
        )

    def test_inference(self, pipeline_model, test_texts):
        for text in test_texts:
            results = pipeline_model(text)
            top = max(results[0], key=lambda x: x['score'])
            print(f"\nText: {text}\nPrediction: {top['label']} ({top['score']:.4f})")


def main():
    analyzer = BERTSentimentAnalyzer()
    model, tokenizer = analyzer.load_model_and_tokenizer()
    df = analyzer.create_sample_dataset(size=600)
    train_df, val_df, test_df = analyzer.preprocess_data(df)
    train_dataset = analyzer.tokenize_data(train_df)
    val_dataset = analyzer.tokenize_data(val_df)
    test_dataset = analyzer.tokenize_data(test_df)
    trainer = analyzer.setup_training(train_dataset, val_dataset)
    trainer = analyzer.train_model(trainer)
    accuracy, report, y_pred, y_true = analyzer.evaluate_model(trainer, test_dataset, test_df)
    pipeline_model = analyzer.create_inference_pipeline()
    test_texts = [
        "I absolutely love this product!",
        "This is okay, nothing special.",
        "I hate this, it's terrible!"
    ]
    analyzer.test_inference(pipeline_model, test_texts)


if __name__ == "__main__":
    main()


2025-07-10 05:33:09.076786: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752125589.449024      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752125589.556476      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Step,Training Loss


Device set to use cuda:0



Evaluation Results
Accuracy: 0.5667
Macro F1: 0.5159
Weighted F1: 0.5159

Text: I absolutely love this product!
Prediction: LABEL_1 (0.3891)

Text: This is okay, nothing special.
Prediction: LABEL_1 (0.4359)

Text: I hate this, it's terrible!
Prediction: LABEL_1 (0.3977)
