BERT Modeling

In [2]:
# imports
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, get_linear_schedule_with_warmup
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

In [None]:
# Reproducibility: seed PyTorch (the default generator first, then every CUDA
# device when one is available) and NumPy's global generator with one value.
torch.manual_seed(123)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(123)
np.random.seed(123)

class Config:
    """Single namespace holding every tunable value read by the notebook."""

    # --- Pretrained checkpoint and tokenisation ---
    MODEL_NAME = 'bert-base-uncased'
    MAX_LENGTH = 128  # sequences are padded/truncated to this many tokens

    # --- Optimisation ---
    BATCH_SIZE = 16
    EPOCHS = 4
    LEARNING_RATE = 2e-5
    WARMUP_STEPS = 0
    WEIGHT_DECAY = 0.01

    # --- Dataset split, as fractions of the full dataset ---
    TRAIN_SIZE = 0.70
    VAL_SIZE = 0.15
    TEST_SIZE = 0.15

    # Resolved once, when the class body runs: GPU if available, otherwise CPU.
    DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Load the cleaned dataset; later cells read its 'text_bert' and 'label' columns.
df = pd.read_csv('data/cleaned_df.csv')

# Stage 1: hold out the test partition, stratified on the label.
train_val_df, test_df = train_test_split(
    df,
    stratify=df['label'],
    test_size=Config.TEST_SIZE,
    random_state=123,
)

# Stage 2: carve the validation partition out of what is left. VAL_SIZE is a
# fraction of the full dataset, so it is rescaled relative to the remainder.
val_size_adjusted = Config.VAL_SIZE / (Config.TRAIN_SIZE + Config.VAL_SIZE)
train_df, val_df = train_test_split(
    train_val_df,
    stratify=train_val_df['label'],
    test_size=val_size_adjusted,
    random_state=123,
)

In [None]:
# Load the tokenizer that belongs to the checkpoint named in Config.
tokenizer = BertTokenizer.from_pretrained(Config.MODEL_NAME)

# Report which tokenizer was loaded and how large its vocabulary is.
print(f"\nTokenizer loaded: {Config.MODEL_NAME}")
print(f"Vocabulary size: {tokenizer.vocab_size}")

In [None]:
class SentimentDataset(Dataset):
    """Custom Dataset for BERT sentiment analysis"""
    
    def __init__(self, texts, labels, tokenizer, max_length):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]
        
        # Tokenize and encode
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

In [None]:
# Wrap each split in a SentimentDataset. `.values` hands over plain arrays, and
# all three datasets share the same tokenizer and maximum sequence length.
train_dataset, val_dataset, test_dataset = (
    SentimentDataset(
        texts=split['text_bert'].values,
        labels=split['label'].values,
        tokenizer=tokenizer,
        max_length=Config.MAX_LENGTH,
    )
    for split in (train_df, val_df, test_df)
)

In [None]:
# Batch the three datasets. Only the training loader shuffles; validation and
# test keep their order.
train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=Config.BATCH_SIZE,
)
val_loader = DataLoader(
    val_dataset,
    shuffle=False,
    batch_size=Config.BATCH_SIZE,
)
test_loader = DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=Config.BATCH_SIZE,
)

# len() of a DataLoader is its number of batches.
print(f"\nDataLoaders created:")
print(f"  Train batches: {len(train_loader)}")
print(f"  Validation batches: {len(val_loader)}")
print(f"  Test batches: {len(test_loader)}")

In [None]:
# Load the pretrained checkpoint as a two-label sequence classifier and place
# it on the configured device in one step.
model = BertForSequenceClassification.from_pretrained(
    Config.MODEL_NAME,
    output_hidden_states=False,
    output_attentions=False,
    num_labels=2,  # binary task
).to(Config.DEVICE)

# Report placement and parameter counts (total vs. those that receive gradients).
print(f"\nModel loaded and moved to {Config.DEVICE}")
print(f"Number of parameters: {sum(p.numel() for p in model.parameters()):,}")
print(f"Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")


In [None]:
# AdamW over every model parameter, with the hyper-parameters from Config.
optimizer = AdamW(
    model.parameters(),
    weight_decay=Config.WEIGHT_DECAY,
    lr=Config.LEARNING_RATE,
    eps=1e-8,
)

# The scheduler is stepped once per batch (see train_epoch), so its horizon is
# batches-per-epoch times the number of epochs.
total_steps = Config.EPOCHS * len(train_loader)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_training_steps=total_steps,
    num_warmup_steps=Config.WARMUP_STEPS,
)

In [None]:
# Training and Evaluation functions

def train_epoch(model, dataloader, optimizer, scheduler, device):
    """Run one optimisation pass over ``dataloader``.

    Every batch is a dict with 'input_ids', 'attention_mask' and 'label'
    tensors. For each batch the function does a forward/backward pass, clips
    the gradient norm to 1.0, then steps the optimizer and the scheduler.

    Returns:
        tuple: (mean loss per batch, accuracy over all samples seen).
    """
    model.train()
    running_loss = 0
    seen_preds = []
    seen_labels = []

    batches = tqdm(dataloader, desc="Training")
    for batch in batches:
        # Transfer the three tensors of the batch to the target device.
        ids, mask, targets = (
            batch[key].to(device)
            for key in ('input_ids', 'attention_mask', 'label')
        )

        # Drop gradients left over from the previous step.
        model.zero_grad()

        # Passing labels makes the model return the loss next to the logits.
        result = model(input_ids=ids, attention_mask=mask, labels=targets)
        batch_loss = result.loss

        batch_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()

        # Book-keeping for the epoch-level metrics and the progress bar.
        loss_value = batch_loss.item()
        running_loss += loss_value
        seen_preds.extend(torch.argmax(result.logits, dim=1).cpu().numpy())
        seen_labels.extend(targets.cpu().numpy())
        batches.set_postfix({'loss': loss_value})

    return running_loss / len(dataloader), accuracy_score(seen_labels, seen_preds)

def evaluate(model, dataloader, device):
    """Score ``model`` on ``dataloader`` without updating any weights.

    Returns:
        tuple: (mean loss per batch, accuracy, precision, recall, f1,
        predictions, true_labels). Precision, recall and F1 use
        ``average='binary'``; the last two entries are per-sample lists.
    """
    model.eval()
    loss_sum = 0
    predictions = []
    true_labels = []

    # Inference only: no gradient tracking inside this context.
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            ids = batch['input_ids'].to(device)
            mask = batch['attention_mask'].to(device)
            targets = batch['label'].to(device)

            # Passing labels makes the model return the loss next to the logits.
            result = model(input_ids=ids, attention_mask=mask, labels=targets)

            loss_sum += result.loss.item()
            predictions.extend(torch.argmax(result.logits, dim=1).cpu().numpy())
            true_labels.extend(targets.cpu().numpy())

    mean_loss = loss_sum / len(dataloader)
    accuracy = accuracy_score(true_labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(
        true_labels,
        predictions,
        average='binary',
    )

    return mean_loss, accuracy, precision, recall, f1, predictions, true_labels

In [None]:
# Per-epoch metrics, one list entry per completed epoch (used by the plots).
history = {
    'train_loss': [],
    'train_acc': [],
    'val_loss': [],
    'val_acc': [],
    'val_precision': [],
    'val_recall': [],
    'val_f1': []
}

# Best validation accuracy seen so far and a snapshot of the weights that
# produced it (None until some epoch beats the initial value).
best_val_acc = 0
best_model_state = None

for epoch in range(Config.EPOCHS):
    print(f"Epoch {epoch + 1}/{Config.EPOCHS}")

    # Train
    train_loss, train_acc = train_epoch(model, train_loader, optimizer, scheduler, Config.DEVICE)

    # Validate
    val_loss, val_acc, val_precision, val_recall, val_f1, _, _ = evaluate(
        model, val_loader, Config.DEVICE
    )

    # Save history
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)
    history['val_precision'].append(val_precision)
    history['val_recall'].append(val_recall)
    history['val_f1'].append(val_f1)

    # Print epoch results
    print(f"\nResults:")
    print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"  Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
    print(f"  Val Precision: {val_precision:.4f} | Val Recall: {val_recall:.4f} | Val F1: {val_f1:.4f}")

    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        # state_dict() returns references to the live parameter tensors, and
        # dict.copy() is shallow, so the snapshot must clone every tensor.
        # Otherwise later epochs silently overwrite the "best" weights.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        print(f"  New best model (Validation Accuracy: {val_acc:.4f})")

# Load best model for testing. No snapshot exists if no epoch improved on the
# initial accuracy (or EPOCHS is 0); the current weights are kept in that case.
if best_model_state is not None:
    model.load_state_dict(best_model_state)
print(f"Training completed! Best validation accuracy: {best_val_acc:.4f}")

In [None]:
# Testing and Evaluation

# Final, one-off evaluation on the held-out test split.
(
    test_loss,
    test_acc,
    test_precision,
    test_recall,
    test_f1,
    test_preds,
    test_labels,
) = evaluate(model=model, dataloader=test_loader, device=Config.DEVICE)

# Per-class precision / recall / F1 table.
print(f"\nDetailed Classification Report:")
print(
    classification_report(
        y_true=test_labels,
        y_pred=test_preds,
        target_names=['Negative (Cons)', 'Positive (Pros)'],
    )
)


### Visualizations

In [None]:
# 2x2 dashboard: learning curves on the top row, test-set results below.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Top row: per-epoch loss (left) and accuracy (right), train vs. validation.
for ax, train_key, val_key, train_label, val_label, ylabel, title in (
    (axes[0, 0], 'train_loss', 'val_loss',
     'Train Loss', 'Validation Loss', 'Loss', 'Training and Validation Loss'),
    (axes[0, 1], 'train_acc', 'val_acc',
     'Train Accuracy', 'Validation Accuracy', 'Accuracy', 'Training and Validation Accuracy'),
):
    ax.plot(history[train_key], label=train_label, marker='o')
    ax.plot(history[val_key], label=val_label, marker='s')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

# Bottom-left: confusion matrix of the test predictions.
cm = confusion_matrix(test_labels, test_preds)
class_names = ['Negative', 'Positive']
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    ax=axes[1, 0],
    xticklabels=class_names,
    yticklabels=class_names,
)
axes[1, 0].set_xlabel('Predicted')
axes[1, 0].set_ylabel('Actual')
axes[1, 0].set_title('Confusion Matrix (Test Set)')

# Bottom-right: headline test metrics as a bar chart.
metrics_names = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
metrics_values = [test_acc, test_precision, test_recall, test_f1]
bars = axes[1, 1].bar(
    metrics_names,
    metrics_values,
    color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'],
)
axes[1, 1].set_ylabel('Score')
axes[1, 1].set_title('Test Set Performance Metrics')
axes[1, 1].set_ylim([0, 1])
axes[1, 1].grid(True, alpha=0.3, axis='y')

# Print each bar's value just above it, centred horizontally on the bar.
for bar in bars:
    height = bar.get_height()
    x_center = bar.get_x() + bar.get_width()/2.
    axes[1, 1].text(x_center, height, f'{height:.3f}', ha='center', va='bottom')

# Write the figure to disk before showing it.
plt.tight_layout()
plt.savefig('bert_sentiment_analysis_results.png', dpi=300, bbox_inches='tight')
print("\nVisualization saved as 'bert_sentiment_analysis_results.png'")

plt.show()


In [1]:
# 11. PREDICTION FUNCTION

def predict_sentiment(text, model, tokenizer, device, max_length=128):
    """
    Classify a single piece of text.

    Args:
        text: Input text string
        model: Classifier whose output exposes ``.logits``
        tokenizer: Tokenizer exposing ``encode_plus``
        device: torch device the inputs are moved to
        max_length: Length the encoded text is padded/truncated to

    Returns:
        prediction: index of the highest-scoring class, as an int
            (0 = negative, 1 = positive)
        confidence: softmax probability of that class, as a float
    """
    # Switch to evaluation mode before inference.
    model.eval()

    encoded = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )

    with torch.no_grad():
        logits = model(
            input_ids=encoded['input_ids'].to(device),
            attention_mask=encoded['attention_mask'].to(device),
        ).logits
        # Convert logits to class probabilities along the class axis.
        probs = logits.softmax(dim=1)
        prediction = probs.argmax(dim=1).item()
        confidence = probs[0, prediction].item()

    return prediction, confidence

# Demo: run the trained model on a few hand-written reviews.
print("\n" + "=" * 70)
print("EXAMPLE PREDICTIONS")
print("=" * 70)

example_texts = [
    "This camera is absolutely amazing! Great quality and easy to use.",
    "Terrible product, broke after one week. Very disappointed.",
    "It's okay, nothing special but does the job.",
]

for i, text in enumerate(example_texts, start=1):
    pred, conf = predict_sentiment(text, model, tokenizer, Config.DEVICE, Config.MAX_LENGTH)
    # Class 1 is reported as positive; anything else as negative.
    if pred == 1:
        sentiment = "Positive (Pros)"
    else:
        sentiment = "Negative (Cons)"
    print(f"\n{i}. Text: {text}")
    print(f"   Prediction: {sentiment} (Confidence: {conf:.2%})")


EXAMPLE PREDICTIONS


NameError: name 'model' is not defined