In [None]:
# ============================================
# CELL 1: SETUP & GPU CHECK
# ============================================

!pip install transformers accelerate -q

import torch
import os

# Report the PyTorch build and every CUDA device visible to this kernel.
print('='*60)
print('üñ•Ô∏è  SYSTEM INFO')
print('='*60)
print(f'PyTorch: {torch.__version__}')

if not torch.cuda.is_available():
    print('‚ö†Ô∏è GPU not available!')
else:
    n_gpu = torch.cuda.device_count()
    print(f'‚úÖ GPU Available: {n_gpu} GPU(s)')
    for i in range(n_gpu):
        gpu_name = torch.cuda.get_device_name(i)
        # total_memory is reported in bytes; convert to GiB for display.
        gpu_mem = torch.cuda.get_device_properties(i).total_memory / 1024**3
        print(f'   GPU {i}: {gpu_name} ({gpu_mem:.1f} GB)')

# List input files
print('\nüìÅ Input files:')
input_files = [
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
]
for input_file in input_files:
    print(f'   {input_file}')

In [None]:
# ============================================
# CELL 2: IMPORTS & SEED
# ============================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from tqdm.auto import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from transformers import BertTokenizer, BertModel, get_linear_schedule_with_warmup
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_recall_fscore_support,
    classification_report, confusion_matrix, f1_score
)
import random
import copy
import json
from datetime import datetime
import time

# Suppress every warning category for the rest of the session. Note that this
# also hides deprecation notices emitted by the libraries imported above.
warnings.filterwarnings('ignore')

# Reproducibility
def set_seed(seed=42):
    """Seed every RNG used by the notebook and pin cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy, and PyTorch
            (CPU and all CUDA devices).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Deterministic kernels on, autotuner off.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

set_seed(42)

# Device: use CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'üéÆ Using device: {device}')

In [None]:
# ============================================
# CELL 3: LOAD DATA
# ============================================

# Auto-detect data file.
# `search_patterns` is ordered from most to least preferred. All CSV files
# under /kaggle/input are collected first so that a preferred pattern wins no
# matter which directory os.walk happens to visit first; sorting the paths
# makes the choice deterministic when several files match the same pattern.
DATA_PATH = None
search_patterns = ['gojek_reviews_5class_clean', 'gojek_reviews_5class', 'gojek']

csv_paths = sorted(
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
    if filename.endswith('.csv')
)

for pattern in search_patterns:
    # Match on the file name only, as the original search did.
    matches = [path for path in csv_paths if pattern in os.path.basename(path)]
    if matches:
        DATA_PATH = matches[0]
        break

if DATA_PATH:
    print(f'‚úÖ Found: {DATA_PATH}')
    df = pd.read_csv(DATA_PATH)
else:
    # Nothing matched: show what is mounted so the user can spot a misnamed upload.
    print('‚ùå Data not found! Available files:')
    for dirname, _, filenames in os.walk('/kaggle/input'):
        for f in filenames:
            print(f'   {os.path.join(dirname, f)}')
    raise FileNotFoundError('Please upload gojek_reviews_5class_clean.csv')

# Data overview
print('\n' + '='*60)
print('üìä DATA OVERVIEW')
print('='*60)
print(f'Total samples: {len(df):,}')
print(f'Columns: {df.columns.tolist()}')

# Check required columns: prefer the pre-cleaned text when it exists, and fail
# here with a readable message rather than later inside the dataset/split cells.
text_col = 'content_clean' if 'content_clean' in df.columns else 'content'
missing_cols = [col for col in (text_col, 'sentiment') if col not in df.columns]
if missing_cols:
    raise KeyError(
        f'Missing required column(s) {missing_cols}; found {df.columns.tolist()}'
    )
print(f'\nText column: {text_col}')
print(f'\nüìà Sentiment Distribution:')
print(df['sentiment'].value_counts())

# Visualize the class balance as a bar chart in ordinal sentiment order.
fig, ax = plt.subplots(figsize=(10, 4))
colors = {
    'sangat_negatif': '#c0392b',
    'negatif': '#e74c3c',
    'netral': '#f39c12',
    'positif': '#2ecc71',
    'sangat_positif': '#27ae60'
}
order = ['sangat_negatif', 'negatif', 'netral', 'positif', 'sangat_positif']
# fill_value=0 keeps a class that is absent from the data as a zero-height bar
# labelled "0" instead of a NaN height with a "nan" label.
counts = df['sentiment'].value_counts().reindex(order, fill_value=0)
bars = ax.bar(counts.index, counts.values, color=[colors.get(s, '#3498db') for s in counts.index])
ax.set_title('Sentiment Distribution (5-Class)', fontsize=14, fontweight='bold')
ax.set_ylabel('Count')
plt.xticks(rotation=15)
# Annotate each bar with its count, slightly above the bar top.
for bar, count in zip(bars, counts.values):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 50,
            f'{count:,}', ha='center', fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# ============================================
# CELL 4: CONFIGURATION
# ============================================

# Class names in ordinal order; a name's list position is its integer label.
LABEL_NAMES = ['sangat_negatif', 'negatif', 'netral', 'positif', 'sangat_positif']
LABEL_MAP = {name: idx for idx, name in enumerate(LABEL_NAMES)}
NUM_CLASSES = len(LABEL_NAMES)

# Hyper-parameters consumed by the cells below (author's note: tuned for
# Kaggle 2xT4, with the 5-class task given more capacity).
CONFIG = dict(
    # Model
    model_name='indobenchmark/indobert-base-p1',
    max_length=128,
    num_classes=NUM_CLASSES,

    # Training
    batch_size=32,
    epochs=25,
    learning_rate=1.5e-5,

    # Anti-overfitting
    dropout_rate=0.35,
    # NOTE(review): no cell visible in this notebook reads `attention_dropout` - confirm.
    attention_dropout=0.15,
    weight_decay=0.01,
    label_smoothing=0.1,
    warmup_ratio=0.1,
    max_grad_norm=1.0,
    early_stopping_patience=6,

    # Layer freezing: embeddings plus the first `freeze_layers` encoder layers
    freeze_embeddings=True,
    freeze_layers=6,

    # R-Drop regularization
    use_rdrop=True,
    rdrop_alpha=0.5,

    # Data augmentation (training split only)
    augment_train=True,
    word_dropout_prob=0.1,
)

print('='*60)
print('‚öôÔ∏è  TRAINING CONFIGURATION (5-CLASS)')
print('='*60)
print('\n'.join(f'  {k}: {v}' for k, v in CONFIG.items()))

In [None]:
# ============================================
# CELL 5: PREPARE DATA SPLITS
# ============================================

# Add label column
df['label'] = df['sentiment'].map(LABEL_MAP)

# Any sentiment value missing from LABEL_MAP maps to NaN; stop here with a
# readable message instead of letting NaN labels reach the split or the model.
unmapped = df.loc[df['label'].isna(), 'sentiment']
if len(unmapped) > 0:
    raise ValueError(
        f'{len(unmapped)} row(s) have a sentiment outside LABEL_MAP: '
        f'{sorted(map(str, unmapped.unique()))}'
    )
df['label'] = df['label'].astype(int)

# Stratified split: 80% train, 10% val, 10% test
train_df, temp_df = train_test_split(
    df, test_size=0.2, random_state=42, stratify=df['label']
)
# The held-out 20% is halved into validation and test, again stratified.
val_df, test_df = train_test_split(
    temp_df, test_size=0.5, random_state=42, stratify=temp_df['label']
)

print('='*60)
print('üìÇ DATA SPLITS (Stratified)')
print('='*60)
print(f'Train: {len(train_df):,} ({len(train_df)/len(df)*100:.0f}%)')
print(f'Val:   {len(val_df):,} ({len(val_df)/len(df)*100:.0f}%)')
print(f'Test:  {len(test_df):,} ({len(test_df)/len(df)*100:.0f}%)')

print(f'\nüìä Distribution per split:')
for name, split_df in [('Train', train_df), ('Val', val_df), ('Test', test_df)]:
    dist = split_df['sentiment'].value_counts()
    print(f'  {name}: {dict(dist)}')

In [None]:
# ============================================
# CELL 6: DATASET CLASS
# ============================================

# Load tokenizer
# Fetches the BertTokenizer stored under CONFIG['model_name']; this one
# instance is passed to every SentimentDataset built later in this cell.
tokenizer = BertTokenizer.from_pretrained(CONFIG['model_name'])
print(f'‚úÖ Tokenizer loaded: {CONFIG["model_name"]}')

class SentimentDataset(Dataset):
    """Map-style dataset that tokenizes one text per item, with optional augmentation.

    Args:
        texts: Indexable sequence of raw texts (missing values are allowed).
        labels: Indexable sequence of integer class ids, aligned with ``texts``.
        tokenizer: Callable tokenizer; invoked as ``tokenizer(text, ...)`` and
            expected to return a mapping with ``input_ids`` and
            ``attention_mask`` tensors.
        max_length: Length every sequence is padded/truncated to.
        augment: When True, roughly half of the fetched items are perturbed by
            word dropout or an adjacent-word swap.
        word_dropout_prob: Per-word drop probability used by word dropout.
    """

    def __init__(self, texts, labels, tokenizer, max_length=128,
                 augment=False, word_dropout_prob=0.1):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.augment = augment
        self.word_dropout_prob = word_dropout_prob

    def __len__(self):
        return len(self.texts)

    @staticmethod
    def _to_text(text):
        """Coerce a raw cell to ``str``; None/NaN become '' instead of 'None'/'nan'."""
        if text is None or (isinstance(text, float) and text != text):
            return ''
        return str(text)

    def _augment_text(self, text):
        """Return ``text`` unchanged or lightly perturbed (training-time augmentation).

        Texts of three words or fewer are never modified. Otherwise one of two
        perturbations is chosen with equal probability: independent word
        dropout, or swapping one random pair of adjacent words.
        """
        if not self.augment or random.random() > 0.5:
            return text

        words = str(text).split()
        if len(words) <= 3:
            return text

        if random.random() < 0.5:
            # Random word dropout
            words = [w for w in words if random.random() > self.word_dropout_prob]
        else:
            # Random adjacent-word swap (at least 4 words are guaranteed here).
            idx = random.randint(0, len(words) - 2)
            words[idx], words[idx + 1] = words[idx + 1], words[idx]

        # If dropout removed every word, fall back to the untouched text.
        return ' '.join(words) if words else text

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` (both 1-D) and ``label`` for item ``idx``."""
        text = self._augment_text(self._to_text(self.texts[idx]))

        # Calling the tokenizer directly replaces the deprecated encode_plus().
        encoding = self.tokenizer(
            str(text),
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        return {
            # return_tensors='pt' adds a leading batch axis of size 1; drop it.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(self.labels[idx], dtype=torch.long)
        }

# Create datasets: only the training split is augmented.
train_dataset = SentimentDataset(
    texts=train_df[text_col].values,
    labels=train_df['label'].values,
    tokenizer=tokenizer,
    max_length=CONFIG['max_length'],
    augment=CONFIG['augment_train'],
    word_dropout_prob=CONFIG['word_dropout_prob'],
)
val_dataset = SentimentDataset(
    texts=val_df[text_col].values,
    labels=val_df['label'].values,
    tokenizer=tokenizer,
    max_length=CONFIG['max_length'],
    augment=False,
)
test_dataset = SentimentDataset(
    texts=test_df[text_col].values,
    labels=test_df['label'].values,
    tokenizer=tokenizer,
    max_length=CONFIG['max_length'],
    augment=False,
)

# Create dataloaders: shared settings once, shuffling only for training.
_loader_kwargs = dict(batch_size=CONFIG['batch_size'], num_workers=2, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

print(f'\n‚úÖ DataLoaders created:')
for split_label, split_dataset, split_loader in (
    ('Train:', train_dataset, train_loader),
    ('Val:  ', val_dataset, val_loader),
    ('Test: ', test_dataset, test_loader),
):
    print(f'  {split_label} {len(split_dataset):,} samples, {len(split_loader)} batches')

In [None]:
# ============================================
# CELL 7: MODEL ARCHITECTURE
# ============================================

class IndoBERTClassifier(nn.Module):
    """BERT encoder with a two-layer classification head.

    The pooled encoder output goes through dropout, a 256-unit linear layer
    with GELU, a lighter dropout, and a final linear layer producing logits.

    Args:
        model_name: Identifier passed to ``BertModel.from_pretrained``.
        num_classes: Number of output logits.
        dropout_rate: Dropout before the hidden layer; half of this rate is
            applied before the output layer.
        freeze_embeddings: If True, embedding parameters get ``requires_grad=False``.
        freeze_layers: Number of leading encoder layers to freeze. Values
            below 0 freeze nothing.

    Raises:
        ValueError: If ``freeze_layers`` exceeds the encoder's layer count.
    """

    def __init__(self, model_name, num_classes, dropout_rate=0.35,
                 freeze_embeddings=True, freeze_layers=6):
        super().__init__()

        self.bert = BertModel.from_pretrained(model_name)
        self.hidden_size = self.bert.config.hidden_size

        # Freeze embeddings
        if freeze_embeddings:
            for param in self.bert.embeddings.parameters():
                param.requires_grad = False

        # Freeze first N encoder layers. Take the layer count from the loaded
        # model instead of assuming 12, and reject out-of-range requests with a
        # clear error rather than an IndexError.
        encoder_layers = self.bert.encoder.layer
        num_layers = len(encoder_layers)
        if freeze_layers > num_layers:
            raise ValueError(
                f'freeze_layers={freeze_layers} exceeds the {num_layers} '
                f'encoder layers of {model_name}'
            )
        n_frozen = max(0, freeze_layers)
        for layer in encoder_layers[:n_frozen]:
            for param in layer.parameters():
                param.requires_grad = False

        # Classifier head with hidden layer for 5-class
        self.dropout1 = nn.Dropout(dropout_rate)
        self.hidden = nn.Linear(self.hidden_size, 256)
        self.dropout2 = nn.Dropout(dropout_rate * 0.5)
        self.classifier = nn.Linear(256, num_classes)

        # Initialize the new head: Xavier-uniform weights, zero biases.
        nn.init.xavier_uniform_(self.hidden.weight)
        nn.init.zeros_(self.hidden.bias)
        nn.init.xavier_uniform_(self.classifier.weight)
        nn.init.zeros_(self.classifier.bias)

        # Describe the frozen/trainable ranges from the real layer count so the
        # message stays correct for 0 frozen layers and for other model depths.
        frozen_span = 'none' if n_frozen == 0 else f'0-{n_frozen-1}'
        trainable_span = 'none' if n_frozen == num_layers else f'{n_frozen}-{num_layers-1}'
        print(f'‚úÖ Model initialized (5-Class)')
        print(f'   Embeddings frozen: {freeze_embeddings}')
        print(f'   Layers frozen: {frozen_span} (training {trainable_span})')

    def forward(self, input_ids, attention_mask):
        """Return unnormalised class logits for a batch of token ids and masks."""
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = outputs.pooler_output

        x = self.dropout1(pooled)
        x = F.gelu(self.hidden(x))  # GELU activation
        x = self.dropout2(x)
        logits = self.classifier(x)
        return logits

# Initialize model and move it to the selected device.
model = IndoBERTClassifier(
    CONFIG['model_name'],
    CONFIG['num_classes'],
    dropout_rate=CONFIG['dropout_rate'],
    freeze_embeddings=CONFIG['freeze_embeddings'],
    freeze_layers=CONFIG['freeze_layers'],
)
model = model.to(device)

# Count parameters in a single pass over the model.
total_params = 0
trainable_params = 0
for param in model.parameters():
    n_elements = param.numel()
    total_params += n_elements
    if param.requires_grad:
        trainable_params += n_elements

print(f'\nüìä Parameters:')
print(f'   Total: {total_params:,}')
print(f'   Trainable: {trainable_params:,} ({trainable_params/total_params*100:.1f}%)')
print(f'   Frozen: {total_params - trainable_params:,}')

In [None]:
# ============================================
# CELL 8: OPTIMIZER & SCHEDULER
# ============================================

# Loss with label smoothing
criterion = nn.CrossEntropyLoss(label_smoothing=CONFIG['label_smoothing'])

# Optimizer: only trainable parameters, split into a weight-decayed group and
# a group (biases and LayerNorm weights) that is exempt from decay.
no_decay = ['bias', 'LayerNorm.weight']
decay_params, no_decay_params = [], []
for param_name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    if any(nd in param_name for nd in no_decay):
        no_decay_params.append(param)
    else:
        decay_params.append(param)

optimizer_params = [
    {'params': decay_params, 'weight_decay': CONFIG['weight_decay']},
    {'params': no_decay_params, 'weight_decay': 0.0},
]

optimizer = AdamW(optimizer_params, lr=CONFIG['learning_rate'])

# Scheduler with warmup: one step per batch over the full epoch budget.
total_steps = CONFIG['epochs'] * len(train_loader)
warmup_steps = int(total_steps * CONFIG['warmup_ratio'])

scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps,
)

print('‚úÖ Optimizer & Scheduler configured')
print(f'   LR: {CONFIG["learning_rate"]}')
print(f'   Weight Decay: {CONFIG["weight_decay"]}')
print(f'   Warmup Steps: {warmup_steps}')
print(f'   Total Steps: {total_steps}')

In [None]:
# ============================================
# CELL 9: TRAINING FUNCTIONS
# ============================================

def compute_kl_loss(p, q):
    """Symmetric KL divergence between two sets of logits (R-Drop consistency term).

    Args:
        p: Logits from the first forward pass.
        q: Logits from the second forward pass, same shape as ``p``.

    Returns:
        Scalar tensor: the mean of the two directed KL terms, each reduced
        with ``batchmean`` over softmax distributions along the last axis.
    """
    def _directed(pred_logits, target_logits):
        # F.kl_div expects log-probabilities as input and probabilities as target.
        return F.kl_div(
            F.log_softmax(pred_logits, dim=-1),
            F.softmax(target_logits, dim=-1),
            reduction='batchmean',
        )

    return (_directed(p, q) + _directed(q, p)) / 2

def train_epoch(model, loader, criterion, optimizer, scheduler, device, 
                use_rdrop=True, rdrop_alpha=0.5, max_grad_norm=1.0):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Network called as ``model(input_ids, attention_mask)``.
        loader: Iterable of batches with ``input_ids``, ``attention_mask``
            and ``label`` entries.
        criterion: Loss applied to ``(logits, labels)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: LR scheduler stepped once per batch.
        device: Device the batch tensors are moved to.
        use_rdrop: If True, do two forward passes per batch and add a
            symmetric-KL consistency term between them.
        rdrop_alpha: Weight of the KL term.
        max_grad_norm: Gradient-norm clipping threshold.

    Returns:
        Tuple ``(mean batch loss, accuracy, weighted F1)`` for the epoch.
    """
    model.train()
    running_loss = 0
    epoch_preds, epoch_labels = [], []

    progress = tqdm(loader, desc='Training')
    for batch in progress:
        input_ids, attention_mask, labels = (
            batch[key].to(device) for key in ('input_ids', 'attention_mask', 'label')
        )

        optimizer.zero_grad()

        if not use_rdrop:
            logits = model(input_ids, attention_mask)
            loss = criterion(logits, labels)
        else:
            # R-Drop: the two passes differ only through dropout noise.
            first_logits = model(input_ids, attention_mask)
            second_logits = model(input_ids, attention_mask)

            ce_loss = (criterion(first_logits, labels) + criterion(second_logits, labels)) / 2
            kl_loss = compute_kl_loss(first_logits, second_logits)
            loss = ce_loss + rdrop_alpha * kl_loss

            # Predictions for the metrics come from the averaged logits.
            logits = (first_logits + second_logits) / 2

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_preds.extend(torch.argmax(logits, dim=1).cpu().numpy())
        epoch_labels.extend(labels.cpu().numpy())

        progress.set_postfix({'loss': f'{batch_loss:.4f}'})

    return (
        running_loss / len(loader),
        accuracy_score(epoch_labels, epoch_preds),
        f1_score(epoch_labels, epoch_preds, average='weighted'),
    )

def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without updating any weights.

    Args:
        model: Network called as ``model(input_ids, attention_mask)``.
        loader: Iterable of batches with ``input_ids``, ``attention_mask``
            and ``label`` entries.
        criterion: Loss applied to ``(logits, labels)``.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(mean batch loss, accuracy, weighted F1, predictions, labels,
        probabilities)``; the last three are per-sample lists.
    """
    model.eval()
    running_loss = 0
    pred_list, label_list, prob_list = [], [], []

    with torch.no_grad():
        for batch in loader:
            input_ids, attention_mask, labels = (
                batch[key].to(device) for key in ('input_ids', 'attention_mask', 'label')
            )

            logits = model(input_ids, attention_mask)
            running_loss += criterion(logits, labels).item()

            pred_list.extend(torch.argmax(logits, dim=1).cpu().numpy())
            label_list.extend(labels.cpu().numpy())
            prob_list.extend(F.softmax(logits, dim=1).cpu().numpy())

    mean_loss = running_loss / len(loader)
    acc = accuracy_score(label_list, pred_list)
    weighted_f1 = f1_score(label_list, pred_list, average='weighted')

    return mean_loss, acc, weighted_f1, pred_list, label_list, prob_list

class EarlyStopping:
    """Track the best validation score and signal when training should stop.

    Call the instance once per epoch with a higher-is-better score. It keeps a
    snapshot of the model weights from the best epoch in ``best_model`` and
    sets ``early_stop`` once ``patience`` consecutive calls bring no improvement.

    Args:
        patience: Number of consecutive non-improving calls tolerated.
        min_delta: Minimum increase over ``best_score`` that counts as an
            improvement.
    """

    def __init__(self, patience=6, min_delta=0.001):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_score = None
        self.best_model = None
        self.early_stop = False

    @staticmethod
    def _snapshot(model):
        """Copy the model's state dict to CPU so the snapshot uses no GPU memory."""
        state = model.state_dict()
        snapshot = type(state)(
            (name, tensor.detach().cpu().clone()) for name, tensor in state.items()
        )
        # Carry over the versioning metadata that state_dict() attaches.
        metadata = getattr(state, '_metadata', None)
        if metadata is not None:
            snapshot._metadata = copy.deepcopy(metadata)
        return snapshot

    def __call__(self, score, model):
        """Record ``score``; return True if it is a new best, else False.

        A NaN score never counts as an improvement, so it cannot become
        ``best_score`` and block every later comparison.
        """
        is_nan = score != score
        improved = not is_nan and (
            self.best_score is None or score > self.best_score + self.min_delta
        )
        if improved:
            self.best_score = score
            self.best_model = self._snapshot(model)
            self.counter = 0
            return True

        # Always keep some weights to restore, even if no valid score has arrived yet.
        if self.best_model is None:
            self.best_model = self._snapshot(model)
        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True
        return False

# Confirmation that this cell's helper definitions ran to completion.
print('‚úÖ Training functions defined')

In [None]:
# ============================================
# CELL 10: TRAINING LOOP
# ============================================

# Per-epoch metric log: train_loss, train_acc, train_f1, val_loss, val_acc, val_f1.
history = {
    f'{split}_{metric}': []
    for split in ('train', 'val')
    for metric in ('loss', 'acc', 'f1')
}

early_stopping = EarlyStopping(patience=CONFIG['early_stopping_patience'])

print('='*60)
print('üöÄ TRAINING STARTED (5-CLASS)')
print('='*60)
print(f'Epochs: {CONFIG["epochs"]} | Batch: {CONFIG["batch_size"]} | LR: {CONFIG["learning_rate"]}')
print(f'R-Drop: {CONFIG["use_rdrop"]} | Early Stop Patience: {CONFIG["early_stopping_patience"]}')
print('-'*60)

start_time = time.time()
best_val_f1 = 0
best_epoch = 0

for epoch in range(CONFIG['epochs']):
    print(f'\nüìç Epoch {epoch + 1}/{CONFIG["epochs"]}')

    # Train
    train_loss, train_acc, train_f1 = train_epoch(
        model, train_loader, criterion, optimizer, scheduler, device,
        use_rdrop=CONFIG['use_rdrop'],
        rdrop_alpha=CONFIG['rdrop_alpha'],
        max_grad_norm=CONFIG['max_grad_norm'],
    )

    # Validate (per-sample outputs are not needed during training)
    val_loss, val_acc, val_f1 = evaluate(model, val_loader, criterion, device)[:3]

    # Save history; the values follow the key order of `history`.
    epoch_metrics = (train_loss, train_acc, train_f1, val_loss, val_acc, val_f1)
    for metric_key, metric_value in zip(history, epoch_metrics):
        history[metric_key].append(metric_value)

    # Train/val accuracy gap as a rough overfitting indicator.
    gap = train_acc - val_acc
    if gap > 0.10:
        gap_status = ' ‚ö†Ô∏è Overfitting!'
    elif gap > 0.05:
        gap_status = ' ‚ö° Watch gap'
    else:
        gap_status = ' ‚úÖ Good'

    # Print metrics
    print(f'  Train | Loss: {train_loss:.4f} | Acc: {train_acc:.4f} | F1: {train_f1:.4f}')
    print(f'  Val   | Loss: {val_loss:.4f} | Acc: {val_acc:.4f} | F1: {val_f1:.4f}')
    print(f'  Gap   | {gap*100:.2f}%' + gap_status)

    # Early stopping check (also snapshots the weights on improvement)
    improved = early_stopping(val_f1, model)
    if improved:
        best_val_f1, best_epoch = val_f1, epoch + 1
        print(f'  ‚≠ê New best model! F1: {val_f1:.4f}')

    if early_stopping.early_stop:
        print(f'\nüõë Early stopping at epoch {epoch + 1}')
        break

# Load best model
model.load_state_dict(early_stopping.best_model)

total_time = time.time() - start_time
print(f'\n‚úÖ Training completed in {total_time/60:.1f} minutes')
print(f'   Best epoch: {best_epoch}')
print(f'   Best val F1: {best_val_f1:.4f}')

In [None]:
# ============================================
# CELL 11: TRAINING VISUALIZATION
# ============================================

fig, axes = plt.subplots(1, 3, figsize=(15, 4))

epochs_range = range(1, len(history['train_loss']) + 1)

# One panel per metric: (history key suffix, y-axis label, panel title).
panels = (
    ('loss', 'Loss', 'Loss (5-Class)'),
    ('acc', 'Accuracy', 'Accuracy (5-Class)'),
    ('f1', 'F1 Score', 'F1 Score (5-Class)'),
)
for panel_ax, (metric, ylabel, title) in zip(axes, panels):
    panel_ax.plot(epochs_range, history[f'train_{metric}'], 'b-o', label='Train', markersize=4)
    panel_ax.plot(epochs_range, history[f'val_{metric}'], 'r-s', label='Val', markersize=4)
    if metric == 'f1':
        # Mark the best validation F1 reached during training.
        panel_ax.axhline(y=best_val_f1, color='g', linestyle='--', label=f'Best: {best_val_f1:.4f}')
    panel_ax.set_xlabel('Epoch')
    panel_ax.set_ylabel(ylabel)
    panel_ax.set_title(title, fontweight='bold')
    panel_ax.legend()
    panel_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('training_history_5class.png', dpi=150, bbox_inches='tight')
plt.show()


def _gap_color(gap_value):
    """Traffic-light colour for a train-val accuracy gap (fractions, not percent)."""
    if gap_value > 0.10:
        return 'red'
    if gap_value > 0.05:
        return 'orange'
    return 'green'


# Gap analysis: per-epoch train minus validation accuracy, in percent.
plt.figure(figsize=(8, 4))
gaps = [train_value - val_value for train_value, val_value in zip(history['train_acc'], history['val_acc'])]
colors = [_gap_color(g) for g in gaps]
plt.bar(epochs_range, [g*100 for g in gaps], color=colors)
plt.axhline(y=10, color='red', linestyle='--', label='Overfitting threshold (10%)')
plt.axhline(y=5, color='orange', linestyle='--', label='Warning threshold (5%)')
plt.xlabel('Epoch')
plt.ylabel('Train-Val Gap (%)')
plt.title('Overfitting Analysis (5-Class)', fontweight='bold')
plt.legend()
plt.tight_layout()
plt.savefig('overfitting_analysis_5class.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# ============================================
# CELL 12: TEST EVALUATION
# ============================================

print('='*60)
print('üß™ FINAL TEST EVALUATION (5-CLASS)')
print('='*60)

# Score the restored model once on the held-out test split.
test_results = evaluate(model, test_loader, criterion, device)
test_loss, test_acc, test_f1, test_preds, test_labels, test_probs = test_results

print(f'\nüìä Test Results:')
print(f'   Loss: {test_loss:.4f}')
print(f'   Accuracy: {test_acc:.4f} ({test_acc*100:.2f}%)')
print(f'   F1 Score: {test_f1:.4f}')

# Classification report
print('\nüìã Classification Report:')
report = classification_report(test_labels, test_preds, target_names=LABEL_NAMES, digits=4)
print(report)

# Confusion matrix (rows: actual class, columns: predicted class)
cm = confusion_matrix(test_labels, test_preds)
plt.figure(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=LABEL_NAMES,
    yticklabels=LABEL_NAMES,
)
plt.title('Confusion Matrix - Test Set (5-Class)', fontweight='bold')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.xticks(rotation=20)
plt.yticks(rotation=0)
plt.tight_layout()
plt.savefig('confusion_matrix_5class.png', dpi=150, bbox_inches='tight')
plt.show()

# Per-class accuracy: share of each true class that was predicted correctly.
print('\nüìä Per-Class Accuracy:')
labels_arr = np.array(test_labels)
preds_arr = np.array(test_preds)
for i, name in enumerate(LABEL_NAMES):
    class_mask = labels_arr == i
    class_acc = np.mean(preds_arr[class_mask] == i)
    print(f'   {name}: {class_acc:.4f} ({class_acc*100:.2f}%)')

In [None]:
# ============================================
# CELL 13: SAVE MODEL
# ============================================

# Save model
save_path = '/kaggle/working/indobert_sentiment_5class.pt'

# The metric values may be NumPy scalars (they come from scikit-learn). Cast
# them to builtin float/int so the checkpoint and the JSON file hold only plain
# Python numbers; this keeps the pickled payload free of NumPy scalar objects,
# which restricted loaders such as torch.load(weights_only=True) may refuse.
history_plain = {key: [float(value) for value in values] for key, values in history.items()}

torch.save({
    'model_state_dict': model.state_dict(),
    'config': CONFIG,
    'label_map': LABEL_MAP,
    'label_names': LABEL_NAMES,
    'metrics': {
        'test_accuracy': float(test_acc),
        'test_f1': float(test_f1),
        'best_val_f1': float(best_val_f1),
        'best_epoch': int(best_epoch)
    },
    'history': history_plain
}, save_path)

print(f'‚úÖ Model saved to: {save_path}')
print(f'   File size: {os.path.getsize(save_path) / 1024 / 1024:.1f} MB')

# Save training history
history_path = '/kaggle/working/training_history_5class.json'
with open(history_path, 'w') as f:
    json.dump(history_plain, f, indent=2)
print(f'‚úÖ History saved to: {history_path}')

In [None]:
# ============================================
# CELL 14: INFERENCE FUNCTION
# ============================================

def predict_sentiment(text, model, tokenizer, device, label_names, max_length=128):
    """Predict the sentiment of a single text.

    Args:
        text: Input to classify; converted with ``str()`` before tokenizing.
        model: Network called as ``model(input_ids, attention_mask)`` that
            returns logits with one row per input. It is put into eval mode.
        tokenizer: Callable tokenizer returning ``input_ids`` and
            ``attention_mask`` tensors.
        device: Device the encoded tensors are moved to.
        label_names: Class names indexed by class id.
        max_length: Pad/truncate length. Defaults to 128, the value that was
            previously hard-coded; pass the training-time ``max_length`` if it
            differs.

    Returns:
        Dict with ``sentiment`` (predicted class name), ``confidence``
        (its probability) and ``probabilities`` (name -> probability).
    """
    model.eval()

    # Calling the tokenizer directly replaces the deprecated encode_plus().
    encoding = tokenizer(
        str(text),
        add_special_tokens=True,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )

    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        logits = model(input_ids, attention_mask)
        probs = F.softmax(logits, dim=1)
        pred = torch.argmax(logits, dim=1).item()

    return {
        'sentiment': label_names[pred],
        'confidence': probs[0][pred].item(),
        'probabilities': {name: probs[0][i].item() for i, name in enumerate(label_names)}
    }

# Test predictions: run the trained model on a few hand-written reviews.
print('='*60)
print('üîÆ SAMPLE PREDICTIONS (5-CLASS)')
print('='*60)

test_texts = [
    "Aplikasi sangat membantu, driver ramah dan cepat sampai. Sangat puas!",
    "Lumayan bagus, tapi masih ada yang perlu diperbaiki",
    "Biasa saja, tidak ada yang istimewa",
    "Agak kecewa dengan pelayanan driver kali ini",
    "Aplikasi error terus, driver tidak profesional, sangat mengecewakan. Parah!"
]

for text in test_texts:
    result = predict_sentiment(text, model, tokenizer, device, LABEL_NAMES)
    # Long reviews are cut to 60 characters and marked with an ellipsis.
    if len(text) > 60:
        shown_text = f'{text[:60]}...'
    else:
        shown_text = text
    print(f'\nüìù "{shown_text}"')
    print(f'   Sentiment: {result["sentiment"].upper()}')
    print(f'   Confidence: {result["confidence"]*100:.1f}%')

In [None]:
# ============================================
# CELL 15: FINAL SUMMARY
# ============================================

print('='*60)
print('üìä TRAINING SUMMARY (5-CLASS)')
print('='*60)

# The saved-files list includes every plot written by the visualization and
# evaluation cells, including the overfitting analysis figure.
summary = f"""
üéØ MODEL: IndoBERT Sentiment 5-Class

üìà METRICS:
   ‚Ä¢ Test Accuracy: {test_acc*100:.2f}%
   ‚Ä¢ Test F1 Score: {test_f1:.4f}
   ‚Ä¢ Best Val F1:   {best_val_f1:.4f} (epoch {best_epoch})

‚öôÔ∏è CONFIGURATION:
   ‚Ä¢ Model: {CONFIG['model_name']}
   ‚Ä¢ Epochs trained: {len(history['train_loss'])}
   ‚Ä¢ Batch size: {CONFIG['batch_size']}
   ‚Ä¢ Learning rate: {CONFIG['learning_rate']}
   ‚Ä¢ Frozen layers: {CONFIG['freeze_layers']}/12
   ‚Ä¢ Dropout: {CONFIG['dropout_rate']}
   ‚Ä¢ R-Drop alpha: {CONFIG['rdrop_alpha']}

üìÇ DATA:
   ‚Ä¢ Train: {len(train_df):,}
   ‚Ä¢ Val: {len(val_df):,}
   ‚Ä¢ Test: {len(test_df):,}

üíæ SAVED FILES:
   ‚Ä¢ Model: indobert_sentiment_5class.pt
   ‚Ä¢ History: training_history_5class.json
   ‚Ä¢ Plots: training_history_5class.png, overfitting_analysis_5class.png, confusion_matrix_5class.png
"""

print(summary)

# Check overfitting
print(f'\nüîç OVERFITTING CHECK:')
n_epochs_run = len(history['train_acc'])
if n_epochs_run == 0:
    # Nothing was trained, so there is no gap to report.
    print('   No epochs recorded; skipping the check.')
else:
    final_gap = history['train_acc'][-1] - history['val_acc'][-1]
    # The weights kept after training are those of the best epoch, so the
    # verdict is based on that epoch's gap rather than the last epoch's.
    best_idx = best_epoch - 1 if 1 <= best_epoch <= n_epochs_run else n_epochs_run - 1
    best_gap = history['train_acc'][best_idx] - history['val_acc'][best_idx]
    print(f'   Final train-val gap: {final_gap*100:.2f}%')
    print(f'   Best-epoch train-val gap: {best_gap*100:.2f}%')
    if best_gap < 0.05:
        print('   ‚úÖ Excellent generalization!')
    elif best_gap < 0.10:
        print('   ‚ö° Good generalization with minor gap')
    else:
        print('   ‚ö†Ô∏è Some overfitting detected')

print('\n' + '='*60)
print('‚úÖ Training complete! Download model from /kaggle/working/')
print('='*60)