# DimABSA Training - DESS Model with VA Regression

**Task**: Subtask 2 - Dimensional Aspect Sentiment Triplet Extraction (DimASTE)

**Model**: DESS (Dual-channel Enhanced Sentiment Span) adapted for VA regression

**Dataset**: Combined Restaurant + Laptop (3,727 training samples)

---

## Setup Instructions

### 1. Upload Required Files to Kaggle Dataset

Create a Kaggle dataset with these files:
```
dimabsa-dess-data/
├── DESS/Codebase/
│   ├── models/
│   ├── trainer/
│   ├── data/
│   │   ├── dimabsa_combined/
│   │   │   ├── train_dep_triple_polarity_result.json
│   │   │   └── test_dep_triple_polarity_result.json
│   │   └── types_va.json
│   ├── Parameter.py
│   └── train.py
```

### 2. Enable GPU
- Go to Settings → Accelerator → GPU T4 x2 (or P100)

### 3. Run All Cells

## 1. Environment Setup

In [None]:
# Install dependencies
!pip install -q torch transformers scikit-learn tqdm
!pip install -q torch-geometric torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.0.0+cu118.html

import sys
import os
import json
import torch
import numpy as np
from pathlib import Path

# Report the PyTorch build and, when a GPU is visible, its name and memory.
cuda_ok = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ok}")
if cuda_ok:
    gpu_index = 0  # only the first visible device is reported
    print(f"CUDA device: {torch.cuda.get_device_name(gpu_index)}")
    total_gb = torch.cuda.get_device_properties(gpu_index).total_memory / 1e9
    print(f"CUDA memory: {total_gb:.2f} GB")

## 2. Load Data and Verify

In [None]:
# Set paths (adjust based on your Kaggle dataset)
DATA_PATH = "/kaggle/input/dimabsa-dess-data/DESS/Codebase"
# Put the uploaded codebase on the import path so the later
# `models`, `trainer` and `Parameter` imports resolve. The membership check
# keeps sys.path from growing each time this cell is re-run.
if DATA_PATH not in sys.path:
    sys.path.insert(0, DATA_PATH)

# Verify data
train_path = f"{DATA_PATH}/data/dimabsa_combined/train_dep_triple_polarity_result.json"
test_path = f"{DATA_PATH}/data/dimabsa_combined/test_dep_triple_polarity_result.json"

# Context managers close the file handles as soon as parsing finishes
# (a bare json.load(open(...)) leaves them open until garbage collection).
with open(train_path, encoding="utf-8") as f:
    train_data = json.load(f)
with open(test_path, encoding="utf-8") as f:
    test_data = json.load(f)

print(f"Training samples: {len(train_data)}")
print(f"Test samples: {len(test_data)}")

# Peek at the first training record; skipped when the split is empty so the
# cell reports the counts above instead of failing with an IndexError.
if train_data:
    first_sample = train_data[0]
    print(f"\nSample structure:")
    print(f"  Tokens: {len(first_sample['tokens'])}")
    print(f"  Entities: {len(first_sample['entities'])}")
    print(f"  Sentiments: {len(first_sample['sentiments'])}")
    if first_sample['sentiments']:
        print(f"  Sample VA: {first_sample['sentiments'][0]['type']}")

## 3. Training Configuration

In [None]:
# Hyperparameters and output locations read by the cells below.
CONFIG = dict(
    dataset='dimabsa_combined',
    model_type='deberta-v3-base',  # base-size model: faster on Kaggle
    batch_size=4,  # tune to the available GPU memory
    epochs=10,
    learning_rate=5e-5,
    max_grad_norm=1.0,
    warmup_proportion=0.1,
    weight_decay=0.01,
    max_span_size=10,
    neg_entity_count=100,
    neg_senti_count=100,
    save_path='/kaggle/working/checkpoints',
    log_path='/kaggle/working/logs',
)

# Make sure the checkpoint and log folders exist (no error if already there).
for _dir_key in ('save_path', 'log_path'):
    os.makedirs(CONFIG[_dir_key], exist_ok=True)

# Echo every setting, one per line, for the notebook log.
print("Configuration:")
print("\n".join(f"  {name}: {value}" for name, value in CONFIG.items()))

## 4. Initialize Model and Training Components

In [None]:
from transformers import AutoTokenizer, AutoConfig
from models.D2E2S_Model import D2E2SModel
from trainer.input_reader import JsonInputReader
from trainer.loss import D2E2SLoss
from Parameter import train_argparser

# Load tokenizer
# The hub id is derived from CONFIG['model_type'] so that setting actually
# selects the checkpoint; with the default 'deberta-v3-base' this resolves to
# "microsoft/deberta-v3-base", the value that used to be hard-coded here.
pretrained_name = f"microsoft/{CONFIG['model_type']}"
tokenizer = AutoTokenizer.from_pretrained(pretrained_name)
print(f"Tokenizer loaded: {tokenizer.__class__.__name__}")

# Load data reader
# Sampling limits and the maximum span size come from CONFIG.
types_path = f"{DATA_PATH}/data/types_va.json"
input_reader = JsonInputReader(
    types_path=types_path,
    tokenizer=tokenizer,
    neg_entity_count=CONFIG['neg_entity_count'],
    neg_senti_count=CONFIG['neg_senti_count'],
    max_span_size=CONFIG['max_span_size']
)

# Read datasets
# Maps a split label to its JSON file; the labels are the keys later passed
# to get_dataset().
dataset_paths = {
    'train': train_path,
    'test': test_path
}
input_reader.read(dataset_paths)

train_dataset = input_reader.get_dataset('train')
test_dataset = input_reader.get_dataset('test')

print(f"\nDatasets loaded:")
print(f"  Train: {len(train_dataset)} samples")
print(f"  Test: {len(test_dataset)} samples")
print(f"  Entity types: {input_reader.entity_type_count}")
print(f"  Sentiment types: {input_reader.sentiment_type_count}")

In [None]:
# Initialize model
# Resolve the checkpoint from CONFIG['model_type'] rather than a hard-coded
# string, so the configuration cell is the single place that names the model.
# The default value yields "microsoft/deberta-v3-base", as before.
config = AutoConfig.from_pretrained(f"microsoft/{CONFIG['model_type']}")

# Lightweight stand-in for the settings object handed to D2E2SModel as `args`.
# NOTE(review): the attribute names presumably mirror the options defined in
# the project's Parameter.py; confirm there before changing any value.
class Args:
    # Layer / feature widths
    hidden_dim = 768
    deberta_feature_dim = 768
    emb_dim = 1536
    mem_dim = 300
    gcn_dim = 300
    size_embedding = 25

    # Dropout rates
    prop_drop = 0.1
    drop_out_rate = 0.5
    gcn_dropout = 0.5

    # Architecture switches
    freeze_transformer = False
    is_bidirect = True
    lstm_layers = 1
    span_generator = "Max"

    # Taken from CONFIG so the model sees the same batch size as the notebook
    batch_size = CONFIG['batch_size']


args = Args()

# Pick the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the network and place it on the chosen device in one step.
model = D2E2SModel(
    config=config,
    cls_token=tokenizer.cls_token_id,
    sentiment_types=2,  # two regression outputs: valence and arousal
    entity_types=input_reader.entity_type_count,
    args=args
).to(device)

# Walk the parameters once, remembering each size and whether it is trainable.
param_info = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(size for size, _ in param_info)
trainable_params = sum(size for size, needs_grad in param_info if needs_grad)

print(f"Model initialized on {device}")
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

## 5. Setup Optimizer and Loss

In [None]:
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

import math

# Optimizer
optimizer = AdamW(
    model.parameters(),
    lr=CONFIG['learning_rate'],
    weight_decay=CONFIG['weight_decay']
)

# Scheduler
# The training DataLoader in this notebook does not set drop_last, so the
# final partial batch of each epoch is kept and an epoch has
# ceil(N / batch_size) batches. Floor division under-counted the schedule
# length, letting the linear decay reach zero before the last batches ran.
# NOTE(review): assumes D2E2SLoss.compute steps the scheduler once per batch;
# confirm in trainer/loss.py.
steps_per_epoch = math.ceil(len(train_dataset) / CONFIG['batch_size'])
num_training_steps = steps_per_epoch * CONFIG['epochs']
num_warmup_steps = int(num_training_steps * CONFIG['warmup_proportion'])

scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps
)

# Loss functions
# reduction='none' returns per-element losses; how they are combined is left
# to D2E2SLoss.
entity_criterion = torch.nn.CrossEntropyLoss(reduction='none')
senti_criterion = torch.nn.MSELoss(reduction='none')  # MSE for VA regression

# The loss wrapper also receives the optimizer, scheduler and gradient-norm
# limit; the training loop in this notebook never steps them itself.
loss_fn = D2E2SLoss(
    senti_criterion=senti_criterion,
    entity_criterion=entity_criterion,
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    max_grad_norm=CONFIG['max_grad_norm']
)

print(f"Optimizer: AdamW (lr={CONFIG['learning_rate']})")
print(f"Scheduler: Linear warmup ({num_warmup_steps} steps)")
print(f"Total training steps: {num_training_steps}")
print(f"Entity loss: CrossEntropyLoss")
print(f"Sentiment loss: MSELoss (VA regression)")

## 6. Training Loop

In [None]:
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader
import time

def train_epoch(model, dataloader, loss_fn, epoch):
    """Run one training pass over ``dataloader`` and return the mean batch loss.

    Args:
        model: Network called with the batch fields as keyword arguments; it
            must return ``(entity_logits, senti_logits, batch_loss)``.
        dataloader: Sized iterable yielding dict batches.
        loss_fn: Object whose ``compute`` method takes the model outputs and
            the batch targets/masks and returns the batch loss.
        epoch: Zero-based epoch index, used only in the progress-bar label.

    Returns:
        float: Sum of the per-batch losses divided by ``len(dataloader)``.

    Raises:
        ZeroDivisionError: If ``dataloader`` has length zero.

    Note:
        Reads the module-level ``device`` and ``CONFIG``. This function does
        not call ``backward()`` or step the optimizer itself; presumably
        ``loss_fn.compute`` does that (confirm in trainer/loss.py).
    """
    model.train()
    total_loss = 0.0

    pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{CONFIG['epochs']}")
    for batch in pbar:
        # Move batch to device (non-tensor entries are passed through as-is)
        batch = {k: v.to(device) if isinstance(v, torch.Tensor) else v
                 for k, v in batch.items()}

        # Forward pass
        entity_logits, senti_logits, batch_loss = model(
            encodings=batch['encodings'],
            context_masks=batch['context_masks'],
            entity_masks=batch['entity_masks'],
            entity_sizes=batch['entity_sizes'],
            sentiments=batch['sentiments'],
            senti_masks=batch['senti_masks'],
            adj=batch['adj']
        )

        # Compute loss
        loss = loss_fn.compute(
            entity_logits, senti_logits, batch_loss,
            batch['entity_types'], batch['senti_types'],
            batch['entity_sample_masks'], batch['senti_sample_masks']
        )

        # Normalise to a plain Python float: works whether compute() returns a
        # number or a 0-dim tensor, keeps the running total off the GPU, and
        # makes the returned average JSON-serialisable.
        loss_value = float(loss)
        total_loss += loss_value
        pbar.set_postfix({'loss': f'{loss_value:.4f}'})

    return total_loss / len(dataloader)

# Batch the training split: shuffled, using the dataset's own collate function.
loader_options = dict(
    batch_size=CONFIG['batch_size'],
    shuffle=True,
    collate_fn=train_dataset.collate_fn,
)
train_dataloader = DataLoader(train_dataset, **loader_options)

# Short banner so the start of training is easy to find in the output.
separator = "=" * 60
print("Starting training...")
print(f"Batches per epoch: {len(train_dataloader)}")
print(separator)

In [None]:
# Training loop
# Runs CONFIG['epochs'] passes, records loss and wall time per epoch, and
# checkpoints whenever the average loss improves.
# NOTE(review): "best" is judged on the training loss; no validation split is
# evaluated in this loop.
best_loss = float('inf')
training_history = []

for epoch in range(CONFIG['epochs']):
    start_time = time.time()

    # Train
    # float() guarantees a plain number even if train_epoch hands back a
    # 0-dim tensor, so the history can be written with json.dump and the
    # checkpoint stores a simple scalar.
    avg_loss = float(train_epoch(model, train_dataloader, loss_fn, epoch))

    epoch_time = time.time() - start_time

    # Log
    print(f"\nEpoch {epoch+1}/{CONFIG['epochs']}:")
    print(f"  Avg Loss: {avg_loss:.4f}")
    print(f"  Time: {epoch_time:.2f}s")

    training_history.append({
        'epoch': epoch + 1,
        'loss': avg_loss,
        'time': epoch_time
    })

    # Save best model
    # The file is overwritten on each improvement, so only the best epoch's
    # weights and optimizer state are kept.
    if avg_loss < best_loss:
        best_loss = avg_loss
        checkpoint_path = f"{CONFIG['save_path']}/best_model.pt"
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': avg_loss,
        }, checkpoint_path)
        print(f"  ✅ Best model saved (loss: {best_loss:.4f})")

    print("="*60)

print("\n🎉 Training completed!")
print(f"Best loss: {best_loss:.4f}")

## 7. Save Training History

In [None]:
# Write the per-epoch records to a JSON file in the log folder.
log_dir = CONFIG['log_path']
history_path = f"{log_dir}/training_history.json"
with open(history_path, 'w') as history_file:
    json.dump(training_history, history_file, indent=2)

print(f"Training history saved to: {history_path}")

# Draw loss against epoch, save the figure as a PNG, then display it.
import matplotlib.pyplot as plt

curve_path = f"{log_dir}/training_curve.png"
epoch_axis = [record['epoch'] for record in training_history]
loss_axis = [record['loss'] for record in training_history]

plt.figure(figsize=(10, 6))
plt.plot(epoch_axis, loss_axis, marker='o')
plt.title('Training Loss Curve')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid(True)
plt.savefig(curve_path)
plt.show()

print(f"Training curve saved to: {curve_path}")

## 8. Model Evaluation (Optional)

In [None]:
# Load best model
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU runtime still loads when this cell runs on CPU
# (or on a different GPU).
checkpoint = torch.load(
    f"{CONFIG['save_path']}/best_model.pt",
    map_location=device
)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

print(f"Best model loaded (epoch {checkpoint['epoch']}, loss {checkpoint['loss']:.4f})")

# Quick evaluation on test set
# Unshuffled, so the stored batches follow the dataset order.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=CONFIG['batch_size'],
    shuffle=False,
    collate_fn=test_dataset.collate_fn
)

print(f"\nRunning evaluation on {len(test_dataset)} test samples...")

# Raw model outputs are collected per batch; no metric is computed here.
all_predictions = []
with torch.no_grad():
    for batch in tqdm(test_dataloader, desc="Evaluating"):
        batch = {k: v.to(device) if isinstance(v, torch.Tensor) else v
                 for k, v in batch.items()}

        # With evaluate=True the model is called with span inputs instead of
        # gold sentiments and returns three values.
        entity_clf, senti_clf, sentiments = model(
            encodings=batch['encodings'],
            context_masks=batch['context_masks'],
            entity_masks=batch['entity_masks'],
            entity_sizes=batch['entity_sizes'],
            entity_spans=batch['entity_spans'],
            entity_sample_masks=batch['entity_sample_masks'],
            adj=batch['adj'],
            evaluate=True
        )

        # Store predictions (moved to CPU so GPU memory is not held)
        all_predictions.append({
            'entity_clf': entity_clf.cpu(),
            'senti_clf': senti_clf.cpu(),
            'sentiments': sentiments.cpu()
        })

print(f"✅ Evaluation complete! {len(all_predictions)} batches processed.")

## 9. Download Trained Model

In [None]:
# Tell the user where the checkpoint and the logs were written.
model_file = f"{CONFIG['save_path']}/best_model.pt"
logs_dir = f"{CONFIG['log_path']}/"
print(f"Trained model location: {model_file}")
print(f"Training logs: {logs_dir}")
print("\nDownload these files from Kaggle output to use for inference.")