In [1]:
import os
for dirname, _, filenames in os.walk('/kaggle'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/lib/kaggle/gcp.py


## V1

In [11]:
"""
Bond Query Classifier - Complete Kaggle Notebook
=================================================

Copy this entire file into a Kaggle notebook and run!

Setup:
1. Create new Kaggle notebook
2. Enable GPU (T4 x2 or P100)
3. Enable Internet
4. Copy this entire file into a cell
5. Run!

Model will be saved to: /kaggle/working/bond_classifier_v3/
"""

# ==================== INSTALL DEPENDENCIES ====================
import sys
import subprocess

print("=" * 60)
print("INSTALLING DEPENDENCIES")
print("=" * 60)

# Install required packages
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", 
                      "transformers", "accelerate", "scikit-learn"])

print("✓ Dependencies installed!\n")


# ==================== IMPORTS ====================
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoTokenizer,
    AutoModel,
    get_cosine_schedule_with_warmup
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import numpy as np
import random
from typing import List, Dict, Tuple, Any, Optional
from dataclasses import dataclass
from enum import Enum
from collections import deque
from pathlib import Path
from tqdm.auto import tqdm
import json
import re


# ==================== CHECK GPU ====================
print("=" * 60)
print("GPU CHECK")
print("=" * 60)

if torch.cuda.is_available():
    print(f"✓ GPU detected: {torch.cuda.get_device_name(0)}")
    print(f"✓ GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    print(f"✓ CUDA version: {torch.version.cuda}")
else:
    print("⚠ WARNING: No GPU detected! Training will be very slow.")
    print("   Go to Settings → Accelerator → Select GPU")

print()


# ==================== CONFIGURATION ====================
CONFIG = {
    'base_model': 'microsoft/deberta-v3-small',
    'num_samples_per_intent': 1000,  # 13k total
    'augmentation_factor': 0.5,  # 50% more data
    'batch_size': 32,
    'num_epochs': 10,
    'learning_rate': 2e-5,
    'warmup_ratio': 0.1,
    'max_length': 128,
    'output_dir': '/kaggle/working/bond_classifier_v3',
    'seed': 42
}

# Set seeds
random.seed(CONFIG['seed'])
np.random.seed(CONFIG['seed'])
torch.manual_seed(CONFIG['seed'])
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(CONFIG['seed'])


# ==================== ENUMS ====================

class QueryIntent(str, Enum):
    BUY_RECOMMENDATION = "buy_recommendation"
    SELL_RECOMMENDATION = "sell_recommendation"
    PORTFOLIO_ANALYSIS = "portfolio_analysis"
    REDUCE_DURATION = "reduce_duration"
    INCREASE_YIELD = "increase_yield"
    HEDGE_VOLATILITY = "hedge_volatility"
    SECTOR_REBALANCE = "sector_rebalance"
    BARBELL_STRATEGY = "barbell_strategy"
    SWITCH_BONDS = "switch_bonds"
    EXPLAIN_RECOMMENDATION = "explain_recommendation"
    MARKET_OUTLOOK = "market_outlook"
    CREDIT_ANALYSIS = "credit_analysis"
    FORECAST_PRICES = "forecast_prices"


# ==================== DATA GENERATION ====================

class SyntheticDataGenerator:
    """Generate synthetic training data"""
    
    def __init__(self, num_samples_per_intent: int = 1000):
        self.num_samples_per_intent = num_samples_per_intent
        self.templates = self._create_templates()
        self.slot_values = self._create_slot_values()
    
    def _create_templates(self) -> Dict[str, List[str]]:
        return {
            'buy_recommendation': [
                "Find {rating} rated {sector} bonds with {duration} duration",
                "Suggest {bond_type} bonds in {sector} sector",
                "Looking for {sector} bonds with {rating} rating",
                "Show me {duration} {sector} bonds rated {rating}",
                "Need {investor_type} investments in {sector}",
                "Want to buy {rating} {sector} bonds",
                "Recommend bonds for {investment_purpose}",
                "Find liquid {sector} bonds rated {rating}",
            ],
            'reduce_duration': [
                "Reduce portfolio duration from {old_duration} to {new_duration} years",
                "Lower interest rate sensitivity",
                "Decrease duration while {constraint}",
                "Shorten bond maturities due to {reason}",
                "Need to reduce duration risk",
                "Switch to shorter duration bonds",
            ],
            'increase_yield': [
                "Improve portfolio yield from {current_yield}% to {target_yield}%",
                "Find higher yielding alternatives",
                "Boost returns while maintaining {constraint}",
                "Need better yield than {current_yield}%",
                "Increase yield without sacrificing {factor}",
                "Look for yield enhancement opportunities",
            ],
            'hedge_volatility': [
                "Hedge against {risk_type} volatility",
                "Protect portfolio from rate risk",
                "Build defensive position",
                "Reduce sensitivity to interest rates",
                "Immunize portfolio against volatility",
            ],
            'sector_rebalance': [
                "Reduce {sector1} from {current_pct}% to {target_pct}%",
                "Too much concentration in {sector}",
                "Diversify away from {sector}",
                "Rebalance sector exposure",
                "Shift from {sector1} to {sector2}",
            ],
            'portfolio_analysis': [
                "Analyze my bond portfolio",
                "Review my holdings",
                "What are the risks in my portfolio?",
                "Check duration and sector exposures",
                "Evaluate credit quality distribution",
                "Is my portfolio well diversified?",
            ],
            'switch_bonds': [
                "Replace {entity} with better alternative",
                "Switch from {bond1} to {bond2}",
                "Find substitute for {entity}",
                "Swap {entity} for higher quality bond",
            ],
            'sell_recommendation': [
                "Should I sell {entity} given {reason}?",
                "Exit {entity} position",
                "Which bonds should I liquidate?",
                "Recommend bonds to sell",
            ],
            'market_outlook': [
                "What's your view on bond markets?",
                "How will {event} impact bonds?",
                "Is this good time to invest in {sector}?",
                "What's the outlook for {market_segment}?",
            ],
            'credit_analysis': [
                "Analyze credit quality of {entity}",
                "What's the default risk for {entity}?",
                "Compare credit profiles of {entity1} vs {entity2}",
                "Is {entity} likely to be downgraded?",
            ],
            'barbell_strategy': [
                "Create barbell with {short_duration} and {long_duration} bonds",
                "Build short-long strategy in {sector}",
                "Implement barbell approach",
            ],
            'forecast_prices': [
                "Forecast {entity} price for next {period}",
                "What will {entity} trade at in {timeframe}?",
                "Predict returns for {sector} bonds",
            ],
            'explain_recommendation': [
                "Why did you suggest {action}?",
                "Explain rationale behind recommendation",
                "What factors led to this suggestion?",
            ]
        }
    
    def _create_slot_values(self) -> Dict[str, List[str]]:
        return {
            'rating': ['AAA', 'AA+', 'AA', 'A+', 'A', 'BBB'],
            'sector': ['Sovereign', 'PSU Energy', 'Financial', 'Corporate', 
                      'Infrastructure', 'NBFC', 'Banking'],
            'bond_type': ['G-Sec', 'Corporate', 'PSU', 'SDL'],
            'duration': ['short', 'medium', 'long', '1-3 year', '5-7 year'],
            'investor_type': ['conservative', 'moderate', 'aggressive'],
            'investment_purpose': ['wealth preservation', 'income generation'],
            'constraint': ['maintaining yield', 'keeping liquidity'],
            'reason': ['rate risk', 'credit concerns', 'rebalancing'],
            'risk_type': ['interest rate', 'credit', 'duration'],
            'entity': ['HDFC Bank', 'ICICI Bank', 'NTPC', 'PFC'],
            'factor': ['credit quality', 'liquidity'],
            'event': ['RBI policy', 'Budget', 'inflation data'],
            'market_segment': ['government bonds', 'corporate bonds'],
            'period': ['quarter', '6 months', 'year'],
            'timeframe': ['3 months', '6 months'],
            'action': ['buying', 'selling', 'switching']
        }
    
    def generate_dataset(self) -> List[Dict]:
        """Generate complete dataset"""
        dataset = []
        
        print("=" * 60)
        print("GENERATING SYNTHETIC DATA")
        print("=" * 60)
        
        for intent, templates in tqdm(self.templates.items(), desc="Generating"):
            for _ in range(self.num_samples_per_intent):
                template = random.choice(templates)
                query = self._fill_template(template)
                
                sample = {
                    'text': query,
                    'intent': intent,
                    'sectors': self._extract_sectors(query),
                    'rating': self._extract_rating(query),
                    'duration': self._extract_duration(query),
                    'constraints': self._extract_constraints(query)
                }
                dataset.append(sample)
        
        print(f"✓ Generated {len(dataset)} samples\n")
        return dataset
    
    def _fill_template(self, template: str) -> str:
        """Fill template with random values"""
        query = template
        
        for slot_type, values in self.slot_values.items():
            placeholder = f"{{{slot_type}}}"
            if placeholder in query:
                query = query.replace(placeholder, random.choice(values))
        
        # Fill numeric placeholders
        query = re.sub(r'\{old_duration\}', str(random.randint(5, 10)), query)
        query = re.sub(r'\{new_duration\}', str(random.randint(2, 5)), query)
        query = re.sub(r'\{current_yield\}', f"{random.uniform(6.0, 7.5):.1f}", query)
        query = re.sub(r'\{target_yield\}', f"{random.uniform(7.5, 9.0):.1f}", query)
        query = re.sub(r'\{current_pct\}', str(random.randint(30, 50)), query)
        query = re.sub(r'\{target_pct\}', str(random.randint(15, 25)), query)
        
        # Fill remaining
        query = re.sub(r'\{[^}]+\}', 'bonds', query)
        return query
    
    def _extract_sectors(self, query: str) -> List[str]:
        sectors = []
        for sector in ['Sovereign', 'PSU Energy', 'Financial', 'Corporate']:
            if sector.lower() in query.lower():
                sectors.append(sector)
        return sectors
    
    def _extract_rating(self, query: str) -> Optional[str]:
        for rating in ['AAA', 'AA+', 'AA', 'A+', 'A', 'BBB']:
            if rating in query:
                return rating
        return None
    
    def _extract_duration(self, query: str) -> str:
        query_lower = query.lower()
        if any(kw in query_lower for kw in ['short', '1-3']):
            return 'short'
        elif any(kw in query_lower for kw in ['long', '7-10']):
            return 'long'
        return 'medium'
    
    def _extract_constraints(self, query: str) -> Dict[str, bool]:
        query_lower = query.lower()
        return {
            'preserve_yield': 'maintain' in query_lower and 'yield' in query_lower,
            'maintain_liquidity': 'liquid' in query_lower,
            'avoid_downgrades': 'credit' in query_lower,
            'sector_diversity': 'divers' in query_lower,
            'rating_above_aa': 'AAA' in query or 'AA' in query
        }


class DataAugmenter:
    """Augment data with variations"""
    
    @staticmethod
    def augment_dataset(dataset: List[Dict], factor: float = 0.5) -> List[Dict]:
        """Augment dataset"""
        num_to_augment = int(len(dataset) * factor)
        samples = random.sample(dataset, min(num_to_augment, len(dataset)))
        
        print("=" * 60)
        print("AUGMENTING DATA")
        print("=" * 60)
        
        augmented = []
        for sample in tqdm(samples, desc="Augmenting"):
            aug_type = random.choice(['synonym', 'insertion', 'deletion'])
            
            if aug_type == 'synonym':
                augmented.append(DataAugmenter._synonym_replacement(sample))
            elif aug_type == 'insertion':
                augmented.append(DataAugmenter._random_insertion(sample))
            else:
                augmented.append(DataAugmenter._random_deletion(sample))
        
        print(f"✓ Added {len(augmented)} augmented samples\n")
        return dataset + augmented
    
    @staticmethod
    def _synonym_replacement(sample: Dict) -> Dict:
        synonyms = {
            'find': ['locate', 'search for', 'look for'],
            'bonds': ['securities', 'instruments'],
            'high': ['elevated', 'strong'],
            'yield': ['return', 'interest'],
        }
        
        words = sample['text'].split()
        for i, word in enumerate(words):
            if word.lower() in synonyms and random.random() < 0.3:
                words[i] = random.choice(synonyms[word.lower()])
        
        new_sample = sample.copy()
        new_sample['text'] = ' '.join(words)
        return new_sample
    
    @staticmethod
    def _random_insertion(sample: Dict) -> Dict:
        words = sample['text'].split()
        if len(words) > 3:
            pos = random.randint(0, len(words))
            words.insert(pos, random.choice(['please', 'kindly', 'also']))
        
        new_sample = sample.copy()
        new_sample['text'] = ' '.join(words)
        return new_sample
    
    @staticmethod
    def _random_deletion(sample: Dict) -> Dict:
        words = sample['text'].split()
        words = [w for w in words if w.lower() not in ['please', 'kindly'] or random.random() > 0.5]
        
        new_sample = sample.copy()
        new_sample['text'] = ' '.join(words) if words else sample['text']
        return new_sample


# ==================== PYTORCH DATASET ====================

class BondQueryDataset(Dataset):
    """PyTorch Dataset"""
    
    def __init__(self, data: List[Dict], tokenizer, max_length: int = 128):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length
        
        self.intent_to_id = {
            'buy_recommendation': 0, 'sell_recommendation': 1,
            'portfolio_analysis': 2, 'reduce_duration': 3,
            'increase_yield': 4, 'hedge_volatility': 5,
            'sector_rebalance': 6, 'barbell_strategy': 7,
            'switch_bonds': 8, 'explain_recommendation': 9,
            'market_outlook': 10, 'credit_analysis': 11,
            'forecast_prices': 12
        }
        
        self.sector_to_id = {
            'Sovereign': 0, 'PSU Energy': 1, 'Financial': 2,
            'Corporate': 3, 'Infrastructure': 4, 'NBFC': 5, 'Banking': 6
        }
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        sample = self.data[idx]
        
        encoding = self.tokenizer(
            sample['text'],
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        intent_label = self.intent_to_id[sample['intent']]
        
        sector_labels = torch.zeros(len(self.sector_to_id))
        for sector in sample.get('sectors', []):
            if sector in self.sector_to_id:
                sector_labels[self.sector_to_id[sector]] = 1
        
        rating = sample.get('rating')
        rating_map = {'AAA': 0, 'AA+': 1, 'AA': 2, 'A+': 3, 'A': 4, 'BBB': 5}
        rating_label = rating_map.get(rating, 6)
        
        duration_map = {'short': 0, 'medium': 1, 'long': 2}
        duration_label = duration_map.get(sample.get('duration', 'medium'), 1)
        
        constraints = sample.get('constraints', {})
        constraint_labels = torch.tensor([
            float(constraints.get('preserve_yield', False)),
            float(constraints.get('maintain_liquidity', False)),
            float(constraints.get('avoid_downgrades', False)),
            float(constraints.get('sector_diversity', False)),
            float(constraints.get('rating_above_aa', False))
        ])
        
        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'intent_label': torch.tensor(intent_label),
            'sector_labels': sector_labels,
            'rating_label': torch.tensor(rating_label),
            'duration_label': torch.tensor(duration_label),
            'constraint_labels': constraint_labels
        }


# ==================== MODEL ====================

class ProductionBondClassifier(nn.Module):
    """Multi-task classifier"""
    
    def __init__(self, base_model: str = 'distilbert-base-uncased', dropout: float = 0.15):
        super().__init__()
        
        self.bert = AutoModel.from_pretrained(base_model)
        hidden_size = self.bert.config.hidden_size
        
        self.feature_layer = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.Dropout(dropout),
            nn.GELU(),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.LayerNorm(hidden_size // 2),
            nn.Dropout(dropout),
            nn.GELU()
        )
        
        feature_size = hidden_size // 2
        
        self.intent_head = nn.Linear(feature_size, 13)
        self.sector_head = nn.Linear(feature_size, 7)
        self.rating_head = nn.Linear(feature_size, 7)
        self.duration_head = nn.Linear(feature_size, 3)
        self.constraint_head = nn.Linear(feature_size, 5)
        
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0, :]
        features = self.feature_layer(cls_output)
        features = self.dropout(features)
        
        return {
            'intent_logits': self.intent_head(features),
            'sector_logits': self.sector_head(features),
            'rating_logits': self.rating_head(features),
            'duration_logits': self.duration_head(features),
            'constraint_logits': self.constraint_head(features),
        }


# ==================== LOSS ====================

class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
    
    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss
        return focal_loss.mean()


class MultiTaskLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.intent_loss_fn = FocalLoss(gamma=2.0)
        self.sector_loss_fn = nn.BCEWithLogitsLoss()
        self.rating_loss_fn = nn.CrossEntropyLoss()
        self.duration_loss_fn = nn.CrossEntropyLoss()
        self.constraint_loss_fn = nn.BCEWithLogitsLoss()
    
    def forward(self, outputs, labels):
        intent_loss = self.intent_loss_fn(outputs['intent_logits'], labels['intent_label'])
        sector_loss = self.sector_loss_fn(outputs['sector_logits'], labels['sector_labels'])
        rating_loss = self.rating_loss_fn(outputs['rating_logits'], labels['rating_label'])
        duration_loss = self.duration_loss_fn(outputs['duration_logits'], labels['duration_label'])
        constraint_loss = self.constraint_loss_fn(outputs['constraint_logits'], labels['constraint_labels'])
        
        total = intent_loss + 0.5*sector_loss + 0.3*rating_loss + 0.3*duration_loss + 0.4*constraint_loss
        
        return {'total': total, 'intent': intent_loss}


# ==================== TRAINER ====================

class Trainer:
    def __init__(self, model, train_loader, val_loader, optimizer, scheduler, criterion, device, output_dir):
        self.model = model
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.criterion = criterion
        self.device = device
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.best_acc = 0.0
    
    def train_epoch(self, epoch):
        self.model.train()
        total_loss = 0
        all_preds, all_labels = [], []
        
        pbar = tqdm(self.train_loader, desc=f'Epoch {epoch}')
        for batch in pbar:
            input_ids = batch['input_ids'].to(self.device)
            attention_mask = batch['attention_mask'].to(self.device)
            
            labels = {k: v.to(self.device) for k, v in batch.items() 
                     if k not in ['input_ids', 'attention_mask']}
            
            self.optimizer.zero_grad()
            outputs = self.model(input_ids, attention_mask)
            loss_dict = self.criterion(outputs, labels)
            loss = loss_dict['total']
            
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
            self.optimizer.step()
            self.scheduler.step()
            
            total_loss += loss.item()
            preds = outputs['intent_logits'].argmax(dim=-1).cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels['intent_label'].cpu().numpy())
            
            pbar.set_postfix({'loss': f'{loss.item():.4f}', 
                            'acc': f'{accuracy_score(all_labels[-len(preds):], preds):.3f}'})
        
        return total_loss / len(self.train_loader)
    
    def evaluate(self):
        self.model.eval()
        total_loss = 0
        all_preds, all_labels = [], []
        
        with torch.no_grad():
            for batch in tqdm(self.val_loader, desc='Evaluating'):
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = {k: v.to(self.device) for k, v in batch.items() 
                         if k not in ['input_ids', 'attention_mask']}
                
                outputs = self.model(input_ids, attention_mask)
                loss_dict = self.criterion(outputs, labels)
                total_loss += loss_dict['total'].item()
                
                preds = outputs['intent_logits'].argmax(dim=-1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels['intent_label'].cpu().numpy())
        
        return {
            'loss': total_loss / len(self.val_loader),
            'accuracy': accuracy_score(all_labels, all_preds),
            'f1_macro': f1_score(all_labels, all_preds, average='macro')
        }
    
    def save_checkpoint(self, epoch, metrics):
        if metrics['accuracy'] > self.best_acc:
            self.best_acc = metrics['accuracy']
            torch.save(self.model.state_dict(), self.output_dir / 'pytorch_model.bin')
            print(f"✓ New best model saved (acc: {metrics['accuracy']:.4f})")
    
    def train(self, num_epochs):
        print("=" * 60)
        print("TRAINING")
        print("=" * 60)
        
        for epoch in range(1, num_epochs + 1):
            train_loss = self.train_epoch(epoch)
            print(f"\nEpoch {epoch}/{num_epochs} - Train Loss: {train_loss:.4f}")
            
            val_metrics = self.evaluate()
            print(f"Val Loss: {val_metrics['loss']:.4f}")
            print(f"Val Accuracy: {val_metrics['accuracy']:.4f}")
            print(f"Val F1: {val_metrics['f1_macro']:.4f}\n")
            
            self.save_checkpoint(epoch, val_metrics)
        
        print(f"Training complete! Best accuracy: {self.best_acc:.4f}\n")


# ==================== MAIN TRAINING FUNCTION ====================

def train_model():
    """Main training function"""
    
    # Generate data
    generator = SyntheticDataGenerator(CONFIG['num_samples_per_intent'])
    dataset = generator.generate_dataset()
    
    # Augment
    dataset = DataAugmenter.augment_dataset(dataset, CONFIG['augmentation_factor'])
    print(f"Total dataset size: {len(dataset)} samples\n")
    
    # Split
    print("=" * 60)
    print("SPLITTING DATA")
    print("=" * 60)
    intents = [s['intent'] for s in dataset]
    train_data, temp_data = train_test_split(dataset, test_size=0.3, stratify=intents, random_state=42)
    temp_intents = [s['intent'] for s in temp_data]
    val_data, test_data = train_test_split(temp_data, test_size=0.5, stratify=temp_intents, random_state=42)
    
    print(f"Train: {len(train_data)}")
    print(f"Val: {len(val_data)}")
    print(f"Test: {len(test_data)}\n")
    
    # Create datasets
    print("=" * 60)
    print("LOADING MODEL")
    print("=" * 60)
    tokenizer = AutoTokenizer.from_pretrained(CONFIG['base_model'])
    
    train_dataset = BondQueryDataset(train_data, tokenizer, CONFIG['max_length'])
    val_dataset = BondQueryDataset(val_data, tokenizer, CONFIG['max_length'])
    test_dataset = BondQueryDataset(test_data, tokenizer, CONFIG['max_length'])
    
    train_loader = DataLoader(train_dataset, batch_size=CONFIG['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=CONFIG['batch_size'])
    test_loader = DataLoader(test_dataset, batch_size=CONFIG['batch_size'])
    
    # Model
    model = ProductionBondClassifier(CONFIG['base_model'])
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    
    print(f"✓ Model loaded: {sum(p.numel() for p in model.parameters()):,} parameters")
    print(f"✓ Device: {device}\n")
    
    # Optimizer
    optimizer = torch.optim.AdamW(model.parameters(), lr=CONFIG['learning_rate'], weight_decay=0.01)
    num_training_steps = len(train_loader) * CONFIG['num_epochs']
    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(num_training_steps * CONFIG['warmup_ratio']),
        num_training_steps=num_training_steps
    )
    
    criterion = MultiTaskLoss()
    
    # Train
    trainer = Trainer(model, train_loader, val_loader, optimizer, scheduler, 
                     criterion, device, CONFIG['output_dir'])
    trainer.train(CONFIG['num_epochs'])
    
    # Save tokenizer
    tokenizer.save_pretrained(CONFIG['output_dir'])
    
    # Test
    print("=" * 60)
    print("FINAL TEST")
    print("=" * 60)
    model.eval()
    all_preds, all_labels = [], []
    
    with torch.no_grad():
        for batch in tqdm(test_loader, desc='Testing'):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            outputs = model(input_ids, attention_mask)
            preds = outputs['intent_logits'].argmax(dim=-1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(batch['intent_label'].numpy())
    
    test_acc = accuracy_score(all_labels, all_preds)
    test_f1 = f1_score(all_labels, all_preds, average='macro')
    
    print(f"\n✓ Test Accuracy: {test_acc:.4f}")
    print(f"✓ Test F1 Macro: {test_f1:.4f}\n")
    
    print("=" * 60)
    print("TRAINING COMPLETE!")
    print("=" * 60)
    print(f"\n✓ Model saved to: {CONFIG['output_dir']}")
    print(f"✓ Files: pytorch_model.bin, tokenizer files")
    print(f"\nTo download:")
    print(f"  1. Go to Output tab")
    print(f"  2. Download 'bond_classifier_v3' folder")
    print(f"  3. Use locally with production code")


# ==================== RUN ====================

if __name__ == '__main__':
    train_model()

INSTALLING DEPENDENCIES
✓ Dependencies installed!

GPU CHECK
✓ GPU detected: Tesla T4
✓ GPU memory: 15.83 GB
✓ CUDA version: 12.4

GENERATING SYNTHETIC DATA


Generating:   0%|          | 0/13 [00:00<?, ?it/s]

✓ Generated 13000 samples

AUGMENTING DATA


Augmenting:   0%|          | 0/6500 [00:00<?, ?it/s]

✓ Added 6500 augmented samples

Total dataset size: 19500 samples

SPLITTING DATA
Train: 13650
Val: 2925
Test: 2925

LOADING MODEL




✓ Model loaded: 142,205,987 parameters
✓ Device: cuda

TRAINING


Epoch 1:   0%|          | 0/427 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [5]:
"""
Bond Query Classifier - Kaggle Inference Notebook
==================================================

Use this after training to test your model in Kaggle.

Setup:
1. Attach the output from training notebook as a dataset
2. Copy this file into a new Kaggle notebook
3. Run to test the classifier!
"""

# ==================== INSTALL & IMPORTS ====================
import sys
import subprocess

print("Installing dependencies...")
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "transformers"])

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
from enum import Enum
from collections import deque
import numpy as np

print("✓ Imports complete!\n")


# ==================== MODEL ARCHITECTURE ====================

class ProductionBondClassifier(nn.Module):
    """Same architecture as training"""
    
    def __init__(self, base_model: str = 'distilbert-base-uncased', dropout: float = 0.15):
        super().__init__()
        
        self.bert = AutoModel.from_pretrained(base_model)
        hidden_size = self.bert.config.hidden_size
        
        self.feature_layer = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.Dropout(dropout),
            nn.GELU(),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.LayerNorm(hidden_size // 2),
            nn.Dropout(dropout),
            nn.GELU()
        )
        
        feature_size = hidden_size // 2
        
        self.intent_head = nn.Linear(feature_size, 13)
        self.sector_head = nn.Linear(feature_size, 7)
        self.rating_head = nn.Linear(feature_size, 7)
        self.duration_head = nn.Linear(feature_size, 3)
        self.constraint_head = nn.Linear(feature_size, 5)
        
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0, :]
        features = self.feature_layer(cls_output)
        features = self.dropout(features)
        
        return {
            'intent_logits': self.intent_head(features),
            'sector_logits': self.sector_head(features),
            'rating_logits': self.rating_head(features),
            'duration_logits': self.duration_head(features),
            'constraint_logits': self.constraint_head(features),
        }


# ==================== INFERENCE CLASS ====================

class QueryIntent(str, Enum):
    BUY_RECOMMENDATION = "buy_recommendation"
    SELL_RECOMMENDATION = "sell_recommendation"
    PORTFOLIO_ANALYSIS = "portfolio_analysis"
    REDUCE_DURATION = "reduce_duration"
    INCREASE_YIELD = "increase_yield"
    HEDGE_VOLATILITY = "hedge_volatility"
    SECTOR_REBALANCE = "sector_rebalance"
    BARBELL_STRATEGY = "barbell_strategy"
    SWITCH_BONDS = "switch_bonds"
    EXPLAIN_RECOMMENDATION = "explain_recommendation"
    MARKET_OUTLOOK = "market_outlook"
    CREDIT_ANALYSIS = "credit_analysis"
    FORECAST_PRICES = "forecast_prices"


@dataclass
class ClassificationResult:
    intent: str
    confidence: float
    filters: Dict[str, Any]
    constraints: Dict[str, bool]


class BondClassifier:
    """Production classifier"""
    
    def __init__(self, model_path: str):
        """
        Load model from Kaggle path
        
        Args:
            model_path: Path like '/kaggle/input/bond-classifier-v3/bond_classifier_v3'
        """
        print(f"Loading model from: {model_path}")
        
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {self.device}")
        
        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        
        # Load model
        self.model = ProductionBondClassifier()
        state_dict = torch.load(f"{model_path}/pytorch_model.bin", map_location=self.device)
        self.model.load_state_dict(state_dict)
        self.model.to(self.device)
        self.model.eval()
        
        # Label mappings
        self.intent_names = [
            'buy_recommendation', 'sell_recommendation', 'portfolio_analysis',
            'reduce_duration', 'increase_yield', 'hedge_volatility',
            'sector_rebalance', 'barbell_strategy', 'switch_bonds',
            'explain_recommendation', 'market_outlook', 'credit_analysis',
            'forecast_prices'
        ]
        
        self.sector_names = ['Sovereign', 'PSU Energy', 'Financial', 'Corporate', 
                            'Infrastructure', 'NBFC', 'Banking']
        self.rating_names = ['AAA', 'AA+', 'AA', 'A+', 'A', 'BBB', 'Unrated']
        self.duration_names = ['short', 'medium', 'long']
        
        print("✓ Model loaded successfully!\n")
    
    def classify(self, query: str, num_samples: int = 5) -> ClassificationResult:
        """
        Classify query with uncertainty estimation
        
        Args:
            query: User query string
            num_samples: Number of MC dropout samples
        """
        # Tokenize
        inputs = self.tokenizer(
            query,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=128
        ).to(self.device)
        
        # MC Dropout for uncertainty
        self.model.train()  # Enable dropout
        intent_predictions = []
        
        with torch.no_grad():
            for _ in range(num_samples):
                outputs = self.model(**inputs)
                intent_probs = F.softmax(outputs['intent_logits'], dim=-1)
                intent_predictions.append(intent_probs.cpu())
        
        # Calculate statistics
        intent_mean = torch.stack(intent_predictions).mean(dim=0)[0]
        confidence = intent_mean.max().item()
        predicted_idx = intent_mean.argmax().item()
        
        # Get final outputs
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(**inputs)
            
            # Extract all predictions
            sectors = self._extract_sectors(outputs['sector_logits'])
            rating = self._extract_rating(outputs['rating_logits'])
            duration = self._extract_duration(outputs['duration_logits'])
            constraints = self._extract_constraints(outputs['constraint_logits'])
        
        return ClassificationResult(
            intent=self.intent_names[predicted_idx],
            confidence=confidence,
            filters={
                'sectors': sectors,
                'min_rating': rating,
                'duration_preference': duration
            },
            constraints=constraints
        )
    
    def _extract_sectors(self, logits: torch.Tensor) -> List[str]:
        probs = torch.sigmoid(logits[0])
        indices = (probs > 0.5).nonzero(as_tuple=True)[0]
        return [self.sector_names[i] for i in indices]
    
    def _extract_rating(self, logits: torch.Tensor) -> Optional[str]:
        idx = logits[0].argmax().item()
        return self.rating_names[idx] if idx < 6 else None
    
    def _extract_duration(self, logits: torch.Tensor) -> str:
        idx = logits[0].argmax().item()
        return self.duration_names[idx]
    
    def _extract_constraints(self, logits: torch.Tensor) -> Dict[str, bool]:
        probs = torch.sigmoid(logits[0])
        return {
            'preserve_yield': probs[0].item() > 0.5,
            'maintain_liquidity': probs[1].item() > 0.5,
            'avoid_downgrades': probs[2].item() > 0.5,
            'sector_diversity': probs[3].item() > 0.5,
            'rating_above_aa': probs[4].item() > 0.5
        }
    
    def batch_classify(self, queries: List[str]) -> List[ClassificationResult]:
        """Classify multiple queries"""
        return [self.classify(q) for q in queries]


# ==================== USAGE EXAMPLE ====================

def test_classifier():
    """Test the classifier with sample queries"""
    
    # TODO: Update this path to match your Kaggle dataset
    # After training, add the output as a dataset, then use that path
    MODEL_PATH = '/kaggle/working/bond_classifier_v3'
    
    # Or if testing in the same notebook after training:
    # MODEL_PATH = '/kaggle/working/bond_classifier_v3'
    
    print("=" * 60)
    print("LOADING CLASSIFIER")
    print("=" * 60)
    
    try:
        classifier = BondClassifier(MODEL_PATH)
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        print("\nMake sure to:")
        print("1. Add training output as a dataset")
        print("2. Update MODEL_PATH above")
        return
    
    print("=" * 60)
    print("TESTING QUERIES")
    print("=" * 60)
    
    # Test queries
    test_queries = [
        # --- Category 1: Ambiguous Buy/Sell/Strategy ---
        "Should I shift my SDL holdings into shorter PSU bonds or just hold?",
        "I want to reduce rate risk but I don’t want my yield to fall. What should I do?",
        "Is it better to exit long-duration bonds and move into AA corporates?",
        "Would switching from NTPC 2035 to REC 2030 improve my return?",
        "Which reduces risk more: selling perpetuals or adding short-term G-Secs?",
    
        # --- Category 2: Disguised intents ---
        "My yield looks weak lately… what should I change?",
        "These long papers are stressing me out—what’s the safest move?",
        "My portfolio feels too boring. Suggest something with more kick.",
        "The curve is flattening; should I reposition?",
        "My advisor said I’m too exposed. What adjustments should I consider?",
    
        # --- Category 3: Multi-intent queries ---
        "Recommend high-yield PSU bonds and also check if any of my holdings should be sold.",
        "Reduce my duration and give alternatives with at least 7.5% yield.",
        "Analyze my portfolio and tell me which low-yield bonds I should switch.",
        "Before suggesting buys, what’s your outlook on corporate spreads?",
        "If I shift to shorter bonds, how will my yield be impacted?",
    
        # --- Category 4: Tricky phrasing for buy intent ---
        "Is NTPC 2033 attractive at current spreads?",
        "Are AA PSU bonds offering a good entry point?",
        "Is this a good time to accumulate SDLs?",
        "Should I start adding exposure to long maturities?",
        "Are there better alternatives to ICICI 2029 with similar risk?",
    
        # --- Category 5: Risk management intents ---
        "The market seems jumpy; how do I protect my portfolio?",
        "If RBI hikes unexpectedly, which holdings will get hit the most?",
        "Rate volatility worries me—how do I reduce the impact?",
        "How can I stabilize P&L swings in my portfolio?",
        "Should I rebalance sectors before the credit cycle weakens?",
    
        # --- Category 6: Very short hard queries ---
        "Duration too high?",
        "Better yield ideas?",
        "Switch or hold?",
        "Cut risk?",
        "Add PSU?",
    
        # --- Category 7: Contradictory constraints ---
        "Cut duration but don’t let yield drop below 7.6%.",
        "I want high yield but without taking credit risk.",
        "Reduce risk but avoid selling anything.",
        "Switch out of low-yield bonds but keep duration same.",
        "Increase return without increasing duration or credit risk.",
    
        # --- Category 8: Multi-sentence queries ---
        "My duration increased after the last purchases. I’m worried about hikes. Suggest adjustments that don’t hurt yield.",
        "My portfolio is mostly PSU and financials. Seems concentrated. Should I diversify into private corporates?",
        "I sold some long-term bonds last month. Now thinking of adding 3–5 year AA corporates. Any ideas?",
        "I expect inflation to cool. Should I increase duration a bit?",
        "Markets feel stable. Should I rotate sectors or focus on yield first?",
    
        # --- Category 9: Credit-analysis queries ---
        "How is the credit quality of PFC right now?",
        "Is REC fundamentally strong enough for long-term holding?",
        "What’s the default risk on NTPC?",
        "Should I worry about credit spreads widening?",
        "Are AA- names safe in this environment?",
    
        # --- Category 10: Forecast / outlook queries ---
        "Where do you see G-Sec yields in six months?",
        "If rates fall by 50 bps, what happens to long-duration PSU bonds?",
        "Will corporate spreads tighten this year?",
        "Predict the movement of the 10-year benchmark.",
        "How will a Fed cut affect Indian bond yields?",
    ]

    
    for query in test_queries:
        result = classifier.classify(query)
        
        print(f"\n{'='*60}")
        print(f"Query: {query}")
        print(f"{'='*60}")
        print(f"Intent: {result.intent}")
        print(f"Confidence: {result.confidence:.3f}")
        print(f"Sectors: {result.filters['sectors']}")
        print(f"Rating: {result.filters['min_rating']}")
        print(f"Duration: {result.filters['duration_preference']}")
        print(f"Constraints: {result.constraints}")
    
    print("\n" + "=" * 60)
    print("TESTING COMPLETE!")
    print("=" * 60)


# ==================== INTERACTIVE TESTING ====================

def interactive_test():
    """Interactive testing - type your own queries"""
    
    MODEL_PATH = '/kaggle/working/bond_classifier_v3'
    # Or: MODEL_PATH = '/kaggle/working/bond_classifier_v3'
    
    print("Loading classifier...")
    classifier = BondClassifier(MODEL_PATH)
    
    print("\n" + "=" * 60)
    print("INTERACTIVE MODE")
    print("=" * 60)
    print("Type your bond queries below. Type 'quit' to exit.\n")
    
    while True:
        query = input("Query: ").strip()
        
        if query.lower() in ['quit', 'exit', 'q']:
            break
        
        if not query:
            continue
        
        result = classifier.classify(query)
        
        print(f"\n  → Intent: {result.intent}")
        print(f"  → Confidence: {result.confidence:.3f}")
        print(f"  → Filters: {result.filters}")
        print()


# ==================== BATCH TESTING ====================

def batch_test_from_list():
    """Test a large batch of queries"""
    
    MODEL_PATH = '/kaggle/input/bond-classifier-output/bond_classifier_v3'
    
    print("Loading classifier...")
    classifier = BondClassifier(MODEL_PATH)
    
    # Large test set
    # queries = [
    #     "Find government bonds",
    #     "High yield corporate bonds",
    #     "Reduce duration risk",
    #     "Increase portfolio yield",
    #     "Diversify sector exposure",
    #     "Analyze my portfolio",
    #     "Switch from ICICI to HDFC bonds",
    #     "Should I sell in this market?",
    #     "What's the credit risk of NTPC?",
    #     "Forecast bond prices",
    #     "Explain your recommendation",
    #     "Create barbell strategy",
    #     "Hedge against inflation",
    # ]
    queries = [
        # --- Category 1: Ambiguous Buy/Sell/Strategy ---
        "Should I shift my SDL holdings into shorter PSU bonds or just hold?",
        "I want to reduce rate risk but I don’t want my yield to fall. What should I do?",
        "Is it better to exit long-duration bonds and move into AA corporates?",
        "Would switching from NTPC 2035 to REC 2030 improve my return?",
        "Which reduces risk more: selling perpetuals or adding short-term G-Secs?",
    
        # --- Category 2: Disguised intents ---
        "My yield looks weak lately… what should I change?",
        "These long papers are stressing me out—what’s the safest move?",
        "My portfolio feels too boring. Suggest something with more kick.",
        "The curve is flattening; should I reposition?",
        "My advisor said I’m too exposed. What adjustments should I consider?",
    
        # --- Category 3: Multi-intent queries ---
        "Recommend high-yield PSU bonds and also check if any of my holdings should be sold.",
        "Reduce my duration and give alternatives with at least 7.5% yield.",
        "Analyze my portfolio and tell me which low-yield bonds I should switch.",
        "Before suggesting buys, what’s your outlook on corporate spreads?",
        "If I shift to shorter bonds, how will my yield be impacted?",
    
        # --- Category 4: Tricky phrasing for buy intent ---
        "Is NTPC 2033 attractive at current spreads?",
        "Are AA PSU bonds offering a good entry point?",
        "Is this a good time to accumulate SDLs?",
        "Should I start adding exposure to long maturities?",
        "Are there better alternatives to ICICI 2029 with similar risk?",
    
        # --- Category 5: Risk management intents ---
        "The market seems jumpy; how do I protect my portfolio?",
        "If RBI hikes unexpectedly, which holdings will get hit the most?",
        "Rate volatility worries me—how do I reduce the impact?",
        "How can I stabilize P&L swings in my portfolio?",
        "Should I rebalance sectors before the credit cycle weakens?",
    
        # --- Category 6: Very short hard queries ---
        "Duration too high?",
        "Better yield ideas?",
        "Switch or hold?",
        "Cut risk?",
        "Add PSU?",
    
        # --- Category 7: Contradictory constraints ---
        "Cut duration but don’t let yield drop below 7.6%.",
        "I want high yield but without taking credit risk.",
        "Reduce risk but avoid selling anything.",
        "Switch out of low-yield bonds but keep duration same.",
        "Increase return without increasing duration or credit risk.",
    
        # --- Category 8: Multi-sentence queries ---
        "My duration increased after the last purchases. I’m worried about hikes. Suggest adjustments that don’t hurt yield.",
        "My portfolio is mostly PSU and financials. Seems concentrated. Should I diversify into private corporates?",
        "I sold some long-term bonds last month. Now thinking of adding 3–5 year AA corporates. Any ideas?",
        "I expect inflation to cool. Should I increase duration a bit?",
        "Markets feel stable. Should I rotate sectors or focus on yield first?",
    
        # --- Category 9: Credit-analysis queries ---
        "How is the credit quality of PFC right now?",
        "Is REC fundamentally strong enough for long-term holding?",
        "What’s the default risk on NTPC?",
        "Should I worry about credit spreads widening?",
        "Are AA- names safe in this environment?",
    
        # --- Category 10: Forecast / outlook queries ---
        "Where do you see G-Sec yields in six months?",
        "If rates fall by 50 bps, what happens to long-duration PSU bonds?",
        "Will corporate spreads tighten this year?",
        "Predict the movement of the 10-year benchmark.",
        "How will a Fed cut affect Indian bond yields?",
    ]


    
    print(f"\nTesting {len(queries)} queries...\n")
    
    results = classifier.batch_classify(queries)
    
    # Show summary
    intent_counts = {}
    for result in results:
        intent_counts[result.intent] = intent_counts.get(result.intent, 0) + 1
    
    print("=" * 60)
    print("RESULTS SUMMARY")
    print("=" * 60)
    
    for intent, count in sorted(intent_counts.items(), key=lambda x: -x[1]):
        print(f"{intent:30s}: {count:2d} queries")
    
    avg_confidence = sum(r.confidence for r in results) / len(results)
    print(f"\nAverage confidence: {avg_confidence:.3f}")


# ==================== RUN ====================

if __name__ == '__main__':
    print("\n" + "=" * 60)
    print("BOND QUERY CLASSIFIER - INFERENCE")
    print("=" * 60 + "\n")
    
    # Choose test mode:
    
    # Option 1: Automated test with sample queries
    test_classifier()
    
    # Option 2: Interactive mode (uncomment to use)
    # interactive_test()
    
    # Option 3: Batch testing (uncomment to use)
    # batch_test_from_list()

Installing dependencies...


ERROR: Operation cancelled by user


KeyboardInterrupt: 

## V2

In [19]:
# ===== GEMINI SETUP CELL =====
!pip install -q google-genai

import os

# Paste your key here (or use Kaggle secrets and set env there)
os.environ["GEMINI_API_KEY"] = "AIzaSyD-9r0N-YGJfL_hZR2elzwpc6m4f6PPb2Y"

print("GEMINI_API_KEY set:", "GEMINI_API_KEY" in os.environ)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


GEMINI_API_KEY set: True


In [3]:
"""
Bond Query Classifier - Complete Kaggle Notebook (with LLM data generation)
===========================================================================

Setup:
1. Create new Kaggle notebook
2. Enable GPU (T4 x2 or P100)
3. Enable Internet
4. Set OPENAI_API_KEY in Kaggle secrets / environment
5. Copy this entire file into a cell
6. Run!

Model will be saved to: /kaggle/working/bond_classifier_v3/
"""

# ==================== INSTALL DEPENDENCIES ====================
import sys
import subprocess

print("=" * 60)
print("INSTALLING DEPENDENCIES")
print("=" * 60)

# Install required packages (added openai)
subprocess.check_call([
    sys.executable, "-m", "pip", "install", "-q",
    "transformers", "accelerate", "scikit-learn", "openai"
])

print("✓ Dependencies installed!\n")


# ==================== IMPORTS ====================
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoTokenizer,
    AutoModel,
    get_cosine_schedule_with_warmup
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import numpy as np
import random
from typing import List, Dict, Tuple, Any, Optional
from dataclasses import dataclass
from enum import Enum
from collections import deque
from pathlib import Path
from tqdm.auto import tqdm
import json
import re

from google import genai  # Gemini Flash client

# ==================== CHECK GPU ====================
print("=" * 60)
print("GPU CHECK")
print("=" * 60)

if torch.cuda.is_available():
    print(f"✓ GPU detected: {torch.cuda.get_device_name(0)}")
    print(f"✓ GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    print(f"✓ CUDA version: {torch.version.cuda}")
else:
    print("⚠ WARNING: No GPU detected! Training will be very slow.")
    print("   Go to Settings → Accelerator → Select GPU")

print()


# ==================== CONFIGURATION ====================
CONFIG = {
    'base_model': 'microsoft/deberta-v3-small',
    'num_samples_per_intent': 800,   # synthetic per intent (you can tune)
    'augmentation_factor': 0.5,      # 50% more data

    # --- NEW: LLM data generation config ---
    'use_llm_data': True,            # toggle this to disable LLM data
    'llm_samples_per_intent': 200,   # how many LLM examples per intent
    'llm_model': 'gemini-2.0-flash',  # or 'gemini-1.5-flash' if you prefer

    'llm_dataset_cache': '/kaggle/working/llm_intent_dataset.jsonl',
    'reuse_cached_llm_data': True,   # if cache exists, reuse instead of regenerating

    'batch_size': 32,
    'num_epochs': 4,
    'learning_rate': 2e-5,
    'warmup_ratio': 0.1,
    'max_length': 128,
    'output_dir': '/kaggle/working/bond_classifier_v3',
    'seed': 42
}

# Set seeds
random.seed(CONFIG['seed'])
np.random.seed(CONFIG['seed'])
torch.manual_seed(CONFIG['seed'])
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(CONFIG['seed'])


# ==================== ENUMS ====================

class QueryIntent(str, Enum):
    BUY_RECOMMENDATION = "buy_recommendation"
    SELL_RECOMMENDATION = "sell_recommendation"
    PORTFOLIO_ANALYSIS = "portfolio_analysis"
    REDUCE_DURATION = "reduce_duration"
    INCREASE_YIELD = "increase_yield"
    HEDGE_VOLATILITY = "hedge_volatility"
    SECTOR_REBALANCE = "sector_rebalance"
    BARBELL_STRATEGY = "barbell_strategy"
    SWITCH_BONDS = "switch_bonds"
    EXPLAIN_RECOMMENDATION = "explain_recommendation"
    MARKET_OUTLOOK = "market_outlook"
    CREDIT_ANALYSIS = "credit_analysis"
    FORECAST_PRICES = "forecast_prices"


# ==================== SYNTHETIC DATA GENERATION ====================

class SyntheticDataGenerator:
    """Generate synthetic training data from templates (unchanged)"""
    
    def __init__(self, num_samples_per_intent: int = 1000):
        self.num_samples_per_intent = num_samples_per_intent
        self.templates = self._create_templates()
        self.slot_values = self._create_slot_values()
    
    def _create_templates(self) -> Dict[str, List[str]]:
        return {
            'buy_recommendation': [
                "Find {rating} rated {sector} bonds with {duration} duration",
                "Suggest {bond_type} bonds in {sector} sector",
                "Looking for {sector} bonds with {rating} rating",
                "Show me {duration} {sector} bonds rated {rating}",
                "Need {investor_type} investments in {sector}",
                "Want to buy {rating} {sector} bonds",
                "Recommend bonds for {investment_purpose}",
                "Find liquid {sector} bonds rated {rating}",
            ],
            'reduce_duration': [
                "Reduce portfolio duration from {old_duration} to {new_duration} years",
                "Lower interest rate sensitivity",
                "Decrease duration while {constraint}",
                "Shorten bond maturities due to {reason}",
                "Need to reduce duration risk",
                "Switch to shorter duration bonds",
            ],
            'increase_yield': [
                "Improve portfolio yield from {current_yield}% to {target_yield}%",
                "Find higher yielding alternatives",
                "Boost returns while maintaining {constraint}",
                "Need better yield than {current_yield}%",
                "Increase yield without sacrificing {factor}",
                "Look for yield enhancement opportunities",
            ],
            'hedge_volatility': [
                "Hedge against {risk_type} volatility",
                "Protect portfolio from rate risk",
                "Build defensive position",
                "Reduce sensitivity to interest rates",
                "Immunize portfolio against volatility",
            ],
            'sector_rebalance': [
                "Reduce {sector1} from {current_pct}% to {target_pct}%",
                "Too much concentration in {sector}",
                "Diversify away from {sector}",
                "Rebalance sector exposure",
                "Shift from {sector1} to {sector2}",
            ],
            'portfolio_analysis': [
                "Analyze my bond portfolio",
                "Review my holdings",
                "What are the risks in my portfolio?",
                "Check duration and sector exposures",
                "Evaluate credit quality distribution",
                "Is my portfolio well diversified?",
            ],
            'switch_bonds': [
                "Replace {entity} with better alternative",
                "Switch from {bond1} to {bond2}",
                "Find substitute for {entity}",
                "Swap {entity} for higher quality bond",
            ],
            'sell_recommendation': [
                "Should I sell {entity} given {reason}?",
                "Exit {entity} position",
                "Which bonds should I liquidate?",
                "Recommend bonds to sell",
            ],
            'market_outlook': [
                "What's your view on bond markets?",
                "How will {event} impact bonds?",
                "Is this good time to invest in {sector}?",
                "What's the outlook for {market_segment}?",
            ],
            'credit_analysis': [
                "Analyze credit quality of {entity}",
                "What's the default risk for {entity}?",
                "Compare credit profiles of {entity1} vs {entity2}",
                "Is {entity} likely to be downgraded?",
            ],
            'barbell_strategy': [
                "Create barbell with {short_duration} and {long_duration} bonds",
                "Build short-long strategy in {sector}",
                "Implement barbell approach",
            ],
            'forecast_prices': [
                "Forecast {entity} price for next {period}",
                "What will {entity} trade at in {timeframe}?",
                "Predict returns for {sector} bonds",
            ],
            'explain_recommendation': [
                "Why did you suggest {action}?",
                "Explain rationale behind recommendation",
                "What factors led to this suggestion?",
            ]
        }
    
    def _create_slot_values(self) -> Dict[str, List[str]]:
        return {
            'rating': ['AAA', 'AA+', 'AA', 'A+', 'A', 'BBB'],
            'sector': ['Sovereign', 'PSU Energy', 'Financial', 'Corporate', 
                      'Infrastructure', 'NBFC', 'Banking'],
            'bond_type': ['G-Sec', 'Corporate', 'PSU', 'SDL'],
            'duration': ['short', 'medium', 'long', '1-3 year', '5-7 year'],
            'investor_type': ['conservative', 'moderate', 'aggressive'],
            'investment_purpose': ['wealth preservation', 'income generation'],
            'constraint': ['maintaining yield', 'keeping liquidity'],
            'reason': ['rate risk', 'credit concerns', 'rebalancing'],
            'risk_type': ['interest rate', 'credit', 'duration'],
            'entity': ['HDFC Bank', 'ICICI Bank', 'NTPC', 'PFC'],
            'factor': ['credit quality', 'liquidity'],
            'event': ['RBI policy', 'Budget', 'inflation data'],
            'market_segment': ['government bonds', 'corporate bonds'],
            'period': ['quarter', '6 months', 'year'],
            'timeframe': ['3 months', '6 months'],
            'action': ['buying', 'selling', 'switching']
        }
    
    def generate_dataset(self) -> List[Dict]:
        """Generate complete dataset"""
        dataset = []
        
        print("=" * 60)
        print("GENERATING SYNTHETIC DATA")
        print("=" * 60)
        
        for intent, templates in tqdm(self.templates.items(), desc="Generating synthetic"):
            for _ in range(self.num_samples_per_intent):
                template = random.choice(templates)
                query = self._fill_template(template)
                
                sample = {
                    'text': query,
                    'intent': intent,
                    'sectors': self._extract_sectors(query),
                    'rating': self._extract_rating(query),
                    'duration': self._extract_duration(query),
                    'constraints': self._extract_constraints(query)
                }
                dataset.append(sample)
        
        print(f"✓ Generated {len(dataset)} synthetic samples\n")
        return dataset
    
    def _fill_template(self, template: str) -> str:
        """Fill template with random values"""
        query = template
        
        for slot_type, values in self.slot_values.items():
            placeholder = f"{{{slot_type}}}"
            if placeholder in query:
                query = query.replace(placeholder, random.choice(values))
        
        # Fill numeric placeholders
        query = re.sub(r'\{old_duration\}', str(random.randint(5, 10)), query)
        query = re.sub(r'\{new_duration\}', str(random.randint(2, 5)), query)
        query = re.sub(r'\{current_yield\}', f"{random.uniform(6.0, 7.5):.1f}", query)
        query = re.sub(r'\{target_yield\}', f"{random.uniform(7.5, 9.0):.1f}", query)
        query = re.sub(r'\{current_pct\}', str(random.randint(30, 50)), query)
        query = re.sub(r'\{target_pct\}', str(random.randint(15, 25)), query)
        
        # Fill remaining
        query = re.sub(r'\{[^}]+\}', 'bonds', query)
        return query
    
    def _extract_sectors(self, query: str) -> List[str]:
        sectors = []
        for sector in ['Sovereign', 'PSU Energy', 'Financial', 'Corporate']:
            if sector.lower() in query.lower():
                sectors.append(sector)
        return sectors
    
    def _extract_rating(self, query: str) -> Optional[str]:
        for rating in ['AAA', 'AA+', 'AA', 'A+', 'A', 'BBB']:
            if rating in query:
                return rating
        return None
    
    def _extract_duration(self, query: str) -> str:
        query_lower = query.lower()
        if any(kw in query_lower for kw in ['short', '1-3']):
            return 'short'
        elif any(kw in query_lower for kw in ['long', '7-10']):
            return 'long'
        return 'medium'
    
    def _extract_constraints(self, query: str) -> Dict[str, bool]:
        query_lower = query.lower()
        return {
            'preserve_yield': 'maintain' in query_lower and 'yield' in query_lower,
            'maintain_liquidity': 'liquid' in query_lower,
            'avoid_downgrades': 'credit' in query_lower,
            'sector_diversity': 'divers' in query_lower,
            'rating_above_aa': 'AAA' in query or 'AA' in query
        }


# ==================== DATA AUGMENTATION ====================

class DataAugmenter:
    """Augment data with cheap text variations"""
    
    @staticmethod
    def augment_dataset(dataset: List[Dict], factor: float = 0.5) -> List[Dict]:
        """Augment dataset"""
        num_to_augment = int(len(dataset) * factor)
        samples = random.sample(dataset, min(num_to_augment, len(dataset)))
        
        print("=" * 60)
        print("AUGMENTING DATA")
        print("=" * 60)
        
        augmented = []
        for sample in tqdm(samples, desc="Augmenting"):
            aug_type = random.choice(['synonym', 'insertion', 'deletion'])
            
            if aug_type == 'synonym':
                augmented.append(DataAugmenter._synonym_replacement(sample))
            elif aug_type == 'insertion':
                augmented.append(DataAugmenter._random_insertion(sample))
            else:
                augmented.append(DataAugmenter._random_deletion(sample))
        
        print(f"✓ Added {len(augmented)} augmented samples\n")
        return dataset + augmented
    
    @staticmethod
    def _synonym_replacement(sample: Dict) -> Dict:
        synonyms = {
            'find': ['locate', 'search for', 'look for'],
            'bonds': ['securities', 'instruments'],
            'high': ['elevated', 'strong'],
            'yield': ['return', 'interest'],
        }
        
        words = sample['text'].split()
        for i, word in enumerate(words):
            if word.lower() in synonyms and random.random() < 0.3:
                words[i] = random.choice(synonyms[word.lower()])
        
        new_sample = sample.copy()
        new_sample['text'] = ' '.join(words)
        return new_sample
    
    @staticmethod
    def _random_insertion(sample: Dict) -> Dict:
        words = sample['text'].split()
        if len(words) > 3:
            pos = random.randint(0, len(words))
            words.insert(pos, random.choice(['please', 'kindly', 'also']))
        
        new_sample = sample.copy()
        new_sample['text'] = ' '.join(words)
        return new_sample
    
    @staticmethod
    def _random_deletion(sample: Dict) -> Dict:
        words = sample['text'].split()
        words = [w for w in words if w.lower() not in ['please', 'kindly'] or random.random() > 0.5]
        
        new_sample = sample.copy()
        new_sample['text'] = ' '.join(words) if words else sample['text']
        return new_sample


# ==================== NEW: LLM DATA GENERATOR ====================

class LLMIntentDataGenerator:
    """
    Use a Gemini Flash model to generate realistic labelled queries per intent.
    Requires GEMINI_API_KEY to be set in environment.
    """
    def __init__(
        self,
        model_name: str,
        samples_per_intent: int = 200,
        max_per_call: int = 20
    ):
        api_key = os.environ.get("GEMINI_API_KEY")
        if not api_key:
            raise RuntimeError(
                "GEMINI_API_KEY not set. "
                "Set it in an earlier cell with os.environ['GEMINI_API_KEY'] = '...'"
            )

        # Gemini client
        self.client = genai.Client(api_key=api_key)
        self.model_name = model_name
        self.samples_per_intent = samples_per_intent
        self.max_per_call = max_per_call

        # For metadata validation
        self.allowed_sectors = [
            'Sovereign', 'PSU Energy', 'Financial',
            'Corporate', 'Infrastructure', 'NBFC', 'Banking'
        ]
        self.allowed_ratings = ['AAA', 'AA+', 'AA', 'A+', 'A', 'BBB']
        self.allowed_durations = ['short', 'medium', 'long']

        # Human description per intent to guide the LLM
        self.intent_descriptions = {
            "buy_recommendation": "User wants recommendations of which bonds to buy, often with preferences about sector, rating, duration or yield.",
            "sell_recommendation": "User wants to know what to sell or whether to exit certain bonds or positions.",
            "portfolio_analysis": "User wants analysis or diagnosis of their current portfolio, exposures, risks and diversification.",
            "reduce_duration": "User wants to reduce interest rate risk or duration of their bond portfolio.",
            "increase_yield": "User wants to increase portfolio yield or returns, often by moving to higher-yielding bonds.",
            "hedge_volatility": "User wants to hedge or reduce the impact of rate or price volatility.",
            "sector_rebalance": "User wants to rebalance sector allocation or reduce concentration in particular sectors.",
            "barbell_strategy": "User wants a barbell strategy (mix of short and long duration bonds).",
            "switch_bonds": "User wants to switch from one bond or issuer to another similar bond.",
            "explain_recommendation": "User wants explanation or rationale for a previous recommendation or trade idea.",
            "market_outlook": "User wants outlook on bond markets, yields, interest rates or spreads.",
            "credit_analysis": "User wants analysis of credit quality, default risk or rating outlook.",
            "forecast_prices": "User wants explicit forecasts of future bond prices or yields."
        }

    def generate_dataset(self) -> List[Dict]:
        """Generate LLM-labelled dataset across all intents."""
        print("=" * 60)
        print("GENERATING LLM DATA (Gemini)")
        print("=" * 60)

        all_samples: List[Dict] = []

        for intent in QueryIntent:
            intent_name = intent.value
            needed = self.samples_per_intent
            print(f"\n→ Generating LLM data for intent: {intent_name} ({needed} samples)")
            while needed > 0:
                batch_size = min(self.max_per_call, needed)
                batch = self._generate_batch_for_intent(intent_name, batch_size)
                all_samples.extend(batch)
                needed -= len(batch)
                print(
                    f"  Collected {self.samples_per_intent - needed}/"
                    f"{self.samples_per_intent} for {intent_name}"
                )

        print(f"\n✓ Generated {len(all_samples)} LLM-labelled samples\n")
        return all_samples

    def _generate_batch_for_intent(self, intent_name: str, batch_size: int) -> List[Dict]:
        """Ask Gemini for a small batch of JSONL examples for a single intent."""
        description = self.intent_descriptions[intent_name]

        system_msg = (
            "You are an expert bond investment assistant generating synthetic "
            "training data for supervised learning. You must produce realistic "
            "user queries for a bond assistant."
        )

        user_prompt = f"""
Generate {batch_size} diverse user queries whose PRIMARY intent is: "{intent_name}".

Intent description:
{description}

For EACH example, output ONE JSON object on its own line (JSON Lines format).
Do NOT wrap the objects in an array. Do NOT add any commentary or code fences.

Each JSON object MUST have exactly these keys:
- "text": (string) realistic user query in natural language
- "intent": (string) MUST be exactly "{intent_name}"
- "sectors": (array of strings) zero or more from this list ONLY:
  ["Sovereign", "PSU Energy", "Financial", "Corporate", "Infrastructure", "NBFC", "Banking"]
- "rating": (string or null) one of ["AAA","AA+","AA","A+","A","BBB"] or null
- "duration": (string) one of ["short","medium","long"]
- "constraints": (object) with boolean fields:
  {{
    "preserve_yield": <true/false>,
    "maintain_liquidity": <true/false>,
    "avoid_downgrades": <true/false>,
    "sector_diversity": <true/false>,
    "rating_above_aa": <true/false>
  }}

Guidelines:
- Mix short queries and longer multi-sentence queries.
- Vary tone: retail investor, professional PM, brief, descriptive, etc.
- Do NOT leak these guidelines into the "text" field.
- Keep everything realistic and specific to bonds (NOT equities or crypto).
"""

        # Simple single-text prompt is enough for Gemini
        full_prompt = system_msg + "\n\n" + user_prompt

        resp = self.client.models.generate_content(
            model=self.model_name,
            contents=full_prompt,
        )

        content = resp.text or ""
        return self._parse_jsonl(content, intent_name)

    def _parse_jsonl(self, raw: str, intent_name: str) -> List[Dict]:
        """Parse JSON-lines text returned by the LLM."""
        samples: List[Dict] = []
        for line in raw.splitlines():
            line = line.strip()
            if not line:
                continue
            # Strip bullets if Gemini gets chatty
            if line.startswith("```"):
                continue
            if line.startswith("-"):
                line = line.lstrip("-").strip()
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue  # skip bad lines

            sample = self._normalize_record(obj, intent_name)
            if sample is not None:
                samples.append(sample)
        return samples

    def _normalize_record(self, obj: Dict[str, Any], intent_name: str) -> Optional[Dict]:
        """Clean up and validate a single sample from the LLM."""
        text = str(obj.get("text", "")).strip()
        if not text:
            return None

        # Force our canonical intent name
        intent = obj.get("intent", intent_name)
        if intent != intent_name:
            intent = intent_name

        # Sectors
        raw_sectors = obj.get("sectors") or []
        sectors: List[str] = []
        if isinstance(raw_sectors, list):
            for s in raw_sectors:
                s_str = str(s).strip()
                if s_str in self.allowed_sectors and s_str not in sectors:
                    sectors.append(s_str)

        # Rating
        rating = obj.get("rating")
        if rating is None:
            rating_norm = None
        else:
            rating_str = str(rating).strip()
            rating_norm = rating_str if rating_str in self.allowed_ratings else None

        # Duration
        duration = str(obj.get("duration", "medium")).strip().lower()
        if duration not in self.allowed_durations:
            duration = "medium"

        # Constraints
        raw_constraints = obj.get("constraints") or {}
        constraints_defaults = {
            "preserve_yield": False,
            "maintain_liquidity": False,
            "avoid_downgrades": False,
            "sector_diversity": False,
            "rating_above_aa": False,
        }
        if isinstance(raw_constraints, dict):
            for k in list(constraints_defaults.keys()):
                if k in raw_constraints:
                    constraints_defaults[k] = bool(raw_constraints[k])

        # If rating is high, we can set rating_above_aa = True
        if rating_norm in ("AAA", "AA+", "AA"):
            constraints_defaults["rating_above_aa"] = True

        return {
            "text": text,
            "intent": intent,
            "sectors": sectors,
            "rating": rating_norm,
            "duration": duration,
            "constraints": constraints_defaults,
        }


# ==================== PYTORCH DATASET ====================

class BondQueryDataset(Dataset):
    """PyTorch Dataset"""
    
    def __init__(self, data: List[Dict], tokenizer, max_length: int = 128):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length
        
        self.intent_to_id = {
            'buy_recommendation': 0, 'sell_recommendation': 1,
            'portfolio_analysis': 2, 'reduce_duration': 3,
            'increase_yield': 4, 'hedge_volatility': 5,
            'sector_rebalance': 6, 'barbell_strategy': 7,
            'switch_bonds': 8, 'explain_recommendation': 9,
            'market_outlook': 10, 'credit_analysis': 11,
            'forecast_prices': 12
        }
        
        self.sector_to_id = {
            'Sovereign': 0, 'PSU Energy': 1, 'Financial': 2,
            'Corporate': 3, 'Infrastructure': 4, 'NBFC': 5, 'Banking': 6
        }
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        sample = self.data[idx]
        
        encoding = self.tokenizer(
            sample['text'],
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        intent_label = self.intent_to_id[sample['intent']]
        
        sector_labels = torch.zeros(len(self.sector_to_id))
        for sector in sample.get('sectors', []):
            if sector in self.sector_to_id:
                sector_labels[self.sector_to_id[sector]] = 1
        
        rating = sample.get('rating')
        rating_map = {'AAA': 0, 'AA+': 1, 'AA': 2, 'A+': 3, 'A': 4, 'BBB': 5}
        rating_label = rating_map.get(rating, 6)
        
        duration_map = {'short': 0, 'medium': 1, 'long': 2}
        duration_label = duration_map.get(sample.get('duration', 'medium'), 1)
        
        constraints = sample.get('constraints', {})
        constraint_labels = torch.tensor([
            float(constraints.get('preserve_yield', False)),
            float(constraints.get('maintain_liquidity', False)),
            float(constraints.get('avoid_downgrades', False)),
            float(constraints.get('sector_diversity', False)),
            float(constraints.get('rating_above_aa', False))
        ])
        
        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'intent_label': torch.tensor(intent_label),
            'sector_labels': sector_labels,
            'rating_label': torch.tensor(rating_label),
            'duration_label': torch.tensor(duration_label),
            'constraint_labels': constraint_labels
        }


# ==================== MODEL ====================

class ProductionBondClassifier(nn.Module):
    """Multi-task classifier"""
    
    def __init__(self, base_model: str = 'distilbert-base-uncased', dropout: float = 0.15):
        super().__init__()
        
        self.bert = AutoModel.from_pretrained(base_model)
        hidden_size = self.bert.config.hidden_size
        
        self.feature_layer = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.Dropout(dropout),
            nn.GELU(),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.LayerNorm(hidden_size // 2),
            nn.Dropout(dropout),
            nn.GELU()
        )
        
        feature_size = hidden_size // 2
        
        self.intent_head = nn.Linear(feature_size, 13)
        self.sector_head = nn.Linear(feature_size, 7)
        self.rating_head = nn.Linear(feature_size, 7)
        self.duration_head = nn.Linear(feature_size, 3)
        self.constraint_head = nn.Linear(feature_size, 5)
        
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0, :]
        features = self.feature_layer(cls_output)
        features = self.dropout(features)
        
        return {
            'intent_logits': self.intent_head(features),
            'sector_logits': self.sector_head(features),
            'rating_logits': self.rating_head(features),
            'duration_logits': self.duration_head(features),
            'constraint_logits': self.constraint_head(features),
        }


# ==================== LOSS ====================

class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
    
    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss
        return focal_loss.mean()


class MultiTaskLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.intent_loss_fn = FocalLoss(gamma=2.0)
        self.sector_loss_fn = nn.BCEWithLogitsLoss()
        self.rating_loss_fn = nn.CrossEntropyLoss()
        self.duration_loss_fn = nn.CrossEntropyLoss()
        self.constraint_loss_fn = nn.BCEWithLogitsLoss()
    
    def forward(self, outputs, labels):
        intent_loss = self.intent_loss_fn(outputs['intent_logits'], labels['intent_label'])
        sector_loss = self.sector_loss_fn(outputs['sector_logits'], labels['sector_labels'])
        rating_loss = self.rating_loss_fn(outputs['rating_logits'], labels['rating_label'])
        duration_loss = self.duration_loss_fn(outputs['duration_logits'], labels['duration_label'])
        constraint_loss = self.constraint_loss_fn(outputs['constraint_logits'], labels['constraint_labels'])
        
        total = intent_loss + 0.5*sector_loss + 0.3*rating_loss + 0.3*duration_loss + 0.4*constraint_loss
        
        return {'total': total, 'intent': intent_loss}


# ==================== TRAINER ====================

class Trainer:
    def __init__(self, model, train_loader, val_loader, optimizer, scheduler, criterion, device, output_dir):
        self.model = model
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.criterion = criterion
        self.device = device
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.best_acc = 0.0
    
    def train_epoch(self, epoch):
        self.model.train()
        total_loss = 0
        all_preds, all_labels = [], []
        
        pbar = tqdm(self.train_loader, desc=f'Epoch {epoch}')
        for batch in pbar:
            input_ids = batch['input_ids'].to(self.device)
            attention_mask = batch['attention_mask'].to(self.device)
            
            labels = {k: v.to(self.device) for k, v in batch.items() 
                     if k not in ['input_ids', 'attention_mask']}
            
            self.optimizer.zero_grad()
            outputs = self.model(input_ids, attention_mask)
            loss_dict = self.criterion(outputs, labels)
            loss = loss_dict['total']
            
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
            self.optimizer.step()
            self.scheduler.step()
            
            total_loss += loss.item()
            preds = outputs['intent_logits'].argmax(dim=-1).cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels['intent_label'].cpu().numpy())
            
            pbar.set_postfix({
                'loss': f'{loss.item():.4f}',
                'acc': f'{accuracy_score(all_labels[-len(preds):], preds):.3f}'
            })
        
        return total_loss / len(self.train_loader)
    
    def evaluate(self):
        self.model.eval()
        total_loss = 0
        all_preds, all_labels = [], []
        
        with torch.no_grad():
            for batch in tqdm(self.val_loader, desc='Evaluating'):
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = {k: v.to(self.device) for k, v in batch.items() 
                         if k not in ['input_ids', 'attention_mask']}
                
                outputs = self.model(input_ids, attention_mask)
                loss_dict = self.criterion(outputs, labels)
                total_loss += loss_dict['total'].item()
                
                preds = outputs['intent_logits'].argmax(dim=-1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels['intent_label'].cpu().numpy())
        
        return {
            'loss': total_loss / len(self.val_loader),
            'accuracy': accuracy_score(all_labels, all_preds),
            'f1_macro': f1_score(all_labels, all_preds, average='macro')
        }
    
    def save_checkpoint(self, epoch, metrics):
        if metrics['accuracy'] > self.best_acc:
            self.best_acc = metrics['accuracy']
            torch.save(self.model.state_dict(), self.output_dir / 'pytorch_model.bin')
            print(f"✓ New best model saved (acc: {metrics['accuracy']:.4f})")
    
    def train(self, num_epochs):
        print("=" * 60)
        print("TRAINING")
        print("=" * 60)
        
        for epoch in range(1, num_epochs + 1):
            train_loss = self.train_epoch(epoch)
            print(f"\nEpoch {epoch}/{num_epochs} - Train Loss: {train_loss:.4f}")
            
            val_metrics = self.evaluate()
            print(f"Val Loss: {val_metrics['loss']:.4f}")
            print(f"Val Accuracy: {val_metrics['accuracy']:.4f}")
            print(f"Val F1: {val_metrics['f1_macro']:.4f}\n")
            
            self.save_checkpoint(epoch, val_metrics)
        
        print(f"Training complete! Best accuracy: {self.best_acc:.4f}\n")


# ==================== MAIN TRAINING FUNCTION ====================

def train_model():
    """Main training function"""
    
    # ---- 1) Synthetic data ----
    synth_gen = SyntheticDataGenerator(CONFIG['num_samples_per_intent'])
    synthetic_dataset = synth_gen.generate_dataset()
    
    # ---- 2) LLM data (optional) ----
    llm_dataset: List[Dict] = []
    if CONFIG.get('use_llm_data', False):
        cache_path = Path(CONFIG['llm_dataset_cache'])
        if CONFIG.get('reuse_cached_llm_data', True) and cache_path.exists():
            print("=" * 60)
            print("LOADING CACHED LLM DATA")
            print("=" * 60)
            with cache_path.open("r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        llm_dataset.append(json.loads(line))
                    except json.JSONDecodeError:
                        continue
            print(f"✓ Loaded {len(llm_dataset)} LLM samples from cache\n")
        else:
            llm_gen = LLMIntentDataGenerator(
                model_name=CONFIG['llm_model'],
                samples_per_intent=CONFIG['llm_samples_per_intent']
            )
            llm_dataset = llm_gen.generate_dataset()
            cache_path.parent.mkdir(parents=True, exist_ok=True)
            print(f"Saving LLM dataset to: {cache_path}")
            with cache_path.open("w", encoding="utf-8") as f:
                for s in llm_dataset:
                    f.write(json.dumps(s, ensure_ascii=False) + "\n")
            print("✓ LLM dataset cached\n")
    
    # ---- 3) Merge datasets ----
    dataset = synthetic_dataset + llm_dataset
    print(f"Total base dataset size (synthetic + LLM): {len(dataset)} samples\n")
    
    # ---- 4) Augment ----
    dataset = DataAugmenter.augment_dataset(dataset, CONFIG['augmentation_factor'])
    print(f"Total dataset size after augmentation: {len(dataset)} samples\n")
    
    # ---- 5) Split ----
    print("=" * 60)
    print("SPLITTING DATA")
    print("=" * 60)
    intents = [s['intent'] for s in dataset]
    train_data, temp_data = train_test_split(dataset, test_size=0.3, stratify=intents, random_state=42)
    temp_intents = [s['intent'] for s in temp_data]
    val_data, test_data = train_test_split(temp_data, test_size=0.5, stratify=temp_intents, random_state=42)
    
    print(f"Train: {len(train_data)}")
    print(f"Val:   {len(val_data)}")
    print(f"Test:  {len(test_data)}\n")
    
    # ---- 6) Tokenizer & Datasets ----
    print("=" * 60)
    print("LOADING MODEL & TOKENIZER")
    print("=" * 60)
    tokenizer = AutoTokenizer.from_pretrained(CONFIG['base_model'])
    
    train_dataset = BondQueryDataset(train_data, tokenizer, CONFIG['max_length'])
    val_dataset = BondQueryDataset(val_data, tokenizer, CONFIG['max_length'])
    test_dataset = BondQueryDataset(test_data, tokenizer, CONFIG['max_length'])
    
    train_loader = DataLoader(train_dataset, batch_size=CONFIG['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=CONFIG['batch_size'])
    test_loader = DataLoader(test_dataset, batch_size=CONFIG['batch_size'])
    
    # ---- 7) Model ----
    model = ProductionBondClassifier(CONFIG['base_model'])
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    
    print(f"✓ Model loaded: {sum(p.numel() for p in model.parameters()):,} parameters")
    print(f"✓ Device: {device}\n")
    
    # ---- 8) Optimizer & Scheduler ----
    optimizer = torch.optim.AdamW(model.parameters(), lr=CONFIG['learning_rate'], weight_decay=0.01)
    num_training_steps = len(train_loader) * CONFIG['num_epochs']
    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(num_training_steps * CONFIG['warmup_ratio']),
        num_training_steps=num_training_steps
    )
    
    criterion = MultiTaskLoss()
    
    # ---- 9) Train ----
    trainer = Trainer(model, train_loader, val_loader, optimizer, scheduler, 
                     criterion, device, CONFIG['output_dir'])
    trainer.train(CONFIG['num_epochs'])
    
    # ---- 10) Save tokenizer ----
    tokenizer.save_pretrained(CONFIG['output_dir'])
    
    # ---- 11) Final test ----
    print("=" * 60)
    print("FINAL TEST")
    print("=" * 60)
    model.eval()
    all_preds, all_labels = [], []
    
    with torch.no_grad():
        for batch in tqdm(test_loader, desc='Testing'):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            outputs = model(input_ids, attention_mask)
            preds = outputs['intent_logits'].argmax(dim=-1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(batch['intent_label'].numpy())
    
    test_acc = accuracy_score(all_labels, all_preds)
    test_f1 = f1_score(all_labels, all_preds, average='macro')
    
    print(f"\n✓ Test Accuracy: {test_acc:.4f}")
    print(f"✓ Test F1 Macro: {test_f1:.4f}\n")
    
    print("=" * 60)
    print("TRAINING COMPLETE!")
    print("=" * 60)
    print(f"\n✓ Model saved to: {CONFIG['output_dir']}")
    print(f"✓ Files: pytorch_model.bin, tokenizer files")
    print("\nTo download:")
    print("  1. Go to Output tab")
    print("  2. Download 'bond_classifier_v3' folder")
    print("  3. Use locally with your inference notebook")


# ==================== RUN ====================

if __name__ == '__main__':
    train_model()


INSTALLING DEPENDENCIES
✓ Dependencies installed!

GPU CHECK
✓ GPU detected: Tesla T4
✓ GPU memory: 15.83 GB
✓ CUDA version: 12.4

GENERATING SYNTHETIC DATA


Generating synthetic:   0%|          | 0/13 [00:00<?, ?it/s]

✓ Generated 10400 synthetic samples

GENERATING LLM DATA (Gemini)

→ Generating LLM data for intent: buy_recommendation (200 samples)
  Collected 20/200 for buy_recommendation
  Collected 40/200 for buy_recommendation
  Collected 60/200 for buy_recommendation
  Collected 80/200 for buy_recommendation
  Collected 100/200 for buy_recommendation
  Collected 120/200 for buy_recommendation
  Collected 140/200 for buy_recommendation
  Collected 160/200 for buy_recommendation
  Collected 180/200 for buy_recommendation
  Collected 200/200 for buy_recommendation

→ Generating LLM data for intent: sell_recommendation (200 samples)
  Collected 20/200 for sell_recommendation
  Collected 40/200 for sell_recommendation
  Collected 60/200 for sell_recommendation
  Collected 80/200 for sell_recommendation
  Collected 100/200 for sell_recommendation
  Collected 120/200 for sell_recommendation
  Collected 140/200 for sell_recommendation
  Collected 160/200 for sell_recommendation
  Collected 180/200 for

Augmenting:   0%|          | 0/6500 [00:00<?, ?it/s]

✓ Added 6500 augmented samples

Total dataset size after augmentation: 19500 samples

SPLITTING DATA
Train: 13650
Val:   2925
Test:  2925

LOADING MODEL & TOKENIZER


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/578 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

2025-11-30 07:31:20.989825: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764487881.193815      47 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764487881.251439      47 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

pytorch_model.bin:   0%|          | 0.00/286M [00:00<?, ?B/s]

✓ Model loaded: 142,205,987 parameters
✓ Device: cuda

TRAINING


Epoch 1:   0%|          | 0/427 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/286M [00:00<?, ?B/s]


Epoch 1/4 - Train Loss: 1.5893


Evaluating:   0%|          | 0/92 [00:00<?, ?it/s]

Val Loss: 0.4190
Val Accuracy: 0.9942
Val F1: 0.9942

✓ New best model saved (acc: 0.9942)


Epoch 2:   0%|          | 0/427 [00:00<?, ?it/s]


Epoch 2/4 - Train Loss: 0.3604


Evaluating:   0%|          | 0/92 [00:00<?, ?it/s]

Val Loss: 0.2936
Val Accuracy: 0.9925
Val F1: 0.9925



Epoch 3:   0%|          | 0/427 [00:00<?, ?it/s]


Epoch 3/4 - Train Loss: 0.2755


Evaluating:   0%|          | 0/92 [00:00<?, ?it/s]

Val Loss: 0.2490
Val Accuracy: 0.9959
Val F1: 0.9959

✓ New best model saved (acc: 0.9959)


Epoch 4:   0%|          | 0/427 [00:00<?, ?it/s]


Epoch 4/4 - Train Loss: 0.2502


Evaluating:   0%|          | 0/92 [00:00<?, ?it/s]

Val Loss: 0.2409
Val Accuracy: 0.9945
Val F1: 0.9945

Training complete! Best accuracy: 0.9959

FINAL TEST


Testing:   0%|          | 0/92 [00:00<?, ?it/s]


✓ Test Accuracy: 0.9945
✓ Test F1 Macro: 0.9945

TRAINING COMPLETE!

✓ Model saved to: /kaggle/working/bond_classifier_v3
✓ Files: pytorch_model.bin, tokenizer files

To download:
  1. Go to Output tab
  2. Download 'bond_classifier_v3' folder
  3. Use locally with your inference notebook


In [12]:
# ==================== CONFIGURATION ====================
CONFIG = {
    'base_model': 'microsoft/deberta-v3-small',
    'num_samples_per_intent': 800,   # synthetic per intent (you can tune)
    'augmentation_factor': 0.5,      # 50% more data

    # --- NEW: LLM data generation config ---
    'use_llm_data': True,            # toggle this to disable LLM data
    'llm_samples_per_intent': 200,   # how many LLM examples per intent
    'llm_model': 'gemini-2.0-flash',  # or 'gemini-1.5-flash' if you prefer

    'llm_dataset_cache': '/kaggle/working/llm_intent_dataset.jsonl',
    'reuse_cached_llm_data': True,   # if cache exists, reuse instead of regenerating

    'batch_size': 32,
    'num_epochs': 10,
    'learning_rate': 2e-5,
    'warmup_ratio': 0.1,
    'max_length': 128,
    'output_dir': '/kaggle/working/bond_classifier_v3',
    'seed': 42
}

# ==================== MODEL ====================

class ProductionBondClassifier(nn.Module):
    """Multi-task classifier"""
    
    def __init__(self, base_model: str = 'distilbert-base-uncased', dropout: float = 0.15):
        super().__init__()
        
        self.bert = AutoModel.from_pretrained(base_model)
        hidden_size = self.bert.config.hidden_size
        
        self.feature_layer = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.Dropout(dropout),
            nn.GELU(),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.LayerNorm(hidden_size // 2),
            nn.Dropout(dropout),
            nn.GELU()
        )
        
        feature_size = hidden_size // 2
        
        self.intent_head = nn.Linear(feature_size, 13)
        self.sector_head = nn.Linear(feature_size, 7)
        self.rating_head = nn.Linear(feature_size, 7)
        self.duration_head = nn.Linear(feature_size, 3)
        self.constraint_head = nn.Linear(feature_size, 5)
        
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0, :]
        features = self.feature_layer(cls_output)
        features = self.dropout(features)
        
        return {
            'intent_logits': self.intent_head(features),
            'sector_logits': self.sector_head(features),
            'rating_logits': self.rating_head(features),
            'duration_logits': self.duration_head(features),
            'constraint_logits': self.constraint_head(features),
        }


# ==================== LOSS ====================

class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
    
    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss
        return focal_loss.mean()


class MultiTaskLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.intent_loss_fn = FocalLoss(gamma=2.0)
        self.sector_loss_fn = nn.BCEWithLogitsLoss()
        self.rating_loss_fn = nn.CrossEntropyLoss()
        self.duration_loss_fn = nn.CrossEntropyLoss()
        self.constraint_loss_fn = nn.BCEWithLogitsLoss()
    
    def forward(self, outputs, labels):
        intent_loss = self.intent_loss_fn(outputs['intent_logits'], labels['intent_label'])
        sector_loss = self.sector_loss_fn(outputs['sector_logits'], labels['sector_labels'])
        rating_loss = self.rating_loss_fn(outputs['rating_logits'], labels['rating_label'])
        duration_loss = self.duration_loss_fn(outputs['duration_logits'], labels['duration_label'])
        constraint_loss = self.constraint_loss_fn(outputs['constraint_logits'], labels['constraint_labels'])
        
        total = intent_loss + 0.5*sector_loss + 0.3*rating_loss + 0.3*duration_loss + 0.4*constraint_loss
        
        return {'total': total, 'intent': intent_loss}


# ==================== TRAINER ====================

class Trainer:
    def __init__(self, model, train_loader, val_loader, optimizer, scheduler, criterion, device, output_dir):
        self.model = model
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.criterion = criterion
        self.device = device
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.best_acc = 0.0
    
    def train_epoch(self, epoch):
        self.model.train()
        total_loss = 0
        all_preds, all_labels = [], []
        
        pbar = tqdm(self.train_loader, desc=f'Epoch {epoch}')
        for batch in pbar:
            input_ids = batch['input_ids'].to(self.device)
            attention_mask = batch['attention_mask'].to(self.device)
            
            labels = {k: v.to(self.device) for k, v in batch.items() 
                     if k not in ['input_ids', 'attention_mask']}
            
            self.optimizer.zero_grad()
            outputs = self.model(input_ids, attention_mask)
            loss_dict = self.criterion(outputs, labels)
            loss = loss_dict['total']
            
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
            self.optimizer.step()
            self.scheduler.step()
            
            total_loss += loss.item()
            preds = outputs['intent_logits'].argmax(dim=-1).cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels['intent_label'].cpu().numpy())
            
            pbar.set_postfix({
                'loss': f'{loss.item():.4f}',
                'acc': f'{accuracy_score(all_labels[-len(preds):], preds):.3f}'
            })
        
        return total_loss / len(self.train_loader)
    
    def evaluate(self):
        self.model.eval()
        total_loss = 0
        all_preds, all_labels = [], []
        
        with torch.no_grad():
            for batch in tqdm(self.val_loader, desc='Evaluating'):
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = {k: v.to(self.device) for k, v in batch.items() 
                         if k not in ['input_ids', 'attention_mask']}
                
                outputs = self.model(input_ids, attention_mask)
                loss_dict = self.criterion(outputs, labels)
                total_loss += loss_dict['total'].item()
                
                preds = outputs['intent_logits'].argmax(dim=-1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels['intent_label'].cpu().numpy())
        
        return {
            'loss': total_loss / len(self.val_loader),
            'accuracy': accuracy_score(all_labels, all_preds),
            'f1_macro': f1_score(all_labels, all_preds, average='macro')
        }
    
    def save_checkpoint(self, epoch, metrics):
        if metrics['accuracy'] > self.best_acc:
            self.best_acc = metrics['accuracy']
            torch.save(self.model.state_dict(), self.output_dir / 'pytorch_model.bin')
            print(f"✓ New best model saved (acc: {metrics['accuracy']:.4f})")
    
    def train(self, num_epochs):
        print("=" * 60)
        print("TRAINING")
        print("=" * 60)
        
        for epoch in range(1, num_epochs + 1):
            train_loss = self.train_epoch(epoch)
            print(f"\nEpoch {epoch}/{num_epochs} - Train Loss: {train_loss:.4f}")
            
            val_metrics = self.evaluate()
            print(f"Val Loss: {val_metrics['loss']:.4f}")
            print(f"Val Accuracy: {val_metrics['accuracy']:.4f}")
            print(f"Val F1: {val_metrics['f1_macro']:.4f}\n")
            
            self.save_checkpoint(epoch, val_metrics)
        
        print(f"Training complete! Best accuracy: {self.best_acc:.4f}\n")


# ==================== MAIN TRAINING FUNCTION ====================

def train_model():
    """Main training function"""
    
    # ---- 1) Synthetic data ----
    synth_gen = SyntheticDataGenerator(CONFIG['num_samples_per_intent'])
    synthetic_dataset = synth_gen.generate_dataset()
    
    # ---- 2) LLM data (optional) ----
    llm_dataset: List[Dict] = []
    if CONFIG.get('use_llm_data', False):
        cache_path = Path(CONFIG['llm_dataset_cache'])
        if CONFIG.get('reuse_cached_llm_data', True) and cache_path.exists():
            print("=" * 60)
            print("LOADING CACHED LLM DATA")
            print("=" * 60)
            with cache_path.open("r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        llm_dataset.append(json.loads(line))
                    except json.JSONDecodeError:
                        continue
            print(f"✓ Loaded {len(llm_dataset)} LLM samples from cache\n")
        else:
            llm_gen = LLMIntentDataGenerator(
                model_name=CONFIG['llm_model'],
                samples_per_intent=CONFIG['llm_samples_per_intent']
            )
            llm_dataset = llm_gen.generate_dataset()
            cache_path.parent.mkdir(parents=True, exist_ok=True)
            print(f"Saving LLM dataset to: {cache_path}")
            with cache_path.open("w", encoding="utf-8") as f:
                for s in llm_dataset:
                    f.write(json.dumps(s, ensure_ascii=False) + "\n")
            print("✓ LLM dataset cached\n")
    
    # ---- 3) Merge datasets ----
    dataset = synthetic_dataset + llm_dataset
    print(f"Total base dataset size (synthetic + LLM): {len(dataset)} samples\n")
    
    # ---- 4) Augment ----
    dataset = DataAugmenter.augment_dataset(dataset, CONFIG['augmentation_factor'])
    print(f"Total dataset size after augmentation: {len(dataset)} samples\n")
    
    # ---- 5) Split ----
    print("=" * 60)
    print("SPLITTING DATA")
    print("=" * 60)
    intents = [s['intent'] for s in dataset]
    train_data, temp_data = train_test_split(dataset, test_size=0.3, stratify=intents, random_state=42)
    temp_intents = [s['intent'] for s in temp_data]
    val_data, test_data = train_test_split(temp_data, test_size=0.5, stratify=temp_intents, random_state=42)
    
    print(f"Train: {len(train_data)}")
    print(f"Val:   {len(val_data)}")
    print(f"Test:  {len(test_data)}\n")
    
    # ---- 6) Tokenizer & Datasets ----
    print("=" * 60)
    print("LOADING MODEL & TOKENIZER")
    print("=" * 60)
    tokenizer = AutoTokenizer.from_pretrained(CONFIG['base_model'])
    
    train_dataset = BondQueryDataset(train_data, tokenizer, CONFIG['max_length'])
    val_dataset = BondQueryDataset(val_data, tokenizer, CONFIG['max_length'])
    test_dataset = BondQueryDataset(test_data, tokenizer, CONFIG['max_length'])
    
    train_loader = DataLoader(train_dataset, batch_size=CONFIG['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=CONFIG['batch_size'])
    test_loader = DataLoader(test_dataset, batch_size=CONFIG['batch_size'])
    
    # ---- 7) Model ----
    model = ProductionBondClassifier(CONFIG['base_model'])
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    
    print(f"✓ Model loaded: {sum(p.numel() for p in model.parameters()):,} parameters")
    print(f"✓ Device: {device}\n")
    
    # ---- 8) Optimizer & Scheduler ----
    optimizer = torch.optim.AdamW(model.parameters(), lr=CONFIG['learning_rate'], weight_decay=0.01)
    num_training_steps = len(train_loader) * CONFIG['num_epochs']
    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(num_training_steps * CONFIG['warmup_ratio']),
        num_training_steps=num_training_steps
    )
    
    criterion = MultiTaskLoss()
    
    # ---- 9) Train ----
    trainer = Trainer(model, train_loader, val_loader, optimizer, scheduler, 
                     criterion, device, CONFIG['output_dir'])
    trainer.train(CONFIG['num_epochs'])
    
    # ---- 10) Save tokenizer ----
    tokenizer.save_pretrained(CONFIG['output_dir'])
    
    # ---- 11) Final test ----
    print("=" * 60)
    print("FINAL TEST")
    print("=" * 60)
    model.eval()
    all_preds, all_labels = [], []
    
    with torch.no_grad():
        for batch in tqdm(test_loader, desc='Testing'):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            outputs = model(input_ids, attention_mask)
            preds = outputs['intent_logits'].argmax(dim=-1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(batch['intent_label'].numpy())
    
    test_acc = accuracy_score(all_labels, all_preds)
    test_f1 = f1_score(all_labels, all_preds, average='macro')
    
    print(f"\n✓ Test Accuracy: {test_acc:.4f}")
    print(f"✓ Test F1 Macro: {test_f1:.4f}\n")
    
    print("=" * 60)
    print("TRAINING COMPLETE!")
    print("=" * 60)
    print(f"\n✓ Model saved to: {CONFIG['output_dir']}")
    print(f"✓ Files: pytorch_model.bin, tokenizer files")
    print("\nTo download:")
    print("  1. Go to Output tab")
    print("  2. Download 'bond_classifier_v3' folder")
    print("  3. Use locally with your inference notebook")


# ==================== RUN ====================

if __name__ == '__main__':
    train_model()

GENERATING SYNTHETIC DATA


Generating:   0%|          | 0/13 [00:00<?, ?it/s]

✓ Generated 10400 samples

LOADING CACHED LLM DATA
✓ Loaded 2600 LLM samples from cache

Total base dataset size (synthetic + LLM): 13000 samples

AUGMENTING DATA


Augmenting:   0%|          | 0/6500 [00:00<?, ?it/s]

✓ Added 6500 augmented samples

Total dataset size after augmentation: 19500 samples

SPLITTING DATA
Train: 13650
Val:   2925
Test:  2925

LOADING MODEL & TOKENIZER




✓ Model loaded: 142,205,987 parameters
✓ Device: cuda

TRAINING


Epoch 1:   0%|          | 0/427 [00:00<?, ?it/s]


Epoch 1/10 - Train Loss: 2.1460


Evaluating:   0%|          | 0/92 [00:00<?, ?it/s]

Val Loss: 0.5034
Val Accuracy: 0.9908
Val F1: 0.9907

✓ New best model saved (acc: 0.9908)


Epoch 2:   0%|          | 0/427 [00:00<?, ?it/s]


Epoch 2/10 - Train Loss: 0.4018


Evaluating:   0%|          | 0/92 [00:00<?, ?it/s]

Val Loss: 0.2853
Val Accuracy: 0.9952
Val F1: 0.9952

✓ New best model saved (acc: 0.9952)


Epoch 3:   0%|          | 0/427 [00:00<?, ?it/s]


Epoch 3/10 - Train Loss: 0.2671


Evaluating:   0%|          | 0/92 [00:00<?, ?it/s]

Val Loss: 0.2153
Val Accuracy: 0.9966
Val F1: 0.9966

✓ New best model saved (acc: 0.9966)


Epoch 4:   0%|          | 0/427 [00:00<?, ?it/s]


Epoch 4/10 - Train Loss: 0.2113


Evaluating:   0%|          | 0/92 [00:00<?, ?it/s]

Val Loss: 0.1759
Val Accuracy: 0.9983
Val F1: 0.9983

✓ New best model saved (acc: 0.9983)


Epoch 5:   0%|          | 0/427 [00:00<?, ?it/s]


Epoch 5/10 - Train Loss: 0.1747


Evaluating:   0%|          | 0/92 [00:00<?, ?it/s]

Val Loss: 0.1531
Val Accuracy: 0.9983
Val F1: 0.9983



Epoch 6:   0%|          | 0/427 [00:00<?, ?it/s]


Epoch 6/10 - Train Loss: 0.1546


Evaluating:   0%|          | 0/92 [00:00<?, ?it/s]

Val Loss: 0.1390
Val Accuracy: 0.9979
Val F1: 0.9979



Epoch 7:   0%|          | 0/427 [00:00<?, ?it/s]


Epoch 7/10 - Train Loss: 0.1409


Evaluating:   0%|          | 0/92 [00:00<?, ?it/s]

Val Loss: 0.1315
Val Accuracy: 0.9983
Val F1: 0.9983



Epoch 8:   0%|          | 0/427 [00:00<?, ?it/s]


Epoch 8/10 - Train Loss: 0.1332


Evaluating:   0%|          | 0/92 [00:00<?, ?it/s]

Val Loss: 0.1276
Val Accuracy: 0.9986
Val F1: 0.9986

✓ New best model saved (acc: 0.9986)


Epoch 9:   0%|          | 0/427 [00:00<?, ?it/s]


Epoch 9/10 - Train Loss: 0.1293


Evaluating:   0%|          | 0/92 [00:00<?, ?it/s]

Val Loss: 0.1256
Val Accuracy: 0.9986
Val F1: 0.9986



Epoch 10:   0%|          | 0/427 [00:00<?, ?it/s]


Epoch 10/10 - Train Loss: 0.1272


Evaluating:   0%|          | 0/92 [00:00<?, ?it/s]

Val Loss: 0.1255
Val Accuracy: 0.9986
Val F1: 0.9986

Training complete! Best accuracy: 0.9986

FINAL TEST


Testing:   0%|          | 0/92 [00:00<?, ?it/s]


✓ Test Accuracy: 0.9979
✓ Test F1 Macro: 0.9979

TRAINING COMPLETE!

✓ Model saved to: /kaggle/working/bond_classifier_v3
✓ Files: pytorch_model.bin, tokenizer files

To download:
  1. Go to Output tab
  2. Download 'bond_classifier_v3' folder
  3. Use locally with your inference notebook


In [5]:
# ==================== EXPORT DATASET FOR DOWNLOAD ====================
from pathlib import Path
import json
from sklearn.model_selection import train_test_split

print("=" * 60)
print("REBUILDING & SAVING DATASET (SYNTHETIC + GEMINI)")
print("=" * 60)

# 1) Synthetic data (same as training)
synth_gen = SyntheticDataGenerator(CONFIG['num_samples_per_intent'])
synthetic_dataset = synth_gen.generate_dataset()

# 2) LLM (Gemini) data – prefer cached file to avoid extra API calls
llm_dataset: List[Dict] = []
if CONFIG.get("use_llm_data", False):
    cache_path = Path(CONFIG["llm_dataset_cache"])
    if cache_path.exists():
        print("Loading LLM data from cache:", cache_path)
        with cache_path.open("r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    llm_dataset.append(json.loads(line))
                except json.JSONDecodeError:
                    continue
        print(f"✓ Loaded {len(llm_dataset)} LLM samples from cache\n")
    else:
        print("⚠ LLM cache not found, generating a fresh LLM dataset...")
        llm_gen = LLMIntentDataGenerator(
            model_name=CONFIG["llm_model"],
            samples_per_intent=CONFIG["llm_samples_per_intent"],
        )
        llm_dataset = llm_gen.generate_dataset()
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        with cache_path.open("w", encoding="utf-8") as f:
            for s in llm_dataset:
                f.write(json.dumps(s, ensure_ascii=False) + "\n")
        print(f"✓ LLM dataset generated & cached to {cache_path}\n")

# 3) Merge datasets
dataset = synthetic_dataset + llm_dataset
print(f"Base dataset size (synthetic + LLM): {len(dataset)}")

# 4) Augment (same factor as training)
dataset = DataAugmenter.augment_dataset(dataset, CONFIG["augmentation_factor"])
print(f"Dataset size after augmentation: {len(dataset)}\n")

# 5) Train/val/test split (same logic as training)
intents = [s["intent"] for s in dataset]
train_data, temp_data = train_test_split(
    dataset, test_size=0.3, stratify=intents, random_state=42
)
temp_intents = [s["intent"] for s in temp_data]
val_data, test_data = train_test_split(
    temp_data, test_size=0.5, stratify=temp_intents, random_state=42
)

print(f"Train: {len(train_data)}")
print(f"Val:   {len(val_data)}")
print(f"Test:  {len(test_data)}\n")

# 6) Save to /kaggle/working so you can download from Output tab
out_dir = Path("/kaggle/working/bond_intent_data")
out_dir.mkdir(parents=True, exist_ok=True)

def save_jsonl(path: Path, rows):
    with path.open("w", encoding="utf-8") as f:
        for r in rows:
            f.write(json.dumps(r, ensure_ascii=False) + "\n")

save_jsonl(out_dir / "train.jsonl", train_data)
save_jsonl(out_dir / "val.jsonl", val_data)
save_jsonl(out_dir / "test.jsonl", test_data)

print("✓ Saved JSONL datasets to:")
print(f"  {out_dir / 'train.jsonl'}")
print(f"  {out_dir / 'val.jsonl'}")
print(f"  {out_dir / 'test.jsonl'}")
print("\nTo download:")
print("  Go to the 'Output' tab in Kaggle and download the 'bond_intent_data' folder.")

REBUILDING & SAVING DATASET (SYNTHETIC + GEMINI)
GENERATING SYNTHETIC DATA


Generating synthetic:   0%|          | 0/13 [00:00<?, ?it/s]

✓ Generated 10400 synthetic samples

Loading LLM data from cache: /kaggle/working/llm_intent_dataset.jsonl
✓ Loaded 2600 LLM samples from cache

Base dataset size (synthetic + LLM): 13000
AUGMENTING DATA


Augmenting:   0%|          | 0/6500 [00:00<?, ?it/s]

✓ Added 6500 augmented samples

Dataset size after augmentation: 19500

Train: 13650
Val:   2925
Test:  2925

✓ Saved JSONL datasets to:
  /kaggle/working/bond_intent_data/train.jsonl
  /kaggle/working/bond_intent_data/val.jsonl
  /kaggle/working/bond_intent_data/test.jsonl

To download:
  Go to the 'Output' tab in Kaggle and download the 'bond_intent_data' folder.


In [13]:
"""
Bond Query Classifier - Kaggle Inference Notebook (DeBERTa + Gemini data)
=========================================================================

Setup:
1. Upload / attach the training output as a Kaggle dataset
   (it should contain the folder `bond_classifier_v3` with pytorch_model.bin + tokenizer files)
2. Update MODEL_PATH below to point to that folder
3. Run this notebook to test the classifier
"""

# ==================== INSTALL & IMPORTS ====================
import sys
import subprocess

print("Installing dependencies...")
subprocess.check_call([
    sys.executable, "-m", "pip", "install", "-q",
    "transformers", "accelerate", "scikit-learn"
])

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
from enum import Enum
import numpy as np

print("✓ Imports complete!\n")


# ==================== MODEL ARCHITECTURE ====================

class ProductionBondClassifier(nn.Module):
    """Same architecture as training: DeBERTa-v3-small + multi-task heads."""
    
    def __init__(self, base_model: str = 'microsoft/deberta-v3-small', dropout: float = 0.15):
        super().__init__()
        
        self.bert = AutoModel.from_pretrained(base_model)
        hidden_size = self.bert.config.hidden_size
        
        self.feature_layer = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.Dropout(dropout),
            nn.GELU(),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.LayerNorm(hidden_size // 2),
            nn.Dropout(dropout),
            nn.GELU()
        )
        
        feature_size = hidden_size // 2
        
        # Heads: intent + sectors + rating + duration + constraints
        self.intent_head = nn.Linear(feature_size, 13)
        self.sector_head = nn.Linear(feature_size, 7)
        self.rating_head = nn.Linear(feature_size, 7)
        self.duration_head = nn.Linear(feature_size, 3)
        self.constraint_head = nn.Linear(feature_size, 5)
        
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0, :]
        features = self.feature_layer(cls_output)
        features = self.dropout(features)
        
        return {
            'intent_logits': self.intent_head(features),
            'sector_logits': self.sector_head(features),
            'rating_logits': self.rating_head(features),
            'duration_logits': self.duration_head(features),
            'constraint_logits': self.constraint_head(features),
        }


# ==================== INFERENCE CLASS ====================

class QueryIntent(str, Enum):
    BUY_RECOMMENDATION = "buy_recommendation"
    SELL_RECOMMENDATION = "sell_recommendation"
    PORTFOLIO_ANALYSIS = "portfolio_analysis"
    REDUCE_DURATION = "reduce_duration"
    INCREASE_YIELD = "increase_yield"
    HEDGE_VOLATILITY = "hedge_volatility"
    SECTOR_REBALANCE = "sector_rebalance"
    BARBELL_STRATEGY = "barbell_strategy"
    SWITCH_BONDS = "switch_bonds"
    EXPLAIN_RECOMMENDATION = "explain_recommendation"
    MARKET_OUTLOOK = "market_outlook"
    CREDIT_ANALYSIS = "credit_analysis"
    FORECAST_PRICES = "forecast_prices"


@dataclass
class ClassificationResult:
    intent: str
    confidence: float
    filters: Dict[str, Any]
    constraints: Dict[str, bool]


class BondClassifier:
    """Production classifier wrapper with MC-Dropout + filter extraction."""
    
    def __init__(self, model_path: str, base_model: str = "microsoft/deberta-v3-small"):
        """
        Load model from Kaggle path.
        
        Args:
            model_path: path to folder containing pytorch_model.bin + tokenizer files,
                        e.g. '/kaggle/working/bond_classifier_v3'
        """
        print(f"Loading model from: {model_path}")
        
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {self.device}")
        
        # Load tokenizer saved during training
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        
        # Load model with same architecture as training
        self.model = ProductionBondClassifier(base_model=base_model)
        state_dict = torch.load(f"{model_path}/pytorch_model.bin", map_location=self.device)
        self.model.load_state_dict(state_dict)
        self.model.to(self.device)
        self.model.eval()
        
        # Label mappings
        self.intent_names = [
            'buy_recommendation', 'sell_recommendation', 'portfolio_analysis',
            'reduce_duration', 'increase_yield', 'hedge_volatility',
            'sector_rebalance', 'barbell_strategy', 'switch_bonds',
            'explain_recommendation', 'market_outlook', 'credit_analysis',
            'forecast_prices'
        ]
        
        self.sector_names = [
            'Sovereign', 'PSU Energy', 'Financial', 'Corporate', 
            'Infrastructure', 'NBFC', 'Banking'
        ]
        # 7 rating classes in training (0..6)
        self.rating_names = ['AAA', 'AA+', 'AA', 'A+', 'A', 'BBB', 'Unrated']
        self.duration_names = ['short', 'medium', 'long']
        
        print("✓ Model loaded successfully!\n")
    
    def classify(self, query: str, num_samples: int = 5) -> ClassificationResult:
        """
        Classify query with MC-Dropout uncertainty.
        
        Args:
            query: User query string
            num_samples: Number of stochastic forward passes
        """
        # Tokenize
        enc = self.tokenizer(
            query,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=128
        )
        input_ids = enc["input_ids"].to(self.device)
        attention_mask = enc["attention_mask"].to(self.device)
        
        # --- MC Dropout on intent head ---
        self.model.train()  # enable dropout
        intent_predictions = []
        
        with torch.no_grad():
            for _ in range(num_samples):
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                intent_probs = F.softmax(outputs['intent_logits'], dim=-1)
                intent_predictions.append(intent_probs.cpu())
        
        # Aggregate
        intent_mean = torch.stack(intent_predictions).mean(dim=0)[0]
        confidence = intent_mean.max().item()
        predicted_idx = intent_mean.argmax().item()
        
        # Final pass (deterministic) for other heads
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
            sectors = self._extract_sectors(outputs['sector_logits'])
            rating = self._extract_rating(outputs['rating_logits'])
            duration = self._extract_duration(outputs['duration_logits'])
            constraints = self._extract_constraints(outputs['constraint_logits'])
        
        return ClassificationResult(
            intent=self.intent_names[predicted_idx],
            confidence=confidence,
            filters={
                'sectors': sectors,
                'min_rating': rating,
                'duration_preference': duration
            },
            constraints=constraints
        )
    
    def _extract_sectors(self, logits: torch.Tensor) -> List[str]:
        """Multi-label sector head with simple sigmoid threshold."""
        probs = torch.sigmoid(logits[0])
        # Slightly relaxed threshold to account for noisy training
        indices = (probs > 0.4).nonzero(as_tuple=True)[0]
        return [self.sector_names[i] for i in indices]
    
    def _extract_rating(self, logits: torch.Tensor) -> Optional[str]:
        """Map argmax of 7-way rating head to rating string."""
        idx = logits[0].argmax().item()
        if 0 <= idx < len(self.rating_names):
            return self.rating_names[idx]
        return None
    
    def _extract_duration(self, logits: torch.Tensor) -> str:
        """Map argmax of 3-way duration head to short/medium/long."""
        idx = logits[0].argmax().item()
        if 0 <= idx < len(self.duration_names):
            return self.duration_names[idx]
        return "medium"
    
    def _extract_constraints(self, logits: torch.Tensor) -> Dict[str, bool]:
        """Binary constraint flags from 5-way sigmoid head."""
        probs = torch.sigmoid(logits[0])
        return {
            'preserve_yield': probs[0].item() > 0.5,
            'maintain_liquidity': probs[1].item() > 0.5,
            'avoid_downgrades': probs[2].item() > 0.5,
            'sector_diversity': probs[3].item() > 0.5,
            'rating_above_aa': probs[4].item() > 0.5
        }
    
    def batch_classify(self, queries: List[str]) -> List[ClassificationResult]:
        """Classify multiple queries."""
        return [self.classify(q) for q in queries]


# ==================== USAGE EXAMPLE ====================

def test_classifier():
    """Test the classifier with sample queries."""
    
    # If running in the SAME notebook right after training:
    MODEL_PATH = '/kaggle/working/bond_classifier_v3'
    # If as a separate notebook, this might be e.g.:
    # MODEL_PATH = '/kaggle/input/bond-classifier-v3/bond_classifier_v3'
    
    print("=" * 60)
    print("LOADING CLASSIFIER")
    print("=" * 60)
    
    try:
        classifier = BondClassifier(MODEL_PATH, base_model="microsoft/deberta-v3-small")
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        print("\nMake sure to:")
        print("1. Attach the training output as a dataset (if in a new notebook)")
        print("2. Update MODEL_PATH above")
        return
    
    print("=" * 60)
    print("TESTING QUERIES")
    print("=" * 60)
    
    test_queries = [
        # --- Category 1: Ambiguous Buy/Sell/Strategy ---
        "Should I shift my SDL holdings into shorter PSU bonds or just hold?",
        "I want to reduce rate risk but I don’t want my yield to fall. What should I do?",
        "Is it better to exit long-duration bonds and move into AA corporates?",
        "Would switching from NTPC 2035 to REC 2030 improve my return?",
        "Which reduces risk more: selling perpetuals or adding short-term G-Secs?",
    
        # --- Category 2: Disguised intents ---
        "My yield looks weak lately… what should I change?",
        "These long papers are stressing me out—what’s the safest move?",
        "My portfolio feels too boring. Suggest something with more kick.",
        "The curve is flattening; should I reposition?",
        "My advisor said I’m too exposed. What adjustments should I consider?",
    
        # --- Category 3: Multi-intent queries ---
        "Recommend high-yield PSU bonds and also check if any of my holdings should be sold.",
        "Reduce my duration and give alternatives with at least 7.5% yield.",
        "Analyze my portfolio and tell me which low-yield bonds I should switch.",
        "Before suggesting buys, what’s your outlook on corporate spreads?",
        "If I shift to shorter bonds, how will my yield be impacted?",
    
        # --- Category 4: Tricky phrasing for buy intent ---
        "Is NTPC 2033 attractive at current spreads?",
        "Are AA PSU bonds offering a good entry point?",
        "Is this a good time to accumulate SDLs?",
        "Should I start adding exposure to long maturities?",
        "Are there better alternatives to ICICI 2029 with similar risk?",
    
        # --- Category 5: Risk management intents ---
        "The market seems jumpy; how do I protect my portfolio?",
        "If RBI hikes unexpectedly, which holdings will get hit the most?",
        "Rate volatility worries me—how do I reduce the impact?",
        "How can I stabilize P&L swings in my portfolio?",
        "Should I rebalance sectors before the credit cycle weakens?",
    
        # --- Category 6: Very short hard queries ---
        "Duration too high?",
        "Better yield ideas?",
        "Switch or hold?",
        "Cut risk?",
        "Add PSU?",
    
        # --- Category 7: Contradictory constraints ---
        "Cut duration but don’t let yield drop below 7.6%.",
        "I want high yield but without taking credit risk.",
        "Reduce risk but avoid selling anything.",
        "Switch out of low-yield bonds but keep duration same.",
        "Increase return without increasing duration or credit risk.",
    
        # --- Category 8: Multi-sentence queries ---
        "My duration increased after the last purchases. I’m worried about hikes. Suggest adjustments that don’t hurt yield.",
        "My portfolio is mostly PSU and financials. Seems concentrated. Should I diversify into private corporates?",
        "I sold some long-term bonds last month. Now thinking of adding 3–5 year AA corporates. Any ideas?",
        "I expect inflation to cool. Should I increase duration a bit?",
        "Markets feel stable. Should I rotate sectors or focus on yield first?",
    
        # --- Category 9: Credit-analysis queries ---
        "How is the credit quality of PFC right now?",
        "Is REC fundamentally strong enough for long-term holding?",
        "What’s the default risk on NTPC?",
        "Should I worry about credit spreads widening?",
        "Are AA- names safe in this environment?",
    
        # --- Category 10: Forecast / outlook queries ---
        "Where do you see G-Sec yields in six months?",
        "If rates fall by 50 bps, what happens to long-duration PSU bonds?",
        "Will corporate spreads tighten this year?",
        "Predict the movement of the 10-year benchmark.",
        "How will a Fed cut affect Indian bond yields?",
    ]

    for query in test_queries:
        result = classifier.classify(query)
        
        print(f"\n{'='*60}")
        print(f"Query: {query}")
        print(f"{'='*60}")
        print(f"Intent: {result.intent}")
        print(f"Confidence: {result.confidence:.3f}")
        # print(f"Sectors: {result.filters['sectors']}")
        # print(f"Rating: {result.filters['min_rating']}")
        # print(f"Duration: {result.filters['duration_preference']}")
        # print(f"Constraints: {result.constraints}")
    
    print("\n" + "=" * 60)
    print("TESTING COMPLETE!")
    print("=" * 60)


# ==================== RUN ====================

if __name__ == "__main__":
    print("\n" + "=" * 60)
    print("BOND QUERY CLASSIFIER - INFERENCE")
    print("=" * 60 + "\n")
    
    # Option 1: Automated test with curated queries
    test_classifier()
    
    # Option 2: Interactive mode (uncomment to use)
    # interactive_test()
    
    # Option 3: Batch testing (uncomment to use)
    # batch_test_from_list()

Installing dependencies...
✓ Imports complete!


BOND QUERY CLASSIFIER - INFERENCE

LOADING CLASSIFIER
Loading model from: /kaggle/working/bond_classifier_v3
Using device: cuda
✓ Model loaded successfully!

TESTING QUERIES

Query: Should I shift my SDL holdings into shorter PSU bonds or just hold?
Intent: switch_bonds
Confidence: 0.548

Query: I want to reduce rate risk but I don’t want my yield to fall. What should I do?
Intent: reduce_duration
Confidence: 0.586

Query: Is it better to exit long-duration bonds and move into AA corporates?
Intent: switch_bonds
Confidence: 0.309

Query: Would switching from NTPC 2035 to REC 2030 improve my return?
Intent: switch_bonds
Confidence: 0.574

Query: Which reduces risk more: selling perpetuals or adding short-term G-Secs?
Intent: reduce_duration
Confidence: 0.258

Query: My yield looks weak lately… what should I change?
Intent: increase_yield
Confidence: 0.342

Query: These long papers are stressing me out—what’s the safest move?
Intent: reduc

In [4]:
# ==================== EXPORT SYNTHETIC DATA ONLY ====================
from pathlib import Path
from sklearn.model_selection import train_test_split
import json

print("=" * 60)
print("GENERATING & SAVING SYNTHETIC DATA ONLY")
print("=" * 60)

# 1. Generate base synthetic dataset
generator = SyntheticDataGenerator(CONFIG['num_samples_per_intent'])
dataset = generator.generate_dataset()

# 2. Augment (optional – same as training script)
dataset = DataAugmenter.augment_dataset(dataset, CONFIG['augmentation_factor'])
print(f"Total dataset size after augmentation: {len(dataset)} samples\n")

# 3. Train/val/test split (same logic as train_model)
intents = [s['intent'] for s in dataset]
train_data, temp_data = train_test_split(
    dataset, test_size=0.3, stratify=intents, random_state=42
)
temp_intents = [s['intent'] for s in temp_data]
val_data, test_data = train_test_split(
    temp_data, test_size=0.5, stratify=temp_intents, random_state=42
)

print(f"Train: {len(train_data)}")
print(f"Val:   {len(val_data)}")
print(f"Test:  {len(test_data)}\n")

# 4. Save to /kaggle/working as JSONL
out_dir = Path("/kaggle/working/bond_query_data")
out_dir.mkdir(parents=True, exist_ok=True)

def save_jsonl(path: Path, rows):
    with path.open("w", encoding="utf-8") as f:
        for r in rows:
            f.write(json.dumps(r, ensure_ascii=False) + "\n")

save_jsonl(out_dir / "train.jsonl", train_data)
save_jsonl(out_dir / "val.jsonl", val_data)
save_jsonl(out_dir / "test.jsonl", test_data)

print("✓ Saved datasets to:")
print(f"  {out_dir / 'train.jsonl'}")
print(f"  {out_dir / 'val.jsonl'}")
print(f"  {out_dir / 'test.jsonl'}")
print("\nTo download:")
print("  Go to the 'Output' tab in Kaggle → download the 'bond_query_data' folder.")

GENERATING & SAVING SYNTHETIC DATA ONLY
GENERATING SYNTHETIC DATA


Generating:   0%|          | 0/13 [00:00<?, ?it/s]

✓ Generated 13000 samples

AUGMENTING DATA


Augmenting:   0%|          | 0/6500 [00:00<?, ?it/s]

✓ Added 6500 augmented samples

Total dataset size after augmentation: 19500 samples

Train: 13650
Val:   2925
Test:  2925

✓ Saved datasets to:
  /kaggle/working/bond_query_data/train.jsonl
  /kaggle/working/bond_query_data/val.jsonl
  /kaggle/working/bond_query_data/test.jsonl

To download:
  Go to the 'Output' tab in Kaggle → download the 'bond_query_data' folder.


# Improving model through additional training

In [14]:
# ==================== HARD-CASE DATASET (manually curated) ====================

HARD_CASES = [
    # ---- Cluster A: reduce_duration vs increase_yield (duration is primary) ----
    {
        "text": "Reduce my portfolio duration but keep my overall yield roughly the same.",
        "intent": "reduce_duration",
        "sectors": [],
        "rating": None,
        "duration": "short",
        "constraints": {"preserve_yield": True, "maintain_liquidity": False,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "Shorten duration across my holdings without dropping below 7.5% portfolio yield.",
        "intent": "reduce_duration",
        "sectors": [],
        "rating": None,
        "duration": "short",
        "constraints": {"preserve_yield": True, "maintain_liquidity": False,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "Cut my exposure to long bonds and move into shorter ones, but I still want a decent yield.",
        "intent": "reduce_duration",
        "sectors": [],
        "rating": None,
        "duration": "short",
        "constraints": {"preserve_yield": True, "maintain_liquidity": False,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "Help me move out of 10-year papers into 3-year ones while keeping yield close to current levels.",
        "intent": "reduce_duration",
        "sectors": [],
        "rating": None,
        "duration": "short",
        "constraints": {"preserve_yield": True, "maintain_liquidity": False,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "I’m okay with slightly lower yield if it really reduces my duration risk.",
        "intent": "reduce_duration",
        "sectors": [],
        "rating": None,
        "duration": "short",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },

    # Opposite: yield is primary, duration as constraint
    {
        "text": "Increase my portfolio yield but don’t extend duration beyond 5 years.",
        "intent": "increase_yield",
        "sectors": [],
        "rating": None,
        "duration": "medium",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "Find higher-yield bonds with similar duration and credit risk to my current holdings.",
        "intent": "increase_yield",
        "sectors": [],
        "rating": None,
        "duration": "medium",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": True, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "Boost my yield by switching into slightly lower-rated bonds but keep duration unchanged.",
        "intent": "increase_yield",
        "sectors": [],
        "rating": "A",
        "duration": "medium",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },

    # ---- Cluster B: direction of duration change (increase vs reduce) ----
    {
        "text": "I think rates will fall, so I want to increase the duration of my bond portfolio.",
        "intent": "buy_recommendation",  # or a separate "increase_duration" if you had it
        "sectors": [],
        "rating": None,
        "duration": "long",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "Add some longer-maturity bonds to take advantage of a potential rate cut.",
        "intent": "buy_recommendation",
        "sectors": [],
        "rating": None,
        "duration": "long",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "I expect inflation to cool, so please extend my duration a bit.",
        "intent": "buy_recommendation",
        "sectors": [],
        "rating": None,
        "duration": "long",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "Raise the average maturity of my holdings while keeping credit quality investment grade.",
        "intent": "buy_recommendation",
        "sectors": [],
        "rating": "A",
        "duration": "long",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": True, "sector_diversity": False,
                        "rating_above_aa": False},
    },

    # Reduce duration (clear negative direction)
    {
        "text": "Rates might spike, so I want to cut my duration sharply.",
        "intent": "reduce_duration",
        "sectors": [],
        "rating": None,
        "duration": "short",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "Move me from long-duration bonds into short ones ahead of potential RBI hikes.",
        "intent": "reduce_duration",
        "sectors": [],
        "rating": None,
        "duration": "short",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },

    # ---- Cluster C: reduce risk but avoid selling (no sell_recommendation) ----
    {
        "text": "Reduce my risk but avoid selling anything; I just want to hedge my portfolio.",
        "intent": "hedge_volatility",
        "sectors": [],
        "rating": None,
        "duration": "medium",
        "constraints": {"preserve_yield": False, "maintain_liquidity": True,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "I don’t want to liquidate any bonds, just soften the impact of rate moves.",
        "intent": "hedge_volatility",
        "sectors": [],
        "rating": None,
        "duration": "medium",
        "constraints": {"preserve_yield": False, "maintain_liquidity": True,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "Keep my existing holdings but add positions that offset interest-rate risk.",
        "intent": "hedge_volatility",
        "sectors": [],
        "rating": None,
        "duration": "medium",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "Protect my portfolio from volatility using hedges rather than selling bonds.",
        "intent": "hedge_volatility",
        "sectors": [],
        "rating": None,
        "duration": "medium",
        "constraints": {"preserve_yield": False, "maintain_liquidity": True,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },

    # ---- Cluster D: credit safety vs buy (credit_analysis vs buy_recommendation) ----
    {
        "text": "Are AA- PSU bonds fundamentally safe right now from a credit perspective?",
        "intent": "credit_analysis",
        "sectors": ["PSU Energy"],
        "rating": "AA",
        "duration": "medium",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": True, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "Is it still safe to hold AA- names if the credit cycle weakens?",
        "intent": "credit_analysis",
        "sectors": [],
        "rating": "AA",
        "duration": "medium",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": True, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "How risky are BBB rated bonds compared to AA in the current environment?",
        "intent": "credit_analysis",
        "sectors": [],
        "rating": "BBB",
        "duration": "medium",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": True, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    {
        "text": "Given recent downgrades, is it prudent to add more exposure to AA- corporate bonds?",
        "intent": "credit_analysis",
        "sectors": ["Corporate"],
        "rating": "AA",
        "duration": "medium",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": True, "sector_diversity": False,
                        "rating_above_aa": False},
    },
    # Contrast: explicit buy intent
    {
        "text": "Suggest specific AA- bonds to buy if you think they are still fundamentally safe.",
        "intent": "buy_recommendation",
        "sectors": [],
        "rating": "AA",
        "duration": "medium",
        "constraints": {"preserve_yield": False, "maintain_liquidity": False,
                        "avoid_downgrades": False, "sector_diversity": False,
                        "rating_above_aa": False},
    },
]


In [15]:
# ==================== CONTINUATION TRAINING ON HARD CASES ====================

def continue_training_on_hard_cases(
    model_path: str = "/kaggle/working/bond_classifier_v3",
    base_model: str = "microsoft/deberta-v3-small",
    epochs: int = 3,
    lr: float = 5e-6,
    batch_size: int = 8,
):
    """
    Continue training the already trained model on the HARD_CASES dataset only.
    Does NOT rebuild synthetic/LLM data. Small LR, a few epochs.
    """

    print("=" * 60)
    print("CONTINUATION TRAINING ON HARD CASES")
    print("=" * 60)
    print(f"Loading tokenizer & model from: {model_path}")

    # 1) Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # 2) Build small dataset from HARD_CASES
    hard_dataset = BondQueryDataset(HARD_CASES, tokenizer, CONFIG["max_length"])
    hard_loader = DataLoader(hard_dataset, batch_size=batch_size, shuffle=True)

    # 3) Load existing model checkpoint
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ProductionBondClassifier(base_model)  # will use DeBERTa-v3-small
    state_dict = torch.load(os.path.join(model_path, "pytorch_model.bin"), map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)

    print(f"✓ Loaded model for continuation training on {len(hard_dataset)} hard examples")
    print(f"Using device: {device}")

    # 4) Optimizer & loss (small LR)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=0.0)
    criterion = MultiTaskLoss()

    # 5) Simple training loop (no scheduler/val, dataset is tiny)
    model.train()
    for epoch in range(1, epochs + 1):
        print(f"\n--- Hard-case Epoch {epoch}/{epochs} ---")
        epoch_loss = 0.0
        all_preds, all_labels = [], []

        pbar = tqdm(hard_loader, desc=f"Hard-case Epoch {epoch}")
        for batch in pbar:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = {k: v.to(device) for k, v in batch.items()
                      if k not in ["input_ids", "attention_mask"]}

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask)
            loss_dict = criterion(outputs, labels)
            loss = loss_dict["total"]

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            epoch_loss += loss.item()

            preds = outputs["intent_logits"].argmax(dim=-1).cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels["intent_label"].cpu().numpy())

            pbar.set_postfix({"loss": f"{loss.item():.4f}"})

        avg_loss = epoch_loss / len(hard_loader)
        acc = accuracy_score(all_labels, all_preds)
        f1 = f1_score(all_labels, all_preds, average="macro")
        print(f"Hard-case Epoch {epoch} - Loss: {avg_loss:.4f} | Acc: {acc:.3f} | F1: {f1:.3f}")

    # 6) Save updated weights
    out_path = os.path.join(model_path, "pytorch_model_hard.bin")
    torch.save(model.state_dict(), out_path)
    print(f"\n✓ Hard-case fine-tuned model saved to: {out_path}")
    print("You can point your inference notebook to this file if desired.")


In [17]:
continue_training_on_hard_cases(
    model_path="/kaggle/working/bond_classifier_v3",
    base_model="microsoft/deberta-v3-small",
    epochs=10,
    lr=5e-6,
    batch_size=8,
)

CONTINUATION TRAINING ON HARD CASES
Loading tokenizer & model from: /kaggle/working/bond_classifier_v3
✓ Loaded model for continuation training on 23 hard examples
Using device: cuda

--- Hard-case Epoch 1/10 ---


Hard-case Epoch 1:   0%|          | 0/3 [00:00<?, ?it/s]

Hard-case Epoch 1 - Loss: 1.5951 | Acc: 0.696 | F1: 0.551

--- Hard-case Epoch 2/10 ---


Hard-case Epoch 2:   0%|          | 0/3 [00:00<?, ?it/s]

Hard-case Epoch 2 - Loss: 1.5565 | Acc: 0.565 | F1: 0.472

--- Hard-case Epoch 3/10 ---


Hard-case Epoch 3:   0%|          | 0/3 [00:00<?, ?it/s]

Hard-case Epoch 3 - Loss: 1.3535 | Acc: 0.696 | F1: 0.457

--- Hard-case Epoch 4/10 ---


Hard-case Epoch 4:   0%|          | 0/3 [00:00<?, ?it/s]

Hard-case Epoch 4 - Loss: 1.1646 | Acc: 0.826 | F1: 0.615

--- Hard-case Epoch 5/10 ---


Hard-case Epoch 5:   0%|          | 0/3 [00:00<?, ?it/s]

Hard-case Epoch 5 - Loss: 0.9765 | Acc: 0.739 | F1: 0.641

--- Hard-case Epoch 6/10 ---


Hard-case Epoch 6:   0%|          | 0/3 [00:00<?, ?it/s]

Hard-case Epoch 6 - Loss: 0.7686 | Acc: 0.870 | F1: 0.870

--- Hard-case Epoch 7/10 ---


Hard-case Epoch 7:   0%|          | 0/3 [00:00<?, ?it/s]

Hard-case Epoch 7 - Loss: 0.5777 | Acc: 0.870 | F1: 0.880

--- Hard-case Epoch 8/10 ---


Hard-case Epoch 8:   0%|          | 0/3 [00:00<?, ?it/s]

Hard-case Epoch 8 - Loss: 0.5945 | Acc: 0.870 | F1: 0.758

--- Hard-case Epoch 9/10 ---


Hard-case Epoch 9:   0%|          | 0/3 [00:00<?, ?it/s]

Hard-case Epoch 9 - Loss: 0.4301 | Acc: 0.957 | F1: 0.953

--- Hard-case Epoch 10/10 ---


Hard-case Epoch 10:   0%|          | 0/3 [00:00<?, ?it/s]

Hard-case Epoch 10 - Loss: 0.3909 | Acc: 1.000 | F1: 1.000

✓ Hard-case fine-tuned model saved to: /kaggle/working/bond_classifier_v3/pytorch_model_hard.bin
You can point your inference notebook to this file if desired.


In [18]:
"""
Bond Query Classifier - Hard-Finetuned Model Evaluation Notebook
================================================================

Setup:
1. In the training notebook, run `continue_training_on_hard_cases(...)`
   so that it saves: /kaggle/working/bond_classifier_v3/pytorch_model_hard.bin
2. In this NEW Kaggle notebook:
   - Attach the training output as a dataset (if needed), OR
   - Run in the same environment where /kaggle/working/bond_classifier_v3 exists.
3. Update MODEL_PATH below if necessary.
4. Run!
"""

# ==================== INSTALL & IMPORTS ====================
import sys
import subprocess

print("Installing dependencies...")
subprocess.check_call([
    sys.executable, "-m", "pip", "install", "-q",
    "transformers", "accelerate", "scikit-learn"
])

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
from enum import Enum
import numpy as np
import os

print("✓ Imports complete!\n")


# ==================== MODEL ARCHITECTURE ====================

class ProductionBondClassifier(nn.Module):
    """Same architecture as training: DeBERTa-v3-small + multi-task heads."""
    
    def __init__(self, base_model: str = 'microsoft/deberta-v3-small', dropout: float = 0.15):
        super().__init__()
        
        self.bert = AutoModel.from_pretrained(base_model)
        hidden_size = self.bert.config.hidden_size
        
        self.feature_layer = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.Dropout(dropout),
            nn.GELU(),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.LayerNorm(hidden_size // 2),
            nn.Dropout(dropout),
            nn.GELU()
        )
        
        feature_size = hidden_size // 2
        
        # Heads: intent + sectors + rating + duration + constraints
        self.intent_head = nn.Linear(feature_size, 13)
        self.sector_head = nn.Linear(feature_size, 7)
        self.rating_head = nn.Linear(feature_size, 7)
        self.duration_head = nn.Linear(feature_size, 3)
        self.constraint_head = nn.Linear(feature_size, 5)
        
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0, :]
        features = self.feature_layer(cls_output)
        features = self.dropout(features)
        
        return {
            'intent_logits': self.intent_head(features),
            'sector_logits': self.sector_head(features),
            'rating_logits': self.rating_head(features),
            'duration_logits': self.duration_head(features),
            'constraint_logits': self.constraint_head(features),
        }


# ==================== ENUMS & RESULT TYPE ====================

class QueryIntent(str, Enum):
    BUY_RECOMMENDATION = "buy_recommendation"
    SELL_RECOMMENDATION = "sell_recommendation"
    PORTFOLIO_ANALYSIS = "portfolio_analysis"
    REDUCE_DURATION = "reduce_duration"
    INCREASE_YIELD = "increase_yield"
    HEDGE_VOLATILITY = "hedge_volatility"
    SECTOR_REBALANCE = "sector_rebalance"
    BARBELL_STRATEGY = "barbell_strategy"
    SWITCH_BONDS = "switch_bonds"
    EXPLAIN_RECOMMENDATION = "explain_recommendation"
    MARKET_OUTLOOK = "market_outlook"
    CREDIT_ANALYSIS = "credit_analysis"
    FORECAST_PRICES = "forecast_prices"


@dataclass
class ClassificationResult:
    intent: str
    confidence: float
    filters: Dict[str, Any]
    constraints: Dict[str, bool]


# ==================== CLASSIFIER WRAPPER ====================

class BondClassifier:
    """Production classifier wrapper with MC-Dropout + filter extraction."""
    
    def __init__(
        self,
        model_path: str,
        base_model: str = "microsoft/deberta-v3-small",
        weight_file: str = "pytorch_model_hard.bin"  # <-- use hard-finetuned weights
    ):
        """
        Load model from Kaggle path.
        
        Args:
            model_path: path to folder containing weights + tokenizer files,
                        e.g. '/kaggle/working/bond_classifier_v3'
            base_model: HF base model name used in training
            weight_file: which checkpoint to load
        """
        print(f"Loading model from: {model_path}")
        print(f"Using weights file: {weight_file}")
        
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {self.device}")
        
        # Load tokenizer saved during training
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        
        # Load model with same architecture as training
        self.model = ProductionBondClassifier(base_model=base_model)
        weight_path = os.path.join(model_path, weight_file)
        if not os.path.exists(weight_path):
            raise FileNotFoundError(f"Checkpoint not found at {weight_path}")
        
        state_dict = torch.load(weight_path, map_location=self.device)
        self.model.load_state_dict(state_dict)
        self.model.to(self.device)
        self.model.eval()
        
        # Label mappings
        self.intent_names = [
            'buy_recommendation', 'sell_recommendation', 'portfolio_analysis',
            'reduce_duration', 'increase_yield', 'hedge_volatility',
            'sector_rebalance', 'barbell_strategy', 'switch_bonds',
            'explain_recommendation', 'market_outlook', 'credit_analysis',
            'forecast_prices'
        ]
        
        self.sector_names = [
            'Sovereign', 'PSU Energy', 'Financial', 'Corporate', 
            'Infrastructure', 'NBFC', 'Banking'
        ]
        self.rating_names = ['AAA', 'AA+', 'AA', 'A+', 'A', 'BBB', 'Unrated']
        self.duration_names = ['short', 'medium', 'long']
        
        print("✓ Model loaded successfully!\n")
    
    def classify(self, query: str, num_samples: int = 5) -> ClassificationResult:
        """
        Classify query with MC-Dropout uncertainty.
        
        Args:
            query: User query string
            num_samples: Number of stochastic forward passes
        """
        # Tokenize
        enc = self.tokenizer(
            query,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=128
        )
        input_ids = enc["input_ids"].to(self.device)
        attention_mask = enc["attention_mask"].to(self.device)
        
        # --- MC Dropout on intent head ---
        self.model.train()  # enable dropout
        intent_predictions = []
        
        with torch.no_grad():
            for _ in range(num_samples):
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                intent_probs = F.softmax(outputs['intent_logits'], dim=-1)
                intent_predictions.append(intent_probs.cpu())
        
        # Aggregate
        intent_mean = torch.stack(intent_predictions).mean(dim=0)[0]
        confidence = intent_mean.max().item()
        predicted_idx = intent_mean.argmax().item()
        
        # Final pass (deterministic) for other heads
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
            sectors = self._extract_sectors(outputs['sector_logits'])
            rating = self._extract_rating(outputs['rating_logits'])
            duration = self._extract_duration(outputs['duration_logits'])
            constraints = self._extract_constraints(outputs['constraint_logits'])
        
        return ClassificationResult(
            intent=self.intent_names[predicted_idx],
            confidence=confidence,
            filters={
                'sectors': sectors,
                'min_rating': rating,
                'duration_preference': duration
            },
            constraints=constraints
        )
    
    def _extract_sectors(self, logits: torch.Tensor) -> List[str]:
        """Multi-label sector head with simple sigmoid threshold."""
        probs = torch.sigmoid(logits[0])
        indices = (probs > 0.4).nonzero(as_tuple=True)[0]
        return [self.sector_names[i] for i in indices]
    
    def _extract_rating(self, logits: torch.Tensor) -> Optional[str]:
        """Map argmax of 7-way rating head to rating string."""
        idx = logits[0].argmax().item()
        if 0 <= idx < len(self.rating_names):
            return self.rating_names[idx]
        return None
    
    def _extract_duration(self, logits: torch.Tensor) -> str:
        """Map argmax of 3-way duration head to short/medium/long."""
        idx = logits[0].argmax().item()
        if 0 <= idx < len(self.duration_names):
            return self.duration_names[idx]
        return "medium"
    
    def _extract_constraints(self, logits: torch.Tensor) -> Dict[str, bool]:
        """Binary constraint flags from 5-way sigmoid head."""
        probs = torch.sigmoid(logits[0])
        return {
            'preserve_yield': probs[0].item() > 0.5,
            'maintain_liquidity': probs[1].item() > 0.5,
            'avoid_downgrades': probs[2].item() > 0.5,
            'sector_diversity': probs[3].item() > 0.5,
            'rating_above_aa': probs[4].item() > 0.5
        }
    
    def batch_classify(self, queries: List[str]) -> List[ClassificationResult]:
        """Classify multiple queries."""
        return [self.classify(q) for q in queries]


# ==================== EVAL SCRIPT ====================

TEST_QUERIES = [
    # --- Category 1: Ambiguous Buy/Sell/Strategy ---
    "Should I shift my SDL holdings into shorter PSU bonds or just hold?",
    "I want to reduce rate risk but I don’t want my yield to fall. What should I do?",
    "Is it better to exit long-duration bonds and move into AA corporates?",
    "Would switching from NTPC 2035 to REC 2030 improve my return?",
    "Which reduces risk more: selling perpetuals or adding short-term G-Secs?",

    # --- Category 2: Disguised intents ---
    "My yield looks weak lately… what should I change?",
    "These long papers are stressing me out—what’s the safest move?",
    "My portfolio feels too boring. Suggest something with more kick.",
    "The curve is flattening; should I reposition?",
    "My advisor said I’m too exposed. What adjustments should I consider?",

    # --- Category 3: Multi-intent queries ---
    "Recommend high-yield PSU bonds and also check if any of my holdings should be sold.",
    "Reduce my duration and give alternatives with at least 7.5% yield.",
    "Analyze my portfolio and tell me which low-yield bonds I should switch.",
    "Before suggesting buys, what’s your outlook on corporate spreads?",
    "If I shift to shorter bonds, how will my yield be impacted?",

    # --- Category 4: Tricky phrasing for buy intent ---
    "Is NTPC 2033 attractive at current spreads?",
    "Are AA PSU bonds offering a good entry point?",
    "Is this a good time to accumulate SDLs?",
    "Should I start adding exposure to long maturities?",
    "Are there better alternatives to ICICI 2029 with similar risk?",

    # --- Category 5: Risk management intents ---
    "The market seems jumpy; how do I protect my portfolio?",
    "If RBI hikes unexpectedly, which holdings will get hit the most?",
    "Rate volatility worries me—how do I reduce the impact?",
    "How can I stabilize P&L swings in my portfolio?",
    "Should I rebalance sectors before the credit cycle weakens?",

    # --- Category 6: Very short hard queries ---
    "Duration too high?",
    "Better yield ideas?",
    "Switch or hold?",
    "Cut risk?",
    "Add PSU?",

    # --- Category 7: Contradictory constraints ---
    "Cut duration but don’t let yield drop below 7.6%.",
    "I want high yield but without taking credit risk.",
    "Reduce risk but avoid selling anything.",
    "Switch out of low-yield bonds but keep duration same.",
    "Increase return without increasing duration or credit risk.",

    # --- Category 8: Multi-sentence queries ---
    "My duration increased after the last purchases. I’m worried about hikes. Suggest adjustments that don’t hurt yield.",
    "My portfolio is mostly PSU and financials. Seems concentrated. Should I diversify into private corporates?",
    "I sold some long-term bonds last month. Now thinking of adding 3–5 year AA corporates. Any ideas?",
    "I expect inflation to cool. Should I increase duration a bit?",
    "Markets feel stable. Should I rotate sectors or focus on yield first?",

    # --- Category 9: Credit-analysis queries ---
    "How is the credit quality of PFC right now?",
    "Is REC fundamentally strong enough for long-term holding?",
    "What’s the default risk on NTPC?",
    "Should I worry about credit spreads widening?",
    "Are AA- names safe in this environment?",

    # --- Category 10: Forecast / outlook queries ---
    "Where do you see G-Sec yields in six months?",
    "If rates fall by 50 bps, what happens to long-duration PSU bonds?",
    "Will corporate spreads tighten this year?",
    "Predict the movement of the 10-year benchmark.",
    "How will a Fed cut affect Indian bond yields?",
]


def evaluate_hard_model():
    """Evaluate the hard-case fine-tuned model on the tough query set."""
    
    MODEL_PATH = '/kaggle/working/bond_classifier_v3'
    # If using as a separate notebook with attached dataset, update to:
    # MODEL_PATH = '/kaggle/input/bond-classifier-v3/bond_classifier_v3'
    
    print("=" * 60)
    print("LOADING HARD-FINETUNED CLASSIFIER")
    print("=" * 60)
    
    try:
        classifier = BondClassifier(
            MODEL_PATH,
            base_model="microsoft/deberta-v3-small",
            weight_file="pytorch_model_hard.bin"
        )
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        print("\nMake sure:")
        print("1. Hard-case fine-tuning saved pytorch_model_hard.bin")
        print("2. MODEL_PATH is correct.")
        return
    
    print("=" * 60)
    print("TESTING QUERIES (HARD-FINETUNED MODEL)")
    print("=" * 60)
    
    for query in TEST_QUERIES:
        result = classifier.classify(query)
        
        print(f"\n{'='*60}")
        print(f"Query: {query}")
        print(f"{'='*60}")
        print(f"Intent: {result.intent}")
        print(f"Confidence: {result.confidence:.3f}")
    
    print("\n" + "=" * 60)
    print("HARD-FINETUNED MODEL EVALUATION COMPLETE!")
    print("=" * 60)


# ==================== RUN ====================

if __name__ == "__main__":
    print("\n" + "=" * 60)
    print("BOND QUERY CLASSIFIER - HARD MODEL EVAL")
    print("=" * 60 + "\n")
    
    evaluate_hard_model()


Installing dependencies...
✓ Imports complete!


BOND QUERY CLASSIFIER - HARD MODEL EVAL

LOADING HARD-FINETUNED CLASSIFIER
Loading model from: /kaggle/working/bond_classifier_v3
Using weights file: pytorch_model_hard.bin
Using device: cuda
✓ Model loaded successfully!

TESTING QUERIES (HARD-FINETUNED MODEL)

Query: Should I shift my SDL holdings into shorter PSU bonds or just hold?
Intent: reduce_duration
Confidence: 0.443

Query: I want to reduce rate risk but I don’t want my yield to fall. What should I do?
Intent: reduce_duration
Confidence: 0.671

Query: Is it better to exit long-duration bonds and move into AA corporates?
Intent: buy_recommendation
Confidence: 0.181

Query: Would switching from NTPC 2035 to REC 2030 improve my return?
Intent: switch_bonds
Confidence: 0.409

Query: Which reduces risk more: selling perpetuals or adding short-term G-Secs?
Intent: hedge_volatility
Confidence: 0.255

Query: My yield looks weak lately… what should I change?
Intent: increase_yield
Confi

In [20]:
# ==================== CELL 1: GEMINI EDGE-CASE DATA GENERATION ====================

from google import genai
from pathlib import Path
import json
import os

EDGE_CASE_OUTPUT = "/kaggle/working/edge_cases.jsonl"

class EdgeCaseDataGenerator:
    """
    Generate 'hard' edge-case examples for the bond intent classifier using Gemini.
    Focus areas:
      - reduce_duration vs increase_yield (both mentioned)
      - increase vs reduce duration
      - reduce risk but avoid selling (no sell_recommendation)
      - credit safety vs buy_recommendation
      - what-if / analytics questions (not pure buy/sell)
    """

    def __init__(self, model_name: str = "gemini-2.0-flash", total_examples: int = 80):
        api_key = os.environ.get("GEMINI_API_KEY")
        if not api_key:
            raise RuntimeError(
                "GEMINI_API_KEY not set. "
                "Set it earlier with os.environ['GEMINI_API_KEY'] = '...'"
            )
        self.client = genai.Client(api_key=api_key)
        self.model_name = model_name
        self.total_examples = total_examples

        # Valid intents
        self.allowed_intents = [
            "buy_recommendation",
            "sell_recommendation",
            "portfolio_analysis",
            "reduce_duration",
            "increase_yield",
            "hedge_volatility",
            "sector_rebalance",
            "barbell_strategy",
            "switch_bonds",
            "explain_recommendation",
            "market_outlook",
            "credit_analysis",
            "forecast_prices",
        ]
        self.allowed_sectors = [
            "Sovereign", "PSU Energy", "Financial",
            "Corporate", "Infrastructure", "NBFC", "Banking"
        ]
        self.allowed_ratings = ["AAA", "AA+", "AA", "A+", "A", "BBB"]
        self.allowed_durations = ["short", "medium", "long"]

    def generate_edge_cases(self) -> list[dict]:
        """
        Ask Gemini for a single batch of diverse edge-case examples.
        """
        print("=" * 60)
        print("GENERATING EDGE-CASE DATA WITH GEMINI")
        print("=" * 60)

        system_msg = (
            "You are an expert bond investment assistant generating edge-case "
            "queries for intent classification training. The classifier must decide "
            "between similar intents such as reduce_duration vs increase_yield, "
            "hedge_volatility vs sell_recommendation vs portfolio_analysis, "
            "credit_analysis vs buy_recommendation, and analytics/what-if questions "
            "vs pure advisory."
        )

        user_prompt = f"""
Generate {self.total_examples} diverse, realistic user queries for a bond assistant that are
INTENTIONALLY AMBIGUOUS or lie on the boundary between related intents.

For EACH example, output ONE JSON object per line (JSON Lines format). 
DO NOT wrap in an array. DO NOT add commentary or code fences.

Valid intents (intent MUST be exactly one of these):
- buy_recommendation
- sell_recommendation
- portfolio_analysis
- reduce_duration
- increase_yield
- hedge_volatility
- sector_rebalance
- barbell_strategy
- switch_bonds
- explain_recommendation
- market_outlook
- credit_analysis
- forecast_prices

Follow these patterns:

1) Duration vs yield (reduce_duration vs increase_yield)
   - Queries that mention BOTH duration and yield.
   - Some where reduce_duration is primary (yield is a constraint).
   - Some where increase_yield is primary (duration is a constraint).
   - Label with reduce_duration when the main aim is cutting duration.
   - Label with increase_yield when the main aim is boosting yield.

2) Direction of duration (increase vs reduce)
   - Queries that clearly want to INCREASE duration (longer maturities) because of rate cut expectations.
   - Label those as buy_recommendation or market_outlook (not reduce_duration).
   - Queries that clearly want to REDUCE duration ahead of rate hikes.
   - Label those as reduce_duration.

3) Reduce risk but avoid selling
   - Queries that say "reduce risk" or "protect portfolio" but explicitly say "avoid selling" or "do not sell".
   - Label these as hedge_volatility or reduce_duration, NOT sell_recommendation.

4) Credit safety vs buying
   - Queries about "are AA- names safe", "is this issuer safe", "credit risk in this environment".
   - Label those as credit_analysis.
   - Separate queries that explicitly ask for bonds to BUY if credit is safe -> label buy_recommendation.

5) Analytics / what-if questions
   - Queries like "if RBI hikes, which holdings get hit most?", "if rates fall 50 bps, what happens to long bonds?"
   - These should test whether the classifier can distinguish analysis from pure buy/sell.
   - Label as portfolio_analysis, market_outlook, or forecast_prices (not buy/sell).

For EACH example, output:

{{
  "text": "... user query ...",
  "intent": "<one of valid intents>",
  "sectors": [ ... zero or more from ["Sovereign","PSU Energy","Financial","Corporate","Infrastructure","NBFC","Banking"] ... ],
  "rating": "<AAA|AA+|AA|A+|A|BBB>" or null,
  "duration": "<short|medium|long>",
  "constraints": {{
      "preserve_yield": true/false,
      "maintain_liquidity": true/false,
      "avoid_downgrades": true/false,
      "sector_diversity": true/false,
      "rating_above_aa": true/false
  }}
}}

Guidelines:
- Mix short and multi-sentence queries.
- Vary tone (retail, PM, brief, detailed).
- Keep everything in the context of BONDS (not equities/crypto).
"""

        full_prompt = system_msg + "\n\n" + user_prompt

        resp = self.client.models.generate_content(
            model=self.model_name,
            contents=full_prompt,
        )

        raw = resp.text or ""
        samples = self._parse_and_normalize(raw)
        print(f"\n✓ Parsed {len(samples)} valid edge-case examples")
        return samples

    def _parse_and_normalize(self, raw: str) -> list[dict]:
        samples = []
        for line in raw.splitlines():
            line = line.strip()
            if not line:
                continue
            if line.startswith("```"):
                continue
            if line.startswith("-"):
                line = line.lstrip("-").strip()
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue

            sample = self._normalize_record(obj)
            if sample is not None:
                samples.append(sample)
        return samples

    def _normalize_record(self, obj: dict) -> Optional[dict]:
        text = str(obj.get("text", "")).strip()
        if not text:
            return None

        intent = obj.get("intent")
        if intent not in self.allowed_intents:
            return None

        # sectors
        raw_sectors = obj.get("sectors") or []
        sectors = []
        if isinstance(raw_sectors, list):
            for s in raw_sectors:
                s = str(s).strip()
                if s in self.allowed_sectors and s not in sectors:
                    sectors.append(s)

        # rating
        rating = obj.get("rating")
        if rating is not None:
            r = str(rating).strip()
            rating_norm = r if r in self.allowed_ratings else None
        else:
            rating_norm = None

        # duration
        duration = str(obj.get("duration", "medium")).strip().lower()
        if duration not in self.allowed_durations:
            duration = "medium"

        # constraints
        raw_constraints = obj.get("constraints") or {}
        constraints_defaults = {
            "preserve_yield": False,
            "maintain_liquidity": False,
            "avoid_downgrades": False,
            "sector_diversity": False,
            "rating_above_aa": False,
        }
        if isinstance(raw_constraints, dict):
            for k in list(constraints_defaults.keys()):
                if k in raw_constraints:
                    constraints_defaults[k] = bool(raw_constraints[k])

        if rating_norm in ("AAA", "AA+", "AA"):
            constraints_defaults["rating_above_aa"] = True

        return {
            "text": text,
            "intent": intent,
            "sectors": sectors,
            "rating": rating_norm,
            "duration": duration,
            "constraints": constraints_defaults,
        }


# ---- Run generation & save JSONL ----
edge_gen = EdgeCaseDataGenerator(model_name=CONFIG["llm_model"], total_examples=80)
edge_cases = edge_gen.generate_edge_cases()

edge_path = Path(EDGE_CASE_OUTPUT)
edge_path.parent.mkdir(parents=True, exist_ok=True)
with edge_path.open("w", encoding="utf-8") as f:
    for ex in edge_cases:
        f.write(json.dumps(ex, ensure_ascii=False) + "\n")

print(f"\n✓ Edge-case dataset saved to: {edge_path}")
print("You can inspect/download it from the 'Output' tab.")

GENERATING EDGE-CASE DATA WITH GEMINI

✓ Parsed 90 valid edge-case examples

✓ Edge-case dataset saved to: /kaggle/working/edge_cases.jsonl
You can inspect/download it from the 'Output' tab.


In [22]:
# ==================== CELL 2: CONTINUE TRAINING ON EDGE CASES ====================

from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, f1_score

EDGE_CASE_OUTPUT = "/kaggle/working/edge_cases.jsonl"

def load_edge_cases(path: str) -> List[Dict]:
    data = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            data.append(obj)
    print(f"Loaded {len(data)} edge-case examples from {path}")
    return data


def continue_training_on_edge_cases(
    model_path: str = "/kaggle/working/bond_classifier_v3",
    base_model: str = "microsoft/deberta-v3-small",
    edge_path: str = EDGE_CASE_OUTPUT,
    epochs: int = 3,
    lr: float = 5e-6,
    batch_size: int = 8,
    out_weight_file: str = "pytorch_model_edge.bin",
):
    """
    Continue training the already trained model on the Gemini edge-case dataset only.
    """

    print("=" * 60)
    print("CONTINUATION TRAINING ON GEMINI EDGE CASES")
    print("=" * 60)
    print(f"Loading tokenizer & base model from: {model_path}")

    # 1) Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # 2) Load edge-case data
    edge_data = load_edge_cases(edge_path)
    if not edge_data:
        print("❌ No edge-case data found. Run the generation cell first.")
        return

    # 3) Build dataset & dataloader
    edge_dataset = BondQueryDataset(edge_data, tokenizer, CONFIG["max_length"])
    edge_loader = DataLoader(edge_dataset, batch_size=batch_size, shuffle=True)

    # 4) Load existing model checkpoint
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ProductionBondClassifier(base_model)  # uses DeBERTa-v3-small
    ckpt_path = os.path.join(model_path, "pytorch_model_hard.bin")
    state_dict = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)

    print(f"✓ Loaded base model from {ckpt_path}")
    print(f"Fine-tuning on {len(edge_dataset)} edge-case examples")
    print(f"Using device: {device}")

    # 5) Optimizer & loss
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=0.0)
    criterion = MultiTaskLoss()

    # 6) Simple training loop
    for epoch in range(1, epochs + 1):
        print(f"\n--- Edge-case Epoch {epoch}/{epochs} ---")
        model.train()
        epoch_loss = 0.0
        all_preds, all_labels = [], []

        pbar = tqdm(edge_loader, desc=f"Edge-case Epoch {epoch}")
        for batch in pbar:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = {k: v.to(device) for k, v in batch.items()
                      if k not in ["input_ids", "attention_mask"]}

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask)
            loss_dict = criterion(outputs, labels)
            loss = loss_dict["total"]

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            epoch_loss += loss.item()

            preds = outputs["intent_logits"].argmax(dim=-1).cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels["intent_label"].cpu().numpy())

            pbar.set_postfix({"loss": f"{loss.item():.4f}"})

        avg_loss = epoch_loss / len(edge_loader)
        acc = accuracy_score(all_labels, all_preds)
        f1 = f1_score(all_labels, all_preds, average="macro")
        print(f"Edge-case Epoch {epoch} - Loss: {avg_loss:.4f} | Acc: {acc:.3f} | F1: {f1:.3f}")

    # 7) Save updated weights
    out_path = os.path.join(model_path, out_weight_file)
    torch.save(model.state_dict(), out_path)
    print(f"\n✓ Edge-case fine-tuned model saved to: {out_path}")
    print("Point your inference notebook at this weight file to use the updated model.")


# ---- Run continuation training ----
continue_training_on_edge_cases(
    model_path="/kaggle/working/bond_classifier_v3",
    base_model="microsoft/deberta-v3-small",
    edge_path=EDGE_CASE_OUTPUT,
    epochs=8,
    lr=5e-6,
    batch_size=8,
    out_weight_file="pytorch_model_edge.bin",
)

CONTINUATION TRAINING ON GEMINI EDGE CASES
Loading tokenizer & base model from: /kaggle/working/bond_classifier_v3
Loaded 90 edge-case examples from /kaggle/working/edge_cases.jsonl
✓ Loaded base model from /kaggle/working/bond_classifier_v3/pytorch_model_hard.bin
Fine-tuning on 90 edge-case examples
Using device: cuda

--- Edge-case Epoch 1/8 ---


Edge-case Epoch 1:   0%|          | 0/12 [00:00<?, ?it/s]

Edge-case Epoch 1 - Loss: 1.0356 | Acc: 0.733 | F1: 0.712

--- Edge-case Epoch 2/8 ---


Edge-case Epoch 2:   0%|          | 0/12 [00:00<?, ?it/s]

Edge-case Epoch 2 - Loss: 0.8891 | Acc: 0.778 | F1: 0.773

--- Edge-case Epoch 3/8 ---


Edge-case Epoch 3:   0%|          | 0/12 [00:00<?, ?it/s]

Edge-case Epoch 3 - Loss: 0.6680 | Acc: 0.856 | F1: 0.852

--- Edge-case Epoch 4/8 ---


Edge-case Epoch 4:   0%|          | 0/12 [00:00<?, ?it/s]

Edge-case Epoch 4 - Loss: 0.4729 | Acc: 0.867 | F1: 0.858

--- Edge-case Epoch 5/8 ---


Edge-case Epoch 5:   0%|          | 0/12 [00:00<?, ?it/s]

Edge-case Epoch 5 - Loss: 0.4416 | Acc: 0.889 | F1: 0.898

--- Edge-case Epoch 6/8 ---


Edge-case Epoch 6:   0%|          | 0/12 [00:00<?, ?it/s]

Edge-case Epoch 6 - Loss: 0.3496 | Acc: 0.956 | F1: 0.960

--- Edge-case Epoch 7/8 ---


Edge-case Epoch 7:   0%|          | 0/12 [00:00<?, ?it/s]

Edge-case Epoch 7 - Loss: 0.3155 | Acc: 0.967 | F1: 0.970

--- Edge-case Epoch 8/8 ---


Edge-case Epoch 8:   0%|          | 0/12 [00:00<?, ?it/s]

Edge-case Epoch 8 - Loss: 0.3010 | Acc: 0.967 | F1: 0.973

✓ Edge-case fine-tuned model saved to: /kaggle/working/bond_classifier_v3/pytorch_model_edge.bin
Point your inference notebook at this weight file to use the updated model.


In [23]:
# ==================== CELL 3: EVALUATE EDGE-TUNED MODEL ====================

# Reuse ProductionBondClassifier, ClassificationResult, etc. from above

TEST_QUERIES = [
    # --- Category 1: Ambiguous Buy/Sell/Strategy ---
    "Should I shift my SDL holdings into shorter PSU bonds or just hold?",
    "I want to reduce rate risk but I don’t want my yield to fall. What should I do?",
    "Is it better to exit long-duration bonds and move into AA corporates?",
    "Would switching from NTPC 2035 to REC 2030 improve my return?",
    "Which reduces risk more: selling perpetuals or adding short-term G-Secs?",

    # --- Category 2: Disguised intents ---
    "My yield looks weak lately… what should I change?",
    "These long papers are stressing me out—what’s the safest move?",
    "My portfolio feels too boring. Suggest something with more kick.",
    "The curve is flattening; should I reposition?",
    "My advisor said I’m too exposed. What adjustments should I consider?",

    # --- Category 3: Multi-intent queries ---
    "Recommend high-yield PSU bonds and also check if any of my holdings should be sold.",
    "Reduce my duration and give alternatives with at least 7.5% yield.",
    "Analyze my portfolio and tell me which low-yield bonds I should switch.",
    "Before suggesting buys, what’s your outlook on corporate spreads?",
    "If I shift to shorter bonds, how will my yield be impacted?",

    # --- Category 4: Tricky phrasing for buy intent ---
    "Is NTPC 2033 attractive at current spreads?",
    "Are AA PSU bonds offering a good entry point?",
    "Is this a good time to accumulate SDLs?",
    "Should I start adding exposure to long maturities?",
    "Are there better alternatives to ICICI 2029 with similar risk?",

    # --- Category 5: Risk management intents ---
    "The market seems jumpy; how do I protect my portfolio?",
    "If RBI hikes unexpectedly, which holdings will get hit the most?",
    "Rate volatility worries me—how do I reduce the impact?",
    "How can I stabilize P&L swings in my portfolio?",
    "Should I rebalance sectors before the credit cycle weakens?",

    # --- Category 6: Very short hard queries ---
    "Duration too high?",
    "Better yield ideas?",
    "Switch or hold?",
    "Cut risk?",
    "Add PSU?",

    # --- Category 7: Contradictory constraints ---
    "Cut duration but don’t let yield drop below 7.6%.",
    "I want high yield but without taking credit risk.",
    "Reduce risk but avoid selling anything.",
    "Switch out of low-yield bonds but keep duration same.",
    "Increase return without increasing duration or credit risk.",

    # --- Category 8: Multi-sentence queries ---
    "My duration increased after the last purchases. I’m worried about hikes. Suggest adjustments that don’t hurt yield.",
    "My portfolio is mostly PSU and financials. Seems concentrated. Should I diversify into private corporates?",
    "I sold some long-term bonds last month. Now thinking of adding 3–5 year AA corporates. Any ideas?",
    "I expect inflation to cool. Should I increase duration a bit?",
    "Markets feel stable. Should I rotate sectors or focus on yield first?",

    # --- Category 9: Credit-analysis queries ---
    "How is the credit quality of PFC right now?",
    "Is REC fundamentally strong enough for long-term holding?",
    "What’s the default risk on NTPC?",
    "Should I worry about credit spreads widening?",
    "Are AA- names safe in this environment?",

    # --- Category 10: Forecast / outlook queries ---
    "Where do you see G-Sec yields in six months?",
    "If rates fall by 50 bps, what happens to long-duration PSU bonds?",
    "Will corporate spreads tighten this year?",
    "Predict the movement of the 10-year benchmark.",
    "How will a Fed cut affect Indian bond yields?",
]


def evaluate_edge_tuned_model():
    """Evaluate the edge-case fine-tuned model on the tough query set."""
    
    MODEL_PATH = "/kaggle/working/bond_classifier_v3"
    WEIGHT_FILE = "pytorch_model_edge.bin"
    
    print("=" * 60)
    print("LOADING EDGE-TUNED CLASSIFIER")
    print("=" * 60)
    
    try:
        classifier = BondClassifier(
            MODEL_PATH,
            base_model="microsoft/deberta-v3-small",
            weight_file=WEIGHT_FILE
        )
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        print("\nMake sure:")
        print("1. Edge-case fine-tuning saved", WEIGHT_FILE)
        print("2. MODEL_PATH is correct.")
        return
    
    print("=" * 60)
    print("TESTING QUERIES (EDGE-TUNED MODEL)")
    print("=" * 60)
    
    for q in TEST_QUERIES:
        result = classifier.classify(q)
        print(f"\n{'='*60}")
        print(f"Query: {q}")
        print(f"{'='*60}")
        print(f"Intent: {result.intent}")
        print(f"Confidence: {result.confidence:.3f}")
    
    print("\n" + "=" * 60)
    print("EDGE-TUNED MODEL EVALUATION COMPLETE!")
    print("=" * 60)


# ---- Run evaluation ----
evaluate_edge_tuned_model()


LOADING EDGE-TUNED CLASSIFIER
Loading model from: /kaggle/working/bond_classifier_v3
Using weights file: pytorch_model_edge.bin
Using device: cuda
✓ Model loaded successfully!

TESTING QUERIES (EDGE-TUNED MODEL)

Query: Should I shift my SDL holdings into shorter PSU bonds or just hold?
Intent: switch_bonds
Confidence: 0.499

Query: I want to reduce rate risk but I don’t want my yield to fall. What should I do?
Intent: reduce_duration
Confidence: 0.613

Query: Is it better to exit long-duration bonds and move into AA corporates?
Intent: buy_recommendation
Confidence: 0.196

Query: Would switching from NTPC 2035 to REC 2030 improve my return?
Intent: switch_bonds
Confidence: 0.783

Query: Which reduces risk more: selling perpetuals or adding short-term G-Secs?
Intent: reduce_duration
Confidence: 0.269

Query: My yield looks weak lately… what should I change?
Intent: switch_bonds
Confidence: 0.391

Query: These long papers are stressing me out—what’s the safest move?
Intent: buy_recommen

## Testing

In [48]:
import torch
from transformers import AutoTokenizer
import torch.nn.functional as F
import time
from pathlib import Path
import torch.nn as nn
from transformers import AutoModel

# --- Load the trained model ---
class ProductionBondClassifier(nn.Module):
    """Same architecture as training: DeBERTa-v3-small + multi-task heads."""
    
    def __init__(self, base_model: str = 'microsoft/deberta-v3-small', dropout: float = 0.15):
        super().__init__()
        
        self.bert = AutoModel.from_pretrained(base_model)
        hidden_size = self.bert.config.hidden_size
        
        self.feature_layer = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.Dropout(dropout),
            nn.GELU(),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.LayerNorm(hidden_size // 2),
            nn.Dropout(dropout),
            nn.GELU()
        )
        
        feature_size = hidden_size // 2
        
        # Heads: intent + sectors + rating + duration + constraints
        self.intent_head = nn.Linear(feature_size, 13)
        self.sector_head = nn.Linear(feature_size, 7)
        self.rating_head = nn.Linear(feature_size, 7)
        self.duration_head = nn.Linear(feature_size, 3)
        self.constraint_head = nn.Linear(feature_size, 5)
        
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0, :]
        features = self.feature_layer(cls_output)
        features = self.dropout(features)
        
        return {
            'intent_logits': self.intent_head(features),
            'sector_logits': self.sector_head(features),
            'rating_logits': self.rating_head(features),
            'duration_logits': self.duration_head(features),
            'constraint_logits': self.constraint_head(features),
        }

class BondClassifier:
    def __init__(self, model_path: str, base_model: str = "microsoft/deberta-v3-small"):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        
        # Load model
        self.model = ProductionBondClassifier(base_model=base_model)
        state_dict = torch.load(f"{model_path}/pytorch_model_edge.bin", map_location=self.device)
        self.model.load_state_dict(state_dict)
        self.model.to(self.device)
        self.model.eval()

        self.intent_names = [
            'buy_recommendation', 'sell_recommendation', 'portfolio_analysis',
            'reduce_duration', 'increase_yield', 'hedge_volatility',
            'sector_rebalance', 'barbell_strategy', 'switch_bonds',
            'explain_recommendation', 'market_outlook', 'credit_analysis',
            'forecast_prices'
        ]
    
    def classify(self, query: str):
        # Tokenize
        enc = self.tokenizer(
            query,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=128
        )
        input_ids = enc["input_ids"].to(self.device)
        attention_mask = enc["attention_mask"].to(self.device)
        
        # Run model
        with torch.no_grad():
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
            intent_logits = outputs['intent_logits']
            intent_probs = F.softmax(intent_logits, dim=-1)
            
        # Get the predicted intent
        intent_idx = intent_probs.argmax().item()
        intent = self.intent_names[intent_idx]
        confidence = intent_probs.max().item()
        
        return intent, confidence

# --- Initialize the classifier ---
MODEL_PATH = '/kaggle/working/bond_classifier_v3'  # Path to your trained model directory
classifier = BondClassifier(MODEL_PATH)

# --- Input your query here ---
query = input("Enter your bond-related query: ")

# --- Start timing the classification process ---
start_time = time.time()

# --- Get the predicted intent ---
intent, confidence = classifier.classify(query)

# --- End timing the classification process ---
end_time = time.time()

# --- Output the results ---
print(f"Predicted Intent: {intent}")
print(f"Confidence: {confidence:.3f}")

# --- Output the time taken ---
print(f"Time taken for classification: {end_time - start_time:.4f} seconds")

Enter your bond-related query:      "If the Fed unexpectedly hikes interest rates, which of my bonds will likely be hit the hardest in terms of duration and credit quality?",


Predicted Intent: credit_analysis
Confidence: 0.771
Time taken for classification: 0.0160 seconds


# Non-bond related query

In [None]:
##DO for non-bonds and bonds
##Bonds - Intent classification
##Non bonds - Web search/General LLM 

#######

