In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/neural2/test_complaints.csv
/kaggle/input/neural2/train_complaints.csv


In [2]:
# =========================================================================================
# üöÄ DUAL-MODEL ENSEMBLE: DeBERTa (5 Folds) + RoBERTa (5 Folds) -> 5 Epochs Each
# =========================================================================================
import os
import gc
import glob
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel, get_linear_schedule_with_warmup
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from tqdm.notebook import tqdm
import warnings

warnings.filterwarnings('ignore')

# ==========================================
# üõë USER CONFIGURATION (EDIT THESE PATHS)
# ==========================================
TRAIN_CSV_PATH = "/kaggle/input/neural2/train_complaints.csv" 
TEST_CSV_PATH = "/kaggle/input/neural2/test_complaints.csv"
# ==========================================

# --- GLOBAL SETTINGS ---
class Config:
    max_len = 512
    batch_size = 8
    epochs = 5            # üÜï Requested: 7 Epochs
    lr = 2e-5
    n_folds = 5
    seed = 42
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def set_seed(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)

set_seed(Config.seed)

# --- DATASET CLASS ---
class ComplaintDataset(Dataset):
    def __init__(self, df, tokenizer, max_len, is_test=False, 
                 le_primary=None, le_secondary=None):
        self.df = df
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.is_test = is_test
        self.text = df['complaint_text'].astype(str).values
        
        if not is_test:
            self.primary = le_primary.transform(df['primary_category'].values)
            self.secondary = le_secondary.transform(df['secondary_category'].values)
            self.severity = df['severity'].values.astype(float)

    def __len__(self): return len(self.df)

    def __getitem__(self, index):
        text = self.text[index]
        inputs = self.tokenizer.encode_plus(text, add_special_tokens=True, return_attention_mask=True, truncation=False)
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']
        
        # HEAD + TAIL TRUNCATION
        if len(input_ids) > self.max_len:
            input_ids = input_ids[:128] + input_ids[-(self.max_len-128):]
            attention_mask = attention_mask[:128] + attention_mask[-(self.max_len-128):]
        else:
            padding = self.max_len - len(input_ids)
            input_ids = input_ids + ([0] * padding)
            attention_mask = attention_mask + ([0] * padding)
            
        data = {'input_ids': torch.tensor(input_ids, dtype=torch.long),
                'attention_mask': torch.tensor(attention_mask, dtype=torch.long)}
        
        if not self.is_test:
            data['primary_label'] = torch.tensor(self.primary[index], dtype=torch.long)
            data['secondary_label'] = torch.tensor(self.secondary[index], dtype=torch.long)
            data['severity_label'] = torch.tensor(self.severity[index], dtype=torch.float)
        return data

# --- UNIVERSAL MODEL CLASS ---
class UniversalModel(nn.Module):
    def __init__(self, model_name, num_primary, num_secondary):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(model_name)
        self.drop = nn.Dropout(0.3)
        self.primary_head = nn.Linear(768, num_primary)
        self.secondary_head = nn.Linear(768, num_secondary)
        self.severity_head = nn.Linear(768, 1)

    def forward(self, input_ids, attention_mask):
        outputs = self.encoder(input_ids, attention_mask=attention_mask)
        # Universal pooling: Take the first token [CLS] which is always index 0
        pooled_output = self.drop(outputs.last_hidden_state[:, 0, :])
        return {'primary': self.primary_head(pooled_output),
                'secondary': self.secondary_head(pooled_output),
                'severity': self.severity_head(pooled_output)}

# --- TRAINING FUNCTION ---
def train_model_type(model_name, save_prefix, train_df, le_p, le_s):
    print(f"\nüöÄ STARTING TRAINING SESSION FOR: {model_name}")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    
    for fold in range(Config.n_folds):
        print(f"\n   >>> Fold {fold + 1}/{Config.n_folds} ({model_name})")
        
        # Split
        d_train = train_df[train_df['fold'] != fold].reset_index(drop=True)
        d_val = train_df[train_df['fold'] == fold].reset_index(drop=True)
        
        train_ds = ComplaintDataset(d_train, tokenizer, Config.max_len, False, le_p, le_s)
        val_ds = ComplaintDataset(d_val, tokenizer, Config.max_len, False, le_p, le_s)
        
        train_loader = DataLoader(train_ds, batch_size=Config.batch_size, shuffle=True, num_workers=2)
        val_loader = DataLoader(val_ds, batch_size=Config.batch_size, shuffle=False, num_workers=2)
        
        model = UniversalModel(model_name, len(le_p.classes_), len(le_s.classes_))
        model.to(Config.device)
        
        optimizer = torch.optim.AdamW(model.parameters(), lr=Config.lr)
        num_train_steps = int(len(d_train) / Config.batch_size * Config.epochs)
        scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)
        
        # Loss Function
        def criterion(outputs, targets):
            loss_p = nn.CrossEntropyLoss()(outputs['primary'], targets['primary_label'])
            loss_s = nn.CrossEntropyLoss()(outputs['secondary'], targets['secondary_label'])
            loss_sev = nn.MSELoss()(outputs['severity'].squeeze(), targets['severity_label'])
            return (0.3 * loss_p) + (0.4 * loss_s) + (0.3 * loss_sev)
            
        best_loss = float('inf')
        
        for epoch in range(Config.epochs):
            model.train()
            loop = tqdm(train_loader, leave=False)
            for batch in loop:
                input_ids = batch['input_ids'].to(Config.device)
                mask = batch['attention_mask'].to(Config.device)
                targets = {
                    'primary_label': batch['primary_label'].to(Config.device),
                    'secondary_label': batch['secondary_label'].to(Config.device),
                    'severity_label': batch['severity_label'].to(Config.device)
                }
                optimizer.zero_grad()
                outputs = model(input_ids, mask)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()
                scheduler.step()
            
            # Validation
            model.eval()
            val_loss = 0
            with torch.no_grad():
                for batch in val_loader:
                    input_ids = batch['input_ids'].to(Config.device)
                    mask = batch['attention_mask'].to(Config.device)
                    targets = {
                        'primary_label': batch['primary_label'].to(Config.device),
                        'secondary_label': batch['secondary_label'].to(Config.device),
                        'severity_label': batch['severity_label'].to(Config.device)
                    }
                    outputs = model(input_ids, mask)
                    loss = criterion(outputs, targets)
                    val_loss += loss.item()
            val_loss /= len(val_loader)
            
            print(f"      Epoch {epoch+1} | Val Loss: {val_loss:.4f}")
            
            if val_loss < best_loss:
                best_loss = val_loss
                torch.save(model.state_dict(), f"{save_prefix}_fold_{fold}.pth")
                
        # Memory Cleanup
        del model, optimizer, scheduler, train_loader, val_loader
        torch.cuda.empty_cache()
        gc.collect()

# ==========================================
# 1. PREPARE DATA
# ==========================================
print("‚è≥ Loading Data...")
train_df = pd.read_csv(TRAIN_CSV_PATH)
test_df = pd.read_csv(TEST_CSV_PATH)

le_p = LabelEncoder()
le_p.fit(train_df['primary_category'])
le_s = LabelEncoder()
le_s.fit(train_df['secondary_category'])

skf = StratifiedKFold(n_splits=Config.n_folds, shuffle=True, random_state=Config.seed)
train_df['fold'] = -1
for fold, (_, val_idx) in enumerate(skf.split(train_df, train_df['primary_category'])):
    train_df.loc[val_idx, 'fold'] = fold

# ==========================================
# 2. TRAIN BOTH MODELS
# ==========================================
# A. Train DeBERTa (5 Folds)
train_model_type("microsoft/deberta-v3-base", "deberta", train_df, le_p, le_s)

# B. Train RoBERTa (5 Folds)
train_model_type("roberta-base", "roberta", train_df, le_p, le_s)

print("\nüéâ ALL 10 MODELS TRAINED! STARTING ENSEMBLE INFERENCE...")

# ==========================================
# 3. ENSEMBLE INFERENCE (COMBINE 10 MODELS)
# ==========================================
avg_p_probs = np.zeros((len(test_df), len(le_p.classes_)))
avg_s_probs = np.zeros((len(test_df), len(le_s.classes_)))
avg_sev_preds = np.zeros(len(test_df))

# Find all saved models (should be 10 files)
model_files = sorted(glob.glob("./*_fold_*.pth"))
print(f"üîç Found {len(model_files)} models to combine: {[os.path.basename(x) for x in model_files]}")

for model_path in model_files:
    # Determine which architecture to use based on filename
    if "deberta" in model_path:
        model_name = "microsoft/deberta-v3-base"
    else:
        model_name = "roberta-base"
        
    print(f"   Using {os.path.basename(model_path)} (Arch: {model_name})...")
    
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    test_ds = ComplaintDataset(test_df, tokenizer, Config.max_len, is_test=True)
    test_loader = DataLoader(test_ds, batch_size=Config.batch_size, shuffle=False, num_workers=2)
    
    model = UniversalModel(model_name, len(le_p.classes_), len(le_s.classes_))
    model.load_state_dict(torch.load(model_path, map_location=Config.device))
    model.to(Config.device)
    model.eval()
    
    fold_p, fold_s, fold_sev = [], [], []
    
    with torch.no_grad():
        for batch in tqdm(test_loader, leave=False):
            input_ids = batch['input_ids'].to(Config.device)
            mask = batch['attention_mask'].to(Config.device)
            outputs = model(input_ids, mask)
            
            fold_p.append(torch.softmax(outputs['primary'], dim=1).cpu().numpy())
            fold_s.append(torch.softmax(outputs['secondary'], dim=1).cpu().numpy())
            fold_sev.append(outputs['severity'].squeeze().cpu().numpy())
            
    # Add to Ensemble Average
    avg_p_probs += np.concatenate(fold_p)
    avg_s_probs += np.concatenate(fold_s)
    avg_sev_preds += np.concatenate(fold_sev)
    
    del model, tokenizer
    torch.cuda.empty_cache()

# ==========================================
# 4. SAVE SUBMISSION
# ==========================================
final_p_ids = np.argmax(avg_p_probs, axis=1)
final_s_ids = np.argmax(avg_s_probs, axis=1)
avg_sev_preds /= len(model_files)
final_sev_rounded = np.clip(np.round(avg_sev_preds), 1.0, 5.0).astype(int)

submission = pd.DataFrame({
    'complaint_id': test_df['complaint_id'],
    'primary_category': le_p.inverse_transform(final_p_ids),
    'secondary_category': le_s.inverse_transform(final_s_ids),
    'severity': final_sev_rounded
})

submission.to_csv("submission.csv", index=False)
print("‚úÖ Done! Saved 'submission.csv' (Combined DeBERTa + RoBERTa)")
print(submission.head())

‚è≥ Loading Data...

üöÄ STARTING TRAINING SESSION FOR: microsoft/deberta-v3-base


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]


   >>> Fold 1/5 (microsoft/deberta-v3-base)


2026-02-07 12:39:43.019911: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770467983.174598      24 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1770467983.220464      24 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1770467983.604497      24 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770467983.604520      24 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770467983.604523      24 computation_placer.cc:177] computation placer alr

pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/371M [00:00<?, ?B/s]

  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 1 | Val Loss: 1.4031


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 2 | Val Loss: 1.0178


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 3 | Val Loss: 0.8916


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 4 | Val Loss: 0.7965


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 5 | Val Loss: 0.7645

   >>> Fold 2/5 (microsoft/deberta-v3-base)


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 1 | Val Loss: 1.4239


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 2 | Val Loss: 0.9864


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 3 | Val Loss: 0.8962


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 4 | Val Loss: 0.8061


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 5 | Val Loss: 0.7740

   >>> Fold 3/5 (microsoft/deberta-v3-base)


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 1 | Val Loss: 1.4277


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 2 | Val Loss: 1.0715


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 3 | Val Loss: 0.9237


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 4 | Val Loss: 0.8808


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 5 | Val Loss: 0.8356

   >>> Fold 4/5 (microsoft/deberta-v3-base)


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 1 | Val Loss: 1.2885


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 2 | Val Loss: 0.9724


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 3 | Val Loss: 0.9021


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 4 | Val Loss: 0.8134


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 5 | Val Loss: 0.8051

   >>> Fold 5/5 (microsoft/deberta-v3-base)


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 1 | Val Loss: 1.2306


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 2 | Val Loss: 1.0211


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 3 | Val Loss: 0.8762


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 4 | Val Loss: 0.7974


  0%|          | 0/300 [00:00<?, ?it/s]

      Epoch 5 | Val Loss: 0.7738

üöÄ STARTING TRAINING SESSION FOR: roberta-base


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]


   >>> Fold 1/5 (roberta-base)


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (1668 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (560 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (700 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (538 > 512). Running this sequence through the model will result in indexing errors


      Epoch 1 | Val Loss: 1.0544


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (569 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1358 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (700 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (538 > 512). Running this sequence through the model will result in indexing errors


      Epoch 2 | Val Loss: 0.8621


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (544 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2980 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (700 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (538 > 512). Running this sequence through the model will result in indexing errors


      Epoch 3 | Val Loss: 0.7840


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (872 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (711 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (700 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (538 > 512). Running this sequence through the model will result in indexing errors


      Epoch 4 | Val Loss: 0.7729


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (891 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (597 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (700 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (538 > 512). Running this sequence through the model will result in indexing errors


      Epoch 5 | Val Loss: 0.7668

   >>> Fold 2/5 (roberta-base)


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (567 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (599 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (574 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (564 > 512). Running this sequence through the model will result in indexing errors


      Epoch 1 | Val Loss: 1.0438


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (644 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (895 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (574 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (564 > 512). Running this sequence through the model will result in indexing errors


      Epoch 2 | Val Loss: 0.8599


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (1408 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (633 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (574 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (564 > 512). Running this sequence through the model will result in indexing errors


      Epoch 3 | Val Loss: 0.7517


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (567 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1807 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (574 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (564 > 512). Running this sequence through the model will result in indexing errors


      Epoch 4 | Val Loss: 0.7512


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (524 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (4384 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (574 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (564 > 512). Running this sequence through the model will result in indexing errors


      Epoch 5 | Val Loss: 0.7258

   >>> Fold 3/5 (roberta-base)


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (1089 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (556 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (717 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1071 > 512). Running this sequence through the model will result in indexing errors


      Epoch 1 | Val Loss: 1.1207


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (823 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (999 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (717 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1071 > 512). Running this sequence through the model will result in indexing errors


      Epoch 2 | Val Loss: 0.8923


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (820 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (769 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (717 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1071 > 512). Running this sequence through the model will result in indexing errors


      Epoch 3 | Val Loss: 0.8603


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (1126 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (894 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (717 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1071 > 512). Running this sequence through the model will result in indexing errors


      Epoch 4 | Val Loss: 0.8368


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (589 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1113 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (717 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1071 > 512). Running this sequence through the model will result in indexing errors


      Epoch 5 | Val Loss: 0.8212

   >>> Fold 4/5 (roberta-base)


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (862 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1088 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (597 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (543 > 512). Running this sequence through the model will result in indexing errors


      Epoch 1 | Val Loss: 1.0182


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (769 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (838 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (597 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (543 > 512). Running this sequence through the model will result in indexing errors


      Epoch 2 | Val Loss: 0.8307


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (670 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (598 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (597 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (543 > 512). Running this sequence through the model will result in indexing errors


      Epoch 3 | Val Loss: 0.7930


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (766 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1257 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (597 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (543 > 512). Running this sequence through the model will result in indexing errors


      Epoch 4 | Val Loss: 0.7700


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (864 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2103 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (597 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (543 > 512). Running this sequence through the model will result in indexing errors


      Epoch 5 | Val Loss: 0.7748

   >>> Fold 5/5 (roberta-base)


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (625 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1616 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (864 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2519 > 512). Running this sequence through the model will result in indexing errors


      Epoch 1 | Val Loss: 1.1017


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (1520 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (593 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (864 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2519 > 512). Running this sequence through the model will result in indexing errors


      Epoch 2 | Val Loss: 0.8791


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (516 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (714 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (864 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2519 > 512). Running this sequence through the model will result in indexing errors


      Epoch 3 | Val Loss: 0.8559


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (875 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (593 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (864 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2519 > 512). Running this sequence through the model will result in indexing errors


      Epoch 4 | Val Loss: 0.8012


  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (603 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (1088 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (864 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (2519 > 512). Running this sequence through the model will result in indexing errors


      Epoch 5 | Val Loss: 0.7905

üéâ ALL 10 MODELS TRAINED! STARTING ENSEMBLE INFERENCE...
üîç Found 10 models to combine: ['deberta_fold_0.pth', 'deberta_fold_1.pth', 'deberta_fold_2.pth', 'deberta_fold_3.pth', 'deberta_fold_4.pth', 'roberta_fold_0.pth', 'roberta_fold_1.pth', 'roberta_fold_2.pth', 'roberta_fold_3.pth', 'roberta_fold_4.pth']
   Using deberta_fold_0.pth (Arch: microsoft/deberta-v3-base)...


  0%|          | 0/63 [00:00<?, ?it/s]

   Using deberta_fold_1.pth (Arch: microsoft/deberta-v3-base)...


  0%|          | 0/63 [00:00<?, ?it/s]

   Using deberta_fold_2.pth (Arch: microsoft/deberta-v3-base)...


  0%|          | 0/63 [00:00<?, ?it/s]

   Using deberta_fold_3.pth (Arch: microsoft/deberta-v3-base)...


  0%|          | 0/63 [00:00<?, ?it/s]

   Using deberta_fold_4.pth (Arch: microsoft/deberta-v3-base)...


  0%|          | 0/63 [00:00<?, ?it/s]

   Using roberta_fold_0.pth (Arch: roberta-base)...


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/63 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (732 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (526 > 512). Running this sequence through the model will result in indexing errors


   Using roberta_fold_1.pth (Arch: roberta-base)...


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/63 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (732 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (526 > 512). Running this sequence through the model will result in indexing errors


   Using roberta_fold_2.pth (Arch: roberta-base)...


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/63 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (732 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (526 > 512). Running this sequence through the model will result in indexing errors


   Using roberta_fold_3.pth (Arch: roberta-base)...


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/63 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (732 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (526 > 512). Running this sequence through the model will result in indexing errors


   Using roberta_fold_4.pth (Arch: roberta-base)...


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/63 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (732 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (526 > 512). Running this sequence through the model will result in indexing errors


‚úÖ Done! Saved 'submission.csv' (Combined DeBERTa + RoBERTa)
   complaint_id                                   primary_category  \
0       7799230  Credit reporting or other personal consumer re...   
1      15754196                                    Debt collection   
2      10989146  Credit reporting or other personal consumer re...   
3       3617850  Credit reporting or other personal consumer re...   
4       5253879  Credit reporting or other personal consumer re...   

                                  secondary_category  severity  
0                        Improper use of your report         1  
1                    Written notification about debt         1  
2  Problem with a company's investigation into an...         1  
3  Problem with a credit reporting company's inve...         1  
4                        Improper use of your report         4  
