In [None]:
# !pip install numpy
# !pip install pandas
# !pip install scikit-learn
# !pip install torch
# !pip install transformers


from transformers import RobertaModel, RobertaTokenizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, hamming_loss, roc_auc_score, average_precision_score
from collections import defaultdict
from torch.amp import autocast, GradScaler
import torch.nn.functional as F
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import time

## Hyperparameters
# Sequence length handed to the tokenizer (texts are padded/truncated to it).
MAX_LEN = 128

# One batch size per split; all three currently share the same value.
TRAIN_BATCH_SIZE, VALID_BATCH_SIZE, TEST_BATCH_SIZE = 32, 32, 32

# Optimisation schedule.
EPOCHS = 12
LEARNING_RATE = 1e-5

# Cut-off applied to sigmoid probabilities when turning them into labels.
THRESHOLD = 0.5
## Dataset Class
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes one text per row and pairs it with its labels.

    Texts are read from the ``'File Contents'`` column of ``df``; the label
    vector of a row is built from the columns named in ``target_list``.

    Args:
        df: DataFrame holding the text column and the label columns.
        tokenizer: Object exposing ``encode_plus`` (a Hugging Face tokenizer
            in this notebook).
        max_len: Length every encoded sequence is padded / truncated to.
        target_list: Names of the label columns, in output order.
    """

    def __init__(self, df, tokenizer, max_len, target_list):
        self.df = df
        self.tokenizer = tokenizer
        self.max_len = max_len
        # Materialise texts and labels once so __getitem__ is plain indexing.
        self.title = list(df['File Contents'])
        self.targets = df[target_list].values

    def __len__(self):
        """Return the number of texts in the dataset."""
        return len(self.title)

    def __getitem__(self, index):
        """Return the encoded text, its label vector and the cleaned text.

        Returns:
            dict with 1-D tensors ``input_ids``, ``attention_mask`` and
            ``token_type_ids``, a float tensor ``targets`` and the
            whitespace-normalised string ``title``.
        """
        # str() guards against non-string cells; split/join collapses every
        # run of whitespace (including newlines) into a single space.
        text = " ".join(str(self.title[index]).split())

        encoded = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            return_token_type_ids=True,
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        # flatten() collapses each returned tensor to one dimension so the
        # DataLoader can stack samples into a batch.
        sample = {
            field: encoded[field].flatten()
            for field in ('input_ids', 'attention_mask', 'token_type_ids')
        }
        sample['targets'] = torch.FloatTensor(self.targets[index])
        sample['title'] = text
        return sample

## Data
# CSV locations of the three splits.
train_file, val_file, test_file = (
    '/content/train.csv',
    '/content/val.csv',
    '/content/test.csv',
)
train_df, val_df, test_df = (
    pd.read_csv(csv_path) for csv_path in (train_file, val_file, test_file)
)

# Every column after the first one is treated as a label column.
# NOTE(review): presumably the first column is the 'File Contents' text column
# that CustomDataset reads -- confirm against the CSV header.
target_list = [*train_df.columns[1:]]

## Tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# One dataset per split, all sharing the tokenizer, max length and label set.
train_dataset, valid_dataset, test_dataset = (
    CustomDataset(split_df, tokenizer, MAX_LEN, target_list)
    for split_df in (train_df, val_df, test_df)
)

#print(train_dataset[0])

## Data Loader
# Only the training split is shuffled; validation and test keep file order.
train_data_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=0,
)
val_data_loader = torch.utils.data.DataLoader(
    dataset=valid_dataset,
    batch_size=VALID_BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)
test_data_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)

## Device
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

## Model


class RobertaInceptionAttentionImproved(nn.Module):
    """RoBERTa encoder with a multi-kernel Conv1d block, self-attention and a linear head.

    Pipeline: token embeddings from ``roberta-base`` -> dropout -> four
    parallel 1-D convolutions (kernel sizes 2, 3, 5, 7; 32 channels each) ->
    concatenation of the convolution features with the token embeddings ->
    multi-head self-attention -> mean over the sequence -> dense + LayerNorm
    -> dropout -> one logit per class.

    Args:
        num_classes: Number of output logits.

    Note:
        The convolutions are unpadded, so the input sequence must contain at
        least 7 tokens (the largest kernel size).
    """

    def __init__(self, num_classes):
        super().__init__()

        # forward() reads only ``last_hidden_state``, so the per-layer hidden
        # states are not requested (they would just be kept alive unused).
        self.roberta = RobertaModel.from_pretrained('roberta-base')

        # Derive layer widths instead of hard-coding 768 / 896.
        hidden_size = self.roberta.config.hidden_size  # 768 for roberta-base
        conv_channels = 32
        num_branches = 4
        feature_dim = hidden_size + num_branches * conv_channels  # 896 for roberta-base

        # Dropout layer after RoBERTa output
        self.dropout = nn.Dropout(0.3)

        # Inception-style block: one unpadded convolution per kernel size.
        self.conv2 = nn.Conv1d(in_channels=hidden_size, out_channels=conv_channels, kernel_size=2, padding=0)
        self.conv3 = nn.Conv1d(in_channels=hidden_size, out_channels=conv_channels, kernel_size=3, padding=0)
        self.conv5 = nn.Conv1d(in_channels=hidden_size, out_channels=conv_channels, kernel_size=5, padding=0)
        self.conv7 = nn.Conv1d(in_channels=hidden_size, out_channels=conv_channels, kernel_size=7, padding=0)

        # Self-attention over the concatenated (embedding + convolution) features.
        self.attention = nn.MultiheadAttention(embed_dim=feature_dim, num_heads=8, batch_first=True)

        # Dense projection with LayerNorm before the classifier.
        self.dense = nn.Sequential(
            nn.Linear(feature_dim, 512),
            nn.ReLU(),
            nn.LayerNorm(512)
        )

        # Final dropout and classification layer
        self.final_dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(512, num_classes)

    def forward(self, input_ids, attention_mask):
        """Compute raw (pre-sigmoid) class logits.

        Args:
            input_ids: Token ids, shape (batch_size, seq_length).
            attention_mask: Non-zero for real tokens and 0 for padding,
                shape (batch_size, seq_length).

        Returns:
            Tensor of shape (batch_size, num_classes).
        """
        encoder_out = self.roberta(input_ids=input_ids, attention_mask=attention_mask)

        # (batch_size, seq_length, hidden) with dropout applied.
        token_states = self.dropout(encoder_out.last_hidden_state)

        # Conv1d expects channels first: (batch_size, hidden, seq_length).
        channels_first = token_states.permute(0, 2, 1)

        # Each unpadded convolution shortens the sequence by (kernel_size - 1);
        # zero-pad the *outputs* so every branch is seq_length long again.
        branch_outputs = [
            F.pad(self.conv2(channels_first), (0, 1)),
            F.pad(self.conv3(channels_first), (1, 1)),
            F.pad(self.conv5(channels_first), (2, 2)),
            F.pad(self.conv7(channels_first), (3, 3)),
        ]

        # (batch_size, 4 * 32, seq_length) -> (batch_size, seq_length, 4 * 32)
        inception_output = torch.cat(branch_outputs, dim=1).permute(0, 2, 1)

        # Token embeddings and convolution features side by side:
        # (batch_size, seq_length, hidden + 4 * 32)
        concatenated_features = torch.cat([token_states, inception_output], dim=2)

        # True marks padding positions that attention must not use as keys.
        key_padding_mask = ~attention_mask.bool()

        attn_output, _ = self.attention(
            concatenated_features,
            concatenated_features,
            concatenated_features,
            key_padding_mask=key_padding_mask
        )  # (batch_size, seq_length, hidden + 4 * 32)

        # Global mean pooling over the sequence dimension.
        # NOTE(review): the mean runs over every position, padded ones
        # included (the mask above only hides them as attention keys).
        # Kept as-is so results stay comparable with existing checkpoints.
        pooled_output = F.adaptive_avg_pool1d(attn_output.permute(0, 2, 1), output_size=1).squeeze(-1)

        dense_output = self.dense(pooled_output)
        dense_output = self.final_dropout(dense_output)
        logits = self.fc(dense_output)  # (batch_size, num_classes)

        return logits

## Setting the model
# One output logit per label column. nn.Module.to() returns the module itself,
# so construction and device placement can be chained.
model = RobertaInceptionAttentionImproved(num_classes=len(target_list)).to(device)

## Loss & Optimizer
def loss_fn(outputs, targets):
    """Return the mean binary cross-entropy between raw logits and targets.

    The sigmoid is applied inside the loss, so ``outputs`` must be the
    model's unnormalised logits and ``targets`` floats of the same shape.
    """
    criterion = torch.nn.BCEWithLogitsLoss(reduction='mean')
    return criterion(outputs, targets)

# Optimizer: AdamW over every model parameter (encoder and head alike),
# using the global learning rate and a fixed weight decay.
optimizer = optim.AdamW(
    params=model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=1e-3,
)

## Training function
def train_model(training_loader, model, optimizer, accumulation_steps=1):
    """Train ``model`` for one pass over ``training_loader``.

    Args:
        training_loader: Sized iterable of batches; each batch is a dict with
            ``'input_ids'``, ``'attention_mask'`` and ``'targets'`` tensors.
        model: Module called as ``model(input_ids, attention_mask)`` and
            returning one logit per label.
        optimizer: Optimizer bound to ``model``'s parameters.
        accumulation_steps: Number of batches whose gradients are summed
            before each optimizer step. The default of 1 steps after every
            batch.

    Returns:
        Tuple ``(model, accuracy, mean_loss)`` where ``accuracy`` is the
        fraction of individual label decisions (sigmoid rounded at 0.5) that
        match the targets and ``mean_loss`` is the mean per-batch loss.

    Raises:
        ValueError: If ``accumulation_steps`` is smaller than 1 or the loader
            yields no samples.
    """
    if accumulation_steps < 1:
        raise ValueError(f"accumulation_steps must be >= 1, got {accumulation_steps}")

    losses = []
    correct_predictions = 0
    num_samples = 0
    total_batches = len(training_loader)

    model.train()
    # Start from clean gradients so nothing left over from earlier code leaks
    # into the first accumulated step.
    optimizer.zero_grad()

    for batch_idx, data in enumerate(training_loader, start=1):
        ids = data['input_ids'].to(device, dtype=torch.long, non_blocking=True)
        mask = data['attention_mask'].to(device, dtype=torch.long, non_blocking=True)
        targets = data['targets'].to(device, dtype=torch.float, non_blocking=True)

        outputs = model(ids, mask)
        loss = loss_fn(outputs, targets)
        losses.append(loss.item())

        # Training accuracy: sigmoid, then round (i.e. threshold at 0.5).
        predictions = torch.sigmoid(outputs).detach().cpu().numpy().round()
        labels = targets.detach().cpu().numpy()
        correct_predictions += int(np.sum(predictions == labels))
        num_samples += labels.size

        # Scale so the accumulated gradient is the mean over the group.
        # A shorter trailing group is still divided by accumulation_steps,
        # so it is weighted slightly less.
        (loss / accumulation_steps).backward()

        # Step once per full group, and once more for a trailing partial group.
        if batch_idx % accumulation_steps == 0 or batch_idx == total_batches:
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            optimizer.zero_grad()

    if num_samples == 0:
        raise ValueError("training_loader yielded no samples")

    return model, float(correct_predictions) / num_samples, np.mean(losses)


## Evaluator Function
def eval_model(validation_loader, model, threshold=0.5, target_list=None):
    """Evaluate ``model`` on a loader and print multi-label metrics.

    Args:
        validation_loader: Iterable of batches; each batch is a dict with
            ``'input_ids'``, ``'attention_mask'`` and ``'targets'`` tensors.
        model: Module called as ``model(input_ids, attention_mask)`` and
            returning one logit per label.
        threshold: Sigmoid probabilities ``>= threshold`` count as positive.
        target_list: Optional label names for the classification report;
            with ``None`` the report shows label indices.

    Returns:
        Tuple ``(weighted_f1, mean_loss)``.

    Raises:
        ValueError: If the loader yields no batches.
    """
    model.eval()
    final_targets = []
    final_probs = []
    losses = []

    with torch.no_grad():
        for data in validation_loader:
            ids = data['input_ids'].to(device, dtype=torch.long, non_blocking=True)
            mask = data['attention_mask'].to(device, dtype=torch.long, non_blocking=True)
            targets = data['targets'].to(device, dtype=torch.float, non_blocking=True)

            # Forward pass
            outputs = model(ids, mask)
            losses.append(loss_fn(outputs, targets).item())

            final_probs.extend(torch.sigmoid(outputs).cpu().numpy())
            final_targets.extend(targets.cpu().numpy())

    if not losses:
        raise ValueError("validation_loader yielded no batches")

    final_probs = np.array(final_probs)
    final_targets = np.array(final_targets)
    # Threshold exactly once, on the probabilities themselves.
    final_outputs = final_probs >= threshold

    # zero_division=0 yields the same values as scikit-learn's default but
    # without a warning for every label that received no positive prediction.
    acc = accuracy_score(final_targets, final_outputs)
    f1 = f1_score(final_targets, final_outputs, average='weighted', zero_division=0)
    precision = precision_score(final_targets, final_outputs, average='weighted', zero_division=0)
    recall = recall_score(final_targets, final_outputs, average='weighted', zero_division=0)
    hamming = hamming_loss(final_targets, final_outputs)

    try:
        auc_roc = roc_auc_score(final_targets, final_probs, average='weighted', multi_class='ovr')
    except ValueError as err:
        # ROC-AUC is undefined when a label has only one class in the targets;
        # report NaN instead of aborting the whole evaluation.
        print(f"AUC-ROC could not be computed: {err}")
        auc_roc = float('nan')
    aupr = average_precision_score(final_targets, final_probs, average='weighted')

    average_loss = np.mean(losses)

    print(f"Accuracy: {acc}")
    print(f"F1 Score: {f1}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"Hamming Loss: {hamming}")
    print(f"Average Loss: {average_loss}")
    print(f"AUC-ROC: {auc_roc}")
    print(f"AUPR: {aupr}")
    print(
        "\nClassification Report:\n",
        classification_report(final_targets, final_outputs, target_names=target_list, zero_division=0),
    )

    return f1, average_loss


# Learning Rate Scheduler
# The scheduler is stepped once per epoch, so the cosine half-period must
# cover the whole run. With T_max=10 and EPOCHS=12 the learning rate hit 0
# for epoch 11 (an epoch that could not change the weights) and then rose
# again for epoch 12.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# Training & Evaluation Loop
start = time.time()

history = defaultdict(list)
# Start below any attainable F1 so the first epoch always writes a checkpoint.
# With 0.0 and a strict ">" a run whose validation F1 stayed at 0 would never
# save, and loading the checkpoint for testing would then fail.
best_f1 = float('-inf')

for epoch in range(1, EPOCHS + 1):
    print(f'Epoch {epoch}/{EPOCHS}')
    model, train_acc, train_loss = train_model(train_data_loader, model, optimizer)
    # Pass the configured threshold and the label names so the constant is
    # actually used and the classification report shows column names.
    val_f1, val_loss = eval_model(
        val_data_loader, model, threshold=THRESHOLD, target_list=target_list
    )

    history['train_acc'].append(train_acc)
    history['train_loss'].append(train_loss)
    history['val_f1'].append(val_f1)
    history['val_loss'].append(val_loss)

    scheduler.step()  # Step scheduler after each epoch

    # Keep only the weights with the best validation F1 seen so far.
    if val_f1 > best_f1:
        torch.save(model.state_dict(), "caves_inceptive_roberta_32.bin")
        best_f1 = val_f1

end = time.time()
print(f"Total training and evaluation time: {end - start} seconds")


## Testing
print("\n\nTesting\n\n")
# Rebuild the architecture and restore the best checkpoint written during training.
model = RobertaInceptionAttentionImproved(num_classes=len(target_list))
# map_location='cpu' lets the file load on a machine without the GPU it was
# saved from; weights_only=True restricts unpickling to tensors, which is all
# a state_dict contains.
state_dict = torch.load(
    "caves_inceptive_roberta_32.bin", map_location='cpu', weights_only=True
)
model.load_state_dict(state_dict)
model = model.to(device)

start = time.time()
# Same threshold and label names as configured at the top of the notebook.
eval_model(test_data_loader, model, threshold=THRESHOLD, target_list=target_list)
end = time.time()
print(f"Total test-set evaluation time: {end - start} seconds")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.475177304964539
F1 Score: 0.5331222660569803
Precision: 0.640198840958886
Recall: 0.5034722222222222
Hamming Loss: 0.06825089803813209
Average Loss: 0.19746172764608938
AUC-ROC: 0.9007730065432009
AUPR: 0.6930911832368983

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        49
           1       0.00      0.00      0.00        20
           2       0.76      0.56      0.64       167
           3       1.00      0.05      0.09        44
           4       0.88      0.18      0.30        78
           5       0.70      0.43      0.53       127
           6       0.00      0.00      0.00        63
           7       0.00      0.00      0.00         6
           8       0.69      0.58      0.63       147
           9       0.82      0.87      0.84       379
          10       0.00      0.00      0.00        72

   micro avg       0.77      0.50      0.61      1152
   macro avg       0.44      0.24    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.524822695035461
F1 Score: 0.6285001812471076
Precision: 0.7944580241922402
Recall: 0.5633680555555556
Hamming Loss: 0.06014552823063461
Average Loss: 0.1713762095858974
AUC-ROC: 0.9274457821096695
AUPR: 0.7614577418405097

Classification Report:
               precision    recall  f1-score   support

           0       0.67      0.29      0.40        49
           1       0.00      0.00      0.00        20
           2       0.78      0.56      0.65       167
           3       0.89      0.39      0.54        44
           4       0.80      0.56      0.66        78
           5       0.83      0.38      0.52       127
           6       0.75      0.19      0.30        63
           7       0.00      0.00      0.00         6
           8       0.89      0.53      0.66       147
           9       0.81      0.87      0.84       379
          10       0.85      0.15      0.26        72

   micro avg       0.81      0.56      0.67      1152
   macro avg       0.66      0.36    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.5724417426545086
F1 Score: 0.6880146974282612
Precision: 0.7944686570302326
Recall: 0.6293402777777778
Hamming Loss: 0.05535599152620429
Average Loss: 0.15650369875854062
AUC-ROC: 0.9312593505802713
AUPR: 0.7865690915274147

Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.33      0.46        49
           1       0.69      0.45      0.55        20
           2       0.79      0.59      0.67       167
           3       0.86      0.57      0.68        44
           4       0.90      0.55      0.68        78
           5       0.78      0.50      0.61       127
           6       0.73      0.30      0.43        63
           7       0.00      0.00      0.00         6
           8       0.83      0.65      0.73       147
           9       0.82      0.88      0.85       379
          10       0.65      0.31      0.42        72

   micro avg       0.81      0.63      0.71      1152
   macro avg       0.71      0.47  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.5805471124620061
F1 Score: 0.7083542538812542
Precision: 0.7644673155694172
Recall: 0.6762152777777778
Hamming Loss: 0.055263885051119094
Average Loss: 0.15276238754872354
AUC-ROC: 0.9343471657492929
AUPR: 0.7940238983458151

Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.41      0.53        49
           1       0.69      0.45      0.55        20
           2       0.78      0.66      0.72       167
           3       0.78      0.66      0.72        44
           4       0.82      0.69      0.75        78
           5       0.76      0.52      0.62       127
           6       0.64      0.37      0.46        63
           7       0.00      0.00      0.00         6
           8       0.68      0.82      0.74       147
           9       0.85      0.85      0.85       379
          10       0.63      0.33      0.44        72

   micro avg       0.77      0.68      0.72      1152
   macro avg       0.67      0.52 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.5927051671732523
F1 Score: 0.7202107512737532
Precision: 0.7687194811256248
Recall: 0.6883680555555556
Hamming Loss: 0.05434282030026711
Average Loss: 0.1524448399582217
AUC-ROC: 0.9329946364806881
AUPR: 0.7903235618410585

Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.39      0.51        49
           1       0.75      0.45      0.56        20
           2       0.69      0.71      0.70       167
           3       0.93      0.64      0.76        44
           4       0.81      0.73      0.77        78
           5       0.72      0.64      0.68       127
           6       0.52      0.48      0.50        63
           7       0.00      0.00      0.00         6
           8       0.84      0.67      0.75       147
           9       0.84      0.86      0.85       379
          10       0.64      0.38      0.47        72

   micro avg       0.77      0.69      0.73      1152
   macro avg       0.68      0.54   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.6028368794326241
F1 Score: 0.7208980693088134
Precision: 0.7672405748079547
Recall: 0.6909722222222222
Hamming Loss: 0.053421755549415126
Average Loss: 0.1524993520109884
AUC-ROC: 0.9332276375713546
AUPR: 0.7930029336375045

Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.43      0.54        49
           1       0.75      0.45      0.56        20
           2       0.73      0.69      0.71       167
           3       0.85      0.64      0.73        44
           4       0.80      0.77      0.78        78
           5       0.74      0.55      0.63       127
           6       0.60      0.41      0.49        63
           7       0.00      0.00      0.00         6
           8       0.79      0.68      0.73       147
           9       0.83      0.89      0.86       379
          10       0.66      0.40      0.50        72

   micro avg       0.78      0.69      0.73      1152
   macro avg       0.68      0.54  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 8/12
Accuracy: 0.6028368794326241
F1 Score: 0.7281977123080826
Precision: 0.763205704116197
Recall: 0.7048611111111112
Hamming Loss: 0.05360596849958552
Average Loss: 0.1527594058744369
AUC-ROC: 0.9323344887299412
AUPR: 0.7905663089062215

Classification Report:
               precision    recall  f1-score   support

           0       0.65      0.49      0.56        49
           1       0.71      0.50      0.59        20
           2       0.72      0.70      0.71       167
           3       0.88      0.66      0.75        44
           4       0.82      0.76      0.79        78
           5       0.70      0.62      0.66       127
           6       0.60      0.41      0.49        63
           7       1.00      0.33      0.50         6
           8       0.78      0.74      0.76       147
           9       0.84      0.87      0.85       379
          10       0.65      0.39      0.49        72

   micro avg       0.77      0.70      0.74      1152
   macro avg       0.76   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.5927051671732523
F1 Score: 0.7302574133493434
Precision: 0.7548069365521798
Recall: 0.7126736111111112
Hamming Loss: 0.05425071382518191
Average Loss: 0.1550813466310501
AUC-ROC: 0.9319711109535649
AUPR: 0.790024289352976

Classification Report:
               precision    recall  f1-score   support

           0       0.65      0.49      0.56        49
           1       0.69      0.55      0.61        20
           2       0.74      0.70      0.72       167
           3       0.85      0.66      0.74        44
           4       0.78      0.78      0.78        78
           5       0.67      0.65      0.66       127
           6       0.55      0.41      0.47        63
           7       1.00      0.33      0.50         6
           8       0.75      0.79      0.77       147
           9       0.85      0.85      0.85       379
          10       0.58      0.42      0.48        72

   micro avg       0.76      0.71      0.74      1152
   macro avg       0.74      0.60    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 10/12
Accuracy: 0.5987841945288754
F1 Score: 0.7338025681090367
Precision: 0.7599607551703582
Recall: 0.7152777777777778
Hamming Loss: 0.053421755549415126
Average Loss: 0.15485690870592672
AUC-ROC: 0.9318338154206255
AUPR: 0.7906936577787377

Classification Report:
               precision    recall  f1-score   support

           0       0.67      0.49      0.56        49
           1       0.73      0.55      0.63        20
           2       0.73      0.71      0.72       167
           3       0.85      0.66      0.74        44
           4       0.80      0.78      0.79        78
           5       0.69      0.64      0.66       127
           6       0.60      0.41      0.49        63
           7       1.00      0.33      0.50         6
           8       0.75      0.78      0.76       147
           9       0.85      0.85      0.85       379
          10       0.57      0.44      0.50        72

   micro avg       0.77      0.72      0.74      1152
   macro avg       0.7

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 11/12
Accuracy: 0.5987841945288754
F1 Score: 0.7338025681090367
Precision: 0.7599607551703582
Recall: 0.7152777777777778
Hamming Loss: 0.053421755549415126
Average Loss: 0.15485690870592672
AUC-ROC: 0.9318338154206255
AUPR: 0.7906936577787377

Classification Report:
               precision    recall  f1-score   support

           0       0.67      0.49      0.56        49
           1       0.73      0.55      0.63        20
           2       0.73      0.71      0.72       167
           3       0.85      0.66      0.74        44
           4       0.80      0.78      0.79        78
           5       0.69      0.64      0.66       127
           6       0.60      0.41      0.49        63
           7       1.00      0.33      0.50         6
           8       0.75      0.78      0.76       147
           9       0.85      0.85      0.85       379
          10       0.57      0.44      0.50        72

   micro avg       0.77      0.72      0.74      1152
   macro avg       0.7

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.6008105369807497
F1 Score: 0.7326602296939406
Precision: 0.7574737402404721
Recall: 0.7161458333333334
Hamming Loss: 0.05360596849958552
Average Loss: 0.15461171250189504
AUC-ROC: 0.9317118644410513
AUPR: 0.7898879762346371

Classification Report:
               precision    recall  f1-score   support

           0       0.65      0.49      0.56        49
           1       0.73      0.55      0.63        20
           2       0.72      0.73      0.73       167
           3       0.85      0.66      0.74        44
           4       0.80      0.77      0.78        78
           5       0.69      0.61      0.65       127
           6       0.62      0.41      0.50        63
           7       1.00      0.33      0.50         6
           8       0.75      0.78      0.76       147
           9       0.85      0.86      0.86       379
          10       0.57      0.43      0.49        72

   micro avg       0.76      0.72      0.74      1152
   macro avg       0.75      0.60  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  model.load_state_dict(torch.load("caves_RIncDNet32_32_best.bin"))


Accuracy: 0.5832068791097622
F1 Score: 0.7255855918366548
Precision: 0.7564826076915825
Recall: 0.7038961038961039
Hamming Loss: 0.055226008185037014
Average Loss: 0.1593161721623713
AUC-ROC: 0.9288566061146707
AUPR: 0.7739985877020208

Classification Report:
               precision    recall  f1-score   support

           0       0.65      0.43      0.52        97
           1       0.68      0.53      0.59        40
           2       0.71      0.74      0.73       334
           3       0.65      0.61      0.63        87
           4       0.78      0.69      0.73       157
           5       0.72      0.63      0.67       255
           6       0.66      0.60      0.63       125
           7       1.00      0.15      0.27        13
           8       0.74      0.76      0.75       295
           9       0.85      0.82      0.84       762
          10       0.67      0.47      0.55       145

   micro avg       0.76      0.70      0.73      2310
   macro avg       0.74      0.58  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
