In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dataset-for-k-fold/aos2.csv


In [3]:
!pip install --upgrade torch==2.0.1
!pip install --upgrade transformers==4.30.2
!pip install --upgrade datasets==2.12.0
!pip install --upgrade peft==0.3.0
!pip install --upgrade scikit-learn




In [5]:
import os
import gc
import json
import pickle
import random
import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from sklearn.model_selection import KFold
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    pipeline
)
from peft import LoraConfig, get_peft_model
from sklearn.metrics import hamming_loss

# -----------------------------
# 1. Define functions
# -----------------------------

# Preprocessing: build prompt and tokenize
def preprocess_function(example):
    target = (
        f"Aspect detected: {example['span']} ## "
        f"Opinion detected: {example['opinion']} ## "
        f"Sentiment detected: {example['sentiment']}"
    )
    input_text = f"### Human: {example['text']} ### Assistant: {target}"
    tokenized = tokenizer(
        input_text,
        truncation=True,
        max_length=256,  # Adjust if necessary
        padding="max_length"
    )
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

# Data collator (if needed)
def custom_data_collator(features):
    batch = {}
    for key in features[0].keys():
        collated = []
        for f in features:
            value = f[key]
            if not torch.is_tensor(value):
                value = torch.tensor(value)
            if value.ndim == 0:
                value = value.unsqueeze(0)
            if value.ndim == 1:
                value = value.unsqueeze(0)
            collated.append(value)
        batch[key] = torch.cat(collated, dim=0)
    return batch

# Inference pipeline function
def process_prompt(user_prompt, model):
    text_input = f"### Human: {user_prompt} ###"
    gen_pipe = pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=int(len(tokenizer.encode(user_prompt)) * 3.5),
        device=0
    )
    return gen_pipe(text_input)

# Functions to extract A-O-S triples from actual and predicted text
def extract_aos_from_actual(row):
    """Convert actual row into a set of A-O-S triples."""
    aspects = [x.strip() for x in str(row['span']).split(",") if x.strip()]
    opinions = [x.strip() for x in str(row['opinion']).split(",") if x.strip()]
    sentiments = [x.strip() for x in str(row['sentiment']).split(",") if x.strip()]
    aos_set = set(sorted(zip(aspects, opinions, sentiments)))
    return aos_set

def extract_aos_from_pred(pred_str):
    """Extract A-O-S triples from the predicted output."""
    import re
    a_match = re.search(r"aspect detected:\s*(.*?)\s*##", pred_str, re.IGNORECASE)
    o_match = re.search(r"opinion detected:\s*(.*?)\s*##", pred_str, re.IGNORECASE)
    s_match = re.search(r"sentiment detected:\s*(.*)", pred_str, re.IGNORECASE)
    if a_match and o_match and s_match:
        aspects = [x.strip() for x in a_match.group(1).split(",") if x.strip()]
        opinions = [x.strip() for x in o_match.group(1).split(",") if x.strip()]
        sentiments = [x.strip() for x in s_match.group(1).split(",") if x.strip()]
        aos_set = set(sorted(zip(aspects, opinions, sentiments)))
        return aos_set
    else:
        return set()

# Compute evaluation metrics from predicted and actual A-O-S triples
def compute_metrics(actual_list, pred_list):
    # Build global universe of triples
    global_triples = sorted(set().union(*actual_list).union(*pred_list))
    triple_to_idx = {triple: i for i, triple in enumerate(global_triples)}
    
    def aos_to_vector(aos_set):
        vec = [0] * len(global_triples)
        for triple in aos_set:
            if triple in triple_to_idx:
                vec[triple_to_idx[triple]] = 1
        return np.array(vec)
    
    actual_vectors = np.stack([aos_to_vector(a) for a in actual_list])
    pred_vectors = np.stack([aos_to_vector(p) for p in pred_list])
    
    TP = np.sum(np.logical_and(actual_vectors == 1, pred_vectors == 1))
    TN = np.sum(np.logical_and(actual_vectors == 0, pred_vectors == 0))
    FP = np.sum(np.logical_and(actual_vectors == 0, pred_vectors == 1))
    FN = np.sum(np.logical_and(actual_vectors == 1, pred_vectors == 0))
    
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0
    f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    mcc_den = (TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)
    mcc = ((TP * TN) - (FP * FN)) / np.sqrt(mcc_den) if mcc_den > 0 else 0
    hamming = hamming_loss(actual_vectors, pred_vectors)
    fdr = FP / (FP + TP) if (FP + TP) > 0 else 0

    return {
        "TP": int(TP),
        "TN": int(TN),
        "FP": int(FP),
        "FN": int(FN),
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "mcc": mcc,
        "hamming_loss": hamming,
        "fdr": fdr
    }

# -----------------------------
# 2. Global Setup: Load CSV and initialize KFold
# -----------------------------

# Load the CSV file that contains your training/validation data.
# The CSV is expected to have columns: text, span, opinion, sentiment
data_path = "/kaggle/input/datasetprop/dataset.csv"  # Update as needed
df = pd.read_csv(data_path)
print("Dataset columns:", df.columns.tolist())

# Only consider the first 500 sentences (rows)
df = df.head(500)

# Apply case folding
df = df.applymap(lambda x: x.lower() if isinstance(x, str) else x)

# Prepare a new column for actual A-O-S triples (will be used for metric calculation)
df['aos'] = df.apply(extract_aos_from_actual, axis=1)

# Set up 5-fold cross validation (without shuffling so order is preserved)
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=False)

# List to accumulate fold results
all_fold_results = []

# -----------------------------
# 3. Cross Validation Loop
# -----------------------------

for fold, (train_index, val_index) in enumerate(kf.split(df)):
    print(f"\n=== Processing Fold {fold} ===")
    print("Validation indices:", val_index.tolist())

    # Create train and validation splits
    df_train = df.iloc[train_index].reset_index(drop=True)
    df_val = df.iloc[val_index].reset_index(drop=True)

    # Convert DataFrames to Hugging Face Datasets
    train_dataset = Dataset.from_pandas(df_train)
    val_dataset = Dataset.from_pandas(df_val)

    # Initialize tokenizer (global across folds)
    model_name = "Orkhan/llama-2-7b-absa"  # update as needed
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # Preprocess datasets
    train_dataset = train_dataset.map(preprocess_function, batched=False, remove_columns=train_dataset.column_names)
    val_dataset = val_dataset.map(preprocess_function, batched=False, remove_columns=val_dataset.column_names)

    train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    val_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

    # -----------------------------
    # 4. Setup Model with LoRA for the Fold
    # -----------------------------
    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        return_dict=True,
        torch_dtype=torch.float16
    )
    base_model.to("cuda:0")
    base_model.config.use_cache = False
    base_model.config.pretraining_tp = 1

    # Apply LoRA
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.1,
        bias="none",
        task_type="CAUSAL_LM"
    )
    model = get_peft_model(base_model, lora_config)
    model.print_trainable_parameters()

    # -----------------------------
    # 5. Training Arguments and Trainer Setup
    # -----------------------------
    training_args = TrainingArguments(
        output_dir=f"results_fold_{fold}",  # Temporary directory (won't be kept)
        overwrite_output_dir=True,
        num_train_epochs=1,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=1,
        evaluation_strategy="epoch",
        logging_strategy="epoch",
        save_strategy="epoch",
        learning_rate=2e-4,
        fp16=True,
        report_to="none"
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
        data_collator=custom_data_collator
    )

    # -----------------------------
    # 6. Train the Model for this Fold
    # -----------------------------
    print(f"Starting training for fold {fold} ...")
    trainer.train()
    print(f"Training complete for fold {fold}.")

    # -----------------------------
    # 7. Run Inference on the Validation Set
    # -----------------------------
    predictions = []
    # We will also use the original text and actual A-O-S from df_val.
    for idx, row in df_val.iterrows():
        user_prompt = row["text"]
        gen_output = process_prompt(user_prompt, model)
        pred_text = gen_output[0]["generated_text"]
        predictions.append(pred_text)

    # Add predictions to df_val
    df_val["prediction"] = predictions
    df_val["aos_pred"] = df_val["prediction"].apply(extract_aos_from_pred)

    # Create lists of actual and predicted A-O-S for metric computation
    actual_list = df_val["aos"].tolist()
    pred_list = df_val["aos_pred"].tolist()

    metrics = compute_metrics(actual_list, pred_list)
    print(f"\n=== Evaluation Metrics for Fold {fold} ===")
    for key, value in metrics.items():
        print(f"{key}: {value}")

    # Optionally, warn if any predictions are empty
    empty_preds = df_val[df_val["aos_pred"].apply(lambda x: len(x) == 0)]
    if not empty_preds.empty:
        print("\n⚠️ Warning: Some predictions are empty!")
        print(empty_preds[["text", "aos", "aos_pred"]].head())

    # Save fold result in our list
    fold_details = {
        "fold": fold,
        "validation_indices": val_index.tolist(),
        "metrics": metrics
        # Optionally, you can add more details like predictions if needed.
    }
    all_fold_results.append(fold_details)

    # Clean up: delete model and free GPU memory
    del model, base_model, trainer
    torch.cuda.empty_cache()
    gc.collect()

# -----------------------------
# 8. Save All Fold Results in a Single Pickle File
# -----------------------------
results_file = "kfold_results.pkl"
with open(results_file, "wb") as f:
    pickle.dump(all_fold_results, f)
print(f"\nSaved all fold results to '{results_file}'.")


Dataset columns: ['id', 'text', 'span', 'opinion', 'sentiment']

=== Processing Fold 0 ===
Validation indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199

  df = df.applymap(lambda x: x.lower() if isinstance(x, str) else x)


tokenizer_config.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/434 [00:00<?, ?B/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

trainable params: 8388608 || all params: 6746804224 || trainable%: 0.12433454005023165
Starting training for fold 0




Training complete for fold 0.


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerN

Evaluation metrics for fold 0:
TP: 1500
TN: 6133
FP: 243
FN: 227
precision: 86.0927
recall: 86.8024
f1: 86.3994
mcc: 83.3654
hamming_loss: 0.0579
fdr: 0.1395

=== Processing Fold 1 ===
Validation indices: [641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799,



Map:   0%|          | 0/40 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 8388608 || all params: 6746804224 || trainable%: 0.12433454005023165
Starting training for fold 1


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerN

Training complete for fold 1.


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerN

Evaluation metrics for fold 1:
TP: 1600
TN: 6853
FP: 239
FN: 253
precision: 86.9987
recall: 86.2982
f1: 86.4518
mcc: 83.2084
hamming_loss: 0.0549
fdr: 0.1299

=== Processing Fold 2 ===
Validation indices: [1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, 1369, 1370, 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 1386, 1387, 1388, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1401, 1402, 1403, 1404, 1405, 1406, 1407, 1408, 1409, 1410, 1411, 1412, 1413, 14



Map:   0%|          | 0/40 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 8388608 || all params: 6746804224 || trainable%: 0.12433454005023165
Starting training for fold 2


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerN

Training complete for fold 2.


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerN

Evaluation metrics for fold 2:
TP: 1550
TN: 6540
FP: 236
FN: 247
precision: 86.7888
recall: 86.299
f1: 86.5387
mcc: 83.1828
hamming_loss: 0.0563
fdr: 0.1322

=== Processing Fold 3 ===
Validation indices: [1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031, 2032, 2033, 2034, 2035, 2036, 2037, 2038, 2039, 2040, 2041, 2042, 2043, 2044, 2045, 2046, 2047, 2048, 2049, 2050, 2051, 2052, 2053, 2054, 205



Map:   0%|          | 0/40 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 8388608 || all params: 6746804224 || trainable%: 0.12433454005023165
Starting training for fold 3


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerN

Training complete for fold 3.


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerN

Evaluation metrics for fold 3:
TP: 1650
TN: 8460
FP: 260
FN: 245
precision: 86.3882
recall: 87.0184
f1: 86.4603
mcc: 83.9624
hamming_loss: 0.0476
fdr: 0.1361

=== Processing Fold 4 ===
Validation indices: [2564, 2565, 2566, 2567, 2568, 2569, 2570, 2571, 2572, 2573, 2574, 2575, 2576, 2577, 2578, 2579, 2580, 2581, 2582, 2583, 2584, 2585, 2586, 2587, 2588, 2589, 2590, 2591, 2592, 2593, 2594, 2595, 2596, 2597, 2598, 2599, 2600, 2601, 2602, 2603, 2604, 2605, 2606, 2607, 2608, 2609, 2610, 2611, 2612, 2613, 2614, 2615, 2616, 2617, 2618, 2619, 2620, 2621, 2622, 2623, 2624, 2625, 2626, 2627, 2628, 2629, 2630, 2631, 2632, 2633, 2634, 2635, 2636, 2637, 2638, 2639, 2640, 2641, 2642, 2643, 2644, 2645, 2646, 2647, 2648, 2649, 2650, 2651, 2652, 2653, 2654, 2655, 2656, 2657, 2658, 2659, 2660, 2661, 2662, 2663, 2664, 2665, 2666, 2667, 2668, 2669, 2670, 2671, 2672, 2673, 2674, 2675, 2676, 2677, 2678, 2679, 2680, 2681, 2682, 2683, 2684, 2685, 2686, 2687, 2688, 2689, 2690, 2691, 2692, 2693, 2694, 2695, 26



Map:   0%|          | 0/40 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 8388608 || all params: 6746804224 || trainable%: 0.12433454005023165
Starting training for fold 4


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerN

Training complete for fold 4.


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerN

Evaluation metrics for fold 4:
TP: 1700
TN: 6849
FP: 259
FN: 277
precision: 87.0141
recall: 86.0196
f1: 85.0325
mcc: 82.7436
hamming_loss: 0.059
fdr: 0.1356

Saved all fold results to 'kfold_results.pkl'.


In [6]:
import pickle
import pandas as pd

# Load the fold results from the pickle file
results_file = "kfold_results.pkl"
with open(results_file, "rb") as f:
    all_fold_results = pickle.load(f)

# Convert the list of results into a DataFrame
df_results = pd.DataFrame(all_fold_results)

# The 'metrics' column is a dictionary, so we normalize it
metrics_df = pd.json_normalize(df_results['metrics'])

# Combine the fold numbers with the metrics
df_metrics = pd.concat([df_results[['fold']], metrics_df], axis=1)

# Print the metrics table
print(df_metrics.to_string(index=False))


 fold   TP   TN  FP  FN  precision  recall      f1     mcc  hamming_loss    fdr
    0 1500 6133 243 227    86.0927 86.8024 86.3994 83.3654        0.0579 0.1395
    1 1600 6853 239 253    86.9987 86.2982 86.4518 83.2084        0.0549 0.1299
    2 1550 6540 236 247    86.7888 86.2990 86.5387 83.1828        0.0563 0.1322
    3 1650 8460 260 245    86.3882 87.0184 86.4603 83.9624        0.0476 0.1361
    4 1700 6849 259 277    87.0141 86.0196 85.0325 82.7436        0.0590 0.1356


In [8]:
import pickle
import pandas as pd

results_file = "/kaggle/input/metrics/kfold_results.pkl"

# Load the fold results
with open(results_file, "rb") as f:
    all_fold_results = pickle.load(f)

# Convert the list of results into a DataFrame
df_results = pd.DataFrame(all_fold_results)

# If each fold’s metrics are stored in a dictionary under "metrics", you can flatten them:
metrics_df = pd.json_normalize(df_results['metrics'])

# Combine the fold number with the metrics
df_metrics = pd.concat([df_results[['fold']], metrics_df], axis=1)

# Print the metrics table
print(df_metrics.to_string(index=False))


 fold   TP   TN  FP  FN  precision  recall      f1     mcc  hamming_loss    fdr
    0 1500 6133 243 227    86.0927 86.8024 86.3994 83.3654        0.0579 0.1395
    1 1600 6853 239 253    86.9987 86.2982 86.4518 83.2084        0.0549 0.1299
    2 1550 6540 236 247    86.7888 86.2990 86.5387 83.1828        0.0563 0.1322
    3 1650 8460 260 245    86.3882 87.0184 86.4603 83.9624        0.0476 0.1361
    4 1700 6849 259 277    87.0141 86.0196 85.0325 82.7436        0.0590 0.1356
