In [1]:
import gc
import torch
import numpy as np
from transformers import AutoTokenizer, DataCollatorWithPadding, DataCollatorForLanguageModeling
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import load_dataset, load_metric
import transformers
import os

gc.collect()
torch.cuda.empty_cache()

metric_collector = []


# List of glue tasks
#GLUE_TASKS = ["cola", "mnli", "mnli-mm", "mrpc", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"]
GLUE_TASKS = ["cola", "mnli", "mnli-mm", "mrpc", "qnli", "qqp", "rte", "sst2",  "wnli"]
#GLUE_TASKS = ["qqp"]

In [2]:

for task in GLUE_TASKS:
    
    #List of glue keys
    task_to_keys = {
        "cola": ("sentence", None),
        "mnli": ("premise", "hypothesis"),
        "mnli-mm": ("premise", "hypothesis"),
        "mrpc": ("sentence1", "sentence2"),
        "qnli": ("question", "sentence"),
        "qqp": ("question1", "question2"),
        "rte": ("sentence1", "sentence2"),
        "sst2": ("sentence", None),
        "stsb": ("sentence1", "sentence2"),
        "wnli": ("sentence1", "sentence2"),
    }
    
    #Select task
    #task = "rte"  #cola, mrpc
    batch_size = 10 #10 normally, 8 for qnli
    
    # Load dataset based on task variable
    actual_task = "mnli" if task == "mnli-mm" else task
    dataset = load_dataset("glue", actual_task)
    metric = load_metric('glue', actual_task)
    
    #Collect sentence keys and labels
    sentence1_key, sentence2_key = task_to_keys[task]
    
    # Number of logits to output
    num_labels = 3 if task.startswith("mnli") else 1 if task=="stsb" else 2
    
    

    ###############################################
    
    #         DEBERTA SECTION
    
    ###############################################
    
    
    ###  Tokenizing Section  ####
    
    #Load model
    model_checkpoint = "microsoft/deberta-v3-small"
    
    # Create tokenizer for respective model
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True, truncation=True, model_max_length=512)
    
    def tokenizer_func(examples):
        if sentence2_key is None:
            return tokenizer(examples[sentence1_key], truncation=True,)
        return tokenizer(examples[sentence1_key], examples[sentence2_key], truncation=True,)
    
    # tokenize sentence(s)
    encoded_dataset = dataset.map(tokenizer_func, batched=True)
    
    #model_checkpoint = "deberta-v3-small_baseline_cola/"
    model_checkpoint = "deberta-v3-small_baseline_"+actual_task+"/"
    
    ###  Model Section  ####
    
    # Create model and attach ForSequenceClassification head
    model_deberta = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)
    
    # Type of metric for given task
    metric_name = "pearson" if task == "stsb" else "matthews_correlation" if task == "cola" else "accuracy"
    
    args = TrainingArguments(
        f"{model_checkpoint}-finetuned-Testing-{task}",
        evaluation_strategy = "epoch",
        per_device_eval_batch_size=batch_size,
        weight_decay=0.01,
        metric_for_best_model=metric_name,
        eval_accumulation_steps=5
    )
    
    def compute_metrics(eval_pred):
        predictions, labels = eval_pred
        if task != "stsb":
            predictions = np.argmax(predictions, axis=1)
        else:
            predictions = predictions[:, 0]
        return metric.compute(predictions=predictions, references=labels)
    
    validation_key = "validation_mismatched" if task == "mnli-mm" else "validation_matched" if task == "mnli" else "validation"
    trainer = Trainer(
        model_deberta,
        args,
        train_dataset=encoded_dataset["train"],
        eval_dataset=encoded_dataset[validation_key],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )
    
    trainer.evaluate()
    

    
    ### Collect Predictions  ###
    
    prediction_deberta = trainer.predict(encoded_dataset[validation_key])
    

    
    ## Clear the Cache
    gc.collect()
    torch.cuda.empty_cache()
    
    
    ###############################################
    
    #         ELECTRA SECTION
    
    ###############################################
    
    
    ###  Tokenizing Section  ####
    
    #Load model
    model_checkpoint = "google/electra-small-discriminator"
    
    # Create tokenizer for respective model
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True, truncation=True, model_max_length=512)
    
    def tokenizer_func(examples):
        if sentence2_key is None:
            return tokenizer(examples[sentence1_key], truncation=True,)
        return tokenizer(examples[sentence1_key], examples[sentence2_key], truncation=True,)
    
    # tokenize sentence(s)
    encoded_dataset = dataset.map(tokenizer_func, batched=True)
    
    #model_checkpoint = "electra-small-discriminator-finetuned-cola/"
    #model_checkpoint = "Electra_fintuned_cola/"
    model_checkpoint = "Electra_fintuned_"+actual_task+"/"
    
    ###  Model Section  ####
    
    # Create model and attach ForSequenceClassification head
    model_electra = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)
    
    # Type of metric for given task
    metric_name = "pearson" if task == "stsb" else "matthews_correlation" if task == "cola" else "accuracy"
    
    
    args = TrainingArguments(
        f"{model_checkpoint}-finetuned-Testing-{task}",
        evaluation_strategy = "epoch",
        per_device_eval_batch_size=batch_size,
        weight_decay=0.01,
        metric_for_best_model=metric_name,
        eval_accumulation_steps=5
    )
    
    def compute_metrics(eval_pred):
        predictions, labels = eval_pred
        if task != "stsb":
            predictions = np.argmax(predictions, axis=1)
        else:
            predictions = predictions[:, 0]
        return metric.compute(predictions=predictions, references=labels)
    
    validation_key = "validation_mismatched" if task == "mnli-mm" else "validation_matched" if task == "mnli" else "validation"
    trainer = Trainer(
        model_electra,
        args,
        train_dataset=encoded_dataset["train"],
        eval_dataset=encoded_dataset[validation_key],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )
    
    trainer.evaluate()
    
    

    
    ### Collect Predictions  ###
    ## Clear the Cache
    gc.collect()
    torch.cuda.empty_cache()
    prediction_electra = trainer.predict(encoded_dataset[validation_key])
    

    
    
    ## Clear the Cache
    gc.collect()
    torch.cuda.empty_cache()
    


    ###############################################
    
    #         XLNET SECTION
    
    ###############################################
    
    
    ###  Tokenizing Section  ####
    
    #Load model
    model_checkpoint = "xlnet-base-cased"
    
    # Create tokenizer for respective model
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True, truncation=True, model_max_length=512)
    
    def tokenizer_func(examples):
        if sentence2_key is None:
            return tokenizer(examples[sentence1_key], truncation=True,)
        return tokenizer(examples[sentence1_key], examples[sentence2_key], truncation=True,)
    
    # tokenize sentence(s)
    encoded_dataset = dataset.map(tokenizer_func, batched=True)
    
    #model_checkpoint = "electra-small-discriminator-finetuned-cola/"
    #model_checkpoint = "Electra_fintuned_cola/"
    model_checkpoint = "xlnet-base-cased_baseline_"+actual_task+"/"
    
    ###  Model Section  ####
    
    # Create model and attach ForSequenceClassification head
    model_xlnet = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)
    
    # Type of metric for given task
    metric_name = "pearson" if task == "stsb" else "matthews_correlation" if task == "cola" else "accuracy"
    
    
    args = TrainingArguments(
        f"{model_checkpoint}-finetuned-Testing-{task}",
        evaluation_strategy = "epoch",
        per_device_eval_batch_size=batch_size,
        weight_decay=0.01,
        metric_for_best_model=metric_name,
        eval_accumulation_steps=5
    )
    
    def compute_metrics(eval_pred):
        predictions, labels = eval_pred
        if task != "stsb":
            predictions = np.argmax(predictions, axis=1)
        else:
            predictions = predictions[:, 0]
        return metric.compute(predictions=predictions, references=labels)
    
    validation_key = "validation_mismatched" if task == "mnli-mm" else "validation_matched" if task == "mnli" else "validation"
    trainer = Trainer(
        model_xlnet,
        args,
        train_dataset=encoded_dataset["train"],
        eval_dataset=encoded_dataset[validation_key],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )
    
    trainer.evaluate()
    

    
    ### Collect Predictions  ###
    ## Clear the Cache
    gc.collect()
    torch.cuda.empty_cache()
    prediction_xlnet = trainer.predict(encoded_dataset[validation_key])
    

    
    ## Clear the Cache
    gc.collect()
    torch.cuda.empty_cache()
    

    
    

    ###############################################
    
    # Combine Model Predicions to create Input Features
    
    ###############################################
    
    
    import pandas as pd
    
    #Labels
    val_labels = prediction_deberta.label_ids
    
    
    #DeBERTa
    df_deberta = pd.DataFrame(prediction_deberta[0])
    df_deberta=df_deberta.rename(columns=dict(zip(df_deberta.columns,['deberta_'+str(col) for col in df_deberta.columns])))
    print(df_deberta.head(),'\n')
    
    
    #Electra
    df_electra = pd.DataFrame(prediction_electra[0])
    df_electra=df_electra.rename(columns=dict(zip(df_electra.columns,['electra_'+str(col) for col in df_electra.columns])))
    print(df_electra.head(),'\n')
    
    
    #XLNet
    df_xlnet = pd.DataFrame(prediction_xlnet[0])
    df_xlnet=df_xlnet.rename(columns=dict(zip(df_xlnet.columns,['xlnet_'+str(col) for col in df_xlnet.columns])))
    print(df_xlnet.head(),'\n')
    
    

    
    
    #Combine the dataframes
    df_combine = pd.concat([df_deberta, df_electra, df_xlnet], axis=1)
    df_combine.head()
    
    
    ###############################################
    
    #         ENSEMBLE SECTION
    
    ###############################################
    
    
    # Importing the required packages
    
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import f1_score
    from sklearn.metrics import matthews_corrcoef
    from sklearn.metrics import classification_report
    from sklearn.metrics import roc_auc_score
    

    
    # Split the dataset into train and test
    
    X_train, X_test, y_train, y_test = train_test_split(df_combine, val_labels, test_size=0.3, random_state=100)
    X_train.head()
    
    
    # Perform training with random forest with all columns
    # Initialize random forest classifier
    clf = RandomForestClassifier(n_estimators=100)
    
    # Perform training
    clf.fit(X_train, y_train)
    
    # Predicton on test using all features
    y_pred = clf.predict(X_test)
    y_pred_score = clf.predict_proba(X_test)
    

    
    # Print basic Report, then specify for the model
    
    print("\n")
    print("Results Using All Features: \n")
    
    print("Classification Report: ")
    print(classification_report(y_test,y_pred))
    print("\n")
    
    if metric_name == 'accuracy':
        ensemble_score = accuracy_score(y_test, y_pred)
        try:
            ensemble_f = f1_score(y_test, y_pred)
        except:
            pass
    
    elif metric_name == 'matthews_correlation':
        ensemble_score = matthews_corrcoef(y_test, y_pred)
        ensemble_f = 999
    
    elif metric_name == "pearson":
        ensemble_score = accuracy_score(y_test, y_pred)
        ensemble_f = 999
    else:
        ensemble_score = 999
        ensemble_f = 999
        print('ERROR')
    
    try:
        deberta_f = prediction_deberta.metrics['test_f1'], 
        electra_f = prediction_electra.metrics['test_f1'], 
        xlnet_f = prediction_xlnet.metrics['test_f1']
    except:
        ensemble_f = deberta_f = electra_f = xlnet_f = 999
    
    print("Accuracy : ", ensemble_score * 100, '\nFscore : ', ensemble_f)
        
    print('-------------------')
    print("DeBERTa : ", prediction_deberta.metrics['test_'+metric_name]*100)
    print("Electra : ", prediction_electra.metrics['test_'+metric_name]*100)
    print("XLNet : ", prediction_xlnet.metrics['test_'+metric_name]*100)
    
    
    metric_collector.append([task,
                             ensemble_score,
                             prediction_deberta.metrics['test_'+metric_name], 
                             prediction_electra.metrics['test_'+metric_name], 
                             prediction_xlnet.metrics['test_'+metric_name],
                             ensemble_f, deberta_f, electra_f, xlnet_f])
    

Reusing dataset glue (/home/ubuntu/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/9 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

The following columns in the evaluation set  don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: sentence, idx.
***** Running Evaluation *****
  Num examples = 1043
  Batch size = 10


The following columns in the test set  don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: sentence, idx.
***** Running Prediction *****
  Num examples = 1043
  Batch size = 10
loading configuration file https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
Model config ElectraConfig {
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_ty

  0%|          | 0/9 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file Electra_fintuned_cola/config.json
Model config ElectraConfig {
  "_name_or_path": "google/electra-small-discriminator",
  "architectures": [
    "ElectraForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "vocab_size": 30522
}

loading weights file Electra_fintuned_cola/pytorch_model.bin
All model checkpoint weights were used when i

The following columns in the test set  don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: sentence, idx.
***** Running Prediction *****
  Num examples = 1043
  Batch size = 10
Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/xlnet-base-cased/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/06bdb0f5882dbb833618c81c3b4c996a0c79422fa2c95ffea3827f92fc2dba6b.da982e2e596ec73828dbae86525a1870e513bd63aae5a2dc773ccc840ac5c346
Model config XLNetConfig {
  "architectures": [
    "XLNetLMHeadModel"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": "xlnet",
  

  0%|          | 0/9 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file xlnet-base-cased_baseline_cola/config.json
Model config XLNetConfig {
  "_name_or_path": "xlnet-base-cased",
  "architectures": [
    "XLNetForSequenceClassification"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": "xlnet",
  "n_head": 12,
  "n_layer": 12,
  "pad_token_id": 5,
  "problem_type": "single_label_classification",
  "reuse_len": null,
  "same_length": false,
  "start_n_top": 5,
  "summary_activation": "tanh",
  "summary_last_dropout": 0.1,
  "summary_type": "last",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 250
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "untie_r": true,
  "

The following columns in the test set  don't have a corresponding argument in `XLNetForSequenceClassification.forward` and have been ignored: sentence, idx.
***** Running Prediction *****
  Num examples = 1043
  Batch size = 10


   deberta_0  deberta_1
0  -1.497016   1.722220
1  -1.920744   2.116446
2  -2.035378   2.234880
3  -1.882711   2.098204
4  -0.732498   0.953038 

   electra_0  electra_1
0  -3.543517   3.213759
1  -3.588737   3.270477
2  -1.802421   1.629006
3  -3.582784   3.244232
4  -3.103451   2.799271 

    xlnet_0   xlnet_1
0 -2.494728  2.634280
1 -2.420513  2.597448
2 -0.492801  0.315644
3 -2.395937  2.596020
4 -1.864613  1.893518 



Reusing dataset glue (/home/ubuntu/.cache/huggingface/datasets/glue/mnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)




Results Using All Features: 

Classification Report: 
              precision    recall  f1-score   support

           0       0.80      0.73      0.76       102
           1       0.87      0.91      0.89       211

    accuracy                           0.85       313
   macro avg       0.84      0.82      0.83       313
weighted avg       0.85      0.85      0.85       313



Accuracy :  65.86501823116294 
Fscore :  999
-------------------
DeBERTa :  62.08288813242873
Electra :  60.702868360014484
XLNet :  39.99158683209528


loading configuration file https://huggingface.co/microsoft/deberta-v3-small/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/8e0c12a7672d1d36f647c86e5fc3a911f189d8704e2bc94dde4a1ffe38f648fa.9df96bac06c2c492bc77ad040068f903c93beec14607428f25bf9081644ad0da
Model config DebertaV2Config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.

  0%|          | 0/393 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

loading configuration file deberta-v3-small_baseline_mnli/config.json
Model config DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-small",
  "architectures": [
    "DebertaV2ForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "sh

The following columns in the test set  don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: premise, hypothesis, idx.
***** Running Prediction *****
  Num examples = 9815
  Batch size = 10
loading configuration file https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
Model config ElectraConfig {
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_e

  0%|          | 0/393 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

loading configuration file Electra_fintuned_mnli/config.json
Model config ElectraConfig {
  "_name_or_path": "google/electra-small-discriminator",
  "architectures": [
    "ElectraForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
 

The following columns in the test set  don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: premise, hypothesis, idx.
***** Running Prediction *****
  Num examples = 9815
  Batch size = 10
Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/xlnet-base-cased/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/06bdb0f5882dbb833618c81c3b4c996a0c79422fa2c95ffea3827f92fc2dba6b.da982e2e596ec73828dbae86525a1870e513bd63aae5a2dc773ccc840ac5c346
Model config XLNetConfig {
  "architectures": [
    "XLNetLMHeadModel"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": 

  0%|          | 0/393 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

loading configuration file xlnet-base-cased_baseline_mnli/config.json
Model config XLNetConfig {
  "_name_or_path": "xlnet-base-cased",
  "architectures": [
    "XLNetForSequenceClassification"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": "xlnet",
  "n_head": 12,
  "n_layer": 12,
  "pad_token_id": 5,
  "problem_type": "single_label_classification",
  "reuse_len": null,
  "same_length": false,
  "start_n_top": 5,
  "summary_activation": "tanh",
  "summary_last_dropout": 0.1,
  "summary_type": "last",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-genera

The following columns in the test set  don't have a corresponding argument in `XLNetForSequenceClassification.forward` and have been ignored: premise, hypothesis, idx.
***** Running Prediction *****
  Num examples = 9815
  Batch size = 10


   deberta_0  deberta_1  deberta_2
0  -1.370998   2.892838  -2.043836
1  -3.777659  -1.623189   4.587751
2   0.896223   2.040462  -3.222241
3  -3.378071  -0.047155   2.729046
4  -3.616945  -2.134711   4.897952 

   electra_0  electra_1  electra_2
0  -0.894975   2.626698  -2.080640
1  -2.909977  -1.566361   4.227887
2   2.903684   0.026978  -2.943892
3  -2.990260  -0.573303   3.617604
4  -2.842806  -1.581113   4.140175 

    xlnet_0   xlnet_1   xlnet_2
0 -1.391367  5.151418 -2.664597
1 -3.558790 -2.372521  5.860384
2  4.415154 -0.168978 -3.215842
3 -3.986264 -1.329955  5.289119
4 -3.739260 -2.241869  5.892507 



Results Using All Features: 

Classification Report: 
              precision    recall  f1-score   support

           0       0.90      0.90      0.90      1057
           1       0.83      0.83      0.83       932
           2       0.88      0.89      0.89       956

    accuracy                           0.88      2945
   macro avg       0.87      0.87      0.87      2945


Reusing dataset glue (/home/ubuntu/.cache/huggingface/datasets/glue/mnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
loading configuration file https://huggingface.co/microsoft/deberta-v3-small/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/8e0c12a7672d1d36f647c86e5fc3a911f189d8704e2bc94dde4a1ffe38f648fa.9df96bac06c2c492bc77ad040068f903c93beec14607428f25bf9081644ad0da
Model config DebertaV2Config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],


The following columns in the test set  don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: premise, hypothesis, idx.
***** Running Prediction *****
  Num examples = 9832
  Batch size = 10
loading configuration file https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
Model config ElectraConfig {
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_e

  0%|          | 0/10 [00:00<?, ?ba/s]

Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/glue/mnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-c8df7054506c5a62.arrow
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/glue/mnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-f25d97191fdeaab2.arrow
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/glue/mnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-1b3b6420e81c0a62.arrow
loading configuration file Electra_fintuned_mnli/config.json
Model config ElectraConfig {
  "_name_or_path": "google/electra-small-discriminator",
  "architectures": [
    "ElectraForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "init

The following columns in the test set  don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: premise, hypothesis, idx.
***** Running Prediction *****
  Num examples = 9832
  Batch size = 10
Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/xlnet-base-cased/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/06bdb0f5882dbb833618c81c3b4c996a0c79422fa2c95ffea3827f92fc2dba6b.da982e2e596ec73828dbae86525a1870e513bd63aae5a2dc773ccc840ac5c346
Model config XLNetConfig {
  "architectures": [
    "XLNetLMHeadModel"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": 

  0%|          | 0/10 [00:00<?, ?ba/s]

Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/glue/mnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-fc4e39de2bdacca5.arrow
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/glue/mnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-75b41e558d2b05ed.arrow
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/glue/mnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-47d5bb9449eb30e8.arrow
loading configuration file xlnet-base-cased_baseline_mnli/config.json
Model config XLNetConfig {
  "_name_or_path": "xlnet-base-cased",
  "architectures": [
    "XLNetForSequenceClassification"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "id2label": {
    "0

The following columns in the test set  don't have a corresponding argument in `XLNetForSequenceClassification.forward` and have been ignored: premise, hypothesis, idx.
***** Running Prediction *****
  Num examples = 9832
  Batch size = 10


   deberta_0  deberta_1  deberta_2
0  -3.826644  -1.975078   5.015347
1  -2.485083  -0.341503   2.454723
2   4.179366  -0.982661  -2.747273
3  -5.013869   0.990257   3.142657
4   3.667701  -0.569059  -2.962291 

   electra_0  electra_1  electra_2
0  -2.765969  -1.752192   4.190103
1  -1.666653   0.228624   1.577240
2   3.336241  -1.528789  -2.166361
3  -3.198709   0.464110   2.847435
4   3.338363  -0.958126  -2.558912 

    xlnet_0   xlnet_1   xlnet_2
0 -3.535259 -2.464460  5.897135
1 -3.091162 -0.396457  2.150785
2  5.375926 -1.288295 -2.902307
3 -4.119745 -1.406669  5.515969
4  5.301913 -0.836325 -3.139710 



Results Using All Features: 

Classification Report: 
              precision    recall  f1-score   support

           0       0.89      0.88      0.89      1036
           1       0.82      0.84      0.83       957
           2       0.90      0.89      0.89       957

    accuracy                           0.87      2950
   macro avg       0.87      0.87      0.87      2950


Reusing dataset glue (/home/ubuntu/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
loading configuration file https://huggingface.co/microsoft/deberta-v3-small/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/8e0c12a7672d1d36f647c86e5fc3a911f189d8704e2bc94dde4a1ffe38f648fa.9df96bac06c2c492bc77ad040068f903c93beec14607428f25bf9081644ad0da
Model config DebertaV2Config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],


  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file deberta-v3-small_baseline_mrpc/config.json
Model config DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-small",
  "architectures": [
    "DebertaV2ForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

loading weights file 

The following columns in the test set  don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: sentence2, sentence1, idx.
***** Running Prediction *****
  Num examples = 408
  Batch size = 10
loading configuration file https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
Model config ElectraConfig {
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_e

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file Electra_fintuned_mrpc/config.json
Model config ElectraConfig {
  "_name_or_path": "google/electra-small-discriminator",
  "architectures": [
    "ElectraForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "vocab_size": 30522
}

loading weights file Electra_fintuned_mrpc/pytorch_model.bin
All model checkpoint weights were used when i

The following columns in the test set  don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: sentence2, sentence1, idx.
***** Running Prediction *****
  Num examples = 408
  Batch size = 10
Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/xlnet-base-cased/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/06bdb0f5882dbb833618c81c3b4c996a0c79422fa2c95ffea3827f92fc2dba6b.da982e2e596ec73828dbae86525a1870e513bd63aae5a2dc773ccc840ac5c346
Model config XLNetConfig {
  "architectures": [
    "XLNetLMHeadModel"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": 

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file xlnet-base-cased_baseline_mrpc/config.json
Model config XLNetConfig {
  "_name_or_path": "xlnet-base-cased",
  "architectures": [
    "XLNetForSequenceClassification"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": "xlnet",
  "n_head": 12,
  "n_layer": 12,
  "pad_token_id": 5,
  "problem_type": "single_label_classification",
  "reuse_len": null,
  "same_length": false,
  "start_n_top": 5,
  "summary_activation": "tanh",
  "summary_last_dropout": 0.1,
  "summary_type": "last",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 250
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "untie_r": true,
  "

The following columns in the test set  don't have a corresponding argument in `XLNetForSequenceClassification.forward` and have been ignored: sentence2, sentence1, idx.
***** Running Prediction *****
  Num examples = 408
  Batch size = 10


   deberta_0  deberta_1
0  -2.507830   2.017155
1   0.735865  -0.833833
2   0.466184  -0.602167
3  -1.100339   0.881516
4   0.654589  -0.730300 

   electra_0  electra_1
0  -2.092534   1.582743
1   1.030919  -0.818579
2   0.700650  -0.515424
3  -1.143724   0.870898
4   0.970264  -0.756922 

    xlnet_0   xlnet_1
0 -5.012310  4.562526
1  3.906815 -3.632550
2  3.536700 -3.519268
3  3.823317 -3.761026
4  4.811979 -4.464604 



Results Using All Features: 

Classification Report: 
              precision    recall  f1-score   support

           0       0.81      0.77      0.79        39
           1       0.90      0.92      0.91        84

    accuracy                           0.87       123
   macro avg       0.85      0.84      0.85       123
weighted avg       0.87      0.87      0.87       123



Accuracy :  86.99186991869918 
Fscore :  0.9058823529411765
-------------------
DeBERTa :  86.51960784313727
Electra :  

Reusing dataset glue (/home/ubuntu/.cache/huggingface/datasets/glue/qnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


88.23529411764706
XLNet :  89.2156862745098


loading configuration file https://huggingface.co/microsoft/deberta-v3-small/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/8e0c12a7672d1d36f647c86e5fc3a911f189d8704e2bc94dde4a1ffe38f648fa.9df96bac06c2c492bc77ad040068f903c93beec14607428f25bf9081644ad0da
Model config DebertaV2Config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.

  0%|          | 0/105 [00:00<?, ?ba/s]

  0%|          | 0/6 [00:00<?, ?ba/s]

  0%|          | 0/6 [00:00<?, ?ba/s]

loading configuration file deberta-v3-small_baseline_qnli/config.json
Model config DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-small",
  "architectures": [
    "DebertaV2ForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

loading weights file 

The following columns in the test set  don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: sentence, question, idx.
***** Running Prediction *****
  Num examples = 5463
  Batch size = 10
loading configuration file https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
Model config ElectraConfig {
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_em

  0%|          | 0/105 [00:00<?, ?ba/s]

  0%|          | 0/6 [00:00<?, ?ba/s]

  0%|          | 0/6 [00:00<?, ?ba/s]

loading configuration file Electra_fintuned_qnli/config.json
Model config ElectraConfig {
  "_name_or_path": "google/electra-small-discriminator",
  "architectures": [
    "ElectraForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "vocab_size": 30522
}

loading weights file Electra_fintuned_qnli/pytorch_model.bin
All model checkpoint weights were used when i

The following columns in the test set  don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: sentence, question, idx.
***** Running Prediction *****
  Num examples = 5463
  Batch size = 10
Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/xlnet-base-cased/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/06bdb0f5882dbb833618c81c3b4c996a0c79422fa2c95ffea3827f92fc2dba6b.da982e2e596ec73828dbae86525a1870e513bd63aae5a2dc773ccc840ac5c346
Model config XLNetConfig {
  "architectures": [
    "XLNetLMHeadModel"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": "

  0%|          | 0/105 [00:00<?, ?ba/s]

  0%|          | 0/6 [00:00<?, ?ba/s]

  0%|          | 0/6 [00:00<?, ?ba/s]

loading configuration file xlnet-base-cased_baseline_qnli/config.json
Model config XLNetConfig {
  "_name_or_path": "xlnet-base-cased",
  "architectures": [
    "XLNetForSequenceClassification"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": "xlnet",
  "n_head": 12,
  "n_layer": 12,
  "pad_token_id": 5,
  "problem_type": "single_label_classification",
  "reuse_len": null,
  "same_length": false,
  "start_n_top": 5,
  "summary_activation": "tanh",
  "summary_last_dropout": 0.1,
  "summary_type": "last",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 250
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "untie_r": true,
  "

The following columns in the test set  don't have a corresponding argument in `XLNetForSequenceClassification.forward` and have been ignored: sentence, question, idx.
***** Running Prediction *****
  Num examples = 5463
  Batch size = 10


   deberta_0  deberta_1
0   2.013455  -1.521519
1  -1.680719   1.497915
2  -0.411688   0.348003
3   2.644571  -2.006953
4   2.278837  -1.688128 

   electra_0  electra_1
0   1.271772  -1.121765
1  -0.850017   0.703188
2  -0.478245   0.348218
3   0.577753  -0.566421
4   2.409446  -2.009556 

    xlnet_0   xlnet_1
0  3.348554 -3.165122
1 -2.397635  2.915510
2 -2.471846  2.961958
3  3.403030 -3.221354
4  3.197044 -2.982866 



Reusing dataset glue (/home/ubuntu/.cache/huggingface/datasets/glue/qqp/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)




Results Using All Features: 

Classification Report: 
              precision    recall  f1-score   support

           0       0.91      0.92      0.92       812
           1       0.92      0.91      0.92       827

    accuracy                           0.92      1639
   macro avg       0.92      0.92      0.92      1639
weighted avg       0.92      0.92      0.92      1639



Accuracy :  91.70225747406955 
Fscore :  999
-------------------
DeBERTa :  91.54310818231741
Electra :  88.90719384953323
XLNet :  87.79059125022881


loading configuration file https://huggingface.co/microsoft/deberta-v3-small/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/8e0c12a7672d1d36f647c86e5fc3a911f189d8704e2bc94dde4a1ffe38f648fa.9df96bac06c2c492bc77ad040068f903c93beec14607428f25bf9081644ad0da
Model config DebertaV2Config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.

  0%|          | 0/364 [00:00<?, ?ba/s]

  0%|          | 0/41 [00:00<?, ?ba/s]

  0%|          | 0/391 [00:00<?, ?ba/s]

loading configuration file deberta-v3-small_baseline_qqp/config.json
Model config DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-small",
  "architectures": [
    "DebertaV2ForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

loading weights file d

The following columns in the test set  don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: question2, idx, question1.
***** Running Prediction *****
  Num examples = 40430
  Batch size = 10
loading configuration file https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
Model config ElectraConfig {
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position

  0%|          | 0/364 [00:00<?, ?ba/s]

  0%|          | 0/41 [00:00<?, ?ba/s]

  0%|          | 0/391 [00:00<?, ?ba/s]

loading configuration file Electra_fintuned_qqp/config.json
Model config ElectraConfig {
  "_name_or_path": "google/electra-small-discriminator",
  "architectures": [
    "ElectraForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "vocab_size": 30522
}

loading weights file Electra_fintuned_qqp/pytorch_model.bin
All model checkpoint weights were used when ini

The following columns in the test set  don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: question2, idx, question1.
***** Running Prediction *****
  Num examples = 40430
  Batch size = 10
Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/xlnet-base-cased/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/06bdb0f5882dbb833618c81c3b4c996a0c79422fa2c95ffea3827f92fc2dba6b.da982e2e596ec73828dbae86525a1870e513bd63aae5a2dc773ccc840ac5c346
Model config XLNetConfig {
  "architectures": [
    "XLNetLMHeadModel"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type"

  0%|          | 0/364 [00:00<?, ?ba/s]

  0%|          | 0/41 [00:00<?, ?ba/s]

  0%|          | 0/391 [00:00<?, ?ba/s]

loading configuration file xlnet-base-cased_baseline_qqp/config.json
Model config XLNetConfig {
  "_name_or_path": "xlnet-base-cased",
  "architectures": [
    "XLNetForSequenceClassification"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": "xlnet",
  "n_head": 12,
  "n_layer": 12,
  "pad_token_id": 5,
  "problem_type": "single_label_classification",
  "reuse_len": null,
  "same_length": false,
  "start_n_top": 5,
  "summary_activation": "tanh",
  "summary_last_dropout": 0.1,
  "summary_type": "last",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 250
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "untie_r": true,
  "u

The following columns in the test set  don't have a corresponding argument in `XLNetForSequenceClassification.forward` and have been ignored: question2, idx, question1.
***** Running Prediction *****
  Num examples = 40430
  Batch size = 10


   deberta_0  deberta_1
0   2.120597  -2.798326
1   3.906451  -5.420879
2  -1.571658   1.492146
3   4.163735  -5.794355
4   1.802726  -2.324427 

   electra_0  electra_1
0   0.677404  -0.816953
1   4.643112  -4.630460
2  -1.484326   1.510216
3   4.674413  -4.648614
4   0.351680  -0.475998 

    xlnet_0   xlnet_1
0  3.299190 -3.181788
1  3.649760 -3.658085
2 -1.964923  2.473604
3  3.676419 -3.694395
4  2.621842 -2.427980 



Results Using All Features: 

Classification Report: 
              precision    recall  f1-score   support

           0       0.94      0.91      0.93      7712
           1       0.86      0.89      0.88      4417

    accuracy                           0.91     12129
   macro avg       0.90      0.90      0.90     12129
weighted avg       0.91      0.91      0.91     12129



Accuracy :  90.70821996867014 
Fscore :  0.8751799756340679
-------------------
DeBERTa :  90.63071976255256
Electra :  90.03215434083602
XLNet :  87.37818451644819


Reusing dataset glue (/home/ubuntu/.cache/huggingface/datasets/glue/rte/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
loading configuration file https://huggingface.co/microsoft/deberta-v3-small/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/8e0c12a7672d1d36f647c86e5fc3a911f189d8704e2bc94dde4a1ffe38f648fa.9df96bac06c2c492bc77ad040068f903c93beec14607428f25bf9081644ad0da
Model config DebertaV2Config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
 

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

loading configuration file deberta-v3-small_baseline_rte/config.json
Model config DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-small",
  "architectures": [
    "DebertaV2ForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

loading weights file d

The following columns in the test set  don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: sentence2, sentence1, idx.
***** Running Prediction *****
  Num examples = 277
  Batch size = 10
loading configuration file https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
Model config ElectraConfig {
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_e

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

loading configuration file Electra_fintuned_rte/config.json
Model config ElectraConfig {
  "_name_or_path": "google/electra-small-discriminator",
  "architectures": [
    "ElectraForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "vocab_size": 30522
}

loading weights file Electra_fintuned_rte/pytorch_model.bin
All model checkpoint weights were used when ini

The following columns in the test set  don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: sentence2, sentence1, idx.
***** Running Prediction *****
  Num examples = 277
  Batch size = 10
Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/xlnet-base-cased/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/06bdb0f5882dbb833618c81c3b4c996a0c79422fa2c95ffea3827f92fc2dba6b.da982e2e596ec73828dbae86525a1870e513bd63aae5a2dc773ccc840ac5c346
Model config XLNetConfig {
  "architectures": [
    "XLNetLMHeadModel"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": 

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

loading configuration file xlnet-base-cased_baseline_rte/config.json
Model config XLNetConfig {
  "_name_or_path": "xlnet-base-cased",
  "architectures": [
    "XLNetForSequenceClassification"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": "xlnet",
  "n_head": 12,
  "n_layer": 12,
  "pad_token_id": 5,
  "problem_type": "single_label_classification",
  "reuse_len": null,
  "same_length": false,
  "start_n_top": 5,
  "summary_activation": "tanh",
  "summary_last_dropout": 0.1,
  "summary_type": "last",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 250
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "untie_r": true,
  "u

The following columns in the test set  don't have a corresponding argument in `XLNetForSequenceClassification.forward` and have been ignored: sentence2, sentence1, idx.
***** Running Prediction *****
  Num examples = 277
  Batch size = 10


   deberta_0  deberta_1
0  -0.022848   0.131482
1  -0.121065   0.181684
2   0.168376  -0.086241
3  -0.030430   0.109342
4   0.524834  -0.499871 

   electra_0  electra_1
0   2.027848  -1.854611
1  -2.115922   2.061073
2   2.001615  -1.870931
3  -1.133618   1.068260
4   2.014058  -1.909097 

    xlnet_0   xlnet_1
0 -0.469100  0.915669
1 -0.120146 -0.027909
2  1.072247 -0.768862
3 -1.309347  1.096496
4  0.597862 -0.290516 



Results Using All Features: 

Classification Report: 
              precision    recall  f1-score   support

           0       0.73      0.68      0.71        44
           1       0.67      0.72      0.70        40

    accuracy                           0.70        84
   macro avg       0.70      0.70      0.70        84
weighted avg       0.70      0.70      0.70        84



Accuracy :  70.23809523809523 
Fscore :  999
-------------------
DeBERTa :  66.78700361010831
Electra :  68.23104693140795
XLNet :  74.0072202166065


Reusing dataset glue (/home/ubuntu/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
loading configuration file https://huggingface.co/microsoft/deberta-v3-small/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/8e0c12a7672d1d36f647c86e5fc3a911f189d8704e2bc94dde4a1ffe38f648fa.9df96bac06c2c492bc77ad040068f903c93beec14607428f25bf9081644ad0da
Model config DebertaV2Config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],


  0%|          | 0/68 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file deberta-v3-small_baseline_sst2/config.json
Model config DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-small",
  "architectures": [
    "DebertaV2ForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

loading weights file 

The following columns in the test set  don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: sentence, idx.
***** Running Prediction *****
  Num examples = 872
  Batch size = 10
loading configuration file https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
Model config ElectraConfig {
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_typ

  0%|          | 0/68 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file Electra_fintuned_sst2/config.json
Model config ElectraConfig {
  "_name_or_path": "google/electra-small-discriminator",
  "architectures": [
    "ElectraForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "vocab_size": 30522
}

loading weights file Electra_fintuned_sst2/pytorch_model.bin
All model checkpoint weights were used when i

The following columns in the test set  don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: sentence, idx.
***** Running Prediction *****
  Num examples = 872
  Batch size = 10
Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/xlnet-base-cased/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/06bdb0f5882dbb833618c81c3b4c996a0c79422fa2c95ffea3827f92fc2dba6b.da982e2e596ec73828dbae86525a1870e513bd63aae5a2dc773ccc840ac5c346
Model config XLNetConfig {
  "architectures": [
    "XLNetLMHeadModel"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": "xlnet",
  "

  0%|          | 0/68 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file xlnet-base-cased_baseline_sst2/config.json
Model config XLNetConfig {
  "_name_or_path": "xlnet-base-cased",
  "architectures": [
    "XLNetForSequenceClassification"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": "xlnet",
  "n_head": 12,
  "n_layer": 12,
  "pad_token_id": 5,
  "problem_type": "single_label_classification",
  "reuse_len": null,
  "same_length": false,
  "start_n_top": 5,
  "summary_activation": "tanh",
  "summary_last_dropout": 0.1,
  "summary_type": "last",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 250
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "untie_r": true,
  "

The following columns in the test set  don't have a corresponding argument in `XLNetForSequenceClassification.forward` and have been ignored: sentence, idx.
***** Running Prediction *****
  Num examples = 872
  Batch size = 10


   deberta_0  deberta_1
0  -4.381270   4.325088
1   2.726245  -2.237343
2  -3.404875   3.525752
3  -3.892137   3.907598
4   3.713424  -3.151703 

   electra_0  electra_1
0  -3.949970   3.518444
1  -0.269825   0.341586
2  -2.391326   2.085906
3  -2.848605   2.513922
4   2.949082  -2.783178 

    xlnet_0   xlnet_1
0 -4.524834  4.417871
1  3.288627 -3.207935
2 -4.376652  4.318546
3 -4.310987  4.279883
4  3.446086 -3.255106 



Reusing dataset glue (/home/ubuntu/.cache/huggingface/datasets/glue/wnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)




Results Using All Features: 

Classification Report: 
              precision    recall  f1-score   support

           0       0.98      0.93      0.96       136
           1       0.93      0.98      0.96       126

    accuracy                           0.96       262
   macro avg       0.96      0.96      0.96       262
weighted avg       0.96      0.96      0.96       262



Accuracy :  95.80152671755725 
Fscore :  999
-------------------
DeBERTa :  93.46330275229357
Electra :  91.74311926605505
XLNet :  94.03669724770643


loading configuration file https://huggingface.co/microsoft/deberta-v3-small/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/8e0c12a7672d1d36f647c86e5fc3a911f189d8704e2bc94dde4a1ffe38f648fa.9df96bac06c2c492bc77ad040068f903c93beec14607428f25bf9081644ad0da
Model config DebertaV2Config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

loading configuration file deberta-v3-small_baseline_wnli/config.json
Model config DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-small",
  "architectures": [
    "DebertaV2ForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

loading weights file 

The following columns in the test set  don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: sentence2, sentence1, idx.
***** Running Prediction *****
  Num examples = 71
  Batch size = 10
loading configuration file https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
Model config ElectraConfig {
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_em

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

loading configuration file Electra_fintuned_wnli/config.json
Model config ElectraConfig {
  "_name_or_path": "google/electra-small-discriminator",
  "architectures": [
    "ElectraForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 2,
  "vocab_size": 30522
}

loading weights file Electra_fintuned_wnli/pytorch_model.bin
All model checkpoint weights were used when i

The following columns in the test set  don't have a corresponding argument in `ElectraForSequenceClassification.forward` and have been ignored: sentence2, sentence1, idx.
***** Running Prediction *****
  Num examples = 71
  Batch size = 10
Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/xlnet-base-cased/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/06bdb0f5882dbb833618c81c3b4c996a0c79422fa2c95ffea3827f92fc2dba6b.da982e2e596ec73828dbae86525a1870e513bd63aae5a2dc773ccc840ac5c346
Model config XLNetConfig {
  "architectures": [
    "XLNetLMHeadModel"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": "

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

loading configuration file xlnet-base-cased_baseline_wnli/config.json
Model config XLNetConfig {
  "_name_or_path": "xlnet-base-cased",
  "architectures": [
    "XLNetForSequenceClassification"
  ],
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": "xlnet",
  "n_head": 12,
  "n_layer": 12,
  "pad_token_id": 5,
  "problem_type": "single_label_classification",
  "reuse_len": null,
  "same_length": false,
  "start_n_top": 5,
  "summary_activation": "tanh",
  "summary_last_dropout": 0.1,
  "summary_type": "last",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 250
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "untie_r": true,
  "

The following columns in the test set  don't have a corresponding argument in `XLNetForSequenceClassification.forward` and have been ignored: sentence2, sentence1, idx.
***** Running Prediction *****
  Num examples = 71
  Batch size = 10


   deberta_0  deberta_1
0  -0.095248  -0.074567
1  -0.095816  -0.077622
2  -0.095825  -0.074736
3  -0.092846  -0.072272
4  -0.093128  -0.073961 

   electra_0  electra_1
0   0.063873   0.019810
1   0.070295   0.057328
2   0.062983   0.051700
3   0.066377   0.015463
4   0.070587   0.046793 

    xlnet_0   xlnet_1
0  0.353091 -0.020062
1  0.336589 -0.030646
2  0.345355 -0.035084
3  0.356083 -0.065204
4  0.369883 -0.018039 



Results Using All Features: 

Classification Report: 
              precision    recall  f1-score   support

           0       0.75      0.75      0.75        12
           1       0.70      0.70      0.70        10

    accuracy                           0.73        22
   macro avg       0.72      0.72      0.72        22
weighted avg       0.73      0.73      0.73        22



Accuracy :  72.72727272727273 
Fscore :  999
-------------------
DeBERTa :  43.66197183098591
Electra :  46.478873239436616
XLNet :  56.33802816901409


In [3]:
print('Done')

Done


In [8]:
ensemble_metrics = pd.DataFrame(metric_collector, columns = ['Task','Ensemble', 'DeBERTa', 'Electra', 'XLNet', 
                                                             'Ensemble_f','DeBERTa_f', 'Electra_f', 'XLNet_f', ])
ensemble_metrics.head(15)

Unnamed: 0,Task,Ensemble,DeBERTa,Electra,XLNet,Ensemble_f,DeBERTa_f,Electra_f,XLNet_f
0,cola,0.65865,0.620829,0.607029,0.399916,999.0,999,999,999.0
1,mnli,0.875042,0.87458,0.817422,0.856648,999.0,999,999,999.0
2,mnli-mm,0.871186,0.872356,0.821298,0.857811,999.0,999,999,999.0
3,mrpc,0.869919,0.865196,0.882353,0.892157,0.905882,"(0.9029982363315697,)","(0.9145907473309609,)",0.922535
4,qnli,0.917023,0.915431,0.889072,0.877906,999.0,999,999,999.0
5,qqp,0.907082,0.906307,0.900322,0.873782,0.87518,"(0.8749917497194905,)","(0.8659972068896721,)",0.833317
6,rte,0.702381,0.66787,0.68231,0.740072,999.0,999,999,999.0
7,sst2,0.958015,0.934633,0.917431,0.940367,999.0,999,999,999.0
8,wnli,0.727273,0.43662,0.464789,0.56338,999.0,999,999,999.0


In [31]:
#Remove tuples from the f-scores in some columns

for column in ['DeBERTa_f', 'Electra_f']:
    col = ensemble_metrics[column]

    for val in range(col.shape[0]):
        #Correct tuples
        if type(col[val]) == tuple:
            ensemble_metrics[column][val] = col[val][0]
            
ensemble_metrics.head(15)

Unnamed: 0,Task,Ensemble,DeBERTa,Electra,XLNet,Ensemble_f,DeBERTa_f,Electra_f,XLNet_f
0,cola,0.65865,0.620829,0.607029,0.399916,999.0,999.0,999.0,999.0
1,mnli,0.875042,0.87458,0.817422,0.856648,999.0,999.0,999.0,999.0
2,mnli-mm,0.871186,0.872356,0.821298,0.857811,999.0,999.0,999.0,999.0
3,mrpc,0.869919,0.865196,0.882353,0.892157,0.905882,0.902998,0.914591,0.922535
4,qnli,0.917023,0.915431,0.889072,0.877906,999.0,999.0,999.0,999.0
5,qqp,0.907082,0.906307,0.900322,0.873782,0.87518,0.874992,0.865997,0.833317
6,rte,0.702381,0.66787,0.68231,0.740072,999.0,999.0,999.0,999.0
7,sst2,0.958015,0.934633,0.917431,0.940367,999.0,999.0,999.0,999.0
8,wnli,0.727273,0.43662,0.464789,0.56338,999.0,999.0,999.0,999.0


In [32]:
ensemble_metrics.to_csv('Ensemble_save.csv')
ensemble_metrics.to_excel('Ensemble_save.xlsx')