**Twitter Set Testing File**
7/08/2022

In [1]:
# dataloader
import pandas as pd

data_df = pd.read_csv('reduced_set')
data_df['label'] = data_df['label'].replace({2:1})
data_df = data_df[:20000]

In [2]:
# train test split
from sklearn.model_selection import train_test_split

# training = 0.7, validation = 0.1, test = 0.2
training_tweets, temp_tweets, training_labels, temp_labels = train_test_split(list(data_df['text']),list(data_df['label']), test_size=0.3, random_state=123)
val_tweets, test_tweets, val_labels, test_labels = train_test_split(temp_tweets, temp_labels, test_size=(1/3), random_state=123)

In [3]:
# create dict
train = {'text': training_tweets, 'label': training_labels}
val = {'text': val_tweets, 'label': val_labels}
test = {'text': test_tweets, 'label': test_labels}

In [4]:
# convert to dataset
from datasets import Dataset

train_dataset = Dataset.from_dict(train)
val_dataset = Dataset.from_dict(val)
test_dataset = Dataset.from_dict(test)

In [5]:
# tokenize function
def tokenize_function(dataset):
    model_inputs = tokenizer(dataset['text'], padding="max_length", truncation=True, max_length=100)
    return model_inputs

In [6]:
def tokenize_set():
    tok_train_dataset = train_dataset.map(tokenize_function, batched=True)
    tok_val_dataset = val_dataset.map(tokenize_function, batched=True)
    tok_test_dataset = test_dataset.map(tokenize_function, batched=True)
    return tok_train_dataset,tok_val_dataset,tok_test_dataset

In [7]:
# training args
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="transformer_checkpoints",
    num_train_epochs=5
)

In [8]:
# train model
from transformers import Trainer

def train_transformer(model,tok_train_dataset, tok_val_dataset):

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tok_train_dataset,
        eval_dataset=tok_val_dataset,
    )

    trainer.train()
    return model

In [9]:
# predict test set
import numpy as np
import torch

def predict_nn(trained_model, tok_test_dataset):

    output = trained_model(attention_mask=torch.tensor(tok_test_dataset["attention_mask"]).cuda(), input_ids=torch.tensor(tok_test_dataset["input_ids"]).cuda())

    pred_labs = np.argmax(output["logits"].cpu().detach().numpy(), axis=1)

    gold_labs = tok_test_dataset["label"]

    return gold_labs, pred_labs

In [10]:
def stats(model):
    return predict_nn(model, test_dataset)

In [11]:
# empty gpu
import gc

def empty_gpu():
    model = None
    gc.collect()
    torch.cuda.empty_cache()

In [12]:
from transformers import BertTokenizer,RobertaTokenizer, ElectraForSequenceClassification, AutoModelForSequenceClassification, RobertaForSequenceClassification
from sklearn.metrics import accuracy_score, f1_score

model_name = []
acc_scores = []
f1_scores = []

tokenizer = BertTokenizer.from_pretrained('prajjwal1/bert-tiny')
train, val, test = tokenize_set()
model = AutoModelForSequenceClassification.from_pretrained('prajjwal1/bert-tiny', num_labels=2)
for param in model.bert.parameters():
    param.requires_grad = False
trained_model = train_transformer(model, train, val)
gold, pred = predict_nn(trained_model, test)
model_name += ['prajjwal1/bert-tiny']
acc_scores += [accuracy_score(gold,pred)]
f1_scores+= [f1_score(gold,pred, average='macro')]
empty_gpu()

  0%|          | 0/14 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

Step,Training Loss
500,0.697
1000,0.6964
1500,0.6941
2000,0.6912
2500,0.6904
3000,0.6922
3500,0.6928
4000,0.6909
4500,0.6925
5000,0.6897


Saving model checkpoint to transformer_checkpoints/checkpoint-500
Configuration saved in transformer_checkpoints/checkpoint-500/config.json
Model weights saved in transformer_checkpoints/checkpoint-500/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-1000
Configuration saved in transformer_checkpoints/checkpoint-1000/config.json
Model weights saved in transformer_checkpoints/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-1500
Configuration saved in transformer_checkpoints/checkpoint-1500/config.json
Model weights saved in transformer_checkpoints/checkpoint-1500/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-2000
Configuration saved in transformer_checkpoints/checkpoint-2000/config.json
Model weights saved in transformer_checkpoints/checkpoint-2000/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-2500
Configuration saved in transformer_checkpoin

In [13]:
f1_scores

[0.5266549314450234]

In [14]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
train, val, test = tokenize_set()
model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
for param in model.bert.parameters():
    param.requires_grad = False
trained_model = train_transformer(model, train, val)
gold, pred = predict_nn(trained_model, test)
model_name += ['bert-base-uncased']
acc_scores += [accuracy_score(gold,pred)]
f1_scores+= [f1_score(gold,pred, average='macro')]
empty_gpu()

https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpo0a55_7i


Downloading vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt in cache at /root/.cache/huggingface/transformers/45c3f7a79a80e1cf0a489e5c62b43f173c15db47864303a55d623bb3c96f72a5.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99
creating metadata file for /root/.cache/huggingface/transformers/45c3f7a79a80e1cf0a489e5c62b43f173c15db47864303a55d623bb3c96f72a5.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99
https://huggingface.co/bert-base-uncased/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp6iuzkkhv


Downloading tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-uncased/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/c1d7f0a763fb63861cc08553866f1fc3e5a6f4f07621be277452d26d71303b7e.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79
creating metadata file for /root/.cache/huggingface/transformers/c1d7f0a763fb63861cc08553866f1fc3e5a6f4f07621be277452d26d71303b7e.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79
loading file https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/45c3f7a79a80e1cf0a489e5c62b43f173c15db47864303a55d623bb3c96f72a5.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99
loading file https://huggingface.co/bert-base-uncased/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/bert-base-uncased/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/bert-base-uncased/resolve/main/t

Downloading config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-uncased/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
creating metadata file for /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "h

  0%|          | 0/14 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.21.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

https://huggingface.co/bert-base-uncased/resolve/main/pytorch_mo

Downloading pytorch_model.bin:   0%|          | 0.00/420M [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-uncased/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/a8041bf617d7f94ea26d15e218abd04afc2004805632abc0ed2066aa16d50d04.faf6ea826ae9c5867d12b22257f9877e6b8367890837bd60f7c54a29633f7f2f
creating metadata file for /root/.cache/huggingface/transformers/a8041bf617d7f94ea26d15e218abd04afc2004805632abc0ed2066aa16d50d04.faf6ea826ae9c5867d12b22257f9877e6b8367890837bd60f7c54a29633f7f2f
loading weights file https://huggingface.co/bert-base-uncased/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/a8041bf617d7f94ea26d15e218abd04afc2004805632abc0ed2066aa16d50d04.faf6ea826ae9c5867d12b22257f9877e6b8367890837bd60f7c54a29633f7f2f
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.den

Step,Training Loss
500,0.6955
1000,0.6953
1500,0.6954
2000,0.693
2500,0.6946
3000,0.6939
3500,0.6916
4000,0.6891
4500,0.6935
5000,0.6916


Saving model checkpoint to transformer_checkpoints/checkpoint-500
Configuration saved in transformer_checkpoints/checkpoint-500/config.json
Model weights saved in transformer_checkpoints/checkpoint-500/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-1000
Configuration saved in transformer_checkpoints/checkpoint-1000/config.json
Model weights saved in transformer_checkpoints/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-1500
Configuration saved in transformer_checkpoints/checkpoint-1500/config.json
Model weights saved in transformer_checkpoints/checkpoint-1500/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-2000
Configuration saved in transformer_checkpoints/checkpoint-2000/config.json
Model weights saved in transformer_checkpoints/checkpoint-2000/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-2500
Configuration saved in transformer_checkpoin

In [15]:

tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
train, val, test = tokenize_set()
model = AutoModelForSequenceClassification.from_pretrained('bert-base-cased', num_labels=2)
for param in model.bert.parameters():
    param.requires_grad = False
trained_model = train_transformer(model, train, val)
gold, pred = predict_nn(trained_model, test)
model_name += ['bert-base-cased']
acc_scores += [accuracy_score(gold,pred)]
f1_scores+= [f1_score(gold,pred, average='macro')]
empty_gpu() 

https://huggingface.co/bert-base-cased/resolve/main/vocab.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpog45drzf


Downloading vocab.txt:   0%|          | 0.00/208k [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-cased/resolve/main/vocab.txt in cache at /root/.cache/huggingface/transformers/6508e60ab3c1200bffa26c95f4b58ac6b6d95fba4db1f195f632fa3cd7bc64cc.437aa611e89f6fc6675a049d2b5545390adbc617e7d655286421c191d2be2791
creating metadata file for /root/.cache/huggingface/transformers/6508e60ab3c1200bffa26c95f4b58ac6b6d95fba4db1f195f632fa3cd7bc64cc.437aa611e89f6fc6675a049d2b5545390adbc617e7d655286421c191d2be2791
https://huggingface.co/bert-base-cased/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpdf6n06ou


Downloading tokenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-cased/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/ec84e86ee39bfe112543192cf981deebf7e6cbe8c91b8f7f8f63c9be44366158.ec5c189f89475aac7d8cbd243960a0655cfadc3d0474da8ff2ed0bf1699c2a5f
creating metadata file for /root/.cache/huggingface/transformers/ec84e86ee39bfe112543192cf981deebf7e6cbe8c91b8f7f8f63c9be44366158.ec5c189f89475aac7d8cbd243960a0655cfadc3d0474da8ff2ed0bf1699c2a5f
loading file https://huggingface.co/bert-base-cased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/6508e60ab3c1200bffa26c95f4b58ac6b6d95fba4db1f195f632fa3cd7bc64cc.437aa611e89f6fc6675a049d2b5545390adbc617e7d655286421c191d2be2791
loading file https://huggingface.co/bert-base-cased/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/bert-base-cased/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/bert-base-cased/resolve/main/tokenizer_c

Downloading config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-cased/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/a803e0468a8fe090683bdc453f4fac622804f49de86d7cecaee92365d4a0f829.a64a22196690e0e82ead56f388a3ef3a50de93335926ccfa20610217db589307
creating metadata file for /root/.cache/huggingface/transformers/a803e0468a8fe090683bdc453f4fac622804f49de86d7cecaee92365d4a0f829.a64a22196690e0e82ead56f388a3ef3a50de93335926ccfa20610217db589307
loading configuration file https://huggingface.co/bert-base-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/a803e0468a8fe090683bdc453f4fac622804f49de86d7cecaee92365d4a0f829.a64a22196690e0e82ead56f388a3ef3a50de93335926ccfa20610217db589307
Model config BertConfig {
  "_name_or_path": "bert-base-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_

  0%|          | 0/14 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/bert-base-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/a803e0468a8fe090683bdc453f4fac622804f49de86d7cecaee92365d4a0f829.a64a22196690e0e82ead56f388a3ef3a50de93335926ccfa20610217db589307
Model config BertConfig {
  "_name_or_path": "bert-base-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.21.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 28996
}

https://huggingface.co/bert-base-cased/resolve/main/pytorch_model.bi

Downloading pytorch_model.bin:   0%|          | 0.00/416M [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-cased/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/092cc582560fc3833e556b3f833695c26343cb54b7e88cd02d40821462a74999.1f48cab6c959fc6c360d22bea39d06959e90f5b002e77e836d2da45464875cda
creating metadata file for /root/.cache/huggingface/transformers/092cc582560fc3833e556b3f833695c26343cb54b7e88cd02d40821462a74999.1f48cab6c959fc6c360d22bea39d06959e90f5b002e77e836d2da45464875cda
loading weights file https://huggingface.co/bert-base-cased/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/092cc582560fc3833e556b3f833695c26343cb54b7e88cd02d40821462a74999.1f48cab6c959fc6c360d22bea39d06959e90f5b002e77e836d2da45464875cda
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bia

Step,Training Loss
500,0.6991
1000,0.702
1500,0.7
2000,0.6961
2500,0.6966
3000,0.6948
3500,0.692
4000,0.6922
4500,0.695
5000,0.6938


Saving model checkpoint to transformer_checkpoints/checkpoint-500
Configuration saved in transformer_checkpoints/checkpoint-500/config.json
Model weights saved in transformer_checkpoints/checkpoint-500/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-1000
Configuration saved in transformer_checkpoints/checkpoint-1000/config.json
Model weights saved in transformer_checkpoints/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-1500
Configuration saved in transformer_checkpoints/checkpoint-1500/config.json
Model weights saved in transformer_checkpoints/checkpoint-1500/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-2000
Configuration saved in transformer_checkpoints/checkpoint-2000/config.json
Model weights saved in transformer_checkpoints/checkpoint-2000/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-2500
Configuration saved in transformer_checkpoin

In [16]:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
train, val, test = tokenize_set()
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
for param in model.roberta.parameters():
    param.requires_grad = False
trained_model = train_transformer(model, train, val)
gold, pred = predict_nn(trained_model, test)
model_name += ['roberta-base']
acc_scores += [accuracy_score(gold,pred)]
f1_scores+= [f1_score(gold,pred, average='macro')]
empty_gpu()

# tokenizer = RobertaTokenizer.from_pretrained('roberta-large')
# train, val, test = tokenize_set()
# model = RobertaForSequenceClassification.from_pretrained('roberta-large', num_labels=3)
# for param in model.roberta.parameters():
#     param.requires_grad = False
# trained_model = train_transformer(model, train, val)
# gold, pred = predict_nn(trained_model, test)
# model_name += ['roberta-large']
# acc_scores += [accuracy_score(gold,pred)]
# f1_scores+= [f1_score(gold,pred, average='macro')]
# empty_gpu()
  


https://huggingface.co/roberta-base/resolve/main/vocab.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp2r_dicjv


Downloading vocab.json:   0%|          | 0.00/878k [00:00<?, ?B/s]

storing https://huggingface.co/roberta-base/resolve/main/vocab.json in cache at /root/.cache/huggingface/transformers/d3ccdbfeb9aaa747ef20432d4976c32ee3fa69663b379deb253ccfce2bb1fdc5.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
creating metadata file for /root/.cache/huggingface/transformers/d3ccdbfeb9aaa747ef20432d4976c32ee3fa69663b379deb253ccfce2bb1fdc5.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
https://huggingface.co/roberta-base/resolve/main/merges.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp8fk2uzy5


Downloading merges.txt:   0%|          | 0.00/446k [00:00<?, ?B/s]

storing https://huggingface.co/roberta-base/resolve/main/merges.txt in cache at /root/.cache/huggingface/transformers/cafdecc90fcab17011e12ac813dd574b4b3fea39da6dd817813efa010262ff3f.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
creating metadata file for /root/.cache/huggingface/transformers/cafdecc90fcab17011e12ac813dd574b4b3fea39da6dd817813efa010262ff3f.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/roberta-base/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/d3ccdbfeb9aaa747ef20432d4976c32ee3fa69663b379deb253ccfce2bb1fdc5.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/roberta-base/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/cafdecc90fcab17011e12ac813dd574b4b3fea39da6dd817813efa010262ff3f.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/roberta-bas

Downloading config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

storing https://huggingface.co/roberta-base/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
creating metadata file for /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "_name_or_path": "roberta-base",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hid

  0%|          | 0/14 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.21.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

https://huggingface.co/roberta-base/resolve/main/pytorch_model.bin not found in cache or forc

Downloading pytorch_model.bin:   0%|          | 0.00/478M [00:00<?, ?B/s]

storing https://huggingface.co/roberta-base/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/51ba668f7ff34e7cdfa9561e8361747738113878850a7d717dbc69de8683aaad.c7efaa30a0d80b2958b876969faa180e485944a849deee4ad482332de65365a7
creating metadata file for /root/.cache/huggingface/transformers/51ba668f7ff34e7cdfa9561e8361747738113878850a7d717dbc69de8683aaad.c7efaa30a0d80b2958b876969faa180e485944a849deee4ad482332de65365a7
loading weights file https://huggingface.co/roberta-base/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/51ba668f7ff34e7cdfa9561e8361747738113878850a7d717dbc69de8683aaad.c7efaa30a0d80b2958b876969faa180e485944a849deee4ad482332de65365a7
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer

Step,Training Loss
500,0.6948
1000,0.6925
1500,0.6886
2000,0.6848
2500,0.687
3000,0.6879
3500,0.6831
4000,0.6816
4500,0.6859
5000,0.6823


Saving model checkpoint to transformer_checkpoints/checkpoint-500
Configuration saved in transformer_checkpoints/checkpoint-500/config.json
Model weights saved in transformer_checkpoints/checkpoint-500/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-1000
Configuration saved in transformer_checkpoints/checkpoint-1000/config.json
Model weights saved in transformer_checkpoints/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-1500
Configuration saved in transformer_checkpoints/checkpoint-1500/config.json
Model weights saved in transformer_checkpoints/checkpoint-1500/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-2000
Configuration saved in transformer_checkpoints/checkpoint-2000/config.json
Model weights saved in transformer_checkpoints/checkpoint-2000/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-2500
Configuration saved in transformer_checkpoin

In [17]:
tokenizer = BertTokenizer.from_pretrained('google/electra-small-discriminator')
train, val, test = tokenize_set()
model = ElectraForSequenceClassification.from_pretrained('google/electra-small-discriminator', num_labels=2)
for param in model.electra.parameters():
    param.requires_grad = False
trained_model = train_transformer(model, train, val)
gold, pred = predict_nn(trained_model, test)
model_name += ['google/electra-small-discriminator']
acc_scores += [accuracy_score(gold,pred)]
f1_scores+= [f1_score(gold,pred, average='macro')]
empty_gpu()


https://huggingface.co/google/electra-small-discriminator/resolve/main/vocab.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp5o9lmpg4


Downloading vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

storing https://huggingface.co/google/electra-small-discriminator/resolve/main/vocab.txt in cache at /root/.cache/huggingface/transformers/ece45ade3e01224cf31fed8e183b306d17b84e8abd415363474cfe72274f7814.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99
creating metadata file for /root/.cache/huggingface/transformers/ece45ade3e01224cf31fed8e183b306d17b84e8abd415363474cfe72274f7814.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99
https://huggingface.co/google/electra-small-discriminator/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpzzrnny5e


Downloading tokenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

storing https://huggingface.co/google/electra-small-discriminator/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/8b3aea9f7242b3d19268df5b1bfed8f66e08671a72ac0809ada08e5ef1adc592.19eda9a6da5fb0e52a45200c95876729561dde16a69b9116953af6edca1d1e92
creating metadata file for /root/.cache/huggingface/transformers/8b3aea9f7242b3d19268df5b1bfed8f66e08671a72ac0809ada08e5ef1adc592.19eda9a6da5fb0e52a45200c95876729561dde16a69b9116953af6edca1d1e92
loading file https://huggingface.co/google/electra-small-discriminator/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/ece45ade3e01224cf31fed8e183b306d17b84e8abd415363474cfe72274f7814.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99
loading file https://huggingface.co/google/electra-small-discriminator/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/google/electra-small-discriminator/resolve/main/special_tokens_map.json from cache at None


Downloading config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

storing https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
creating metadata file for /root/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
loading configuration file https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
Model config ElectraConfig {
  "_name_or_path": "google/electra-small-discriminator",
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 128,
  "hidde

  0%|          | 0/14 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ca13c16218c6780ec76753d3afa19fcb7cc759e3f63ee87e441562d374762b3d.3dd1921e571dfa18c0bdaa17b9b38f111097812281989b1cb22263738e66ef73
Model config ElectraConfig {
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "transformers_version": "4.21.1",
  "type_vocab_size": 2,
  "use_cache":

Downloading pytorch_model.bin:   0%|          | 0.00/51.7M [00:00<?, ?B/s]

storing https://huggingface.co/google/electra-small-discriminator/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/1ebdea26ed1a6268cdf5d1fe36450e89c70e306c97d39e62ede8a31f1c43f9ad.baa63624f08a59503441bce3d427225c61fe79bfa9f6d4c30cde7d072d863e0c
creating metadata file for /root/.cache/huggingface/transformers/1ebdea26ed1a6268cdf5d1fe36450e89c70e306c97d39e62ede8a31f1c43f9ad.baa63624f08a59503441bce3d427225c61fe79bfa9f6d4c30cde7d072d863e0c
loading weights file https://huggingface.co/google/electra-small-discriminator/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/1ebdea26ed1a6268cdf5d1fe36450e89c70e306c97d39e62ede8a31f1c43f9ad.baa63624f08a59503441bce3d427225c61fe79bfa9f6d4c30cde7d072d863e0c
Some weights of the model checkpoint at google/electra-small-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', '

Step,Training Loss
500,0.6919
1000,0.6908
1500,0.6897
2000,0.6873
2500,0.6878
3000,0.6888
3500,0.685
4000,0.6841
4500,0.6881
5000,0.6856


Saving model checkpoint to transformer_checkpoints/checkpoint-500
Configuration saved in transformer_checkpoints/checkpoint-500/config.json
Model weights saved in transformer_checkpoints/checkpoint-500/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-1000
Configuration saved in transformer_checkpoints/checkpoint-1000/config.json
Model weights saved in transformer_checkpoints/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-1500
Configuration saved in transformer_checkpoints/checkpoint-1500/config.json
Model weights saved in transformer_checkpoints/checkpoint-1500/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-2000
Configuration saved in transformer_checkpoints/checkpoint-2000/config.json
Model weights saved in transformer_checkpoints/checkpoint-2000/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-2500
Configuration saved in transformer_checkpoin

In [18]:
tokenizer = BertTokenizer.from_pretrained('google/electra-base-discriminator')
train, val, test = tokenize_set()
model = ElectraForSequenceClassification.from_pretrained('google/electra-base-discriminator', num_labels=2)
for param in model.electra.parameters():
    param.requires_grad = False
trained_model = train_transformer(model, train, val)
gold, pred = predict_nn(trained_model, test)
model_name += ['google/electra-base-discriminator']
acc_scores += [accuracy_score(gold,pred)]
f1_scores += [f1_score(gold,pred, average='macro')]
empty_gpu()

https://huggingface.co/google/electra-base-discriminator/resolve/main/vocab.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpnudbin4m


Downloading vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

storing https://huggingface.co/google/electra-base-discriminator/resolve/main/vocab.txt in cache at /root/.cache/huggingface/transformers/fe616facc71d8e3afc69de3edac76bf1e4a0a741e80d9a99a2cc6a9a8f5f74b5.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99
creating metadata file for /root/.cache/huggingface/transformers/fe616facc71d8e3afc69de3edac76bf1e4a0a741e80d9a99a2cc6a9a8f5f74b5.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99
https://huggingface.co/google/electra-base-discriminator/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpp1m58qre


Downloading tokenizer_config.json:   0%|          | 0.00/27.0 [00:00<?, ?B/s]

storing https://huggingface.co/google/electra-base-discriminator/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/6f8b3f5095b6f44f5c75cee3c56b971b3208b08132ba2f9fb775a4a7b7140942.4f2213f5603276adf12967b32e4444c0f187f34ca4f8b22a65f03e13514589e9
creating metadata file for /root/.cache/huggingface/transformers/6f8b3f5095b6f44f5c75cee3c56b971b3208b08132ba2f9fb775a4a7b7140942.4f2213f5603276adf12967b32e4444c0f187f34ca4f8b22a65f03e13514589e9
loading file https://huggingface.co/google/electra-base-discriminator/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/fe616facc71d8e3afc69de3edac76bf1e4a0a741e80d9a99a2cc6a9a8f5f74b5.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99
loading file https://huggingface.co/google/electra-base-discriminator/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/google/electra-base-discriminator/resolve/main/special_tokens_map.json from cache at None
load

Downloading config.json:   0%|          | 0.00/666 [00:00<?, ?B/s]

storing https://huggingface.co/google/electra-base-discriminator/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/7d1569a4df2372d67341bda716bce4e3edf3e3ffadb97251bc4b6b35d459f624.57c13443a51769ce892714c93bb3ee3952bad66d7d9662d9de382b808377c3f8
creating metadata file for /root/.cache/huggingface/transformers/7d1569a4df2372d67341bda716bce4e3edf3e3ffadb97251bc4b6b35d459f624.57c13443a51769ce892714c93bb3ee3952bad66d7d9662d9de382b808377c3f8
loading configuration file https://huggingface.co/google/electra-base-discriminator/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/7d1569a4df2372d67341bda716bce4e3edf3e3ffadb97251bc4b6b35d459f624.57c13443a51769ce892714c93bb3ee3952bad66d7d9662d9de382b808377c3f8
Model config ElectraConfig {
  "_name_or_path": "google/electra-base-discriminator",
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 768,
  "hidden_a

  0%|          | 0/14 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/google/electra-base-discriminator/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/7d1569a4df2372d67341bda716bce4e3edf3e3ffadb97251bc4b6b35d459f624.57c13443a51769ce892714c93bb3ee3952bad66d7d9662d9de382b808377c3f8
Model config ElectraConfig {
  "architectures": [
    "ElectraForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 768,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "transformers_version": "4.21.1",
  "type_vocab_size": 2,
  "use_cache":

Downloading pytorch_model.bin:   0%|          | 0.00/420M [00:00<?, ?B/s]

storing https://huggingface.co/google/electra-base-discriminator/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/aed576b8aec823c870feda40d60bd803ac8e40056ecb7d7f43dd0b2bfd82e373.db390a2059e53ead2bb00e1a2f8cd50b0a47e1969d180cd70339ec3f6f29dce1
creating metadata file for /root/.cache/huggingface/transformers/aed576b8aec823c870feda40d60bd803ac8e40056ecb7d7f43dd0b2bfd82e373.db390a2059e53ead2bb00e1a2f8cd50b0a47e1969d180cd70339ec3f6f29dce1
loading weights file https://huggingface.co/google/electra-base-discriminator/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/aed576b8aec823c870feda40d60bd803ac8e40056ecb7d7f43dd0b2bfd82e373.db390a2059e53ead2bb00e1a2f8cd50b0a47e1969d180cd70339ec3f6f29dce1
Some weights of the model checkpoint at google/electra-base-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'dis

Step,Training Loss
500,0.6933
1000,0.6909
1500,0.6857
2000,0.6837
2500,0.683
3000,0.6844
3500,0.6843
4000,0.6795
4500,0.6832
5000,0.6809


Saving model checkpoint to transformer_checkpoints/checkpoint-500
Configuration saved in transformer_checkpoints/checkpoint-500/config.json
Model weights saved in transformer_checkpoints/checkpoint-500/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-1000
Configuration saved in transformer_checkpoints/checkpoint-1000/config.json
Model weights saved in transformer_checkpoints/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-1500
Configuration saved in transformer_checkpoints/checkpoint-1500/config.json
Model weights saved in transformer_checkpoints/checkpoint-1500/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-2000
Configuration saved in transformer_checkpoints/checkpoint-2000/config.json
Model weights saved in transformer_checkpoints/checkpoint-2000/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-2500
Configuration saved in transformer_checkpoin

In [19]:
tokenizer = BertTokenizer.from_pretrained('google/electra-base-generator')
train, val, test = tokenize_set()
model = ElectraForSequenceClassification.from_pretrained('google/electra-base-generator', num_labels=2)
for param in model.electra.parameters():
    param.requires_grad = False
trained_model = train_transformer(model, train, val)
gold, pred = predict_nn(trained_model, test)
model_name += ['google/electra-base-generator']
acc_scores += [accuracy_score(gold,pred)]
f1_scores+= [f1_score(gold,pred, average='macro')]
empty_gpu()

https://huggingface.co/google/electra-base-generator/resolve/main/vocab.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpa6rrkl8p


Downloading vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

storing https://huggingface.co/google/electra-base-generator/resolve/main/vocab.txt in cache at /root/.cache/huggingface/transformers/b1bb2598a31edf91e6e496cfa77edf8ce7b0b6c3d81ba88916839e22ee1f8e78.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99
creating metadata file for /root/.cache/huggingface/transformers/b1bb2598a31edf91e6e496cfa77edf8ce7b0b6c3d81ba88916839e22ee1f8e78.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99
https://huggingface.co/google/electra-base-generator/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpvt2jb43r


Downloading tokenizer_config.json:   0%|          | 0.00/27.0 [00:00<?, ?B/s]

storing https://huggingface.co/google/electra-base-generator/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/dfa3ab7072263f521f2d873714f4a16ebd2e03b47b60b5d9f5623114795eece0.4f2213f5603276adf12967b32e4444c0f187f34ca4f8b22a65f03e13514589e9
creating metadata file for /root/.cache/huggingface/transformers/dfa3ab7072263f521f2d873714f4a16ebd2e03b47b60b5d9f5623114795eece0.4f2213f5603276adf12967b32e4444c0f187f34ca4f8b22a65f03e13514589e9
loading file https://huggingface.co/google/electra-base-generator/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/b1bb2598a31edf91e6e496cfa77edf8ce7b0b6c3d81ba88916839e22ee1f8e78.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99
loading file https://huggingface.co/google/electra-base-generator/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/google/electra-base-generator/resolve/main/special_tokens_map.json from cache at None
loading file https:/

Downloading config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

storing https://huggingface.co/google/electra-base-generator/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/1b8851729292ac310aa08b6d130a27579aebd8557635c997b1940825c1b543b9.8cfd0b697dea70b75d1b291b655114b572235f235c65599c6e82ab6b858f69d3
creating metadata file for /root/.cache/huggingface/transformers/1b8851729292ac310aa08b6d130a27579aebd8557635c997b1940825c1b543b9.8cfd0b697dea70b75d1b291b655114b572235f235c65599c6e82ab6b858f69d3
loading configuration file https://huggingface.co/google/electra-base-generator/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1b8851729292ac310aa08b6d130a27579aebd8557635c997b1940825c1b543b9.8cfd0b697dea70b75d1b291b655114b572235f235c65599c6e82ab6b858f69d3
Model config ElectraConfig {
  "_name_or_path": "google/electra-base-generator",
  "architectures": [
    "ElectraForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 768,
  "hidden_act": "gelu",
  

  0%|          | 0/14 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/google/electra-base-generator/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/1b8851729292ac310aa08b6d130a27579aebd8557635c997b1940825c1b543b9.8cfd0b697dea70b75d1b291b655114b572235f235c65599c6e82ab6b858f69d3
Model config ElectraConfig {
  "architectures": [
    "ElectraForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 768,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "electra",
  "num_attention_heads": 4,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "summary_activation": "gelu",
  "summary_last_dropout": 0.1,
  "summary_type": "first",
  "summary_use_proj": true,
  "transformers_version": "4.21.1",
  "type_vocab_size": 2,
  "use_cache": true,
 

Downloading pytorch_model.bin:   0%|          | 0.00/129M [00:00<?, ?B/s]

storing https://huggingface.co/google/electra-base-generator/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/aa8b1a6defad3704fa06d906422dba45d9d2fc7f6a0a10e4f6d6e6ea573e9551.2a44944123c60252346c02589e5f73650d732d44a7335fdeb61f189131fd17ac
creating metadata file for /root/.cache/huggingface/transformers/aa8b1a6defad3704fa06d906422dba45d9d2fc7f6a0a10e4f6d6e6ea573e9551.2a44944123c60252346c02589e5f73650d732d44a7335fdeb61f189131fd17ac
loading weights file https://huggingface.co/google/electra-base-generator/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/aa8b1a6defad3704fa06d906422dba45d9d2fc7f6a0a10e4f6d6e6ea573e9551.2a44944123c60252346c02589e5f73650d732d44a7335fdeb61f189131fd17ac
Some weights of the model checkpoint at google/electra-base-generator were not used when initializing ElectraForSequenceClassification: ['generator_predictions.dense.weight', 'generator_lm_head.weight', 'generator_predictions.dense.bias', 'genera

Step,Training Loss
500,0.6927
1000,0.6937
1500,0.6935
2000,0.6933
2500,0.6929
3000,0.6936
3500,0.6933
4000,0.6927
4500,0.6934
5000,0.6927


Saving model checkpoint to transformer_checkpoints/checkpoint-500
Configuration saved in transformer_checkpoints/checkpoint-500/config.json
Model weights saved in transformer_checkpoints/checkpoint-500/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-1000
Configuration saved in transformer_checkpoints/checkpoint-1000/config.json
Model weights saved in transformer_checkpoints/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-1500
Configuration saved in transformer_checkpoints/checkpoint-1500/config.json
Model weights saved in transformer_checkpoints/checkpoint-1500/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-2000
Configuration saved in transformer_checkpoints/checkpoint-2000/config.json
Model weights saved in transformer_checkpoints/checkpoint-2000/pytorch_model.bin
Saving model checkpoint to transformer_checkpoints/checkpoint-2500
Configuration saved in transformer_checkpoin

In [20]:
data = {'Model': model_name, 'Accuracy': acc_scores, 'Macro F1': f1_scores}
df = pd.DataFrame(data)

In [21]:
df

Unnamed: 0,Model,Accuracy,Macro F1
0,prajjwal1/bert-tiny,0.527,0.526655
1,bert-base-uncased,0.5135,0.512104
2,bert-base-cased,0.524,0.523198
3,roberta-base,0.5735,0.573369
4,google/electra-small-discriminator,0.551,0.550912
5,google/electra-base-discriminator,0.566,0.564955
6,google/electra-base-generator,0.5115,0.444847
