## BERT Double on ToS dataset

##### Code Repository Reference - https://github.com/reglab/casehold
##### Dataset for ToS - http://claudette.eui.eu/ToS.zip

##### Step 1: Download the ToS dataset, place its `Labels` and `Sentences` folders inside the `data` folder, and run the following cells

In [1]:
import os
import pandas as pd
import numpy as np
import warnings
from sklearn.model_selection import train_test_split
warnings.filterwarnings("ignore")

In [2]:
# List the per-document sentence files.
# os.listdir returns names in arbitrary order, so sort them: the row order of
# the assembled dataset feeds the seeded train/dev split further down and must
# be the same on every machine. Hidden entries (e.g. a macOS ".DS_Store") are
# skipped because they are not dataset documents.
all_sentences_files = sorted(
    name for name in os.listdir("./data/Sentences/") if not name.startswith(".")
)

In [3]:
# Directory prefixes (trailing slash included) that are concatenated with a
# file name to locate a document's labels and its sentences.
prefix_label = "./data/Labels/"
prefix_sentence = "./data/Sentences/"

In [4]:
# Build one two-column frame (label_converted, sentences) per document and
# collect them in `clauses`. The same file name is looked up under both the
# Sentences and the Labels directory.
clauses = []
for file in all_sentences_files:
    sentence_file_path = prefix_sentence + file
    label_file_path = prefix_label + file

    # Read the sentences as plain text, one per line, instead of abusing
    # read_csv with a multi-character separator: that is parsed as a regex,
    # forces the slow python engine, raises a ParserWarning and applies NA
    # conversion to sentence text such as "NA" or "null".
    # Surrounding whitespace is stripped and blank lines are skipped.
    # "utf-8-sig" reads plain UTF-8 and drops a leading BOM if one is present.
    with open(sentence_file_path, encoding="utf-8-sig") as sentence_fh:
        stripped_lines = (line.strip() for line in sentence_fh)
        sentences_df = pd.DataFrame(
            {"sentences": [line for line in stripped_lines if line]}
        )

    label_df = pd.read_csv(label_file_path, sep=" ", header=None)
    label_df.columns = ["label"]

    # pd.concat(axis=1) would silently pad the shorter side with NaN, pairing
    # sentences with missing labels (or vice versa); fail loudly instead.
    if len(label_df) != len(sentences_df):
        raise ValueError(
            f"{file}: {len(sentences_df)} sentences but {len(label_df)} labels"
        )

    # Map the raw label -1 to 0 and every other value to 1.
    label_df["label_converted"] = np.where(label_df["label"] == -1, 0, 1)
    df_concat = pd.concat([label_df["label_converted"], sentences_df], axis=1)
    clauses.append(df_concat)

In [5]:
# Stack the per-document frames into a single table.
# DataFrame.append was removed in pandas 2.0 and, used in a loop, copies the
# accumulated frame on every iteration; one pd.concat call does the same job
# in a single pass. ignore_index=True gives the combined table a unique
# 0..n-1 index instead of repeating each document's own row numbers.
colnames = ["label_converted", "sentences"]
if clauses:
    clauses_df = pd.concat(clauses, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep the empty-but-typed table the
    # original loop produced when no documents were found.
    clauses_df = pd.DataFrame(columns=colnames)

In [6]:
# Expose the binary target under the name "label" (the stratified split below
# reads clauses_df[['label']]). Rebinding instead of inplace=True keeps the
# cell side-effect free on the previous frame; the no-op
# 'sentences' -> 'sentences' mapping is dropped.
clauses_df = clauses_df.rename(columns={"label_converted": "label"})

In [7]:
# Persist the combined clause table (without the index column); this CSV is
# the input of the run_glue.py invocation in Step 2.
clauses_df.to_csv(path_or_buf="./data/tos_clauses.csv", index=False)

##### Step 2: Compute per-example average pretrain loss

In [8]:
# Set TOKENIZERS_PARALLELISM=false in the kernel's environment so that the
# `!python3 ...` subprocesses launched below inherit it.
# NOTE(review): presumably this silences the Hugging Face tokenizers
# fork/parallelism warning — confirm.
%env TOKENIZERS_PARALLELISM=false

env: TOKENIZERS_PARALLELISM=false


In [9]:
# Run the CaseHOLD run_glue.py script on the full clause file, which is passed
# as both the train and the validation file. Neither --do_train nor --do_eval
# is given, so no fine-tuning is requested here (the logged arguments show
# do_train=False, do_eval=False).
# NOTE(review): --ptl is a custom flag of that script; presumably it enables
# the pretrain-loss computation named in the Step 2 heading — confirm against
# classification/run_glue.py.
# NOTE(review): this writes to the same --output_dir as the fine-tuning run in
# Step 3, and both pass --overwrite_output_dir — confirm that the Step 2
# artifacts are not clobbered.
!python3 classification/run_glue.py \
  --model_name_or_path zlucia/bert-double \
  --train_file data/tos_clauses.csv \
  --validation_file data/tos_clauses.csv \
  --ptl=True \
  --max_seq_length 128 \
  --output_dir logs/tos/bertdouble \
  --overwrite_output_dir

10/13/2022 18:18:37 - INFO - __main__ -   Training/evaluation parameters TrainingArguments(
_n_gpu=0,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=no,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_ski

[INFO|configuration_utils.py:653] 2022-10-13 18:18:39,100 >> loading configuration file config.json from cache at /Users/shreyaprabhu/.cache/huggingface/hub/models--zlucia--bert-double/snapshots/0ce1a5b13ad6781ac7784521d1b56e1f48c89cf7/config.json
[INFO|configuration_utils.py:705] 2022-10-13 18:18:39,101 >> Model config BertConfig {
  "_name_or_path": "zlucia/bert-double",
  "architectures": [
    "BertForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_

##### Step 3: Split the data into train and dev sets. Fine-tune on the train set and evaluate on the dev set. Logs and predictions are written to the output directory logs/tos/bertdouble

In [10]:
# Hold out 20% of the clauses as the dev set. The split is stratified on the
# label column and seeded so that it is repeatable.
train, dev = train_test_split(
    clauses_df,
    test_size=0.2,
    random_state=42,
    stratify=clauses_df[["label"]],
)

In [11]:
# Write each split to its own CSV (index column omitted); these are the files
# handed to run_glue.py for fine-tuning and evaluation.
for split_df, split_path in (
    (train, "./data/tos_clauses_train.csv"),
    (dev, "./data/tos_clauses_dev.csv"),
):
    split_df.to_csv(split_path, index=False)

In [12]:
# Fine-tune zlucia/bert-double (resolved through the Hugging Face model
# repository / local cache) on the train split and evaluate it on the dev
# split with the CaseHOLD run_glue.py script.
# --evaluation_strategy steps with --logging_steps 50: the logged arguments
# show eval_steps=50, i.e. the training loss is logged and the dev set is
# evaluated every 50 optimizer steps.
# Training runs for 2 epochs with batch size 16 per device and learning rate
# 1e-5; inputs are limited to --max_seq_length 128.
# Outputs go to logs/tos/bertdouble; --overwrite_output_dir allows reusing
# that directory.
!python3 classification/run_glue.py \
  --model_name_or_path zlucia/bert-double \
  --train_file data/tos_clauses_train.csv \
  --validation_file data/tos_clauses_dev.csv \
  --do_train \
  --do_eval \
  --evaluation_strategy steps \
  --max_seq_length 128 \
  --per_device_train_batch_size=16 \
  --learning_rate=1e-5 \
  --num_train_epochs=2.0 \
  --output_dir logs/tos/bertdouble \
  --overwrite_output_dir \
  --logging_steps 50

10/13/2022 18:26:30 - INFO - __main__ -   Training/evaluation parameters TrainingArguments(
_n_gpu=0,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=50,
evaluation_strategy=steps,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_skip

[INFO|configuration_utils.py:653] 2022-10-13 18:26:33,068 >> loading configuration file config.json from cache at /Users/shreyaprabhu/.cache/huggingface/hub/models--zlucia--bert-double/snapshots/0ce1a5b13ad6781ac7784521d1b56e1f48c89cf7/config.json
[INFO|configuration_utils.py:705] 2022-10-13 18:26:33,069 >> Model config BertConfig {
  "_name_or_path": "zlucia/bert-double",
  "architectures": [
    "BertForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_

{'loss': 0.4588, 'learning_rate': 9.469214437367304e-06, 'epoch': 0.11}         
  5%|██▏                                       | 50/942 [02:18<39:09,  2.63s/it][INFO|trainer.py:725] 2022-10-13 18:28:53,312 >> The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: sentences. If sentences are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2907] 2022-10-13 18:28:53,313 >> ***** Running Evaluation *****
[INFO|trainer.py:2909] 2022-10-13 18:28:53,313 >>   Num examples = 1883
[INFO|trainer.py:2912] 2022-10-13 18:28:53,313 >>   Batch size = 8

  0%|                                                   | 0/236 [00:00<?, ?it/s][A
  1%|▎                                          | 2/236 [00:00<00:47,  4.96it/s][A
  1%|▌                                          | 3/236 [00:00<01:00,  3.88it/s][A
  2%|▋                                          |

 38%|████████████████                          | 90/236 [00:35<00:58,  2.49it/s][A
 39%|████████████████▏                         | 91/236 [00:35<00:58,  2.48it/s][A
 39%|████████████████▎                         | 92/236 [00:36<00:57,  2.49it/s][A
 39%|████████████████▌                         | 93/236 [00:36<00:57,  2.48it/s][A
 40%|████████████████▋                         | 94/236 [00:37<00:57,  2.49it/s][A
 40%|████████████████▉                         | 95/236 [00:37<00:56,  2.48it/s][A
 41%|█████████████████                         | 96/236 [00:38<00:56,  2.47it/s][A
 41%|█████████████████▎                        | 97/236 [00:38<00:56,  2.47it/s][A
 42%|█████████████████▍                        | 98/236 [00:38<00:55,  2.48it/s][A
 42%|█████████████████▌                        | 99/236 [00:39<00:55,  2.48it/s][A
 42%|█████████████████▎                       | 100/236 [00:39<00:54,  2.48it/s][A
 43%|█████████████████▌                       | 101/236 [00:40<00:54,  2.48i

 79%|████████████████████████████████▍        | 187/236 [01:15<00:20,  2.36it/s][A
 80%|████████████████████████████████▋        | 188/236 [01:16<00:20,  2.36it/s][A
 80%|████████████████████████████████▊        | 189/236 [01:16<00:19,  2.37it/s][A
 81%|█████████████████████████████████        | 190/236 [01:16<00:19,  2.38it/s][A
 81%|█████████████████████████████████▏       | 191/236 [01:17<00:18,  2.39it/s][A
 81%|█████████████████████████████████▎       | 192/236 [01:17<00:18,  2.39it/s][A
 82%|█████████████████████████████████▌       | 193/236 [01:18<00:17,  2.40it/s][A
 82%|█████████████████████████████████▋       | 194/236 [01:18<00:17,  2.39it/s][A
 83%|█████████████████████████████████▉       | 195/236 [01:18<00:17,  2.37it/s][A
 83%|██████████████████████████████████       | 196/236 [01:19<00:16,  2.37it/s][A
 83%|██████████████████████████████████▏      | 197/236 [01:19<00:16,  2.37it/s][A
 84%|██████████████████████████████████▍      | 198/236 [01:20<00:15,  2.39i

 14%|█████▋                                    | 32/236 [00:13<01:28,  2.31it/s][A
 14%|█████▊                                    | 33/236 [00:13<01:27,  2.31it/s][A
 14%|██████                                    | 34/236 [00:14<01:27,  2.30it/s][A
 15%|██████▏                                   | 35/236 [00:14<01:27,  2.31it/s][A
 15%|██████▍                                   | 36/236 [00:15<01:26,  2.31it/s][A
 16%|██████▌                                   | 37/236 [00:15<01:26,  2.31it/s][A
 16%|██████▊                                   | 38/236 [00:15<01:25,  2.32it/s][A
 17%|██████▉                                   | 39/236 [00:16<01:24,  2.33it/s][A
 17%|███████                                   | 40/236 [00:16<01:24,  2.32it/s][A
 17%|███████▎                                  | 41/236 [00:17<01:24,  2.31it/s][A
 18%|███████▍                                  | 42/236 [00:17<01:23,  2.31it/s][A
 18%|███████▋                                  | 43/236 [00:18<01:23,  2.32i

 55%|██████████████████████▍                  | 129/236 [00:55<00:46,  2.32it/s][A
 55%|██████████████████████▌                  | 130/236 [00:55<00:45,  2.31it/s][A
 56%|██████████████████████▊                  | 131/236 [00:55<00:45,  2.31it/s][A
 56%|██████████████████████▉                  | 132/236 [00:56<00:44,  2.32it/s][A
 56%|███████████████████████                  | 133/236 [00:56<00:44,  2.33it/s][A
 57%|███████████████████████▎                 | 134/236 [00:57<00:44,  2.31it/s][A
 57%|███████████████████████▍                 | 135/236 [00:57<00:43,  2.31it/s][A
 58%|███████████████████████▋                 | 136/236 [00:58<00:42,  2.33it/s][A
 58%|███████████████████████▊                 | 137/236 [00:58<00:42,  2.33it/s][A
 58%|███████████████████████▉                 | 138/236 [00:58<00:42,  2.33it/s][A
 59%|████████████████████████▏                | 139/236 [00:59<00:41,  2.34it/s][A
 59%|████████████████████████▎                | 140/236 [00:59<00:41,  2.33i

 96%|███████████████████████████████████████▎ | 226/236 [01:36<00:04,  2.41it/s][A
 96%|███████████████████████████████████████▍ | 227/236 [01:37<00:03,  2.45it/s][A
 97%|███████████████████████████████████████▌ | 228/236 [01:37<00:03,  2.47it/s][A
 97%|███████████████████████████████████████▊ | 229/236 [01:38<00:02,  2.49it/s][A
 97%|███████████████████████████████████████▉ | 230/236 [01:38<00:02,  2.49it/s][A
 98%|████████████████████████████████████████▏| 231/236 [01:38<00:01,  2.51it/s][A
 98%|████████████████████████████████████████▎| 232/236 [01:39<00:01,  2.52it/s][A
 99%|████████████████████████████████████████▍| 233/236 [01:39<00:01,  2.51it/s][A
 99%|████████████████████████████████████████▋| 234/236 [01:40<00:00,  2.49it/s][A
100%|████████████████████████████████████████▊| 235/236 [01:40<00:00,  2.50it/s][A
100%|█████████████████████████████████████████| 236/236 [01:40<00:00,  3.03it/s][A10/13/2022 18:34:28 - INFO - /Library/Frameworks/Python.framework/Versions/3.

 30%|████████████▋                             | 71/236 [00:28<01:06,  2.48it/s][A
 31%|████████████▊                             | 72/236 [00:28<01:06,  2.48it/s][A
 31%|████████████▉                             | 73/236 [00:28<01:05,  2.48it/s][A
 31%|█████████████▏                            | 74/236 [00:29<01:05,  2.48it/s][A
 32%|█████████████▎                            | 75/236 [00:29<01:04,  2.48it/s][A
 32%|█████████████▌                            | 76/236 [00:30<01:04,  2.49it/s][A
 33%|█████████████▋                            | 77/236 [00:30<01:03,  2.49it/s][A
 33%|█████████████▉                            | 78/236 [00:30<01:03,  2.49it/s][A
 33%|██████████████                            | 79/236 [00:31<01:02,  2.49it/s][A
 34%|██████████████▏                           | 80/236 [00:31<01:02,  2.50it/s][A
 34%|██████████████▍                           | 81/236 [00:32<01:02,  2.49it/s][A
 35%|██████████████▌                           | 82/236 [00:32<01:02,  2.48i

 71%|█████████████████████████████▏           | 168/236 [01:07<00:27,  2.47it/s][A
 72%|█████████████████████████████▎           | 169/236 [01:07<00:27,  2.48it/s][A
 72%|█████████████████████████████▌           | 170/236 [01:07<00:26,  2.48it/s][A
 72%|█████████████████████████████▋           | 171/236 [01:08<00:26,  2.48it/s][A
 73%|█████████████████████████████▉           | 172/236 [01:08<00:25,  2.50it/s][A
 73%|██████████████████████████████           | 173/236 [01:09<00:25,  2.47it/s][A
 74%|██████████████████████████████▏          | 174/236 [01:09<00:24,  2.48it/s][A
 74%|██████████████████████████████▍          | 175/236 [01:10<00:24,  2.48it/s][A
 75%|██████████████████████████████▌          | 176/236 [01:10<00:24,  2.47it/s][A
 75%|██████████████████████████████▊          | 177/236 [01:10<00:23,  2.48it/s][A
 75%|██████████████████████████████▉          | 178/236 [01:11<00:23,  2.48it/s][A
 76%|███████████████████████████████          | 179/236 [01:11<00:23,  2.46i

  6%|██▎                                       | 13/236 [00:04<01:29,  2.49it/s][A
  6%|██▍                                       | 14/236 [00:05<01:29,  2.48it/s][A
  6%|██▋                                       | 15/236 [00:05<01:29,  2.47it/s][A
  7%|██▊                                       | 16/236 [00:06<01:28,  2.48it/s][A
  7%|███                                       | 17/236 [00:06<01:28,  2.47it/s][A
  8%|███▏                                      | 18/236 [00:06<01:28,  2.47it/s][A
  8%|███▍                                      | 19/236 [00:07<01:27,  2.47it/s][A
  8%|███▌                                      | 20/236 [00:07<01:26,  2.49it/s][A
  9%|███▋                                      | 21/236 [00:08<01:26,  2.48it/s][A
  9%|███▉                                      | 22/236 [00:08<01:26,  2.48it/s][A
 10%|████                                      | 23/236 [00:08<01:26,  2.47it/s][A
 10%|████▎                                     | 24/236 [00:09<01:25,  2.47i

 47%|███████████████████                      | 110/236 [00:44<00:50,  2.48it/s][A
 47%|███████████████████▎                     | 111/236 [00:44<00:50,  2.49it/s][A
 47%|███████████████████▍                     | 112/236 [00:44<00:50,  2.48it/s][A
 48%|███████████████████▋                     | 113/236 [00:45<00:49,  2.48it/s][A
 48%|███████████████████▊                     | 114/236 [00:45<00:49,  2.49it/s][A
 49%|███████████████████▉                     | 115/236 [00:46<00:48,  2.49it/s][A
 49%|████████████████████▏                    | 116/236 [00:46<00:47,  2.51it/s][A
 50%|████████████████████▎                    | 117/236 [00:46<00:47,  2.50it/s][A
 50%|████████████████████▌                    | 118/236 [00:47<00:47,  2.50it/s][A
 50%|████████████████████▋                    | 119/236 [00:47<00:46,  2.49it/s][A
 51%|████████████████████▊                    | 120/236 [00:48<00:46,  2.48it/s][A
 51%|█████████████████████                    | 121/236 [00:48<00:45,  2.50i

 88%|███████████████████████████████████▉     | 207/236 [01:23<00:11,  2.53it/s][A
 88%|████████████████████████████████████▏    | 208/236 [01:23<00:11,  2.52it/s][A
 89%|████████████████████████████████████▎    | 209/236 [01:24<00:10,  2.52it/s][A
 89%|████████████████████████████████████▍    | 210/236 [01:24<00:10,  2.52it/s][A
 89%|████████████████████████████████████▋    | 211/236 [01:25<00:09,  2.51it/s][A
 90%|████████████████████████████████████▊    | 212/236 [01:25<00:09,  2.52it/s][A
 90%|█████████████████████████████████████    | 213/236 [01:25<00:09,  2.51it/s][A
 91%|█████████████████████████████████████▏   | 214/236 [01:26<00:08,  2.52it/s][A
 91%|█████████████████████████████████████▎   | 215/236 [01:26<00:08,  2.52it/s][A
 92%|█████████████████████████████████████▌   | 216/236 [01:27<00:07,  2.51it/s][A
 92%|█████████████████████████████████████▋   | 217/236 [01:27<00:07,  2.51it/s][A
 92%|█████████████████████████████████████▊   | 218/236 [01:27<00:07,  2.50i

 22%|█████████▎                                | 52/236 [00:20<01:16,  2.42it/s][A
 22%|█████████▍                                | 53/236 [00:21<01:15,  2.41it/s][A
 23%|█████████▌                                | 54/236 [00:21<01:16,  2.38it/s][A
 23%|█████████▊                                | 55/236 [00:22<01:15,  2.41it/s][A
 24%|█████████▉                                | 56/236 [00:22<01:14,  2.40it/s][A
 24%|██████████▏                               | 57/236 [00:23<01:15,  2.38it/s][A
 25%|██████████▎                               | 58/236 [00:23<01:14,  2.40it/s][A
 25%|██████████▌                               | 59/236 [00:23<01:13,  2.42it/s][A
 25%|██████████▋                               | 60/236 [00:24<01:12,  2.42it/s][A
 26%|██████████▊                               | 61/236 [00:24<01:11,  2.44it/s][A
 26%|███████████                               | 62/236 [00:25<01:11,  2.42it/s][A
 27%|███████████▏                              | 63/236 [00:25<01:12,  2.38i

 63%|█████████████████████████▉               | 149/236 [01:01<00:35,  2.44it/s][A
 64%|██████████████████████████               | 150/236 [01:02<00:35,  2.40it/s][A
 64%|██████████████████████████▏              | 151/236 [01:02<00:35,  2.42it/s][A
 64%|██████████████████████████▍              | 152/236 [01:02<00:34,  2.44it/s][A
 65%|██████████████████████████▌              | 153/236 [01:03<00:34,  2.43it/s][A
 65%|██████████████████████████▊              | 154/236 [01:03<00:33,  2.42it/s][A
 66%|██████████████████████████▉              | 155/236 [01:04<00:33,  2.40it/s][A
 66%|███████████████████████████              | 156/236 [01:04<00:33,  2.40it/s][A
 67%|███████████████████████████▎             | 157/236 [01:04<00:32,  2.40it/s][A
 67%|███████████████████████████▍             | 158/236 [01:05<00:32,  2.41it/s][A
 67%|███████████████████████████▌             | 159/236 [01:05<00:32,  2.36it/s][A
 68%|███████████████████████████▊             | 160/236 [01:06<00:32,  2.37i


  0%|                                                   | 0/236 [00:00<?, ?it/s][A
  1%|▎                                          | 2/236 [00:00<00:47,  4.93it/s][A
  1%|▌                                          | 3/236 [00:00<01:01,  3.81it/s][A
  2%|▋                                          | 4/236 [00:01<01:10,  3.30it/s][A
  2%|▉                                          | 5/236 [00:01<01:17,  3.00it/s][A
  3%|█                                          | 6/236 [00:02<01:21,  2.82it/s][A
  3%|█▎                                         | 7/236 [00:02<01:24,  2.72it/s][A
  3%|█▍                                         | 8/236 [00:02<01:26,  2.64it/s][A
  4%|█▋                                         | 9/236 [00:03<01:28,  2.57it/s][A
  4%|█▊                                        | 10/236 [00:03<01:28,  2.54it/s][A
  5%|█▉                                        | 11/236 [00:04<01:28,  2.54it/s][A
  5%|██▏                                       | 12/236 [00:04<01:28,  2.52

 42%|█████████████████▍                        | 98/236 [00:39<00:55,  2.49it/s][A
 42%|█████████████████▌                        | 99/236 [00:39<00:54,  2.50it/s][A
 42%|█████████████████▎                       | 100/236 [00:39<00:54,  2.49it/s][A
 43%|█████████████████▌                       | 101/236 [00:40<00:54,  2.49it/s][A
 43%|█████████████████▋                       | 102/236 [00:40<00:53,  2.49it/s][A
 44%|█████████████████▉                       | 103/236 [00:41<00:53,  2.49it/s][A
 44%|██████████████████                       | 104/236 [00:41<00:53,  2.49it/s][A
 44%|██████████████████▏                      | 105/236 [00:41<00:52,  2.49it/s][A
 45%|██████████████████▍                      | 106/236 [00:42<00:52,  2.49it/s][A
 45%|██████████████████▌                      | 107/236 [00:42<00:51,  2.48it/s][A
 46%|██████████████████▊                      | 108/236 [00:43<00:51,  2.48it/s][A
 46%|██████████████████▉                      | 109/236 [00:43<00:51,  2.48i

 83%|█████████████████████████████████▉       | 195/236 [01:19<00:16,  2.41it/s][A
 83%|██████████████████████████████████       | 196/236 [01:20<00:16,  2.42it/s][A
 83%|██████████████████████████████████▏      | 197/236 [01:20<00:16,  2.43it/s][A
 84%|██████████████████████████████████▍      | 198/236 [01:20<00:15,  2.45it/s][A
 84%|██████████████████████████████████▌      | 199/236 [01:21<00:15,  2.47it/s][A
 85%|██████████████████████████████████▋      | 200/236 [01:21<00:14,  2.47it/s][A
 85%|██████████████████████████████████▉      | 201/236 [01:22<00:14,  2.48it/s][A
 86%|███████████████████████████████████      | 202/236 [01:22<00:13,  2.47it/s][A
 86%|███████████████████████████████████▎     | 203/236 [01:23<00:13,  2.46it/s][A
 86%|███████████████████████████████████▍     | 204/236 [01:23<00:12,  2.46it/s][A
 87%|███████████████████████████████████▌     | 205/236 [01:23<00:12,  2.46it/s][A
 87%|███████████████████████████████████▊     | 206/236 [01:24<00:12,  2.47i

 17%|███████                                   | 40/236 [00:19<01:32,  2.12it/s][A
 17%|███████▎                                  | 41/236 [00:19<01:43,  1.89it/s][A
 18%|███████▍                                  | 42/236 [00:20<01:38,  1.96it/s][A
 18%|███████▋                                  | 43/236 [00:20<01:38,  1.95it/s][A
 19%|███████▊                                  | 44/236 [00:21<01:48,  1.78it/s][A
 19%|████████                                  | 45/236 [00:22<01:47,  1.78it/s][A
 19%|████████▏                                 | 46/236 [00:22<01:44,  1.82it/s][A
 20%|████████▎                                 | 47/236 [00:23<01:42,  1.85it/s][A
 20%|████████▌                                 | 48/236 [00:23<01:36,  1.95it/s][A
 21%|████████▋                                 | 49/236 [00:23<01:32,  2.02it/s][A
 21%|████████▉                                 | 50/236 [00:24<01:37,  1.90it/s][A
 22%|█████████                                 | 51/236 [00:25<01:32,  1.99i

 58%|███████████████████████▊                 | 137/236 [01:06<00:46,  2.12it/s][A
 58%|███████████████████████▉                 | 138/236 [01:07<00:45,  2.14it/s][A
 59%|████████████████████████▏                | 139/236 [01:07<00:44,  2.17it/s][A
 59%|████████████████████████▎                | 140/236 [01:07<00:43,  2.19it/s][A
 60%|████████████████████████▍                | 141/236 [01:08<00:43,  2.17it/s][A
 60%|████████████████████████▋                | 142/236 [01:08<00:42,  2.19it/s][A
 61%|████████████████████████▊                | 143/236 [01:09<00:42,  2.20it/s][A
 61%|█████████████████████████                | 144/236 [01:09<00:41,  2.21it/s][A
 61%|█████████████████████████▏               | 145/236 [01:10<00:41,  2.20it/s][A
 62%|█████████████████████████▎               | 146/236 [01:10<00:41,  2.18it/s][A
 62%|█████████████████████████▌               | 147/236 [01:11<00:41,  2.16it/s][A
 63%|█████████████████████████▋               | 148/236 [01:11<00:40,  2.18i

 99%|████████████████████████████████████████▋| 234/236 [01:50<00:00,  2.17it/s][A
100%|████████████████████████████████████████▊| 235/236 [01:50<00:00,  2.16it/s][A
100%|█████████████████████████████████████████| 236/236 [01:51<00:00,  2.62it/s][A10/13/2022 18:54:28 - INFO - /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/datasets/metric.py -   Removing /Users/shreyaprabhu/.cache/huggingface/metrics/f1/default/default_experiment-1-0.arrow
                                                                                
[A{'eval_loss': 0.17240814864635468, 'eval_f1': 0.7238095238095237, 'eval_runtime': 112.5011, 'eval_samples_per_second': 16.738, 'eval_steps_per_second': 2.098, 'epoch': 0.74}
 37%|███████████████▏                         | 350/942 [27:53<30:01,  3.04s/it]
100%|█████████████████████████████████████████| 236/236 [01:52<00:00,  2.62it/s][A
{'loss': 0.179, 'learning_rate': 5.753715498938429e-06, 'epoch': 0.85}          [A
 42%|███████████

 33%|██████████████                            | 79/236 [00:32<01:06,  2.38it/s][A
 34%|██████████████▏                           | 80/236 [00:33<01:05,  2.37it/s][A
 34%|██████████████▍                           | 81/236 [00:33<01:05,  2.37it/s][A
 35%|██████████████▌                           | 82/236 [00:34<01:04,  2.38it/s][A
 35%|██████████████▊                           | 83/236 [00:34<01:04,  2.37it/s][A
 36%|██████████████▉                           | 84/236 [00:35<01:04,  2.37it/s][A
 36%|███████████████▏                          | 85/236 [00:35<01:03,  2.37it/s][A
 36%|███████████████▎                          | 86/236 [00:35<01:03,  2.38it/s][A
 37%|███████████████▍                          | 87/236 [00:36<01:02,  2.38it/s][A
 37%|███████████████▋                          | 88/236 [00:36<01:02,  2.37it/s][A
 38%|███████████████▊                          | 89/236 [00:37<01:02,  2.37it/s][A
 38%|████████████████                          | 90/236 [00:37<01:01,  2.38i

 75%|██████████████████████████████▌          | 176/236 [01:14<00:25,  2.35it/s][A
 75%|██████████████████████████████▊          | 177/236 [01:14<00:25,  2.34it/s][A
 75%|██████████████████████████████▉          | 178/236 [01:15<00:24,  2.36it/s][A
 76%|███████████████████████████████          | 179/236 [01:15<00:24,  2.36it/s][A
 76%|███████████████████████████████▎         | 180/236 [01:16<00:23,  2.37it/s][A
 77%|███████████████████████████████▍         | 181/236 [01:16<00:23,  2.37it/s][A
 77%|███████████████████████████████▌         | 182/236 [01:16<00:22,  2.37it/s][A
 78%|███████████████████████████████▊         | 183/236 [01:17<00:23,  2.30it/s][A
 78%|███████████████████████████████▉         | 184/236 [01:17<00:22,  2.31it/s][A
 78%|████████████████████████████████▏        | 185/236 [01:18<00:21,  2.32it/s][A
 79%|████████████████████████████████▎        | 186/236 [01:18<00:22,  2.25it/s][A
 79%|████████████████████████████████▍        | 187/236 [01:19<00:22,  2.20i

  9%|███▋                                      | 21/236 [00:08<01:32,  2.34it/s][A
  9%|███▉                                      | 22/236 [00:08<01:31,  2.35it/s][A
 10%|████                                      | 23/236 [00:09<01:31,  2.34it/s][A
 10%|████▎                                     | 24/236 [00:09<01:30,  2.35it/s][A
 11%|████▍                                     | 25/236 [00:10<01:29,  2.36it/s][A
 11%|████▋                                     | 26/236 [00:10<01:29,  2.35it/s][A
 11%|████▊                                     | 27/236 [00:11<01:29,  2.35it/s][A
 12%|████▉                                     | 28/236 [00:11<01:28,  2.36it/s][A
 12%|█████▏                                    | 29/236 [00:11<01:27,  2.36it/s][A
 13%|█████▎                                    | 30/236 [00:12<01:27,  2.36it/s][A
 13%|█████▌                                    | 31/236 [00:12<01:26,  2.38it/s][A
 14%|█████▋                                    | 32/236 [00:13<01:26,  2.37i

 50%|████████████████████▌                    | 118/236 [00:49<00:49,  2.37it/s][A
 50%|████████████████████▋                    | 119/236 [00:50<00:49,  2.35it/s][A
 51%|████████████████████▊                    | 120/236 [00:50<00:48,  2.37it/s][A
 51%|█████████████████████                    | 121/236 [00:50<00:48,  2.38it/s][A
 52%|█████████████████████▏                   | 122/236 [00:51<00:47,  2.39it/s][A
 52%|█████████████████████▎                   | 123/236 [00:51<00:47,  2.37it/s][A
 53%|█████████████████████▌                   | 124/236 [00:52<00:47,  2.36it/s][A
 53%|█████████████████████▋                   | 125/236 [00:52<00:47,  2.36it/s][A
 53%|█████████████████████▉                   | 126/236 [00:53<00:46,  2.36it/s][A
 54%|██████████████████████                   | 127/236 [00:53<00:46,  2.35it/s][A
 54%|██████████████████████▏                  | 128/236 [00:53<00:45,  2.35it/s][A
 55%|██████████████████████▍                  | 129/236 [00:54<00:45,  2.35i

 91%|█████████████████████████████████████▎   | 215/236 [01:30<00:08,  2.36it/s][A
 92%|█████████████████████████████████████▌   | 216/236 [01:31<00:08,  2.33it/s][A
 92%|█████████████████████████████████████▋   | 217/236 [01:31<00:08,  2.23it/s][A
 92%|█████████████████████████████████████▊   | 218/236 [01:32<00:08,  2.17it/s][A
 93%|██████████████████████████████████████   | 219/236 [01:32<00:07,  2.23it/s][A
 93%|██████████████████████████████████████▏  | 220/236 [01:32<00:07,  2.27it/s][A
 94%|██████████████████████████████████████▍  | 221/236 [01:33<00:06,  2.29it/s][A
 94%|██████████████████████████████████████▌  | 222/236 [01:33<00:06,  2.31it/s][A
 94%|██████████████████████████████████████▋  | 223/236 [01:34<00:05,  2.33it/s][A
 95%|██████████████████████████████████████▉  | 224/236 [01:34<00:05,  2.34it/s][A
 95%|███████████████████████████████████████  | 225/236 [01:34<00:04,  2.35it/s][A
 96%|███████████████████████████████████████▎ | 226/236 [01:35<00:04,  2.36i

 25%|██████████▋                               | 60/236 [00:25<01:15,  2.34it/s][A
 26%|██████████▊                               | 61/236 [00:25<01:15,  2.33it/s][A
 26%|███████████                               | 62/236 [00:26<01:14,  2.33it/s][A
 27%|███████████▏                              | 63/236 [00:26<01:14,  2.33it/s][A
 27%|███████████▍                              | 64/236 [00:27<01:13,  2.33it/s][A
 28%|███████████▌                              | 65/236 [00:27<01:13,  2.33it/s][A
 28%|███████████▋                              | 66/236 [00:27<01:12,  2.34it/s][A
 28%|███████████▉                              | 67/236 [00:28<01:12,  2.34it/s][A
 29%|████████████                              | 68/236 [00:28<01:11,  2.34it/s][A
 29%|████████████▎                             | 69/236 [00:29<01:11,  2.34it/s][A
 30%|████████████▍                             | 70/236 [00:29<01:10,  2.34it/s][A
 30%|████████████▋                             | 71/236 [00:30<01:10,  2.35i

 67%|███████████████████████████▎             | 157/236 [01:06<00:33,  2.32it/s][A
 67%|███████████████████████████▍             | 158/236 [01:07<00:33,  2.32it/s][A
 67%|███████████████████████████▌             | 159/236 [01:07<00:33,  2.32it/s][A
 68%|███████████████████████████▊             | 160/236 [01:08<00:32,  2.31it/s][A
 68%|███████████████████████████▉             | 161/236 [01:08<00:32,  2.31it/s][A
 69%|████████████████████████████▏            | 162/236 [01:09<00:32,  2.31it/s][A
 69%|████████████████████████████▎            | 163/236 [01:09<00:31,  2.32it/s][A
 69%|████████████████████████████▍            | 164/236 [01:09<00:31,  2.32it/s][A
 70%|████████████████████████████▋            | 165/236 [01:10<00:30,  2.32it/s][A
 70%|████████████████████████████▊            | 166/236 [01:10<00:30,  2.32it/s][A
 71%|█████████████████████████████            | 167/236 [01:11<00:29,  2.33it/s][A
 71%|█████████████████████████████▏           | 168/236 [01:11<00:29,  2.32i

{'loss': 0.1145, 'learning_rate': 4.16135881104034e-06, 'epoch': 1.17}          
 58%|███████████████████████▉                 | 550/942 [41:56<16:50,  2.58s/it][INFO|trainer.py:725] 2022-10-13 19:08:31,522 >> The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: sentences. If sentences are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
[INFO|trainer.py:2907] 2022-10-13 19:08:31,522 >> ***** Running Evaluation *****
[INFO|trainer.py:2909] 2022-10-13 19:08:31,522 >>   Num examples = 1883
[INFO|trainer.py:2912] 2022-10-13 19:08:31,523 >>   Batch size = 8

  0%|                                                   | 0/236 [00:00<?, ?it/s][A
  1%|▎                                          | 2/236 [00:00<00:49,  4.70it/s][A
  1%|▌                                          | 3/236 [00:00<01:04,  3.60it/s][A
  2%|▋                                          |

 38%|████████████████                          | 90/236 [00:38<01:03,  2.31it/s][A
 39%|████████████████▏                         | 91/236 [00:38<01:02,  2.33it/s][A
 39%|████████████████▎                         | 92/236 [00:39<01:02,  2.31it/s][A
 39%|████████████████▌                         | 93/236 [00:39<01:01,  2.32it/s][A
 40%|████████████████▋                         | 94/236 [00:40<01:01,  2.30it/s][A
 40%|████████████████▉                         | 95/236 [00:40<01:00,  2.32it/s][A
 41%|█████████████████                         | 96/236 [00:40<01:00,  2.33it/s][A
 41%|█████████████████▎                        | 97/236 [00:41<00:59,  2.32it/s][A
 42%|█████████████████▍                        | 98/236 [00:41<00:58,  2.34it/s][A
 42%|█████████████████▌                        | 99/236 [00:42<00:59,  2.32it/s][A
 42%|█████████████████▎                       | 100/236 [00:42<00:58,  2.31it/s][A
 43%|█████████████████▌                       | 101/236 [00:43<00:58,  2.30i

 79%|████████████████████████████████▍        | 187/236 [01:20<00:21,  2.33it/s][A
 80%|████████████████████████████████▋        | 188/236 [01:20<00:20,  2.32it/s][A
 80%|████████████████████████████████▊        | 189/236 [01:21<00:20,  2.33it/s][A
 81%|█████████████████████████████████        | 190/236 [01:21<00:19,  2.33it/s][A
 81%|█████████████████████████████████▏       | 191/236 [01:21<00:19,  2.32it/s][A
 81%|█████████████████████████████████▎       | 192/236 [01:22<00:18,  2.33it/s][A
 82%|█████████████████████████████████▌       | 193/236 [01:22<00:18,  2.32it/s][A
 82%|█████████████████████████████████▋       | 194/236 [01:23<00:18,  2.33it/s][A
 83%|█████████████████████████████████▉       | 195/236 [01:23<00:17,  2.33it/s][A
 83%|██████████████████████████████████       | 196/236 [01:24<00:17,  2.34it/s][A
 83%|██████████████████████████████████▏      | 197/236 [01:24<00:16,  2.34it/s][A
 84%|██████████████████████████████████▍      | 198/236 [01:24<00:16,  2.34i

 14%|█████▋                                    | 32/236 [00:13<01:28,  2.31it/s][A
 14%|█████▊                                    | 33/236 [00:13<01:27,  2.31it/s][A
 14%|██████                                    | 34/236 [00:14<01:27,  2.31it/s][A
 15%|██████▏                                   | 35/236 [00:14<01:26,  2.32it/s][A
 15%|██████▍                                   | 36/236 [00:15<01:26,  2.32it/s][A
 16%|██████▌                                   | 37/236 [00:15<01:25,  2.33it/s][A
 16%|██████▊                                   | 38/236 [00:16<01:25,  2.32it/s][A
 17%|██████▉                                   | 39/236 [00:16<01:24,  2.32it/s][A
 17%|███████                                   | 40/236 [00:16<01:24,  2.33it/s][A
 17%|███████▎                                  | 41/236 [00:17<01:24,  2.32it/s][A
 18%|███████▍                                  | 42/236 [00:17<01:23,  2.33it/s][A
 18%|███████▋                                  | 43/236 [00:18<01:22,  2.33i

 55%|██████████████████████▍                  | 129/236 [00:55<00:46,  2.31it/s][A
 55%|██████████████████████▌                  | 130/236 [00:55<00:46,  2.30it/s][A
 56%|██████████████████████▊                  | 131/236 [00:56<00:45,  2.30it/s][A
 56%|██████████████████████▉                  | 132/236 [00:56<00:44,  2.31it/s][A
 56%|███████████████████████                  | 133/236 [00:57<00:44,  2.31it/s][A
 57%|███████████████████████▎                 | 134/236 [00:57<00:44,  2.29it/s][A
 57%|███████████████████████▍                 | 135/236 [00:58<00:43,  2.30it/s][A
 58%|███████████████████████▋                 | 136/236 [00:58<00:43,  2.30it/s][A
 58%|███████████████████████▊                 | 137/236 [00:58<00:42,  2.30it/s][A
 58%|███████████████████████▉                 | 138/236 [00:59<00:43,  2.28it/s][A
 59%|████████████████████████▏                | 139/236 [00:59<00:42,  2.28it/s][A
 59%|████████████████████████▎                | 140/236 [01:00<00:42,  2.26i

 96%|███████████████████████████████████████▎ | 226/236 [01:37<00:04,  2.32it/s][A
 96%|███████████████████████████████████████▍ | 227/236 [01:37<00:03,  2.32it/s][A
 97%|███████████████████████████████████████▌ | 228/236 [01:38<00:03,  2.32it/s][A
 97%|███████████████████████████████████████▊ | 229/236 [01:38<00:03,  2.32it/s][A
 97%|███████████████████████████████████████▉ | 230/236 [01:39<00:02,  2.32it/s][A
 98%|████████████████████████████████████████▏| 231/236 [01:39<00:02,  2.31it/s][A
 98%|████████████████████████████████████████▎| 232/236 [01:40<00:01,  2.31it/s][A
 99%|████████████████████████████████████████▍| 233/236 [01:40<00:01,  2.31it/s][A
 99%|████████████████████████████████████████▋| 234/236 [01:40<00:00,  2.31it/s][A
100%|████████████████████████████████████████▊| 235/236 [01:41<00:00,  2.30it/s][A
100%|█████████████████████████████████████████| 236/236 [01:41<00:00,  2.77it/s][A10/13/2022 19:14:05 - INFO - /Library/Frameworks/Python.framework/Versions/3.

 30%|████████████▋                             | 71/236 [00:30<01:12,  2.27it/s][A
 31%|████████████▊                             | 72/236 [00:31<01:12,  2.28it/s][A
 31%|████████████▉                             | 73/236 [00:31<01:10,  2.30it/s][A
 31%|█████████████▏                            | 74/236 [00:31<01:10,  2.30it/s][A
 32%|█████████████▎                            | 75/236 [00:32<01:09,  2.30it/s][A
 32%|█████████████▌                            | 76/236 [00:32<01:09,  2.32it/s][A
 33%|█████████████▋                            | 77/236 [00:33<01:08,  2.30it/s][A
 33%|█████████████▉                            | 78/236 [00:33<01:08,  2.30it/s][A
 33%|██████████████                            | 79/236 [00:34<01:08,  2.30it/s][A
 34%|██████████████▏                           | 80/236 [00:34<01:07,  2.32it/s][A
 34%|██████████████▍                           | 81/236 [00:34<01:06,  2.32it/s][A
 35%|██████████████▌                           | 82/236 [00:35<01:07,  2.30i

 71%|█████████████████████████████▏           | 168/236 [01:12<00:29,  2.30it/s][A
 72%|█████████████████████████████▎           | 169/236 [01:13<00:29,  2.30it/s][A
 72%|█████████████████████████████▌           | 170/236 [01:13<00:28,  2.30it/s][A
 72%|█████████████████████████████▋           | 171/236 [01:14<00:28,  2.31it/s][A
 73%|█████████████████████████████▉           | 172/236 [01:14<00:27,  2.30it/s][A
 73%|██████████████████████████████           | 173/236 [01:15<00:27,  2.28it/s][A
 74%|██████████████████████████████▏          | 174/236 [01:15<00:27,  2.29it/s][A
 74%|██████████████████████████████▍          | 175/236 [01:15<00:26,  2.31it/s][A
 75%|██████████████████████████████▌          | 176/236 [01:16<00:26,  2.30it/s][A
 75%|██████████████████████████████▊          | 177/236 [01:16<00:25,  2.32it/s][A
 75%|██████████████████████████████▉          | 178/236 [01:17<00:24,  2.33it/s][A
 76%|███████████████████████████████          | 179/236 [01:17<00:24,  2.33i

  6%|██▎                                       | 13/236 [00:05<01:32,  2.41it/s][A
  6%|██▍                                       | 14/236 [00:05<01:32,  2.40it/s][A
  6%|██▋                                       | 15/236 [00:05<01:32,  2.39it/s][A
  7%|██▊                                       | 16/236 [00:06<01:31,  2.39it/s][A
  7%|███                                       | 17/236 [00:06<01:31,  2.38it/s][A
  8%|███▏                                      | 18/236 [00:07<01:31,  2.38it/s][A
  8%|███▍                                      | 19/236 [00:07<01:31,  2.37it/s][A
  8%|███▌                                      | 20/236 [00:08<01:31,  2.37it/s][A
  9%|███▋                                      | 21/236 [00:08<01:30,  2.37it/s][A
  9%|███▉                                      | 22/236 [00:08<01:30,  2.37it/s][A
 10%|████                                      | 23/236 [00:09<01:29,  2.37it/s][A
 10%|████▎                                     | 24/236 [00:09<01:29,  2.36i

 47%|███████████████████                      | 110/236 [00:46<00:52,  2.38it/s][A
 47%|███████████████████▎                     | 111/236 [00:46<00:52,  2.38it/s][A
 47%|███████████████████▍                     | 112/236 [00:46<00:52,  2.38it/s][A
 48%|███████████████████▋                     | 113/236 [00:47<00:51,  2.38it/s][A
 48%|███████████████████▊                     | 114/236 [00:47<00:51,  2.37it/s][A
 49%|███████████████████▉                     | 115/236 [00:48<00:51,  2.37it/s][A
 49%|████████████████████▏                    | 116/236 [00:48<00:50,  2.37it/s][A
 50%|████████████████████▎                    | 117/236 [00:48<00:50,  2.37it/s][A
 50%|████████████████████▌                    | 118/236 [00:49<00:49,  2.37it/s][A
 50%|████████████████████▋                    | 119/236 [00:49<00:49,  2.37it/s][A
 51%|████████████████████▊                    | 120/236 [00:50<00:48,  2.38it/s][A
 51%|█████████████████████                    | 121/236 [00:50<00:48,  2.38i

 88%|███████████████████████████████████▉     | 207/236 [01:26<00:12,  2.38it/s][A
 88%|████████████████████████████████████▏    | 208/236 [01:27<00:11,  2.38it/s][A
 89%|████████████████████████████████████▎    | 209/236 [01:27<00:11,  2.38it/s][A
 89%|████████████████████████████████████▍    | 210/236 [01:28<00:10,  2.38it/s][A
 89%|████████████████████████████████████▋    | 211/236 [01:28<00:10,  2.36it/s][A
 90%|████████████████████████████████████▊    | 212/236 [01:29<00:10,  2.36it/s][A
 90%|█████████████████████████████████████    | 213/236 [01:29<00:09,  2.35it/s][A
 91%|█████████████████████████████████████▏   | 214/236 [01:29<00:09,  2.36it/s][A
 91%|█████████████████████████████████████▎   | 215/236 [01:30<00:08,  2.37it/s][A
 92%|█████████████████████████████████████▌   | 216/236 [01:30<00:08,  2.37it/s][A
 92%|█████████████████████████████████████▋   | 217/236 [01:31<00:08,  2.37it/s][A
 92%|█████████████████████████████████████▊   | 218/236 [01:31<00:07,  2.37i

 22%|█████████▎                                | 52/236 [00:22<01:21,  2.26it/s][A
 22%|█████████▍                                | 53/236 [00:22<01:20,  2.27it/s][A
 23%|█████████▌                                | 54/236 [00:23<01:20,  2.27it/s][A
 23%|█████████▊                                | 55/236 [00:23<01:19,  2.26it/s][A
 24%|█████████▉                                | 56/236 [00:24<01:19,  2.26it/s][A
 24%|██████████▏                               | 57/236 [00:24<01:19,  2.25it/s][A
 25%|██████████▎                               | 58/236 [00:25<01:19,  2.25it/s][A
 25%|██████████▌                               | 59/236 [00:25<01:18,  2.26it/s][A
 25%|██████████▋                               | 60/236 [00:26<01:17,  2.26it/s][A
 26%|██████████▊                               | 61/236 [00:26<01:17,  2.26it/s][A
 26%|███████████                               | 62/236 [00:26<01:17,  2.24it/s][A
 27%|███████████▏                              | 63/236 [00:27<01:16,  2.26i

 63%|█████████████████████████▉               | 149/236 [01:07<00:41,  2.10it/s][A
 64%|██████████████████████████               | 150/236 [01:08<00:40,  2.12it/s][A
 64%|██████████████████████████▏              | 151/236 [01:08<00:41,  2.07it/s][A
 64%|██████████████████████████▍              | 152/236 [01:09<00:39,  2.12it/s][A
 65%|██████████████████████████▌              | 153/236 [01:09<00:38,  2.14it/s][A
 65%|██████████████████████████▊              | 154/236 [01:10<00:37,  2.19it/s][A
 66%|██████████████████████████▉              | 155/236 [01:10<00:36,  2.23it/s][A
 66%|███████████████████████████              | 156/236 [01:11<00:35,  2.25it/s][A
 67%|███████████████████████████▎             | 157/236 [01:11<00:34,  2.28it/s][A
 67%|███████████████████████████▍             | 158/236 [01:11<00:34,  2.26it/s][A
 67%|███████████████████████████▌             | 159/236 [01:12<00:33,  2.29it/s][A
 68%|███████████████████████████▊             | 160/236 [01:12<00:32,  2.31i


  0%|                                                   | 0/236 [00:00<?, ?it/s][A
  1%|▎                                          | 2/236 [00:00<00:51,  4.59it/s][A
  1%|▌                                          | 3/236 [00:00<01:08,  3.39it/s][A
  2%|▋                                          | 4/236 [00:01<01:18,  2.96it/s][A
  2%|▉                                          | 5/236 [00:01<01:25,  2.69it/s][A
  3%|█                                          | 6/236 [00:02<01:28,  2.58it/s][A
  3%|█▎                                         | 7/236 [00:02<01:31,  2.50it/s][A
  3%|█▍                                         | 8/236 [00:03<01:33,  2.44it/s][A
  4%|█▋                                         | 9/236 [00:03<01:34,  2.40it/s][A
  4%|█▊                                        | 10/236 [00:03<01:35,  2.37it/s][A
  5%|█▉                                        | 11/236 [00:04<01:36,  2.34it/s][A
  5%|██▏                                       | 12/236 [00:04<01:41,  2.21

 42%|█████████████████▍                        | 98/236 [00:42<00:59,  2.31it/s][A
 42%|█████████████████▌                        | 99/236 [00:42<00:59,  2.31it/s][A
 42%|█████████████████▎                       | 100/236 [00:43<00:59,  2.28it/s][A
 43%|█████████████████▌                       | 101/236 [00:43<00:59,  2.28it/s][A
 43%|█████████████████▋                       | 102/236 [00:44<00:58,  2.27it/s][A
 44%|█████████████████▉                       | 103/236 [00:44<00:58,  2.28it/s][A
 44%|██████████████████                       | 104/236 [00:44<00:57,  2.31it/s][A
 44%|██████████████████▏                      | 105/236 [00:45<00:56,  2.30it/s][A
 45%|██████████████████▍                      | 106/236 [00:45<00:56,  2.31it/s][A
 45%|██████████████████▌                      | 107/236 [00:46<00:55,  2.32it/s][A
 46%|██████████████████▊                      | 108/236 [00:46<00:54,  2.34it/s][A
 46%|██████████████████▉                      | 109/236 [00:47<00:54,  2.33i

 83%|█████████████████████████████████▉       | 195/236 [01:24<00:17,  2.31it/s][A
 83%|██████████████████████████████████       | 196/236 [01:24<00:17,  2.31it/s][A
 83%|██████████████████████████████████▏      | 197/236 [01:25<00:16,  2.31it/s][A
 84%|██████████████████████████████████▍      | 198/236 [01:25<00:16,  2.31it/s][A
 84%|██████████████████████████████████▌      | 199/236 [01:26<00:15,  2.32it/s][A
 85%|██████████████████████████████████▋      | 200/236 [01:26<00:15,  2.32it/s][A
 85%|██████████████████████████████████▉      | 201/236 [01:27<00:15,  2.32it/s][A
 86%|███████████████████████████████████      | 202/236 [01:27<00:14,  2.32it/s][A
 86%|███████████████████████████████████▎     | 203/236 [01:27<00:14,  2.32it/s][A
 86%|███████████████████████████████████▍     | 204/236 [01:28<00:13,  2.31it/s][A
 87%|███████████████████████████████████▌     | 205/236 [01:28<00:13,  2.31it/s][A
 87%|███████████████████████████████████▊     | 206/236 [01:29<00:13,  2.30i

 17%|███████                                   | 40/236 [00:18<01:41,  1.93it/s][A
 17%|███████▎                                  | 41/236 [00:18<01:38,  1.98it/s][A
 18%|███████▍                                  | 42/236 [00:18<01:35,  2.04it/s][A
 18%|███████▋                                  | 43/236 [00:19<01:32,  2.08it/s][A
 19%|███████▊                                  | 44/236 [00:19<01:30,  2.13it/s][A
 19%|████████                                  | 45/236 [00:20<01:29,  2.14it/s][A
 19%|████████▏                                 | 46/236 [00:20<01:28,  2.15it/s][A
 20%|████████▎                                 | 47/236 [00:21<01:26,  2.18it/s][A
 20%|████████▌                                 | 48/236 [00:21<01:25,  2.19it/s][A
 21%|████████▋                                 | 49/236 [00:22<01:25,  2.19it/s][A
 21%|████████▉                                 | 50/236 [00:22<01:24,  2.20it/s][A
 22%|█████████                                 | 51/236 [00:23<01:23,  2.20i

 58%|███████████████████████▊                 | 137/236 [01:02<00:43,  2.30it/s][A
 58%|███████████████████████▉                 | 138/236 [01:02<00:42,  2.30it/s][A
 59%|████████████████████████▏                | 139/236 [01:03<00:42,  2.30it/s][A
 59%|████████████████████████▎                | 140/236 [01:03<00:41,  2.30it/s][A
 60%|████████████████████████▍                | 141/236 [01:03<00:41,  2.30it/s][A
 60%|████████████████████████▋                | 142/236 [01:04<00:40,  2.31it/s][A
 61%|████████████████████████▊                | 143/236 [01:04<00:40,  2.31it/s][A
 61%|█████████████████████████                | 144/236 [01:05<00:39,  2.32it/s][A
 61%|█████████████████████████▏               | 145/236 [01:05<00:39,  2.32it/s][A
 62%|█████████████████████████▎               | 146/236 [01:06<00:38,  2.32it/s][A
 62%|█████████████████████████▌               | 147/236 [01:06<00:38,  2.32it/s][A
 63%|█████████████████████████▋               | 148/236 [01:07<00:38,  2.30i

 99%|████████████████████████████████████████▋| 234/236 [01:44<00:00,  2.19it/s][A
100%|████████████████████████████████████████▊| 235/236 [01:44<00:00,  2.23it/s][A
100%|█████████████████████████████████████████| 236/236 [01:45<00:00,  2.68it/s][A10/13/2022 20:10:58 - INFO - /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/datasets/metric.py -   Removing /Users/shreyaprabhu/.cache/huggingface/metrics/f1/default/default_experiment-1-0.arrow
                                                                                
[A{'eval_loss': 0.18179234862327576, 'eval_f1': 0.7607655502392345, 'eval_runtime': 106.407, 'eval_samples_per_second': 17.696, 'eval_steps_per_second': 2.218, 'epoch': 1.8}
 90%|███████████████████████████████████▏   | 850/942 [1:44:23<04:16,  2.79s/it]
100%|█████████████████████████████████████████| 236/236 [01:45<00:00,  2.68it/s][A
{'loss': 0.171, 'learning_rate': 4.45859872611465e-07, 'epoch': 1.91}           [A
 96%|█████████████

 33%|██████████████                            | 79/236 [00:35<01:12,  2.18it/s][A
 34%|██████████████▏                           | 80/236 [00:36<01:11,  2.17it/s][A
 34%|██████████████▍                           | 81/236 [00:36<01:10,  2.20it/s][A
 35%|██████████████▌                           | 82/236 [00:37<01:10,  2.20it/s][A
 35%|██████████████▊                           | 83/236 [00:37<01:09,  2.21it/s][A
 36%|██████████████▉                           | 84/236 [00:37<01:08,  2.22it/s][A
 36%|███████████████▏                          | 85/236 [00:38<01:08,  2.21it/s][A
 36%|███████████████▎                          | 86/236 [00:38<01:08,  2.20it/s][A
 37%|███████████████▍                          | 87/236 [00:39<01:07,  2.21it/s][A
 37%|███████████████▋                          | 88/236 [00:39<01:06,  2.21it/s][A
 38%|███████████████▊                          | 89/236 [00:40<01:06,  2.20it/s][A
 38%|████████████████                          | 90/236 [00:40<01:06,  2.19i

 75%|██████████████████████████████▌          | 176/236 [01:19<00:27,  2.18it/s][A
 75%|██████████████████████████████▊          | 177/236 [01:20<00:26,  2.20it/s][A
 75%|██████████████████████████████▉          | 178/236 [01:20<00:26,  2.20it/s][A
 76%|███████████████████████████████          | 179/236 [01:21<00:25,  2.20it/s][A
 76%|███████████████████████████████▎         | 180/236 [01:21<00:25,  2.20it/s][A
 77%|███████████████████████████████▍         | 181/236 [01:22<00:25,  2.19it/s][A
 77%|███████████████████████████████▌         | 182/236 [01:22<00:24,  2.19it/s][A
 78%|███████████████████████████████▊         | 183/236 [01:23<00:24,  2.19it/s][A
 78%|███████████████████████████████▉         | 184/236 [01:23<00:23,  2.18it/s][A
 78%|████████████████████████████████▏        | 185/236 [01:24<00:23,  2.20it/s][A
 79%|████████████████████████████████▎        | 186/236 [01:24<00:22,  2.20it/s][A
 79%|████████████████████████████████▍        | 187/236 [01:24<00:22,  2.19i