# All Data

In [2]:
# Scorer objects consumed by compute_metrics (loaded in the order f1, recall, precision).
f1, recall, precision = (
    load_metric(metric_name) for metric_name in ("f1", "recall", "precision")
)
def preprocess_function(examples, tok, max_length=512):
    """Tokenize the ``"text"`` entry of ``examples`` with truncation enabled.

    Args:
        examples: Mapping that has a ``"text"`` key; its value is handed to
            ``tok`` unchanged (a batch of texts when used through
            ``Dataset.map(..., batched=True)``).
        tok: Callable tokenizer, invoked as
            ``tok(texts, truncation=True, max_length=max_length)``.
        max_length: Upper bound on sequence length forwarded to ``tok``.
            Defaults to 512, the value that used to be hard-coded, so
            existing callers are unaffected.

    Returns:
        Whatever ``tok`` returns for the given texts.
    """
    return tok(examples["text"], truncation=True, max_length=max_length)


def compute_metrics(p):
    """Score one evaluation pass with macro-averaged F1, recall and precision.

    Args:
        p: A pair that unpacks into ``(predictions, labels)``. The predicted
            class is the argmax of ``predictions`` along axis 1.

    Returns:
        The dict produced by the ``f1`` scorer, extended with the entries
        returned by the ``recall`` and ``precision`` scorers.
    """
    scores, gold = p
    predicted = np.argmax(scores, axis=1)
    shared_kwargs = dict(predictions=predicted, references=gold, average="macro")

    results = f1.compute(**shared_kwargs)
    for scorer in (recall, precision):
        results.update(scorer.compute(**shared_kwargs))
    return results
# Fetch sub-task "A" with return_type="dataset".
# NOTE(review): pre_proccessed=True presumably selects already-cleaned text — confirm in getData.
data = getData(sub_task="A", return_type="dataset", pre_proccessed=True)

# Tokenizer is loaded from the local Marbertv2 checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(
    "/scratch/mt/ashapiro/Hate_Speech/Models/Marbertv2"
)

# Collator that pads using the same tokenizer.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Tokenize every split in batches; the tokenizer reaches preprocess_function as `tok`.
tokenized_data = data.map(
    preprocess_function,
    batched=True,
    fn_kwargs={"tok": tokenizer},
)

100%|██████████| 2/2 [00:00<00:00, 559.05it/s]


In [3]:
# Display the loaded dataset object (splits, feature names and row counts).
data

DatasetDict({
    train: Dataset({
        features: ['text', 'labels'],
        num_rows: 44324
    })
    test: Dataset({
        features: ['text', 'labels'],
        num_rows: 1270
    })
})

In [4]:
# Sequence-classification model with a 2-label head, initialised from the
# local Marbertv2 checkpoint directory.
model = AutoModelForSequenceClassification.from_pretrained(
    "/scratch/mt/ashapiro/Hate_Speech/Models/Marbertv2",
    num_labels=2,
)

training_args = TrainingArguments(
    output_dir="/scratch/mt/ashapiro/Hate_Speech/All_data_run/",
    learning_rate=2e-05,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=4,
    # Evaluate and checkpoint once per epoch; the two strategies must match
    # for load_best_model_at_end to work.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # A full checkpoint is written every epoch and the run may last up to
    # 100 epochs, so cap how many are kept on disk. With
    # load_best_model_at_end=True the best checkpoint is always retained in
    # addition to the most recent one(s).
    save_total_limit=2,
    # compute_metrics returns an "f1" entry; it drives both best-checkpoint
    # selection and early stopping.
    metric_for_best_model="f1",
    # Upper bound only: EarlyStoppingCallback below ends the run earlier
    # once F1 stops improving.
    num_train_epochs=100,
    weight_decay=0.001,
    load_best_model_at_end=True,
    group_by_length=True,
    seed=20,
    report_to="none",
)

# NOTE(review): eval_dataset is the "test" split, so checkpoint selection and
# early stopping are tuned on the same data the final scores are reported on.
# Consider holding out a validation split from "train" for this purpose.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_data["train"],
    eval_dataset=tokenized_data["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    # Stop after 10 consecutive evaluations without an F1 improvement.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=10)],
)

Some weights of the model checkpoint at /scratch/mt/ashapiro/Hate_Speech/Models/Marbertv2 were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequence

In [5]:
# Launch fine-tuning; the value returned by train() is shown as the cell output.
trainer.train()

The following columns in the training set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 44324
  Num Epochs = 100
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 277100
  0%|          | 501/277100 [00:34<5:49:27, 13.19it/s]

{'loss': 0.4942, 'learning_rate': 1.9963911945146156e-05, 'epoch': 0.18}


  0%|          | 1001/277100 [01:09<5:21:08, 14.33it/s]

{'loss': 0.4453, 'learning_rate': 1.9927823890292315e-05, 'epoch': 0.36}


  1%|          | 1501/277100 [01:42<5:35:32, 13.69it/s]

{'loss': 0.4208, 'learning_rate': 1.989173583543847e-05, 'epoch': 0.54}


  1%|          | 2001/277100 [02:17<5:58:16, 12.80it/s]

{'loss': 0.4081, 'learning_rate': 1.9855647780584627e-05, 'epoch': 0.72}


  1%|          | 2501/277100 [02:50<5:21:24, 14.24it/s]

{'loss': 0.4101, 'learning_rate': 1.9819559725730782e-05, 'epoch': 0.9}


  1%|          | 2771/277100 [03:08<4:34:45, 16.64it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1270
  Batch size = 4

  0%|          | 0/318 [00:00<?, ?it/s][A
  3%|▎         | 9/318 [00:00<00:03, 87.67it/s][A
  6%|▌         | 18/318 [00:00<00:03, 81.05it/s][A
  8%|▊         | 27/318 [00:00<00:03, 78.72it/s][A
 11%|█         | 35/318 [00:00<00:03, 72.30it/s][A
 14%|█▎        | 43/318 [00:00<00:03, 72.81it/s][A
 16%|█▌        | 51/318 [00:00<00:03, 73.57it/s][A
 19%|█▊        | 59/318 [00:00<00:03, 68.93it/s][A
 21%|██        | 66/318 [00:00<00:03, 68.16it/s][A
 23%|██▎       | 74/318 [00:01<00:03, 70.95it/s][A
 26%|██▌       | 82/318 [00:01<00:03, 71.55it/s][A
 28%|██▊       | 90/318 [00:01<00:03, 73.50it/s][A
 

{'eval_loss': 0.340757817029953, 'eval_f1': 0.8346165439057007, 'eval_recall': 0.8272414423890426, 'eval_precision': 0.8438138138138138, 'eval_runtime': 4.5693, 'eval_samples_per_second': 277.944, 'eval_steps_per_second': 69.595, 'epoch': 1.0}


Model weights saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-2771/pytorch_model.bin
tokenizer config file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-2771/tokenizer_config.json
Special tokens file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-2771/special_tokens_map.json
  1%|          | 3003/277100 [03:32<4:37:52, 16.44it/s] 

{'loss': 0.3413, 'learning_rate': 1.978347167087694e-05, 'epoch': 1.08}


  1%|▏         | 3503/277100 [04:06<5:00:02, 15.20it/s]

{'loss': 0.2763, 'learning_rate': 1.97473836160231e-05, 'epoch': 1.26}


  1%|▏         | 4001/277100 [04:39<5:04:20, 14.96it/s]

{'loss': 0.2799, 'learning_rate': 1.9711295561169253e-05, 'epoch': 1.44}


  2%|▏         | 4503/277100 [05:13<4:50:13, 15.65it/s]

{'loss': 0.2818, 'learning_rate': 1.967520750631541e-05, 'epoch': 1.62}


  2%|▏         | 5003/277100 [05:46<4:49:21, 15.67it/s]

{'loss': 0.2852, 'learning_rate': 1.963911945146157e-05, 'epoch': 1.8}


  2%|▏         | 5503/277100 [06:20<4:43:51, 15.95it/s]

{'loss': 0.2754, 'learning_rate': 1.9603031396607724e-05, 'epoch': 1.98}


  2%|▏         | 5541/277100 [06:22<4:25:56, 17.02it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1270
  Batch size = 4

  0%|          | 0/318 [00:00<?, ?it/s][A
  3%|▎         | 8/318 [00:00<00:03, 79.57it/s][A
  5%|▌         | 16/318 [00:00<00:04, 71.88it/s][A
  8%|▊         | 24/318 [00:00<00:04, 70.75it/s][A
 10%|█         | 32/318 [00:00<00:04, 66.46it/s][A
 12%|█▏        | 39/318 [00:00<00:04, 67.01it/s][A
 14%|█▍        | 46/318 [00:00<00:04, 67.86it/s][A
 17%|█▋        | 53/318 [00:00<00:04, 65.66it/s][A
 19%|█▉        | 60/318 [00:00<00:03, 64.70it/s][A
 21%|██        | 67/318 [00:00<00:03, 65.44it/s][A
 24%|██▎       | 75/318 [00:01<00:03, 67.07it/s][A
 26%|██▌       | 82/318 [00:01<00:03, 66.65it/s][A
 

{'eval_loss': 0.4675087332725525, 'eval_f1': 0.8245707611552864, 'eval_recall': 0.8411154048430247, 'eval_precision': 0.8152444951608131, 'eval_runtime': 4.7072, 'eval_samples_per_second': 269.801, 'eval_steps_per_second': 67.556, 'epoch': 2.0}


Model weights saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-5542/pytorch_model.bin
tokenizer config file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-5542/tokenizer_config.json
Special tokens file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-5542/special_tokens_map.json
  2%|▏         | 6001/277100 [07:02<5:28:58, 13.73it/s]  

{'loss': 0.177, 'learning_rate': 1.9566943341753882e-05, 'epoch': 2.17}


  2%|▏         | 6501/277100 [07:36<5:30:56, 13.63it/s]

{'loss': 0.1837, 'learning_rate': 1.9530855286900037e-05, 'epoch': 2.35}


  3%|▎         | 7001/277100 [08:09<5:31:26, 13.58it/s]

{'loss': 0.1914, 'learning_rate': 1.9494767232046195e-05, 'epoch': 2.53}


  3%|▎         | 7501/277100 [08:42<5:33:09, 13.49it/s]

{'loss': 0.1764, 'learning_rate': 1.945867917719235e-05, 'epoch': 2.71}


  3%|▎         | 8001/277100 [09:16<5:28:58, 13.63it/s]

{'loss': 0.1872, 'learning_rate': 1.9422591122338508e-05, 'epoch': 2.89}


  3%|▎         | 8313/277100 [09:37<4:38:34, 16.08it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1270
  Batch size = 4

  0%|          | 0/318 [00:00<?, ?it/s][A
  3%|▎         | 8/318 [00:00<00:04, 74.68it/s][A
  5%|▌         | 16/318 [00:00<00:04, 67.96it/s][A
  7%|▋         | 23/318 [00:00<00:04, 66.46it/s][A
 10%|▉         | 31/318 [00:00<00:04, 68.57it/s][A
 12%|█▏        | 39/318 [00:00<00:03, 69.90it/s][A
 14%|█▍        | 46/318 [00:00<00:03, 68.65it/s][A
 17%|█▋        | 53/318 [00:00<00:04, 62.08it/s][A
 19%|█▉        | 60/318 [00:00<00:04, 63.60it/s][A
 21%|██        | 67/318 [00:01<00:03, 63.60it/s][A
 23%|██▎       | 74/318 [00:01<00:03, 61.48it/s][A
 25%|██▌       | 81/318 [00:01<00:03, 60.19it/s][A
 

{'eval_loss': 0.5474981069564819, 'eval_f1': 0.8307569296375266, 'eval_recall': 0.8298853268698694, 'eval_precision': 0.8316522988505747, 'eval_runtime': 5.2906, 'eval_samples_per_second': 240.047, 'eval_steps_per_second': 60.106, 'epoch': 3.0}


Model weights saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-8313/pytorch_model.bin
tokenizer config file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-8313/tokenizer_config.json
Special tokens file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-8313/special_tokens_map.json
  3%|▎         | 8503/277100 [09:59<4:34:50, 16.29it/s]  

{'loss': 0.176, 'learning_rate': 1.9386503067484663e-05, 'epoch': 3.07}


  3%|▎         | 9003/277100 [10:32<4:29:41, 16.57it/s]

{'loss': 0.129, 'learning_rate': 1.935041501263082e-05, 'epoch': 3.25}


  3%|▎         | 9503/277100 [11:06<4:28:26, 16.61it/s]

{'loss': 0.1407, 'learning_rate': 1.9314326957776976e-05, 'epoch': 3.43}


  4%|▎         | 10003/277100 [11:39<4:26:52, 16.68it/s]

{'loss': 0.1284, 'learning_rate': 1.9278238902923134e-05, 'epoch': 3.61}


  4%|▍         | 10503/277100 [12:12<4:29:07, 16.51it/s]

{'loss': 0.1428, 'learning_rate': 1.924215084806929e-05, 'epoch': 3.79}


  4%|▍         | 11003/277100 [12:46<4:16:44, 17.27it/s]

{'loss': 0.1542, 'learning_rate': 1.9206062793215447e-05, 'epoch': 3.97}


  4%|▍         | 11083/277100 [12:51<4:48:45, 15.35it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1270
  Batch size = 4

  0%|          | 0/318 [00:00<?, ?it/s][A
  3%|▎         | 9/318 [00:00<00:03, 84.81it/s][A
  6%|▌         | 18/318 [00:00<00:03, 75.69it/s][A
  8%|▊         | 26/318 [00:00<00:03, 75.08it/s][A
 11%|█         | 34/318 [00:00<00:03, 73.54it/s][A
 13%|█▎        | 42/318 [00:00<00:03, 73.68it/s][A
 16%|█▌        | 50/318 [00:00<00:03, 73.45it/s][A
 18%|█▊        | 58/318 [00:00<00:03, 69.64it/s][A
 21%|██        | 66/318 [00:00<00:03, 70.39it/s][A
 23%|██▎       | 74/318 [00:01<00:03, 71.22it/s][A
 26%|██▌       | 82/318 [00:01<00:03, 72.04it/s][A
 28%|██▊       | 90/318 [00:01<00:03, 72.93it/s][A


{'eval_loss': 0.7246872186660767, 'eval_f1': 0.8258574137130198, 'eval_recall': 0.8243574646148217, 'eval_precision': 0.827429951439923, 'eval_runtime': 4.4838, 'eval_samples_per_second': 283.24, 'eval_steps_per_second': 70.922, 'epoch': 4.0}


Model weights saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-11084/pytorch_model.bin
tokenizer config file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-11084/tokenizer_config.json
Special tokens file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-11084/special_tokens_map.json
  4%|▍         | 11503/277100 [13:27<5:15:05, 14.05it/s] 

{'loss': 0.1018, 'learning_rate': 1.91699747383616e-05, 'epoch': 4.15}


  4%|▍         | 12003/277100 [14:00<4:48:04, 15.34it/s]

{'loss': 0.0929, 'learning_rate': 1.913388668350776e-05, 'epoch': 4.33}


  5%|▍         | 12503/277100 [14:32<4:49:24, 15.24it/s]

{'loss': 0.097, 'learning_rate': 1.9097798628653918e-05, 'epoch': 4.51}


  5%|▍         | 13003/277100 [15:06<5:00:15, 14.66it/s]

{'loss': 0.1094, 'learning_rate': 1.9061710573800073e-05, 'epoch': 4.69}


  5%|▍         | 13503/277100 [15:38<4:43:52, 15.48it/s]

{'loss': 0.1234, 'learning_rate': 1.902562251894623e-05, 'epoch': 4.87}


  5%|▌         | 13855/277100 [16:01<4:26:14, 16.48it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1270
  Batch size = 4

  0%|          | 0/318 [00:00<?, ?it/s][A
  3%|▎         | 9/318 [00:00<00:03, 86.59it/s][A
  6%|▌         | 18/318 [00:00<00:04, 73.82it/s][A
  8%|▊         | 26/318 [00:00<00:04, 72.38it/s][A
 11%|█         | 34/318 [00:00<00:03, 73.49it/s][A
 13%|█▎        | 42/318 [00:00<00:03, 71.98it/s][A
 16%|█▌        | 50/318 [00:00<00:03, 73.34it/s][A
 18%|█▊        | 58/318 [00:00<00:03, 73.97it/s][A
 21%|██        | 66/318 [00:00<00:03, 72.14it/s][A
 23%|██▎       | 74/318 [00:01<00:03, 72.12it/s][A
 26%|██▌       | 82/318 [00:01<00:03, 71.02it/s][A
 28%|██▊       | 90/318 [00:01<00:03, 71.93it/s][A


{'eval_loss': 0.9343557953834534, 'eval_f1': 0.8175559818865169, 'eval_recall': 0.8207303409324767, 'eval_precision': 0.8146992587689901, 'eval_runtime': 4.608, 'eval_samples_per_second': 275.607, 'eval_steps_per_second': 69.01, 'epoch': 5.0}


Model weights saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-13855/pytorch_model.bin
tokenizer config file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-13855/tokenizer_config.json
Special tokens file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-13855/special_tokens_map.json
  5%|▌         | 14003/277100 [16:19<4:16:05, 17.12it/s] 

{'loss': 0.0916, 'learning_rate': 1.898953446409239e-05, 'epoch': 5.05}


  5%|▌         | 14503/277100 [16:52<4:36:30, 15.83it/s]

{'loss': 0.0737, 'learning_rate': 1.8953446409238544e-05, 'epoch': 5.23}


  5%|▌         | 15003/277100 [17:25<4:17:19, 16.98it/s]

{'loss': 0.0824, 'learning_rate': 1.8917358354384702e-05, 'epoch': 5.41}


  6%|▌         | 15503/277100 [17:58<4:31:38, 16.05it/s]

{'loss': 0.0813, 'learning_rate': 1.8881270299530857e-05, 'epoch': 5.59}


  6%|▌         | 16003/277100 [18:30<4:11:26, 17.31it/s]

{'loss': 0.0755, 'learning_rate': 1.8845182244677015e-05, 'epoch': 5.77}


  6%|▌         | 16501/277100 [19:03<4:23:36, 16.48it/s]

{'loss': 0.1019, 'learning_rate': 1.880909418982317e-05, 'epoch': 5.95}


  6%|▌         | 16625/277100 [19:12<4:24:51, 16.39it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1270
  Batch size = 4

  0%|          | 0/318 [00:00<?, ?it/s][A
  3%|▎         | 9/318 [00:00<00:03, 83.18it/s][A
  6%|▌         | 18/318 [00:00<00:04, 70.09it/s][A
  8%|▊         | 26/318 [00:00<00:04, 71.70it/s][A
 11%|█         | 34/318 [00:00<00:04, 70.94it/s][A
 13%|█▎        | 42/318 [00:00<00:03, 70.05it/s][A
 16%|█▌        | 50/318 [00:00<00:03, 71.34it/s][A
 18%|█▊        | 58/318 [00:00<00:03, 70.96it/s][A
 21%|██        | 66/318 [00:00<00:03, 72.36it/s][A
 23%|██▎       | 74/318 [00:01<00:03, 73.15it/s][A
 26%|██▌       | 82/318 [00:01<00:03, 72.56it/s][A
 28%|██▊       | 90/318 [00:01<00:03, 73.23it/s][A


{'eval_loss': 0.9548495411872864, 'eval_f1': 0.8160330009411993, 'eval_recall': 0.8104949351748107, 'eval_precision': 0.8226717677426254, 'eval_runtime': 4.639, 'eval_samples_per_second': 273.768, 'eval_steps_per_second': 68.55, 'epoch': 6.0}


Model weights saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-16626/pytorch_model.bin
tokenizer config file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-16626/tokenizer_config.json
Special tokens file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-16626/special_tokens_map.json
  6%|▌         | 17003/277100 [19:45<4:21:46, 16.56it/s] 

{'loss': 0.0642, 'learning_rate': 1.8773006134969328e-05, 'epoch': 6.13}


  6%|▋         | 17503/277100 [20:18<4:23:21, 16.43it/s]

{'loss': 0.0621, 'learning_rate': 1.8736918080115482e-05, 'epoch': 6.32}


  6%|▋         | 18003/277100 [20:51<4:32:03, 15.87it/s]

{'loss': 0.0759, 'learning_rate': 1.870083002526164e-05, 'epoch': 6.5}


  7%|▋         | 18503/277100 [21:24<4:37:43, 15.52it/s]

{'loss': 0.0743, 'learning_rate': 1.8664741970407795e-05, 'epoch': 6.68}


  7%|▋         | 19003/277100 [21:56<4:17:10, 16.73it/s]

{'loss': 0.0769, 'learning_rate': 1.8628653915553953e-05, 'epoch': 6.86}


  7%|▋         | 19397/277100 [22:21<4:05:23, 17.50it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1270
  Batch size = 4

  0%|          | 0/318 [00:00<?, ?it/s][A
  3%|▎         | 9/318 [00:00<00:03, 80.15it/s][A
  6%|▌         | 18/318 [00:00<00:03, 79.24it/s][A
  8%|▊         | 26/318 [00:00<00:03, 78.74it/s][A
 11%|█         | 34/318 [00:00<00:03, 78.48it/s][A
 13%|█▎        | 42/318 [00:00<00:03, 78.09it/s][A
 16%|█▌        | 51/318 [00:00<00:03, 78.95it/s][A
 19%|█▊        | 59/318 [00:00<00:03, 78.66it/s][A
 21%|██        | 67/318 [00:00<00:03, 78.10it/s][A
 24%|██▎       | 75/318 [00:00<00:03, 77.64it/s][A
 26%|██▌       | 83/318 [00:01<00:03, 77.59it/s][A
 29%|██▊       | 91/318 [00:01<00:02, 77.31it/s][A


{'eval_loss': 0.9339761137962341, 'eval_f1': 0.8044126134925744, 'eval_recall': 0.8143821599249994, 'eval_precision': 0.7974852327065798, 'eval_runtime': 4.2481, 'eval_samples_per_second': 298.955, 'eval_steps_per_second': 74.857, 'epoch': 7.0}


Model weights saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-19397/pytorch_model.bin
tokenizer config file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-19397/tokenizer_config.json
Special tokens file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-19397/special_tokens_map.json
  7%|▋         | 19501/277100 [22:35<5:08:31, 13.92it/s] 

{'loss': 0.0683, 'learning_rate': 1.8592565860700108e-05, 'epoch': 7.04}


  7%|▋         | 20001/277100 [23:07<5:58:39, 11.95it/s]

{'loss': 0.0486, 'learning_rate': 1.8556477805846266e-05, 'epoch': 7.22}


  7%|▋         | 20501/277100 [23:39<5:25:47, 13.13it/s]

{'loss': 0.0536, 'learning_rate': 1.852038975099242e-05, 'epoch': 7.4}


  8%|▊         | 21001/277100 [24:11<5:11:59, 13.68it/s]

{'loss': 0.0594, 'learning_rate': 1.848430169613858e-05, 'epoch': 7.58}


  8%|▊         | 21501/277100 [24:43<5:19:42, 13.32it/s]

{'loss': 0.0684, 'learning_rate': 1.8448213641284734e-05, 'epoch': 7.76}


  8%|▊         | 22001/277100 [25:15<5:12:48, 13.59it/s]

{'loss': 0.0741, 'learning_rate': 1.8412125586430892e-05, 'epoch': 7.94}


  8%|▊         | 22167/277100 [25:25<4:24:04, 16.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1270
  Batch size = 4

  0%|          | 0/318 [00:00<?, ?it/s][A
  3%|▎         | 8/318 [00:00<00:04, 75.96it/s][A
  5%|▌         | 16/318 [00:00<00:03, 75.73it/s][A
  8%|▊         | 24/318 [00:00<00:03, 76.12it/s][A
 10%|█         | 32/318 [00:00<00:03, 77.14it/s][A
 13%|█▎        | 40/318 [00:00<00:03, 77.43it/s][A
 15%|█▌        | 48/318 [00:00<00:03, 76.93it/s][A
 18%|█▊        | 56/318 [00:00<00:03, 77.24it/s][A
 20%|██        | 64/318 [00:00<00:03, 77.08it/s][A
 23%|██▎       | 72/318 [00:00<00:03, 77.43it/s][A
 25%|██▌       | 80/318 [00:01<00:03, 77.59it/s][A
 28%|██▊       | 88/318 [00:01<00:02, 77.56it/s][A


{'eval_loss': 1.1044666767120361, 'eval_f1': 0.8057255244755245, 'eval_recall': 0.8001823565728396, 'eval_precision': 0.8124345518587273, 'eval_runtime': 4.2324, 'eval_samples_per_second': 300.065, 'eval_steps_per_second': 75.134, 'epoch': 8.0}


Model weights saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-22168/pytorch_model.bin
tokenizer config file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-22168/tokenizer_config.json
Special tokens file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-22168/special_tokens_map.json
  8%|▊         | 22503/277100 [25:54<4:19:35, 16.35it/s] 

{'loss': 0.0505, 'learning_rate': 1.837603753157705e-05, 'epoch': 8.12}


  8%|▊         | 23003/277100 [26:26<4:06:15, 17.20it/s]

{'loss': 0.0457, 'learning_rate': 1.8339949476723205e-05, 'epoch': 8.3}


  8%|▊         | 23503/277100 [26:58<3:57:03, 17.83it/s]

{'loss': 0.0454, 'learning_rate': 1.8303861421869363e-05, 'epoch': 8.48}


  9%|▊         | 24003/277100 [27:30<4:01:31, 17.46it/s]

{'loss': 0.0605, 'learning_rate': 1.826777336701552e-05, 'epoch': 8.66}


  9%|▉         | 24503/277100 [28:02<4:01:03, 17.46it/s]

{'loss': 0.053, 'learning_rate': 1.8231685312161676e-05, 'epoch': 8.84}


  9%|▉         | 24939/277100 [28:30<4:30:21, 15.55it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1270
  Batch size = 4

  0%|          | 0/318 [00:00<?, ?it/s][A
  3%|▎         | 8/318 [00:00<00:04, 72.73it/s][A
  5%|▌         | 16/318 [00:00<00:04, 69.82it/s][A
  7%|▋         | 23/318 [00:00<00:04, 67.20it/s][A
  9%|▉         | 30/318 [00:00<00:04, 64.01it/s][A
 12%|█▏        | 37/318 [00:00<00:04, 60.33it/s][A
 14%|█▍        | 44/318 [00:00<00:04, 58.88it/s][A
 16%|█▌        | 51/318 [00:00<00:04, 60.18it/s][A
 18%|█▊        | 58/318 [00:00<00:04, 59.96it/s][A
 21%|██        | 66/318 [00:01<00:03, 63.41it/s][A
 23%|██▎       | 73/318 [00:01<00:03, 62.97it/s][A
 25%|██▌       | 80/318 [00:01<00:03, 63.70it/s][A


{'eval_loss': 1.0600095987319946, 'eval_f1': 0.7998386842568814, 'eval_recall': 0.8004338828802049, 'eval_precision': 0.7992560663476075, 'eval_runtime': 5.1071, 'eval_samples_per_second': 248.673, 'eval_steps_per_second': 62.266, 'epoch': 9.0}


Model weights saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-24939/pytorch_model.bin
tokenizer config file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-24939/tokenizer_config.json
Special tokens file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-24939/special_tokens_map.json
  9%|▉         | 25001/277100 [28:43<4:54:54, 14.25it/s] 

{'loss': 0.056, 'learning_rate': 1.8195597257307834e-05, 'epoch': 9.02}


  9%|▉         | 25501/277100 [29:15<5:08:40, 13.58it/s]

{'loss': 0.0371, 'learning_rate': 1.815950920245399e-05, 'epoch': 9.2}


  9%|▉         | 26003/277100 [29:46<4:37:04, 15.10it/s]

{'loss': 0.0391, 'learning_rate': 1.8123421147600147e-05, 'epoch': 9.38}


 10%|▉         | 26501/277100 [30:18<5:14:39, 13.27it/s]

{'loss': 0.042, 'learning_rate': 1.8087333092746302e-05, 'epoch': 9.56}


 10%|▉         | 27003/277100 [30:50<4:43:05, 14.72it/s]

{'loss': 0.0519, 'learning_rate': 1.805124503789246e-05, 'epoch': 9.74}


 10%|▉         | 27503/277100 [31:21<4:40:22, 14.84it/s]

{'loss': 0.0526, 'learning_rate': 1.8015156983038615e-05, 'epoch': 9.92}


 10%|▉         | 27709/277100 [31:34<4:08:49, 16.70it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1270
  Batch size = 4

  0%|          | 0/318 [00:00<?, ?it/s][A
  3%|▎         | 9/318 [00:00<00:03, 79.69it/s][A
  5%|▌         | 17/318 [00:00<00:03, 77.69it/s][A
  8%|▊         | 25/318 [00:00<00:03, 77.04it/s][A
 10%|█         | 33/318 [00:00<00:03, 76.25it/s][A
 13%|█▎        | 41/318 [00:00<00:03, 76.65it/s][A
 15%|█▌        | 49/318 [00:00<00:03, 76.54it/s][A
 18%|█▊        | 57/318 [00:00<00:03, 76.68it/s][A
 20%|██        | 65/318 [00:00<00:03, 74.92it/s][A
 23%|██▎       | 73/318 [00:00<00:03, 73.51it/s][A
 25%|██▌       | 81/318 [00:01<00:03, 74.59it/s][A
 28%|██▊       | 90/318 [00:01<00:02, 76.34it/s][A


{'eval_loss': 1.1479723453521729, 'eval_f1': 0.7959932188479626, 'eval_recall': 0.8080311206640295, 'eval_precision': 0.7884292676660216, 'eval_runtime': 4.4094, 'eval_samples_per_second': 288.019, 'eval_steps_per_second': 72.118, 'epoch': 10.0}


Model weights saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-27710/pytorch_model.bin
tokenizer config file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-27710/tokenizer_config.json
Special tokens file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-27710/special_tokens_map.json
 10%|█         | 28003/277100 [32:01<3:55:23, 17.64it/s] 

{'loss': 0.0358, 'learning_rate': 1.7979068928184773e-05, 'epoch': 10.1}


 10%|█         | 28503/277100 [32:33<3:56:13, 17.54it/s]

{'loss': 0.0331, 'learning_rate': 1.7942980873330928e-05, 'epoch': 10.29}


 10%|█         | 29003/277100 [33:06<3:58:54, 17.31it/s]

{'loss': 0.0407, 'learning_rate': 1.7906892818477086e-05, 'epoch': 10.47}


 11%|█         | 29503/277100 [33:38<4:00:08, 17.18it/s]

{'loss': 0.0413, 'learning_rate': 1.787080476362324e-05, 'epoch': 10.65}


 11%|█         | 30003/277100 [34:09<3:48:41, 18.01it/s]

{'loss': 0.039, 'learning_rate': 1.78347167087694e-05, 'epoch': 10.83}


 11%|█         | 30481/277100 [34:39<3:51:56, 17.72it/s]The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1270
  Batch size = 4

  0%|          | 0/318 [00:00<?, ?it/s][A
  3%|▎         | 9/318 [00:00<00:03, 88.79it/s][A
  6%|▌         | 18/318 [00:00<00:03, 81.51it/s][A
  8%|▊         | 27/318 [00:00<00:03, 79.33it/s][A
 11%|█         | 35/318 [00:00<00:03, 77.55it/s][A
 14%|█▎        | 43/318 [00:00<00:03, 76.55it/s][A
 16%|█▌        | 51/318 [00:00<00:03, 76.49it/s][A
 19%|█▊        | 59/318 [00:00<00:03, 76.02it/s][A
 21%|██        | 67/318 [00:00<00:03, 75.85it/s][A
 24%|██▎       | 75/318 [00:00<00:03, 75.74it/s][A
 26%|██▌       | 83/318 [00:01<00:03, 76.18it/s][A
 29%|██▊       | 91/318 [00:01<00:02, 76.19it/s][A


{'eval_loss': 1.3483388423919678, 'eval_f1': 0.7861308871851042, 'eval_recall': 0.800525346991974, 'eval_precision': 0.7781972990707617, 'eval_runtime': 4.3151, 'eval_samples_per_second': 294.316, 'eval_steps_per_second': 73.695, 'epoch': 11.0}


Model weights saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-30481/pytorch_model.bin
tokenizer config file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-30481/tokenizer_config.json
Special tokens file saved in /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-30481/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /scratch/mt/ashapiro/Hate_Speech/All_data_run/checkpoint-2771 (score: 0.8346165439057007).
 11%|█         | 30481/277100 [34:47<4:41:32, 14.60it/s]

{'train_runtime': 2087.8921, 'train_samples_per_second': 2122.907, 'train_steps_per_second': 132.718, 'train_loss': 0.13503938274884772, 'epoch': 11.0}





TrainOutput(global_step=30481, training_loss=0.13503938274884772, metrics={'train_runtime': 2087.8921, 'train_samples_per_second': 2122.907, 'train_steps_per_second': 132.718, 'train_loss': 0.13503938274884772, 'epoch': 11.0})