In [1]:
import os
# Point the Hugging Face cache at a shared location. This is set before the
# project modules below are imported.
# NOTE(review): presumably those modules import `transformers`, which would
# read this variable when it is loaded - confirm. The path is machine-specific.
os.environ['TRANSFORMERS_CACHE'] = '/mnt/software/'
import sys
import gc
# Assumes the `data`, `models` and `engine` modules live in the flicc directory,
# taken here to be the parent of the current working directory. Joining with ''
# makes the resulting path end with a path separator.
flicc_path = os.path.join(os.path.dirname(os.getcwd()), '')
# Make that directory importable so the three project imports below resolve.
sys.path.append(flicc_path)
import torch
from data import ClimateDataset
from models import ClassificationModel
from engine import Engine

# Optional: uncomment to silence library warnings in the cell outputs.
# import warnings
# warnings.filterwarnings("ignore")

In [2]:
# Learning rate used for each model checkpoint in the gamma grid search below.
# Insertion order matters: the search iterates the checkpoints in this order.
best_config = {
    'bert-base-uncased': {'lr': 5e-5},
    'roberta-large': {'lr': 5e-5},
    'gpt2': {'lr': 5e-5},
    'bigscience/bloom-560m': {'lr': 5e-5},
    'facebook/opt-350m': {'lr': 1e-5},
    'EleutherAI/gpt-neo-1.3B': {'lr': 5e-5},
    'microsoft/deberta-base': {'lr': 1e-5},
    'microsoft/deberta-v2-xlarge': {'lr': 1e-5},
}

In [3]:
# Accumulator for the grid search: one list per metric plus the gamma value
# ('g') of each run. Every key gets its own independent empty list.
_result_columns = ('test_acc', 'test_f1', 'eval_acc', 'eval_f1', 'g')
results = {column: [] for column in _result_columns}

In [4]:
# Gamma values to try for every checkpoint; each one is passed to
# `Engine.run` as `gamma` together with `focalloss=True`.
gamma = [2,4,8,16]

for model_checkpoint, config in best_config.items():
    lr = config['lr']
    for g in gamma:
        # Report gamma as well: it is the only value that varies between the
        # runs of one checkpoint.
        print(f'Grid search {model_checkpoint}, learning rate {lr}, gamma {g}')

        # Fresh dataset, model and trainer for every run so that no state
        # carries over from the previous gamma value.
        # NOTE(review): `model_to_train=4` is an opaque selector defined by
        # ClimateDataset - confirm its meaning there.
        data = ClimateDataset(model_to_train=4,model_checkpoint=model_checkpoint,dataset_url=flicc_path,batch_size=32)
        data.setup_dataloaders()
        model = ClassificationModel(model_checkpoint=data.model_checkpoint,num_labels=data.num_labels)
        trainer = Engine(epochs=30,labels=data.labels)
        trainer.model = model.model
        trainer.dataset_encoded = data.dataset_encoded
        test_acc, test_f1, eval_acc, eval_f1 = trainer.run(lr=lr,
                                                            wd=0.0,
                                                            train_dataloader=data.train_dataloader,
                                                            eval_dataloader=data.eval_dataloader,
                                                            test_dataloader=data.test_dataloader,
                                                            focalloss=True,
                                                            gamma=g,
                                                            early_stop=3)

        results['test_acc'].append(test_acc)
        results['test_f1'].append(test_f1)
        results['eval_acc'].append(eval_acc)
        results['eval_f1'].append(eval_f1)
        results['g'].append(g)
        # Record which checkpoint produced this row; `setdefault` keeps this
        # cell working when `results` was created without a 'model' key.
        results.setdefault('model', []).append(model_checkpoint)

        print('### '*10)
        print(results)
        print('### '*10)

        # Release memory in the order that actually frees it: drop the Python
        # references, collect garbage, and only then ask the CUDA caching
        # allocator to return its unused blocks. Emptying the cache while the
        # model is still referenced cannot release that model's memory.
        del data, model, trainer
        gc.collect()
        torch.cuda.empty_cache()

Grid search bert-base-uncased, learning rate 5e-05


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	21.5878	Val Loss:	1.8216	Accuracy:	0.4026	F1:	0.3431 *
2 / 30: Train Loss:	12.3960	Val Loss:	1.6948	Accuracy:	0.4530	F1:	0.4332 *
3 / 30: Train Loss:	6.2709	Val Loss:	1.4211	Accuracy:	0.5252	F1:	0.5152 *
4 / 30: Train Loss:	2.7156	Val Loss:	1.2350	Accuracy:	0.6018	F1:	0.5858 *
5 / 30: Train Loss:	1.1768	Val Loss:	1.2149	Accuracy:	0.6368	F1:	0.6224 *
6 / 30: Train Loss:	0.5279	Val Loss:	1.1698	Accuracy:	0.6543	F1:	0.6415 *
7 / 30: Train Loss:	0.2480	Val Loss:	1.1532	Accuracy:	0.6630	F1:	0.6359
8 / 30: Train Loss:	0.1090	Val Loss:	1.1358	Accuracy:	0.6586	F1:	0.6308
9 / 30: Train Loss:	0.0473	Val Loss:	1.1311	Accuracy:	0.6630	F1:	0.6427 *
10 / 30: Train Loss:	0.0343	Val Loss:	1.1182	Accuracy:	0.6783	F1:	0.6574 *
11 / 30: Train Loss:	0.0320	Val Loss:	1.1245	Accuracy:	0.6696	F1:	0.6398
12 / 30: Train Loss:	0.0250	Val Loss:	1.1340	Accuracy:	0.6761	F1:	0.6510
13 / 30: Train Loss:	0.0174	Val Loss:	1.1267	Accuracy:	0.6761	F1:	0.6507
No improvement for 3 epochs. Stopping earl

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	17.7481	Val Loss:	1.8447	Accuracy:	0.4464	F1:	0.3860 *
2 / 30: Train Loss:	9.3680	Val Loss:	1.6075	Accuracy:	0.4989	F1:	0.4667 *
3 / 30: Train Loss:	4.2528	Val Loss:	1.4378	Accuracy:	0.5295	F1:	0.5149 *
4 / 30: Train Loss:	1.8639	Val Loss:	1.3205	Accuracy:	0.5558	F1:	0.5455 *
5 / 30: Train Loss:	0.8342	Val Loss:	1.2410	Accuracy:	0.6236	F1:	0.6083 *
6 / 30: Train Loss:	0.3459	Val Loss:	1.1654	Accuracy:	0.6346	F1:	0.6152 *
7 / 30: Train Loss:	0.1744	Val Loss:	1.1782	Accuracy:	0.6214	F1:	0.5916
8 / 30: Train Loss:	0.1141	Val Loss:	1.1194	Accuracy:	0.6521	F1:	0.6132
9 / 30: Train Loss:	0.0407	Val Loss:	1.1247	Accuracy:	0.6477	F1:	0.6089
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.74      0.58      0.65        67
               anecdote       0.85      0.81      0.83        43
         cherry picking       0.74      0.57      0.65 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	11.5883	Val Loss:	1.9160	Accuracy:	0.3982	F1:	0.3449 *
2 / 30: Train Loss:	4.9761	Val Loss:	1.7012	Accuracy:	0.4989	F1:	0.4820 *
3 / 30: Train Loss:	1.9743	Val Loss:	1.5282	Accuracy:	0.4814	F1:	0.4778
4 / 30: Train Loss:	0.7401	Val Loss:	1.3385	Accuracy:	0.5821	F1:	0.5565 *
5 / 30: Train Loss:	0.2250	Val Loss:	1.2818	Accuracy:	0.6039	F1:	0.5866 *
6 / 30: Train Loss:	0.0874	Val Loss:	1.2403	Accuracy:	0.6368	F1:	0.6145 *
7 / 30: Train Loss:	0.0601	Val Loss:	1.2451	Accuracy:	0.6236	F1:	0.6055
8 / 30: Train Loss:	0.0273	Val Loss:	1.1987	Accuracy:	0.6411	F1:	0.6206 *
9 / 30: Train Loss:	0.0173	Val Loss:	1.2000	Accuracy:	0.6258	F1:	0.6065
10 / 30: Train Loss:	0.0108	Val Loss:	1.1822	Accuracy:	0.6411	F1:	0.6243 *
11 / 30: Train Loss:	0.0092	Val Loss:	1.1759	Accuracy:	0.6389	F1:	0.6219
12 / 30: Train Loss:	0.0080	Val Loss:	1.1761	Accuracy:	0.6455	F1:	0.6261 *
13 / 30: Train Loss:	0.0060	Val Loss:	1.1690	Accuracy:	0.6477	F1:	0.6291 *
14 / 30: Train Loss:	0.0049	Val Loss:	1.1

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	5.4874	Val Loss:	1.9578	Accuracy:	0.4442	F1:	0.3967 *
2 / 30: Train Loss:	1.7973	Val Loss:	1.8008	Accuracy:	0.4705	F1:	0.4402 *
3 / 30: Train Loss:	0.5656	Val Loss:	1.5226	Accuracy:	0.5799	F1:	0.5692 *
4 / 30: Train Loss:	0.1934	Val Loss:	1.4833	Accuracy:	0.6171	F1:	0.6056 *
5 / 30: Train Loss:	0.0639	Val Loss:	1.4193	Accuracy:	0.6018	F1:	0.5829
6 / 30: Train Loss:	0.0219	Val Loss:	1.3553	Accuracy:	0.6280	F1:	0.6007
7 / 30: Train Loss:	0.0135	Val Loss:	1.3405	Accuracy:	0.6171	F1:	0.6010
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.73      0.61      0.67        67
               anecdote       0.91      0.67      0.77        43
         cherry picking       0.56      0.61      0.58        56
      conspiracy theory       0.75      0.69      0.72        39
           fake experts       0.64      0.75      0.69        12
         

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	25.5600	Val Loss:	2.5146	Accuracy:	0.0306	F1:	0.0050 *
2 / 30: Train Loss:	25.6664	Val Loss:	2.5075	Accuracy:	0.0263	F1:	0.0043
3 / 30: Train Loss:	25.6679	Val Loss:	2.5213	Accuracy:	0.0284	F1:	0.0046
4 / 30: Train Loss:	25.5668	Val Loss:	2.4990	Accuracy:	0.0263	F1:	0.0043
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.00      0.00      0.00        67
               anecdote       0.00      0.00      0.00        43
         cherry picking       0.00      0.00      0.00        56
      conspiracy theory       0.00      0.00      0.00        39
           fake experts       0.00      0.00      0.00        12
           false choice       0.00      0.00      0.00        13
      false equivalence       0.03      1.00      0.06        14
impossible expectations       0.00      0.00      0.00        37
      misrepresentation       0.

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	21.1851	Val Loss:	2.5145	Accuracy:	0.0284	F1:	0.0046 *
2 / 30: Train Loss:	21.6485	Val Loss:	2.5322	Accuracy:	0.0263	F1:	0.0043
3 / 30: Train Loss:	21.8272	Val Loss:	2.5151	Accuracy:	0.0284	F1:	0.0046
4 / 30: Train Loss:	21.5902	Val Loss:	2.5111	Accuracy:	0.0263	F1:	0.0043
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.00      0.00      0.00        67
               anecdote       0.00      0.00      0.00        43
         cherry picking       0.00      0.00      0.00        56
      conspiracy theory       0.00      0.00      0.00        39
           fake experts       0.00      0.00      0.00        12
           false choice       0.03      1.00      0.06        13
      false equivalence       0.00      0.00      0.00        14
impossible expectations       0.00      0.00      0.00        37
      misrepresentation       0.

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	14.9873	Val Loss:	2.1662	Accuracy:	0.3042	F1:	0.2616 *
2 / 30: Train Loss:	9.2599	Val Loss:	1.6561	Accuracy:	0.4639	F1:	0.4362 *
3 / 30: Train Loss:	5.2970	Val Loss:	1.4738	Accuracy:	0.5383	F1:	0.5447 *
4 / 30: Train Loss:	2.7883	Val Loss:	1.3327	Accuracy:	0.5492	F1:	0.5363
5 / 30: Train Loss:	2.0063	Val Loss:	1.5730	Accuracy:	0.4442	F1:	0.4693
6 / 30: Train Loss:	1.2321	Val Loss:	1.1911	Accuracy:	0.6127	F1:	0.6009 *
7 / 30: Train Loss:	0.4829	Val Loss:	1.1570	Accuracy:	0.6652	F1:	0.6642 *
8 / 30: Train Loss:	0.3539	Val Loss:	1.0743	Accuracy:	0.6805	F1:	0.6624
9 / 30: Train Loss:	0.0812	Val Loss:	1.0577	Accuracy:	0.6958	F1:	0.6743 *
10 / 30: Train Loss:	0.0440	Val Loss:	1.0213	Accuracy:	0.7155	F1:	0.7013 *
11 / 30: Train Loss:	0.0235	Val Loss:	1.0026	Accuracy:	0.7265	F1:	0.7140 *
12 / 30: Train Loss:	0.0236	Val Loss:	1.0341	Accuracy:	0.7024	F1:	0.6819
13 / 30: Train Loss:	0.0175	Val Loss:	1.0201	Accuracy:	0.7221	F1:	0.7029
14 / 30: Train Loss:	0.0087	Val Loss:	1.036

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	8.2210	Val Loss:	2.5205	Accuracy:	0.0263	F1:	0.0043 *
2 / 30: Train Loss:	7.9589	Val Loss:	2.5175	Accuracy:	0.0263	F1:	0.0043
3 / 30: Train Loss:	8.0694	Val Loss:	2.5122	Accuracy:	0.0284	F1:	0.0046 *
4 / 30: Train Loss:	7.8820	Val Loss:	2.5154	Accuracy:	0.0263	F1:	0.0043
5 / 30: Train Loss:	7.8788	Val Loss:	2.5135	Accuracy:	0.0263	F1:	0.0043
6 / 30: Train Loss:	7.8773	Val Loss:	2.5142	Accuracy:	0.0263	F1:	0.0043
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.00      0.00      0.00        67
               anecdote       0.00      0.00      0.00        43
         cherry picking       0.00      0.00      0.00        56
      conspiracy theory       0.00      0.00      0.00        39
           fake experts       0.00      0.00      0.00        12
           false choice       0.03      1.00      0.06        13
      false equivale

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	29.0114	Val Loss:	2.4796	Accuracy:	0.0503	F1:	0.0378 *
2 / 30: Train Loss:	24.0249	Val Loss:	2.3909	Accuracy:	0.1204	F1:	0.1164 *
3 / 30: Train Loss:	22.2701	Val Loss:	2.2500	Accuracy:	0.1685	F1:	0.1730 *
4 / 30: Train Loss:	18.4805	Val Loss:	2.0144	Accuracy:	0.2735	F1:	0.2713 *
5 / 30: Train Loss:	14.6018	Val Loss:	1.8825	Accuracy:	0.3414	F1:	0.3447 *
6 / 30: Train Loss:	11.5251	Val Loss:	1.8337	Accuracy:	0.3523	F1:	0.3705 *
7 / 30: Train Loss:	9.0783	Val Loss:	1.7135	Accuracy:	0.4223	F1:	0.4288 *
8 / 30: Train Loss:	7.3557	Val Loss:	1.9040	Accuracy:	0.4004	F1:	0.4063
9 / 30: Train Loss:	6.4608	Val Loss:	1.6113	Accuracy:	0.4814	F1:	0.4768 *
10 / 30: Train Loss:	3.8083	Val Loss:	1.6515	Accuracy:	0.4792	F1:	0.4672
11 / 30: Train Loss:	3.0340	Val Loss:	1.6804	Accuracy:	0.4595	F1:	0.4579
12 / 30: Train Loss:	2.2419	Val Loss:	1.6479	Accuracy:	0.5120	F1:	0.4888 *
13 / 30: Train Loss:	1.6120	Val Loss:	1.6438	Accuracy:	0.5120	F1:	0.4920 *
14 / 30: Train Loss:	1.1143	Val Lo

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	25.6681	Val Loss:	2.4805	Accuracy:	0.0481	F1:	0.0414 *
2 / 30: Train Loss:	20.0781	Val Loss:	2.4035	Accuracy:	0.0853	F1:	0.0868 *
3 / 30: Train Loss:	18.5970	Val Loss:	2.2800	Accuracy:	0.1532	F1:	0.1611 *
4 / 30: Train Loss:	15.5086	Val Loss:	2.0621	Accuracy:	0.2801	F1:	0.2816 *
5 / 30: Train Loss:	12.2179	Val Loss:	1.9339	Accuracy:	0.3042	F1:	0.3139 *
6 / 30: Train Loss:	9.5236	Val Loss:	1.8239	Accuracy:	0.3370	F1:	0.3547 *
7 / 30: Train Loss:	7.8784	Val Loss:	1.7470	Accuracy:	0.3589	F1:	0.3676 *
8 / 30: Train Loss:	6.4253	Val Loss:	1.7554	Accuracy:	0.3786	F1:	0.3943 *
9 / 30: Train Loss:	5.0203	Val Loss:	1.6836	Accuracy:	0.4354	F1:	0.4368 *
10 / 30: Train Loss:	3.3715	Val Loss:	1.6344	Accuracy:	0.4595	F1:	0.4452 *
11 / 30: Train Loss:	2.6341	Val Loss:	1.5245	Accuracy:	0.4814	F1:	0.4716 *
12 / 30: Train Loss:	2.0983	Val Loss:	1.6188	Accuracy:	0.4814	F1:	0.4693
13 / 30: Train Loss:	1.5451	Val Loss:	1.5652	Accuracy:	0.5120	F1:	0.4967 *
14 / 30: Train Loss:	1.1132	Val

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	20.0000	Val Loss:	2.4855	Accuracy:	0.0481	F1:	0.0302 *
2 / 30: Train Loss:	14.1917	Val Loss:	2.4203	Accuracy:	0.0875	F1:	0.0877 *
3 / 30: Train Loss:	12.9548	Val Loss:	2.3189	Accuracy:	0.1335	F1:	0.1445 *
4 / 30: Train Loss:	10.6915	Val Loss:	2.1232	Accuracy:	0.2407	F1:	0.2735 *
5 / 30: Train Loss:	8.2700	Val Loss:	1.9837	Accuracy:	0.2888	F1:	0.3095 *
6 / 30: Train Loss:	6.4008	Val Loss:	1.9189	Accuracy:	0.2910	F1:	0.3070
7 / 30: Train Loss:	4.9913	Val Loss:	1.8202	Accuracy:	0.3370	F1:	0.3502 *
8 / 30: Train Loss:	3.6964	Val Loss:	1.6715	Accuracy:	0.3851	F1:	0.4016 *
9 / 30: Train Loss:	3.3419	Val Loss:	1.7256	Accuracy:	0.3764	F1:	0.3736
10 / 30: Train Loss:	2.3196	Val Loss:	1.6507	Accuracy:	0.3917	F1:	0.3846
11 / 30: Train Loss:	1.8885	Val Loss:	1.5880	Accuracy:	0.4267	F1:	0.4261 *
12 / 30: Train Loss:	1.3761	Val Loss:	1.6306	Accuracy:	0.4486	F1:	0.4432 *
13 / 30: Train Loss:	1.0155	Val Loss:	1.5742	Accuracy:	0.4748	F1:	0.4570 *
14 / 30: Train Loss:	0.6151	Val Loss

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	13.3194	Val Loss:	2.4877	Accuracy:	0.0613	F1:	0.0348 *
2 / 30: Train Loss:	7.4155	Val Loss:	2.4527	Accuracy:	0.0656	F1:	0.0518 *
3 / 30: Train Loss:	6.8128	Val Loss:	2.3988	Accuracy:	0.0810	F1:	0.0880 *
4 / 30: Train Loss:	6.0414	Val Loss:	2.3244	Accuracy:	0.1291	F1:	0.1461 *
5 / 30: Train Loss:	4.8900	Val Loss:	2.1968	Accuracy:	0.2144	F1:	0.2520 *
6 / 30: Train Loss:	4.0430	Val Loss:	2.1113	Accuracy:	0.2538	F1:	0.2873 *
7 / 30: Train Loss:	3.1220	Val Loss:	2.0042	Accuracy:	0.2932	F1:	0.3033 *
8 / 30: Train Loss:	2.5315	Val Loss:	1.8996	Accuracy:	0.3107	F1:	0.3363 *
9 / 30: Train Loss:	2.1274	Val Loss:	1.8574	Accuracy:	0.3370	F1:	0.3457 *
10 / 30: Train Loss:	1.5788	Val Loss:	1.8067	Accuracy:	0.3545	F1:	0.3536 *
11 / 30: Train Loss:	1.5806	Val Loss:	1.7579	Accuracy:	0.3392	F1:	0.3425
12 / 30: Train Loss:	1.1282	Val Loss:	1.6648	Accuracy:	0.4530	F1:	0.4436 *
13 / 30: Train Loss:	0.7817	Val Loss:	1.7353	Accuracy:	0.4026	F1:	0.4015
14 / 30: Train Loss:	0.6873	Val Loss:

Some weights of BloomForSequenceClassification were not initialized from the model checkpoint at bigscience/bloom-560m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BloomTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	102.0945	Val Loss:	2.4711	Accuracy:	0.3632	F1:	0.3459 *
2 / 30: Train Loss:	14.6911	Val Loss:	1.5979	Accuracy:	0.5602	F1:	0.5397 *
3 / 30: Train Loss:	5.1869	Val Loss:	2.4140	Accuracy:	0.5077	F1:	0.5006
4 / 30: Train Loss:	10.5564	Val Loss:	3.0940	Accuracy:	0.4267	F1:	0.4271
5 / 30: Train Loss:	6.0418	Val Loss:	2.7342	Accuracy:	0.4902	F1:	0.4875
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.57      0.58      0.57        67
               anecdote       0.71      0.84      0.77        43
         cherry picking       0.67      0.25      0.36        56
      conspiracy theory       0.63      0.79      0.70        39
           fake experts       0.75      0.50      0.60        12
           false choice       0.42      0.77      0.54        13
      false equivalence       0.33      0.21      0.26        14
impossible expectations

Some weights of BloomForSequenceClassification were not initialized from the model checkpoint at bigscience/bloom-560m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BloomTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	106.4590	Val Loss:	1.9401	Accuracy:	0.3982	F1:	0.3775 *
2 / 30: Train Loss:	15.8555	Val Loss:	2.5981	Accuracy:	0.4114	F1:	0.3818 *
3 / 30: Train Loss:	6.3178	Val Loss:	2.3193	Accuracy:	0.5011	F1:	0.4894 *
4 / 30: Train Loss:	6.1056	Val Loss:	3.0187	Accuracy:	0.4398	F1:	0.4303
5 / 30: Train Loss:	3.6230	Val Loss:	2.8803	Accuracy:	0.4420	F1:	0.4428
6 / 30: Train Loss:	2.6105	Val Loss:	2.9753	Accuracy:	0.4770	F1:	0.4815
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.59      0.43      0.50        67
               anecdote       1.00      0.40      0.57        43
         cherry picking       0.37      0.50      0.43        56
      conspiracy theory       0.77      0.77      0.77        39
           fake experts       0.64      0.75      0.69        12
           false choice       1.00      0.62      0.76        13
      false equ

Some weights of BloomForSequenceClassification were not initialized from the model checkpoint at bigscience/bloom-560m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BloomTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	106.8290	Val Loss:	2.1779	Accuracy:	0.3457	F1:	0.3036 *
2 / 30: Train Loss:	12.5890	Val Loss:	2.1887	Accuracy:	0.3786	F1:	0.3915 *
3 / 30: Train Loss:	7.4262	Val Loss:	2.6263	Accuracy:	0.4333	F1:	0.3655
4 / 30: Train Loss:	5.7980	Val Loss:	2.3255	Accuracy:	0.4442	F1:	0.4442 *
5 / 30: Train Loss:	3.8827	Val Loss:	2.8876	Accuracy:	0.4398	F1:	0.4332
6 / 30: Train Loss:	4.0170	Val Loss:	2.5209	Accuracy:	0.4661	F1:	0.4517 *
7 / 30: Train Loss:	3.2462	Val Loss:	4.3205	Accuracy:	0.3479	F1:	0.3050
8 / 30: Train Loss:	2.3075	Val Loss:	2.4623	Accuracy:	0.5295	F1:	0.5453 *
9 / 30: Train Loss:	0.9785	Val Loss:	1.8751	Accuracy:	0.5886	F1:	0.5722 *
10 / 30: Train Loss:	0.5968	Val Loss:	1.9179	Accuracy:	0.5602	F1:	0.5466
11 / 30: Train Loss:	0.8513	Val Loss:	2.4548	Accuracy:	0.5492	F1:	0.5261
12 / 30: Train Loss:	0.9071	Val Loss:	2.2775	Accuracy:	0.5733	F1:	0.5512
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         preci

Some weights of BloomForSequenceClassification were not initialized from the model checkpoint at bigscience/bloom-560m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BloomTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	105.0740	Val Loss:	2.4627	Accuracy:	0.3260	F1:	0.3127 *
2 / 30: Train Loss:	8.9022	Val Loss:	1.9056	Accuracy:	0.4639	F1:	0.3918 *
3 / 30: Train Loss:	4.8087	Val Loss:	1.9258	Accuracy:	0.4311	F1:	0.4050 *
4 / 30: Train Loss:	5.7753	Val Loss:	1.8913	Accuracy:	0.5514	F1:	0.5030 *
5 / 30: Train Loss:	4.6854	Val Loss:	3.2320	Accuracy:	0.3589	F1:	0.3908
6 / 30: Train Loss:	5.8892	Val Loss:	3.7771	Accuracy:	0.4070	F1:	0.3954
7 / 30: Train Loss:	5.0823	Val Loss:	4.8128	Accuracy:	0.3239	F1:	0.3202
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.57      0.51      0.54        67
               anecdote       0.91      0.72      0.81        43
         cherry picking       0.54      0.68      0.60        56
      conspiracy theory       0.64      0.69      0.67        39
           fake experts       0.40      0.83      0.54        12
       

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	29.1126	Val Loss:	2.1074	Accuracy:	0.2735	F1:	0.1988 *
2 / 30: Train Loss:	23.1855	Val Loss:	2.1239	Accuracy:	0.2691	F1:	0.2181 *
3 / 30: Train Loss:	23.4847	Val Loss:	2.5224	Accuracy:	0.0788	F1:	0.0213
4 / 30: Train Loss:	25.2108	Val Loss:	2.5524	Accuracy:	0.0700	F1:	0.0478
5 / 30: Train Loss:	24.3326	Val Loss:	2.5646	Accuracy:	0.1028	F1:	0.0570
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.00      0.00      0.00        67
               anecdote       0.47      0.86      0.61        43
         cherry picking       0.00      0.00      0.00        56
      conspiracy theory       0.20      0.82      0.33        39
           fake experts       1.00      0.25      0.40        12
           false choice       0.57      0.31      0.40        13
      false equivalence       0.00      0.00      0.00        14
impossible expectation

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	25.5495	Val Loss:	2.0333	Accuracy:	0.2976	F1:	0.2565 *
2 / 30: Train Loss:	20.0294	Val Loss:	2.2595	Accuracy:	0.2604	F1:	0.1929
3 / 30: Train Loss:	22.1518	Val Loss:	2.5065	Accuracy:	0.1597	F1:	0.0770
4 / 30: Train Loss:	22.3698	Val Loss:	2.4978	Accuracy:	0.1400	F1:	0.0972
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.50      0.19      0.28        67
               anecdote       0.33      0.88      0.48        43
         cherry picking       0.50      0.02      0.03        56
      conspiracy theory       0.22      0.79      0.35        39
           fake experts       0.70      0.58      0.64        12
           false choice       1.00      0.23      0.38        13
      false equivalence       0.00      0.00      0.00        14
impossible expectations       0.25      0.59      0.35        37
      misrepresentation       0.

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	19.9808	Val Loss:	2.1320	Accuracy:	0.2823	F1:	0.2154 *
2 / 30: Train Loss:	14.8263	Val Loss:	2.2078	Accuracy:	0.2429	F1:	0.1962
3 / 30: Train Loss:	15.8789	Val Loss:	2.4131	Accuracy:	0.1554	F1:	0.1063
4 / 30: Train Loss:	18.6087	Val Loss:	2.5177	Accuracy:	0.0832	F1:	0.0221
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.44      0.16      0.24        67
               anecdote       0.28      0.86      0.42        43
         cherry picking       0.33      0.02      0.03        56
      conspiracy theory       0.36      0.72      0.48        39
           fake experts       0.12      0.67      0.21        12
           false choice       0.33      0.23      0.27        13
      false equivalence       0.00      0.00      0.00        14
impossible expectations       0.24      0.68      0.35        37
      misrepresentation       0.

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	13.3147	Val Loss:	2.1266	Accuracy:	0.2560	F1:	0.2241 *
2 / 30: Train Loss:	8.3470	Val Loss:	2.2280	Accuracy:	0.2166	F1:	0.1735
3 / 30: Train Loss:	8.7788	Val Loss:	2.3781	Accuracy:	0.1554	F1:	0.1124
4 / 30: Train Loss:	7.7095	Val Loss:	2.2562	Accuracy:	0.1488	F1:	0.1413
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.40      0.34      0.37        67
               anecdote       0.70      0.60      0.65        43
         cherry picking       0.17      0.02      0.03        56
      conspiracy theory       0.57      0.51      0.54        39
           fake experts       0.08      0.83      0.15        12
           false choice       0.14      0.08      0.10        13
      false equivalence       0.05      0.14      0.07        14
impossible expectations       0.20      0.54      0.30        37
      misrepresentation       0.17 

Some weights of GPTNeoForSequenceClassification were not initialized from the model checkpoint at EleutherAI/gpt-neo-1.3B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	29.8785	Val Loss:	2.3619	Accuracy:	0.2298	F1:	0.1918 *
2 / 30: Train Loss:	17.6342	Val Loss:	1.6656	Accuracy:	0.4748	F1:	0.4455 *
3 / 30: Train Loss:	6.1793	Val Loss:	1.5575	Accuracy:	0.4967	F1:	0.4868 *
4 / 30: Train Loss:	1.9191	Val Loss:	1.6292	Accuracy:	0.4902	F1:	0.4725
5 / 30: Train Loss:	1.8796	Val Loss:	1.8009	Accuracy:	0.4442	F1:	0.4400
6 / 30: Train Loss:	1.5334	Val Loss:	1.8423	Accuracy:	0.4530	F1:	0.4730
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.55      0.36      0.43        67
               anecdote       0.82      0.72      0.77        43
         cherry picking       0.62      0.46      0.53        56
      conspiracy theory       0.68      0.49      0.57        39
           fake experts       0.45      0.83      0.59        12
           false choice       0.35      0.69      0.46        13
      false equi

Some weights of GPTNeoForSequenceClassification were not initialized from the model checkpoint at EleutherAI/gpt-neo-1.3B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	22.7809	Val Loss:	2.6079	Accuracy:	0.0766	F1:	0.0462 *
2 / 30: Train Loss:	23.6462	Val Loss:	2.5815	Accuracy:	0.0788	F1:	0.0280
3 / 30: Train Loss:	22.4995	Val Loss:	2.5130	Accuracy:	0.0788	F1:	0.0418
4 / 30: Train Loss:	21.8150	Val Loss:	2.5134	Accuracy:	0.0656	F1:	0.0429
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       1.00      0.01      0.03        67
               anecdote       1.00      0.05      0.09        43
         cherry picking       0.08      0.02      0.03        56
      conspiracy theory       0.00      0.00      0.00        39
           fake experts       0.03      0.08      0.05        12
           false choice       0.00      0.00      0.00        13
      false equivalence       0.07      0.07      0.07        14
impossible expectations       0.08      0.03      0.04        37
      misrepresentation       0.

Some weights of GPTNeoForSequenceClassification were not initialized from the model checkpoint at EleutherAI/gpt-neo-1.3B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	19.2629	Val Loss:	2.3928	Accuracy:	0.1838	F1:	0.1291 *
2 / 30: Train Loss:	12.1847	Val Loss:	1.9488	Accuracy:	0.3742	F1:	0.3117 *
3 / 30: Train Loss:	5.5638	Val Loss:	1.8231	Accuracy:	0.4136	F1:	0.4234 *
4 / 30: Train Loss:	1.7520	Val Loss:	1.7038	Accuracy:	0.4530	F1:	0.4285 *
5 / 30: Train Loss:	0.4809	Val Loss:	1.5959	Accuracy:	0.4726	F1:	0.4558 *
6 / 30: Train Loss:	0.1713	Val Loss:	1.6929	Accuracy:	0.4354	F1:	0.4255
7 / 30: Train Loss:	0.0631	Val Loss:	1.5239	Accuracy:	0.5142	F1:	0.4997 *
8 / 30: Train Loss:	0.0102	Val Loss:	1.5145	Accuracy:	0.5252	F1:	0.5080 *
9 / 30: Train Loss:	0.0061	Val Loss:	1.5157	Accuracy:	0.5164	F1:	0.4987
10 / 30: Train Loss:	0.0048	Val Loss:	1.5158	Accuracy:	0.5208	F1:	0.5020
11 / 30: Train Loss:	0.0041	Val Loss:	1.5165	Accuracy:	0.5208	F1:	0.5023
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.47  

Some weights of GPTNeoForSequenceClassification were not initialized from the model checkpoint at EleutherAI/gpt-neo-1.3B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	11.3591	Val Loss:	1.9821	Accuracy:	0.3720	F1:	0.3429 *
2 / 30: Train Loss:	2.3739	Val Loss:	1.6758	Accuracy:	0.4354	F1:	0.4600 *
3 / 30: Train Loss:	0.4518	Val Loss:	1.5973	Accuracy:	0.4836	F1:	0.4346
4 / 30: Train Loss:	0.0846	Val Loss:	1.4610	Accuracy:	0.5011	F1:	0.4607 *
5 / 30: Train Loss:	0.1125	Val Loss:	1.5894	Accuracy:	0.4573	F1:	0.4702 *
6 / 30: Train Loss:	0.0340	Val Loss:	1.4403	Accuracy:	0.5186	F1:	0.5215 *
7 / 30: Train Loss:	0.0017	Val Loss:	1.4175	Accuracy:	0.5295	F1:	0.5305 *
8 / 30: Train Loss:	0.0003	Val Loss:	1.4144	Accuracy:	0.5317	F1:	0.5319 *
9 / 30: Train Loss:	0.0002	Val Loss:	1.4117	Accuracy:	0.5339	F1:	0.5335 *
10 / 30: Train Loss:	0.0002	Val Loss:	1.4095	Accuracy:	0.5361	F1:	0.5362 *
11 / 30: Train Loss:	0.0001	Val Loss:	1.4077	Accuracy:	0.5405	F1:	0.5400 *
12 / 30: Train Loss:	0.0001	Val Loss:	1.4060	Accuracy:	0.5405	F1:	0.5432 *
13 / 30: Train Loss:	0.0001	Val Loss:	1.4046	Accuracy:	0.5405	F1:	0.5436 *
14 / 30: Train Loss:	0.0001	Val Los

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	24.8675	Val Loss:	2.3753	Accuracy:	0.1904	F1:	0.1488 *
2 / 30: Train Loss:	20.4916	Val Loss:	1.8513	Accuracy:	0.4486	F1:	0.4032 *
3 / 30: Train Loss:	13.4764	Val Loss:	1.5473	Accuracy:	0.5164	F1:	0.4872 *
4 / 30: Train Loss:	9.2529	Val Loss:	1.4663	Accuracy:	0.5514	F1:	0.5269 *
5 / 30: Train Loss:	6.4986	Val Loss:	1.3216	Accuracy:	0.5908	F1:	0.5728 *
6 / 30: Train Loss:	4.5624	Val Loss:	1.2094	Accuracy:	0.6214	F1:	0.6095 *
7 / 30: Train Loss:	2.9549	Val Loss:	1.1804	Accuracy:	0.6389	F1:	0.6241 *
8 / 30: Train Loss:	2.1415	Val Loss:	1.1626	Accuracy:	0.6346	F1:	0.6266 *
9 / 30: Train Loss:	1.6118	Val Loss:	1.1815	Accuracy:	0.6258	F1:	0.6292 *
10 / 30: Train Loss:	1.1227	Val Loss:	1.1290	Accuracy:	0.6499	F1:	0.6459 *
11 / 30: Train Loss:	0.8260	Val Loss:	1.1151	Accuracy:	0.6565	F1:	0.6570 *
12 / 30: Train Loss:	0.7516	Val Loss:	1.1442	Accuracy:	0.6455	F1:	0.6390
13 / 30: Train Loss:	0.5624	Val Loss:	1.1248	Accuracy:	0.6608	F1:	0.6538
14 / 30: Train Loss:	0.3865	Val Los

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	20.7712	Val Loss:	2.3022	Accuracy:	0.2604	F1:	0.2384 *
2 / 30: Train Loss:	14.3586	Val Loss:	1.7887	Accuracy:	0.4726	F1:	0.4379 *
3 / 30: Train Loss:	8.9665	Val Loss:	1.6031	Accuracy:	0.5317	F1:	0.5107 *
4 / 30: Train Loss:	6.0754	Val Loss:	1.5065	Accuracy:	0.5361	F1:	0.5142 *
5 / 30: Train Loss:	3.9394	Val Loss:	1.3128	Accuracy:	0.6018	F1:	0.5840 *
6 / 30: Train Loss:	2.7325	Val Loss:	1.2417	Accuracy:	0.6171	F1:	0.6114 *
7 / 30: Train Loss:	1.7973	Val Loss:	1.2195	Accuracy:	0.6214	F1:	0.6084
8 / 30: Train Loss:	1.2369	Val Loss:	1.1788	Accuracy:	0.6433	F1:	0.6398 *
9 / 30: Train Loss:	0.8964	Val Loss:	1.1190	Accuracy:	0.6433	F1:	0.6350
10 / 30: Train Loss:	0.6157	Val Loss:	1.1029	Accuracy:	0.6521	F1:	0.6481 *
11 / 30: Train Loss:	0.5081	Val Loss:	1.1176	Accuracy:	0.6499	F1:	0.6496 *
12 / 30: Train Loss:	0.4067	Val Loss:	1.1580	Accuracy:	0.6389	F1:	0.6425
13 / 30: Train Loss:	0.3185	Val Loss:	1.1169	Accuracy:	0.6411	F1:	0.6383
14 / 30: Train Loss:	0.3944	Val Loss:	1.

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	14.6062	Val Loss:	2.3014	Accuracy:	0.2823	F1:	0.2700 *
2 / 30: Train Loss:	9.0579	Val Loss:	1.8742	Accuracy:	0.4683	F1:	0.4425 *
3 / 30: Train Loss:	5.1939	Val Loss:	1.6992	Accuracy:	0.5361	F1:	0.5209 *
4 / 30: Train Loss:	3.2965	Val Loss:	1.5460	Accuracy:	0.5711	F1:	0.5471 *
5 / 30: Train Loss:	2.0122	Val Loss:	1.4079	Accuracy:	0.6105	F1:	0.5992 *
6 / 30: Train Loss:	1.3812	Val Loss:	1.3512	Accuracy:	0.6214	F1:	0.6185 *
7 / 30: Train Loss:	0.9007	Val Loss:	1.2890	Accuracy:	0.6236	F1:	0.6194 *
8 / 30: Train Loss:	0.5572	Val Loss:	1.2505	Accuracy:	0.6324	F1:	0.6322 *
9 / 30: Train Loss:	0.3865	Val Loss:	1.2221	Accuracy:	0.6302	F1:	0.6301
10 / 30: Train Loss:	0.2774	Val Loss:	1.2045	Accuracy:	0.6389	F1:	0.6349 *
11 / 30: Train Loss:	0.2335	Val Loss:	1.2263	Accuracy:	0.6455	F1:	0.6408 *
12 / 30: Train Loss:	0.1650	Val Loss:	1.2261	Accuracy:	0.6346	F1:	0.6341
13 / 30: Train Loss:	0.1376	Val Loss:	1.1718	Accuracy:	0.6543	F1:	0.6532 *
14 / 30: Train Loss:	0.1043	Val Loss:

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	7.3677	Val Loss:	2.4007	Accuracy:	0.1116	F1:	0.0703 *
2 / 30: Train Loss:	4.7185	Val Loss:	2.0234	Accuracy:	0.4398	F1:	0.4327 *
3 / 30: Train Loss:	2.3875	Val Loss:	1.7930	Accuracy:	0.5449	F1:	0.5202 *
4 / 30: Train Loss:	1.3864	Val Loss:	1.6952	Accuracy:	0.5492	F1:	0.5249 *
5 / 30: Train Loss:	0.8274	Val Loss:	1.6075	Accuracy:	0.5777	F1:	0.5716 *
6 / 30: Train Loss:	0.5805	Val Loss:	1.5458	Accuracy:	0.5908	F1:	0.5907 *
7 / 30: Train Loss:	0.3494	Val Loss:	1.4966	Accuracy:	0.6171	F1:	0.6120 *
8 / 30: Train Loss:	0.2219	Val Loss:	1.4797	Accuracy:	0.5908	F1:	0.5953
9 / 30: Train Loss:	0.1611	Val Loss:	1.4489	Accuracy:	0.5930	F1:	0.5974
10 / 30: Train Loss:	0.1289	Val Loss:	1.4304	Accuracy:	0.5952	F1:	0.5922
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.71      0.52      0.60        67
               anecdote       0.85      0.79  

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v2-xlarge and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	22.2779	Val Loss:	1.5979	Accuracy:	0.4792	F1:	0.4281 *
2 / 30: Train Loss:	10.7043	Val Loss:	1.2150	Accuracy:	0.5996	F1:	0.5600 *
3 / 30: Train Loss:	5.4903	Val Loss:	0.9698	Accuracy:	0.6849	F1:	0.6660 *
4 / 30: Train Loss:	2.9622	Val Loss:	0.9316	Accuracy:	0.7046	F1:	0.6992 *
5 / 30: Train Loss:	1.7452	Val Loss:	0.8855	Accuracy:	0.7287	F1:	0.7216 *
6 / 30: Train Loss:	0.8486	Val Loss:	0.8750	Accuracy:	0.7177	F1:	0.7108
7 / 30: Train Loss:	0.4912	Val Loss:	0.8630	Accuracy:	0.7133	F1:	0.7025
8 / 30: Train Loss:	0.2571	Val Loss:	0.8602	Accuracy:	0.7374	F1:	0.7286 *
9 / 30: Train Loss:	0.2032	Val Loss:	0.8560	Accuracy:	0.7287	F1:	0.7139
10 / 30: Train Loss:	0.1469	Val Loss:	0.8704	Accuracy:	0.7243	F1:	0.7133
11 / 30: Train Loss:	0.1178	Val Loss:	0.9646	Accuracy:	0.7046	F1:	0.6958
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.83    

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v2-xlarge and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	21.0395	Val Loss:	2.1011	Accuracy:	0.2867	F1:	0.1971 *
2 / 30: Train Loss:	10.1067	Val Loss:	1.2699	Accuracy:	0.6171	F1:	0.6000 *
3 / 30: Train Loss:	4.0454	Val Loss:	1.0741	Accuracy:	0.6389	F1:	0.6283 *
4 / 30: Train Loss:	1.9878	Val Loss:	0.9700	Accuracy:	0.6652	F1:	0.6700 *
5 / 30: Train Loss:	1.1254	Val Loss:	0.9061	Accuracy:	0.7068	F1:	0.7002 *
6 / 30: Train Loss:	0.4867	Val Loss:	0.8808	Accuracy:	0.7155	F1:	0.7219 *
7 / 30: Train Loss:	0.2945	Val Loss:	0.8893	Accuracy:	0.7090	F1:	0.7130
8 / 30: Train Loss:	0.2070	Val Loss:	0.9257	Accuracy:	0.7002	F1:	0.7013
9 / 30: Train Loss:	0.2254	Val Loss:	0.9679	Accuracy:	0.7002	F1:	0.6907
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.81      0.63      0.71        67
               anecdote       0.97      0.84      0.90        43
         cherry picking       0.63      0.71      0.67

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v2-xlarge and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	11.8173	Val Loss:	1.5854	Accuracy:	0.5580	F1:	0.5152 *
2 / 30: Train Loss:	5.0966	Val Loss:	1.6952	Accuracy:	0.5317	F1:	0.5115
3 / 30: Train Loss:	2.7530	Val Loss:	1.1896	Accuracy:	0.6630	F1:	0.6555 *
4 / 30: Train Loss:	1.8121	Val Loss:	1.1379	Accuracy:	0.6696	F1:	0.6509
5 / 30: Train Loss:	2.8761	Val Loss:	1.1428	Accuracy:	0.6608	F1:	0.6301
6 / 30: Train Loss:	2.8735	Val Loss:	1.1391	Accuracy:	0.6608	F1:	0.6444
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.76      0.67      0.71        67
               anecdote       1.00      0.81      0.90        43
         cherry picking       0.71      0.64      0.67        56
      conspiracy theory       0.69      0.87      0.77        39
           fake experts       0.56      0.75      0.64        12
           false choice       0.60      0.69      0.64        13
      false equival

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v2-xlarge and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	6.3958	Val Loss:	1.7630	Accuracy:	0.5252	F1:	0.4835 *
2 / 30: Train Loss:	2.1492	Val Loss:	1.5539	Accuracy:	0.5930	F1:	0.5705 *
3 / 30: Train Loss:	0.8571	Val Loss:	1.3015	Accuracy:	0.6674	F1:	0.6626 *
4 / 30: Train Loss:	0.4105	Val Loss:	1.2486	Accuracy:	0.6740	F1:	0.6725 *
5 / 30: Train Loss:	0.2148	Val Loss:	1.1620	Accuracy:	0.7112	F1:	0.7002 *
6 / 30: Train Loss:	0.0963	Val Loss:	1.1113	Accuracy:	0.6980	F1:	0.6820
7 / 30: Train Loss:	0.0507	Val Loss:	1.0751	Accuracy:	0.7155	F1:	0.6975
8 / 30: Train Loss:	0.0248	Val Loss:	1.0321	Accuracy:	0.7243	F1:	0.7067 *
9 / 30: Train Loss:	0.0324	Val Loss:	1.0372	Accuracy:	0.6980	F1:	0.6916
10 / 30: Train Loss:	0.0139	Val Loss:	1.0196	Accuracy:	0.7243	F1:	0.7129 *
11 / 30: Train Loss:	0.0136	Val Loss:	1.0242	Accuracy:	0.7265	F1:	0.7125
12 / 30: Train Loss:	0.0078	Val Loss:	1.0191	Accuracy:	0.7221	F1:	0.7123
13 / 30: Train Loss:	0.0079	Val Loss:	1.0079	Accuracy:	0.7199	F1:	0.7064
No improvement for 3 epochs. Stopping early.
b