In [1]:
import os
os.environ['TRANSFORMERS_CACHE'] = '/mnt/software/'
import sys
import gc
# assuming data, models, engine in flicc directory:
flicc_path = os.path.join(os.path.dirname(os.getcwd()), '')
sys.path.append(flicc_path)
import torch
from data import ClimateDataset
from models import ClassificationModel
from engine import Engine

# import warnings
# warnings.filterwarnings("ignore")

In [2]:
best_config = {'bert-base-uncased':{'lr':5.0e-5},
                'roberta-large':{'lr':5.0e-5, 'focalloss':True, 'gamma':8},
                'gpt2':{'lr':5.0e-5},
                'bigscience/bloom-560m':{'lr':5.0e-5, 'focalloss':True, 'gamma':8},
                'facebook/opt-350m':{'lr':1.0e-5},
                'EleutherAI/gpt-neo-1.3B':{'lr':5.0e-5}, 
                'microsoft/deberta-base':{'lr':1.0e-5},
                'microsoft/deberta-v2-xlarge':{'lr':1.0e-5, 'focalloss':True, 'gamma':4}}

In [3]:
results = {'test_acc':[],
           'test_f1':[],
           'eval_acc':[],
           'eval_f1':[],
           'wd':[]}

In [4]:
weight_decay = [0.01, 0.1]

for model_checkpoint in best_config.keys():
    for wd in weight_decay:
        print(f'Grid search {model_checkpoint}, learning rate {best_config[model_checkpoint]["lr"]}')
        data = ClimateDataset(model_to_train=4,model_checkpoint=model_checkpoint,dataset_url=flicc_path,batch_size=32)
        data.setup_dataloaders()
        model = ClassificationModel(model_checkpoint=data.model_checkpoint,num_labels=data.num_labels)
        trainer = Engine(epochs=30,labels=data.labels)
        trainer.model = model.model
        trainer.dataset_encoded = data.dataset_encoded
        test_acc, test_f1, eval_acc, eval_f1 = trainer.run(**best_config[model_checkpoint],
                                                            wd=wd,
                                                            train_dataloader=data.train_dataloader,
                                                            eval_dataloader=data.eval_dataloader,
                                                            test_dataloader=data.test_dataloader,
                                                            early_stop=3)
        results['test_acc'].append(test_acc)
        results['test_f1'].append(test_f1)
        results['eval_acc'].append(eval_acc)
        results['eval_f1'].append(eval_f1)
        results['wd'].append(wd)
        print('### '*10)
        print(results)
        print('### '*10)
        with torch.no_grad():
            torch.cuda.empty_cache()
        del data, model, trainer
        gc.collect()

Grid search bert-base-uncased, learning rate 5e-05


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	2.0826	Val Loss:	1.6531	Accuracy:	0.4989	F1:	0.3738 *
2 / 30: Train Loss:	1.2707	Val Loss:	1.3252	Accuracy:	0.5799	F1:	0.5194 *
3 / 30: Train Loss:	0.7319	Val Loss:	1.1258	Accuracy:	0.6324	F1:	0.5639 *
4 / 30: Train Loss:	0.3952	Val Loss:	1.4255	Accuracy:	0.5864	F1:	0.5423
5 / 30: Train Loss:	0.2623	Val Loss:	1.4625	Accuracy:	0.6236	F1:	0.5949 *
6 / 30: Train Loss:	0.1807	Val Loss:	1.3490	Accuracy:	0.6389	F1:	0.6072 *
7 / 30: Train Loss:	0.0986	Val Loss:	1.3551	Accuracy:	0.6324	F1:	0.5810
8 / 30: Train Loss:	0.0513	Val Loss:	1.3074	Accuracy:	0.6783	F1:	0.6467 *
9 / 30: Train Loss:	0.0241	Val Loss:	1.3179	Accuracy:	0.6849	F1:	0.6506 *
10 / 30: Train Loss:	0.0155	Val Loss:	1.3399	Accuracy:	0.6915	F1:	0.6668 *
11 / 30: Train Loss:	0.0142	Val Loss:	1.3720	Accuracy:	0.7024	F1:	0.6720 *
12 / 30: Train Loss:	0.0082	Val Loss:	1.4112	Accuracy:	0.6958	F1:	0.6679
13 / 30: Train Loss:	0.0067	Val Loss:	1.4371	Accuracy:	0.6915	F1:	0.6635
14 / 30: Train Loss:	0.0058	Val Loss:	1.46

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	2.0645	Val Loss:	1.5972	Accuracy:	0.5098	F1:	0.4014 *
2 / 30: Train Loss:	1.2741	Val Loss:	1.2860	Accuracy:	0.5733	F1:	0.4912 *
3 / 30: Train Loss:	0.7838	Val Loss:	1.1558	Accuracy:	0.6346	F1:	0.5662 *
4 / 30: Train Loss:	0.4551	Val Loss:	1.2947	Accuracy:	0.6346	F1:	0.5817 *
5 / 30: Train Loss:	0.2872	Val Loss:	1.1718	Accuracy:	0.6761	F1:	0.6286 *
6 / 30: Train Loss:	0.1453	Val Loss:	1.2508	Accuracy:	0.6630	F1:	0.6413 *
7 / 30: Train Loss:	0.0624	Val Loss:	1.3152	Accuracy:	0.6849	F1:	0.6613 *
8 / 30: Train Loss:	0.0404	Val Loss:	1.3535	Accuracy:	0.6586	F1:	0.6180
9 / 30: Train Loss:	0.0338	Val Loss:	1.3915	Accuracy:	0.6652	F1:	0.6335
10 / 30: Train Loss:	0.0336	Val Loss:	1.5391	Accuracy:	0.6346	F1:	0.6125
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.86      0.57      0.68        67
               anecdote       0.97      0.84  

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	15.3941	Val Loss:	2.5120	Accuracy:	0.0306	F1:	0.0050 *
2 / 30: Train Loss:	15.4550	Val Loss:	2.5078	Accuracy:	0.0263	F1:	0.0043
3 / 30: Train Loss:	15.4795	Val Loss:	2.5130	Accuracy:	0.0284	F1:	0.0046
4 / 30: Train Loss:	15.3396	Val Loss:	2.5017	Accuracy:	0.0263	F1:	0.0043
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.00      0.00      0.00        67
               anecdote       0.00      0.00      0.00        43
         cherry picking       0.00      0.00      0.00        56
      conspiracy theory       0.00      0.00      0.00        39
           fake experts       0.00      0.00      0.00        12
           false choice       0.00      0.00      0.00        13
      false equivalence       0.03      1.00      0.06        14
impossible expectations       0.00      0.00      0.00        37
      misrepresentation       0.

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	15.6421	Val Loss:	2.5155	Accuracy:	0.0263	F1:	0.0043 *
2 / 30: Train Loss:	15.5149	Val Loss:	2.5249	Accuracy:	0.0263	F1:	0.0043
3 / 30: Train Loss:	15.6829	Val Loss:	2.5278	Accuracy:	0.0284	F1:	0.0046 *
4 / 30: Train Loss:	15.4163	Val Loss:	2.5176	Accuracy:	0.0263	F1:	0.0043
5 / 30: Train Loss:	15.4339	Val Loss:	2.5215	Accuracy:	0.0263	F1:	0.0043
6 / 30: Train Loss:	15.4083	Val Loss:	2.5245	Accuracy:	0.0263	F1:	0.0043
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.00      0.00      0.00        67
               anecdote       0.00      0.00      0.00        43
         cherry picking       0.00      0.00      0.00        56
      conspiracy theory       0.00      0.00      0.00        39
           fake experts       0.00      0.00      0.00        12
           false choice       0.03      1.00      0.06        13
      false eq

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	2.6266	Val Loss:	2.3424	Accuracy:	0.2013	F1:	0.0824 *
2 / 30: Train Loss:	2.3336	Val Loss:	2.2754	Accuracy:	0.2276	F1:	0.1327 *
3 / 30: Train Loss:	2.1796	Val Loss:	2.0538	Accuracy:	0.3107	F1:	0.2502 *
4 / 30: Train Loss:	1.7931	Val Loss:	1.7489	Accuracy:	0.4048	F1:	0.3452 *
5 / 30: Train Loss:	1.4814	Val Loss:	1.5686	Accuracy:	0.4617	F1:	0.4122 *
6 / 30: Train Loss:	1.2192	Val Loss:	1.5846	Accuracy:	0.4705	F1:	0.4149 *
7 / 30: Train Loss:	1.0167	Val Loss:	1.6382	Accuracy:	0.4923	F1:	0.4706 *
8 / 30: Train Loss:	0.8113	Val Loss:	1.8563	Accuracy:	0.4858	F1:	0.4760 *
9 / 30: Train Loss:	0.7201	Val Loss:	1.7222	Accuracy:	0.4967	F1:	0.4909 *
10 / 30: Train Loss:	0.4860	Val Loss:	1.8272	Accuracy:	0.5252	F1:	0.5069 *
11 / 30: Train Loss:	0.3716	Val Loss:	2.0418	Accuracy:	0.4836	F1:	0.4763
12 / 30: Train Loss:	0.2644	Val Loss:	1.7428	Accuracy:	0.5492	F1:	0.5360 *
13 / 30: Train Loss:	0.1687	Val Loss:	1.8002	Accuracy:	0.5514	F1:	0.5405 *
14 / 30: Train Loss:	0.1315	Val Loss

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	2.6265	Val Loss:	2.3423	Accuracy:	0.2013	F1:	0.0824 *
2 / 30: Train Loss:	2.3334	Val Loss:	2.2756	Accuracy:	0.2276	F1:	0.1327 *
3 / 30: Train Loss:	2.1786	Val Loss:	2.0516	Accuracy:	0.3107	F1:	0.2493 *
4 / 30: Train Loss:	1.7914	Val Loss:	1.7472	Accuracy:	0.4026	F1:	0.3418 *
5 / 30: Train Loss:	1.4814	Val Loss:	1.5708	Accuracy:	0.4617	F1:	0.4110 *
6 / 30: Train Loss:	1.2124	Val Loss:	1.5917	Accuracy:	0.4770	F1:	0.4261 *
7 / 30: Train Loss:	1.0050	Val Loss:	1.6452	Accuracy:	0.4814	F1:	0.4554 *
8 / 30: Train Loss:	0.7938	Val Loss:	1.8781	Accuracy:	0.4945	F1:	0.4822 *
9 / 30: Train Loss:	0.7139	Val Loss:	1.7647	Accuracy:	0.5120	F1:	0.4978 *
10 / 30: Train Loss:	0.4824	Val Loss:	1.7497	Accuracy:	0.5558	F1:	0.5337 *
11 / 30: Train Loss:	0.3627	Val Loss:	2.1020	Accuracy:	0.4880	F1:	0.4735
12 / 30: Train Loss:	0.2540	Val Loss:	1.7301	Accuracy:	0.5470	F1:	0.5303
13 / 30: Train Loss:	0.1625	Val Loss:	1.7471	Accuracy:	0.5492	F1:	0.5268
No improvement for 3 epochs. Stopping ea

Some weights of BloomForSequenceClassification were not initialized from the model checkpoint at bigscience/bloom-560m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BloomTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	106.9985	Val Loss:	2.2956	Accuracy:	0.3239	F1:	0.2751 *
2 / 30: Train Loss:	9.8229	Val Loss:	1.9329	Accuracy:	0.3829	F1:	0.3847 *
3 / 30: Train Loss:	5.7207	Val Loss:	2.3164	Accuracy:	0.4923	F1:	0.4510 *
4 / 30: Train Loss:	4.5515	Val Loss:	2.3392	Accuracy:	0.4792	F1:	0.4556 *
5 / 30: Train Loss:	6.7059	Val Loss:	2.7389	Accuracy:	0.4420	F1:	0.4098
6 / 30: Train Loss:	3.0667	Val Loss:	2.0658	Accuracy:	0.5098	F1:	0.5082 *
7 / 30: Train Loss:	1.3043	Val Loss:	2.4922	Accuracy:	0.4923	F1:	0.4757
8 / 30: Train Loss:	1.8395	Val Loss:	4.9563	Accuracy:	0.2713	F1:	0.2961
9 / 30: Train Loss:	3.8361	Val Loss:	2.7184	Accuracy:	0.4617	F1:	0.4325
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.50      0.66      0.57        67
               anecdote       0.87      0.47      0.61        43
         cherry picking       0.42      0.59      0.49  

Some weights of BloomForSequenceClassification were not initialized from the model checkpoint at bigscience/bloom-560m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BloomTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	105.3171	Val Loss:	2.0144	Accuracy:	0.3173	F1:	0.2776 *
2 / 30: Train Loss:	11.3037	Val Loss:	2.7408	Accuracy:	0.2998	F1:	0.2455
3 / 30: Train Loss:	4.5550	Val Loss:	1.8157	Accuracy:	0.5208	F1:	0.5237 *
4 / 30: Train Loss:	5.0867	Val Loss:	2.2948	Accuracy:	0.4551	F1:	0.3902
5 / 30: Train Loss:	3.6427	Val Loss:	2.1893	Accuracy:	0.5055	F1:	0.4604
6 / 30: Train Loss:	3.0571	Val Loss:	3.1745	Accuracy:	0.4004	F1:	0.4168
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.60      0.60      0.60        67
               anecdote       0.82      0.74      0.78        43
         cherry picking       0.51      0.43      0.47        56
      conspiracy theory       0.84      0.54      0.66        39
           fake experts       0.50      0.67      0.57        12
           false choice       0.78      0.54      0.64        13
      false equiv

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	2.5056	Val Loss:	1.9545	Accuracy:	0.2888	F1:	0.1962 *
2 / 30: Train Loss:	2.1424	Val Loss:	2.2655	Accuracy:	0.2254	F1:	0.1229
3 / 30: Train Loss:	2.1910	Val Loss:	2.4247	Accuracy:	0.1751	F1:	0.0885
4 / 30: Train Loss:	2.4311	Val Loss:	2.3942	Accuracy:	0.1466	F1:	0.0818
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.44      0.37      0.40        67
               anecdote       0.59      0.77      0.67        43
         cherry picking       0.54      0.27      0.36        56
      conspiracy theory       0.17      0.64      0.26        39
           fake experts       0.00      0.00      0.00        12
           false choice       0.00      0.00      0.00        13
      false equivalence       0.00      0.00      0.00        14
impossible expectations       0.21      0.59      0.31        37
      misrepresentation       0.18  

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	2.5047	Val Loss:	1.9779	Accuracy:	0.2801	F1:	0.1877 *
2 / 30: Train Loss:	2.2058	Val Loss:	2.3489	Accuracy:	0.1751	F1:	0.0994
3 / 30: Train Loss:	2.5033	Val Loss:	2.3760	Accuracy:	0.1488	F1:	0.0489
4 / 30: Train Loss:	2.4035	Val Loss:	2.3347	Accuracy:	0.1729	F1:	0.0632
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.46      0.33      0.38        67
               anecdote       0.61      0.77      0.68        43
         cherry picking       0.53      0.16      0.25        56
      conspiracy theory       0.17      0.67      0.27        39
           fake experts       0.00      0.00      0.00        12
           false choice       0.00      0.00      0.00        13
      false equivalence       0.00      0.00      0.00        14
impossible expectations       0.20      0.70      0.31        37
      misrepresentation       0.19  

Some weights of GPTNeoForSequenceClassification were not initialized from the model checkpoint at EleutherAI/gpt-neo-1.3B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	2.2442	Val Loss:	1.5294	Accuracy:	0.4792	F1:	0.3912 *
2 / 30: Train Loss:	1.1459	Val Loss:	1.4352	Accuracy:	0.5317	F1:	0.5205 *
3 / 30: Train Loss:	0.3086	Val Loss:	1.8105	Accuracy:	0.5339	F1:	0.5146
4 / 30: Train Loss:	0.1979	Val Loss:	1.9450	Accuracy:	0.5624	F1:	0.5457 *
5 / 30: Train Loss:	0.0969	Val Loss:	1.8695	Accuracy:	0.5470	F1:	0.5191
6 / 30: Train Loss:	0.0780	Val Loss:	1.8435	Accuracy:	0.5733	F1:	0.5599 *
7 / 30: Train Loss:	0.0723	Val Loss:	1.8086	Accuracy:	0.5842	F1:	0.5861 *
8 / 30: Train Loss:	0.0649	Val Loss:	1.8108	Accuracy:	0.5864	F1:	0.5859
9 / 30: Train Loss:	0.0691	Val Loss:	1.8164	Accuracy:	0.5908	F1:	0.5893 *
10 / 30: Train Loss:	0.0607	Val Loss:	1.8229	Accuracy:	0.5996	F1:	0.5971 *
11 / 30: Train Loss:	0.0604	Val Loss:	1.8296	Accuracy:	0.6039	F1:	0.6040 *
12 / 30: Train Loss:	0.0532	Val Loss:	1.8381	Accuracy:	0.6061	F1:	0.6057 *
13 / 30: Train Loss:	0.0469	Val Loss:	1.8469	Accuracy:	0.5974	F1:	0.5955
14 / 30: Train Loss:	0.0507	Val Loss:	1.89

Some weights of GPTNeoForSequenceClassification were not initialized from the model checkpoint at EleutherAI/gpt-neo-1.3B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	2.1494	Val Loss:	1.4742	Accuracy:	0.4989	F1:	0.4383 *
2 / 30: Train Loss:	0.9676	Val Loss:	1.3882	Accuracy:	0.5514	F1:	0.5557 *
3 / 30: Train Loss:	0.2490	Val Loss:	1.6440	Accuracy:	0.5646	F1:	0.5326
4 / 30: Train Loss:	0.1344	Val Loss:	1.7704	Accuracy:	0.5580	F1:	0.5603 *
5 / 30: Train Loss:	0.1499	Val Loss:	1.6068	Accuracy:	0.5799	F1:	0.5812 *
6 / 30: Train Loss:	0.0823	Val Loss:	1.6461	Accuracy:	0.6149	F1:	0.5870 *
7 / 30: Train Loss:	0.1085	Val Loss:	2.0847	Accuracy:	0.5317	F1:	0.5068
8 / 30: Train Loss:	0.1490	Val Loss:	1.6824	Accuracy:	0.6193	F1:	0.6077 *
9 / 30: Train Loss:	0.0504	Val Loss:	1.9631	Accuracy:	0.5755	F1:	0.5819
10 / 30: Train Loss:	0.0351	Val Loss:	1.9194	Accuracy:	0.6236	F1:	0.6057
11 / 30: Train Loss:	0.0095	Val Loss:	1.8912	Accuracy:	0.5974	F1:	0.5833
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.52      

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	2.3660	Val Loss:	2.0162	Accuracy:	0.3479	F1:	0.2233 *
2 / 30: Train Loss:	1.7856	Val Loss:	1.5339	Accuracy:	0.4967	F1:	0.3598 *
3 / 30: Train Loss:	1.3403	Val Loss:	1.3740	Accuracy:	0.5558	F1:	0.4308 *
4 / 30: Train Loss:	1.0365	Val Loss:	1.2255	Accuracy:	0.5996	F1:	0.4994 *
5 / 30: Train Loss:	0.7959	Val Loss:	1.1499	Accuracy:	0.6346	F1:	0.5812 *
6 / 30: Train Loss:	0.6224	Val Loss:	1.1295	Accuracy:	0.6521	F1:	0.6190 *
7 / 30: Train Loss:	0.4805	Val Loss:	1.1669	Accuracy:	0.6455	F1:	0.6170
8 / 30: Train Loss:	0.3862	Val Loss:	1.3660	Accuracy:	0.6193	F1:	0.5808
9 / 30: Train Loss:	0.3102	Val Loss:	1.3178	Accuracy:	0.6368	F1:	0.6288 *
10 / 30: Train Loss:	0.2529	Val Loss:	1.2350	Accuracy:	0.6761	F1:	0.6675 *
11 / 30: Train Loss:	0.1917	Val Loss:	1.2433	Accuracy:	0.6740	F1:	0.6729 *
12 / 30: Train Loss:	0.1468	Val Loss:	1.3839	Accuracy:	0.6411	F1:	0.6382
13 / 30: Train Loss:	0.1154	Val Loss:	1.3435	Accuracy:	0.6608	F1:	0.6531
14 / 30: Train Loss:	0.0808	Val Loss:	1.30

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	2.3659	Val Loss:	2.0159	Accuracy:	0.3479	F1:	0.2233 *
2 / 30: Train Loss:	1.7853	Val Loss:	1.5334	Accuracy:	0.4967	F1:	0.3598 *
3 / 30: Train Loss:	1.3402	Val Loss:	1.3696	Accuracy:	0.5558	F1:	0.4308 *
4 / 30: Train Loss:	1.0260	Val Loss:	1.2266	Accuracy:	0.6061	F1:	0.4942 *
5 / 30: Train Loss:	0.7902	Val Loss:	1.1374	Accuracy:	0.6368	F1:	0.5711 *
6 / 30: Train Loss:	0.6133	Val Loss:	1.1232	Accuracy:	0.6565	F1:	0.6209 *
7 / 30: Train Loss:	0.4791	Val Loss:	1.1839	Accuracy:	0.6477	F1:	0.6259 *
8 / 30: Train Loss:	0.3811	Val Loss:	1.3164	Accuracy:	0.6368	F1:	0.6161
9 / 30: Train Loss:	0.2981	Val Loss:	1.2401	Accuracy:	0.6696	F1:	0.6561 *
10 / 30: Train Loss:	0.2376	Val Loss:	1.2508	Accuracy:	0.6718	F1:	0.6583 *
11 / 30: Train Loss:	0.1918	Val Loss:	1.2296	Accuracy:	0.6827	F1:	0.6846 *
12 / 30: Train Loss:	0.1465	Val Loss:	1.3557	Accuracy:	0.6521	F1:	0.6464
13 / 30: Train Loss:	0.1139	Val Loss:	1.3257	Accuracy:	0.6761	F1:	0.6716
14 / 30: Train Loss:	0.0882	Val Loss:	1.

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v2-xlarge and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	21.0389	Val Loss:	2.1833	Accuracy:	0.2932	F1:	0.2272 *
2 / 30: Train Loss:	11.5078	Val Loss:	1.3374	Accuracy:	0.5777	F1:	0.5519 *
3 / 30: Train Loss:	4.9419	Val Loss:	1.1115	Accuracy:	0.6543	F1:	0.6404 *
4 / 30: Train Loss:	2.4948	Val Loss:	0.9952	Accuracy:	0.6674	F1:	0.6597 *
5 / 30: Train Loss:	1.2027	Val Loss:	0.9343	Accuracy:	0.7155	F1:	0.7126 *
6 / 30: Train Loss:	0.6009	Val Loss:	0.8813	Accuracy:	0.7155	F1:	0.7099
7 / 30: Train Loss:	0.3988	Val Loss:	0.9104	Accuracy:	0.7068	F1:	0.7012
8 / 30: Train Loss:	0.2287	Val Loss:	0.8626	Accuracy:	0.7199	F1:	0.7183 *
9 / 30: Train Loss:	0.1779	Val Loss:	0.8560	Accuracy:	0.7177	F1:	0.7072
10 / 30: Train Loss:	0.0951	Val Loss:	0.8309	Accuracy:	0.7352	F1:	0.7256 *
11 / 30: Train Loss:	0.0695	Val Loss:	0.8607	Accuracy:	0.7330	F1:	0.7156
12 / 30: Train Loss:	0.0662	Val Loss:	0.8494	Accuracy:	0.7243	F1:	0.7170
13 / 30: Train Loss:	0.0421	Val Loss:	0.8227	Accuracy:	0.7330	F1:	0.7278 *
14 / 30: Train Loss:	0.0205	Val Loss:	0.82

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v2-xlarge and are newly initialized: ['classifier.weight', 'pooler.dense.bias', 'classifier.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	20.8805	Val Loss:	2.0141	Accuracy:	0.3348	F1:	0.2547 *
2 / 30: Train Loss:	10.2292	Val Loss:	1.2911	Accuracy:	0.6193	F1:	0.5923 *
3 / 30: Train Loss:	4.2030	Val Loss:	1.1167	Accuracy:	0.6389	F1:	0.6179 *
4 / 30: Train Loss:	1.9775	Val Loss:	1.0339	Accuracy:	0.6499	F1:	0.6522 *
5 / 30: Train Loss:	1.1588	Val Loss:	0.9071	Accuracy:	0.7046	F1:	0.7013 *
6 / 30: Train Loss:	0.5947	Val Loss:	0.9418	Accuracy:	0.6980	F1:	0.6912
7 / 30: Train Loss:	0.6196	Val Loss:	0.9151	Accuracy:	0.7155	F1:	0.6972
8 / 30: Train Loss:	0.3262	Val Loss:	0.9448	Accuracy:	0.7002	F1:	0.6899
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.74      0.69      0.71        67
               anecdote       0.95      0.86      0.90        43
         cherry picking       0.60      0.79      0.68        56
      conspiracy theory       0.79      0.69      0.74        3

In [5]:
import pandas as pd

In [6]:
pd.DataFrame(results)

Unnamed: 0,test_acc,test_f1,eval_acc,eval_f1,wd
0,0.652344,0.636726,0.702407,0.671951,0.01
1,0.632812,0.624441,0.684902,0.661278,0.1
2,0.03125,0.005051,0.030635,0.004954,0.01
3,0.027344,0.004436,0.028446,0.00461,0.1
4,0.601562,0.570647,0.582057,0.577425,0.01
5,0.539062,0.504938,0.555799,0.533732,0.1
6,0.488281,0.464992,0.509847,0.508219,0.01
7,0.507812,0.511482,0.520788,0.52366,0.1
8,0.296875,0.213467,0.28884,0.196232,0.01
9,0.300781,0.215178,0.280088,0.187681,0.1
