In [1]:
import os
# Point the Hugging Face cache at a fixed location on this machine.
# NOTE(review): presumably this has to be set before `transformers` is first
# imported (likely by the project modules below) to take effect -- confirm.
os.environ['TRANSFORMERS_CACHE'] = '/mnt/software/'
import sys
import gc
# The project modules `data`, `models` and `engine` are assumed to live in the
# flicc directory, taken here to be the parent of the current working directory.
# Joining with '' appends a trailing path separator to that parent path.
# The same path is later passed to ClimateDataset as `dataset_url`.
flicc_path = os.path.join(os.path.dirname(os.getcwd()), '')
# Make the project modules importable; must run before the local imports below.
sys.path.append(flicc_path)
import torch
from data import ClimateDataset
from models import ClassificationModel
from engine import Engine

# import warnings
# warnings.filterwarnings("ignore")

In [2]:
# Per-checkpoint training settings, keyed by Hugging Face model checkpoint name.
# Each inner dict is unpacked as keyword arguments into `trainer.run(...)` in
# the sweep cell, and 'lr' is the learning rate reported in the progress line.
# NOTE(review): 'wd' is presumably weight decay, and 'focalloss'/'gamma'
# presumably enable focal loss and set its focusing parameter -- confirm in Engine.run.
best_config = {
    'bert-base-uncased': {'lr': 5e-5, 'wd': 0.0},
    'roberta-large': {'lr': 5e-5, 'focalloss': True, 'gamma': 8, 'wd': 0.0},
    'gpt2': {'lr': 5e-5, 'wd': 0.01},
    'bigscience/bloom-560m': {'lr': 5e-5, 'focalloss': True, 'gamma': 8, 'wd': 0.0},
    'facebook/opt-350m': {'lr': 1e-5, 'wd': 0.0},
    'EleutherAI/gpt-neo-1.3B': {'lr': 5e-5, 'wd': 0.0},
    'microsoft/deberta-base': {'lr': 1e-5, 'wd': 0.01},
    'microsoft/deberta-v2-xlarge': {'lr': 1e-5, 'focalloss': True, 'gamma': 4, 'wd': 0.01},
}

In [3]:
# Column-oriented accumulator for the sweep: one independent, initially empty
# list per column, appended to once per training run and later turned into a
# DataFrame.
results = {
    column: []
    for column in ('test_acc', 'test_f1', 'eval_acc', 'eval_f1', 'rank, alpha', 'model')
}

In [4]:
# LoRA settings to evaluate. The two lists are paired element-wise by zip()
# in the loop below (alpha=8 with r=8, alpha=16 with r=16); this is NOT a
# cross product of all alpha/rank combinations.
alphas = [8, 16]
ranks = [8, 16]


def _run_lora_config(model_checkpoint, alpha, rank):
    """Train and evaluate one checkpoint with a single LoRA (alpha, rank) pair.

    The dataset, model and trainer are created as locals of this function, so
    their references are dropped as soon as it returns. The caller can then
    garbage-collect and release cached GPU memory before the next run.

    Args:
        model_checkpoint: key of `best_config`, also passed to ClimateDataset.
        alpha: value forwarded as `alpha` to ClassificationModel.
        rank: value forwarded as `r` to ClassificationModel.

    Returns:
        Whatever `Engine.run` returns; the caller unpacks it as
        (test_acc, test_f1, eval_acc, eval_f1).
    """
    data = ClimateDataset(model_to_train=4,
                          model_checkpoint=model_checkpoint,
                          dataset_url=flicc_path,
                          batch_size=32)
    data.setup_dataloaders()
    model = ClassificationModel(model_checkpoint=data.model_checkpoint,
                                num_labels=data.num_labels,
                                lora=True,
                                alpha=alpha,
                                r=rank,
                                dropout=0.0)
    trainer = Engine(epochs=30, labels=data.labels)
    trainer.model = model.model
    trainer.dataset_encoded = data.dataset_encoded
    # The per-checkpoint settings from best_config are passed through as kwargs.
    return trainer.run(**best_config[model_checkpoint],
                       train_dataloader=data.train_dataloader,
                       eval_dataloader=data.eval_dataloader,
                       test_dataloader=data.test_dataloader,
                       early_stop=3)


for model_checkpoint in best_config.keys():
    for a, r in zip(alphas, ranks):
        # Include the LoRA setting so the two runs per checkpoint can be told
        # apart in the log.
        print(f'Grid search {model_checkpoint}, learning rate {best_config[model_checkpoint]["lr"]}, rank {r}, alpha {a}')
        test_acc, test_f1, eval_acc, eval_f1 = _run_lora_config(model_checkpoint, a, r)
        # Free memory only AFTER the model/data/trainer references are gone:
        # empty_cache() can only return blocks that no live tensor still uses,
        # so it has to follow the release of the objects and the GC pass.
        gc.collect()
        torch.cuda.empty_cache()
        results['test_acc'].append(test_acc)
        results['test_f1'].append(test_f1)
        results['eval_acc'].append(eval_acc)
        results['eval_f1'].append(eval_f1)
        # alpha equals rank for every pair in the lists above, so a single
        # value identifies the setting in this column.
        results['rank, alpha'].append(r)
        results['model'].append(model_checkpoint)
        print('### '*10)
        print(results)
        print('### '*10)

Grid search bert-base-uncased, learning rate 5e-05


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	2.4386	Val Loss:	2.3611	Accuracy:	0.1554	F1:	0.0359 *
2 / 30: Train Loss:	2.3651	Val Loss:	2.3339	Accuracy:	0.1619	F1:	0.0429 *
3 / 30: Train Loss:	2.3422	Val Loss:	2.2988	Accuracy:	0.1860	F1:	0.0723 *
4 / 30: Train Loss:	2.2950	Val Loss:	2.2318	Accuracy:	0.2276	F1:	0.1018 *
5 / 30: Train Loss:	2.2238	Val Loss:	2.1524	Accuracy:	0.2604	F1:	0.1201 *
6 / 30: Train Loss:	2.1352	Val Loss:	2.0615	Accuracy:	0.2845	F1:	0.1507 *
7 / 30: Train Loss:	2.0462	Val Loss:	1.9746	Accuracy:	0.3129	F1:	0.1741 *
8 / 30: Train Loss:	1.9507	Val Loss:	1.8959	Accuracy:	0.3370	F1:	0.1940 *
9 / 30: Train Loss:	1.8689	Val Loss:	1.8234	Accuracy:	0.3545	F1:	0.2184 *
10 / 30: Train Loss:	1.8023	Val Loss:	1.7666	Accuracy:	0.3851	F1:	0.2433 *
11 / 30: Train Loss:	1.7494	Val Loss:	1.7254	Accuracy:	0.4004	F1:	0.2604 *
12 / 30: Train Loss:	1.6952	Val Loss:	1.6919	Accuracy:	0.4267	F1:	0.2891 *
13 / 30: Train Loss:	1.6663	Val Loss:	1.6681	Accuracy:	0.4354	F1:	0.2930 *
14 / 30: Train Loss:	1.6428	Val Lo

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	2.4376	Val Loss:	2.3581	Accuracy:	0.1554	F1:	0.0357 *
2 / 30: Train Loss:	2.3594	Val Loss:	2.3196	Accuracy:	0.1707	F1:	0.0524 *
3 / 30: Train Loss:	2.3158	Val Loss:	2.2533	Accuracy:	0.2254	F1:	0.0935 *
4 / 30: Train Loss:	2.2332	Val Loss:	2.1610	Accuracy:	0.2560	F1:	0.1147 *
5 / 30: Train Loss:	2.1414	Val Loss:	2.0686	Accuracy:	0.2779	F1:	0.1435 *
6 / 30: Train Loss:	2.0354	Val Loss:	1.9806	Accuracy:	0.3173	F1:	0.1778 *
7 / 30: Train Loss:	1.9464	Val Loss:	1.8908	Accuracy:	0.3567	F1:	0.2157 *
8 / 30: Train Loss:	1.8515	Val Loss:	1.8104	Accuracy:	0.3742	F1:	0.2428 *
9 / 30: Train Loss:	1.7724	Val Loss:	1.7469	Accuracy:	0.3982	F1:	0.2645 *
10 / 30: Train Loss:	1.7145	Val Loss:	1.7007	Accuracy:	0.4114	F1:	0.2837 *
11 / 30: Train Loss:	1.6641	Val Loss:	1.6616	Accuracy:	0.4289	F1:	0.3010 *
12 / 30: Train Loss:	1.6072	Val Loss:	1.6463	Accuracy:	0.4267	F1:	0.3069 *
13 / 30: Train Loss:	1.5771	Val Loss:	1.6105	Accuracy:	0.4398	F1:	0.3134 *
14 / 30: Train Loss:	1.5452	Val Lo

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	15.1312	Val Loss:	2.4830	Accuracy:	0.0328	F1:	0.0189 *
2 / 30: Train Loss:	14.8905	Val Loss:	2.4209	Accuracy:	0.1466	F1:	0.1300 *
3 / 30: Train Loss:	11.8537	Val Loss:	1.9710	Accuracy:	0.3632	F1:	0.3666 *
4 / 30: Train Loss:	7.2394	Val Loss:	1.6302	Accuracy:	0.5383	F1:	0.5128 *
5 / 30: Train Loss:	5.0536	Val Loss:	1.4881	Accuracy:	0.5864	F1:	0.5517 *
6 / 30: Train Loss:	4.0893	Val Loss:	1.4126	Accuracy:	0.6039	F1:	0.5800 *
7 / 30: Train Loss:	3.1532	Val Loss:	1.3103	Accuracy:	0.6389	F1:	0.6077 *
8 / 30: Train Loss:	2.7447	Val Loss:	1.3031	Accuracy:	0.6411	F1:	0.6184 *
9 / 30: Train Loss:	2.3744	Val Loss:	1.2422	Accuracy:	0.6389	F1:	0.6228 *
10 / 30: Train Loss:	2.1588	Val Loss:	1.2075	Accuracy:	0.6674	F1:	0.6523 *
11 / 30: Train Loss:	1.7137	Val Loss:	1.1901	Accuracy:	0.6608	F1:	0.6468
12 / 30: Train Loss:	1.5767	Val Loss:	1.1745	Accuracy:	0.6696	F1:	0.6468
13 / 30: Train Loss:	1.3938	Val Loss:	1.1429	Accuracy:	0.6761	F1:	0.6543 *
14 / 30: Train Loss:	1.2717	Val Los

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	15.1407	Val Loss:	2.4814	Accuracy:	0.0350	F1:	0.0216 *
2 / 30: Train Loss:	14.6096	Val Loss:	2.3184	Accuracy:	0.2123	F1:	0.2138 *
3 / 30: Train Loss:	9.2426	Val Loss:	1.7867	Accuracy:	0.4245	F1:	0.4337 *
4 / 30: Train Loss:	5.7763	Val Loss:	1.5317	Accuracy:	0.5996	F1:	0.5677 *
5 / 30: Train Loss:	4.2387	Val Loss:	1.4097	Accuracy:	0.6105	F1:	0.5866 *
6 / 30: Train Loss:	3.4006	Val Loss:	1.3518	Accuracy:	0.6236	F1:	0.6006 *
7 / 30: Train Loss:	2.6459	Val Loss:	1.2700	Accuracy:	0.6565	F1:	0.6336 *
8 / 30: Train Loss:	2.3657	Val Loss:	1.2621	Accuracy:	0.6586	F1:	0.6431 *
9 / 30: Train Loss:	2.0616	Val Loss:	1.2102	Accuracy:	0.6433	F1:	0.6290
10 / 30: Train Loss:	1.8194	Val Loss:	1.1851	Accuracy:	0.6630	F1:	0.6470 *
11 / 30: Train Loss:	1.4660	Val Loss:	1.1528	Accuracy:	0.6718	F1:	0.6571 *
12 / 30: Train Loss:	1.3156	Val Loss:	1.1371	Accuracy:	0.6718	F1:	0.6459
13 / 30: Train Loss:	1.1863	Val Loss:	1.1277	Accuracy:	0.6718	F1:	0.6452
14 / 30: Train Loss:	1.1140	Val Loss:	

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	5.4689	Val Loss:	3.6016	Accuracy:	0.1028	F1:	0.0501 *
2 / 30: Train Loss:	3.2795	Val Loss:	3.0685	Accuracy:	0.1116	F1:	0.0579 *
3 / 30: Train Loss:	2.8956	Val Loss:	2.8149	Accuracy:	0.1204	F1:	0.0629 *
4 / 30: Train Loss:	2.7121	Val Loss:	2.6848	Accuracy:	0.1357	F1:	0.0729 *
5 / 30: Train Loss:	2.6753	Val Loss:	2.6046	Accuracy:	0.1335	F1:	0.0763 *
6 / 30: Train Loss:	2.6194	Val Loss:	2.5411	Accuracy:	0.1422	F1:	0.0854 *
7 / 30: Train Loss:	2.5153	Val Loss:	2.4992	Accuracy:	0.1444	F1:	0.0866 *
8 / 30: Train Loss:	2.4712	Val Loss:	2.4763	Accuracy:	0.1554	F1:	0.0934 *
9 / 30: Train Loss:	2.4511	Val Loss:	2.4519	Accuracy:	0.1641	F1:	0.0995 *
10 / 30: Train Loss:	2.4300	Val Loss:	2.4301	Accuracy:	0.1729	F1:	0.1057 *
11 / 30: Train Loss:	2.4037	Val Loss:	2.4111	Accuracy:	0.1838	F1:	0.1112 *
12 / 30: Train Loss:	2.4065	Val Loss:	2.3996	Accuracy:	0.1772	F1:	0.1052
13 / 30: Train Loss:	2.3741	Val Loss:	2.3851	Accuracy:	0.1751	F1:	0.1044
14 / 30: Train Loss:	2.3713	Val Loss:	

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	5.0242	Val Loss:	3.4205	Accuracy:	0.1028	F1:	0.0459 *
2 / 30: Train Loss:	3.0267	Val Loss:	2.8438	Accuracy:	0.1291	F1:	0.0675 *
3 / 30: Train Loss:	2.7676	Val Loss:	2.6795	Accuracy:	0.1357	F1:	0.0767 *
4 / 30: Train Loss:	2.6261	Val Loss:	2.5840	Accuracy:	0.1400	F1:	0.0847 *
5 / 30: Train Loss:	2.5984	Val Loss:	2.5267	Accuracy:	0.1488	F1:	0.0915 *
6 / 30: Train Loss:	2.5451	Val Loss:	2.4769	Accuracy:	0.1554	F1:	0.0968 *
7 / 30: Train Loss:	2.4658	Val Loss:	2.4495	Accuracy:	0.1575	F1:	0.0978 *
8 / 30: Train Loss:	2.4342	Val Loss:	2.4358	Accuracy:	0.1532	F1:	0.0925
9 / 30: Train Loss:	2.4172	Val Loss:	2.4173	Accuracy:	0.1685	F1:	0.1008 *
10 / 30: Train Loss:	2.4016	Val Loss:	2.4024	Accuracy:	0.1707	F1:	0.1010 *
11 / 30: Train Loss:	2.3854	Val Loss:	2.3838	Accuracy:	0.1707	F1:	0.0981
12 / 30: Train Loss:	2.3811	Val Loss:	2.3801	Accuracy:	0.1751	F1:	0.1010
13 / 30: Train Loss:	2.3583	Val Loss:	2.3676	Accuracy:	0.1838	F1:	0.1071 *
14 / 30: Train Loss:	2.3560	Val Loss:	2.

Some weights of BloomForSequenceClassification were not initialized from the model checkpoint at bigscience/bloom-560m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BloomTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	430.4662	Val Loss:	20.9324	Accuracy:	0.1116	F1:	0.0667 *
2 / 30: Train Loss:	139.4984	Val Loss:	6.1853	Accuracy:	0.1444	F1:	0.1254 *
3 / 30: Train Loss:	40.1736	Val Loss:	3.4558	Accuracy:	0.2057	F1:	0.2028 *
4 / 30: Train Loss:	24.2939	Val Loss:	2.8245	Accuracy:	0.2429	F1:	0.2354 *
5 / 30: Train Loss:	18.1128	Val Loss:	2.5375	Accuracy:	0.2582	F1:	0.2472 *
6 / 30: Train Loss:	14.6536	Val Loss:	2.3664	Accuracy:	0.2735	F1:	0.2714 *
7 / 30: Train Loss:	12.4245	Val Loss:	2.2279	Accuracy:	0.3042	F1:	0.3075 *
8 / 30: Train Loss:	10.5713	Val Loss:	2.1261	Accuracy:	0.3304	F1:	0.3419 *
9 / 30: Train Loss:	9.1347	Val Loss:	2.0580	Accuracy:	0.3523	F1:	0.3621 *
10 / 30: Train Loss:	8.0718	Val Loss:	1.9931	Accuracy:	0.3501	F1:	0.3615
11 / 30: Train Loss:	7.0161	Val Loss:	1.9399	Accuracy:	0.3589	F1:	0.3709 *
12 / 30: Train Loss:	6.2128	Val Loss:	1.8907	Accuracy:	0.3829	F1:	0.4058 *
13 / 30: Train Loss:	5.4675	Val Loss:	1.8419	Accuracy:	0.3807	F1:	0.3968
14 / 30: Train Loss:	4.8343

Some weights of BloomForSequenceClassification were not initialized from the model checkpoint at bigscience/bloom-560m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BloomTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	413.8233	Val Loss:	19.3123	Accuracy:	0.1007	F1:	0.0578 *
2 / 30: Train Loss:	117.1577	Val Loss:	4.8379	Accuracy:	0.1772	F1:	0.1591 *
3 / 30: Train Loss:	30.0735	Val Loss:	2.9307	Accuracy:	0.2276	F1:	0.2075 *
4 / 30: Train Loss:	18.6807	Val Loss:	2.5502	Accuracy:	0.2495	F1:	0.2307 *
5 / 30: Train Loss:	14.1748	Val Loss:	2.3226	Accuracy:	0.2779	F1:	0.2714 *
6 / 30: Train Loss:	11.4957	Val Loss:	2.1631	Accuracy:	0.3129	F1:	0.3057 *
7 / 30: Train Loss:	9.5099	Val Loss:	2.0695	Accuracy:	0.3217	F1:	0.3300 *
8 / 30: Train Loss:	8.0628	Val Loss:	2.0139	Accuracy:	0.3326	F1:	0.3450 *
9 / 30: Train Loss:	6.9235	Val Loss:	1.9767	Accuracy:	0.3589	F1:	0.3759 *
10 / 30: Train Loss:	5.9599	Val Loss:	1.9407	Accuracy:	0.3698	F1:	0.3927 *
11 / 30: Train Loss:	5.2617	Val Loss:	1.9044	Accuracy:	0.3764	F1:	0.3895
12 / 30: Train Loss:	4.5082	Val Loss:	1.8330	Accuracy:	0.3895	F1:	0.4029 *
13 / 30: Train Loss:	3.7308	Val Loss:	1.7574	Accuracy:	0.4070	F1:	0.4189 *
14 / 30: Train Loss:	3.1319

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	3.2485	Val Loss:	2.9195	Accuracy:	0.1050	F1:	0.0545 *
2 / 30: Train Loss:	2.6512	Val Loss:	2.5914	Accuracy:	0.1379	F1:	0.0620 *
3 / 30: Train Loss:	2.5340	Val Loss:	2.5324	Accuracy:	0.1357	F1:	0.0671 *
4 / 30: Train Loss:	2.4906	Val Loss:	2.5009	Accuracy:	0.1379	F1:	0.0730 *
5 / 30: Train Loss:	2.4638	Val Loss:	2.4754	Accuracy:	0.1466	F1:	0.0840 *
6 / 30: Train Loss:	2.4356	Val Loss:	2.4528	Accuracy:	0.1313	F1:	0.0762
7 / 30: Train Loss:	2.4317	Val Loss:	2.4359	Accuracy:	0.1291	F1:	0.0740
8 / 30: Train Loss:	2.4080	Val Loss:	2.4185	Accuracy:	0.1225	F1:	0.0688
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.17      0.48      0.25        67
               anecdote       0.06      0.07      0.06        43
         cherry picking       0.16      0.23      0.19        56
      conspiracy theory       0.50      0.05      0.09        39


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	3.1162	Val Loss:	2.7311	Accuracy:	0.1094	F1:	0.0567 *
2 / 30: Train Loss:	2.5637	Val Loss:	2.5549	Accuracy:	0.1357	F1:	0.0655 *
3 / 30: Train Loss:	2.5094	Val Loss:	2.5164	Accuracy:	0.1247	F1:	0.0648
4 / 30: Train Loss:	2.4706	Val Loss:	2.4868	Accuracy:	0.1291	F1:	0.0789 *
5 / 30: Train Loss:	2.4491	Val Loss:	2.4568	Accuracy:	0.1182	F1:	0.0721
6 / 30: Train Loss:	2.4113	Val Loss:	2.4284	Accuracy:	0.1182	F1:	0.0668
7 / 30: Train Loss:	2.4084	Val Loss:	2.4092	Accuracy:	0.1335	F1:	0.0780
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.15      0.42      0.22        67
               anecdote       0.06      0.07      0.06        43
         cherry picking       0.13      0.18      0.15        56
      conspiracy theory       0.50      0.05      0.09        39
           fake experts       0.00      0.00      0.00        12
           

Some weights of GPTNeoForSequenceClassification were not initialized from the model checkpoint at EleutherAI/gpt-neo-1.3B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	3.0822	Val Loss:	2.7442	Accuracy:	0.1269	F1:	0.0902 *
2 / 30: Train Loss:	2.5431	Val Loss:	2.4945	Accuracy:	0.1751	F1:	0.1308 *
3 / 30: Train Loss:	2.2355	Val Loss:	2.2893	Accuracy:	0.2429	F1:	0.1873 *
4 / 30: Train Loss:	1.9664	Val Loss:	2.1344	Accuracy:	0.2932	F1:	0.2359 *
5 / 30: Train Loss:	1.7230	Val Loss:	2.0176	Accuracy:	0.3239	F1:	0.2628 *
6 / 30: Train Loss:	1.4887	Val Loss:	1.9396	Accuracy:	0.3501	F1:	0.2855 *
7 / 30: Train Loss:	1.2550	Val Loss:	1.9004	Accuracy:	0.3742	F1:	0.3239 *
8 / 30: Train Loss:	1.0152	Val Loss:	1.9086	Accuracy:	0.4004	F1:	0.3511 *
9 / 30: Train Loss:	0.7838	Val Loss:	1.9387	Accuracy:	0.4092	F1:	0.3627 *
10 / 30: Train Loss:	0.5639	Val Loss:	1.9781	Accuracy:	0.4048	F1:	0.3636 *
11 / 30: Train Loss:	0.3850	Val Loss:	2.0031	Accuracy:	0.4026	F1:	0.3602
12 / 30: Train Loss:	0.2544	Val Loss:	2.0253	Accuracy:	0.4114	F1:	0.3691 *
13 / 30: Train Loss:	0.1700	Val Loss:	2.0366	Accuracy:	0.4092	F1:	0.3669
14 / 30: Train Loss:	0.1193	Val Loss:	

Some weights of GPTNeoForSequenceClassification were not initialized from the model checkpoint at EleutherAI/gpt-neo-1.3B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	3.0576	Val Loss:	2.7264	Accuracy:	0.1269	F1:	0.0907 *
2 / 30: Train Loss:	2.4906	Val Loss:	2.4446	Accuracy:	0.1882	F1:	0.1435 *
3 / 30: Train Loss:	2.1388	Val Loss:	2.2251	Accuracy:	0.2626	F1:	0.2000 *
4 / 30: Train Loss:	1.8230	Val Loss:	2.0675	Accuracy:	0.3173	F1:	0.2458 *
5 / 30: Train Loss:	1.5114	Val Loss:	1.9606	Accuracy:	0.3457	F1:	0.2740 *
6 / 30: Train Loss:	1.1901	Val Loss:	1.9346	Accuracy:	0.3829	F1:	0.3226 *
7 / 30: Train Loss:	0.8686	Val Loss:	1.9646	Accuracy:	0.3786	F1:	0.3321 *
8 / 30: Train Loss:	0.5790	Val Loss:	2.0049	Accuracy:	0.3764	F1:	0.3349 *
9 / 30: Train Loss:	0.3521	Val Loss:	2.0475	Accuracy:	0.3851	F1:	0.3539 *
10 / 30: Train Loss:	0.2136	Val Loss:	2.0648	Accuracy:	0.4026	F1:	0.3692 *
11 / 30: Train Loss:	0.1346	Val Loss:	2.0773	Accuracy:	0.3632	F1:	0.3357
12 / 30: Train Loss:	0.0831	Val Loss:	2.0891	Accuracy:	0.3829	F1:	0.3366
13 / 30: Train Loss:	0.0571	Val Loss:	2.0997	Accuracy:	0.3851	F1:	0.3360
No improvement for 3 epochs. Stopping ea

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.weight', 'pooler.dense.bias', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	2.4831	Val Loss:	2.4771	Accuracy:	0.1247	F1:	0.0185 *
2 / 30: Train Loss:	2.4729	Val Loss:	2.4660	Accuracy:	0.1247	F1:	0.0185
3 / 30: Train Loss:	2.4631	Val Loss:	2.4561	Accuracy:	0.1247	F1:	0.0185
4 / 30: Train Loss:	2.4535	Val Loss:	2.4472	Accuracy:	0.1247	F1:	0.0185
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.00      0.00      0.00        67
               anecdote       0.00      0.00      0.00        43
         cherry picking       0.00      0.00      0.00        56
      conspiracy theory       0.00      0.00      0.00        39
           fake experts       0.00      0.00      0.00        12
           false choice       0.00      0.00      0.00        13
      false equivalence       0.00      0.00      0.00        14
impossible expectations       0.00      0.00      0.00        37
      misrepresentation       0.00  

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.dense.weight', 'pooler.dense.bias', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	2.4831	Val Loss:	2.4770	Accuracy:	0.1247	F1:	0.0185 *
2 / 30: Train Loss:	2.4728	Val Loss:	2.4659	Accuracy:	0.1247	F1:	0.0185
3 / 30: Train Loss:	2.4629	Val Loss:	2.4560	Accuracy:	0.1247	F1:	0.0185
4 / 30: Train Loss:	2.4533	Val Loss:	2.4470	Accuracy:	0.1247	F1:	0.0185
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem       0.00      0.00      0.00        67
               anecdote       0.00      0.00      0.00        43
         cherry picking       0.00      0.00      0.00        56
      conspiracy theory       0.00      0.00      0.00        39
           fake experts       0.00      0.00      0.00        12
           false choice       0.00      0.00      0.00        13
      false equivalence       0.00      0.00      0.00        14
impossible expectations       0.00      0.00      0.00        37
      misrepresentation       0.00  

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v2-xlarge and are newly initialized: ['classifier.weight', 'pooler.dense.weight', 'pooler.dense.bias', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	21.8202	Val Loss:	2.4792	Accuracy:	0.1050	F1:	0.0430 *
2 / 30: Train Loss:	21.5403	Val Loss:	2.4746	Accuracy:	0.1050	F1:	0.0468 *
3 / 30: Train Loss:	21.3193	Val Loss:	2.4705	Accuracy:	0.1116	F1:	0.0517 *
4 / 30: Train Loss:	21.2921	Val Loss:	2.4683	Accuracy:	0.1204	F1:	0.0603 *
5 / 30: Train Loss:	21.1363	Val Loss:	2.4666	Accuracy:	0.1138	F1:	0.0603
6 / 30: Train Loss:	21.1976	Val Loss:	2.4650	Accuracy:	0.1204	F1:	0.0652 *
7 / 30: Train Loss:	21.2473	Val Loss:	2.4629	Accuracy:	0.1204	F1:	0.0752 *
8 / 30: Train Loss:	20.9751	Val Loss:	2.4610	Accuracy:	0.1313	F1:	0.0893 *
9 / 30: Train Loss:	20.9785	Val Loss:	2.4598	Accuracy:	0.1313	F1:	0.0832
10 / 30: Train Loss:	21.0905	Val Loss:	2.4594	Accuracy:	0.1291	F1:	0.0843
11 / 30: Train Loss:	20.9729	Val Loss:	2.4587	Accuracy:	0.1204	F1:	0.0758
No improvement for 3 epochs. Stopping early.
best (higgest macro f1-score) val results:
                         precision    recall  f1-score   support

             ad hominem    

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v2-xlarge and are newly initialized: ['classifier.weight', 'pooler.dense.weight', 'pooler.dense.bias', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	21.8170	Val Loss:	2.4787	Accuracy:	0.1028	F1:	0.0414 *
2 / 30: Train Loss:	21.5306	Val Loss:	2.4738	Accuracy:	0.1050	F1:	0.0467 *
3 / 30: Train Loss:	21.3093	Val Loss:	2.4694	Accuracy:	0.1138	F1:	0.0552 *
4 / 30: Train Loss:	21.2677	Val Loss:	2.4671	Accuracy:	0.1182	F1:	0.0605 *
5 / 30: Train Loss:	21.1078	Val Loss:	2.4653	Accuracy:	0.1182	F1:	0.0633 *
6 / 30: Train Loss:	21.1736	Val Loss:	2.4634	Accuracy:	0.1247	F1:	0.0772 *
7 / 30: Train Loss:	21.2132	Val Loss:	2.4605	Accuracy:	0.1335	F1:	0.0838 *
8 / 30: Train Loss:	20.9446	Val Loss:	2.4580	Accuracy:	0.1291	F1:	0.0816
9 / 30: Train Loss:	20.9299	Val Loss:	2.4562	Accuracy:	0.1357	F1:	0.0920 *
10 / 30: Train Loss:	21.0513	Val Loss:	2.4555	Accuracy:	0.1335	F1:	0.0921 *
11 / 30: Train Loss:	20.8897	Val Loss:	2.4534	Accuracy:	0.1335	F1:	0.0928 *
12 / 30: Train Loss:	20.8101	Val Loss:	2.4517	Accuracy:	0.1225	F1:	0.0858
13 / 30: Train Loss:	21.0208	Val Loss:	2.4491	Accuracy:	0.1225	F1:	0.0888
14 / 30: Train Loss:	20.779

In [5]:
import pandas as pd

In [6]:
# Render the accumulated sweep metrics as a table (one row per training run).
pd.DataFrame(data=results)

Unnamed: 0,test_acc,test_f1,eval_acc,eval_f1,"rank, alpha",model
0,0.484375,0.364616,0.516411,0.389022,8,bert-base-uncased
1,0.484375,0.366146,0.514223,0.393118,16,bert-base-uncased
2,0.628906,0.602685,0.689278,0.673936,8,roberta-large
3,0.667969,0.636544,0.682713,0.671765,16,roberta-large
4,0.175781,0.098885,0.183807,0.111217,8,gpt2
5,0.386719,0.301042,0.411379,0.326773,16,gpt2
6,0.457031,0.435943,0.479212,0.487516,8,bigscience/bloom-560m
7,0.46875,0.438304,0.507659,0.51458,16,bigscience/bloom-560m
8,0.140625,0.071074,0.146608,0.083974,8,facebook/opt-350m
9,0.136719,0.066688,0.129103,0.078936,16,facebook/opt-350m
