In [1]:
# Environment and import setup for the notebook. Statement order matters in this cell.
# The cache directory is exported before any of the remaining imports run.
# NOTE(review): presumably so the Hugging Face `transformers` library (likely imported
# indirectly by the project modules below) sees it at import time — confirm.
import os
os.environ['TRANSFORMERS_CACHE'] = '/mnt/software/'
import sys
import gc
# Assumes the project modules `data`, `models` and `engine` live in the flicc directory,
# taken here to be the PARENT of the current working directory.
# Joining with '' only appends a trailing path separator to that parent path.
flicc_path = os.path.join(os.path.dirname(os.getcwd()), '')
# Extend the module search path before the local imports below so they can resolve.
# `flicc_path` is also reused later as the `dataset_url` passed to ClimateDataset.
sys.path.append(flicc_path)
import torch
from data import ClimateDataset
from models import ClassificationModel
from engine import Engine

# import warnings
# warnings.filterwarnings("ignore")

In [2]:
# Per-checkpoint settings; the training cell unpacks the selected entry into `trainer.run(...)`.
# 'lr' is the learning rate (that is how the training cell reports it).
# NOTE(review): 'wd' is presumably weight decay, and 'focalloss'/'gamma' presumably enable and
# parameterise a focal loss — confirm against engine.Engine.run.
# Insertion order is significant: the training cell picks the LAST key of this mapping.
best_config = {
    'bert-base-uncased': dict(lr=5e-5, wd=0.0),
    'roberta-large': dict(lr=5e-5, focalloss=True, gamma=8, wd=0.0),
    'gpt2': dict(lr=5e-5, wd=0.01),
    'bigscience/bloom-560m': dict(lr=5e-5, focalloss=True, gamma=8, wd=0.0),
    'facebook/opt-350m': dict(lr=1e-5, wd=0.0),
    'EleutherAI/gpt-neo-1.3B': dict(lr=5e-5, wd=0.0),
    'microsoft/deberta-base': dict(lr=1e-5, wd=0.01),
    'microsoft/deberta-v2-xlarge': dict(lr=1e-5, focalloss=True, gamma=4, wd=0.01),
}

In [3]:
# Accumulator for the metrics of each trained checkpoint: one list per column,
# appended to in lock-step by the training cell (one entry per run).
results = {
    column: []
    for column in ('test_acc', 'test_f1', 'eval_acc', 'eval_f1', 'model')
}

In [4]:
# Train and evaluate with the stored settings, recording the returned metrics in `results`.
# Only the last checkpoint listed in `best_config` is run here.
# `data`, `model` and `trainer` stay bound after the loop; a later cell calls
# `trainer.save_best_model()` on the trainer from the final iteration.
for model_checkpoint in [list(best_config)[-1]]:
    run_config = best_config[model_checkpoint]
    print(f'Grid search {model_checkpoint}, learning rate {run_config["lr"]}')

    # Build the dataset and its dataloaders for this checkpoint.
    # NOTE(review): the meaning of model_to_train=4 is not visible here — confirm in data.ClimateDataset.
    data = ClimateDataset(
        model_to_train=4,
        model_checkpoint=model_checkpoint,
        dataset_url=flicc_path,
        batch_size=32,
    )
    data.setup_dataloaders()

    # Instantiate the classifier and hand it, plus the encoded dataset, to the engine.
    model = ClassificationModel(
        model_checkpoint=data.model_checkpoint,
        num_labels=data.num_labels,
    )
    trainer = Engine(epochs=30, labels=data.labels)
    trainer.model = model.model
    trainer.dataset_encoded = data.dataset_encoded

    # NOTE(review): early_stop=3 is presumably an early-stopping patience in epochs — confirm in engine.Engine.
    run_metrics = trainer.run(
        **run_config,
        train_dataloader=data.train_dataloader,
        eval_dataloader=data.eval_dataloader,
        test_dataloader=data.test_dataloader,
        early_stop=3,
    )
    test_acc, test_f1, eval_acc, eval_f1 = run_metrics

    # Append one value to every column of `results` for this run.
    run_record = {
        'test_acc': test_acc,
        'test_f1': test_f1,
        'eval_acc': eval_acc,
        'eval_f1': eval_f1,
        'model': model_checkpoint,
    }
    for column_name, column_value in run_record.items():
        results[column_name].append(column_value)

    banner = '### ' * 10
    print(banner)
    print(results)
    print(banner)

Grid search microsoft/deberta-v2-xlarge, learning rate 1e-05


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v2-xlarge and are newly initialized: ['pooler.dense.bias', 'classifier.bias', 'classifier.weight', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1 / 30: Train Loss:	21.0389	Val Loss:	2.1833	Accuracy:	0.2932	F1:	0.2272 *
2 / 30: Train Loss:	11.5078	Val Loss:	1.3374	Accuracy:	0.5777	F1:	0.5519 *
3 / 30: Train Loss:	4.9419	Val Loss:	1.1115	Accuracy:	0.6543	F1:	0.6404 *
4 / 30: Train Loss:	2.4948	Val Loss:	0.9952	Accuracy:	0.6674	F1:	0.6597 *
5 / 30: Train Loss:	1.2027	Val Loss:	0.9343	Accuracy:	0.7155	F1:	0.7126 *
6 / 30: Train Loss:	0.6009	Val Loss:	0.8813	Accuracy:	0.7155	F1:	0.7099
7 / 30: Train Loss:	0.3988	Val Loss:	0.9104	Accuracy:	0.7068	F1:	0.7012
8 / 30: Train Loss:	0.2287	Val Loss:	0.8626	Accuracy:	0.7199	F1:	0.7183 *
9 / 30: Train Loss:	0.1779	Val Loss:	0.8560	Accuracy:	0.7177	F1:	0.7072
10 / 30: Train Loss:	0.0951	Val Loss:	0.8309	Accuracy:	0.7352	F1:	0.7256 *
11 / 30: Train Loss:	0.0695	Val Loss:	0.8607	Accuracy:	0.7330	F1:	0.7156
12 / 30: Train Loss:	0.0662	Val Loss:	0.8494	Accuracy:	0.7243	F1:	0.7170
13 / 30: Train Loss:	0.0421	Val Loss:	0.8227	Accuracy:	0.7330	F1:	0.7278 *
14 / 30: Train Loss:	0.0205	Val Loss:	0.82

In [5]:
# Uses the `trainer` left bound by the last iteration of the training loop in the previous cell,
# so that cell must have been run first in this kernel.
# NOTE(review): presumably persists the best checkpoint seen during training (the epochs marked
# with '*' in the log) — confirm the behavior and output location in engine.Engine.save_best_model.
trainer.save_best_model()