In [1]:
# Only needed in PyCharm: switch the working directory to the project folder
# (presumably so that the local-module imports in the next cell resolve).
import os

PROJECT_DIR = '/home/cs-folq1/src/sts_bert/sBERT'
# The path is machine-specific. Skip the chdir when it does not exist, so the
# notebook does not crash with FileNotFoundError on other machines or when the
# kernel was already started from the project folder.
if os.path.isdir(PROJECT_DIR):
    os.chdir(PROJECT_DIR)

In [2]:
import torch
from torch.utils.data import DataLoader

from BiEncoder import BiEncoder
from CrossEncoder import CrossEncoder
from STSTrainer import STSTrainer
from Datasets import load_sts
from GridRun import GridRun, random_sample

In [3]:
# Load the STS data once through the project's `load_sts` helper. The result
# is kept as a notebook-level global: later cells index it with 'train' and
# pass the whole object to STSTrainer.
dataset = load_sts()

Reusing dataset stsb_multi_mt (/home/cs-folq1/.cache/huggingface/datasets/stsb_multi_mt/en/1.0.0/bc6de0eaa8d97c28a4c22a07e851b05879ae62c60b0b69dd6b331339e8020f07)


In [4]:
def run_experiment(config):
    """Train one model for a single grid configuration.

    Reads the notebook-level ``dataset`` global.

    Args:
        config: mapping with the keys ``'train_size'``, ``'train_subset_seed'``,
            ``'mode'`` (formatted as ``'<encoder>/<mode>'``), ``'num_epochs'``,
            ``'batch_size'``, ``'lr'``, ``'lr_scheduler'`` and
            ``'warmup_percent'``.

    Returns:
        A ``(result, model, save_name)`` tuple: whatever ``STSTrainer.train``
        returned, the trained model, and the string ``'<encoder>_<mode>'``.
    """
    # Draw the training subset; the seed comes from the config.
    full_train = dataset['train']
    chosen = random_sample(n=len(full_train), k=config['train_size'],
                           seed=config['train_subset_seed'])
    train_dataset_subset = torch.utils.data.Subset(full_train, chosen)

    # 'bi-encoder/<mode>' builds a BiEncoder; any other prefix builds a
    # CrossEncoder.
    encoder, mode = config['mode'].split('/')
    model = (BiEncoder(mode=mode, head='cos-sim') if encoder == 'bi-encoder'
             else CrossEncoder(mode=mode))

    # Hyper-parameters forwarded unchanged from the config to the trainer.
    forwarded = ('num_epochs', 'batch_size', 'lr', 'lr_scheduler', 'warmup_percent')
    trainer_kwargs = {key: config[key] for key in forwarded}
    trainer = STSTrainer(model=model, train_dataset=train_dataset_subset,
                         dataset=dataset, **trainer_kwargs)

    result = trainer.train(disable_progress_bar=False)
    return result, model, f'{encoder}_{mode}'


# The bi-encoder variants compared in this sweep.
pooling_modes = [
    'bi-encoder/nli-cls-pooling',
    'bi-encoder/nli-mean-pooling',
    'bi-encoder/nli-linear-pooling',
]

# Train on the complete training split. For a data-size sweep use e.g.
# [500, 1000, 2000, 3000, 4000, len(dataset['train'])] instead.
train_sizes = [len(dataset['train'])]

# Grid handed to GridRun: the first five entries are single values, the last
# three are lists of alternatives.
grid = {
    'num_epochs': 10,
    'batch_size': 16,
    'lr': 2e-5,
    'lr_scheduler': 'linear',
    'warmup_percent': 0.2,
    'mode': pooling_modes,
    'train_size': train_sizes,
    'train_subset_seed': [1, 2, 3],
}

# Alternative without a results directory: GridRun(run_experiment)
grid_run = GridRun(
    run_experiment,
    results_dir='results',
    experiment_name='pretraining_sts',
)
grid_run.run(grid, save_best=True, ignore_previous_results=True)
# Inspect afterwards with: grid_run.df_results

Loading previous results from results/pretraining_sts/results_210707_152607.csv
Results will be stored in file results_210708_100708
Already done: {'batch_size': [16.0], 'lr': [2e-05], 'lr_scheduler': ['linear'], 'mode': ['bi-encoder/nli-cls-pooling'], 'num_epochs': [10.0], 'test_score': [0.8376018218342469], 'train_size': [5749.0], 'train_subset_seed': [1.0], 'warmup_percent': [0.2]}
Repeating experiment
----------
RUN CONFIG
----------
num_epochs :  10
batch_size :  16
lr :  2e-05
lr_scheduler :  linear
warmup_percent :  0.2
mode :  bi-encoder/nli-cls-pooling
train_size :  5749
train_subset_seed :  1


  0%|          | 0/360 [00:00<?, ?it/s]

Scheduler type: linear, epochs: 10, steps per epoch: 360, total steps: 3600, warmup steps: 720


100%|██████████| 360/360 [00:49<00:00,  7.26it/s]
100%|██████████| 94/94 [00:03<00:00, 23.57it/s]
  0%|          | 0/360 [00:00<?, ?it/s]

Epoch 1   : loss: 0.0378  , score: 0.8405  *


100%|██████████| 360/360 [00:49<00:00,  7.30it/s]
100%|██████████| 94/94 [00:03<00:00, 23.68it/s]
  0%|          | 1/360 [00:00<00:46,  7.77it/s]

Epoch 2   : loss: 0.0322  , score: 0.8593  *


100%|██████████| 360/360 [00:49<00:00,  7.34it/s]
100%|██████████| 94/94 [00:03<00:00, 23.69it/s]
  0%|          | 1/360 [00:00<00:56,  6.40it/s]

Epoch 3   : loss: 0.0306  , score: 0.8640  *


100%|██████████| 360/360 [00:49<00:00,  7.33it/s]
100%|██████████| 94/94 [00:03<00:00, 23.69it/s]
  0%|          | 1/360 [00:00<00:45,  7.81it/s]

Epoch 4   : loss: 0.0314  , score: 0.8638  


100%|██████████| 360/360 [00:49<00:00,  7.34it/s]
100%|██████████| 94/94 [00:03<00:00, 23.74it/s]
  0%|          | 1/360 [00:00<00:57,  6.20it/s]

Epoch 5   : loss: 0.0312  , score: 0.8656  *


100%|██████████| 360/360 [00:48<00:00,  7.35it/s]
100%|██████████| 94/94 [00:03<00:00, 23.67it/s]
  0%|          | 1/360 [00:00<00:46,  7.80it/s]

Epoch 6   : loss: 0.0315  , score: 0.8657  *


100%|██████████| 360/360 [00:49<00:00,  7.29it/s]
100%|██████████| 94/94 [00:03<00:00, 23.71it/s]
  0%|          | 1/360 [00:00<00:46,  7.70it/s]

Epoch 7   : loss: 0.0324  , score: 0.8646  


100%|██████████| 360/360 [00:49<00:00,  7.29it/s]
100%|██████████| 94/94 [00:03<00:00, 23.74it/s]
  0%|          | 1/360 [00:00<00:45,  7.95it/s]

Epoch 8   : loss: 0.0319  , score: 0.8664  *


100%|██████████| 360/360 [00:49<00:00,  7.31it/s]
100%|██████████| 94/94 [00:03<00:00, 23.67it/s]
  0%|          | 1/360 [00:00<00:46,  7.78it/s]

Epoch 9   : loss: 0.0319  , score: 0.8661  


100%|██████████| 360/360 [00:49<00:00,  7.32it/s]
100%|██████████| 94/94 [00:03<00:00, 23.65it/s]
  6%|▌         | 5/87 [00:00<00:02, 40.89it/s]

Epoch 10  : loss: 0.0321  , score: 0.8665  *


100%|██████████| 87/87 [00:03<00:00, 26.05it/s]


Test loss: 0.0391, score: 0.8459
Test score: 0.8459
Max RAM used: 3.08 Gb
User time: 7.74 min
GPU usage: total 15.90 Gb, reserved 3.82, allocated 0.83
Best bi-encoder_nli-cls-pooling model stored in file results/pretraining_sts/results_210708_100708_bi-encoder_nli-cls-pooling_best_model.bin
Already done: {'batch_size': [16.0], 'lr': [2e-05], 'lr_scheduler': ['linear'], 'mode': ['bi-encoder/nli-mean-pooling'], 'num_epochs': [10.0], 'test_score': [0.8521477459154022], 'train_size': [5749.0], 'train_subset_seed': [1.0], 'warmup_percent': [0.2]}
Repeating experiment
----------
RUN CONFIG
----------
num_epochs :  10
batch_size :  16
lr :  2e-05
lr_scheduler :  linear
warmup_percent :  0.2
mode :  bi-encoder/nli-mean-pooling
train_size :  5749
train_subset_seed :  1


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=438007537.0, style=ProgressStyle(descri…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=2.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=112.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=399.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466081.0, style=ProgressStyle(descripti…

  0%|          | 0/360 [00:00<?, ?it/s]


Scheduler type: linear, epochs: 10, steps per epoch: 360, total steps: 3600, warmup steps: 720


100%|██████████| 360/360 [00:48<00:00,  7.35it/s]
100%|██████████| 94/94 [00:03<00:00, 23.60it/s]
  0%|          | 1/360 [00:00<00:42,  8.50it/s]

Epoch 1   : loss: 0.0311  , score: 0.8437  *


100%|██████████| 360/360 [00:49<00:00,  7.33it/s]
100%|██████████| 94/94 [00:03<00:00, 23.67it/s]
  0%|          | 1/360 [00:00<01:03,  5.64it/s]

Epoch 2   : loss: 0.0269  , score: 0.8589  *


100%|██████████| 360/360 [00:48<00:00,  7.37it/s]
100%|██████████| 94/94 [00:03<00:00, 23.67it/s]
  0%|          | 1/360 [00:00<00:47,  7.56it/s]

Epoch 3   : loss: 0.0258  , score: 0.8632  *


100%|██████████| 360/360 [00:49<00:00,  7.33it/s]
100%|██████████| 94/94 [00:03<00:00, 23.65it/s]
  0%|          | 1/360 [00:00<00:48,  7.36it/s]

Epoch 4   : loss: 0.0256  , score: 0.8655  *


100%|██████████| 360/360 [00:54<00:00,  6.56it/s]
100%|██████████| 94/94 [00:06<00:00, 14.20it/s]
  0%|          | 0/360 [00:00<?, ?it/s]

Epoch 5   : loss: 0.0252  , score: 0.8680  *


100%|██████████| 360/360 [00:56<00:00,  6.39it/s]
100%|██████████| 94/94 [00:03<00:00, 23.69it/s]
  0%|          | 1/360 [00:00<00:48,  7.46it/s]

Epoch 6   : loss: 0.0250  , score: 0.8678  


100%|██████████| 360/360 [00:51<00:00,  6.97it/s]
100%|██████████| 94/94 [00:05<00:00, 17.06it/s]
  0%|          | 0/360 [00:00<?, ?it/s]

Epoch 7   : loss: 0.0252  , score: 0.8677  


100%|██████████| 360/360 [01:27<00:00,  4.10it/s]
100%|██████████| 94/94 [00:03<00:00, 23.69it/s]
  0%|          | 1/360 [00:00<00:48,  7.44it/s]

Epoch 8   : loss: 0.0253  , score: 0.8686  *


100%|██████████| 360/360 [00:49<00:00,  7.26it/s]
100%|██████████| 94/94 [00:03<00:00, 23.80it/s]
  0%|          | 1/360 [00:00<00:48,  7.39it/s]

Epoch 9   : loss: 0.0250  , score: 0.8694  *


100%|██████████| 360/360 [00:49<00:00,  7.32it/s]
100%|██████████| 94/94 [00:03<00:00, 23.69it/s]
  6%|▌         | 5/87 [00:00<00:02, 40.58it/s]

Epoch 10  : loss: 0.0251  , score: 0.8695  *


100%|██████████| 87/87 [00:03<00:00, 25.96it/s]


Test loss: 0.0322, score: 0.8493
Test score: 0.8493
Max RAM used: 3.71 Gb
User time: 16.18 min
GPU usage: total 15.90 Gb, reserved 3.90, allocated 0.82
Best bi-encoder_nli-mean-pooling model stored in file results/pretraining_sts/results_210708_100708_bi-encoder_nli-mean-pooling_best_model.bin
Already done: {'batch_size': [16.0], 'lr': [2e-05], 'lr_scheduler': ['linear'], 'mode': ['bi-encoder/nli-linear-pooling'], 'num_epochs': [10.0], 'test_score': [0.6399008242273966], 'train_size': [5749.0], 'train_subset_seed': [1.0], 'warmup_percent': [0.2]}
Repeating experiment
----------
RUN CONFIG
----------
num_epochs :  10
batch_size :  16
lr :  2e-05
lr_scheduler :  linear
warmup_percent :  0.2
mode :  bi-encoder/nli-linear-pooling
train_size :  5749
train_subset_seed :  1


  0%|          | 1/360 [00:00<01:00,  5.94it/s]

Scheduler type: linear, epochs: 10, steps per epoch: 360, total steps: 3600, warmup steps: 720


100%|██████████| 360/360 [00:49<00:00,  7.32it/s]
100%|██████████| 94/94 [00:03<00:00, 23.65it/s]
  0%|          | 1/360 [00:00<00:55,  6.43it/s]

Epoch 1   : loss: 0.0719  , score: 0.7398  *


100%|██████████| 360/360 [00:49<00:00,  7.27it/s]
100%|██████████| 94/94 [00:03<00:00, 23.65it/s]
  0%|          | 1/360 [00:00<00:42,  8.38it/s]

Epoch 2   : loss: 0.0644  , score: 0.7662  *


100%|██████████| 360/360 [00:48<00:00,  7.37it/s]
100%|██████████| 94/94 [00:03<00:00, 23.61it/s]
  0%|          | 1/360 [00:00<00:41,  8.62it/s]

Epoch 3   : loss: 0.0562  , score: 0.8019  *


100%|██████████| 360/360 [00:49<00:00,  7.27it/s]
100%|██████████| 94/94 [00:03<00:00, 23.64it/s]
  0%|          | 1/360 [00:00<00:44,  8.01it/s]

Epoch 4   : loss: 0.0532  , score: 0.8091  *


100%|██████████| 360/360 [00:48<00:00,  7.37it/s]
100%|██████████| 94/94 [00:03<00:00, 23.66it/s]
  0%|          | 1/360 [00:00<00:46,  7.75it/s]

Epoch 5   : loss: 0.0515  , score: 0.8142  *


 57%|█████▋    | 205/360 [00:28<00:21,  7.29it/s]


KeyboardInterrupt: 

In [4]:
# Build a cross-encoder and a trainer around it, then restore previously saved
# weights instead of training again.
sts_model = CrossEncoder(mode='cls-pooling-hidden', toy_model=False)
sts_trainer = STSTrainer(model=sts_model, train_dataset=dataset['train'],
                         dataset=dataset, num_epochs=10, lr_scheduler='linear',
                         warmup_percent=0.2)
# Presumably the checkpoint below was produced by running these two lines
# once (kept for reference; confirm before relying on it):
#sts_trainer.train(disable_progress_bar=True)
original_sts_model_path = './saved_models/few_shot_paws/original_cross_encoder.bin'
# torch.save(sts_model.state_dict(), original_sts_model_path)

# map_location='cpu' lets the checkpoint be read on machines without a GPU
# even if its tensors were saved from CUDA memory. load_state_dict then copies
# the values into the model's existing parameters.
sts_model.load_state_dict(torch.load(original_sts_model_path, map_location='cpu'))

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Scheduler type: linear, epochs: 10, steps per epoch: 360, total steps: 3600, warmup steps: 720


<All keys matched successfully>

In [6]:
# Evaluate the restored model on the trainer's test dataloader. The recorded
# output is a (float, tensor) pair — presumably (score, loss); confirm against
# STSTrainer.score.
sts_trainer.score(sts_trainer.test_dl)

100%|██████████| 87/87 [00:03<00:00, 27.72it/s]


(0.852038992078638, tensor(0.0240))