- Related works:
    - Dialogue-Act Prediction of Future Responses based on Conversation History (http://aclanthology.lst.uni-saarland.de/P19-2027.pdf)
    - DeliData: A dataset for deliberation in multi-party problem solving (https://arxiv.org/pdf/2108.05271.pdf)
    - Identifying Agreement/Disagreement in Conversational Speech: A Cross-lingual Study (https://www.sri.com/wp-content/uploads/2021/12/identifying_agreement-disagreement_in_conversational_speech.pdf)
    - The Role of Conversational Structure in Agreement and Disagreement Detection in Online Discussions (https://aclanthology.org/W15-4625.pdf)
    - Modeling Long-Range Context for Concurrent Dialogue Acts Recognition https://arxiv.org/pdf/1909.00521.pdf
    - User Satisfaction Estimation with Sequential Dialogue Act Modeling in Goal-oriented Conversational Systems https://arxiv.org/pdf/2202.02912.pdf
    - 

- Experiments:
    - Predict understanding vs non-understanding signals in dialogue -> That indicates quality of explanation (baseline)
    - LongFormer that consumes all the text up to window_size and predict the quality
    - Modeling quality by the degree of non-understanding explanation_moves in the dialogue: Average probability of all uternaces of being a non-understanding explanation move.
    - RQ: Can we predict ahead whether the dialogue will lead to non-understanding?

In [8]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
import sys
import os
import wandb

sys.path.insert(0, '../src-py')
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

#wandb.init(project="test-project", entity="milad-it")

In [10]:
import json
import pandas as pd
import numpy as np
import glob 
from tabulate import tabulate
import math
import re
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


pd.set_option('display.max_colwidth', None)

In [11]:
eli5_annotation_df  = pd.read_pickle('../../data/eli5_ds/annotation-results/MACE-measure/final_mace_predictions.pkl')
eli5_dlg_quality_df = pd.read_csv('../../data/eli5_ds/annotation-results/MACE-measure/final_mace_rating_predictions.csv')

eli5_annotation_df['exp_type_label'] = eli5_annotation_df['exp_type_label'].fillna('(Y07) Not Explanation')

In [12]:
quality_scores = pd.Series(eli5_dlg_quality_df.rating_label.values, index = eli5_dlg_quality_df.task_id).to_dict()

In [13]:
eli5_annotation_df.exp_act_label.value_counts()

(E03) Provide an explanation                1403
(E04) Ask for an explanation                1178
(E07) Providing Feedback                    1045
(E06) Signaling non-understanding            114
(E05) Signaling understanding                 46
(E08) Providing Assessment                    19
(E10) Introducing Extraneous Information      18
(E02) Testing prior knowledge                 17
(E01) Testing understanding                   17
(E09) Other                                   16
Name: exp_act_label, dtype: int64

In [14]:
eli5_annotation_df = eli5_annotation_df.groupby('task_id').agg({'turn_text': lambda rows: list(rows),
                                                  'topic': lambda rows: list(rows)[0],
                                                  'topic_func_label': lambda rows: list(rows),
                                                  'dlg_act_label': lambda rows: list(rows),
                                                  'exp_type_label' : lambda rows: list(rows),
                                                  'exp_act_label': lambda rows: list(rows)}).reset_index()

eli5_annotation_df['quality'] = eli5_annotation_df.task_id.apply(lambda x: quality_scores[x])

In [None]:
eli5_annotation_df.head()

In [None]:
len(eli5_annotation_df)

In [None]:
eli5_annotation_df['num_turns'] = eli5_annotation_df.turn_text.apply(lambda x: len(x))
eli5_annotation_df['num_tokens'] = eli5_annotation_df.turn_text.apply(lambda turn_txts: len(' '.join([t['text'] for t in turn_txts])))

In [None]:
eli5_annotation_df.num_tokens.hist()
plt.show()

In [None]:
len([x for x in eli5_annotation_df.num_tokens if x > 4096]) #number of dialouges longer than 4096

### Experiments:

In [None]:
# from transformers import LongformerTokenizerFast
# from transformers import LongformerConfig
# from transformers import LongformerTokenizer

# from longformer import LongformerForSequenceClassification
# from transformers import (AutoTokenizer, AutoModelForSequenceClassification, default_data_collator,
#                           PreTrainedModel, BertModel, BertForSequenceClassification,
#                           TrainingArguments, Trainer)

In [None]:
eli5_annotation_df['turn_text_25'] = eli5_annotation_df.turn_text.apply(lambda turns: turns[:math.ceil(len(turns) * 0.25)])
eli5_annotation_df['turn_text_50'] = eli5_annotation_df.turn_text.apply(lambda turns: turns[:math.ceil(len(turns) * 0.50)])
eli5_annotation_df['turn_text_75'] = eli5_annotation_df.turn_text.apply(lambda turns: turns[:math.ceil(len(turns) * 0.75)])
eli5_annotation_df['turn_text_100']= eli5_annotation_df.turn_text.apply(lambda turns: turns)

In [None]:
%autoreload

from utils import *

In [None]:
#RMSE for an avg-baseline
quality_scores = eli5_annotation_df.quality.values
avg_score = np.average(quality_scores)

MSE = np.square(np.subtract(quality_scores,[avg_score] * len(quality_scores))).mean() 
 
RMSE = math.sqrt(MSE)
print("Mean Square Error:", round(MSE, 3))
print("Root Mean Square Error:", round(RMSE, 3))

In [16]:
#tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
#res = preprocess_function(tokenizer, eli5_annotation_df.sample(2), input_clm='turn_text', extra_exp_moves=False, extra_exp_types=False, global_attention=True)

### Train Models on different percentages of the dialogue:

In [None]:
tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")

#### Longformer

In [22]:
#Evaluate the baseline_pereli5_annotation_dfn different sizes of turns
baseline_rmse_scores = []
for input_clm in ['turn_text_25', 'turn_text_50', 'turn_text_75', 'turn_text_100']:
    perc = input_clm.split('_')[-1]
    baseline_rmse_scores.append(train_and_evaluate_model('../data/quality_models/longformer-{}'.format(perc), eli5_annotation_df,
                                         'longformer-{}'.format(perc),
                                         input_clm=input_clm,
                                         num_train_epochs=5, lr=2e-5, batch_size=4, n_folds=3, eval_steps=100))

[34m[1mwandb[0m: Currently logged in as: [33mmiladalsh-it[0m ([33mmilad-it[0m). Use [1m`wandb login --relogin`[0m to force relogin


3    49
1    49
4    49
2    49
5    49
Name: quality, dtype: int64
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Training 108, Valid 28, and Test 68
Training 245, Valid 30, and Test 464


Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of LongformerForSequenceClassification were not initialized from the model checkpoint at allenai/longformer-base-4096 and are newly initialized: ['longformer.embeddings.exp_type_embeddings.weight', '

Step,Training Loss,Validation Loss,Rmse
100,2.5531,2.492997,1.578923
200,1.7093,2.152284,1.467066
300,0.9384,2.649135,1.627616


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 464
  Batch size = 4


Configuration saved in ../data/quality_models/longformer-25/longformer-0-fold/config.json


{'eval_loss': 3.047044515609741, 'eval_rmse': 1.7455785274505615, 'eval_runtime': 46.1869, 'eval_samples_per_second': 10.046, 'eval_steps_per_second': 2.512, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-25/longformer-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▄▁▅█
eval/rmse,▄▁▅█
eval/runtime,▁▁▁█
eval/samples_per_second,███▁
eval/steps_per_second,███▁
train/epoch,▁▁▄▄████
train/global_step,▁▁▄▄████
train/learning_rate,█▄▁
train/loss,█▄▁
train/total_flos,▁

0,1
eval/loss,3.04704
eval/rmse,1.74558
eval/runtime,46.1869
eval/samples_per_second,10.046
eval/steps_per_second,2.512
train/epoch,5.0
train/global_step,310.0
train/learning_rate,0.0
train/loss,0.9384
train/total_flos,3218549745868800.0


1    188
3    188
2    188
4    188
5    188
Name: quality, dtype: int64
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Training 108, Valid 28, and Test 68
Training 940, Valid 30, and Test 115


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.9122,3.181638,1.783715
200,1.8506,1.781494,1.334726
300,1.2807,1.891974,1.375491
400,1.1938,1.642679,1.28167
500,0.7855,1.983361,1.408319
600,0.4796,2.337059,1.528744
700,0.5117,2.865502,1.692779
800,0.28,2.278614,1.509508
900,0.2252,2.606899,1.614589
1000,0.2199,2.350879,1.533258


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-25/longformer-1-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-25/longformer-1-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-25/longformer-1-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-25/longformer-1-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-25/longformer-1-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  

Configuration saved in ../data/quality_models/longformer-25/longformer-1-fold/config.json


{'eval_loss': 2.5298612117767334, 'eval_rmse': 1.5905537605285645, 'eval_runtime': 7.3917, 'eval_samples_per_second': 15.558, 'eval_steps_per_second': 3.923, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-25/longformer-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▂▂▁▃▄▇▄▅▄▅▅
eval/rmse,█▂▂▁▃▄▇▄▆▅▅▅
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,▁▃▂▃▃▂▄▃▃▃▅█
eval/steps_per_second,▆▇▇▇▇▇█▇▇▇█▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▅▄▄▃▂▂▁▁▁▁
train/total_flos,▁

0,1
eval/loss,2.52986
eval/rmse,1.59055
eval/runtime,7.3917
eval/samples_per_second,15.558
eval/steps_per_second,3.923
train/epoch,5.0
train/global_step,1175.0
train/learning_rate,0.0
train/loss,0.1215
train/total_flos,1.23487214739456e+16


1    191
3    191
2    191
4    191
5    191
Name: quality, dtype: int64
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Training 108, Valid 28, and Test 68
Training 955, Valid 37, and Test 73


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.7503,2.442714,1.562919
200,1.8471,2.34426,1.531098
300,1.3618,2.743726,1.65642
400,1.1176,2.275353,1.508427
500,0.9581,2.418726,1.555225
600,0.5861,2.424288,1.557013
700,0.5366,2.71574,1.64795
800,0.379,2.529073,1.590306
900,0.3079,2.70003,1.643177
1000,0.293,2.604809,1.613942


***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-25/longformer-2-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-25/longformer-2-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-25/longformer-2-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-25/longformer-2-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-25/longformer-2-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  

Configuration saved in ../data/quality_models/longformer-25/longformer-2-fold/config.json


{'eval_loss': 2.859114646911621, 'eval_rmse': 1.6908916234970093, 'eval_runtime': 4.724, 'eval_samples_per_second': 15.453, 'eval_steps_per_second': 4.022, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-25/longformer-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▃▂▇▁▃▃▆▄▆▅▅█
eval/rmse,▃▂▇▁▃▃▆▄▆▅▅█
eval/runtime,▁▁▁▁▁▁▁▁▁▃▁█
eval/samples_per_second,█████████▁██
eval/steps_per_second,█████████▁█▇
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▆▄▄▃▂▂▂▁▁▁
train/total_flos,▁

0,1
eval/loss,2.85911
eval/rmse,1.69089
eval/runtime,4.724
eval/samples_per_second,15.453
eval/steps_per_second,4.022
train/epoch,5.0
train/global_step,1195.0
train/learning_rate,0.0
train/loss,0.1858
train/total_flos,1.25457755400192e+16


loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/859f4633944e1b7e7fa301e72161388cd5903e36385d0ef2917256506bff64c3.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/af6fcabe2bf8cab6f77b20d94ba46a3dbf441ca0549e1f3c852c437b612f5224.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/tokenizer_config.json from cache at None
loading configuration file 

3    49
1    49
4    49
2    49
5    49
Name: quality, dtype: int64
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Training 108, Valid 28, and Test 68
Training 245, Valid 30, and Test 464


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.7216,2.442873,1.562969
200,1.987,1.983687,1.408434
300,1.4675,2.637595,1.624067


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 464
  Batch size = 4


Configuration saved in ../data/quality_models/longformer-50/longformer-0-fold/config.json


{'eval_loss': 2.6007256507873535, 'eval_rmse': 1.6126766204833984, 'eval_runtime': 42.566, 'eval_samples_per_second': 10.901, 'eval_steps_per_second': 2.725, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-50/longformer-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▆▁██
eval/rmse,▆▁██
eval/runtime,▁▁▁█
eval/samples_per_second,███▁
eval/steps_per_second,███▁
train/epoch,▁▁▄▄████
train/global_step,▁▁▄▄████
train/learning_rate,█▄▁
train/loss,█▄▁
train/total_flos,▁

0,1
eval/loss,2.60073
eval/rmse,1.61268
eval/runtime,42.566
eval/samples_per_second,10.901
eval/steps_per_second,2.725
train/epoch,5.0
train/global_step,310.0
train/learning_rate,0.0
train/loss,1.4675
train/total_flos,3218549745868800.0


1    188
3    188
2    188
4    188
5    188
Name: quality, dtype: int64
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Training 108, Valid 28, and Test 68
Training 940, Valid 30, and Test 115


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.9284,3.396365,1.842923
200,2.1601,2.501959,1.581758
300,1.6866,1.888088,1.374077
400,1.3991,2.04247,1.42915
500,0.9695,3.120509,1.766496
600,0.6872,3.013267,1.735876
700,0.6667,3.039668,1.743464
800,0.3732,2.50394,1.582384
900,0.3415,2.615548,1.617265
1000,0.2556,2.507149,1.583398


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-50/longformer-1-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-50/longformer-1-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-50/longformer-1-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-50/longformer-1-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-50/longformer-1-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  

{'eval_loss': 2.493539810180664, 'eval_rmse': 1.579094648361206, 'eval_runtime': 10.4572, 'eval_samples_per_second': 10.997, 'eval_steps_per_second': 2.773, 'epoch': 5.0}


Configuration saved in ../data/quality_models/longformer-50/longformer-1-fold/config.json
Model weights saved in ../data/quality_models/longformer-50/longformer-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▄▁▂▇▆▆▄▄▄▄▄
eval/rmse,█▄▁▂▇▆▇▄▅▄▅▄
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,███████████▁
eval/steps_per_second,███████████▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▆▅▄▃▂▂▂▁▁▁
train/total_flos,▁

0,1
eval/loss,2.49354
eval/rmse,1.57909
eval/runtime,10.4572
eval/samples_per_second,10.997
eval/steps_per_second,2.773
train/epoch,5.0
train/global_step,1175.0
train/learning_rate,0.0
train/loss,0.1718
train/total_flos,1.23487214739456e+16


1    191
3    191
2    191
4    191
5    191
Name: quality, dtype: int64
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Training 108, Valid 28, and Test 68
Training 955, Valid 37, and Test 73


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.777,2.577828,1.605562
200,1.8864,2.583594,1.607356
300,1.3836,2.158667,1.46924
400,1.3001,2.125156,1.457792
500,0.9753,2.084556,1.443799
600,0.7753,2.948105,1.717005
700,0.6232,2.521409,1.587895
800,0.4459,2.35652,1.535096
900,0.3976,2.345548,1.531518
1000,0.2978,2.228607,1.492852


***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-50/longformer-2-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-50/longformer-2-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-50/longformer-2-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-50/longformer-2-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-50/longformer-2-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  

Configuration saved in ../data/quality_models/longformer-50/longformer-2-fold/config.json


{'eval_loss': 2.4461441040039062, 'eval_rmse': 1.564015507698059, 'eval_runtime': 7.1965, 'eval_samples_per_second': 10.144, 'eval_steps_per_second': 2.64, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-50/longformer-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▅▅▂▁▁█▅▃▃▂▄▄
eval/rmse,▅▅▂▁▁█▅▃▃▂▄▄
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,███████████▁
eval/steps_per_second,███████████▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▆▄▄▃▃▂▂▂▁▁
train/total_flos,▁

0,1
eval/loss,2.44614
eval/rmse,1.56402
eval/runtime,7.1965
eval/samples_per_second,10.144
eval/steps_per_second,2.64
train/epoch,5.0
train/global_step,1195.0
train/learning_rate,0.0
train/loss,0.182
train/total_flos,1.25457755400192e+16


loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/859f4633944e1b7e7fa301e72161388cd5903e36385d0ef2917256506bff64c3.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/af6fcabe2bf8cab6f77b20d94ba46a3dbf441ca0549e1f3c852c437b612f5224.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/tokenizer_config.json from cache at None
loading configuration file 

3    49
1    49
4    49
2    49
5    49
Name: quality, dtype: int64
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Training 108, Valid 28, and Test 68
Training 245, Valid 30, and Test 464


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.644,2.381405,1.54318
200,1.7553,2.259485,1.503158
300,1.2694,2.12828,1.458863


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 464
  Batch size = 4


Configuration saved in ../data/quality_models/longformer-75/longformer-0-fold/config.json


{'eval_loss': 2.943169116973877, 'eval_rmse': 1.7155667543411255, 'eval_runtime': 44.6136, 'eval_samples_per_second': 10.4, 'eval_steps_per_second': 2.6, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-75/longformer-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▃▂▁█
eval/rmse,▃▂▁█
eval/runtime,▁▁▁█
eval/samples_per_second,███▁
eval/steps_per_second,███▁
train/epoch,▁▁▄▄████
train/global_step,▁▁▄▄████
train/learning_rate,█▄▁
train/loss,█▃▁
train/total_flos,▁

0,1
eval/loss,2.94317
eval/rmse,1.71557
eval/runtime,44.6136
eval/samples_per_second,10.4
eval/steps_per_second,2.6
train/epoch,5.0
train/global_step,310.0
train/learning_rate,0.0
train/loss,1.2694
train/total_flos,3218549745868800.0


1    188
3    188
2    188
4    188
5    188
Name: quality, dtype: int64
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Training 108, Valid 28, and Test 68
Training 940, Valid 30, and Test 115


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.8796,3.120658,1.766538
200,1.8878,2.176781,1.475392
300,1.4702,2.152888,1.467272
400,1.3348,1.437593,1.198997
500,0.834,1.860707,1.364077
600,0.5556,1.463054,1.209568
700,0.5288,1.975955,1.405687
800,0.3327,1.816103,1.347629
900,0.2778,1.920278,1.385741
1000,0.2358,1.811689,1.34599


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-75/longformer-1-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-75/longformer-1-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-75/longformer-1-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-75/longformer-1-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-75/longformer-1-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  

Configuration saved in ../data/quality_models/longformer-75/longformer-1-fold/config.json


{'eval_loss': 2.4010939598083496, 'eval_rmse': 1.5495463609695435, 'eval_runtime': 9.4515, 'eval_samples_per_second': 12.167, 'eval_steps_per_second': 3.068, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-75/longformer-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▄▄▁▃▁▃▃▃▃▃▅
eval/rmse,█▄▄▁▃▁▄▃▃▃▃▅
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,███████████▁
eval/steps_per_second,███████████▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▅▄▄▃▂▂▁▁▁▁
train/total_flos,▁

0,1
eval/loss,2.40109
eval/rmse,1.54955
eval/runtime,9.4515
eval/samples_per_second,12.167
eval/steps_per_second,3.068
train/epoch,5.0
train/global_step,1175.0
train/learning_rate,0.0
train/loss,0.153
train/total_flos,1.23487214739456e+16


1    191
3    191
2    191
4    191
5    191
Name: quality, dtype: int64
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Training 108, Valid 28, and Test 68
Training 955, Valid 37, and Test 73


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.5869,2.454416,1.566658
200,1.6465,2.023239,1.422406
300,1.2203,2.024489,1.422845
400,1.0697,2.042567,1.429184
500,0.7988,1.734608,1.317045
600,0.549,2.366604,1.538377
700,0.4822,1.845926,1.358649
800,0.301,1.837931,1.355703
900,0.3037,1.813874,1.346801
1000,0.1757,1.662291,1.289299


***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-75/longformer-2-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-75/longformer-2-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-75/longformer-2-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-75/longformer-2-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-75/longformer-2-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  

Configuration saved in ../data/quality_models/longformer-75/longformer-2-fold/config.json


{'eval_loss': 2.1043429374694824, 'eval_rmse': 1.450635313987732, 'eval_runtime': 8.6509, 'eval_samples_per_second': 8.438, 'eval_steps_per_second': 2.196, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-75/longformer-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▄▄▄▂▇▃▃▂▁▁▅
eval/rmse,█▄▄▅▂▇▃▃▂▁▁▅
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,██▇████████▁
eval/steps_per_second,██▇████████▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▅▄▄▃▂▂▁▁▁▁
train/total_flos,▁

0,1
eval/loss,2.10434
eval/rmse,1.45064
eval/runtime,8.6509
eval/samples_per_second,8.438
eval/steps_per_second,2.196
train/epoch,5.0
train/global_step,1195.0
train/learning_rate,0.0
train/loss,0.1396
train/total_flos,1.25457755400192e+16


loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/859f4633944e1b7e7fa301e72161388cd5903e36385d0ef2917256506bff64c3.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/af6fcabe2bf8cab6f77b20d94ba46a3dbf441ca0549e1f3c852c437b612f5224.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/tokenizer_config.json from cache at None
loading configuration file 

3    49
1    49
4    49
2    49
5    49
Name: quality, dtype: int64
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Training 108, Valid 28, and Test 68
Training 245, Valid 30, and Test 464


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.5967,1.829872,1.352728
200,1.6513,1.642277,1.281514
300,1.0445,2.387237,1.545069


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 464
  Batch size = 4


Configuration saved in ../data/quality_models/longformer-100/longformer-0-fold/config.json


{'eval_loss': 2.728280782699585, 'eval_rmse': 1.6517508029937744, 'eval_runtime': 44.7092, 'eval_samples_per_second': 10.378, 'eval_steps_per_second': 2.595, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-100/longformer-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▂▁▆█
eval/rmse,▂▁▆█
eval/runtime,▁▁▁█
eval/samples_per_second,███▁
eval/steps_per_second,███▁
train/epoch,▁▁▄▄████
train/global_step,▁▁▄▄████
train/learning_rate,█▄▁
train/loss,█▄▁
train/total_flos,▁

0,1
eval/loss,2.72828
eval/rmse,1.65175
eval/runtime,44.7092
eval/samples_per_second,10.378
eval/steps_per_second,2.595
train/epoch,5.0
train/global_step,310.0
train/learning_rate,0.0
train/loss,1.0445
train/total_flos,3218549745868800.0


1    188
3    188
2    188
4    188
5    188
Name: quality, dtype: int64
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Training 108, Valid 28, and Test 68
Training 940, Valid 30, and Test 115


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.7778,3.700812,1.923749
200,1.7526,1.908297,1.381412
300,1.3407,1.75981,1.326578
400,1.0519,1.47107,1.212877
500,0.7667,1.561546,1.249618
600,0.5223,1.674424,1.293995
700,0.5079,2.03291,1.425801
800,0.2768,1.907362,1.381073
900,0.2278,1.812042,1.346121
1000,0.2293,1.921991,1.386359


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-100/longformer-1-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-100/longformer-1-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-100/longformer-1-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-100/longformer-1-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-100/longformer-1-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation ***

Configuration saved in ../data/quality_models/longformer-100/longformer-1-fold/config.json


{'eval_loss': 1.7858995199203491, 'eval_rmse': 1.3363754749298096, 'eval_runtime': 11.7953, 'eval_samples_per_second': 9.75, 'eval_steps_per_second': 2.459, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-100/longformer-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▂▂▁▁▂▃▂▂▂▂▂
eval/rmse,█▃▂▁▁▂▃▃▂▃▂▂
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,███████████▁
eval/steps_per_second,███████████▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▅▄▃▃▂▂▁▁▁▁
train/total_flos,▁

0,1
eval/loss,1.7859
eval/rmse,1.33638
eval/runtime,11.7953
eval/samples_per_second,9.75
eval/steps_per_second,2.459
train/epoch,5.0
train/global_step,1175.0
train/learning_rate,0.0
train/loss,0.1464
train/total_flos,1.23487214739456e+16


1    191
3    191
2    191
4    191
5    191
Name: quality, dtype: int64
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Training 108, Valid 28, and Test 68
Training 955, Valid 37, and Test 73


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.4169,2.200625,1.48345
200,1.9843,2.404938,1.550786
300,1.4555,2.696765,1.642183
400,1.2461,1.967623,1.40272
500,0.9326,1.612493,1.26984
600,0.7417,1.519737,1.232776
700,0.5576,1.21057,1.100259
800,0.3683,1.552706,1.246076
900,0.4014,1.648134,1.283797
1000,0.3107,1.484212,1.218283


***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-100/longformer-2-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-100/longformer-2-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-100/longformer-2-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-100/longformer-2-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-100/longformer-2-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation ***

Configuration saved in ../data/quality_models/longformer-100/longformer-2-fold/config.json


{'eval_loss': 2.63857364654541, 'eval_rmse': 1.6243687868118286, 'eval_runtime': 6.6044, 'eval_samples_per_second': 11.053, 'eval_steps_per_second': 2.877, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-100/longformer-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▆▇█▅▃▂▁▃▃▂▂█
eval/rmse,▆▇█▅▃▃▁▃▃▃▃█
eval/runtime,▅▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,▁██████████▄
eval/steps_per_second,▁██████████▄
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▇▅▄▃▃▂▁▁▁▁
train/total_flos,▁

0,1
eval/loss,2.63857
eval/rmse,1.62437
eval/runtime,6.6044
eval/samples_per_second,11.053
eval/steps_per_second,2.877
train/epoch,5.0
train/global_step,1195.0
train/learning_rate,0.0
train/loss,0.2579
train/total_flos,1.25457755400192e+16


In [25]:
json.dump(baseline_rmse_scores, open('../data/quality_models/baseline_rmse_scores.json', 'w'))

#### LongFormer + exp-moves

In [None]:
#Evaluate the baseline_pereli5_annotation_dfn different sizes of turns
exp_move_rmse_scores = []
for input_clm in ['turn_text_25', 'turn_text_50', 'turn_text_75', 'turn_text_100']:
    perc = input_clm.split('_')[-1]
    exp_move_rmse_scores.append(train_and_evaluate_model('../data/quality_models/longformer-exp-moves-{}'.format(perc), eli5_annotation_df,
                                         'longformer-exp-moves-{}'.format(perc),
                                         input_clm=input_clm,
                                        num_train_epochs=5, lr=2e-5, batch_size=4, n_folds=3, extra_exp_moves=True, num_exp_moves=11, eval_steps=100))

loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/859f4633944e1b7e7fa301e72161388cd5903e36385d0ef2917256506bff64c3.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/af6fcabe2bf8cab6f77b20d94ba46a3dbf441ca0549e1f3c852c437b612f5224.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/tokenizer_config.json from cache at None
loading configuration file 

3    49
1    49
4    49
2    49
5    49
Name: quality, dtype: int64
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Training 108, Valid 28, and Test 68
Training 245, Valid 30, and Test 464


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 464
  Batch size = 4


{'eval_loss': 3.5717976093292236, 'eval_rmse': 1.8899199962615967, 'eval_runtime': 39.2318, 'eval_samples_per_second': 11.827, 'eval_steps_per_second': 2.957, 'epoch': 5.0}


Configuration saved in ../data/quality_models/longformer-exp-moves-25/longformer-0-fold/config.json
Model weights saved in ../data/quality_models/longformer-exp-moves-25/longformer-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/rmse,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,3.5718
eval/rmse,1.88992
eval/runtime,39.2318
eval/samples_per_second,11.827
eval/steps_per_second,2.957
train/epoch,5.0
train/global_step,310.0
train/total_flos,3218549745868800.0
train/train_loss,1.80109
train/train_runtime,372.0327


1    188
3    188
2    188
4    188
5    188
Name: quality, dtype: int64
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Training 108, Valid 28, and Test 68
Training 940, Valid 30, and Test 115


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
500,1.6437,2.530915,1.590885


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-moves-25/longformer-1-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-moves-25/longformer-1-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-moves-25/longformer-1-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-moves-25/longformer-1-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-moves-25/longformer-1-fold/checkpoint-500/special_tokens_map.json


In [28]:
json.dump(exp_move_rmse_scores, open('../data/quality_models/exp_move_rmse_scores.json', 'w'))

#### LongFormer + exp-types

In [30]:
#Evaluate the approach performance on different sizes of turns
exp_types_rmse_scores = []
for input_clm in ['turn_text_25', 'turn_text_50', 'turn_text_75', 'turn_text_100']:
    perc = input_clm.split('_')[-1]
    exp_types_rmse_scores.append(train_and_evaluate_model('../data/quality_models/longformer-exp-types-{}'.format(perc), eli5_annotation_df, 
                                       'longformer-exp-types-{}'.format(perc), 
                                        input_clm=input_clm,
                                       num_train_epochs=5, lr=2e-5, batch_size=4, extra_exp_types=True, num_exp_types=8, n_folds=3, eval_steps=100)
    )

loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/859f4633944e1b7e7fa301e72161388cd5903e36385d0ef2917256506bff64c3.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/af6fcabe2bf8cab6f77b20d94ba46a3dbf441ca0549e1f3c852c437b612f5224.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/tokenizer_config.json from cache at None
loading configuration file 

3    49
1    49
4    49
2    49
5    49
Name: quality, dtype: int64
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Training 108, Valid 28, and Test 68
Training 245, Valid 30, and Test 464


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.6879,2.536054,1.592499
200,1.8648,2.224604,1.491511
300,1.0971,2.756962,1.66041


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 464
  Batch size = 4


Configuration saved in ../data/quality_models/longformer-exp-types-25/longformer-0-fold/config.json


{'eval_loss': 3.108372211456299, 'eval_rmse': 1.7630574703216553, 'eval_runtime': 32.622, 'eval_samples_per_second': 14.224, 'eval_steps_per_second': 3.556, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-25/longformer-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▃▁▅█
eval/rmse,▄▁▅█
eval/runtime,▁▁▁█
eval/samples_per_second,▆█▇▁
eval/steps_per_second,▇██▁
train/epoch,▁▁▄▄████
train/global_step,▁▁▄▄████
train/learning_rate,█▄▁
train/loss,█▄▁
train/total_flos,▁

0,1
eval/loss,3.10837
eval/rmse,1.76306
eval/runtime,32.622
eval/samples_per_second,14.224
eval/steps_per_second,3.556
train/epoch,5.0
train/global_step,310.0
train/learning_rate,0.0
train/loss,1.0971
train/total_flos,3218549745868800.0


1    188
3    188
2    188
4    188
5    188
Name: quality, dtype: int64
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Training 108, Valid 28, and Test 68
Training 940, Valid 30, and Test 115


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.9898,2.911941,1.706441
200,1.9032,3.296554,1.815642
300,1.4871,2.357087,1.535281
400,1.298,1.893165,1.375924
500,0.8388,2.386587,1.544858
600,0.5281,3.001054,1.732355
700,0.648,2.818584,1.678864
800,0.4634,2.67236,1.634735
900,0.2819,2.423764,1.556844
1000,0.2624,2.237211,1.495731


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-25/longformer-1-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-25/longformer-1-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-25/longformer-1-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-25/longformer-1-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-25/longformer-1-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 30


Configuration saved in ../data/quality_models/longformer-exp-types-25/longformer-1-fold/config.json


{'eval_loss': 2.344376564025879, 'eval_rmse': 1.5311356782913208, 'eval_runtime': 7.5463, 'eval_samples_per_second': 15.239, 'eval_steps_per_second': 3.843, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-25/longformer-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▆█▃▁▃▇▆▅▄▃▅▃
eval/rmse,▆█▄▁▄▇▆▅▄▃▅▃
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,▃▃▂▄▃▁▄▅▁▁▄█
eval/steps_per_second,▇▇▇█▇▆██▆▆▇▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▅▄▄▃▂▂▂▁▁▁
train/total_flos,▁

0,1
eval/loss,2.34438
eval/rmse,1.53114
eval/runtime,7.5463
eval/samples_per_second,15.239
eval/steps_per_second,3.843
train/epoch,5.0
train/global_step,1175.0
train/learning_rate,0.0
train/loss,0.185
train/total_flos,1.23487214739456e+16


1    191
3    191
2    191
4    191
5    191
Name: quality, dtype: int64
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Training 108, Valid 28, and Test 68
Training 955, Valid 37, and Test 73


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.7989,2.454234,1.5666
200,1.8583,3.29064,1.814012
300,1.3827,2.685158,1.638645
400,1.1985,2.48504,1.576401
500,1.142,2.696009,1.641953
600,0.7298,3.988513,1.997126
700,0.6543,3.068391,1.751682
800,0.5075,3.301417,1.81698
900,0.413,3.381708,1.838942
1000,0.4276,3.300231,1.816654


***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-25/longformer-2-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-25/longformer-2-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-25/longformer-2-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-25/longformer-2-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-25/longformer-2-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 37


Configuration saved in ../data/quality_models/longformer-exp-types-25/longformer-2-fold/config.json


{'eval_loss': 2.782466173171997, 'eval_rmse': 1.6680725812911987, 'eval_runtime': 4.8072, 'eval_samples_per_second': 15.186, 'eval_steps_per_second': 3.952, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-25/longformer-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁▅▂▁▂█▄▅▅▅▅▂
eval/rmse,▁▅▂▁▂█▄▅▅▅▅▃
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,▁▄▆▃▂▃▂▃▄▆▅█
eval/steps_per_second,▅▇█▆▆▆▆▆▇█▇▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▅▄▄▄▂▂▂▂▂▁
train/total_flos,▁

0,1
eval/loss,2.78247
eval/rmse,1.66807
eval/runtime,4.8072
eval/samples_per_second,15.186
eval/steps_per_second,3.952
train/epoch,5.0
train/global_step,1195.0
train/learning_rate,0.0
train/loss,0.2179
train/total_flos,1.25457755400192e+16


loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/859f4633944e1b7e7fa301e72161388cd5903e36385d0ef2917256506bff64c3.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/af6fcabe2bf8cab6f77b20d94ba46a3dbf441ca0549e1f3c852c437b612f5224.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/tokenizer_config.json from cache at None
loading configuration file 

3    49
1    49
4    49
2    49
5    49
Name: quality, dtype: int64
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Training 108, Valid 28, and Test 68
Training 245, Valid 30, and Test 464


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.5729,1.800961,1.341999
200,1.6477,2.409065,1.552116
300,0.8572,2.641883,1.625387


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 464
  Batch size = 4


Configuration saved in ../data/quality_models/longformer-exp-types-50/longformer-0-fold/config.json


{'eval_loss': 3.0313611030578613, 'eval_rmse': 1.741080403327942, 'eval_runtime': 30.5569, 'eval_samples_per_second': 15.185, 'eval_steps_per_second': 3.796, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-50/longformer-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁▄▆█
eval/rmse,▁▅▆█
eval/runtime,▁▁▁█
eval/samples_per_second,▄▁▃█
eval/steps_per_second,█▇▇▁
train/epoch,▁▁▄▄████
train/global_step,▁▁▄▄████
train/learning_rate,█▄▁
train/loss,█▄▁
train/total_flos,▁

0,1
eval/loss,3.03136
eval/rmse,1.74108
eval/runtime,30.5569
eval/samples_per_second,15.185
eval/steps_per_second,3.796
train/epoch,5.0
train/global_step,310.0
train/learning_rate,0.0
train/loss,0.8572
train/total_flos,3218549745868800.0


1    188
3    188
2    188
4    188
5    188
Name: quality, dtype: int64
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Training 108, Valid 28, and Test 68
Training 940, Valid 30, and Test 115


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.7805,3.81269,1.952611
200,1.7973,1.817561,1.34817
300,1.3472,1.694264,1.301639
400,0.9843,2.243016,1.49767
500,0.7089,3.267059,1.807501
600,0.4221,2.890971,1.700286
700,0.4911,3.240157,1.800043
800,0.2932,3.031381,1.741086
900,0.2223,3.142618,1.772743
1000,0.1916,2.490591,1.578161


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-50/longformer-1-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-50/longformer-1-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-50/longformer-1-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-50/longformer-1-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-50/longformer-1-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 30


{'eval_loss': 2.010843276977539, 'eval_rmse': 1.4180421829223633, 'eval_runtime': 7.585, 'eval_samples_per_second': 15.161, 'eval_steps_per_second': 3.823, 'epoch': 5.0}


Configuration saved in ../data/quality_models/longformer-exp-types-50/longformer-1-fold/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-50/longformer-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▁▁▃▆▅▆▅▆▄▃▂
eval/rmse,█▂▁▃▆▅▆▆▆▄▄▂
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,▁▄▂▃▄▃▃▄▂▄▅█
eval/steps_per_second,▆█▇▇█▇▇█▇██▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▅▄▃▂▂▂▁▁▁▁
train/total_flos,▁

0,1
eval/loss,2.01084
eval/rmse,1.41804
eval/runtime,7.585
eval/samples_per_second,15.161
eval/steps_per_second,3.823
train/epoch,5.0
train/global_step,1175.0
train/learning_rate,0.0
train/loss,0.1603
train/total_flos,1.23487214739456e+16


1    191
3    191
2    191
4    191
5    191
Name: quality, dtype: int64
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Training 108, Valid 28, and Test 68
Training 955, Valid 37, and Test 73


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.7385,2.540441,1.593876
200,1.9348,2.257457,1.502483
300,1.3537,2.403602,1.550355
400,1.3021,2.574306,1.604465
500,0.8517,2.153497,1.46748
600,0.5977,3.13358,1.770192
700,0.4839,2.542831,1.594626
800,0.3467,2.528951,1.590268
900,0.2735,2.471303,1.572038
1000,0.2322,2.411404,1.552869


***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-50/longformer-2-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-50/longformer-2-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-50/longformer-2-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-50/longformer-2-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-50/longformer-2-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 37


Configuration saved in ../data/quality_models/longformer-exp-types-50/longformer-2-fold/config.json


{'eval_loss': 2.5027706623077393, 'eval_rmse': 1.5820146799087524, 'eval_runtime': 7.4127, 'eval_samples_per_second': 9.848, 'eval_steps_per_second': 2.563, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-50/longformer-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▄▂▃▄▁█▄▄▃▃▄▃
eval/rmse,▄▂▃▄▁█▄▄▃▃▄▄
eval/runtime,▁▁▁▁▁▁▁▁▁▂▁█
eval/samples_per_second,█████████▅█▁
eval/steps_per_second,█████████▅█▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▆▄▄▃▂▂▂▁▁▁
train/total_flos,▁

0,1
eval/loss,2.50277
eval/rmse,1.58201
eval/runtime,7.4127
eval/samples_per_second,9.848
eval/steps_per_second,2.563
train/epoch,5.0
train/global_step,1195.0
train/learning_rate,0.0
train/loss,0.139
train/total_flos,1.25457755400192e+16


loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/859f4633944e1b7e7fa301e72161388cd5903e36385d0ef2917256506bff64c3.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/af6fcabe2bf8cab6f77b20d94ba46a3dbf441ca0549e1f3c852c437b612f5224.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/tokenizer_config.json from cache at None
loading configuration file 

3    49
1    49
4    49
2    49
5    49
Name: quality, dtype: int64
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Training 108, Valid 28, and Test 68
Training 245, Valid 30, and Test 464


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.4994,1.898352,1.377807
200,1.6982,1.852703,1.36114
300,0.9934,1.87718,1.370102


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 464
  Batch size = 4


Configuration saved in ../data/quality_models/longformer-exp-types-75/longformer-0-fold/config.json


{'eval_loss': 2.729079484939575, 'eval_rmse': 1.6519925594329834, 'eval_runtime': 34.0973, 'eval_samples_per_second': 13.608, 'eval_steps_per_second': 3.402, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-75/longformer-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁▁▁█
eval/rmse,▁▁▁█
eval/runtime,▁▁▁█
eval/samples_per_second,█▇█▁
eval/steps_per_second,█▇█▁
train/epoch,▁▁▄▄████
train/global_step,▁▁▄▄████
train/learning_rate,█▄▁
train/loss,█▄▁
train/total_flos,▁

0,1
eval/loss,2.72908
eval/rmse,1.65199
eval/runtime,34.0973
eval/samples_per_second,13.608
eval/steps_per_second,3.402
train/epoch,5.0
train/global_step,310.0
train/learning_rate,0.0
train/loss,0.9934
train/total_flos,3218549745868800.0


1    188
3    188
2    188
4    188
5    188
Name: quality, dtype: int64
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Training 108, Valid 28, and Test 68
Training 940, Valid 30, and Test 115


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.769,3.75121,1.936804
200,1.8451,1.655255,1.286567
300,1.2857,1.573728,1.254483
400,1.1481,1.570856,1.253338
500,0.8187,1.717833,1.310661
600,0.5104,1.854375,1.361754
700,0.5503,1.975343,1.405469
800,0.3916,1.908067,1.381328
900,0.2916,1.738701,1.318598
1000,0.2722,1.715491,1.309768


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-75/longformer-1-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-75/longformer-1-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-75/longformer-1-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-75/longformer-1-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-75/longformer-1-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 30


Configuration saved in ../data/quality_models/longformer-exp-types-75/longformer-1-fold/config.json


{'eval_loss': 2.0559074878692627, 'eval_rmse': 1.4338436126708984, 'eval_runtime': 10.1879, 'eval_samples_per_second': 11.288, 'eval_steps_per_second': 2.847, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-75/longformer-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▁▁▁▁▂▂▂▂▁▂▃
eval/rmse,█▁▁▁▂▂▃▂▂▂▂▃
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,███████████▁
eval/steps_per_second,███████████▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▆▄▄▃▂▂▂▁▁▁
train/total_flos,▁

0,1
eval/loss,2.05591
eval/rmse,1.43384
eval/runtime,10.1879
eval/samples_per_second,11.288
eval/steps_per_second,2.847
train/epoch,5.0
train/global_step,1175.0
train/learning_rate,0.0
train/loss,0.1521
train/total_flos,1.23487214739456e+16


1    191
3    191
2    191
4    191
5    191
Name: quality, dtype: int64
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Training 108, Valid 28, and Test 68
Training 955, Valid 37, and Test 73


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.6457,2.530482,1.590749
200,1.7188,2.514881,1.585838
300,1.3081,2.157354,1.468793
400,1.2102,1.992653,1.411614
500,0.9197,2.021903,1.421937
600,0.6647,2.680123,1.637108
700,0.5452,2.068697,1.438297
800,0.3335,2.163659,1.470938
900,0.289,2.319317,1.52293
1000,0.2764,2.212789,1.487545


***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-75/longformer-2-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-75/longformer-2-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-75/longformer-2-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-75/longformer-2-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-75/longformer-2-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 37


Configuration saved in ../data/quality_models/longformer-exp-types-75/longformer-2-fold/config.json


{'eval_loss': 2.3720834255218506, 'eval_rmse': 1.5401570796966553, 'eval_runtime': 5.8013, 'eval_samples_per_second': 12.583, 'eval_steps_per_second': 3.275, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-75/longformer-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▆▆▃▁▁█▂▃▄▃▃▅
eval/rmse,▇▆▃▁▁█▂▃▄▃▃▅
eval/runtime,▁▁▁▁▃▅▁▁▁▁▁█
eval/samples_per_second,████▄▁█████▅
eval/steps_per_second,████▄▁█████▅
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▅▄▄▃▂▂▁▁▁▁
train/total_flos,▁

0,1
eval/loss,2.37208
eval/rmse,1.54016
eval/runtime,5.8013
eval/samples_per_second,12.583
eval/steps_per_second,3.275
train/epoch,5.0
train/global_step,1195.0
train/learning_rate,0.0
train/loss,0.1599
train/total_flos,1.25457755400192e+16


loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/859f4633944e1b7e7fa301e72161388cd5903e36385d0ef2917256506bff64c3.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/af6fcabe2bf8cab6f77b20d94ba46a3dbf441ca0549e1f3c852c437b612f5224.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/tokenizer_config.json from cache at None
loading configuration file 

3    49
1    49
4    49
2    49
5    49
Name: quality, dtype: int64
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Training 108, Valid 28, and Test 68
Training 245, Valid 30, and Test 464


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.5362,2.011117,1.418139
200,1.6085,2.121658,1.456591
300,0.9087,2.401075,1.54954


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 464
  Batch size = 4


Configuration saved in ../data/quality_models/longformer-exp-types-100/longformer-0-fold/config.json


{'eval_loss': 2.448212146759033, 'eval_rmse': 1.5646764039993286, 'eval_runtime': 36.8129, 'eval_samples_per_second': 12.604, 'eval_steps_per_second': 3.151, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-100/longformer-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁▃▇█
eval/rmse,▁▃▇█
eval/runtime,▁▁▁█
eval/samples_per_second,█▇█▁
eval/steps_per_second,███▁
train/epoch,▁▁▄▄████
train/global_step,▁▁▄▄████
train/learning_rate,█▄▁
train/loss,█▄▁
train/total_flos,▁

0,1
eval/loss,2.44821
eval/rmse,1.56468
eval/runtime,36.8129
eval/samples_per_second,12.604
eval/steps_per_second,3.151
train/epoch,5.0
train/global_step,310.0
train/learning_rate,0.0
train/loss,0.9087
train/total_flos,3218549745868800.0


1    188
3    188
2    188
4    188
5    188
Name: quality, dtype: int64
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Training 108, Valid 28, and Test 68
Training 940, Valid 30, and Test 115


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.843,2.739877,1.655257
200,1.6428,1.391923,1.179798
300,1.2761,1.441932,1.200805
400,1.3613,1.555832,1.24733
500,0.9364,1.376335,1.173173
600,0.5557,1.214941,1.102243
700,0.5999,1.287541,1.134699
800,0.5329,1.288722,1.135219
900,0.3529,1.171455,1.082338
1000,0.3273,1.199813,1.09536


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-100/longformer-1-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-100/longformer-1-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-100/longformer-1-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-100/longformer-1-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-100/longformer-1-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples 

Configuration saved in ../data/quality_models/longformer-exp-types-100/longformer-1-fold/config.json


{'eval_loss': 1.6106237173080444, 'eval_rmse': 1.2691034078598022, 'eval_runtime': 8.9759, 'eval_samples_per_second': 12.812, 'eval_steps_per_second': 3.231, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-100/longformer-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▂▂▃▂▁▂▂▁▁▁▃
eval/rmse,█▂▂▃▂▁▂▂▁▁▂▃
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,▇█████▇█▇██▁
eval/steps_per_second,████████▇██▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▅▄▄▃▂▂▂▁▁▁
train/total_flos,▁

0,1
eval/loss,1.61062
eval/rmse,1.2691
eval/runtime,8.9759
eval/samples_per_second,12.812
eval/steps_per_second,3.231
train/epoch,5.0
train/global_step,1175.0
train/learning_rate,0.0
train/loss,0.2024
train/total_flos,1.23487214739456e+16


1    191
3    191
2    191
4    191
5    191
Name: quality, dtype: int64
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Training 108, Valid 28, and Test 68
Training 955, Valid 37, and Test 73


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.5405,2.471532,1.572111
200,1.7863,2.471267,1.572026
300,1.4065,1.838454,1.355896
400,1.3569,1.856635,1.362584
500,0.8388,1.838613,1.355955
600,0.6654,2.047401,1.430874
700,0.5835,1.750073,1.322903
800,0.3715,1.715989,1.309958
900,0.3581,1.635228,1.27876
1000,0.2737,1.604496,1.266687


***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-100/longformer-2-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-100/longformer-2-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-100/longformer-2-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-100/longformer-2-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-100/longformer-2-fold/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples 

Configuration saved in ../data/quality_models/longformer-exp-types-100/longformer-2-fold/config.json


{'eval_loss': 2.275981903076172, 'eval_rmse': 1.5086356401443481, 'eval_runtime': 7.6872, 'eval_samples_per_second': 9.496, 'eval_steps_per_second': 2.472, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-100/longformer-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,██▃▃▃▅▂▂▁▁▁▆
eval/rmse,██▃▃▃▅▂▂▁▁▁▇
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,███████████▁
eval/steps_per_second,███████████▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▆▅▄▃▂▂▂▁▁▁
train/total_flos,▁

0,1
eval/loss,2.27598
eval/rmse,1.50864
eval/runtime,7.6872
eval/samples_per_second,9.496
eval/steps_per_second,2.472
train/epoch,5.0
train/global_step,1195.0
train/learning_rate,0.0
train/loss,0.1943
train/total_flos,1.25457755400192e+16


In [31]:
json.dump(exp_types_rmse_scores, open('../data/quality_models/exp_types_rmse_scores.json', 'w'))

#### LongFormer + exp-types & Moves

In [32]:
#Evaluate the approach performance on different sizes of turns
exp_types_and_moves_rmse_scores = []
for input_clm in ['turn_text_25', 'turn_text_50', 'turn_text_75', 'turn_text_100']:
    perc = input_clm.split('_')[-1]
    exp_types_and_moves_rmse_scores.append(train_and_evaluate_model('../data/quality_models/longformer-exp-types-and-moves{}'.format(perc), eli5_annotation_df, 
                                       'longformer-exp-types-and-moves{}'.format(perc), 
                                        input_clm=input_clm,
                                        num_train_epochs=5, lr=2e-5, batch_size=4, extra_exp_types=True, num_exp_types=8, extra_exp_moves=True, num_exp_moves=11, n_folds=3, eval_steps=100)
    )

loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/859f4633944e1b7e7fa301e72161388cd5903e36385d0ef2917256506bff64c3.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/af6fcabe2bf8cab6f77b20d94ba46a3dbf441ca0549e1f3c852c437b612f5224.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/tokenizer_config.json from cache at None
loading configuration file 

3    49
1    49
4    49
2    49
5    49
Name: quality, dtype: int64
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Training 108, Valid 28, and Test 68
Training 245, Valid 30, and Test 464


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.6093,1.747996,1.322118
200,1.3323,2.564864,1.601519
300,0.6014,2.84588,1.686974


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 464
  Batch size = 4


Configuration saved in ../data/quality_models/longformer-exp-types-and-moves25/longformer-0-fold/config.json


{'eval_loss': 3.429048776626587, 'eval_rmse': 1.8517690896987915, 'eval_runtime': 40.2366, 'eval_samples_per_second': 11.532, 'eval_steps_per_second': 2.883, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-and-moves25/longformer-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁▄▆█
eval/rmse,▁▅▆█
eval/runtime,▁▁▁█
eval/samples_per_second,███▁
eval/steps_per_second,███▁
train/epoch,▁▁▄▄████
train/global_step,▁▁▄▄████
train/learning_rate,█▄▁
train/loss,█▄▁
train/total_flos,▁

0,1
eval/loss,3.42905
eval/rmse,1.85177
eval/runtime,40.2366
eval/samples_per_second,11.532
eval/steps_per_second,2.883
train/epoch,5.0
train/global_step,310.0
train/learning_rate,0.0
train/loss,0.6014
train/total_flos,3218549745868800.0


1    188
3    188
2    188
4    188
5    188
Name: quality, dtype: int64
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Training 108, Valid 28, and Test 68
Training 940, Valid 30, and Test 115


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.937,3.155182,1.776283
200,1.7994,2.406647,1.551337
300,1.4034,2.355145,1.534648
400,1.2481,1.896763,1.37723
500,0.7708,2.821398,1.679702
600,0.5389,2.850292,1.688281
700,0.6112,2.917035,1.707933
800,0.318,2.683926,1.638269
900,0.275,2.405897,1.551095
1000,0.2083,2.190026,1.479874


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-and-moves25/longformer-1-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-and-moves25/longformer-1-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-and-moves25/longformer-1-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-and-moves25/longformer-1-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-and-moves25/longformer-1-fold/checkpoint-500/special_tokens_map.json
***** 

{'eval_loss': 2.591442346572876, 'eval_rmse': 1.6097959280014038, 'eval_runtime': 10.403, 'eval_samples_per_second': 11.055, 'eval_steps_per_second': 2.788, 'epoch': 5.0}


Configuration saved in ../data/quality_models/longformer-exp-types-and-moves25/longformer-1-fold/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-and-moves25/longformer-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▄▄▁▆▆▇▅▄▃▄▅
eval/rmse,█▄▄▁▆▆▇▆▄▃▄▅
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,███████████▁
eval/steps_per_second,███████████▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▅▄▄▃▂▂▁▁▁▁
train/total_flos,▁

0,1
eval/loss,2.59144
eval/rmse,1.6098
eval/runtime,10.403
eval/samples_per_second,11.055
eval/steps_per_second,2.788
train/epoch,5.0
train/global_step,1175.0
train/learning_rate,0.0
train/loss,0.1703
train/total_flos,1.23487214739456e+16


1    191
3    191
2    191
4    191
5    191
Name: quality, dtype: int64
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Using input_clm=turn_text_25
Using global attention
Training 108, Valid 28, and Test 68
Training 955, Valid 37, and Test 73


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.7546,2.471958,1.572246
200,1.9843,2.825766,1.681001
300,1.4788,2.353109,1.533985
400,1.2774,2.253005,1.501001
500,1.1027,2.418843,1.555263
600,0.7097,2.898356,1.702456
700,0.6458,2.337491,1.528885
800,0.3981,2.565687,1.601776
900,0.3878,2.298429,1.516057
1000,0.3124,2.415398,1.554155


***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-and-moves25/longformer-2-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-and-moves25/longformer-2-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-and-moves25/longformer-2-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-and-moves25/longformer-2-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-and-moves25/longformer-2-fold/checkpoint-500/special_tokens_map.json
***** 

Configuration saved in ../data/quality_models/longformer-exp-types-and-moves25/longformer-2-fold/config.json


{'eval_loss': 2.354177474975586, 'eval_rmse': 1.5343328714370728, 'eval_runtime': 5.973, 'eval_samples_per_second': 12.222, 'eval_steps_per_second': 3.181, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-and-moves25/longformer-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▃▇▂▁▃█▂▄▁▃▂▂
eval/rmse,▃▇▂▁▃█▂▅▂▃▂▂
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,██████████▇▁
eval/steps_per_second,███████████▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▆▄▄▃▂▂▁▁▁▁
train/total_flos,▁

0,1
eval/loss,2.35418
eval/rmse,1.53433
eval/runtime,5.973
eval/samples_per_second,12.222
eval/steps_per_second,3.181
train/epoch,5.0
train/global_step,1195.0
train/learning_rate,0.0
train/loss,0.2288
train/total_flos,1.25457755400192e+16


loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/859f4633944e1b7e7fa301e72161388cd5903e36385d0ef2917256506bff64c3.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/af6fcabe2bf8cab6f77b20d94ba46a3dbf441ca0549e1f3c852c437b612f5224.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/tokenizer_config.json from cache at None
loading configuration file 

3    49
1    49
4    49
2    49
5    49
Name: quality, dtype: int64
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Training 108, Valid 28, and Test 68
Training 245, Valid 30, and Test 464


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.4988,2.092113,1.446414
200,1.5604,1.995143,1.412495
300,0.6876,2.289812,1.513213


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 464
  Batch size = 4


Configuration saved in ../data/quality_models/longformer-exp-types-and-moves50/longformer-0-fold/config.json


{'eval_loss': 2.8964712619781494, 'eval_rmse': 1.7019022703170776, 'eval_runtime': 40.424, 'eval_samples_per_second': 11.478, 'eval_steps_per_second': 2.87, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-and-moves50/longformer-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▂▁▃█
eval/rmse,▂▁▃█
eval/runtime,▁▁▁█
eval/samples_per_second,███▁
eval/steps_per_second,███▁
train/epoch,▁▁▄▄████
train/global_step,▁▁▄▄████
train/learning_rate,█▄▁
train/loss,█▄▁
train/total_flos,▁

0,1
eval/loss,2.89647
eval/rmse,1.7019
eval/runtime,40.424
eval/samples_per_second,11.478
eval/steps_per_second,2.87
train/epoch,5.0
train/global_step,310.0
train/learning_rate,0.0
train/loss,0.6876
train/total_flos,3218549745868800.0


1    188
3    188
2    188
4    188
5    188
Name: quality, dtype: int64
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Training 108, Valid 28, and Test 68
Training 940, Valid 30, and Test 115


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.8182,3.790042,1.946803
200,1.7884,3.351169,1.83062
300,1.2573,2.967937,1.72277
400,1.0512,2.499222,1.580893
500,0.7274,2.474596,1.573085
600,0.4059,2.106382,1.451338
700,0.52,2.662833,1.631819
800,0.2989,2.19325,1.480962
900,0.1878,2.407502,1.551613
1000,0.1763,2.813476,1.677342


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-and-moves50/longformer-1-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-and-moves50/longformer-1-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-and-moves50/longformer-1-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-and-moves50/longformer-1-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-and-moves50/longformer-1-fold/checkpoint-500/special_tokens_map.json
***** 

Configuration saved in ../data/quality_models/longformer-exp-types-and-moves50/longformer-1-fold/config.json


{'eval_loss': 1.9664844274520874, 'eval_rmse': 1.4023139476776123, 'eval_runtime': 9.5262, 'eval_samples_per_second': 12.072, 'eval_steps_per_second': 3.044, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-and-moves50/longformer-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▆▅▃▃▂▄▂▃▄▄▁
eval/rmse,█▇▅▃▃▂▄▂▃▅▄▁
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,███████████▁
eval/steps_per_second,███████████▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▅▄▃▃▂▂▁▁▁▁
train/total_flos,▁

0,1
eval/loss,1.96648
eval/rmse,1.40231
eval/runtime,9.5262
eval/samples_per_second,12.072
eval/steps_per_second,3.044
train/epoch,5.0
train/global_step,1175.0
train/learning_rate,0.0
train/loss,0.1284
train/total_flos,1.23487214739456e+16


1    191
3    191
2    191
4    191
5    191
Name: quality, dtype: int64
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Using input_clm=turn_text_50
Using global attention
Training 108, Valid 28, and Test 68
Training 955, Valid 37, and Test 73


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.49,2.277234,1.509051
200,1.6353,1.82127,1.349545
300,1.1094,2.334357,1.52786
400,1.1499,2.286412,1.512088
500,0.8587,2.403534,1.550334
600,0.5773,2.592278,1.610055
700,0.4162,2.514268,1.585644
800,0.2426,2.309967,1.519858
900,0.2415,2.198898,1.482868
1000,0.1888,2.112074,1.453298


***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-and-moves50/longformer-2-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-and-moves50/longformer-2-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-and-moves50/longformer-2-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-and-moves50/longformer-2-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-and-moves50/longformer-2-fold/checkpoint-500/special_tokens_map.json
***** 

{'eval_loss': 2.722442388534546, 'eval_rmse': 1.6499825716018677, 'eval_runtime': 5.6874, 'eval_samples_per_second': 12.835, 'eval_steps_per_second': 3.341, 'epoch': 5.0}


Configuration saved in ../data/quality_models/longformer-exp-types-and-moves50/longformer-2-fold/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-and-moves50/longformer-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▅▁▅▅▆▇▆▅▄▃▄█
eval/rmse,▅▁▅▅▆▇▇▅▄▃▄█
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,█▇█████████▁
eval/steps_per_second,███████████▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▅▄▄▃▂▂▁▁▁▁
train/total_flos,▁

0,1
eval/loss,2.72244
eval/rmse,1.64998
eval/runtime,5.6874
eval/samples_per_second,12.835
eval/steps_per_second,3.341
train/epoch,5.0
train/global_step,1195.0
train/learning_rate,0.0
train/loss,0.1323
train/total_flos,1.25457755400192e+16


loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/859f4633944e1b7e7fa301e72161388cd5903e36385d0ef2917256506bff64c3.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/af6fcabe2bf8cab6f77b20d94ba46a3dbf441ca0549e1f3c852c437b612f5224.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/tokenizer_config.json from cache at None
loading configuration file 

3    49
1    49
4    49
2    49
5    49
Name: quality, dtype: int64
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Training 108, Valid 28, and Test 68
Training 245, Valid 30, and Test 464


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.3696,1.575546,1.255207
200,1.117,1.898246,1.377769
300,0.4887,1.946129,1.395037


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 464
  Batch size = 4


Configuration saved in ../data/quality_models/longformer-exp-types-and-moves75/longformer-0-fold/config.json


{'eval_loss': 3.2624123096466064, 'eval_rmse': 1.8062149286270142, 'eval_runtime': 38.0126, 'eval_samples_per_second': 12.206, 'eval_steps_per_second': 3.052, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-and-moves75/longformer-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁▂▃█
eval/rmse,▁▃▃█
eval/runtime,▁▁▁█
eval/samples_per_second,██▇▁
eval/steps_per_second,███▁
train/epoch,▁▁▄▄████
train/global_step,▁▁▄▄████
train/learning_rate,█▄▁
train/loss,█▃▁
train/total_flos,▁

0,1
eval/loss,3.26241
eval/rmse,1.80621
eval/runtime,38.0126
eval/samples_per_second,12.206
eval/steps_per_second,3.052
train/epoch,5.0
train/global_step,310.0
train/learning_rate,0.0
train/loss,0.4887
train/total_flos,3218549745868800.0


1    188
3    188
2    188
4    188
5    188
Name: quality, dtype: int64
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Training 108, Valid 28, and Test 68
Training 940, Valid 30, and Test 115


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.7434,3.005117,1.733527
200,1.6501,3.425,1.850676
300,1.6452,1.710704,1.307939
400,1.2394,1.453467,1.205598
500,0.9755,1.490103,1.220698
600,0.65,1.716388,1.31011
700,0.5737,2.059833,1.435212
800,0.3685,2.210799,1.486876
900,0.3077,1.823135,1.350235
1000,0.2925,1.676017,1.294611


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-and-moves75/longformer-1-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-and-moves75/longformer-1-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-and-moves75/longformer-1-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-and-moves75/longformer-1-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-and-moves75/longformer-1-fold/checkpoint-500/special_tokens_map.json
***** 

Configuration saved in ../data/quality_models/longformer-exp-types-and-moves75/longformer-1-fold/config.json


{'eval_loss': 2.0965335369110107, 'eval_rmse': 1.4479411840438843, 'eval_runtime': 10.1901, 'eval_samples_per_second': 11.285, 'eval_steps_per_second': 2.846, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-and-moves75/longformer-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▇█▂▁▁▂▃▄▂▂▂▃
eval/rmse,▇█▂▁▁▂▃▄▃▂▃▄
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,███████████▁
eval/steps_per_second,███████████▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▅▅▄▃▂▂▂▁▁▁
train/total_flos,▁

0,1
eval/loss,2.09653
eval/rmse,1.44794
eval/runtime,10.1901
eval/samples_per_second,11.285
eval/steps_per_second,2.846
train/epoch,5.0
train/global_step,1175.0
train/learning_rate,0.0
train/loss,0.1581
train/total_flos,1.23487214739456e+16


1    191
3    191
2    191
4    191
5    191
Name: quality, dtype: int64
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Using input_clm=turn_text_75
Using global attention
Training 108, Valid 28, and Test 68
Training 955, Valid 37, and Test 73


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.4009,2.544059,1.59501
200,1.7734,2.337829,1.528996
300,1.3979,2.667173,1.633148
400,1.1605,1.993865,1.412043
500,0.9368,1.722769,1.312543
600,0.6797,1.869268,1.367212
700,0.547,1.986946,1.409591
800,0.3092,2.111427,1.453075
900,0.3699,2.150017,1.466294
1000,0.252,2.189829,1.479807


***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-and-moves75/longformer-2-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-and-moves75/longformer-2-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-and-moves75/longformer-2-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-and-moves75/longformer-2-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-and-moves75/longformer-2-fold/checkpoint-500/special_tokens_map.json
***** 

{'eval_loss': 2.2485294342041016, 'eval_rmse': 1.4995096921920776, 'eval_runtime': 6.085, 'eval_samples_per_second': 11.997, 'eval_steps_per_second': 3.122, 'epoch': 5.0}


Configuration saved in ../data/quality_models/longformer-exp-types-and-moves75/longformer-2-fold/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-and-moves75/longformer-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▇▆█▃▁▂▃▄▄▄▃▅
eval/rmse,▇▆█▃▁▂▃▄▄▅▄▅
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,██▇████████▁
eval/steps_per_second,██▇████████▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▆▅▄▃▃▂▁▂▁▁
train/total_flos,▁

0,1
eval/loss,2.24853
eval/rmse,1.49951
eval/runtime,6.085
eval/samples_per_second,11.997
eval/steps_per_second,3.122
train/epoch,5.0
train/global_step,1195.0
train/learning_rate,0.0
train/loss,0.1811
train/total_flos,1.25457755400192e+16


loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/vocab.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/859f4633944e1b7e7fa301e72161388cd5903e36385d0ef2917256506bff64c3.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/merges.txt from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/af6fcabe2bf8cab6f77b20d94ba46a3dbf441ca0549e1f3c852c437b612f5224.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/allenai/longformer-base-4096/resolve/main/tokenizer_config.json from cache at None
loading configuration file 

3    49
1    49
4    49
2    49
5    49
Name: quality, dtype: int64
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Training 108, Valid 28, and Test 68
Training 245, Valid 30, and Test 464


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.355,2.542393,1.594488
200,1.6408,1.492575,1.22171
300,0.8642,1.959238,1.399728


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 464
  Batch size = 4


Configuration saved in ../data/quality_models/longformer-exp-types-and-moves100/longformer-0-fold/config.json


{'eval_loss': 2.376236915588379, 'eval_rmse': 1.5415047407150269, 'eval_runtime': 41.8983, 'eval_samples_per_second': 11.074, 'eval_steps_per_second': 2.769, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-and-moves100/longformer-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▁▄▇
eval/rmse,█▁▄▇
eval/runtime,▁▁▁█
eval/samples_per_second,▁▂█▃
eval/steps_per_second,▁▂█▂
train/epoch,▁▁▄▄████
train/global_step,▁▁▄▄████
train/learning_rate,█▄▁
train/loss,█▅▁
train/total_flos,▁

0,1
eval/loss,2.37624
eval/rmse,1.5415
eval/runtime,41.8983
eval/samples_per_second,11.074
eval/steps_per_second,2.769
train/epoch,5.0
train/global_step,310.0
train/learning_rate,0.0
train/loss,0.8642
train/total_flos,3218549745868800.0


1    188
3    188
2    188
4    188
5    188
Name: quality, dtype: int64
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Training 108, Valid 28, and Test 68
Training 940, Valid 30, and Test 115


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.716,2.882318,1.697739
200,1.5026,1.970765,1.403839
300,1.3467,1.95817,1.399346
400,1.1386,1.428538,1.195215
500,0.8426,1.612025,1.269655
600,0.5581,1.24716,1.116763
700,0.6487,1.439775,1.199906
800,0.481,1.481413,1.217133
900,0.2825,1.431047,1.196264
1000,0.2706,1.275377,1.129326


***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
***** Running Evaluation *****
  Num examples = 30
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-and-moves100/longformer-1-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-and-moves100/longformer-1-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-and-moves100/longformer-1-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-and-moves100/longformer-1-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-and-moves100/longformer-1-fold/checkpoint-500/special_tokens_map.json
*

Configuration saved in ../data/quality_models/longformer-exp-types-and-moves100/longformer-1-fold/config.json


{'eval_loss': 1.894254446029663, 'eval_rmse': 1.3763190507888794, 'eval_runtime': 10.2203, 'eval_samples_per_second': 11.252, 'eval_steps_per_second': 2.837, 'epoch': 5.0}


Model weights saved in ../data/quality_models/longformer-exp-types-and-moves100/longformer-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▄▄▂▃▁▂▂▂▁▁▄
eval/rmse,█▄▄▂▃▁▂▂▂▁▂▄
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,███████████▁
eval/steps_per_second,███████████▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▅▄▄▃▂▂▂▁▁▁
train/total_flos,▁

0,1
eval/loss,1.89425
eval/rmse,1.37632
eval/runtime,10.2203
eval/samples_per_second,11.252
eval/steps_per_second,2.837
train/epoch,5.0
train/global_step,1175.0
train/learning_rate,0.0
train/loss,0.1835
train/total_flos,1.23487214739456e+16


1    191
3    191
2    191
4    191
5    191
Name: quality, dtype: int64
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Using input_clm=turn_text_100
Using global attention
Training 108, Valid 28, and Test 68
Training 955, Valid 37, and Test 73


loading weights file https://huggingface.co/allenai/longformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/a7a586602e625bd012d75abdfcc615f5bb1fe133273845f7381332c634273bd9.dc3a4f03d4ab11f972b126d0e6b67f43e5d9003b3aec54f8e549cc7e2d42398d
Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exa

Step,Training Loss,Validation Loss,Rmse
100,2.3943,2.154014,1.467656
200,1.4463,1.68445,1.297864
300,1.1288,1.53566,1.239218
400,1.0642,1.413248,1.188801
500,0.7184,1.249947,1.118011
600,0.5028,1.313157,1.14593
700,0.4341,1.295669,1.138275
800,0.299,1.404643,1.185176
900,0.2672,1.191852,1.09172
1000,0.2001,1.170556,1.081922


***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
***** Running Evaluation *****
  Num examples = 37
  Batch size = 4
Saving model checkpoint to ../data/quality_models/longformer-exp-types-and-moves100/longformer-2-fold/checkpoint-500
Configuration saved in ../data/quality_models/longformer-exp-types-and-moves100/longformer-2-fold/checkpoint-500/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-and-moves100/longformer-2-fold/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../data/quality_models/longformer-exp-types-and-moves100/longformer-2-fold/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../data/quality_models/longformer-exp-types-and-moves100/longformer-2-fold/checkpoint-500/special_tokens_map.json
*

{'eval_loss': 1.751179575920105, 'eval_rmse': 1.3233213424682617, 'eval_runtime': 6.3549, 'eval_samples_per_second': 11.487, 'eval_steps_per_second': 2.99, 'epoch': 5.0}


Configuration saved in ../data/quality_models/longformer-exp-types-and-moves100/longformer-2-fold/config.json
Model weights saved in ../data/quality_models/longformer-exp-types-and-moves100/longformer-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▅▄▃▂▂▂▃▁▁▁▅
eval/rmse,█▅▄▃▂▂▂▃▁▁▁▅
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,███▇███████▁
eval/steps_per_second,███████████▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▅▄▄▃▂▂▁▁▁▁
train/total_flos,▁

0,1
eval/loss,1.75118
eval/rmse,1.32332
eval/runtime,6.3549
eval/samples_per_second,11.487
eval/steps_per_second,2.99
train/epoch,5.0
train/global_step,1195.0
train/learning_rate,0.0
train/loss,0.15
train/total_flos,1.25457755400192e+16


In [34]:
json.dump(exp_types_and_moves_rmse_scores, open('../data/quality_models/exp_types_and_moves_rmse_scores.json', 'w'))

#### HAT model:

In [41]:
model_name_or_path = 'kiddothe2b/hierarchical-transformer-base-4096'

In [42]:
#Evaluate the approach performance on different sizes of turns
hat_rmse_scores = []
for input_clm in ['turn_text_25', 'turn_text_50', 'turn_text_75', 'turn_text_100']:
    eli5_annotation_df['input_texts'] = eli5_annotation_df[input_clm].apply(lambda row: [x['text'] for x in row])
    eli5_annotation_df['labels'] = eli5_annotation_df.quality
    perc = input_clm.split('_')[-1]
    hat_rmse_scores = train_and_evaluate_hat_model(model_name_or_path, '../data/quality_models/hat-model-{}'.format(perc), 
                                                         eli5_annotation_df, 'hat-model-{}'.format(perc), num_train_epochs=5, 
                                                         lr=5e-5, batch_size=8,  n_folds=5, eval_steps=50)

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

4    77
2    77
1    77
5    77
3    77
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 385, Valid 35, and Test 383
Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.3429,2.29113,1.513648
100,2.0548,3.415096,1.847998
150,1.7745,1.728767,1.314826
200,1.1085,1.923266,1.386819


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 3.6440885066986084, 'eval_rmse': 1.908949613571167, 'eval_runtime': 1.4642, 'eval_samples_per_second': 261.573, 'eval_steps_per_second': 32.782, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-25/hat-former-0-fold/config.json
Model weights saved in ../data/quality_models/hat-model-25/hat-former-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▃▇▁▂█
eval/rmse,▃▇▁▂█
eval/runtime,▁▁▁▁█
eval/samples_per_second,█▅▅▁▃
eval/steps_per_second,█▆▇▄▁
train/epoch,▁▁▃▃▅▅▆▆██
train/global_step,▁▁▃▃▅▅▆▆██
train/learning_rate,█▆▃▁
train/loss,█▆▅▁
train/total_flos,▁

0,1
eval/loss,3.64409
eval/rmse,1.90895
eval/runtime,1.4642
eval/samples_per_second,261.573
eval/steps_per_second,32.782
train/epoch,5.0
train/global_step,245.0
train/learning_rate,1e-05
train/loss,1.1085
train/total_flos,409213916333568.0


1    187
3    187
2    187
4    187
5    187
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 935, Valid 35, and Test 122


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.5652,1.824242,1.350645
100,2.0341,4.117488,2.02916
150,1.7389,4.594462,2.14347
200,1.3194,2.688012,1.639516
250,1.1197,5.392112,2.322092
300,0.755,3.535989,1.880423
350,0.5578,3.595363,1.896144
400,0.4096,3.411071,1.846909
450,0.3053,4.048071,2.011982
500,0.2504,4.271893,2.066856


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 2.2195627689361572, 'eval_rmse': 1.4898196458816528, 'eval_runtime': 0.4029, 'eval_samples_per_second': 302.835, 'eval_steps_per_second': 39.716, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-25/hat-former-1-fold/config.json
Model weights saved in ../data/quality_models/hat-model-25/hat-former-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁▅▆▃█▄▄▄▅▆▆▂
eval/rmse,▁▆▇▃█▅▅▅▆▆▆▂
eval/runtime,▁▂▁▂▁▂▁▂▁▁▁█
eval/samples_per_second,▆▁▄▁▄▁▄▁▄▃▂█
eval/steps_per_second,█▁▅▁▅▁▅▂▅▄▃▇
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▆▆▄▄▃▂▂▁▁▁
train/total_flos,▁

0,1
eval/loss,2.21956
eval/rmse,1.48982
eval/runtime,0.4029
eval/samples_per_second,302.835
eval/steps_per_second,39.716
train/epoch,5.0
train/global_step,585.0
train/learning_rate,0.0
train/loss,0.1834
train/total_flos,2357839105857792.0


1    195
3    195
2    195
4    195
5    195
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 975, Valid 35, and Test 57


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.5314,3.198715,1.788495
100,1.9986,2.014031,1.419165
150,2.1543,2.176907,1.475434
200,1.9287,2.000857,1.414516
250,2.1022,1.925181,1.387509
300,1.8797,2.116501,1.45482
350,2.0924,2.536112,1.592517
400,1.9865,2.484713,1.576297
450,2.0337,2.224855,1.491595
500,2.1384,2.31446,1.521335


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 2.589247226715088, 'eval_rmse': 1.6091138124465942, 'eval_runtime': 0.201, 'eval_samples_per_second': 283.555, 'eval_steps_per_second': 39.797, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-25/hat-former-2-fold/config.json
Model weights saved in ../data/quality_models/hat-model-25/hat-former-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▁▂▁▁▂▄▄▃▃▄▃▅
eval/rmse,█▂▃▁▁▂▅▄▃▃▄▃▅
eval/runtime,▁▂▁▂▂▂▁▂▂▂▂▃█
eval/samples_per_second,▇▅▇▃▅▄▇▄▄▄▅▁█
eval/steps_per_second,█▅█▄▆▄▇▄▅▅▆▁█
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▇▇▇▇████
train/learning_rate,█▇▇▆▅▅▄▄▃▂▂▁
train/loss,█▂▄▂▃▁▃▂▃▄▂▂
train/total_flos,▁

0,1
eval/loss,2.58925
eval/rmse,1.60911
eval/runtime,0.201
eval/samples_per_second,283.555
eval/steps_per_second,39.797
train/epoch,5.0
train/global_step,610.0
train/learning_rate,0.0
train/loss,2.0047
train/total_flos,2370971773262592.0


1    200
3    200
2    200
4    200
5    200
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 1000, Valid 35, and Test 46


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.4153,2.249994,1.499998
100,2.1116,3.114214,1.764714
150,2.1635,3.714631,1.927338
200,1.9799,5.017097,2.239888
250,1.926,2.377064,1.541773
300,1.7009,2.13304,1.460493
350,1.3242,2.185445,1.478325
400,1.1646,2.826173,1.681122
450,0.9525,3.026369,1.739646
500,0.8379,2.719726,1.649159


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 3.488116502761841, 'eval_rmse': 1.8676501512527466, 'eval_runtime': 0.1568, 'eval_samples_per_second': 293.314, 'eval_steps_per_second': 38.258, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-25/hat-former-3-fold/config.json
Model weights saved in ../data/quality_models/hat-model-25/hat-former-3-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁▃▅█▂▁▁▃▃▂▄▄▄
eval/rmse,▁▄▅█▂▁▁▃▄▃▄▄▅
eval/runtime,▁█▃▃▁▄▁▂▁▄▂▄▇
eval/samples_per_second,▇▁▅▅▆▄▇▅▆▄▆▄█
eval/steps_per_second,█▁▆▅█▄█▆█▅▇▄▇
train/epoch,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▄▃▂▂▁
train/loss,█▇▇▆▆▅▄▃▃▂▁▁
train/total_flos,▁

0,1
eval/loss,3.48812
eval/rmse,1.86765
eval/runtime,0.1568
eval/samples_per_second,293.314
eval/steps_per_second,38.258
train/epoch,5.0
train/global_step,625.0
train/learning_rate,0.0
train/loss,0.5782
train/total_flos,2434621434617856.0


1    199
3    199
2    199
4    199
5    199
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 131, Valid 33, and Test 40
Training 995, Valid 35, and Test 44


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.5456,2.442444,1.562832
100,2.1442,2.340091,1.529736
150,2.004,2.314205,1.521251
200,1.6157,3.397335,1.843186
250,1.4875,2.807144,1.675454
300,1.1251,2.975554,1.72498
350,0.8563,2.684627,1.638483
400,0.5352,3.052328,1.747091
450,0.4651,2.62856,1.621283
500,0.3598,2.699586,1.643042


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 3.566516160964966, 'eval_rmse': 1.8885221481323242, 'eval_runtime': 0.2303, 'eval_samples_per_second': 191.079, 'eval_steps_per_second': 26.056, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-25/hat-former-4-fold/config.json
Model weights saved in ../data/quality_models/hat-model-25/hat-former-4-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▂▁▁▇▄▅▃▅▃▃▃▄█
eval/rmse,▂▁▁▇▄▅▃▅▃▃▃▄█
eval/runtime,▁▁▁▂▁▃▂▂▂▂▂▂█
eval/samples_per_second,█▇█▆▇▅▆▆▇▅▇▅▁
eval/steps_per_second,█▇█▆▇▅▇▆▇▅▇▆▁
train/epoch,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▄▃▂▂▁
train/loss,█▇▆▅▅▄▃▂▂▂▁▁
train/total_flos,▁

0,1
eval/loss,3.56652
eval/rmse,1.88852
eval/runtime,0.2303
eval/samples_per_second,191.079
eval/steps_per_second,26.056
train/epoch,5.0
train/global_step,625.0
train/learning_rate,0.0
train/loss,0.1862
train/total_flos,2431557145556736.0


loading configuration file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/config.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/67fff7e4e0a33106e11c355770df4420b2385850d788d1b7b549eb8d64b4f3d1.ef5baf2314e8ee19e0e25bc7741c47229b1ac5c08ae795cbc2d27f5f2c19535c
Model config HATConfig {
  "_name_or_path": "kiddothe2b/hierarchical-transformer-base-4096",
  "architectures": [
    "HATForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "auto_map": {
    "AutoConfig": "configuration_hat.HATConfig",
    "AutoModel": "modelling_hat.HATModel",
    "AutoModelForMaskedLM": "modelling_hat.HATForMaskedLM",
    "AutoModelForMultipleChoice": "modelling_hat.HATForMultipleChoice",
    "AutoModelForQuestionAnswering": "modelling_hat.HATForQuestionAnswering",
    "AutoModelForSequenceClassification": "modelling_hat.HATForSequenceClassification",
    "AutoModelForTokenClassification": "modelling_hat.HATForTokenClassific

4    77
2    77
1    77
5    77
3    77
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 385, Valid 35, and Test 383


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.349,2.406016,1.551134
100,1.8842,3.27155,1.808743
150,1.3221,4.596625,2.143974
200,0.9955,4.369844,2.090417


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 2.5383291244506836, 'eval_rmse': 1.593213438987732, 'eval_runtime': 2.1979, 'eval_samples_per_second': 174.256, 'eval_steps_per_second': 21.839, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-50/hat-former-0-fold/config.json
Model weights saved in ../data/quality_models/hat-model-50/hat-former-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁▄█▇▁
eval/rmse,▁▄█▇▁
eval/runtime,▁▁▁▁█
eval/samples_per_second,█▄▇▆▁
eval/steps_per_second,█▅▇▆▁
train/epoch,▁▁▃▃▅▅▆▆██
train/global_step,▁▁▃▃▅▅▆▆██
train/learning_rate,█▆▃▁
train/loss,█▆▃▁
train/total_flos,▁

0,1
eval/loss,2.53833
eval/rmse,1.59321
eval/runtime,2.1979
eval/samples_per_second,174.256
eval/steps_per_second,21.839
train/epoch,5.0
train/global_step,245.0
train/learning_rate,1e-05
train/loss,0.9955
train/total_flos,746110610824704.0


1    187
3    187
2    187
4    187
5    187
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 935, Valid 35, and Test 122


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.4776,2.218168,1.489352
100,2.1018,3.56119,1.887112
150,2.1263,5.747125,2.397316
200,1.8035,2.057908,1.434541
250,1.4959,9.304587,3.050342
300,1.2287,3.584704,1.893331
350,0.9596,3.000477,1.732188
400,0.6206,2.909361,1.705685
450,0.5287,4.172142,2.042582
500,0.4099,3.838885,1.959307


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 2.734311819076538, 'eval_rmse': 1.6535755395889282, 'eval_runtime': 0.6803, 'eval_samples_per_second': 179.336, 'eval_steps_per_second': 23.519, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-50/hat-former-1-fold/config.json
Model weights saved in ../data/quality_models/hat-model-50/hat-former-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁▂▅▁█▂▂▂▃▃▂▂
eval/rmse,▁▃▅▁█▃▂▂▄▃▃▂
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,█▇▆▇▇▆▇▇▇▆█▁
eval/steps_per_second,█▇▇▇▇▆█▇▇▆█▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▇▇▆▅▄▃▂▂▁▁
train/total_flos,▁

0,1
eval/loss,2.73431
eval/rmse,1.65358
eval/runtime,0.6803
eval/samples_per_second,179.336
eval/steps_per_second,23.519
train/epoch,5.0
train/global_step,585.0
train/learning_rate,0.0
train/loss,0.3283
train/total_flos,4519038405107712.0


1    195
3    195
2    195
4    195
5    195
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 975, Valid 35, and Test 57


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.4748,2.812171,1.676953
100,2.0264,1.912607,1.38297
150,2.1519,2.31679,1.5221
200,1.9699,2.221812,1.490574
250,2.0687,1.883113,1.372266
300,1.9106,2.117913,1.455305
350,2.036,2.284575,1.511481
400,1.9765,2.428713,1.558433
450,2.0103,2.0731,1.439826
500,2.0983,2.236291,1.495423


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 2.587500810623169, 'eval_rmse': 1.6085710525512695, 'eval_runtime': 0.2678, 'eval_samples_per_second': 212.818, 'eval_steps_per_second': 29.869, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-50/hat-former-2-fold/config.json
Model weights saved in ../data/quality_models/hat-model-50/hat-former-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▁▄▄▁▃▄▅▂▄▄▃▆
eval/rmse,█▁▄▄▁▃▄▅▃▄▄▃▆
eval/runtime,▁▂▁▂▁▂▁▁▁▁▁▁█
eval/samples_per_second,▇▁█▄▇▅▇▅▆▆▇▅▃
eval/steps_per_second,▇▁█▄▇▅▇▅▆▆▇▅▂
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▇▇▇▇████
train/learning_rate,█▇▇▆▅▅▄▄▃▂▂▁
train/loss,█▂▄▂▃▁▃▂▂▃▁▂
train/total_flos,▁

0,1
eval/loss,2.5875
eval/rmse,1.60857
eval/runtime,0.2678
eval/samples_per_second,212.818
eval/steps_per_second,29.869
train/epoch,5.0
train/global_step,610.0
train/learning_rate,0.0
train/loss,2.0218
train/total_flos,4521489836356608.0


1    200
3    200
2    200
4    200
5    200
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 1000, Valid 35, and Test 46


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.3911,2.355914,1.534899
100,2.1045,2.445014,1.563654
150,2.1509,2.330279,1.526525
200,2.0277,3.949504,1.987336
250,1.7469,1.84147,1.357008
300,1.2879,3.243425,1.800951
350,1.189,2.2414,1.497131
400,0.8736,2.352893,1.533914
450,0.7798,2.327857,1.525732
500,0.5344,2.205275,1.485017


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 2.5942468643188477, 'eval_rmse': 1.6106665134429932, 'eval_runtime': 0.1887, 'eval_samples_per_second': 243.724, 'eval_steps_per_second': 31.79, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-50/hat-former-3-fold/config.json
Model weights saved in ../data/quality_models/hat-model-50/hat-former-3-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▃▃▃█▁▆▂▃▃▂▂▁▃
eval/rmse,▃▃▃█▁▆▃▃▃▂▂▂▄
eval/runtime,▁▂▂▄▃▄▃▁▃▄▁▃█
eval/samples_per_second,█▆▅▁▄▁▃█▄▂▇▂▇
eval/steps_per_second,█▆▅▁▄▁▃█▄▂▇▂▃
train/epoch,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▄▃▂▂▁
train/loss,█▇▇▇▆▄▄▃▃▂▁▁
train/total_flos,▁

0,1
eval/loss,2.59425
eval/rmse,1.61067
eval/runtime,0.1887
eval/samples_per_second,243.724
eval/steps_per_second,31.79
train/epoch,5.0
train/global_step,625.0
train/learning_rate,0.0
train/loss,0.4242
train/total_flos,4665423871113216.0


1    199
3    199
2    199
4    199
5    199
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 131, Valid 33, and Test 40
Training 995, Valid 35, and Test 44


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.4985,2.363261,1.53729
100,2.1284,2.73179,1.652813
150,2.2234,3.094012,1.758981
200,1.9697,3.085741,1.756628
250,2.151,2.38417,1.544076
300,2.0842,2.390585,1.546152
350,2.0566,2.38328,1.543787
400,1.9733,2.378843,1.54235
450,1.9775,2.377253,1.541834
500,1.9863,2.395909,1.547873


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 2.3302206993103027, 'eval_rmse': 1.5265061855316162, 'eval_runtime': 0.5815, 'eval_samples_per_second': 75.672, 'eval_steps_per_second': 10.319, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-50/hat-former-4-fold/config.json
Model weights saved in ../data/quality_models/hat-model-50/hat-former-4-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁▅██▁▂▁▁▁▂▂▁▁
eval/rmse,▁▅██▂▂▂▁▁▂▂▁▁
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,█▇█▇█▇█▇▇▇▇▇▁
eval/steps_per_second,█▇█▇█▇██▇▇▇▇▁
train/epoch,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▄▃▂▂▁
train/loss,█▄▅▂▄▃▃▂▂▂▂▁
train/total_flos,▁

0,1
eval/loss,2.33022
eval/rmse,1.52651
eval/runtime,0.5815
eval/samples_per_second,75.672
eval/steps_per_second,10.319
train/epoch,5.0
train/global_step,625.0
train/learning_rate,0.0
train/loss,1.9147
train/total_flos,4659908150803200.0


loading configuration file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/config.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/67fff7e4e0a33106e11c355770df4420b2385850d788d1b7b549eb8d64b4f3d1.ef5baf2314e8ee19e0e25bc7741c47229b1ac5c08ae795cbc2d27f5f2c19535c
Model config HATConfig {
  "_name_or_path": "kiddothe2b/hierarchical-transformer-base-4096",
  "architectures": [
    "HATForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "auto_map": {
    "AutoConfig": "configuration_hat.HATConfig",
    "AutoModel": "modelling_hat.HATModel",
    "AutoModelForMaskedLM": "modelling_hat.HATForMaskedLM",
    "AutoModelForMultipleChoice": "modelling_hat.HATForMultipleChoice",
    "AutoModelForQuestionAnswering": "modelling_hat.HATForQuestionAnswering",
    "AutoModelForSequenceClassification": "modelling_hat.HATForSequenceClassification",
    "AutoModelForTokenClassification": "modelling_hat.HATForTokenClassific

4    77
2    77
1    77
5    77
3    77
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 385, Valid 35, and Test 383


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.4552,3.193518,1.787042
100,2.1151,2.473502,1.572737
150,1.6316,1.677336,1.29512
200,1.466,3.553924,1.885185


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 2.788304328918457, 'eval_rmse': 1.6698216199874878, 'eval_runtime': 3.9019, 'eval_samples_per_second': 98.156, 'eval_steps_per_second': 12.302, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-75/hat-former-0-fold/config.json
Model weights saved in ../data/quality_models/hat-model-75/hat-former-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▇▄▁█▅
eval/rmse,▇▄▁█▅
eval/runtime,▁▁▁▁█
eval/samples_per_second,█▇█▇▁
eval/steps_per_second,█▇█▇▁
train/epoch,▁▁▃▃▅▅▆▆██
train/global_step,▁▁▃▃▅▅▆▆██
train/learning_rate,█▆▃▁
train/loss,█▆▂▁
train/total_flos,▁

0,1
eval/loss,2.7883
eval/rmse,1.66982
eval/runtime,3.9019
eval/samples_per_second,98.156
eval/steps_per_second,12.302
train/epoch,5.0
train/global_step,245.0
train/learning_rate,1e-05
train/loss,1.466
train/total_flos,1099992221826048.0


1    187
3    187
2    187
4    187
5    187
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 935, Valid 35, and Test 122


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.446,2.088851,1.445286
100,2.0914,3.684628,1.919538
150,2.225,3.872848,1.967955
200,1.8767,2.13407,1.460846
250,1.6861,8.961104,2.99351
300,1.3184,1.587221,1.25985
350,0.8972,2.516063,1.58621
400,0.6482,1.7184,1.310878
450,0.6236,3.669749,1.915659
500,0.4422,2.766376,1.663243


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 2.315868377685547, 'eval_rmse': 1.5217976570129395, 'eval_runtime': 1.0315, 'eval_samples_per_second': 118.271, 'eval_steps_per_second': 15.511, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-75/hat-former-1-fold/config.json
Model weights saved in ../data/quality_models/hat-model-75/hat-former-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁▃▃▂█▁▂▁▃▂▂▂
eval/rmse,▂▄▄▂█▁▂▁▄▃▂▂
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,█▇▇▆█▆█▇▇▆█▁
eval/steps_per_second,█▇█▇█▇█▇▇▆█▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▇▇▆▅▄▃▂▂▁▁
train/total_flos,▁

0,1
eval/loss,2.31587
eval/rmse,1.5218
eval/runtime,1.0315
eval/samples_per_second,118.271
eval/steps_per_second,15.511
train/epoch,5.0
train/global_step,585.0
train/learning_rate,0.0
train/loss,0.4041
train/total_flos,6806136209211648.0


1    195
3    195
2    195
4    195
5    195
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 975, Valid 35, and Test 57


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.4134,4.445885,2.108526
100,1.7564,1.778884,1.333748
150,1.3838,1.782117,1.334959
200,1.16,1.521967,1.23368
250,1.0544,1.783418,1.335447
300,0.5164,2.351011,1.533301
350,0.4358,1.597974,1.26411
400,0.4045,1.740754,1.319376
450,0.3272,1.683746,1.297592
500,0.3032,1.580556,1.257202


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 2.802793264389038, 'eval_rmse': 1.67415452003479, 'eval_runtime': 0.7064, 'eval_samples_per_second': 80.696, 'eval_steps_per_second': 11.326, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-75/hat-former-2-fold/config.json
Model weights saved in ../data/quality_models/hat-model-75/hat-former-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▂▂▁▂▃▁▂▁▁▂▂▄
eval/rmse,█▂▂▁▂▃▁▂▂▁▃▂▅
eval/runtime,▁▁▁▁▁▁▁▁▁▁▆▁█
eval/samples_per_second,█▇███▇██▇█▁▇▂
eval/steps_per_second,█▇███▇██▇█▁▇▂
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▇▇▇▇████
train/learning_rate,█▇▇▆▅▅▄▄▃▂▂▁
train/loss,█▆▅▄▄▂▂▂▁▁▁▁
train/total_flos,▁

0,1
eval/loss,2.80279
eval/rmse,1.67415
eval/runtime,0.7064
eval/samples_per_second,80.696
eval/steps_per_second,11.326
train/epoch,5.0
train/global_step,610.0
train/learning_rate,0.0
train/loss,0.216
train/total_flos,6819619081080576.0


1    200
3    200
2    200
4    200
5    200
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 1000, Valid 35, and Test 46


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.4797,3.648192,1.910024
100,2.0495,3.047889,1.74582
150,1.8608,4.316662,2.077658
200,1.2341,1.99105,1.411046
250,1.0384,1.77705,1.333061
300,0.7232,2.723213,1.650216
350,0.6956,1.870808,1.367775
400,0.5445,2.04004,1.4283
450,0.5266,2.23689,1.495624
500,0.3722,1.891536,1.375331


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 1.8368535041809082, 'eval_rmse': 1.355305790901184, 'eval_runtime': 0.3125, 'eval_samples_per_second': 147.21, 'eval_steps_per_second': 19.201, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-75/hat-former-3-fold/config.json
Model weights saved in ../data/quality_models/hat-model-75/hat-former-3-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▆▅█▂▁▄▁▂▂▁▂▁▁
eval/rmse,▆▅█▂▁▄▁▂▃▁▂▁▁
eval/runtime,▃▃▁▁▁▁▁▂▁▁▁▁█
eval/samples_per_second,▂▄▇█▇▇▇▆███▇▁
eval/steps_per_second,▄▅▇█▇▇▇▇███▇▁
train/epoch,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▄▃▂▂▁
train/loss,█▇▆▄▃▃▂▂▂▁▁▁
train/total_flos,▁

0,1
eval/loss,1.83685
eval/rmse,1.35531
eval/runtime,0.3125
eval/samples_per_second,147.21
eval/steps_per_second,19.201
train/epoch,5.0
train/global_step,625.0
train/learning_rate,0.0
train/loss,0.2957
train/total_flos,7023700732551168.0


1    199
3    199
2    199
4    199
5    199
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 131, Valid 33, and Test 40
Training 995, Valid 35, and Test 44


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.4358,2.619738,1.61856
100,2.0996,2.515267,1.585959
150,1.8138,2.218728,1.48954
200,1.2595,2.357831,1.535523
250,1.342,2.110202,1.452653
300,0.9177,2.658208,1.630401
350,0.7235,2.59013,1.609388
400,0.5249,2.479255,1.574565
450,0.3846,2.67577,1.635778
500,0.4867,2.436776,1.561018


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 2.1350088119506836, 'eval_rmse': 1.4611667394638062, 'eval_runtime': 0.4725, 'eval_samples_per_second': 93.115, 'eval_steps_per_second': 12.697, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-75/hat-former-4-fold/config.json
Model weights saved in ../data/quality_models/hat-model-75/hat-former-4-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▇▆▂▄▁█▇▆█▅▇█▁
eval/rmse,▇▆▂▄▁█▇▆█▅▇█▁
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,█▇▇▇█▇█▇▇██▇▁
eval/steps_per_second,█▇█▇█▇█▇▇██▇▁
train/epoch,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▄▃▂▂▁
train/loss,█▇▆▄▄▃▂▂▁▂▁▁
train/total_flos,▁

0,1
eval/loss,2.13501
eval/rmse,1.46117
eval/runtime,0.4725
eval/samples_per_second,93.115
eval/steps_per_second,12.697
train/epoch,5.0
train/global_step,625.0
train/learning_rate,0.0
train/loss,0.28
train/total_flos,7023700732551168.0


loading configuration file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/config.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/67fff7e4e0a33106e11c355770df4420b2385850d788d1b7b549eb8d64b4f3d1.ef5baf2314e8ee19e0e25bc7741c47229b1ac5c08ae795cbc2d27f5f2c19535c
Model config HATConfig {
  "_name_or_path": "kiddothe2b/hierarchical-transformer-base-4096",
  "architectures": [
    "HATForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "auto_map": {
    "AutoConfig": "configuration_hat.HATConfig",
    "AutoModel": "modelling_hat.HATModel",
    "AutoModelForMaskedLM": "modelling_hat.HATForMaskedLM",
    "AutoModelForMultipleChoice": "modelling_hat.HATForMultipleChoice",
    "AutoModelForQuestionAnswering": "modelling_hat.HATForQuestionAnswering",
    "AutoModelForSequenceClassification": "modelling_hat.HATForSequenceClassification",
    "AutoModelForTokenClassification": "modelling_hat.HATForTokenClassific

4    77
2    77
1    77
5    77
3    77
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 385, Valid 35, and Test 383


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.2347,2.127819,1.458704
100,1.6148,2.779916,1.667308
150,0.9547,1.8488,1.359706
200,0.5892,2.119233,1.455758


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 1.5452544689178467, 'eval_rmse': 1.2430826425552368, 'eval_runtime': 4.8202, 'eval_samples_per_second': 79.457, 'eval_steps_per_second': 9.958, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-100/hat-former-0-fold/config.json
Model weights saved in ../data/quality_models/hat-model-100/hat-former-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▄█▃▄▁
eval/rmse,▅█▃▅▁
eval/runtime,▁▁▁▁█
eval/samples_per_second,█▇▇▇▁
eval/steps_per_second,█▇▇▇▁
train/epoch,▁▁▃▃▅▅▆▆██
train/global_step,▁▁▃▃▅▅▆▆██
train/learning_rate,█▆▃▁
train/loss,█▅▃▁
train/total_flos,▁

0,1
eval/loss,1.54525
eval/rmse,1.24308
eval/runtime,4.8202
eval/samples_per_second,79.457
eval/steps_per_second,9.958
train/epoch,5.0
train/global_step,245.0
train/learning_rate,1e-05
train/loss,0.5892
train/total_flos,1417014812977920.0


1    187
3    187
2    187
4    187
5    187
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 935, Valid 35, and Test 122


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.4261,4.709958,2.170244
100,2.0657,5.174829,2.274825
150,1.9438,7.1349,2.671123
200,1.9184,4.772621,2.184633
250,1.9868,8.692263,2.948264
300,2.1242,2.95298,1.718424
350,1.9245,4.255725,2.062941
400,1.7921,1.409053,1.187035
450,1.5119,2.157869,1.468969
500,0.9422,2.359823,1.536172


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 2.1105782985687256, 'eval_rmse': 1.4527829885482788, 'eval_runtime': 1.2569, 'eval_samples_per_second': 97.067, 'eval_steps_per_second': 12.73, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-100/hat-former-1-fold/config.json
Model weights saved in ../data/quality_models/hat-model-100/hat-former-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▄▅▇▄█▂▄▁▂▂▂▂
eval/rmse,▅▅▇▅█▃▄▁▂▂▃▂
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,█▇█████▇█▇█▁
eval/steps_per_second,█▇█████▇█▇█▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▆▆▆▆▇▆▅▄▂▁
train/total_flos,▁

0,1
eval/loss,2.11058
eval/rmse,1.45278
eval/runtime,1.2569
eval/samples_per_second,97.067
eval/steps_per_second,12.73
train/epoch,5.0
train/global_step,585.0
train/learning_rate,0.0
train/loss,0.8073
train/total_flos,8687171937159168.0


1    195
3    195
2    195
4    195
5    195
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 975, Valid 35, and Test 57


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.4593,4.834338,2.198713
100,1.9187,2.893019,1.700888
150,1.8658,2.658131,1.630377
200,1.3157,1.840953,1.356817
250,1.1872,1.475546,1.21472
300,0.714,1.333289,1.154681
350,0.8279,1.51946,1.232664
400,0.5828,1.841039,1.356849
450,0.4366,1.874425,1.369096
500,0.4359,1.670953,1.292653


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 2.074430227279663, 'eval_rmse': 1.4402881860733032, 'eval_runtime': 0.3772, 'eval_samples_per_second': 151.107, 'eval_steps_per_second': 21.208, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-100/hat-former-2-fold/config.json
Model weights saved in ../data/quality_models/hat-model-100/hat-former-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▄▄▂▁▁▁▂▂▂▂▂▂
eval/rmse,█▅▄▂▁▁▂▂▂▂▂▂▃
eval/runtime,▁▂▁▁▁▁▁▁▁▂▁▁█
eval/samples_per_second,▇▄▇▆███▇▇▄▇▅▁
eval/steps_per_second,▇▄▇▆███▇▇▅▇▅▁
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▇▇▇▇████
train/learning_rate,█▇▇▆▅▅▄▄▃▂▂▁
train/loss,█▆▆▄▄▂▃▂▁▁▁▁
train/total_flos,▁

0,1
eval/loss,2.07443
eval/rmse,1.44029
eval/runtime,0.3772
eval/samples_per_second,151.107
eval/steps_per_second,21.208
train/epoch,5.0
train/global_step,610.0
train/learning_rate,0.0
train/loss,0.3212
train/total_flos,8709147267283200.0


1    200
3    200
2    200
4    200
5    200
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 1000, Valid 35, and Test 46


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.4094,3.950896,1.987686
100,2.0599,2.866979,1.693216
150,1.5684,5.066316,2.250848
200,1.3469,1.713791,1.309119
250,1.0462,3.77152,1.94204
300,0.7876,2.374953,1.541088
350,0.8575,2.505761,1.58296
400,0.7813,2.222531,1.490816
450,0.5129,2.144059,1.464261
500,0.429,2.629284,1.621507


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 2.0922787189483643, 'eval_rmse': 1.4464712142944336, 'eval_runtime': 0.4048, 'eval_samples_per_second': 113.639, 'eval_steps_per_second': 14.822, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-100/hat-former-3-fold/config.json
Model weights saved in ../data/quality_models/hat-model-100/hat-former-3-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▆▃█▁▅▂▃▂▂▃▃▂▂
eval/rmse,▆▄█▁▆▃▃▂▂▃▃▃▂
eval/runtime,▁▂▁▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,█▅█▇█▇█▇▇▇█▇▁
eval/steps_per_second,█▆█▇█▇█▇▇▇█▇▁
train/epoch,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▄▃▂▂▁
train/loss,█▇▅▄▃▂▃▂▁▁▁▁
train/total_flos,▁

0,1
eval/loss,2.09228
eval/rmse,1.44647
eval/runtime,0.4048
eval/samples_per_second,113.639
eval/steps_per_second,14.822
train/epoch,5.0
train/global_step,625.0
train/learning_rate,0.0
train/loss,0.4083
train/total_flos,8988347776309248.0


1    199
3    199
2    199
4    199
5    199
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 131, Valid 33, and Test 40
Training 995, Valid 35, and Test 44


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  False


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['pooler.dense.b

Step,Training Loss,Validation Loss,Rmse
50,2.4589,2.417143,1.554716
100,1.8166,3.21912,1.794191
150,1.6983,2.571012,1.603438
200,0.9501,2.314132,1.521227
250,0.8859,2.510803,1.584551
300,0.8188,2.583022,1.607178
350,0.6378,2.209302,1.486372
400,0.4812,2.503585,1.582272
450,0.3796,2.506194,1.583096
500,0.419,2.316913,1.522141


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__. If input_texts, __index_level_0__ are not expected by `MyH

{'eval_loss': 2.4996938705444336, 'eval_rmse': 1.5810418128967285, 'eval_runtime': 0.3053, 'eval_samples_per_second': 144.128, 'eval_steps_per_second': 19.654, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-100/hat-former-4-fold/config.json
Model weights saved in ../data/quality_models/hat-model-100/hat-former-4-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▂█▄▂▃▄▁▃▃▂▄▃▃
eval/rmse,▃█▄▂▃▄▁▃▃▂▄▃▃
eval/runtime,▁▂▁▂▁▂▁▂▁▁▁▂█
eval/samples_per_second,▇▇█▅▇▅▇▆█▇▇▆▁
eval/steps_per_second,█▇█▅▇▆▇▆█▇▇▆▁
train/epoch,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▄▃▂▂▁
train/loss,█▆▆▃▃▃▂▂▁▂▁▁
train/total_flos,▁

0,1
eval/loss,2.49969
eval/rmse,1.58104
eval/runtime,0.3053
eval/samples_per_second,144.128
eval/steps_per_second,19.654
train/epoch,5.0
train/global_step,625.0
train/learning_rate,0.0
train/loss,0.3069
train/total_flos,8978366949081600.0


In [19]:
json.dump(hat_rmse_scores, open('../data/quality_models/hat_rmse_scores.json', 'w'))

#### HAT model + exp-moves:

In [48]:
extra_feats_clm = 'exp_act_label'
eli5_annotation_df[extra_feats_clm] = eli5_annotation_df[extra_feats_clm].apply(lambda row: [int(x[2:4]) for x in row])

In [65]:
#Evaluate the approach performance on different sizes of turns
hat_rmse_scores = []
for input_clm in [ 'turn_text_100']: #['turn_text_25', 'turn_text_50', 'turn_text_75', 'turn_text_100']:
    eli5_annotation_df['input_texts'] = eli5_annotation_df[input_clm].apply(lambda row: [x['text'] for x in row])
    eli5_annotation_df['labels'] = eli5_annotation_df.quality
    perc = input_clm.split('_')[-1]
    hat_rmse_scores = train_and_evaluate_hat_model(model_name_or_path, '../data/quality_models/hat-model-exp-moves-test-3-{}'.format(perc), 
                                                         eli5_annotation_df, 'hat-model-exp-moves-test-3-{}'.format(perc), num_train_epochs=5, 
                                                         lr=5e-5, batch_size=8,  n_folds=5, eval_steps=50, extra_encoder=True, extra_feats_clm=extra_feats_clm,
                                                  flow_model_hidden_size=256, nhead=6, nlayers=6)

loading configuration file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/config.json from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/67fff7e4e0a33106e11c355770df4420b2385850d788d1b7b549eb8d64b4f3d1.ef5baf2314e8ee19e0e25bc7741c47229b1ac5c08ae795cbc2d27f5f2c19535c
Model config HATConfig {
  "_name_or_path": "kiddothe2b/hierarchical-transformer-base-4096",
  "architectures": [
    "HATForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "auto_map": {
    "AutoConfig": "configuration_hat.HATConfig",
    "AutoModel": "modelling_hat.HATModel",
    "AutoModelForMaskedLM": "modelling_hat.HATForMaskedLM",
    "AutoModelForMultipleChoice": "modelling_hat.HATForMultipleChoice",
    "AutoModelForQuestionAnswering": "modelling_hat.HATForQuestionAnswering",
    "AutoModelForSequenceClassification": "modelling_hat.HATForSequenceClassification",
    "AutoModelForTokenClassification": "modelling_hat.HATForTokenClassific

4    77
2    77
1    77
5    77
3    77
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 385, Valid 35, and Test 383


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  True


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['exp_moves_enco

Step,Training Loss,Validation Loss,Rmse
50,2.4419,3.240654,1.800182
100,2.0348,4.16959,2.041957
150,1.6996,1.988342,1.410086
200,1.5278,2.137898,1.462155


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__, exp_act_label. If input_texts, __index_level_0__, exp_act_label are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__, exp_act_label. If input_texts, __index_level_0__, exp_act_label are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__

{'eval_loss': 2.2616257667541504, 'eval_rmse': 1.5038702487945557, 'eval_runtime': 3.3711, 'eval_samples_per_second': 113.612, 'eval_steps_per_second': 14.239, 'epoch': 5.0}


Configuration saved in ../data/quality_models/hat-model-exp-moves-test-3-100/hat-former-0-fold/config.json
Model weights saved in ../data/quality_models/hat-model-exp-moves-test-3-100/hat-former-0-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▅█▁▁▂
eval/rmse,▅█▁▂▂
eval/runtime,▁▁▁▁█
eval/samples_per_second,█▄█▇▁
eval/steps_per_second,█▅█▇▁
train/epoch,▁▁▃▃▅▅▆▆██
train/global_step,▁▁▃▃▅▅▆▆██
train/learning_rate,█▆▃▁
train/loss,█▅▂▁
train/total_flos,▁

0,1
eval/loss,2.26163
eval/rmse,1.50387
eval/runtime,3.3711
eval/samples_per_second,113.612
eval/steps_per_second,14.239
train/epoch,5.0
train/global_step,245.0
train/learning_rate,1e-05
train/loss,1.5278
train/total_flos,1432594376928000.0


1    187
3    187
2    187
4    187
5    187
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 935, Valid 35, and Test 122


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  True


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['exp_moves_enco

Step,Training Loss,Validation Loss,Rmse
50,2.4514,5.644044,2.37572
100,2.1032,5.900749,2.429146
150,2.1302,8.633048,2.938205
200,1.8454,4.612739,2.147729
250,1.7335,4.761095,2.181993
300,1.8588,4.088434,2.021988
350,1.6076,3.976038,1.994001
400,1.2855,3.99897,1.999742
450,0.853,5.825769,2.413663
500,0.8371,6.568947,2.562996


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__, exp_act_label. If input_texts, __index_level_0__, exp_act_label are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__, exp_act_label. If input_texts, __index_level_0__, exp_act_label are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__

Configuration saved in ../data/quality_models/hat-model-exp-moves-test-3-100/hat-former-1-fold/config.json


{'eval_loss': 4.725615978240967, 'eval_rmse': 2.1738483905792236, 'eval_runtime': 0.791, 'eval_samples_per_second': 154.233, 'eval_steps_per_second': 20.227, 'epoch': 5.0}


Model weights saved in ../data/quality_models/hat-model-exp-moves-test-3-100/hat-former-1-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▄▄█▂▂▁▁▁▄▅▂▂
eval/rmse,▄▄█▂▂▁▁▁▄▅▂▂
eval/runtime,▁▂▁▁▁▁▁▁▁▁▁█
eval/samples_per_second,█▁█▂▆▆▇▃▅▃▇▅
eval/steps_per_second,█▁█▂▆▆▇▃▅▃▇▂
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▃▂▂▁
train/loss,█▇▇▆▅▆▅▄▂▂▁
train/total_flos,▁

0,1
eval/loss,4.72562
eval/rmse,2.17385
eval/runtime,0.791
eval/samples_per_second,154.233
eval/steps_per_second,20.227
train/epoch,5.0
train/global_step,585.0
train/learning_rate,0.0
train/loss,0.6332
train/total_flos,8782684241971200.0


1    195
3    195
2    195
4    195
5    195
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 975, Valid 35, and Test 57


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  True


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['exp_moves_enco

Step,Training Loss,Validation Loss,Rmse
50,2.3605,5.965517,2.442441
100,1.9985,2.575722,1.604906
150,2.1401,3.397133,1.843131
200,1.9331,3.308678,1.818977
250,2.0486,2.684422,1.638421
300,1.8852,2.696481,1.642097
350,2.0372,2.989197,1.728929
400,1.983,3.415632,1.848143
450,1.9637,2.860902,1.69142
500,2.1053,2.726043,1.651073


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__, exp_act_label. If input_texts, __index_level_0__, exp_act_label are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__, exp_act_label. If input_texts, __index_level_0__, exp_act_label are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__

Configuration saved in ../data/quality_models/hat-model-exp-moves-test-3-100/hat-former-2-fold/config.json


{'eval_loss': 2.573918104171753, 'eval_rmse': 1.6043435335159302, 'eval_runtime': 0.3697, 'eval_samples_per_second': 154.2, 'eval_steps_per_second': 21.642, 'epoch': 5.0}


Model weights saved in ../data/quality_models/hat-model-exp-moves-test-3-100/hat-former-2-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▁▃▃▁▁▂▃▂▁▂▁▁
eval/rmse,█▁▃▃▁▁▂▃▂▁▂▁▁
eval/runtime,▁▃▁▂▁▁▂▂▁▂▁▂█
eval/samples_per_second,█▁▇▃█▇▆▄▇▆▇▅▆
eval/steps_per_second,█▁▇▃█▇▆▄▇▆▇▅▅
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▇▇▇▇████
train/learning_rate,█▇▇▆▅▅▄▄▃▂▂▁
train/loss,█▃▅▂▃▁▃▂▂▄▂▂
train/total_flos,▁

0,1
eval/loss,2.57392
eval/rmse,1.60434
eval/runtime,0.3697
eval/samples_per_second,154.2
eval/steps_per_second,21.642
train/epoch,5.0
train/global_step,610.0
train/learning_rate,0.0
train/loss,1.9195
train/total_flos,8804901182880000.0


1    200
3    200
2    200
4    200
5    200
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 130, Valid 33, and Test 41
Training 1000, Valid 35, and Test 46


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  True


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['exp_moves_enco

Step,Training Loss,Validation Loss,Rmse
50,2.5428,3.141506,1.772429
100,1.9918,3.039463,1.743405
150,1.9372,2.033884,1.426143
200,1.5338,3.732171,1.931883
250,1.3622,5.613261,2.369232
300,0.981,2.346076,1.53169
350,0.9967,2.450666,1.56546
400,0.9185,3.299393,1.816423
450,0.7979,2.748524,1.657867
500,0.7747,1.96377,1.401346


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__, exp_act_label. If input_texts, __index_level_0__, exp_act_label are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__, exp_act_label. If input_texts, __index_level_0__, exp_act_label are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__

Configuration saved in ../data/quality_models/hat-model-exp-moves-test-3-100/hat-former-3-fold/config.json


{'eval_loss': 2.272286891937256, 'eval_rmse': 1.5074104070663452, 'eval_runtime': 0.2827, 'eval_samples_per_second': 162.692, 'eval_steps_per_second': 21.221, 'epoch': 5.0}


Model weights saved in ../data/quality_models/hat-model-exp-moves-test-3-100/hat-former-3-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▃▃▁▄█▂▂▄▃▁▃▃▂
eval/rmse,▄▃▁▅█▂▂▄▃▁▃▃▂
eval/runtime,▁▇▂▄▂▂▂▂▃▄▂▃█
eval/samples_per_second,█▁▆▄▇▆▇▆▅▅▇▅▇
eval/steps_per_second,█▁▆▄▇▆▇▆▅▅▇▅▅
train/epoch,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▄▃▂▂▁
train/loss,█▆▆▅▄▃▃▂▂▂▁▁
train/total_flos,▁

0,1
eval/loss,2.27229
eval/rmse,1.50741
eval/runtime,0.2827
eval/samples_per_second,162.692
eval/steps_per_second,21.221
train/epoch,5.0
train/global_step,625.0
train/learning_rate,0.0
train/loss,0.5682
train/total_flos,9087171400243200.0


1    199
3    199
2    199
4    199
5    199
Name: quality, dtype: int64


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Training 131, Valid 33, and Test 40
Training 995, Valid 35, and Test 44


loading weights file https://huggingface.co/kiddothe2b/hierarchical-transformer-base-4096/resolve/main/pytorch_model.bin from cache at /mnt/ceph/storage/data-tmp/current/sile2804/.cache/huggingface/transformers/3e6b4d161afa24393cd7a5363c4581beaea6fcad5e0aa6e1efd3733999473b92.b9741b26fc86ac9ac410a55dc25f6ed6b2dff47efeefb6a7058e3e63117e545a


Use extra encoder:  True


Some weights of the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 were not used when initializing MyHATForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyHATForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyHATForSequenceClassification were not initialized from the model checkpoint at kiddothe2b/hierarchical-transformer-base-4096 and are newly initialized: ['exp_moves_enco

Step,Training Loss,Validation Loss,Rmse
50,2.5433,4.373102,2.091197
100,2.0968,3.53604,1.880436
150,2.0498,2.830776,1.682491
200,1.3832,1.714378,1.309343
250,1.3948,1.768618,1.329894
300,0.855,2.431456,1.559313
350,0.6724,1.96741,1.402644
400,0.4991,2.639511,1.624657
450,0.4549,1.965711,1.402038
500,0.4478,2.021156,1.421674


The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__, exp_act_label. If input_texts, __index_level_0__, exp_act_label are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__, exp_act_label. If input_texts, __index_level_0__, exp_act_label are not expected by `MyHATForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 35
  Batch size = 8
The following columns in the evaluation set  don't have a corresponding argument in `MyHATForSequenceClassification.forward` and have been ignored: input_texts, __index_level_0__

Configuration saved in ../data/quality_models/hat-model-exp-moves-test-3-100/hat-former-4-fold/config.json


{'eval_loss': 2.265375852584839, 'eval_rmse': 1.5051167011260986, 'eval_runtime': 0.2756, 'eval_samples_per_second': 159.631, 'eval_steps_per_second': 21.768, 'epoch': 5.0}


Model weights saved in ../data/quality_models/hat-model-exp-moves-test-3-100/hat-former-4-fold/pytorch_model.bin


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,█▆▄▁▁▃▂▃▂▂▁▂▂
eval/rmse,█▆▄▁▁▃▂▄▂▂▁▂▃
eval/runtime,▂▆▃▄▂▄▁▄▂▄▁█▇
eval/samples_per_second,▆▃▆▅▇▄█▄▇▄▇▁▇
eval/steps_per_second,▆▃▆▅▇▄█▄▇▄▇▁▆
train/epoch,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/global_step,▁▁▂▂▂▂▃▃▃▃▄▄▅▅▅▅▆▆▆▆▇▇████
train/learning_rate,█▇▇▆▅▅▄▄▃▂▂▁
train/loss,█▇▆▄▄▃▂▂▂▂▁▁
train/total_flos,▁

0,1
eval/loss,2.26538
eval/rmse,1.50512
eval/runtime,0.2756
eval/samples_per_second,159.631
eval/steps_per_second,21.768
train/epoch,5.0
train/global_step,625.0
train/learning_rate,0.0
train/loss,0.3092
train/total_flos,9077080837440000.0


### Printing Results:

In [66]:
print(tabulate([
        ['Longformer']  + [load_results('../data/quality_models/longformer-25/')[1], load_results('../data/quality_models/longformer-50/')[1], load_results('../data/quality_models/longformer-75/')[1], load_results('../data/quality_models/longformer-100/')[1]],
        ['Exp Moves'] + [load_results('../data/quality_models/longformer-exp-moves-25/')[1], load_results('../data/quality_models/longformer-exp-moves-50/')[1], load_results('../data/quality_models/longformer-exp-moves-75/')[1], load_results('../data/quality_models/longformer-exp-moves-100/')[1]],
        ['Exp Types'] + [load_results('../data/quality_models/longformer-exp-types-25/')[1], load_results('../data/quality_models/longformer-exp-types-50/')[1], load_results('../data/quality_models/longformer-exp-types-75/')[1], load_results('../data/quality_models/longformer-exp-types-100/')[1]],
        ['Exp Moves and Types'] + [load_results('../data/quality_models/longformer-exp-types-and-moves25/')[1], load_results('../data/quality_models/longformer-exp-types-and-moves50/')[1], load_results('../data/quality_models/longformer-exp-types-and-moves75/')[1], load_results('../data/quality_models/longformer-exp-types-and-moves100/')[1]],
        ['HAT'] + [load_results('../data/quality_models/hat-model-25/')[1], load_results('../data/quality_models/hat-model-50/')[1], load_results('../data/quality_models/hat-model-75')[1], load_results('../data/quality_models/hat-model-100/')[1]],
        ['HAT + Exp-moves'] + [load_results('../data/quality_models/hat-model-exp-moves-25/')[1], load_results('../data/quality_models/hat-model-exp-moves-50/')[1], load_results('../data/quality_models/hat-model-exp-moves-75')[1], load_results('../data/quality_models/hat-model-exp-moves-100/')[1]],
        ['HAT + Exp-moves'] + [load_results('../data/quality_models/hat-model-exp-moves-test-100/')[1]],
        ['HAT + Exp-moves'] + [load_results('../data/quality_models/hat-model-exp-moves-test-1-100/')[1]],
        ['HAT + Exp-moves'] + [load_results('../data/quality_models/hat-model-exp-moves-test-2-100/')[1]],
        ['HAT + Exp-moves'] + [load_results('../data/quality_models/hat-model-exp-moves-test-3-100/')[1]],
], headers=['Approach', '@25%', '@50%', '@75%', '@100%']))

Approach                @25%     @50%     @75%    @100%
-------------------  -------  -------  -------  -------
Longformer           1.67567  1.58526  1.57192  1.5375
Exp Moves            1.66363  1.57336  1.45779  1.43321
Exp Types            1.65409  1.58038  1.542    1.44747
Exp Moves and Types  1.6653   1.58473  1.58456  1.41372
HAT                  1.75281  1.59851  1.53645  1.43273
HAT + Exp-moves      1.72779  1.692    1.52813  1.5525
HAT + Exp-moves      1.56315
HAT + Exp-moves      1.47224
HAT + Exp-moves      1.65892
HAT + Exp-moves      1.65892


In [62]:
print(tabulate([
        ['Longformer']  + [load_results('../data/quality_models/longformer-25/')[1], load_results('../data/quality_models/longformer-50/')[1], load_results('../data/quality_models/longformer-75/')[1], load_results('../data/quality_models/longformer-100/')[1]],
        ['Exp Moves'] + [load_results('../data/quality_models/longformer-exp-moves-25/')[1], load_results('../data/quality_models/longformer-exp-moves-50/')[1], load_results('../data/quality_models/longformer-exp-moves-75/')[1], load_results('../data/quality_models/longformer-exp-moves-100/')[1]],
        ['Exp Types'] + [load_results('../data/quality_models/longformer-exp-types-25/')[1], load_results('../data/quality_models/longformer-exp-types-50/')[1], load_results('../data/quality_models/longformer-exp-types-75/')[1], load_results('../data/quality_models/longformer-exp-types-100/')[1]],
        ['Exp Moves and Types'] + [load_results('../data/quality_models/longformer-exp-types-and-moves25/')[1], load_results('../data/quality_models/longformer-exp-types-and-moves50/')[1], load_results('../data/quality_models/longformer-exp-types-and-moves75/')[1], load_results('../data/quality_models/longformer-exp-types-and-moves100/')[1]],
        ['HAT'] + [load_results('../data/quality_models/hat-model-25/')[1], load_results('../data/quality_models/hat-model-50/')[1], load_results('../data/quality_models/hat-model-75')[1], load_results('../data/quality_models/hat-model-100/')[1]],
        ['HAT + Exp-moves'] + [load_results('../data/quality_models/hat-model-exp-moves-25/')[1], load_results('../data/quality_models/hat-model-exp-moves-50/')[1], load_results('../data/quality_models/hat-model-exp-moves-75')[1], load_results('../data/quality_models/hat-model-exp-moves-100/')[1]],
        ['HAT + Exp-moves'] + [load_results('../data/quality_models/hat-model-exp-moves-test-100/')[1]],
        ['HAT + Exp-moves'] + [load_results('../data/quality_models/hat-model-exp-moves-test-1-100/')[1]],
], headers=['Approach', '@25%', '@50%', '@75%', '@100%']))

Approach                @25%     @50%     @75%    @100%
-------------------  -------  -------  -------  -------
Longformer           1.67567  1.58526  1.57192  1.5375
Exp Moves            1.66363  1.57336  1.45779  1.43321
Exp Types            1.65409  1.58038  1.542    1.44747
Exp Moves and Types  1.6653   1.58473  1.58456  1.41372
HAT                  1.75281  1.59851  1.53645  1.43273
HAT + Exp-moves      1.72779  1.692    1.52813  1.5525
HAT + Exp-moves      1.56315
HAT + Exp-moves      1.47224
