*Загрузим необходимые пакеты*

In [1]:
%pip install -q torch lightning transformers datasets trl wandb transformers

Note: you may need to restart the kernel to use updated packages.


# *Level 1*

*Установим модель и датасет*

In [2]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
checkpoint = "HuggingFaceTB/SmolLM2-135M-Instruct"

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
tokenizer.padding_side = "left"
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
	num_labels=1,
).to(device)

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at HuggingFaceTB/SmolLM2-135M-Instruct and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
dataset_name = "juyoungml/HelpSteer2-binarized"

ds = load_dataset(dataset_name)

train_ds = ds["train"]
val_ds = ds["validation"]
ds

DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'chosen_score', 'rejected_score', 'chosen_rationale', 'rejected_rationale', 'score_diff', 'difficulty'],
        num_rows: 7224
    })
    validation: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'chosen_score', 'rejected_score', 'chosen_rationale', 'rejected_rationale', 'score_diff', 'difficulty'],
        num_rows: 373
    })
})

In [5]:
def format_ds(example):
    user_msg = example['prompt']
    chosen = example['chosen']
    rejected = example['rejected']

    prompt = f"<|im_start|>user\n{user_msg}<|im_end|>\n<|im_start|>assistant\n"

    return {
		"prompt": prompt,
		"chosen": f"{prompt}{chosen}<|im_end|>",
		"rejected": f"{prompt}{rejected}<|im_end|>",
    }

In [6]:
print(tokenizer.special_tokens_map)

{'bos_token': '<|im_start|>', 'eos_token': '<|im_end|>', 'unk_token': '<|endoftext|>', 'pad_token': '<|im_end|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>']}


In [7]:
train_ds = train_ds.map(format_ds, remove_columns=train_ds.column_names)
val_ds = val_ds.map(format_ds, remove_columns=val_ds.column_names)

*Перейдем к обучению reward модели*

In [8]:
from trl import RewardTrainer, RewardConfig

In [9]:
reward_conf = RewardConfig(
    output_dir="reward_model",
    num_train_epochs=1,
    learning_rate=5e-5,
    fp16=True,
    max_length=512,
    eval_strategy="steps",
    eval_steps=50,
)

trainer = RewardTrainer(
    model=model,
    args=reward_conf,
    processing_class=tokenizer,
    train_dataset=train_ds,
    eval_dataset=val_ds,
)

In [10]:
trainer.train()
trainer.save_model(reward_conf.output_dir)



[34m[1mwandb[0m: Currently logged in as: [33m671342ihxrxmx[0m ([33m671342ihxrxmx-iu[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss,Accuracy
50,0.7196,0.689822,0.513966
100,0.6595,0.692218,0.592179
150,0.6551,0.67641,0.597765
200,0.613,0.652616,0.620112
250,0.601,0.642865,0.614525
300,0.6306,0.643826,0.61236
350,0.6071,0.621478,0.608939
400,0.6012,0.619745,0.608939




*Перейдем к реализации reinforce \w baseline и дообучению sft с помощью него*

In [11]:
from transformers import AutoModelForCausalLM
from torch.optim import AdamW
from torch.utils.data import DataLoader
from lightning.pytorch.loggers import WandbLogger
from lightning import LightningModule, LightningDataModule, Trainer

*Воспользуемся lightning*

In [12]:
class ReinforceDataModule(LightningDataModule):
	def __init__(self, dataset, model_name, batch_size, max_prompt_length, num_workers):
		super().__init__()
		self.ds = dataset
		self.model_name = model_name
		self.batch_size = batch_size
		self.max_prompt_length = max_prompt_length
		self.num_workers = num_workers
		self.tokenizer = AutoTokenizer.from_pretrained(model_name)
		tokenizer.padding_side = "left"

	def setup(self, stage=None):
		self.train_dataset = self.ds["train"]
		self.val_dataset = self.ds["validation"]

	def _collate_fn(self, batch):
		prompts = [example["prompt"] for example in batch]
		encoding = self.tokenizer(
			prompts,
			return_tensors="pt",
			padding=True,
			truncation=True,
			max_length=self.max_prompt_length,
		)
		return {
			"prompt": prompts,
			"input_ids": encoding["input_ids"],
			"attention_mask": encoding["attention_mask"],
		}

	def train_dataloader(self):
		return DataLoader(
			self.train_dataset,
			batch_size=self.batch_size,
			shuffle=True,
			num_workers=self.num_workers,
			collate_fn=self._collate_fn,
		)

	def val_dataloader(self):
		return DataLoader(
			self.val_dataset,
			batch_size=self.batch_size,
			num_workers=self.num_workers,
			collate_fn=self._collate_fn,
		)

In [13]:
class ReinforceModel(LightningModule):
	def __init__(
		self, sft_model_name, reward_model_name, learning_rate, max_new_tokens, alpha, top_k, top_p,
	):
		super().__init__()
		self.save_hyperparameters()

		self.policy_model = AutoModelForCausalLM.from_pretrained(sft_model_name)
		self.reward_model = AutoModelForSequenceClassification.from_pretrained(reward_model_name)
		self.tokenizer = AutoTokenizer.from_pretrained(sft_model_name)
		self.tokenizer.padding_side = "left"

		for param in self.reward_model.parameters():
			param.requires_grad = False

		# Инициализация бейзлайна в виде скользящего среднего
		self.register_buffer("moving_avg_reward", torch.tensor(0.0))

	def configure_optimizers(self):
		optimizer = AdamW(self.policy_model.parameters(), lr=self.hparams.learning_rate)
		return optimizer

	@torch.no_grad()
	def generate_responses(self, input_ids, attention_mask):
		# генерация ответов policy модели
		outputs = self.policy_model.generate(
			input_ids=input_ids,
			attention_mask=attention_mask,
			max_new_tokens=self.hparams.max_new_tokens,
			do_sample=True,
			top_k=self.hparams.top_k,
			top_p=self.hparams.top_p,
			pad_token_id=self.tokenizer.pad_token_id,
		)

		responses = [out[len(input_ids[i]):] for i, out in enumerate(outputs)]
		return responses, outputs

	def compute_reward(self, full_input_ids, attention_mask):
		outputs = self.reward_model(input_ids=full_input_ids, attention_mask=attention_mask)
		# возьмем за награду вероятность положительного класса
		rewards = torch.sigmoid(outputs.logits).squeeze(-1)
		return rewards

	def training_step(self, batch, batch_idx):
		prompts = batch["prompt"]
		input_ids = batch["input_ids"]
		attention_mask = batch["attention_mask"]

		responses, full_outputs = self.generate_responses(input_ids, attention_mask)

		# Полные входы: prompt + response
		full_input_ids = full_outputs
		full_attention_mask = (full_input_ids != self.tokenizer.pad_token_id).long()

		rewards = self.compute_reward(full_input_ids, full_attention_mask).detach()
		self.moving_avg_reward = self.hparams.alpha * self.moving_avg_reward + (1 - self.hparams.alpha) * rewards.mean()

		outputs = self.policy_model(full_input_ids, attention_mask=full_attention_mask)
		logits = outputs.logits[:, :-1, :]
		targets = full_input_ids[:, 1:]

		log_probs = logits.log_softmax(dim=-1)
		selected_log_probs = torch.gather(log_probs, 2, targets.unsqueeze(-1)).squeeze(-1)

		gen_len = full_input_ids.size(1) - input_ids.size(1)
		mask = torch.zeros_like(selected_log_probs)
		mask[:, -gen_len:] = 1.0

		baseline = self.moving_avg_reward.detach()
		advantage = rewards.unsqueeze(1) - baseline
		loss = - (selected_log_probs * advantage * mask).sum(dim=1).mean()

		self.log("train_loss", loss, prog_bar=True)
		self.log("moving_avg_reward", self.moving_avg_reward, prog_bar=True)
		self.log("reward", rewards.mean(), prog_bar=True)

		return loss
	
	def validation_step(self, batch, batch_idx):
		pass


In [14]:
dm = ReinforceDataModule(
    ds,
    model_name="HuggingFaceTB/SmolLM2-135M-Instruct",
    batch_size=16,
    max_prompt_length=256,
    num_workers=8,
)

reinforce_model = ReinforceModel(
	sft_model_name = "HuggingFaceTB/SmolLM2-135M-Instruct",
	reward_model_name = "reward_model/checkpoint-439",
	learning_rate=5e-5,
	max_new_tokens=64,
	alpha=0.99,
	top_k=30,
	top_p=0.95,
)

logger = WandbLogger(
    name="smollm2-reinforce"
)

trainer = Trainer(
    max_epochs=3,
    accelerator="gpu",
    log_every_n_steps=10,
    logger=logger,
)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [15]:
trainer.fit(reinforce_model, datamodule=dm)

You are using a CUDA device ('NVIDIA GeForce RTX 4060 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
/home/hxrt_mx/ml/notebooks/smollm2-reinforce-alignment/.venv/lib/python3.11/site-packages/lightning/pytorch/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type                           | Params | Mode
-----------------------------------------------------------------------
0 | policy_model | LlamaForCausalLM               | 134 M  | eval
1 | reward_model | LlamaForSequenceClassification | 134 M  | eval
-

Epoch 0:   0%|          | 0/452 [00:00<?, ?it/s]                            

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   0%|          | 1/452 [00:05<42:11,  0.18it/s, v_num=5dwa, train_loss=82.20, moving_avg_reward=0.00277, reward=0.277]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   0%|          | 2/452 [00:17<1:06:54,  0.11it/s, v_num=5dwa, train_loss=78.30, moving_avg_reward=0.00726, reward=0.452]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   1%|          | 3/452 [00:30<1:14:50,  0.10it/s, v_num=5dwa, train_loss=28.30, moving_avg_reward=0.0106, reward=0.346] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   1%|          | 4/452 [00:42<1:18:35,  0.10it/s, v_num=5dwa, train_loss=18.90, moving_avg_reward=0.0138, reward=0.327]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   1%|          | 5/452 [00:54<1:20:41,  0.09it/s, v_num=5dwa, train_loss=14.50, moving_avg_reward=0.0165, reward=0.286]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   1%|▏         | 6/452 [01:06<1:22:01,  0.09it/s, v_num=5dwa, train_loss=34.80, moving_avg_reward=0.0206, reward=0.421]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   2%|▏         | 7/452 [01:18<1:22:51,  0.09it/s, v_num=5dwa, train_loss=13.30, moving_avg_reward=0.0236, reward=0.320]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   2%|▏         | 8/452 [01:30<1:23:23,  0.09it/s, v_num=5dwa, train_loss=14.90, moving_avg_reward=0.0263, reward=0.295]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   2%|▏         | 9/452 [01:42<1:23:46,  0.09it/s, v_num=5dwa, train_loss=6.600, moving_avg_reward=0.0287, reward=0.273]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   2%|▏         | 10/452 [01:54<1:24:00,  0.09it/s, v_num=5dwa, train_loss=7.140, moving_avg_reward=0.0307, reward=0.223]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   2%|▏         | 11/452 [02:06<1:24:12,  0.09it/s, v_num=5dwa, train_loss=3.120, moving_avg_reward=0.0322, reward=0.186]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   3%|▎         | 12/452 [02:17<1:24:19,  0.09it/s, v_num=5dwa, train_loss=2.060, moving_avg_reward=0.0341, reward=0.217]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   3%|▎         | 13/452 [02:29<1:24:23,  0.09it/s, v_num=5dwa, train_loss=3.680, moving_avg_reward=0.0363, reward=0.256]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   3%|▎         | 14/452 [02:41<1:24:24,  0.09it/s, v_num=5dwa, train_loss=4.040, moving_avg_reward=0.0389, reward=0.299]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   3%|▎         | 15/452 [02:53<1:24:24,  0.09it/s, v_num=5dwa, train_loss=10.80, moving_avg_reward=0.0421, reward=0.352]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   4%|▎         | 16/452 [03:05<1:24:22,  0.09it/s, v_num=5dwa, train_loss=5.710, moving_avg_reward=0.0442, reward=0.259]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   4%|▍         | 17/452 [03:17<1:24:19,  0.09it/s, v_num=5dwa, train_loss=7.250, moving_avg_reward=0.0462, reward=0.243]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   4%|▍         | 18/452 [03:29<1:24:16,  0.09it/s, v_num=5dwa, train_loss=3.390, moving_avg_reward=0.0481, reward=0.239]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   4%|▍         | 19/452 [03:41<1:24:12,  0.09it/s, v_num=5dwa, train_loss=10.60, moving_avg_reward=0.0511, reward=0.342]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   4%|▍         | 20/452 [03:53<1:24:06,  0.09it/s, v_num=5dwa, train_loss=5.560, moving_avg_reward=0.0539, reward=0.332]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   5%|▍         | 21/452 [04:05<1:24:00,  0.09it/s, v_num=5dwa, train_loss=8.960, moving_avg_reward=0.0568, reward=0.341]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   5%|▍         | 22/452 [04:17<1:23:54,  0.09it/s, v_num=5dwa, train_loss=5.220, moving_avg_reward=0.0582, reward=0.206]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   5%|▌         | 23/452 [04:29<1:23:46,  0.09it/s, v_num=5dwa, train_loss=6.890, moving_avg_reward=0.0603, reward=0.265]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   5%|▌         | 24/452 [04:41<1:23:39,  0.09it/s, v_num=5dwa, train_loss=8.130, moving_avg_reward=0.0626, reward=0.291]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   6%|▌         | 25/452 [04:53<1:23:31,  0.09it/s, v_num=5dwa, train_loss=4.240, moving_avg_reward=0.0647, reward=0.270]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   6%|▌         | 26/452 [05:05<1:23:24,  0.09it/s, v_num=5dwa, train_loss=1.380, moving_avg_reward=0.0674, reward=0.337]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   6%|▌         | 27/452 [05:17<1:23:17,  0.09it/s, v_num=5dwa, train_loss=0.306, moving_avg_reward=0.0686, reward=0.185]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   6%|▌         | 28/452 [05:29<1:23:08,  0.08it/s, v_num=5dwa, train_loss=3.730, moving_avg_reward=0.0708, reward=0.294]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   6%|▋         | 29/452 [05:41<1:22:59,  0.08it/s, v_num=5dwa, train_loss=12.00, moving_avg_reward=0.0735, reward=0.334]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   7%|▋         | 30/452 [05:53<1:22:51,  0.08it/s, v_num=5dwa, train_loss=4.980, moving_avg_reward=0.0758, reward=0.306]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   7%|▋         | 31/452 [06:05<1:22:41,  0.08it/s, v_num=5dwa, train_loss=3.960, moving_avg_reward=0.0779, reward=0.291]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   7%|▋         | 32/452 [06:13<1:21:41,  0.09it/s, v_num=5dwa, train_loss=1.080, moving_avg_reward=0.0798, reward=0.263]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   7%|▋         | 33/452 [06:26<1:21:49,  0.09it/s, v_num=5dwa, train_loss=-0.185, moving_avg_reward=0.0821, reward=0.312]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   8%|▊         | 34/452 [06:38<1:21:41,  0.09it/s, v_num=5dwa, train_loss=3.320, moving_avg_reward=0.0842, reward=0.296] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   8%|▊         | 35/452 [06:50<1:21:33,  0.09it/s, v_num=5dwa, train_loss=1.540, moving_avg_reward=0.0858, reward=0.242]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   8%|▊         | 36/452 [07:02<1:21:24,  0.09it/s, v_num=5dwa, train_loss=6.060, moving_avg_reward=0.0883, reward=0.333]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   8%|▊         | 37/452 [07:14<1:21:15,  0.09it/s, v_num=5dwa, train_loss=1.660, moving_avg_reward=0.0907, reward=0.331]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   8%|▊         | 38/452 [07:22<1:20:22,  0.09it/s, v_num=5dwa, train_loss=0.351, moving_avg_reward=0.0915, reward=0.165]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   9%|▊         | 39/452 [07:34<1:20:13,  0.09it/s, v_num=5dwa, train_loss=3.390, moving_avg_reward=0.0936, reward=0.304]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   9%|▉         | 40/452 [07:46<1:20:05,  0.09it/s, v_num=5dwa, train_loss=3.610, moving_avg_reward=0.095, reward=0.237] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   9%|▉         | 41/452 [07:52<1:18:54,  0.09it/s, v_num=5dwa, train_loss=1.680, moving_avg_reward=0.0972, reward=0.308]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:   9%|▉         | 42/452 [08:04<1:18:47,  0.09it/s, v_num=5dwa, train_loss=3.590, moving_avg_reward=0.0987, reward=0.256]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  10%|▉         | 43/452 [08:16<1:18:39,  0.09it/s, v_num=5dwa, train_loss=5.990, moving_avg_reward=0.101, reward=0.314] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  10%|▉         | 44/452 [08:21<1:17:29,  0.09it/s, v_num=5dwa, train_loss=0.960, moving_avg_reward=0.103, reward=0.263]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  10%|▉         | 45/452 [08:32<1:17:15,  0.09it/s, v_num=5dwa, train_loss=3.370, moving_avg_reward=0.104, reward=0.293]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  10%|█         | 46/452 [08:44<1:17:09,  0.09it/s, v_num=5dwa, train_loss=3.270, moving_avg_reward=0.106, reward=0.280]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  10%|█         | 47/452 [08:56<1:17:02,  0.09it/s, v_num=5dwa, train_loss=0.113, moving_avg_reward=0.108, reward=0.332]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  11%|█         | 48/452 [09:08<1:16:55,  0.09it/s, v_num=5dwa, train_loss=8.150, moving_avg_reward=0.110, reward=0.301]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  11%|█         | 49/452 [09:20<1:16:48,  0.09it/s, v_num=5dwa, train_loss=5.280, moving_avg_reward=0.113, reward=0.388]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  11%|█         | 50/452 [09:30<1:16:24,  0.09it/s, v_num=5dwa, train_loss=2.820, moving_avg_reward=0.115, reward=0.252]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  11%|█▏        | 51/452 [09:42<1:16:17,  0.09it/s, v_num=5dwa, train_loss=2.400, moving_avg_reward=0.116, reward=0.275]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  12%|█▏        | 52/452 [09:54<1:16:09,  0.09it/s, v_num=5dwa, train_loss=1.980, moving_avg_reward=0.118, reward=0.276]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  12%|█▏        | 53/452 [10:06<1:16:02,  0.09it/s, v_num=5dwa, train_loss=2.460, moving_avg_reward=0.119, reward=0.263]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  12%|█▏        | 54/452 [10:17<1:15:54,  0.09it/s, v_num=5dwa, train_loss=0.444, moving_avg_reward=0.120, reward=0.200]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  12%|█▏        | 55/452 [10:29<1:15:46,  0.09it/s, v_num=5dwa, train_loss=1.170, moving_avg_reward=0.121, reward=0.267]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  12%|█▏        | 56/452 [10:41<1:15:38,  0.09it/s, v_num=5dwa, train_loss=7.430, moving_avg_reward=0.123, reward=0.325]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  13%|█▎        | 57/452 [10:53<1:15:30,  0.09it/s, v_num=5dwa, train_loss=5.140, moving_avg_reward=0.125, reward=0.250]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  13%|█▎        | 58/452 [11:05<1:15:22,  0.09it/s, v_num=5dwa, train_loss=0.312, moving_avg_reward=0.126, reward=0.233]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  13%|█▎        | 59/452 [11:16<1:15:06,  0.09it/s, v_num=5dwa, train_loss=0.458, moving_avg_reward=0.128, reward=0.356]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  13%|█▎        | 60/452 [11:27<1:14:50,  0.09it/s, v_num=5dwa, train_loss=1.320, moving_avg_reward=0.129, reward=0.239]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  13%|█▎        | 61/452 [11:39<1:14:42,  0.09it/s, v_num=5dwa, train_loss=5.450, moving_avg_reward=0.131, reward=0.300]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  14%|█▎        | 62/452 [11:51<1:14:33,  0.09it/s, v_num=5dwa, train_loss=1.400, moving_avg_reward=0.132, reward=0.218]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  14%|█▍        | 63/452 [12:03<1:14:25,  0.09it/s, v_num=5dwa, train_loss=4.460, moving_avg_reward=0.134, reward=0.299]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  14%|█▍        | 64/452 [12:15<1:14:16,  0.09it/s, v_num=5dwa, train_loss=0.751, moving_avg_reward=0.135, reward=0.277]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  14%|█▍        | 65/452 [12:27<1:14:07,  0.09it/s, v_num=5dwa, train_loss=4.470, moving_avg_reward=0.137, reward=0.320]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  15%|█▍        | 66/452 [12:38<1:13:58,  0.09it/s, v_num=5dwa, train_loss=5.110, moving_avg_reward=0.138, reward=0.238]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  15%|█▍        | 67/452 [12:46<1:13:23,  0.09it/s, v_num=5dwa, train_loss=1.960, moving_avg_reward=0.139, reward=0.239]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  15%|█▌        | 68/452 [12:58<1:13:14,  0.09it/s, v_num=5dwa, train_loss=2.010, moving_avg_reward=0.140, reward=0.284]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  15%|█▌        | 69/452 [13:10<1:13:06,  0.09it/s, v_num=5dwa, train_loss=1.280, moving_avg_reward=0.142, reward=0.265]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  15%|█▌        | 70/452 [13:22<1:12:57,  0.09it/s, v_num=5dwa, train_loss=5.150, moving_avg_reward=0.143, reward=0.283]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  16%|█▌        | 71/452 [13:31<1:12:33,  0.09it/s, v_num=5dwa, train_loss=2.490, moving_avg_reward=0.145, reward=0.327]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  16%|█▌        | 72/452 [13:43<1:12:24,  0.09it/s, v_num=5dwa, train_loss=-0.488, moving_avg_reward=0.146, reward=0.238]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  16%|█▌        | 73/452 [13:55<1:12:15,  0.09it/s, v_num=5dwa, train_loss=-0.0722, moving_avg_reward=0.146, reward=0.187]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  16%|█▋        | 74/452 [14:07<1:12:06,  0.09it/s, v_num=5dwa, train_loss=1.110, moving_avg_reward=0.148, reward=0.326]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  17%|█▋        | 75/452 [14:16<1:11:42,  0.09it/s, v_num=5dwa, train_loss=1.770, moving_avg_reward=0.149, reward=0.235]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  17%|█▋        | 76/452 [14:27<1:11:34,  0.09it/s, v_num=5dwa, train_loss=4.090, moving_avg_reward=0.151, reward=0.351]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  17%|█▋        | 77/452 [14:39<1:11:25,  0.09it/s, v_num=5dwa, train_loss=1.890, moving_avg_reward=0.152, reward=0.281]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  17%|█▋        | 78/452 [14:51<1:11:16,  0.09it/s, v_num=5dwa, train_loss=3.100, moving_avg_reward=0.154, reward=0.301]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  17%|█▋        | 79/452 [15:00<1:10:53,  0.09it/s, v_num=5dwa, train_loss=3.220, moving_avg_reward=0.155, reward=0.315]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  18%|█▊        | 80/452 [15:12<1:10:45,  0.09it/s, v_num=5dwa, train_loss=0.335, moving_avg_reward=0.156, reward=0.277]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  18%|█▊        | 81/452 [15:24<1:10:36,  0.09it/s, v_num=5dwa, train_loss=0.296, moving_avg_reward=0.158, reward=0.290]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  18%|█▊        | 82/452 [15:36<1:10:27,  0.09it/s, v_num=5dwa, train_loss=2.270, moving_avg_reward=0.160, reward=0.335]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  18%|█▊        | 83/452 [15:48<1:10:17,  0.09it/s, v_num=5dwa, train_loss=2.060, moving_avg_reward=0.162, reward=0.374]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  19%|█▊        | 84/452 [16:00<1:10:08,  0.09it/s, v_num=5dwa, train_loss=1.670, moving_avg_reward=0.163, reward=0.247]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  19%|█▉        | 85/452 [16:12<1:09:59,  0.09it/s, v_num=5dwa, train_loss=0.970, moving_avg_reward=0.163, reward=0.203]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  19%|█▉        | 86/452 [16:24<1:09:50,  0.09it/s, v_num=5dwa, train_loss=2.560, moving_avg_reward=0.164, reward=0.222]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  19%|█▉        | 87/452 [16:36<1:09:41,  0.09it/s, v_num=5dwa, train_loss=6.110, moving_avg_reward=0.165, reward=0.294]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  19%|█▉        | 88/452 [16:46<1:09:23,  0.09it/s, v_num=5dwa, train_loss=1.320, moving_avg_reward=0.166, reward=0.235]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  20%|█▉        | 89/452 [16:58<1:09:13,  0.09it/s, v_num=5dwa, train_loss=0.750, moving_avg_reward=0.168, reward=0.377]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  20%|█▉        | 90/452 [17:10<1:09:04,  0.09it/s, v_num=5dwa, train_loss=0.366, moving_avg_reward=0.168, reward=0.198]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  20%|██        | 91/452 [17:22<1:08:55,  0.09it/s, v_num=5dwa, train_loss=0.989, moving_avg_reward=0.169, reward=0.237]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  20%|██        | 92/452 [17:34<1:08:45,  0.09it/s, v_num=5dwa, train_loss=2.370, moving_avg_reward=0.170, reward=0.294]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  21%|██        | 93/452 [17:46<1:08:36,  0.09it/s, v_num=5dwa, train_loss=0.0283, moving_avg_reward=0.170, reward=0.201]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  21%|██        | 94/452 [17:58<1:08:26,  0.09it/s, v_num=5dwa, train_loss=3.760, moving_avg_reward=0.171, reward=0.285] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  21%|██        | 95/452 [18:10<1:08:17,  0.09it/s, v_num=5dwa, train_loss=-0.195, moving_avg_reward=0.173, reward=0.303]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  21%|██        | 96/452 [18:22<1:08:07,  0.09it/s, v_num=5dwa, train_loss=5.560, moving_avg_reward=0.174, reward=0.297] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  21%|██▏       | 97/452 [18:34<1:07:57,  0.09it/s, v_num=5dwa, train_loss=2.250, moving_avg_reward=0.175, reward=0.250]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  22%|██▏       | 98/452 [18:46<1:07:47,  0.09it/s, v_num=5dwa, train_loss=0.420, moving_avg_reward=0.176, reward=0.282]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  22%|██▏       | 99/452 [18:53<1:07:21,  0.09it/s, v_num=5dwa, train_loss=1.600, moving_avg_reward=0.177, reward=0.292]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  22%|██▏       | 100/452 [19:05<1:07:11,  0.09it/s, v_num=5dwa, train_loss=2.080, moving_avg_reward=0.178, reward=0.283]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  22%|██▏       | 101/452 [19:17<1:07:01,  0.09it/s, v_num=5dwa, train_loss=2.390, moving_avg_reward=0.179, reward=0.323]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  23%|██▎       | 102/452 [19:29<1:06:51,  0.09it/s, v_num=5dwa, train_loss=0.416, moving_avg_reward=0.181, reward=0.309]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  23%|██▎       | 103/452 [19:41<1:06:42,  0.09it/s, v_num=5dwa, train_loss=3.090, moving_avg_reward=0.182, reward=0.295]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  23%|██▎       | 104/452 [19:53<1:06:32,  0.09it/s, v_num=5dwa, train_loss=2.260, moving_avg_reward=0.182, reward=0.207]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  23%|██▎       | 105/452 [20:05<1:06:22,  0.09it/s, v_num=5dwa, train_loss=4.890, moving_avg_reward=0.183, reward=0.312]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  23%|██▎       | 106/452 [20:16<1:06:12,  0.09it/s, v_num=5dwa, train_loss=1.120, moving_avg_reward=0.184, reward=0.207]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  24%|██▎       | 107/452 [20:28<1:06:02,  0.09it/s, v_num=5dwa, train_loss=0.0755, moving_avg_reward=0.184, reward=0.242]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  24%|██▍       | 108/452 [20:34<1:05:33,  0.09it/s, v_num=5dwa, train_loss=0.597, moving_avg_reward=0.185, reward=0.250] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  24%|██▍       | 109/452 [20:46<1:05:23,  0.09it/s, v_num=5dwa, train_loss=4.920, moving_avg_reward=0.186, reward=0.337]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  24%|██▍       | 110/452 [20:58<1:05:13,  0.09it/s, v_num=5dwa, train_loss=-0.717, moving_avg_reward=0.186, reward=0.146]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  25%|██▍       | 111/452 [21:05<1:04:47,  0.09it/s, v_num=5dwa, train_loss=1.130, moving_avg_reward=0.187, reward=0.266] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  25%|██▍       | 112/452 [21:10<1:04:18,  0.09it/s, v_num=5dwa, train_loss=0.451, moving_avg_reward=0.187, reward=0.220]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  25%|██▌       | 113/452 [21:22<1:04:08,  0.09it/s, v_num=5dwa, train_loss=0.456, moving_avg_reward=0.188, reward=0.247]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  25%|██▌       | 114/452 [21:32<1:03:52,  0.09it/s, v_num=5dwa, train_loss=1.840, moving_avg_reward=0.189, reward=0.280]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  25%|██▌       | 115/452 [21:44<1:03:42,  0.09it/s, v_num=5dwa, train_loss=-0.0605, moving_avg_reward=0.189, reward=0.197]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  26%|██▌       | 116/452 [21:56<1:03:33,  0.09it/s, v_num=5dwa, train_loss=0.355, moving_avg_reward=0.190, reward=0.277]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  26%|██▌       | 117/452 [22:08<1:03:23,  0.09it/s, v_num=5dwa, train_loss=0.112, moving_avg_reward=0.191, reward=0.342]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  26%|██▌       | 118/452 [22:20<1:03:13,  0.09it/s, v_num=5dwa, train_loss=1.220, moving_avg_reward=0.192, reward=0.262]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  26%|██▋       | 119/452 [22:32<1:03:04,  0.09it/s, v_num=5dwa, train_loss=2.170, moving_avg_reward=0.193, reward=0.349]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  27%|██▋       | 120/452 [22:44<1:02:54,  0.09it/s, v_num=5dwa, train_loss=2.420, moving_avg_reward=0.194, reward=0.252]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  27%|██▋       | 121/452 [22:56<1:02:44,  0.09it/s, v_num=5dwa, train_loss=0.420, moving_avg_reward=0.195, reward=0.268]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  27%|██▋       | 122/452 [23:08<1:02:36,  0.09it/s, v_num=5dwa, train_loss=4.360, moving_avg_reward=0.196, reward=0.328]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  27%|██▋       | 123/452 [23:20<1:02:26,  0.09it/s, v_num=5dwa, train_loss=0.304, moving_avg_reward=0.197, reward=0.276]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  27%|██▋       | 124/452 [23:26<1:02:00,  0.09it/s, v_num=5dwa, train_loss=-0.174, moving_avg_reward=0.197, reward=0.257]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  28%|██▊       | 125/452 [23:38<1:01:51,  0.09it/s, v_num=5dwa, train_loss=1.720, moving_avg_reward=0.198, reward=0.296] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  28%|██▊       | 126/452 [23:49<1:01:38,  0.09it/s, v_num=5dwa, train_loss=0.956, moving_avg_reward=0.199, reward=0.291]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  28%|██▊       | 127/452 [24:01<1:01:29,  0.09it/s, v_num=5dwa, train_loss=1.190, moving_avg_reward=0.200, reward=0.296]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  28%|██▊       | 128/452 [24:13<1:01:19,  0.09it/s, v_num=5dwa, train_loss=2.280, moving_avg_reward=0.201, reward=0.306]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  29%|██▊       | 129/452 [24:25<1:01:09,  0.09it/s, v_num=5dwa, train_loss=1.210, moving_avg_reward=0.203, reward=0.361]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  29%|██▉       | 130/452 [24:37<1:01:00,  0.09it/s, v_num=5dwa, train_loss=2.650, moving_avg_reward=0.204, reward=0.268]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  29%|██▉       | 131/452 [24:42<1:00:33,  0.09it/s, v_num=5dwa, train_loss=0.445, moving_avg_reward=0.204, reward=0.241]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  29%|██▉       | 132/452 [24:53<1:00:20,  0.09it/s, v_num=5dwa, train_loss=-0.213, moving_avg_reward=0.204, reward=0.243]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  29%|██▉       | 133/452 [25:05<1:00:10,  0.09it/s, v_num=5dwa, train_loss=3.570, moving_avg_reward=0.206, reward=0.316] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  30%|██▉       | 134/452 [25:17<1:00:00,  0.09it/s, v_num=5dwa, train_loss=1.040, moving_avg_reward=0.206, reward=0.278]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  30%|██▉       | 135/452 [25:29<59:51,  0.09it/s, v_num=5dwa, train_loss=3.620, moving_avg_reward=0.207, reward=0.318]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  30%|███       | 136/452 [25:39<59:36,  0.09it/s, v_num=5dwa, train_loss=0.286, moving_avg_reward=0.208, reward=0.254]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  30%|███       | 137/452 [25:49<59:23,  0.09it/s, v_num=5dwa, train_loss=-0.234, moving_avg_reward=0.208, reward=0.217]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  31%|███       | 138/452 [25:56<59:02,  0.09it/s, v_num=5dwa, train_loss=1.390, moving_avg_reward=0.209, reward=0.268] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  31%|███       | 139/452 [26:08<58:52,  0.09it/s, v_num=5dwa, train_loss=0.805, moving_avg_reward=0.209, reward=0.299]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  31%|███       | 140/452 [26:20<58:43,  0.09it/s, v_num=5dwa, train_loss=3.140, moving_avg_reward=0.210, reward=0.293]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  31%|███       | 141/452 [26:25<58:16,  0.09it/s, v_num=5dwa, train_loss=0.512, moving_avg_reward=0.211, reward=0.250]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  31%|███▏      | 142/452 [26:37<58:07,  0.09it/s, v_num=5dwa, train_loss=3.490, moving_avg_reward=0.211, reward=0.268]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  32%|███▏      | 143/452 [26:45<57:49,  0.09it/s, v_num=5dwa, train_loss=1.360, moving_avg_reward=0.212, reward=0.259]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  32%|███▏      | 144/452 [26:56<57:38,  0.09it/s, v_num=5dwa, train_loss=1.740, moving_avg_reward=0.212, reward=0.261]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  32%|███▏      | 145/452 [27:05<57:21,  0.09it/s, v_num=5dwa, train_loss=3.730, moving_avg_reward=0.213, reward=0.260]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  32%|███▏      | 146/452 [27:10<56:57,  0.09it/s, v_num=5dwa, train_loss=5.930, moving_avg_reward=0.213, reward=0.254]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  33%|███▎      | 147/452 [27:19<56:40,  0.09it/s, v_num=5dwa, train_loss=1.040, moving_avg_reward=0.213, reward=0.224]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  33%|███▎      | 148/452 [27:31<56:32,  0.09it/s, v_num=5dwa, train_loss=1.900, moving_avg_reward=0.215, reward=0.384]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  33%|███▎      | 149/452 [27:39<56:13,  0.09it/s, v_num=5dwa, train_loss=0.190, moving_avg_reward=0.215, reward=0.239]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  33%|███▎      | 150/452 [27:47<55:57,  0.09it/s, v_num=5dwa, train_loss=1.010, moving_avg_reward=0.216, reward=0.319]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  33%|███▎      | 151/452 [28:00<55:49,  0.09it/s, v_num=5dwa, train_loss=0.673, moving_avg_reward=0.216, reward=0.232]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  34%|███▎      | 152/452 [28:12<55:39,  0.09it/s, v_num=5dwa, train_loss=3.550, moving_avg_reward=0.218, reward=0.376]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  34%|███▍      | 153/452 [28:18<55:19,  0.09it/s, v_num=5dwa, train_loss=1.060, moving_avg_reward=0.218, reward=0.261]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  34%|███▍      | 154/452 [28:30<55:10,  0.09it/s, v_num=5dwa, train_loss=0.152, moving_avg_reward=0.220, reward=0.352]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  34%|███▍      | 155/452 [28:43<55:01,  0.09it/s, v_num=5dwa, train_loss=1.600, moving_avg_reward=0.220, reward=0.278]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  35%|███▍      | 156/452 [28:51<54:45,  0.09it/s, v_num=5dwa, train_loss=0.624, moving_avg_reward=0.221, reward=0.276]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  35%|███▍      | 157/452 [29:00<54:30,  0.09it/s, v_num=5dwa, train_loss=-0.242, moving_avg_reward=0.220, reward=0.148]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  35%|███▍      | 158/452 [29:12<54:21,  0.09it/s, v_num=5dwa, train_loss=0.0139, moving_avg_reward=0.221, reward=0.285]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  35%|███▌      | 159/452 [29:24<54:12,  0.09it/s, v_num=5dwa, train_loss=0.620, moving_avg_reward=0.221, reward=0.271] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  35%|███▌      | 160/452 [29:37<54:03,  0.09it/s, v_num=5dwa, train_loss=-0.252, moving_avg_reward=0.222, reward=0.246]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  36%|███▌      | 161/452 [29:46<53:49,  0.09it/s, v_num=5dwa, train_loss=0.941, moving_avg_reward=0.222, reward=0.278] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  36%|███▌      | 162/452 [29:58<53:40,  0.09it/s, v_num=5dwa, train_loss=2.350, moving_avg_reward=0.222, reward=0.238]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  36%|███▌      | 163/452 [30:11<53:30,  0.09it/s, v_num=5dwa, train_loss=-0.377, moving_avg_reward=0.223, reward=0.262]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  36%|███▋      | 164/452 [30:23<53:21,  0.09it/s, v_num=5dwa, train_loss=1.130, moving_avg_reward=0.223, reward=0.243] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  37%|███▋      | 165/452 [30:35<53:12,  0.09it/s, v_num=5dwa, train_loss=1.750, moving_avg_reward=0.223, reward=0.230]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  37%|███▋      | 166/452 [30:47<53:02,  0.09it/s, v_num=5dwa, train_loss=0.514, moving_avg_reward=0.224, reward=0.293]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  37%|███▋      | 167/452 [30:53<52:43,  0.09it/s, v_num=5dwa, train_loss=0.642, moving_avg_reward=0.224, reward=0.238]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  37%|███▋      | 168/452 [31:06<52:34,  0.09it/s, v_num=5dwa, train_loss=0.257, moving_avg_reward=0.224, reward=0.203]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  37%|███▋      | 169/452 [31:18<52:25,  0.09it/s, v_num=5dwa, train_loss=0.316, moving_avg_reward=0.224, reward=0.294]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  38%|███▊      | 170/452 [31:30<52:15,  0.09it/s, v_num=5dwa, train_loss=1.530, moving_avg_reward=0.225, reward=0.261]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  38%|███▊      | 171/452 [31:42<52:06,  0.09it/s, v_num=5dwa, train_loss=1.890, moving_avg_reward=0.225, reward=0.215]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  38%|███▊      | 172/452 [31:54<51:56,  0.09it/s, v_num=5dwa, train_loss=0.703, moving_avg_reward=0.225, reward=0.250]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  38%|███▊      | 173/452 [32:06<51:47,  0.09it/s, v_num=5dwa, train_loss=1.850, moving_avg_reward=0.227, reward=0.396]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  38%|███▊      | 174/452 [32:18<51:37,  0.09it/s, v_num=5dwa, train_loss=-5.66, moving_avg_reward=0.227, reward=0.231]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  39%|███▊      | 175/452 [32:30<51:27,  0.09it/s, v_num=5dwa, train_loss=-0.536, moving_avg_reward=0.227, reward=0.300]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  39%|███▉      | 176/452 [32:42<51:18,  0.09it/s, v_num=5dwa, train_loss=0.947, moving_avg_reward=0.228, reward=0.340] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  39%|███▉      | 177/452 [32:50<51:01,  0.09it/s, v_num=5dwa, train_loss=0.331, moving_avg_reward=0.228, reward=0.222]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  39%|███▉      | 178/452 [33:02<50:52,  0.09it/s, v_num=5dwa, train_loss=-0.181, moving_avg_reward=0.228, reward=0.220]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  40%|███▉      | 179/452 [33:14<50:42,  0.09it/s, v_num=5dwa, train_loss=1.080, moving_avg_reward=0.229, reward=0.259] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  40%|███▉      | 180/452 [33:21<50:25,  0.09it/s, v_num=5dwa, train_loss=0.752, moving_avg_reward=0.230, reward=0.331]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  40%|████      | 181/452 [33:34<50:15,  0.09it/s, v_num=5dwa, train_loss=2.460, moving_avg_reward=0.231, reward=0.331]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  40%|████      | 182/452 [33:46<50:06,  0.09it/s, v_num=5dwa, train_loss=0.314, moving_avg_reward=0.232, reward=0.326]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  40%|████      | 183/452 [33:56<49:53,  0.09it/s, v_num=5dwa, train_loss=0.134, moving_avg_reward=0.233, reward=0.406]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  41%|████      | 184/452 [34:04<49:38,  0.09it/s, v_num=5dwa, train_loss=-0.358, moving_avg_reward=0.233, reward=0.226]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  41%|████      | 185/452 [34:17<49:28,  0.09it/s, v_num=5dwa, train_loss=-0.158, moving_avg_reward=0.233, reward=0.250]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  41%|████      | 186/452 [34:22<49:09,  0.09it/s, v_num=5dwa, train_loss=0.803, moving_avg_reward=0.234, reward=0.326] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  41%|████▏     | 187/452 [34:32<48:57,  0.09it/s, v_num=5dwa, train_loss=-0.915, moving_avg_reward=0.235, reward=0.322]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  42%|████▏     | 188/452 [34:42<48:44,  0.09it/s, v_num=5dwa, train_loss=0.458, moving_avg_reward=0.236, reward=0.299] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  42%|████▏     | 189/452 [34:49<48:27,  0.09it/s, v_num=5dwa, train_loss=0.244, moving_avg_reward=0.237, reward=0.306]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  42%|████▏     | 190/452 [35:01<48:17,  0.09it/s, v_num=5dwa, train_loss=4.670, moving_avg_reward=0.238, reward=0.378]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  42%|████▏     | 191/452 [35:13<48:08,  0.09it/s, v_num=5dwa, train_loss=1.810, moving_avg_reward=0.239, reward=0.325]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  42%|████▏     | 192/452 [35:25<47:58,  0.09it/s, v_num=5dwa, train_loss=1.500, moving_avg_reward=0.240, reward=0.308]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  43%|████▎     | 193/452 [35:37<47:49,  0.09it/s, v_num=5dwa, train_loss=0.273, moving_avg_reward=0.240, reward=0.287]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  43%|████▎     | 194/452 [35:50<47:39,  0.09it/s, v_num=5dwa, train_loss=-0.16, moving_avg_reward=0.240, reward=0.257]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  43%|████▎     | 195/452 [36:02<47:29,  0.09it/s, v_num=5dwa, train_loss=-6.68, moving_avg_reward=0.241, reward=0.282]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  43%|████▎     | 196/452 [36:14<47:19,  0.09it/s, v_num=5dwa, train_loss=-0.988, moving_avg_reward=0.241, reward=0.257]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  44%|████▎     | 197/452 [36:26<47:10,  0.09it/s, v_num=5dwa, train_loss=6.030, moving_avg_reward=0.242, reward=0.388] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  44%|████▍     | 198/452 [36:38<47:00,  0.09it/s, v_num=5dwa, train_loss=1.230, moving_avg_reward=0.243, reward=0.309]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  44%|████▍     | 199/452 [36:50<46:50,  0.09it/s, v_num=5dwa, train_loss=-0.865, moving_avg_reward=0.244, reward=0.321]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  44%|████▍     | 200/452 [37:02<46:40,  0.09it/s, v_num=5dwa, train_loss=-1.17, moving_avg_reward=0.244, reward=0.306] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  44%|████▍     | 201/452 [37:14<46:30,  0.09it/s, v_num=5dwa, train_loss=-1.93, moving_avg_reward=0.244, reward=0.249]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  45%|████▍     | 202/452 [37:27<46:21,  0.09it/s, v_num=5dwa, train_loss=-4.66, moving_avg_reward=0.245, reward=0.306]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  45%|████▍     | 203/452 [37:39<46:11,  0.09it/s, v_num=5dwa, train_loss=3.930, moving_avg_reward=0.246, reward=0.332]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  45%|████▌     | 204/452 [37:51<46:01,  0.09it/s, v_num=5dwa, train_loss=0.868, moving_avg_reward=0.246, reward=0.288]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  45%|████▌     | 205/452 [38:03<45:51,  0.09it/s, v_num=5dwa, train_loss=-4.16, moving_avg_reward=0.246, reward=0.242]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  46%|████▌     | 206/452 [38:15<45:41,  0.09it/s, v_num=5dwa, train_loss=1.160, moving_avg_reward=0.247, reward=0.314]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  46%|████▌     | 207/452 [38:27<45:31,  0.09it/s, v_num=5dwa, train_loss=0.896, moving_avg_reward=0.248, reward=0.329]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  46%|████▌     | 208/452 [38:39<45:21,  0.09it/s, v_num=5dwa, train_loss=5.090, moving_avg_reward=0.248, reward=0.309]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  46%|████▌     | 209/452 [38:51<45:11,  0.09it/s, v_num=5dwa, train_loss=4.200, moving_avg_reward=0.249, reward=0.289]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  46%|████▋     | 210/452 [38:59<44:55,  0.09it/s, v_num=5dwa, train_loss=-2.52, moving_avg_reward=0.248, reward=0.226]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  47%|████▋     | 211/452 [39:11<44:46,  0.09it/s, v_num=5dwa, train_loss=-1.76, moving_avg_reward=0.250, reward=0.362]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  47%|████▋     | 212/452 [39:23<44:36,  0.09it/s, v_num=5dwa, train_loss=-1.01, moving_avg_reward=0.250, reward=0.305]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  47%|████▋     | 213/452 [39:36<44:26,  0.09it/s, v_num=5dwa, train_loss=-6.59, moving_avg_reward=0.250, reward=0.239]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  47%|████▋     | 214/452 [39:48<44:15,  0.09it/s, v_num=5dwa, train_loss=-6.91, moving_avg_reward=0.250, reward=0.285]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  48%|████▊     | 215/452 [40:00<44:05,  0.09it/s, v_num=5dwa, train_loss=-1.12, moving_avg_reward=0.251, reward=0.309]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  48%|████▊     | 216/452 [40:12<43:55,  0.09it/s, v_num=5dwa, train_loss=-4.85, moving_avg_reward=0.251, reward=0.230]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  48%|████▊     | 217/452 [40:24<43:45,  0.09it/s, v_num=5dwa, train_loss=-37.2, moving_avg_reward=0.250, reward=0.199]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  48%|████▊     | 218/452 [40:36<43:35,  0.09it/s, v_num=5dwa, train_loss=-22.1, moving_avg_reward=0.250, reward=0.237]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  48%|████▊     | 219/452 [40:48<43:25,  0.09it/s, v_num=5dwa, train_loss=14.20, moving_avg_reward=0.251, reward=0.349]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  49%|████▊     | 220/452 [41:00<43:14,  0.09it/s, v_num=5dwa, train_loss=22.70, moving_avg_reward=0.252, reward=0.357]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  49%|████▉     | 221/452 [41:12<43:04,  0.09it/s, v_num=5dwa, train_loss=-22.2, moving_avg_reward=0.252, reward=0.242]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  49%|████▉     | 222/452 [41:24<42:54,  0.09it/s, v_num=5dwa, train_loss=18.30, moving_avg_reward=0.254, reward=0.401]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  49%|████▉     | 223/452 [41:37<42:44,  0.09it/s, v_num=5dwa, train_loss=2.270, moving_avg_reward=0.254, reward=0.334]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  50%|████▉     | 224/452 [41:49<42:33,  0.09it/s, v_num=5dwa, train_loss=-17.2, moving_avg_reward=0.254, reward=0.236]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  50%|████▉     | 225/452 [42:01<42:23,  0.09it/s, v_num=5dwa, train_loss=-36.7, moving_avg_reward=0.254, reward=0.278]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  50%|█████     | 226/452 [42:13<42:13,  0.09it/s, v_num=5dwa, train_loss=-8.82, moving_avg_reward=0.255, reward=0.342]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  50%|█████     | 227/452 [42:25<42:02,  0.09it/s, v_num=5dwa, train_loss=7.590, moving_avg_reward=0.255, reward=0.221]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  50%|█████     | 228/452 [42:34<41:49,  0.09it/s, v_num=5dwa, train_loss=-18.4, moving_avg_reward=0.255, reward=0.273]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  51%|█████     | 229/452 [42:47<41:39,  0.09it/s, v_num=5dwa, train_loss=-3.90, moving_avg_reward=0.255, reward=0.196]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  51%|█████     | 230/452 [42:59<41:29,  0.09it/s, v_num=5dwa, train_loss=-9.70, moving_avg_reward=0.255, reward=0.302]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  51%|█████     | 231/452 [43:11<41:19,  0.09it/s, v_num=5dwa, train_loss=-23.8, moving_avg_reward=0.255, reward=0.247]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  51%|█████▏    | 232/452 [43:23<41:08,  0.09it/s, v_num=5dwa, train_loss=-5.05, moving_avg_reward=0.255, reward=0.285]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  52%|█████▏    | 233/452 [43:35<40:58,  0.09it/s, v_num=5dwa, train_loss=-2.32, moving_avg_reward=0.256, reward=0.304]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  52%|█████▏    | 234/452 [43:47<40:47,  0.09it/s, v_num=5dwa, train_loss=-10.8, moving_avg_reward=0.256, reward=0.251]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  52%|█████▏    | 235/452 [43:59<40:37,  0.09it/s, v_num=5dwa, train_loss=-6.48, moving_avg_reward=0.256, reward=0.317]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  52%|█████▏    | 236/452 [44:11<40:27,  0.09it/s, v_num=5dwa, train_loss=0.790, moving_avg_reward=0.257, reward=0.278]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  52%|█████▏    | 237/452 [44:23<40:16,  0.09it/s, v_num=5dwa, train_loss=-14.2, moving_avg_reward=0.257, reward=0.268]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  53%|█████▎    | 238/452 [44:35<40:06,  0.09it/s, v_num=5dwa, train_loss=-0.855, moving_avg_reward=0.257, reward=0.266]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  53%|█████▎    | 239/452 [44:48<39:55,  0.09it/s, v_num=5dwa, train_loss=16.70, moving_avg_reward=0.257, reward=0.309] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  53%|█████▎    | 240/452 [45:00<39:45,  0.09it/s, v_num=5dwa, train_loss=15.90, moving_avg_reward=0.258, reward=0.313]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  53%|█████▎    | 241/452 [45:12<39:34,  0.09it/s, v_num=5dwa, train_loss=-13.4, moving_avg_reward=0.259, reward=0.353]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  54%|█████▎    | 242/452 [45:24<39:24,  0.09it/s, v_num=5dwa, train_loss=20.50, moving_avg_reward=0.260, reward=0.345]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  54%|█████▍    | 243/452 [45:36<39:13,  0.09it/s, v_num=5dwa, train_loss=-33.2, moving_avg_reward=0.259, reward=0.224]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  54%|█████▍    | 244/452 [45:48<39:02,  0.09it/s, v_num=5dwa, train_loss=-2.81, moving_avg_reward=0.259, reward=0.273]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  54%|█████▍    | 245/452 [46:00<38:52,  0.09it/s, v_num=5dwa, train_loss=-3.87, moving_avg_reward=0.260, reward=0.275]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  54%|█████▍    | 246/452 [46:12<38:41,  0.09it/s, v_num=5dwa, train_loss=3.180, moving_avg_reward=0.260, reward=0.343]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  55%|█████▍    | 247/452 [46:24<38:31,  0.09it/s, v_num=5dwa, train_loss=29.80, moving_avg_reward=0.261, reward=0.344]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  55%|█████▍    | 248/452 [46:36<38:20,  0.09it/s, v_num=5dwa, train_loss=-18.0, moving_avg_reward=0.262, reward=0.351]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  55%|█████▌    | 249/452 [46:48<38:09,  0.09it/s, v_num=5dwa, train_loss=-26.9, moving_avg_reward=0.261, reward=0.188]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  55%|█████▌    | 250/452 [47:01<37:59,  0.09it/s, v_num=5dwa, train_loss=1.920, moving_avg_reward=0.262, reward=0.298]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  56%|█████▌    | 251/452 [47:13<37:48,  0.09it/s, v_num=5dwa, train_loss=-15.3, moving_avg_reward=0.262, reward=0.295]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  56%|█████▌    | 252/452 [47:24<37:37,  0.09it/s, v_num=5dwa, train_loss=2.460, moving_avg_reward=0.263, reward=0.312]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  56%|█████▌    | 253/452 [47:36<37:26,  0.09it/s, v_num=5dwa, train_loss=-0.492, moving_avg_reward=0.263, reward=0.295]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  56%|█████▌    | 254/452 [47:48<37:16,  0.09it/s, v_num=5dwa, train_loss=1.600, moving_avg_reward=0.263, reward=0.254] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  56%|█████▋    | 255/452 [48:00<37:05,  0.09it/s, v_num=5dwa, train_loss=0.963, moving_avg_reward=0.263, reward=0.279]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  57%|█████▋    | 256/452 [48:12<36:54,  0.09it/s, v_num=5dwa, train_loss=0.587, moving_avg_reward=0.263, reward=0.246]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  57%|█████▋    | 257/452 [48:24<36:44,  0.09it/s, v_num=5dwa, train_loss=23.70, moving_avg_reward=0.262, reward=0.221]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  57%|█████▋    | 258/452 [48:36<36:33,  0.09it/s, v_num=5dwa, train_loss=-1.03, moving_avg_reward=0.263, reward=0.345]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  57%|█████▋    | 259/452 [48:47<36:21,  0.09it/s, v_num=5dwa, train_loss=0.454, moving_avg_reward=0.263, reward=0.270]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  58%|█████▊    | 260/452 [48:59<36:10,  0.09it/s, v_num=5dwa, train_loss=15.50, moving_avg_reward=0.264, reward=0.347]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  58%|█████▊    | 261/452 [49:11<36:00,  0.09it/s, v_num=5dwa, train_loss=0.423, moving_avg_reward=0.265, reward=0.373]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  58%|█████▊    | 262/452 [49:23<35:49,  0.09it/s, v_num=5dwa, train_loss=7.370, moving_avg_reward=0.266, reward=0.314]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  58%|█████▊    | 263/452 [49:31<35:35,  0.09it/s, v_num=5dwa, train_loss=0.0942, moving_avg_reward=0.266, reward=0.293]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  58%|█████▊    | 264/452 [49:42<35:24,  0.09it/s, v_num=5dwa, train_loss=-1.85, moving_avg_reward=0.266, reward=0.235] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  59%|█████▊    | 265/452 [49:54<35:12,  0.09it/s, v_num=5dwa, train_loss=-3.28, moving_avg_reward=0.266, reward=0.310]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  59%|█████▉    | 266/452 [50:03<35:00,  0.09it/s, v_num=5dwa, train_loss=-0.958, moving_avg_reward=0.267, reward=0.322]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  59%|█████▉    | 267/452 [50:10<34:45,  0.09it/s, v_num=5dwa, train_loss=0.333, moving_avg_reward=0.267, reward=0.314] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  59%|█████▉    | 268/452 [50:22<34:35,  0.09it/s, v_num=5dwa, train_loss=0.633, moving_avg_reward=0.267, reward=0.265]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  60%|█████▉    | 269/452 [50:30<34:21,  0.09it/s, v_num=5dwa, train_loss=-0.0883, moving_avg_reward=0.267, reward=0.234]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  60%|█████▉    | 270/452 [50:39<34:08,  0.09it/s, v_num=5dwa, train_loss=2.020, moving_avg_reward=0.267, reward=0.335]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  60%|█████▉    | 271/452 [50:52<33:58,  0.09it/s, v_num=5dwa, train_loss=-2.30, moving_avg_reward=0.268, reward=0.314]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  60%|██████    | 272/452 [51:01<33:46,  0.09it/s, v_num=5dwa, train_loss=-0.0804, moving_avg_reward=0.269, reward=0.358]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  60%|██████    | 273/452 [51:11<33:34,  0.09it/s, v_num=5dwa, train_loss=0.616, moving_avg_reward=0.269, reward=0.309]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  61%|██████    | 274/452 [51:20<33:21,  0.09it/s, v_num=5dwa, train_loss=0.594, moving_avg_reward=0.269, reward=0.264]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  61%|██████    | 275/452 [51:29<33:08,  0.09it/s, v_num=5dwa, train_loss=-1.51, moving_avg_reward=0.268, reward=0.184]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  61%|██████    | 276/452 [51:41<32:57,  0.09it/s, v_num=5dwa, train_loss=4.030, moving_avg_reward=0.269, reward=0.306]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  61%|██████▏   | 277/452 [51:46<32:42,  0.09it/s, v_num=5dwa, train_loss=1.740, moving_avg_reward=0.270, reward=0.394]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  62%|██████▏   | 278/452 [51:59<32:32,  0.09it/s, v_num=5dwa, train_loss=2.310, moving_avg_reward=0.271, reward=0.365]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  62%|██████▏   | 279/452 [52:11<32:21,  0.09it/s, v_num=5dwa, train_loss=-1.49, moving_avg_reward=0.271, reward=0.278]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  62%|██████▏   | 280/452 [52:15<32:06,  0.09it/s, v_num=5dwa, train_loss=-0.69, moving_avg_reward=0.272, reward=0.394]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  62%|██████▏   | 281/452 [52:18<31:50,  0.09it/s, v_num=5dwa, train_loss=-0.392, moving_avg_reward=0.273, reward=0.317]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  62%|██████▏   | 282/452 [52:24<31:35,  0.09it/s, v_num=5dwa, train_loss=0.949, moving_avg_reward=0.273, reward=0.352] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  63%|██████▎   | 283/452 [52:37<31:25,  0.09it/s, v_num=5dwa, train_loss=-0.12, moving_avg_reward=0.274, reward=0.319]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  63%|██████▎   | 284/452 [52:49<31:14,  0.09it/s, v_num=5dwa, train_loss=5.430, moving_avg_reward=0.275, reward=0.364]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  63%|██████▎   | 285/452 [52:59<31:03,  0.09it/s, v_num=5dwa, train_loss=2.010, moving_avg_reward=0.275, reward=0.310]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  63%|██████▎   | 286/452 [53:06<30:49,  0.09it/s, v_num=5dwa, train_loss=0.961, moving_avg_reward=0.277, reward=0.431]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  63%|██████▎   | 287/452 [53:16<30:37,  0.09it/s, v_num=5dwa, train_loss=3.690, moving_avg_reward=0.278, reward=0.407]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  64%|██████▎   | 288/452 [53:26<30:26,  0.09it/s, v_num=5dwa, train_loss=-2.51, moving_avg_reward=0.278, reward=0.253]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  64%|██████▍   | 289/452 [53:36<30:14,  0.09it/s, v_num=5dwa, train_loss=4.880, moving_avg_reward=0.279, reward=0.357]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  64%|██████▍   | 290/452 [53:49<30:03,  0.09it/s, v_num=5dwa, train_loss=6.490, moving_avg_reward=0.280, reward=0.397]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  64%|██████▍   | 291/452 [53:58<29:51,  0.09it/s, v_num=5dwa, train_loss=3.180, moving_avg_reward=0.281, reward=0.406]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  65%|██████▍   | 292/452 [54:06<29:38,  0.09it/s, v_num=5dwa, train_loss=4.170, moving_avg_reward=0.282, reward=0.389]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  65%|██████▍   | 293/452 [54:18<29:28,  0.09it/s, v_num=5dwa, train_loss=0.491, moving_avg_reward=0.283, reward=0.388]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  65%|██████▌   | 294/452 [54:27<29:16,  0.09it/s, v_num=5dwa, train_loss=0.706, moving_avg_reward=0.284, reward=0.372]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  65%|██████▌   | 295/452 [54:38<29:04,  0.09it/s, v_num=5dwa, train_loss=3.440, moving_avg_reward=0.285, reward=0.390]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  65%|██████▌   | 296/452 [54:43<28:50,  0.09it/s, v_num=5dwa, train_loss=2.170, moving_avg_reward=0.285, reward=0.290]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  66%|██████▌   | 297/452 [54:54<28:39,  0.09it/s, v_num=5dwa, train_loss=0.802, moving_avg_reward=0.286, reward=0.381]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  66%|██████▌   | 298/452 [54:59<28:24,  0.09it/s, v_num=5dwa, train_loss=-0.795, moving_avg_reward=0.286, reward=0.321]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  66%|██████▌   | 299/452 [55:11<28:14,  0.09it/s, v_num=5dwa, train_loss=-0.29, moving_avg_reward=0.287, reward=0.375] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  66%|██████▋   | 300/452 [55:18<28:01,  0.09it/s, v_num=5dwa, train_loss=1.310, moving_avg_reward=0.288, reward=0.393]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  67%|██████▋   | 301/452 [55:26<27:48,  0.09it/s, v_num=5dwa, train_loss=3.700, moving_avg_reward=0.289, reward=0.385]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  67%|██████▋   | 302/452 [55:33<27:35,  0.09it/s, v_num=5dwa, train_loss=-0.48, moving_avg_reward=0.290, reward=0.355]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  67%|██████▋   | 303/452 [55:39<27:22,  0.09it/s, v_num=5dwa, train_loss=0.945, moving_avg_reward=0.290, reward=0.329]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  67%|██████▋   | 304/452 [55:52<27:12,  0.09it/s, v_num=5dwa, train_loss=6.160, moving_avg_reward=0.292, reward=0.437]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  67%|██████▋   | 305/452 [56:02<27:00,  0.09it/s, v_num=5dwa, train_loss=0.689, moving_avg_reward=0.292, reward=0.277]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  68%|██████▊   | 306/452 [56:07<26:46,  0.09it/s, v_num=5dwa, train_loss=-0.386, moving_avg_reward=0.293, reward=0.391]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  68%|██████▊   | 307/452 [56:09<26:31,  0.09it/s, v_num=5dwa, train_loss=-0.234, moving_avg_reward=0.293, reward=0.338]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  68%|██████▊   | 308/452 [56:16<26:18,  0.09it/s, v_num=5dwa, train_loss=-1.55, moving_avg_reward=0.293, reward=0.291] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  68%|██████▊   | 309/452 [56:28<26:08,  0.09it/s, v_num=5dwa, train_loss=9.990, moving_avg_reward=0.294, reward=0.377]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  69%|██████▊   | 310/452 [56:36<25:55,  0.09it/s, v_num=5dwa, train_loss=0.0153, moving_avg_reward=0.294, reward=0.295]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  69%|██████▉   | 311/452 [56:39<25:41,  0.09it/s, v_num=5dwa, train_loss=0.309, moving_avg_reward=0.294, reward=0.338] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  69%|██████▉   | 312/452 [56:51<25:30,  0.09it/s, v_num=5dwa, train_loss=0.230, moving_avg_reward=0.295, reward=0.355]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  69%|██████▉   | 313/452 [56:59<25:18,  0.09it/s, v_num=5dwa, train_loss=2.870, moving_avg_reward=0.296, reward=0.394]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  69%|██████▉   | 314/452 [57:02<25:04,  0.09it/s, v_num=5dwa, train_loss=-0.268, moving_avg_reward=0.295, reward=0.214]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  70%|██████▉   | 315/452 [57:17<24:54,  0.09it/s, v_num=5dwa, train_loss=6.990, moving_avg_reward=0.296, reward=0.403] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  70%|██████▉   | 316/452 [57:24<24:42,  0.09it/s, v_num=5dwa, train_loss=1.300, moving_avg_reward=0.297, reward=0.389]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  70%|███████   | 317/452 [57:32<24:30,  0.09it/s, v_num=5dwa, train_loss=1.580, moving_avg_reward=0.298, reward=0.408]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  70%|███████   | 318/452 [57:54<24:23,  0.09it/s, v_num=5dwa, train_loss=3.160, moving_avg_reward=0.300, reward=0.454]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  71%|███████   | 319/452 [58:11<24:15,  0.09it/s, v_num=5dwa, train_loss=-4.19, moving_avg_reward=0.301, reward=0.375]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  71%|███████   | 320/452 [58:22<24:04,  0.09it/s, v_num=5dwa, train_loss=1.730, moving_avg_reward=0.301, reward=0.308]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  71%|███████   | 321/452 [58:44<23:58,  0.09it/s, v_num=5dwa, train_loss=0.338, moving_avg_reward=0.302, reward=0.445]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  71%|███████   | 322/452 [59:05<23:51,  0.09it/s, v_num=5dwa, train_loss=5.350, moving_avg_reward=0.303, reward=0.375]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  71%|███████▏  | 323/452 [59:26<23:44,  0.09it/s, v_num=5dwa, train_loss=0.518, moving_avg_reward=0.304, reward=0.415]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  72%|███████▏  | 324/452 [59:48<23:37,  0.09it/s, v_num=5dwa, train_loss=-13.6, moving_avg_reward=0.305, reward=0.423]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  72%|███████▏  | 325/452 [1:00:09<23:30,  0.09it/s, v_num=5dwa, train_loss=-3.17, moving_avg_reward=0.306, reward=0.437]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  72%|███████▏  | 326/452 [1:00:31<23:23,  0.09it/s, v_num=5dwa, train_loss=8.760, moving_avg_reward=0.308, reward=0.458]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  72%|███████▏  | 327/452 [1:00:52<23:16,  0.09it/s, v_num=5dwa, train_loss=-3.39, moving_avg_reward=0.311, reward=0.594]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  73%|███████▎  | 328/452 [1:01:14<23:09,  0.09it/s, v_num=5dwa, train_loss=-15.7, moving_avg_reward=0.312, reward=0.440]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  73%|███████▎  | 329/452 [1:01:35<23:01,  0.09it/s, v_num=5dwa, train_loss=14.60, moving_avg_reward=0.314, reward=0.468]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  73%|███████▎  | 330/452 [1:01:57<22:54,  0.09it/s, v_num=5dwa, train_loss=-11.6, moving_avg_reward=0.315, reward=0.483]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  73%|███████▎  | 331/452 [1:02:18<22:46,  0.09it/s, v_num=5dwa, train_loss=-3.42, moving_avg_reward=0.316, reward=0.398]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  73%|███████▎  | 332/452 [1:02:40<22:39,  0.09it/s, v_num=5dwa, train_loss=12.00, moving_avg_reward=0.318, reward=0.491]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  74%|███████▎  | 333/452 [1:02:55<22:29,  0.09it/s, v_num=5dwa, train_loss=10.00, moving_avg_reward=0.320, reward=0.506]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  74%|███████▍  | 334/452 [1:03:17<22:21,  0.09it/s, v_num=5dwa, train_loss=14.70, moving_avg_reward=0.321, reward=0.400]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  74%|███████▍  | 335/452 [1:03:38<22:13,  0.09it/s, v_num=5dwa, train_loss=-48.0, moving_avg_reward=0.322, reward=0.468]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  74%|███████▍  | 336/452 [1:04:00<22:05,  0.09it/s, v_num=5dwa, train_loss=20.30, moving_avg_reward=0.325, reward=0.606]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  75%|███████▍  | 337/452 [1:04:21<21:57,  0.09it/s, v_num=5dwa, train_loss=0.745, moving_avg_reward=0.325, reward=0.355]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  75%|███████▍  | 338/452 [1:04:42<21:49,  0.09it/s, v_num=5dwa, train_loss=8.800, moving_avg_reward=0.327, reward=0.489]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  75%|███████▌  | 339/452 [1:05:04<21:41,  0.09it/s, v_num=5dwa, train_loss=0.851, moving_avg_reward=0.328, reward=0.419]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  75%|███████▌  | 340/452 [1:05:25<21:33,  0.09it/s, v_num=5dwa, train_loss=7.860, moving_avg_reward=0.329, reward=0.438]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  75%|███████▌  | 341/452 [1:05:47<21:24,  0.09it/s, v_num=5dwa, train_loss=0.041, moving_avg_reward=0.329, reward=0.379]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  76%|███████▌  | 342/452 [1:06:08<21:16,  0.09it/s, v_num=5dwa, train_loss=-6.83, moving_avg_reward=0.328, reward=0.222]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  76%|███████▌  | 343/452 [1:06:30<21:07,  0.09it/s, v_num=5dwa, train_loss=10.00, moving_avg_reward=0.330, reward=0.467]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  76%|███████▌  | 344/452 [1:06:51<20:59,  0.09it/s, v_num=5dwa, train_loss=24.90, moving_avg_reward=0.331, reward=0.407]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  76%|███████▋  | 345/452 [1:07:12<20:50,  0.09it/s, v_num=5dwa, train_loss=8.850, moving_avg_reward=0.333, reward=0.536]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  77%|███████▋  | 346/452 [1:07:34<20:42,  0.09it/s, v_num=5dwa, train_loss=9.790, moving_avg_reward=0.334, reward=0.512]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  77%|███████▋  | 347/452 [1:07:55<20:33,  0.09it/s, v_num=5dwa, train_loss=11.20, moving_avg_reward=0.336, reward=0.479]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  77%|███████▋  | 348/452 [1:08:17<20:24,  0.08it/s, v_num=5dwa, train_loss=10.20, moving_avg_reward=0.337, reward=0.457]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  77%|███████▋  | 349/452 [1:08:37<20:15,  0.08it/s, v_num=5dwa, train_loss=9.760, moving_avg_reward=0.338, reward=0.443]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  77%|███████▋  | 350/452 [1:08:58<20:06,  0.08it/s, v_num=5dwa, train_loss=5.020, moving_avg_reward=0.338, reward=0.373]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  78%|███████▊  | 351/452 [1:09:19<19:57,  0.08it/s, v_num=5dwa, train_loss=-1.27, moving_avg_reward=0.339, reward=0.397]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  78%|███████▊  | 352/452 [1:09:41<19:47,  0.08it/s, v_num=5dwa, train_loss=11.30, moving_avg_reward=0.341, reward=0.520]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  78%|███████▊  | 353/452 [1:10:02<19:38,  0.08it/s, v_num=5dwa, train_loss=7.860, moving_avg_reward=0.342, reward=0.458]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  78%|███████▊  | 354/452 [1:10:24<19:29,  0.08it/s, v_num=5dwa, train_loss=1.410, moving_avg_reward=0.342, reward=0.383]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  79%|███████▊  | 355/452 [1:10:45<19:20,  0.08it/s, v_num=5dwa, train_loss=-2.71, moving_avg_reward=0.342, reward=0.322]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  79%|███████▉  | 356/452 [1:11:07<19:10,  0.08it/s, v_num=5dwa, train_loss=11.70, moving_avg_reward=0.344, reward=0.518]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  79%|███████▉  | 357/452 [1:11:28<19:01,  0.08it/s, v_num=5dwa, train_loss=4.830, moving_avg_reward=0.345, reward=0.413]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  79%|███████▉  | 358/452 [1:11:49<18:51,  0.08it/s, v_num=5dwa, train_loss=16.80, moving_avg_reward=0.348, reward=0.638]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  79%|███████▉  | 359/452 [1:12:11<18:41,  0.08it/s, v_num=5dwa, train_loss=7.770, moving_avg_reward=0.349, reward=0.513]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  80%|███████▉  | 360/452 [1:12:32<18:32,  0.08it/s, v_num=5dwa, train_loss=4.690, moving_avg_reward=0.350, reward=0.450]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  80%|███████▉  | 361/452 [1:12:53<18:22,  0.08it/s, v_num=5dwa, train_loss=9.700, moving_avg_reward=0.352, reward=0.487]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  80%|████████  | 362/452 [1:13:14<18:12,  0.08it/s, v_num=5dwa, train_loss=11.00, moving_avg_reward=0.353, reward=0.535]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  80%|████████  | 363/452 [1:13:36<18:02,  0.08it/s, v_num=5dwa, train_loss=14.30, moving_avg_reward=0.356, reward=0.631]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  81%|████████  | 364/452 [1:13:57<17:52,  0.08it/s, v_num=5dwa, train_loss=11.70, moving_avg_reward=0.358, reward=0.529]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  81%|████████  | 365/452 [1:14:18<17:42,  0.08it/s, v_num=5dwa, train_loss=16.10, moving_avg_reward=0.360, reward=0.599]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  81%|████████  | 366/452 [1:14:39<17:32,  0.08it/s, v_num=5dwa, train_loss=18.20, moving_avg_reward=0.363, reward=0.635]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  81%|████████  | 367/452 [1:15:01<17:22,  0.08it/s, v_num=5dwa, train_loss=6.560, moving_avg_reward=0.366, reward=0.624]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  81%|████████▏ | 368/452 [1:15:22<17:12,  0.08it/s, v_num=5dwa, train_loss=14.20, moving_avg_reward=0.369, reward=0.694]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  82%|████████▏ | 369/452 [1:15:43<17:02,  0.08it/s, v_num=5dwa, train_loss=10.90, moving_avg_reward=0.371, reward=0.584]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  82%|████████▏ | 370/452 [1:16:05<16:51,  0.08it/s, v_num=5dwa, train_loss=10.00, moving_avg_reward=0.374, reward=0.653]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  82%|████████▏ | 371/452 [1:16:22<16:40,  0.08it/s, v_num=5dwa, train_loss=13.90, moving_avg_reward=0.377, reward=0.677]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  82%|████████▏ | 372/452 [1:16:39<16:29,  0.08it/s, v_num=5dwa, train_loss=13.90, moving_avg_reward=0.380, reward=0.654]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  83%|████████▎ | 373/452 [1:17:01<16:18,  0.08it/s, v_num=5dwa, train_loss=13.30, moving_avg_reward=0.383, reward=0.729]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  83%|████████▎ | 374/452 [1:17:22<16:08,  0.08it/s, v_num=5dwa, train_loss=15.30, moving_avg_reward=0.387, reward=0.748]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  83%|████████▎ | 375/452 [1:17:40<15:57,  0.08it/s, v_num=5dwa, train_loss=8.160, moving_avg_reward=0.388, reward=0.526]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  83%|████████▎ | 376/452 [1:17:57<15:45,  0.08it/s, v_num=5dwa, train_loss=11.30, moving_avg_reward=0.391, reward=0.677]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  83%|████████▎ | 377/452 [1:18:12<15:33,  0.08it/s, v_num=5dwa, train_loss=9.810, moving_avg_reward=0.393, reward=0.595]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  84%|████████▎ | 378/452 [1:18:34<15:22,  0.08it/s, v_num=5dwa, train_loss=11.70, moving_avg_reward=0.396, reward=0.683]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  84%|████████▍ | 379/452 [1:18:50<15:11,  0.08it/s, v_num=5dwa, train_loss=13.90, moving_avg_reward=0.399, reward=0.690]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  84%|████████▍ | 380/452 [1:19:08<14:59,  0.08it/s, v_num=5dwa, train_loss=15.30, moving_avg_reward=0.402, reward=0.717]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  84%|████████▍ | 381/452 [1:19:26<14:48,  0.08it/s, v_num=5dwa, train_loss=12.30, moving_avg_reward=0.405, reward=0.682]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  85%|████████▍ | 382/452 [1:19:40<14:36,  0.08it/s, v_num=5dwa, train_loss=9.430, moving_avg_reward=0.407, reward=0.647]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  85%|████████▍ | 383/452 [1:19:55<14:24,  0.08it/s, v_num=5dwa, train_loss=9.130, moving_avg_reward=0.410, reward=0.650]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  85%|████████▍ | 384/452 [1:20:14<14:12,  0.08it/s, v_num=5dwa, train_loss=11.80, moving_avg_reward=0.412, reward=0.641]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  85%|████████▌ | 385/452 [1:20:31<14:00,  0.08it/s, v_num=5dwa, train_loss=10.80, moving_avg_reward=0.415, reward=0.723]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  85%|████████▌ | 386/452 [1:20:53<13:49,  0.08it/s, v_num=5dwa, train_loss=8.760, moving_avg_reward=0.417, reward=0.632]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  86%|████████▌ | 387/452 [1:21:07<13:37,  0.08it/s, v_num=5dwa, train_loss=6.770, moving_avg_reward=0.420, reward=0.625]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  86%|████████▌ | 388/452 [1:21:25<13:25,  0.08it/s, v_num=5dwa, train_loss=12.20, moving_avg_reward=0.423, reward=0.793]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  86%|████████▌ | 389/452 [1:21:43<13:14,  0.08it/s, v_num=5dwa, train_loss=8.080, moving_avg_reward=0.425, reward=0.607]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  86%|████████▋ | 390/452 [1:22:05<13:02,  0.08it/s, v_num=5dwa, train_loss=2.320, moving_avg_reward=0.426, reward=0.526]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  87%|████████▋ | 391/452 [1:22:26<12:51,  0.08it/s, v_num=5dwa, train_loss=10.10, moving_avg_reward=0.430, reward=0.787]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  87%|████████▋ | 392/452 [1:22:43<12:39,  0.08it/s, v_num=5dwa, train_loss=8.280, moving_avg_reward=0.432, reward=0.687]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  87%|████████▋ | 393/452 [1:23:00<12:27,  0.08it/s, v_num=5dwa, train_loss=11.10, moving_avg_reward=0.434, reward=0.646]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  87%|████████▋ | 394/452 [1:23:16<12:15,  0.08it/s, v_num=5dwa, train_loss=7.270, moving_avg_reward=0.436, reward=0.595]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  87%|████████▋ | 395/452 [1:23:29<12:02,  0.08it/s, v_num=5dwa, train_loss=7.640, moving_avg_reward=0.440, reward=0.781]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  88%|████████▊ | 396/452 [1:23:51<11:51,  0.08it/s, v_num=5dwa, train_loss=4.250, moving_avg_reward=0.441, reward=0.580]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  88%|████████▊ | 397/452 [1:24:12<11:40,  0.08it/s, v_num=5dwa, train_loss=13.20, moving_avg_reward=0.444, reward=0.781]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  88%|████████▊ | 398/452 [1:24:29<11:27,  0.08it/s, v_num=5dwa, train_loss=8.920, moving_avg_reward=0.447, reward=0.690]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  88%|████████▊ | 399/452 [1:24:46<11:15,  0.08it/s, v_num=5dwa, train_loss=7.960, moving_avg_reward=0.450, reward=0.743]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  88%|████████▊ | 400/452 [1:24:58<11:02,  0.08it/s, v_num=5dwa, train_loss=6.090, moving_avg_reward=0.452, reward=0.644]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  89%|████████▊ | 401/452 [1:25:12<10:50,  0.08it/s, v_num=5dwa, train_loss=2.480, moving_avg_reward=0.451, reward=0.415]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  89%|████████▉ | 402/452 [1:25:34<10:38,  0.08it/s, v_num=5dwa, train_loss=9.300, moving_avg_reward=0.454, reward=0.700]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  89%|████████▉ | 403/452 [1:25:51<10:26,  0.08it/s, v_num=5dwa, train_loss=9.190, moving_avg_reward=0.456, reward=0.720]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  89%|████████▉ | 404/452 [1:26:06<10:13,  0.08it/s, v_num=5dwa, train_loss=6.890, moving_avg_reward=0.458, reward=0.612]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  90%|████████▉ | 405/452 [1:26:22<10:01,  0.08it/s, v_num=5dwa, train_loss=7.060, moving_avg_reward=0.459, reward=0.548]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  90%|████████▉ | 406/452 [1:26:38<09:49,  0.08it/s, v_num=5dwa, train_loss=9.120, moving_avg_reward=0.462, reward=0.777]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  90%|█████████ | 407/452 [1:27:00<09:37,  0.08it/s, v_num=5dwa, train_loss=2.780, moving_avg_reward=0.463, reward=0.596]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  90%|█████████ | 408/452 [1:27:19<09:24,  0.08it/s, v_num=5dwa, train_loss=12.10, moving_avg_reward=0.466, reward=0.745]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  90%|█████████ | 409/452 [1:27:40<09:13,  0.08it/s, v_num=5dwa, train_loss=5.890, moving_avg_reward=0.468, reward=0.633]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  91%|█████████ | 410/452 [1:27:54<09:00,  0.08it/s, v_num=5dwa, train_loss=5.840, moving_avg_reward=0.469, reward=0.569]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  91%|█████████ | 411/452 [1:28:13<08:48,  0.08it/s, v_num=5dwa, train_loss=6.950, moving_avg_reward=0.470, reward=0.601]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  91%|█████████ | 412/452 [1:28:26<08:35,  0.08it/s, v_num=5dwa, train_loss=3.860, moving_avg_reward=0.471, reward=0.520]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  91%|█████████▏| 413/452 [1:28:42<08:22,  0.08it/s, v_num=5dwa, train_loss=4.760, moving_avg_reward=0.472, reward=0.599]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  92%|█████████▏| 414/452 [1:28:59<08:10,  0.08it/s, v_num=5dwa, train_loss=6.950, moving_avg_reward=0.474, reward=0.688]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  92%|█████████▏| 415/452 [1:29:18<07:57,  0.08it/s, v_num=5dwa, train_loss=6.950, moving_avg_reward=0.476, reward=0.680]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  92%|█████████▏| 416/452 [1:29:38<07:45,  0.08it/s, v_num=5dwa, train_loss=12.50, moving_avg_reward=0.479, reward=0.773]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  92%|█████████▏| 417/452 [1:29:58<07:33,  0.08it/s, v_num=5dwa, train_loss=8.910, moving_avg_reward=0.481, reward=0.684]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  92%|█████████▏| 418/452 [1:30:15<07:20,  0.08it/s, v_num=5dwa, train_loss=7.650, moving_avg_reward=0.484, reward=0.775]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  93%|█████████▎| 419/452 [1:30:32<07:07,  0.08it/s, v_num=5dwa, train_loss=5.120, moving_avg_reward=0.486, reward=0.635]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  93%|█████████▎| 420/452 [1:30:50<06:55,  0.08it/s, v_num=5dwa, train_loss=9.950, moving_avg_reward=0.490, reward=0.898]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  93%|█████████▎| 421/452 [1:31:09<06:42,  0.08it/s, v_num=5dwa, train_loss=7.800, moving_avg_reward=0.491, reward=0.652]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  93%|█████████▎| 422/452 [1:31:24<06:29,  0.08it/s, v_num=5dwa, train_loss=6.710, moving_avg_reward=0.493, reward=0.680]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  94%|█████████▎| 423/452 [1:31:46<06:17,  0.08it/s, v_num=5dwa, train_loss=9.440, moving_avg_reward=0.497, reward=0.822]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  94%|█████████▍| 424/452 [1:32:02<06:04,  0.08it/s, v_num=5dwa, train_loss=2.570, moving_avg_reward=0.497, reward=0.546]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  94%|█████████▍| 425/452 [1:32:23<05:52,  0.08it/s, v_num=5dwa, train_loss=5.810, moving_avg_reward=0.499, reward=0.698]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  94%|█████████▍| 426/452 [1:32:41<05:39,  0.08it/s, v_num=5dwa, train_loss=4.830, moving_avg_reward=0.500, reward=0.591]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  94%|█████████▍| 427/452 [1:33:02<05:26,  0.08it/s, v_num=5dwa, train_loss=7.820, moving_avg_reward=0.502, reward=0.689]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  95%|█████████▍| 428/452 [1:33:23<05:14,  0.08it/s, v_num=5dwa, train_loss=3.660, moving_avg_reward=0.503, reward=0.564]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  95%|█████████▍| 429/452 [1:33:45<05:01,  0.08it/s, v_num=5dwa, train_loss=2.990, moving_avg_reward=0.503, reward=0.538]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  95%|█████████▌| 430/452 [1:34:06<04:48,  0.08it/s, v_num=5dwa, train_loss=3.270, moving_avg_reward=0.504, reward=0.608]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  95%|█████████▌| 431/452 [1:34:22<04:35,  0.08it/s, v_num=5dwa, train_loss=3.010, moving_avg_reward=0.505, reward=0.597]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  96%|█████████▌| 432/452 [1:34:41<04:23,  0.08it/s, v_num=5dwa, train_loss=4.930, moving_avg_reward=0.506, reward=0.614]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  96%|█████████▌| 433/452 [1:35:03<04:10,  0.08it/s, v_num=5dwa, train_loss=3.790, moving_avg_reward=0.508, reward=0.686]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  96%|█████████▌| 434/452 [1:35:24<03:57,  0.08it/s, v_num=5dwa, train_loss=5.070, moving_avg_reward=0.510, reward=0.774]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  96%|█████████▌| 435/452 [1:35:49<03:44,  0.08it/s, v_num=5dwa, train_loss=3.960, moving_avg_reward=0.512, reward=0.704]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  96%|█████████▋| 436/452 [1:36:12<03:31,  0.08it/s, v_num=5dwa, train_loss=3.570, moving_avg_reward=0.513, reward=0.611]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  97%|█████████▋| 437/452 [1:36:30<03:18,  0.08it/s, v_num=5dwa, train_loss=3.190, moving_avg_reward=0.515, reward=0.674]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  97%|█████████▋| 438/452 [1:36:51<03:05,  0.08it/s, v_num=5dwa, train_loss=2.030, moving_avg_reward=0.515, reward=0.524]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  97%|█████████▋| 439/452 [1:37:13<02:52,  0.08it/s, v_num=5dwa, train_loss=4.090, moving_avg_reward=0.516, reward=0.612]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  97%|█████████▋| 440/452 [1:37:32<02:39,  0.08it/s, v_num=5dwa, train_loss=4.630, moving_avg_reward=0.519, reward=0.788]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  98%|█████████▊| 441/452 [1:37:52<02:26,  0.08it/s, v_num=5dwa, train_loss=3.810, moving_avg_reward=0.521, reward=0.728]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  98%|█████████▊| 442/452 [1:38:09<02:13,  0.08it/s, v_num=5dwa, train_loss=3.880, moving_avg_reward=0.522, reward=0.659]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  98%|█████████▊| 443/452 [1:38:34<02:00,  0.07it/s, v_num=5dwa, train_loss=0.325, moving_avg_reward=0.522, reward=0.533]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  98%|█████████▊| 444/452 [1:38:53<01:46,  0.07it/s, v_num=5dwa, train_loss=2.760, moving_avg_reward=0.522, reward=0.448]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  98%|█████████▊| 445/452 [1:39:19<01:33,  0.07it/s, v_num=5dwa, train_loss=1.280, moving_avg_reward=0.522, reward=0.527]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  99%|█████████▊| 446/452 [1:39:38<01:20,  0.07it/s, v_num=5dwa, train_loss=3.330, moving_avg_reward=0.523, reward=0.624]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  99%|█████████▉| 447/452 [1:39:56<01:07,  0.07it/s, v_num=5dwa, train_loss=5.670, moving_avg_reward=0.525, reward=0.790]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  99%|█████████▉| 448/452 [1:40:14<00:53,  0.07it/s, v_num=5dwa, train_loss=2.510, moving_avg_reward=0.527, reward=0.654]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0:  99%|█████████▉| 449/452 [1:40:37<00:40,  0.07it/s, v_num=5dwa, train_loss=4.910, moving_avg_reward=0.528, reward=0.713]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0: 100%|█████████▉| 450/452 [1:40:58<00:26,  0.07it/s, v_num=5dwa, train_loss=3.270, moving_avg_reward=0.531, reward=0.733]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 0: 100%|█████████▉| 451/452 [1:41:20<00:13,  0.07it/s, v_num=5dwa, train_loss=4.350, moving_avg_reward=0.532, reward=0.678]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   0%|          | 0/452 [00:00<?, ?it/s, v_num=5dwa, train_loss=5.100, moving_avg_reward=0.535, reward=0.795]            

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   0%|          | 1/452 [00:24<3:06:20,  0.04it/s, v_num=5dwa, train_loss=4.410, moving_avg_reward=0.536, reward=0.653]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   0%|          | 2/452 [00:48<3:02:04,  0.04it/s, v_num=5dwa, train_loss=3.020, moving_avg_reward=0.537, reward=0.641]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   1%|          | 3/452 [01:13<3:03:07,  0.04it/s, v_num=5dwa, train_loss=-0.667, moving_avg_reward=0.537, reward=0.553]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   1%|          | 4/452 [01:31<2:51:03,  0.04it/s, v_num=5dwa, train_loss=2.490, moving_avg_reward=0.537, reward=0.535] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   1%|          | 5/452 [01:54<2:51:04,  0.04it/s, v_num=5dwa, train_loss=3.410, moving_avg_reward=0.539, reward=0.704]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   1%|▏         | 6/452 [02:12<2:44:16,  0.05it/s, v_num=5dwa, train_loss=2.870, moving_avg_reward=0.540, reward=0.689]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   2%|▏         | 7/452 [02:29<2:38:32,  0.05it/s, v_num=5dwa, train_loss=1.970, moving_avg_reward=0.542, reward=0.732]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   2%|▏         | 8/452 [02:45<2:33:17,  0.05it/s, v_num=5dwa, train_loss=2.750, moving_avg_reward=0.543, reward=0.662]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   2%|▏         | 9/452 [03:03<2:30:24,  0.05it/s, v_num=5dwa, train_loss=2.120, moving_avg_reward=0.545, reward=0.709]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   2%|▏         | 10/452 [03:26<2:31:51,  0.05it/s, v_num=5dwa, train_loss=3.420, moving_avg_reward=0.546, reward=0.665]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   2%|▏         | 11/452 [03:47<2:32:00,  0.05it/s, v_num=5dwa, train_loss=-0.00653, moving_avg_reward=0.546, reward=0.507]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   3%|▎         | 12/452 [04:06<2:30:42,  0.05it/s, v_num=5dwa, train_loss=2.280, moving_avg_reward=0.545, reward=0.508]   

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   3%|▎         | 13/452 [04:19<2:26:18,  0.05it/s, v_num=5dwa, train_loss=2.010, moving_avg_reward=0.548, reward=0.793]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   3%|▎         | 14/452 [04:35<2:23:31,  0.05it/s, v_num=5dwa, train_loss=2.980, moving_avg_reward=0.549, reward=0.664]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   3%|▎         | 15/452 [04:49<2:20:24,  0.05it/s, v_num=5dwa, train_loss=2.130, moving_avg_reward=0.550, reward=0.623]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   4%|▎         | 16/452 [05:01<2:17:06,  0.05it/s, v_num=5dwa, train_loss=3.120, moving_avg_reward=0.552, reward=0.814]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   4%|▍         | 17/452 [05:15<2:14:27,  0.05it/s, v_num=5dwa, train_loss=-0.276, moving_avg_reward=0.554, reward=0.700]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   4%|▍         | 18/452 [05:25<2:10:40,  0.06it/s, v_num=5dwa, train_loss=2.400, moving_avg_reward=0.555, reward=0.673] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   4%|▍         | 19/452 [05:37<2:08:13,  0.06it/s, v_num=5dwa, train_loss=0.581, moving_avg_reward=0.555, reward=0.569]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   4%|▍         | 20/452 [05:55<2:08:07,  0.06it/s, v_num=5dwa, train_loss=1.550, moving_avg_reward=0.557, reward=0.729]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   5%|▍         | 21/452 [06:07<2:05:40,  0.06it/s, v_num=5dwa, train_loss=0.346, moving_avg_reward=0.558, reward=0.626]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   5%|▍         | 22/452 [06:21<2:04:11,  0.06it/s, v_num=5dwa, train_loss=2.400, moving_avg_reward=0.560, reward=0.765]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   5%|▌         | 23/452 [06:31<2:01:42,  0.06it/s, v_num=5dwa, train_loss=0.838, moving_avg_reward=0.559, reward=0.516]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   5%|▌         | 24/452 [06:43<1:59:54,  0.06it/s, v_num=5dwa, train_loss=1.090, moving_avg_reward=0.559, reward=0.574]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   6%|▌         | 25/452 [06:59<1:59:24,  0.06it/s, v_num=5dwa, train_loss=3.980, moving_avg_reward=0.560, reward=0.624]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   6%|▌         | 26/452 [07:12<1:58:06,  0.06it/s, v_num=5dwa, train_loss=1.360, moving_avg_reward=0.560, reward=0.574]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   6%|▌         | 27/452 [07:24<1:56:40,  0.06it/s, v_num=5dwa, train_loss=1.290, moving_avg_reward=0.562, reward=0.742]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   6%|▌         | 28/452 [07:36<1:55:14,  0.06it/s, v_num=5dwa, train_loss=2.260, moving_avg_reward=0.564, reward=0.807]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   6%|▋         | 29/452 [07:47<1:53:46,  0.06it/s, v_num=5dwa, train_loss=2.340, moving_avg_reward=0.565, reward=0.655]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   7%|▋         | 30/452 [08:00<1:52:45,  0.06it/s, v_num=5dwa, train_loss=1.350, moving_avg_reward=0.566, reward=0.650]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   7%|▋         | 31/452 [08:10<1:51:05,  0.06it/s, v_num=5dwa, train_loss=0.652, moving_avg_reward=0.566, reward=0.560]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   7%|▋         | 32/452 [08:24<1:50:24,  0.06it/s, v_num=5dwa, train_loss=1.800, moving_avg_reward=0.569, reward=0.805]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   7%|▋         | 33/452 [08:38<1:49:44,  0.06it/s, v_num=5dwa, train_loss=1.880, moving_avg_reward=0.569, reward=0.607]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   8%|▊         | 34/452 [08:49<1:48:31,  0.06it/s, v_num=5dwa, train_loss=1.110, moving_avg_reward=0.569, reward=0.549]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   8%|▊         | 35/452 [09:02<1:47:40,  0.06it/s, v_num=5dwa, train_loss=1.820, moving_avg_reward=0.571, reward=0.748]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   8%|▊         | 36/452 [09:14<1:46:47,  0.06it/s, v_num=5dwa, train_loss=1.860, moving_avg_reward=0.571, reward=0.578]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   8%|▊         | 37/452 [09:24<1:45:28,  0.07it/s, v_num=5dwa, train_loss=0.607, moving_avg_reward=0.571, reward=0.563]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   8%|▊         | 38/452 [09:45<1:46:23,  0.06it/s, v_num=5dwa, train_loss=-0.26, moving_avg_reward=0.571, reward=0.636]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   9%|▊         | 39/452 [09:55<1:45:05,  0.07it/s, v_num=5dwa, train_loss=0.746, moving_avg_reward=0.571, reward=0.531]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   9%|▉         | 40/452 [10:08<1:44:26,  0.07it/s, v_num=5dwa, train_loss=3.400, moving_avg_reward=0.572, reward=0.705]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   9%|▉         | 41/452 [10:19<1:43:34,  0.07it/s, v_num=5dwa, train_loss=1.040, moving_avg_reward=0.572, reward=0.582]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:   9%|▉         | 42/452 [10:33<1:43:04,  0.07it/s, v_num=5dwa, train_loss=3.550, moving_avg_reward=0.575, reward=0.824]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  10%|▉         | 43/452 [10:46<1:42:24,  0.07it/s, v_num=5dwa, train_loss=1.970, moving_avg_reward=0.576, reward=0.687]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  10%|▉         | 44/452 [10:59<1:41:54,  0.07it/s, v_num=5dwa, train_loss=2.530, moving_avg_reward=0.577, reward=0.656]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  10%|▉         | 45/452 [11:10<1:41:06,  0.07it/s, v_num=5dwa, train_loss=2.780, moving_avg_reward=0.579, reward=0.816]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  10%|█         | 46/452 [11:22<1:40:22,  0.07it/s, v_num=5dwa, train_loss=1.180, moving_avg_reward=0.579, reward=0.590]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  10%|█         | 47/452 [11:32<1:39:30,  0.07it/s, v_num=5dwa, train_loss=1.710, moving_avg_reward=0.580, reward=0.680]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  11%|█         | 48/452 [11:45<1:38:55,  0.07it/s, v_num=5dwa, train_loss=1.800, moving_avg_reward=0.581, reward=0.689]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  11%|█         | 49/452 [11:55<1:38:04,  0.07it/s, v_num=5dwa, train_loss=-0.00728, moving_avg_reward=0.580, reward=0.466]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  11%|█         | 50/452 [12:06<1:37:23,  0.07it/s, v_num=5dwa, train_loss=0.340, moving_avg_reward=0.580, reward=0.583]   

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  11%|█▏        | 51/452 [12:18<1:36:42,  0.07it/s, v_num=5dwa, train_loss=1.360, moving_avg_reward=0.580, reward=0.579]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  12%|█▏        | 52/452 [12:29<1:36:07,  0.07it/s, v_num=5dwa, train_loss=3.070, moving_avg_reward=0.582, reward=0.798]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  12%|█▏        | 53/452 [12:40<1:35:27,  0.07it/s, v_num=5dwa, train_loss=1.940, moving_avg_reward=0.582, reward=0.575]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  12%|█▏        | 54/452 [12:56<1:35:23,  0.07it/s, v_num=5dwa, train_loss=1.960, moving_avg_reward=0.584, reward=0.745]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  12%|█▏        | 55/452 [13:06<1:34:33,  0.07it/s, v_num=5dwa, train_loss=0.384, moving_avg_reward=0.584, reward=0.615]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  12%|█▏        | 56/452 [13:17<1:33:56,  0.07it/s, v_num=5dwa, train_loss=2.480, moving_avg_reward=0.586, reward=0.741]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  13%|█▎        | 57/452 [13:38<1:34:34,  0.07it/s, v_num=5dwa, train_loss=2.170, moving_avg_reward=0.587, reward=0.668]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  13%|█▎        | 58/452 [13:48<1:33:48,  0.07it/s, v_num=5dwa, train_loss=1.070, moving_avg_reward=0.587, reward=0.611]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  13%|█▎        | 59/452 [13:58<1:33:03,  0.07it/s, v_num=5dwa, train_loss=1.390, moving_avg_reward=0.587, reward=0.641]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  13%|█▎        | 60/452 [14:09<1:32:32,  0.07it/s, v_num=5dwa, train_loss=0.503, moving_avg_reward=0.587, reward=0.508]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  13%|█▎        | 61/452 [14:24<1:32:18,  0.07it/s, v_num=5dwa, train_loss=1.030, moving_avg_reward=0.587, reward=0.622]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  14%|█▎        | 62/452 [14:38<1:32:05,  0.07it/s, v_num=5dwa, train_loss=1.560, moving_avg_reward=0.586, reward=0.517]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  14%|█▍        | 63/452 [14:50<1:31:38,  0.07it/s, v_num=5dwa, train_loss=0.451, moving_avg_reward=0.585, reward=0.435]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  14%|█▍        | 64/452 [15:03<1:31:18,  0.07it/s, v_num=5dwa, train_loss=1.790, moving_avg_reward=0.586, reward=0.707]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  14%|█▍        | 65/452 [15:18<1:31:09,  0.07it/s, v_num=5dwa, train_loss=1.890, moving_avg_reward=0.588, reward=0.825]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  15%|█▍        | 66/452 [15:33<1:31:01,  0.07it/s, v_num=5dwa, train_loss=0.507, moving_avg_reward=0.588, reward=0.561]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  15%|█▍        | 67/452 [15:45<1:30:33,  0.07it/s, v_num=5dwa, train_loss=1.530, moving_avg_reward=0.590, reward=0.760]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  15%|█▌        | 68/452 [16:04<1:30:46,  0.07it/s, v_num=5dwa, train_loss=2.730, moving_avg_reward=0.591, reward=0.718]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  15%|█▌        | 69/452 [16:20<1:30:41,  0.07it/s, v_num=5dwa, train_loss=1.470, moving_avg_reward=0.592, reward=0.704]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  15%|█▌        | 70/452 [16:32<1:30:16,  0.07it/s, v_num=5dwa, train_loss=0.880, moving_avg_reward=0.592, reward=0.552]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  16%|█▌        | 71/452 [16:47<1:30:05,  0.07it/s, v_num=5dwa, train_loss=1.270, moving_avg_reward=0.591, reward=0.556]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  16%|█▌        | 72/452 [16:58<1:29:37,  0.07it/s, v_num=5dwa, train_loss=0.650, moving_avg_reward=0.591, reward=0.597]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  16%|█▌        | 73/452 [17:11<1:29:17,  0.07it/s, v_num=5dwa, train_loss=1.840, moving_avg_reward=0.592, reward=0.598]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  16%|█▋        | 74/452 [17:25<1:28:57,  0.07it/s, v_num=5dwa, train_loss=1.100, moving_avg_reward=0.590, reward=0.477]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  17%|█▋        | 75/452 [17:35<1:28:25,  0.07it/s, v_num=5dwa, train_loss=0.262, moving_avg_reward=0.588, reward=0.343]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  17%|█▋        | 76/452 [17:46<1:27:57,  0.07it/s, v_num=5dwa, train_loss=2.080, moving_avg_reward=0.591, reward=0.874]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  17%|█▋        | 77/452 [17:58<1:27:31,  0.07it/s, v_num=5dwa, train_loss=1.360, moving_avg_reward=0.591, reward=0.629]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  17%|█▋        | 78/452 [18:13<1:27:25,  0.07it/s, v_num=5dwa, train_loss=1.980, moving_avg_reward=0.594, reward=0.831]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  17%|█▋        | 79/452 [18:28<1:27:14,  0.07it/s, v_num=5dwa, train_loss=2.250, moving_avg_reward=0.596, reward=0.859]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  18%|█▊        | 80/452 [18:43<1:27:06,  0.07it/s, v_num=5dwa, train_loss=1.800, moving_avg_reward=0.597, reward=0.664]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  18%|█▊        | 81/452 [18:57<1:26:49,  0.07it/s, v_num=5dwa, train_loss=1.580, moving_avg_reward=0.598, reward=0.683]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  18%|█▊        | 82/452 [19:03<1:26:01,  0.07it/s, v_num=5dwa, train_loss=3.370, moving_avg_reward=0.602, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  18%|█▊        | 83/452 [19:18<1:25:49,  0.07it/s, v_num=5dwa, train_loss=0.392, moving_avg_reward=0.600, reward=0.465]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  19%|█▊        | 84/452 [19:29<1:25:23,  0.07it/s, v_num=5dwa, train_loss=0.833, moving_avg_reward=0.601, reward=0.643]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  19%|█▉        | 85/452 [19:42<1:25:05,  0.07it/s, v_num=5dwa, train_loss=1.760, moving_avg_reward=0.602, reward=0.751]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  19%|█▉        | 86/452 [20:00<1:25:10,  0.07it/s, v_num=5dwa, train_loss=2.460, moving_avg_reward=0.604, reward=0.806]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  19%|█▉        | 87/452 [20:18<1:25:12,  0.07it/s, v_num=5dwa, train_loss=3.740, moving_avg_reward=0.607, reward=0.843]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  19%|█▉        | 88/452 [20:36<1:25:13,  0.07it/s, v_num=5dwa, train_loss=1.310, moving_avg_reward=0.606, reward=0.535]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  20%|█▉        | 89/452 [20:53<1:25:14,  0.07it/s, v_num=5dwa, train_loss=2.050, moving_avg_reward=0.607, reward=0.763]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  20%|█▉        | 90/452 [21:13<1:25:23,  0.07it/s, v_num=5dwa, train_loss=0.884, moving_avg_reward=0.607, reward=0.571]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  20%|██        | 91/452 [21:35<1:25:39,  0.07it/s, v_num=5dwa, train_loss=-1.74, moving_avg_reward=0.604, reward=0.322]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  20%|██        | 92/452 [21:56<1:25:51,  0.07it/s, v_num=5dwa, train_loss=0.319, moving_avg_reward=0.604, reward=0.548]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  21%|██        | 93/452 [22:18<1:26:05,  0.07it/s, v_num=5dwa, train_loss=0.873, moving_avg_reward=0.603, reward=0.571]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  21%|██        | 94/452 [22:39<1:26:17,  0.07it/s, v_num=5dwa, train_loss=0.314, moving_avg_reward=0.603, reward=0.583]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  21%|██        | 95/452 [23:01<1:26:29,  0.07it/s, v_num=5dwa, train_loss=1.380, moving_avg_reward=0.604, reward=0.664]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  21%|██        | 96/452 [23:22<1:26:40,  0.07it/s, v_num=5dwa, train_loss=1.740, moving_avg_reward=0.605, reward=0.700]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  21%|██▏       | 97/452 [23:43<1:26:49,  0.07it/s, v_num=5dwa, train_loss=1.780, moving_avg_reward=0.605, reward=0.622]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  22%|██▏       | 98/452 [24:04<1:26:59,  0.07it/s, v_num=5dwa, train_loss=0.0435, moving_avg_reward=0.604, reward=0.504]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  22%|██▏       | 99/452 [24:26<1:27:08,  0.07it/s, v_num=5dwa, train_loss=1.230, moving_avg_reward=0.605, reward=0.716] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  22%|██▏       | 100/452 [24:45<1:27:09,  0.07it/s, v_num=5dwa, train_loss=2.260, moving_avg_reward=0.606, reward=0.715]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  22%|██▏       | 101/452 [25:04<1:27:09,  0.07it/s, v_num=5dwa, train_loss=0.767, moving_avg_reward=0.606, reward=0.555]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  23%|██▎       | 102/452 [25:20<1:26:57,  0.07it/s, v_num=5dwa, train_loss=1.570, moving_avg_reward=0.606, reward=0.609]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  23%|██▎       | 103/452 [25:41<1:27:03,  0.07it/s, v_num=5dwa, train_loss=1.350, moving_avg_reward=0.606, reward=0.660]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  23%|██▎       | 104/452 [26:02<1:27:08,  0.07it/s, v_num=5dwa, train_loss=1.410, moving_avg_reward=0.607, reward=0.682]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  23%|██▎       | 105/452 [26:23<1:27:12,  0.07it/s, v_num=5dwa, train_loss=0.573, moving_avg_reward=0.607, reward=0.593]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  23%|██▎       | 106/452 [26:36<1:26:51,  0.07it/s, v_num=5dwa, train_loss=0.594, moving_avg_reward=0.606, reward=0.547]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  24%|██▎       | 107/452 [26:58<1:26:58,  0.07it/s, v_num=5dwa, train_loss=2.030, moving_avg_reward=0.606, reward=0.613]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  24%|██▍       | 108/452 [27:17<1:26:56,  0.07it/s, v_num=5dwa, train_loss=0.746, moving_avg_reward=0.607, reward=0.644]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  24%|██▍       | 109/452 [27:35<1:26:48,  0.07it/s, v_num=5dwa, train_loss=1.770, moving_avg_reward=0.608, reward=0.787]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  24%|██▍       | 110/452 [27:50<1:26:33,  0.07it/s, v_num=5dwa, train_loss=2.090, moving_avg_reward=0.609, reward=0.679]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  25%|██▍       | 111/452 [28:10<1:26:32,  0.07it/s, v_num=5dwa, train_loss=1.870, moving_avg_reward=0.610, reward=0.695]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  25%|██▍       | 112/452 [28:23<1:26:12,  0.07it/s, v_num=5dwa, train_loss=0.157, moving_avg_reward=0.609, reward=0.518]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  25%|██▌       | 113/452 [28:41<1:26:05,  0.07it/s, v_num=5dwa, train_loss=2.230, moving_avg_reward=0.610, reward=0.722]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  25%|██▌       | 114/452 [29:03<1:26:08,  0.07it/s, v_num=5dwa, train_loss=1.770, moving_avg_reward=0.611, reward=0.732]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  25%|██▌       | 115/452 [29:21<1:26:01,  0.07it/s, v_num=5dwa, train_loss=0.557, moving_avg_reward=0.611, reward=0.594]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  26%|██▌       | 116/452 [29:40<1:25:56,  0.07it/s, v_num=5dwa, train_loss=0.743, moving_avg_reward=0.612, reward=0.676]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  26%|██▌       | 117/452 [29:58<1:25:49,  0.07it/s, v_num=5dwa, train_loss=1.880, moving_avg_reward=0.613, reward=0.696]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  26%|██▌       | 118/452 [30:15<1:25:38,  0.07it/s, v_num=5dwa, train_loss=1.200, moving_avg_reward=0.614, reward=0.789]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  26%|██▋       | 119/452 [30:32<1:25:28,  0.06it/s, v_num=5dwa, train_loss=0.865, moving_avg_reward=0.614, reward=0.581]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  27%|██▋       | 120/452 [30:54<1:25:30,  0.06it/s, v_num=5dwa, train_loss=-0.245, moving_avg_reward=0.614, reward=0.593]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  27%|██▋       | 121/452 [31:14<1:25:27,  0.06it/s, v_num=5dwa, train_loss=0.601, moving_avg_reward=0.614, reward=0.591] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  27%|██▋       | 122/452 [31:35<1:25:28,  0.06it/s, v_num=5dwa, train_loss=0.472, moving_avg_reward=0.615, reward=0.725]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  27%|██▋       | 123/452 [31:56<1:25:25,  0.06it/s, v_num=5dwa, train_loss=1.010, moving_avg_reward=0.616, reward=0.738]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  27%|██▋       | 124/452 [32:17<1:25:24,  0.06it/s, v_num=5dwa, train_loss=0.574, moving_avg_reward=0.616, reward=0.628]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  28%|██▊       | 125/452 [32:38<1:25:24,  0.06it/s, v_num=5dwa, train_loss=0.212, moving_avg_reward=0.616, reward=0.586]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  28%|██▊       | 126/452 [32:54<1:25:08,  0.06it/s, v_num=5dwa, train_loss=0.515, moving_avg_reward=0.616, reward=0.648]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  28%|██▊       | 127/452 [33:16<1:25:08,  0.06it/s, v_num=5dwa, train_loss=1.570, moving_avg_reward=0.617, reward=0.672]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  28%|██▊       | 128/452 [33:37<1:25:07,  0.06it/s, v_num=5dwa, train_loss=0.720, moving_avg_reward=0.618, reward=0.722]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  29%|██▊       | 129/452 [33:59<1:25:06,  0.06it/s, v_num=5dwa, train_loss=1.910, moving_avg_reward=0.618, reward=0.685]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  29%|██▉       | 130/452 [34:20<1:25:04,  0.06it/s, v_num=5dwa, train_loss=0.520, moving_avg_reward=0.618, reward=0.567]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  29%|██▉       | 131/452 [34:39<1:24:54,  0.06it/s, v_num=5dwa, train_loss=-0.726, moving_avg_reward=0.617, reward=0.532]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  29%|██▉       | 132/452 [35:00<1:24:52,  0.06it/s, v_num=5dwa, train_loss=0.189, moving_avg_reward=0.616, reward=0.556] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  29%|██▉       | 133/452 [35:21<1:24:49,  0.06it/s, v_num=5dwa, train_loss=0.476, moving_avg_reward=0.614, reward=0.409]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  30%|██▉       | 134/452 [35:40<1:24:39,  0.06it/s, v_num=5dwa, train_loss=1.470, moving_avg_reward=0.615, reward=0.714]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  30%|██▉       | 135/452 [35:59<1:24:30,  0.06it/s, v_num=5dwa, train_loss=0.463, moving_avg_reward=0.615, reward=0.549]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  30%|███       | 136/452 [36:17<1:24:19,  0.06it/s, v_num=5dwa, train_loss=0.827, moving_avg_reward=0.615, reward=0.643]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  30%|███       | 137/452 [36:35<1:24:08,  0.06it/s, v_num=5dwa, train_loss=1.090, moving_avg_reward=0.616, reward=0.739]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  31%|███       | 138/452 [36:55<1:24:01,  0.06it/s, v_num=5dwa, train_loss=1.300, moving_avg_reward=0.619, reward=0.855]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  31%|███       | 139/452 [37:15<1:23:52,  0.06it/s, v_num=5dwa, train_loss=0.977, moving_avg_reward=0.619, reward=0.656]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  31%|███       | 140/452 [37:35<1:23:45,  0.06it/s, v_num=5dwa, train_loss=0.713, moving_avg_reward=0.620, reward=0.703]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  31%|███       | 141/452 [37:55<1:23:38,  0.06it/s, v_num=5dwa, train_loss=0.574, moving_avg_reward=0.620, reward=0.631]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  31%|███▏      | 142/452 [38:16<1:23:33,  0.06it/s, v_num=5dwa, train_loss=2.500, moving_avg_reward=0.622, reward=0.865]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  32%|███▏      | 143/452 [38:37<1:23:27,  0.06it/s, v_num=5dwa, train_loss=0.824, moving_avg_reward=0.623, reward=0.671]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  32%|███▏      | 144/452 [38:58<1:23:22,  0.06it/s, v_num=5dwa, train_loss=-0.0655, moving_avg_reward=0.621, reward=0.397]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  32%|███▏      | 145/452 [39:20<1:23:17,  0.06it/s, v_num=5dwa, train_loss=0.596, moving_avg_reward=0.621, reward=0.671]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  32%|███▏      | 146/452 [39:41<1:23:11,  0.06it/s, v_num=5dwa, train_loss=1.300, moving_avg_reward=0.622, reward=0.693]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  33%|███▎      | 147/452 [40:02<1:23:05,  0.06it/s, v_num=5dwa, train_loss=0.421, moving_avg_reward=0.621, reward=0.560]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  33%|███▎      | 148/452 [40:24<1:22:59,  0.06it/s, v_num=5dwa, train_loss=-0.0175, moving_avg_reward=0.621, reward=0.574]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  33%|███▎      | 149/452 [40:45<1:22:53,  0.06it/s, v_num=5dwa, train_loss=0.318, moving_avg_reward=0.621, reward=0.638]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  33%|███▎      | 150/452 [41:04<1:22:40,  0.06it/s, v_num=5dwa, train_loss=0.958, moving_avg_reward=0.621, reward=0.666]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  33%|███▎      | 151/452 [41:20<1:22:24,  0.06it/s, v_num=5dwa, train_loss=1.070, moving_avg_reward=0.623, reward=0.739]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  34%|███▎      | 152/452 [41:40<1:22:15,  0.06it/s, v_num=5dwa, train_loss=0.893, moving_avg_reward=0.623, reward=0.709]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  34%|███▍      | 153/452 [41:57<1:22:00,  0.06it/s, v_num=5dwa, train_loss=0.901, moving_avg_reward=0.624, reward=0.638]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  34%|███▍      | 154/452 [42:09<1:21:34,  0.06it/s, v_num=5dwa, train_loss=0.554, moving_avg_reward=0.623, reward=0.603]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  34%|███▍      | 155/452 [42:25<1:21:17,  0.06it/s, v_num=5dwa, train_loss=0.858, moving_avg_reward=0.623, reward=0.619]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  35%|███▍      | 156/452 [42:40<1:20:57,  0.06it/s, v_num=5dwa, train_loss=0.984, moving_avg_reward=0.624, reward=0.650]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  35%|███▍      | 157/452 [42:51<1:20:32,  0.06it/s, v_num=5dwa, train_loss=0.619, moving_avg_reward=0.623, reward=0.539]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  35%|███▍      | 158/452 [43:05<1:20:11,  0.06it/s, v_num=5dwa, train_loss=1.190, moving_avg_reward=0.623, reward=0.660]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  35%|███▌      | 159/452 [43:21<1:19:53,  0.06it/s, v_num=5dwa, train_loss=1.060, moving_avg_reward=0.625, reward=0.783]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  35%|███▌      | 160/452 [43:34<1:19:32,  0.06it/s, v_num=5dwa, train_loss=1.120, moving_avg_reward=0.626, reward=0.756]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  36%|███▌      | 161/452 [43:46<1:19:07,  0.06it/s, v_num=5dwa, train_loss=1.270, moving_avg_reward=0.628, reward=0.803]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  36%|███▌      | 162/452 [43:59<1:18:45,  0.06it/s, v_num=5dwa, train_loss=0.832, moving_avg_reward=0.628, reward=0.677]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  36%|███▌      | 163/452 [44:12<1:18:23,  0.06it/s, v_num=5dwa, train_loss=0.913, moving_avg_reward=0.629, reward=0.716]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  36%|███▋      | 164/452 [44:22<1:17:56,  0.06it/s, v_num=5dwa, train_loss=0.350, moving_avg_reward=0.629, reward=0.639]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  37%|███▋      | 165/452 [44:32<1:17:29,  0.06it/s, v_num=5dwa, train_loss=0.300, moving_avg_reward=0.629, reward=0.615]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  37%|███▋      | 166/452 [44:43<1:17:03,  0.06it/s, v_num=5dwa, train_loss=0.895, moving_avg_reward=0.630, reward=0.687]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  37%|███▋      | 167/452 [44:56<1:16:42,  0.06it/s, v_num=5dwa, train_loss=-0.159, moving_avg_reward=0.629, reward=0.573]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  37%|███▋      | 168/452 [45:08<1:16:18,  0.06it/s, v_num=5dwa, train_loss=0.658, moving_avg_reward=0.629, reward=0.618] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  37%|███▋      | 169/452 [45:23<1:16:00,  0.06it/s, v_num=5dwa, train_loss=1.230, moving_avg_reward=0.631, reward=0.789]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  38%|███▊      | 170/452 [45:36<1:15:39,  0.06it/s, v_num=5dwa, train_loss=0.388, moving_avg_reward=0.632, reward=0.724]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  38%|███▊      | 171/452 [45:52<1:15:22,  0.06it/s, v_num=5dwa, train_loss=1.010, moving_avg_reward=0.631, reward=0.607]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  38%|███▊      | 172/452 [46:04<1:14:59,  0.06it/s, v_num=5dwa, train_loss=0.202, moving_avg_reward=0.633, reward=0.769]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  38%|███▊      | 173/452 [46:17<1:14:39,  0.06it/s, v_num=5dwa, train_loss=-0.101, moving_avg_reward=0.632, reward=0.577]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  38%|███▊      | 174/452 [46:31<1:14:19,  0.06it/s, v_num=5dwa, train_loss=1.030, moving_avg_reward=0.634, reward=0.857] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  39%|███▊      | 175/452 [46:47<1:14:03,  0.06it/s, v_num=5dwa, train_loss=-0.0685, moving_avg_reward=0.636, reward=0.802]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  39%|███▉      | 176/452 [47:00<1:13:43,  0.06it/s, v_num=5dwa, train_loss=1.200, moving_avg_reward=0.636, reward=0.657]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  39%|███▉      | 177/452 [47:14<1:13:23,  0.06it/s, v_num=5dwa, train_loss=-0.594, moving_avg_reward=0.636, reward=0.639]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  39%|███▉      | 178/452 [47:29<1:13:06,  0.06it/s, v_num=5dwa, train_loss=10.20, moving_avg_reward=0.637, reward=0.740] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  40%|███▉      | 179/452 [47:46<1:12:51,  0.06it/s, v_num=5dwa, train_loss=-1.42, moving_avg_reward=0.638, reward=0.673]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  40%|███▉      | 180/452 [48:04<1:12:39,  0.06it/s, v_num=5dwa, train_loss=2.790, moving_avg_reward=0.639, reward=0.802]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  40%|████      | 181/452 [48:21<1:12:24,  0.06it/s, v_num=5dwa, train_loss=-3.28, moving_avg_reward=0.640, reward=0.721]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  40%|████      | 182/452 [48:36<1:12:07,  0.06it/s, v_num=5dwa, train_loss=-3.29, moving_avg_reward=0.641, reward=0.731]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  40%|████      | 183/452 [48:53<1:11:52,  0.06it/s, v_num=5dwa, train_loss=9.870, moving_avg_reward=0.643, reward=0.857]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  41%|████      | 184/452 [49:13<1:11:41,  0.06it/s, v_num=5dwa, train_loss=2.940, moving_avg_reward=0.645, reward=0.831]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  41%|████      | 185/452 [49:35<1:11:33,  0.06it/s, v_num=5dwa, train_loss=-29.4, moving_avg_reward=0.646, reward=0.751]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  41%|████      | 186/452 [49:52<1:11:20,  0.06it/s, v_num=5dwa, train_loss=-5.37, moving_avg_reward=0.647, reward=0.767]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  41%|████▏     | 187/452 [50:13<1:11:10,  0.06it/s, v_num=5dwa, train_loss=7.070, moving_avg_reward=0.649, reward=0.832]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  42%|████▏     | 188/452 [50:29<1:10:54,  0.06it/s, v_num=5dwa, train_loss=0.172, moving_avg_reward=0.650, reward=0.699]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  42%|████▏     | 189/452 [50:48<1:10:41,  0.06it/s, v_num=5dwa, train_loss=0.760, moving_avg_reward=0.651, reward=0.800]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  42%|████▏     | 190/452 [51:09<1:10:32,  0.06it/s, v_num=5dwa, train_loss=-3.50, moving_avg_reward=0.652, reward=0.776]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  42%|████▏     | 191/452 [51:31<1:10:24,  0.06it/s, v_num=5dwa, train_loss=-38.6, moving_avg_reward=0.653, reward=0.717]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  42%|████▏     | 192/452 [51:47<1:10:08,  0.06it/s, v_num=5dwa, train_loss=0.303, moving_avg_reward=0.654, reward=0.793]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  43%|████▎     | 193/452 [52:04<1:09:53,  0.06it/s, v_num=5dwa, train_loss=0.773, moving_avg_reward=0.656, reward=0.824]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  43%|████▎     | 194/452 [52:26<1:09:44,  0.06it/s, v_num=5dwa, train_loss=0.739, moving_avg_reward=0.657, reward=0.782]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  43%|████▎     | 195/452 [52:43<1:09:29,  0.06it/s, v_num=5dwa, train_loss=1.010, moving_avg_reward=0.659, reward=0.833]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  43%|████▎     | 196/452 [52:58<1:09:11,  0.06it/s, v_num=5dwa, train_loss=0.243, moving_avg_reward=0.660, reward=0.767]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  44%|████▎     | 197/452 [53:17<1:08:58,  0.06it/s, v_num=5dwa, train_loss=0.875, moving_avg_reward=0.662, reward=0.846]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  44%|████▍     | 198/452 [53:38<1:08:49,  0.06it/s, v_num=5dwa, train_loss=0.860, moving_avg_reward=0.664, reward=0.874]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  44%|████▍     | 199/452 [53:58<1:08:37,  0.06it/s, v_num=5dwa, train_loss=0.934, moving_avg_reward=0.666, reward=0.864]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  44%|████▍     | 200/452 [54:20<1:08:27,  0.06it/s, v_num=5dwa, train_loss=0.249, moving_avg_reward=0.668, reward=0.814]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  44%|████▍     | 201/452 [54:41<1:08:18,  0.06it/s, v_num=5dwa, train_loss=1.170, moving_avg_reward=0.670, reward=0.847]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  45%|████▍     | 202/452 [55:03<1:08:08,  0.06it/s, v_num=5dwa, train_loss=0.631, moving_avg_reward=0.671, reward=0.783]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  45%|████▍     | 203/452 [55:24<1:07:57,  0.06it/s, v_num=5dwa, train_loss=0.202, moving_avg_reward=0.672, reward=0.822]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  45%|████▌     | 204/452 [55:46<1:07:47,  0.06it/s, v_num=5dwa, train_loss=0.860, moving_avg_reward=0.674, reward=0.903]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  45%|████▌     | 205/452 [56:02<1:07:31,  0.06it/s, v_num=5dwa, train_loss=0.923, moving_avg_reward=0.677, reward=0.928]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  46%|████▌     | 206/452 [56:21<1:07:18,  0.06it/s, v_num=5dwa, train_loss=0.695, moving_avg_reward=0.679, reward=0.921]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  46%|████▌     | 207/452 [56:39<1:07:03,  0.06it/s, v_num=5dwa, train_loss=1.290, moving_avg_reward=0.682, reward=0.922]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  46%|████▌     | 208/452 [56:59<1:06:51,  0.06it/s, v_num=5dwa, train_loss=1.370, moving_avg_reward=0.684, reward=0.892]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  46%|████▌     | 209/452 [57:14<1:06:33,  0.06it/s, v_num=5dwa, train_loss=0.550, moving_avg_reward=0.686, reward=0.918]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  46%|████▋     | 210/452 [57:31<1:06:17,  0.06it/s, v_num=5dwa, train_loss=0.991, moving_avg_reward=0.689, reward=0.959]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  47%|████▋     | 211/452 [57:47<1:06:00,  0.06it/s, v_num=5dwa, train_loss=1.550, moving_avg_reward=0.692, reward=0.939]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  47%|████▋     | 212/452 [57:59<1:05:38,  0.06it/s, v_num=5dwa, train_loss=3.990, moving_avg_reward=0.694, reward=0.928]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  47%|████▋     | 213/452 [58:12<1:05:19,  0.06it/s, v_num=5dwa, train_loss=0.830, moving_avg_reward=0.696, reward=0.942]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  47%|████▋     | 214/452 [58:29<1:05:03,  0.06it/s, v_num=5dwa, train_loss=0.793, moving_avg_reward=0.699, reward=0.916]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  48%|████▊     | 215/452 [58:42<1:04:42,  0.06it/s, v_num=5dwa, train_loss=1.240, moving_avg_reward=0.701, reward=0.958]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  48%|████▊     | 216/452 [58:57<1:04:24,  0.06it/s, v_num=5dwa, train_loss=1.430, moving_avg_reward=0.704, reward=0.964]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  48%|████▊     | 217/452 [59:14<1:04:09,  0.06it/s, v_num=5dwa, train_loss=1.550, moving_avg_reward=0.706, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  48%|████▊     | 218/452 [59:29<1:03:51,  0.06it/s, v_num=5dwa, train_loss=0.613, moving_avg_reward=0.709, reward=0.970]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  48%|████▊     | 219/452 [59:46<1:03:36,  0.06it/s, v_num=5dwa, train_loss=1.560, moving_avg_reward=0.712, reward=0.965]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  49%|████▊     | 220/452 [1:00:00<1:03:17,  0.06it/s, v_num=5dwa, train_loss=0.863, moving_avg_reward=0.714, reward=0.962]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  49%|████▉     | 221/452 [1:00:17<1:03:01,  0.06it/s, v_num=5dwa, train_loss=0.825, moving_avg_reward=0.717, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  49%|████▉     | 222/452 [1:00:29<1:02:40,  0.06it/s, v_num=5dwa, train_loss=0.538, moving_avg_reward=0.719, reward=0.964]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  49%|████▉     | 223/452 [1:00:46<1:02:24,  0.06it/s, v_num=5dwa, train_loss=0.745, moving_avg_reward=0.722, reward=0.949]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  50%|████▉     | 224/452 [1:01:05<1:02:10,  0.06it/s, v_num=5dwa, train_loss=0.669, moving_avg_reward=0.724, reward=0.953]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  50%|████▉     | 225/452 [1:01:26<1:01:58,  0.06it/s, v_num=5dwa, train_loss=1.160, moving_avg_reward=0.726, reward=0.967]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  50%|█████     | 226/452 [1:01:42<1:01:42,  0.06it/s, v_num=5dwa, train_loss=0.761, moving_avg_reward=0.729, reward=0.963]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  50%|█████     | 227/452 [1:02:03<1:01:30,  0.06it/s, v_num=5dwa, train_loss=0.800, moving_avg_reward=0.731, reward=0.959]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  50%|█████     | 228/452 [1:02:24<1:01:19,  0.06it/s, v_num=5dwa, train_loss=-1.40, moving_avg_reward=0.733, reward=0.912]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  51%|█████     | 229/452 [1:02:46<1:01:07,  0.06it/s, v_num=5dwa, train_loss=0.235, moving_avg_reward=0.735, reward=0.921]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  51%|█████     | 230/452 [1:03:06<1:00:55,  0.06it/s, v_num=5dwa, train_loss=0.490, moving_avg_reward=0.737, reward=0.971]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  51%|█████     | 231/452 [1:03:28<1:00:43,  0.06it/s, v_num=5dwa, train_loss=0.321, moving_avg_reward=0.738, reward=0.854]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  51%|█████▏    | 232/452 [1:03:49<1:00:31,  0.06it/s, v_num=5dwa, train_loss=-1.93, moving_avg_reward=0.740, reward=0.905]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  52%|█████▏    | 233/452 [1:04:11<1:00:19,  0.06it/s, v_num=5dwa, train_loss=-22.7, moving_avg_reward=0.741, reward=0.831]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  52%|█████▏    | 234/452 [1:04:32<1:00:07,  0.06it/s, v_num=5dwa, train_loss=0.0093, moving_avg_reward=0.742, reward=0.807]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  52%|█████▏    | 235/452 [1:04:53<59:55,  0.06it/s, v_num=5dwa, train_loss=1.090, moving_avg_reward=0.743, reward=0.922]   

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  52%|█████▏    | 236/452 [1:05:15<59:43,  0.06it/s, v_num=5dwa, train_loss=0.668, moving_avg_reward=0.746, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  52%|█████▏    | 237/452 [1:05:30<59:25,  0.06it/s, v_num=5dwa, train_loss=0.515, moving_avg_reward=0.748, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  53%|█████▎    | 238/452 [1:05:47<59:09,  0.06it/s, v_num=5dwa, train_loss=0.656, moving_avg_reward=0.750, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  53%|█████▎    | 239/452 [1:06:04<58:53,  0.06it/s, v_num=5dwa, train_loss=1.250, moving_avg_reward=0.752, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  53%|█████▎    | 240/452 [1:06:23<58:38,  0.06it/s, v_num=5dwa, train_loss=0.846, moving_avg_reward=0.755, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  53%|█████▎    | 241/452 [1:06:36<58:19,  0.06it/s, v_num=5dwa, train_loss=0.357, moving_avg_reward=0.757, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  54%|█████▎    | 242/452 [1:06:53<58:03,  0.06it/s, v_num=5dwa, train_loss=0.829, moving_avg_reward=0.759, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  54%|█████▍    | 243/452 [1:07:07<57:43,  0.06it/s, v_num=5dwa, train_loss=1.020, moving_avg_reward=0.761, reward=0.970]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  54%|█████▍    | 244/452 [1:07:22<57:26,  0.06it/s, v_num=5dwa, train_loss=0.0934, moving_avg_reward=0.763, reward=0.953]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  54%|█████▍    | 245/452 [1:07:38<57:08,  0.06it/s, v_num=5dwa, train_loss=0.681, moving_avg_reward=0.765, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  54%|█████▍    | 246/452 [1:07:56<56:53,  0.06it/s, v_num=5dwa, train_loss=1.100, moving_avg_reward=0.767, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  55%|█████▍    | 247/452 [1:08:07<56:32,  0.06it/s, v_num=5dwa, train_loss=0.271, moving_avg_reward=0.769, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  55%|█████▍    | 248/452 [1:08:21<56:13,  0.06it/s, v_num=5dwa, train_loss=0.764, moving_avg_reward=0.772, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  55%|█████▌    | 249/452 [1:08:36<55:56,  0.06it/s, v_num=5dwa, train_loss=0.493, moving_avg_reward=0.774, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  55%|█████▌    | 250/452 [1:08:51<55:38,  0.06it/s, v_num=5dwa, train_loss=1.360, moving_avg_reward=0.776, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  56%|█████▌    | 251/452 [1:09:06<55:20,  0.06it/s, v_num=5dwa, train_loss=0.265, moving_avg_reward=0.778, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  56%|█████▌    | 252/452 [1:09:20<55:01,  0.06it/s, v_num=5dwa, train_loss=0.700, moving_avg_reward=0.780, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  56%|█████▌    | 253/452 [1:09:34<54:43,  0.06it/s, v_num=5dwa, train_loss=0.458, moving_avg_reward=0.782, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  56%|█████▌    | 254/452 [1:09:49<54:25,  0.06it/s, v_num=5dwa, train_loss=0.347, moving_avg_reward=0.784, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  56%|█████▋    | 255/452 [1:10:07<54:10,  0.06it/s, v_num=5dwa, train_loss=0.436, moving_avg_reward=0.785, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  57%|█████▋    | 256/452 [1:10:21<53:52,  0.06it/s, v_num=5dwa, train_loss=0.486, moving_avg_reward=0.787, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  57%|█████▋    | 257/452 [1:10:38<53:35,  0.06it/s, v_num=5dwa, train_loss=0.251, moving_avg_reward=0.789, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  57%|█████▋    | 258/452 [1:10:53<53:18,  0.06it/s, v_num=5dwa, train_loss=0.858, moving_avg_reward=0.791, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  57%|█████▋    | 259/452 [1:11:08<53:01,  0.06it/s, v_num=5dwa, train_loss=0.204, moving_avg_reward=0.793, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  58%|█████▊    | 260/452 [1:11:27<52:46,  0.06it/s, v_num=5dwa, train_loss=0.688, moving_avg_reward=0.795, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  58%|█████▊    | 261/452 [1:11:49<52:33,  0.06it/s, v_num=5dwa, train_loss=0.391, moving_avg_reward=0.796, reward=0.923]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  58%|█████▊    | 262/452 [1:12:05<52:17,  0.06it/s, v_num=5dwa, train_loss=0.987, moving_avg_reward=0.798, reward=0.969]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  58%|█████▊    | 263/452 [1:12:19<51:58,  0.06it/s, v_num=5dwa, train_loss=0.821, moving_avg_reward=0.800, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  58%|█████▊    | 264/452 [1:12:34<51:41,  0.06it/s, v_num=5dwa, train_loss=0.349, moving_avg_reward=0.801, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  59%|█████▊    | 265/452 [1:12:48<51:22,  0.06it/s, v_num=5dwa, train_loss=0.384, moving_avg_reward=0.803, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  59%|█████▉    | 266/452 [1:13:04<51:05,  0.06it/s, v_num=5dwa, train_loss=0.553, moving_avg_reward=0.805, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  59%|█████▉    | 267/452 [1:13:17<50:47,  0.06it/s, v_num=5dwa, train_loss=0.463, moving_avg_reward=0.806, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  59%|█████▉    | 268/452 [1:13:29<50:27,  0.06it/s, v_num=5dwa, train_loss=0.354, moving_avg_reward=0.808, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  60%|█████▉    | 269/452 [1:13:45<50:10,  0.06it/s, v_num=5dwa, train_loss=0.623, moving_avg_reward=0.810, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  60%|█████▉    | 270/452 [1:13:58<49:51,  0.06it/s, v_num=5dwa, train_loss=0.480, moving_avg_reward=0.811, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  60%|█████▉    | 271/452 [1:14:12<49:33,  0.06it/s, v_num=5dwa, train_loss=0.519, moving_avg_reward=0.813, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  60%|██████    | 272/452 [1:14:25<49:15,  0.06it/s, v_num=5dwa, train_loss=0.205, moving_avg_reward=0.815, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  60%|██████    | 273/452 [1:14:38<48:56,  0.06it/s, v_num=5dwa, train_loss=0.401, moving_avg_reward=0.816, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  61%|██████    | 274/452 [1:14:51<48:37,  0.06it/s, v_num=5dwa, train_loss=0.553, moving_avg_reward=0.818, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  61%|██████    | 275/452 [1:15:05<48:19,  0.06it/s, v_num=5dwa, train_loss=0.361, moving_avg_reward=0.820, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  61%|██████    | 276/452 [1:15:22<48:03,  0.06it/s, v_num=5dwa, train_loss=0.581, moving_avg_reward=0.821, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  61%|██████▏   | 277/452 [1:15:34<47:44,  0.06it/s, v_num=5dwa, train_loss=0.236, moving_avg_reward=0.823, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  62%|██████▏   | 278/452 [1:15:49<47:27,  0.06it/s, v_num=5dwa, train_loss=0.373, moving_avg_reward=0.824, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  62%|██████▏   | 279/452 [1:16:06<47:11,  0.06it/s, v_num=5dwa, train_loss=0.459, moving_avg_reward=0.826, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  62%|██████▏   | 280/452 [1:16:23<46:55,  0.06it/s, v_num=5dwa, train_loss=0.310, moving_avg_reward=0.827, reward=0.971]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  62%|██████▏   | 281/452 [1:16:36<46:37,  0.06it/s, v_num=5dwa, train_loss=0.449, moving_avg_reward=0.829, reward=0.969]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  62%|██████▏   | 282/452 [1:16:55<46:22,  0.06it/s, v_num=5dwa, train_loss=0.337, moving_avg_reward=0.830, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  63%|██████▎   | 283/452 [1:17:10<46:05,  0.06it/s, v_num=5dwa, train_loss=0.501, moving_avg_reward=0.832, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  63%|██████▎   | 284/452 [1:17:24<45:47,  0.06it/s, v_num=5dwa, train_loss=0.435, moving_avg_reward=0.833, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  63%|██████▎   | 285/452 [1:17:42<45:32,  0.06it/s, v_num=5dwa, train_loss=0.229, moving_avg_reward=0.835, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  63%|██████▎   | 286/452 [1:17:57<45:15,  0.06it/s, v_num=5dwa, train_loss=0.160, moving_avg_reward=0.836, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  63%|██████▎   | 287/452 [1:18:16<45:00,  0.06it/s, v_num=5dwa, train_loss=0.229, moving_avg_reward=0.837, reward=0.965]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  64%|██████▎   | 288/452 [1:18:35<44:45,  0.06it/s, v_num=5dwa, train_loss=0.470, moving_avg_reward=0.839, reward=0.971]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  64%|██████▍   | 289/452 [1:18:54<44:30,  0.06it/s, v_num=5dwa, train_loss=0.328, moving_avg_reward=0.840, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  64%|██████▍   | 290/452 [1:19:14<44:16,  0.06it/s, v_num=5dwa, train_loss=0.220, moving_avg_reward=0.841, reward=0.971]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  64%|██████▍   | 291/452 [1:19:36<44:02,  0.06it/s, v_num=5dwa, train_loss=0.182, moving_avg_reward=0.842, reward=0.919]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  65%|██████▍   | 292/452 [1:19:58<43:49,  0.06it/s, v_num=5dwa, train_loss=0.528, moving_avg_reward=0.843, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  65%|██████▍   | 293/452 [1:20:18<43:34,  0.06it/s, v_num=5dwa, train_loss=0.256, moving_avg_reward=0.845, reward=0.967]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  65%|██████▌   | 294/452 [1:20:36<43:19,  0.06it/s, v_num=5dwa, train_loss=0.256, moving_avg_reward=0.846, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  65%|██████▌   | 295/452 [1:20:55<43:04,  0.06it/s, v_num=5dwa, train_loss=0.333, moving_avg_reward=0.847, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  65%|██████▌   | 296/452 [1:21:14<42:48,  0.06it/s, v_num=5dwa, train_loss=0.460, moving_avg_reward=0.848, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  66%|██████▌   | 297/452 [1:21:32<42:33,  0.06it/s, v_num=5dwa, train_loss=0.809, moving_avg_reward=0.850, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  66%|██████▌   | 298/452 [1:21:54<42:19,  0.06it/s, v_num=5dwa, train_loss=0.271, moving_avg_reward=0.851, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  66%|██████▌   | 299/452 [1:22:15<42:05,  0.06it/s, v_num=5dwa, train_loss=0.185, moving_avg_reward=0.852, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  66%|██████▋   | 300/452 [1:22:32<41:49,  0.06it/s, v_num=5dwa, train_loss=0.416, moving_avg_reward=0.853, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  67%|██████▋   | 301/452 [1:22:54<41:35,  0.06it/s, v_num=5dwa, train_loss=0.447, moving_avg_reward=0.855, reward=0.971]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  67%|██████▋   | 302/452 [1:23:15<41:21,  0.06it/s, v_num=5dwa, train_loss=-2.03, moving_avg_reward=0.855, reward=0.923]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  67%|██████▋   | 303/452 [1:23:37<41:07,  0.06it/s, v_num=5dwa, train_loss=-1.94, moving_avg_reward=0.856, reward=0.905]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  67%|██████▋   | 304/452 [1:23:58<40:52,  0.06it/s, v_num=5dwa, train_loss=0.00799, moving_avg_reward=0.856, reward=0.918]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  67%|██████▋   | 305/452 [1:24:17<40:37,  0.06it/s, v_num=5dwa, train_loss=0.518, moving_avg_reward=0.858, reward=0.974]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  68%|██████▊   | 306/452 [1:24:36<40:21,  0.06it/s, v_num=5dwa, train_loss=0.443, moving_avg_reward=0.859, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  68%|██████▊   | 307/452 [1:24:53<40:05,  0.06it/s, v_num=5dwa, train_loss=0.342, moving_avg_reward=0.860, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  68%|██████▊   | 308/452 [1:25:08<39:48,  0.06it/s, v_num=5dwa, train_loss=0.330, moving_avg_reward=0.861, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  68%|██████▊   | 309/452 [1:25:25<39:31,  0.06it/s, v_num=5dwa, train_loss=0.293, moving_avg_reward=0.862, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  69%|██████▊   | 310/452 [1:25:38<39:13,  0.06it/s, v_num=5dwa, train_loss=0.538, moving_avg_reward=0.863, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  69%|██████▉   | 311/452 [1:25:53<38:56,  0.06it/s, v_num=5dwa, train_loss=0.436, moving_avg_reward=0.864, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  69%|██████▉   | 312/452 [1:26:06<38:38,  0.06it/s, v_num=5dwa, train_loss=0.315, moving_avg_reward=0.866, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  69%|██████▉   | 313/452 [1:26:20<38:20,  0.06it/s, v_num=5dwa, train_loss=0.177, moving_avg_reward=0.867, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  69%|██████▉   | 314/452 [1:26:32<38:02,  0.06it/s, v_num=5dwa, train_loss=0.162, moving_avg_reward=0.868, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  70%|██████▉   | 315/452 [1:26:44<37:43,  0.06it/s, v_num=5dwa, train_loss=0.223, moving_avg_reward=0.869, reward=0.970]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  70%|██████▉   | 316/452 [1:26:58<37:25,  0.06it/s, v_num=5dwa, train_loss=0.221, moving_avg_reward=0.870, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  70%|███████   | 317/452 [1:27:15<37:09,  0.06it/s, v_num=5dwa, train_loss=0.307, moving_avg_reward=0.871, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  70%|███████   | 318/452 [1:27:29<36:51,  0.06it/s, v_num=5dwa, train_loss=0.234, moving_avg_reward=0.872, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  71%|███████   | 319/452 [1:27:41<36:33,  0.06it/s, v_num=5dwa, train_loss=0.242, moving_avg_reward=0.873, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  71%|███████   | 320/452 [1:27:54<36:15,  0.06it/s, v_num=5dwa, train_loss=0.215, moving_avg_reward=0.874, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  71%|███████   | 321/452 [1:28:08<35:58,  0.06it/s, v_num=5dwa, train_loss=0.363, moving_avg_reward=0.875, reward=0.970]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  71%|███████   | 322/452 [1:28:20<35:40,  0.06it/s, v_num=5dwa, train_loss=0.162, moving_avg_reward=0.876, reward=0.951]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  71%|███████▏  | 323/452 [1:28:32<35:21,  0.06it/s, v_num=5dwa, train_loss=0.322, moving_avg_reward=0.877, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  72%|███████▏  | 324/452 [1:28:42<35:02,  0.06it/s, v_num=5dwa, train_loss=0.178, moving_avg_reward=0.878, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  72%|███████▏  | 325/452 [1:28:54<34:44,  0.06it/s, v_num=5dwa, train_loss=0.509, moving_avg_reward=0.879, reward=0.966]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  72%|███████▏  | 326/452 [1:29:08<34:27,  0.06it/s, v_num=5dwa, train_loss=0.241, moving_avg_reward=0.880, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  72%|███████▏  | 327/452 [1:29:29<34:12,  0.06it/s, v_num=5dwa, train_loss=0.599, moving_avg_reward=0.881, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  73%|███████▎  | 328/452 [1:29:42<33:54,  0.06it/s, v_num=5dwa, train_loss=0.368, moving_avg_reward=0.881, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  73%|███████▎  | 329/452 [1:29:58<33:38,  0.06it/s, v_num=5dwa, train_loss=0.145, moving_avg_reward=0.882, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  73%|███████▎  | 330/452 [1:30:11<33:20,  0.06it/s, v_num=5dwa, train_loss=0.322, moving_avg_reward=0.883, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  73%|███████▎  | 331/452 [1:30:21<33:01,  0.06it/s, v_num=5dwa, train_loss=0.252, moving_avg_reward=0.884, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  73%|███████▎  | 332/452 [1:30:32<32:43,  0.06it/s, v_num=5dwa, train_loss=0.230, moving_avg_reward=0.885, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  74%|███████▎  | 333/452 [1:30:47<32:26,  0.06it/s, v_num=5dwa, train_loss=0.219, moving_avg_reward=0.886, reward=0.966]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  74%|███████▍  | 334/452 [1:31:05<32:10,  0.06it/s, v_num=5dwa, train_loss=0.132, moving_avg_reward=0.887, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  74%|███████▍  | 335/452 [1:31:19<31:53,  0.06it/s, v_num=5dwa, train_loss=0.0866, moving_avg_reward=0.888, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  74%|███████▍  | 336/452 [1:31:36<31:37,  0.06it/s, v_num=5dwa, train_loss=0.328, moving_avg_reward=0.889, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  75%|███████▍  | 337/452 [1:31:48<31:19,  0.06it/s, v_num=5dwa, train_loss=0.145, moving_avg_reward=0.890, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  75%|███████▍  | 338/452 [1:32:03<31:03,  0.06it/s, v_num=5dwa, train_loss=0.189, moving_avg_reward=0.890, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  75%|███████▌  | 339/452 [1:32:19<30:46,  0.06it/s, v_num=5dwa, train_loss=0.216, moving_avg_reward=0.891, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  75%|███████▌  | 340/452 [1:32:33<30:29,  0.06it/s, v_num=5dwa, train_loss=0.308, moving_avg_reward=0.892, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  75%|███████▌  | 341/452 [1:32:51<30:13,  0.06it/s, v_num=5dwa, train_loss=0.237, moving_avg_reward=0.893, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  76%|███████▌  | 342/452 [1:33:07<29:57,  0.06it/s, v_num=5dwa, train_loss=0.181, moving_avg_reward=0.894, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  76%|███████▌  | 343/452 [1:33:21<29:40,  0.06it/s, v_num=5dwa, train_loss=0.195, moving_avg_reward=0.895, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  76%|███████▌  | 344/452 [1:33:34<29:22,  0.06it/s, v_num=5dwa, train_loss=0.248, moving_avg_reward=0.895, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  76%|███████▋  | 345/452 [1:33:48<29:05,  0.06it/s, v_num=5dwa, train_loss=0.168, moving_avg_reward=0.896, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  77%|███████▋  | 346/452 [1:34:00<28:47,  0.06it/s, v_num=5dwa, train_loss=0.136, moving_avg_reward=0.897, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  77%|███████▋  | 347/452 [1:34:12<28:30,  0.06it/s, v_num=5dwa, train_loss=0.133, moving_avg_reward=0.898, reward=0.966]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  77%|███████▋  | 348/452 [1:34:25<28:13,  0.06it/s, v_num=5dwa, train_loss=0.214, moving_avg_reward=0.898, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  77%|███████▋  | 349/452 [1:34:36<27:55,  0.06it/s, v_num=5dwa, train_loss=0.128, moving_avg_reward=0.899, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  77%|███████▋  | 350/452 [1:34:46<27:37,  0.06it/s, v_num=5dwa, train_loss=0.292, moving_avg_reward=0.900, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  78%|███████▊  | 351/452 [1:35:00<27:20,  0.06it/s, v_num=5dwa, train_loss=0.135, moving_avg_reward=0.901, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  78%|███████▊  | 352/452 [1:35:15<27:03,  0.06it/s, v_num=5dwa, train_loss=0.117, moving_avg_reward=0.901, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  78%|███████▊  | 353/452 [1:35:27<26:46,  0.06it/s, v_num=5dwa, train_loss=0.210, moving_avg_reward=0.902, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  78%|███████▊  | 354/452 [1:35:40<26:29,  0.06it/s, v_num=5dwa, train_loss=0.129, moving_avg_reward=0.903, reward=0.971]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  79%|███████▊  | 355/452 [1:35:54<26:12,  0.06it/s, v_num=5dwa, train_loss=0.163, moving_avg_reward=0.904, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  79%|███████▉  | 356/452 [1:36:06<25:54,  0.06it/s, v_num=5dwa, train_loss=0.151, moving_avg_reward=0.904, reward=0.970]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  79%|███████▉  | 357/452 [1:36:16<25:37,  0.06it/s, v_num=5dwa, train_loss=0.118, moving_avg_reward=0.905, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  79%|███████▉  | 358/452 [1:36:28<25:19,  0.06it/s, v_num=5dwa, train_loss=0.124, moving_avg_reward=0.906, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  79%|███████▉  | 359/452 [1:36:41<25:02,  0.06it/s, v_num=5dwa, train_loss=0.345, moving_avg_reward=0.907, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  80%|███████▉  | 360/452 [1:36:57<24:46,  0.06it/s, v_num=5dwa, train_loss=0.0862, moving_avg_reward=0.907, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  80%|███████▉  | 361/452 [1:37:16<24:31,  0.06it/s, v_num=5dwa, train_loss=0.372, moving_avg_reward=0.908, reward=0.974] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  80%|████████  | 362/452 [1:37:29<24:14,  0.06it/s, v_num=5dwa, train_loss=0.344, moving_avg_reward=0.909, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  80%|████████  | 363/452 [1:37:41<23:57,  0.06it/s, v_num=5dwa, train_loss=0.136, moving_avg_reward=0.909, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  81%|████████  | 364/452 [1:37:57<23:40,  0.06it/s, v_num=5dwa, train_loss=0.206, moving_avg_reward=0.910, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  81%|████████  | 365/452 [1:38:08<23:23,  0.06it/s, v_num=5dwa, train_loss=0.302, moving_avg_reward=0.911, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  81%|████████  | 366/452 [1:38:25<23:07,  0.06it/s, v_num=5dwa, train_loss=0.159, moving_avg_reward=0.911, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  81%|████████  | 367/452 [1:38:39<22:50,  0.06it/s, v_num=5dwa, train_loss=0.201, moving_avg_reward=0.912, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  81%|████████▏ | 368/452 [1:38:54<22:34,  0.06it/s, v_num=5dwa, train_loss=0.140, moving_avg_reward=0.912, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  82%|████████▏ | 369/452 [1:39:08<22:18,  0.06it/s, v_num=5dwa, train_loss=0.276, moving_avg_reward=0.913, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  82%|████████▏ | 370/452 [1:39:23<22:01,  0.06it/s, v_num=5dwa, train_loss=0.227, moving_avg_reward=0.914, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  82%|████████▏ | 371/452 [1:39:34<21:44,  0.06it/s, v_num=5dwa, train_loss=0.121, moving_avg_reward=0.914, reward=0.970]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  82%|████████▏ | 372/452 [1:39:49<21:28,  0.06it/s, v_num=5dwa, train_loss=0.131, moving_avg_reward=0.915, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  83%|████████▎ | 373/452 [1:40:06<21:12,  0.06it/s, v_num=5dwa, train_loss=0.277, moving_avg_reward=0.915, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  83%|████████▎ | 374/452 [1:40:20<20:55,  0.06it/s, v_num=5dwa, train_loss=0.114, moving_avg_reward=0.916, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  83%|████████▎ | 375/452 [1:40:33<20:38,  0.06it/s, v_num=5dwa, train_loss=0.118, moving_avg_reward=0.917, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  83%|████████▎ | 376/452 [1:40:50<20:22,  0.06it/s, v_num=5dwa, train_loss=0.136, moving_avg_reward=0.917, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  83%|████████▎ | 377/452 [1:41:09<20:07,  0.06it/s, v_num=5dwa, train_loss=0.226, moving_avg_reward=0.918, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  84%|████████▎ | 378/452 [1:41:26<19:51,  0.06it/s, v_num=5dwa, train_loss=0.127, moving_avg_reward=0.919, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  84%|████████▍ | 379/452 [1:41:38<19:34,  0.06it/s, v_num=5dwa, train_loss=0.0911, moving_avg_reward=0.919, reward=0.970]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  84%|████████▍ | 380/452 [1:41:53<19:18,  0.06it/s, v_num=5dwa, train_loss=0.179, moving_avg_reward=0.920, reward=0.975] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  84%|████████▍ | 381/452 [1:42:05<19:01,  0.06it/s, v_num=5dwa, train_loss=0.0863, moving_avg_reward=0.920, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  85%|████████▍ | 382/452 [1:42:20<18:45,  0.06it/s, v_num=5dwa, train_loss=0.135, moving_avg_reward=0.921, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  85%|████████▍ | 383/452 [1:42:35<18:29,  0.06it/s, v_num=5dwa, train_loss=0.214, moving_avg_reward=0.921, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  85%|████████▍ | 384/452 [1:42:47<18:12,  0.06it/s, v_num=5dwa, train_loss=0.180, moving_avg_reward=0.922, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  85%|████████▌ | 385/452 [1:43:04<17:56,  0.06it/s, v_num=5dwa, train_loss=0.104, moving_avg_reward=0.922, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  85%|████████▌ | 386/452 [1:43:18<17:39,  0.06it/s, v_num=5dwa, train_loss=0.143, moving_avg_reward=0.923, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  86%|████████▌ | 387/452 [1:43:28<17:22,  0.06it/s, v_num=5dwa, train_loss=0.112, moving_avg_reward=0.923, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  86%|████████▌ | 388/452 [1:43:43<17:06,  0.06it/s, v_num=5dwa, train_loss=0.0952, moving_avg_reward=0.924, reward=0.970]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  86%|████████▌ | 389/452 [1:44:00<16:50,  0.06it/s, v_num=5dwa, train_loss=0.0776, moving_avg_reward=0.924, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  86%|████████▋ | 390/452 [1:44:15<16:34,  0.06it/s, v_num=5dwa, train_loss=0.134, moving_avg_reward=0.925, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  87%|████████▋ | 391/452 [1:44:34<16:18,  0.06it/s, v_num=5dwa, train_loss=0.0836, moving_avg_reward=0.926, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  87%|████████▋ | 392/452 [1:44:46<16:02,  0.06it/s, v_num=5dwa, train_loss=0.102, moving_avg_reward=0.926, reward=0.969] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  87%|████████▋ | 393/452 [1:44:58<15:45,  0.06it/s, v_num=5dwa, train_loss=-21.0, moving_avg_reward=0.926, reward=0.946]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  87%|████████▋ | 394/452 [1:45:15<15:29,  0.06it/s, v_num=5dwa, train_loss=0.227, moving_avg_reward=0.927, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  87%|████████▋ | 395/452 [1:45:27<15:13,  0.06it/s, v_num=5dwa, train_loss=0.0805, moving_avg_reward=0.927, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  88%|████████▊ | 396/452 [1:45:42<14:56,  0.06it/s, v_num=5dwa, train_loss=0.0781, moving_avg_reward=0.928, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  88%|████████▊ | 397/452 [1:46:01<14:41,  0.06it/s, v_num=5dwa, train_loss=0.111, moving_avg_reward=0.928, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  88%|████████▊ | 398/452 [1:46:16<14:25,  0.06it/s, v_num=5dwa, train_loss=0.0663, moving_avg_reward=0.929, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  88%|████████▊ | 399/452 [1:46:33<14:09,  0.06it/s, v_num=5dwa, train_loss=0.188, moving_avg_reward=0.929, reward=0.977] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  88%|████████▊ | 400/452 [1:46:53<13:53,  0.06it/s, v_num=5dwa, train_loss=0.176, moving_avg_reward=0.930, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  89%|████████▊ | 401/452 [1:47:06<13:37,  0.06it/s, v_num=5dwa, train_loss=0.170, moving_avg_reward=0.930, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  89%|████████▉ | 402/452 [1:47:21<13:21,  0.06it/s, v_num=5dwa, train_loss=0.165, moving_avg_reward=0.930, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  89%|████████▉ | 403/452 [1:47:35<13:04,  0.06it/s, v_num=5dwa, train_loss=0.121, moving_avg_reward=0.931, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  89%|████████▉ | 404/452 [1:47:52<12:48,  0.06it/s, v_num=5dwa, train_loss=0.177, moving_avg_reward=0.931, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  90%|████████▉ | 405/452 [1:48:01<12:32,  0.06it/s, v_num=5dwa, train_loss=0.116, moving_avg_reward=0.932, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  90%|████████▉ | 406/452 [1:48:11<12:15,  0.06it/s, v_num=5dwa, train_loss=0.191, moving_avg_reward=0.932, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  90%|█████████ | 407/452 [1:48:31<11:59,  0.06it/s, v_num=5dwa, train_loss=0.276, moving_avg_reward=0.933, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  90%|█████████ | 408/452 [1:48:53<11:44,  0.06it/s, v_num=5dwa, train_loss=0.150, moving_avg_reward=0.933, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  90%|█████████ | 409/452 [1:49:13<11:28,  0.06it/s, v_num=5dwa, train_loss=0.138, moving_avg_reward=0.934, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  91%|█████████ | 410/452 [1:49:30<11:13,  0.06it/s, v_num=5dwa, train_loss=0.0574, moving_avg_reward=0.934, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  91%|█████████ | 411/452 [1:49:46<10:57,  0.06it/s, v_num=5dwa, train_loss=0.0657, moving_avg_reward=0.934, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  91%|█████████ | 412/452 [1:50:02<10:40,  0.06it/s, v_num=5dwa, train_loss=0.0629, moving_avg_reward=0.935, reward=0.970]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  91%|█████████▏| 413/452 [1:50:22<10:25,  0.06it/s, v_num=5dwa, train_loss=0.0915, moving_avg_reward=0.935, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  92%|█████████▏| 414/452 [1:50:37<10:09,  0.06it/s, v_num=5dwa, train_loss=0.0319, moving_avg_reward=0.936, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  92%|█████████▏| 415/452 [1:50:55<09:53,  0.06it/s, v_num=5dwa, train_loss=0.179, moving_avg_reward=0.936, reward=0.970] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  92%|█████████▏| 416/452 [1:51:10<09:37,  0.06it/s, v_num=5dwa, train_loss=0.0722, moving_avg_reward=0.936, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  92%|█████████▏| 417/452 [1:51:32<09:21,  0.06it/s, v_num=5dwa, train_loss=-0.73, moving_avg_reward=0.936, reward=0.917] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  92%|█████████▏| 418/452 [1:51:48<09:05,  0.06it/s, v_num=5dwa, train_loss=0.0807, moving_avg_reward=0.937, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  93%|█████████▎| 419/452 [1:51:56<08:49,  0.06it/s, v_num=5dwa, train_loss=0.0985, moving_avg_reward=0.937, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  93%|█████████▎| 420/452 [1:52:08<08:32,  0.06it/s, v_num=5dwa, train_loss=0.0567, moving_avg_reward=0.937, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  93%|█████████▎| 421/452 [1:52:18<08:16,  0.06it/s, v_num=5dwa, train_loss=0.0802, moving_avg_reward=0.938, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  93%|█████████▎| 422/452 [1:52:27<07:59,  0.06it/s, v_num=5dwa, train_loss=0.072, moving_avg_reward=0.938, reward=0.977] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  94%|█████████▎| 423/452 [1:52:41<07:43,  0.06it/s, v_num=5dwa, train_loss=0.148, moving_avg_reward=0.939, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  94%|█████████▍| 424/452 [1:52:51<07:27,  0.06it/s, v_num=5dwa, train_loss=0.130, moving_avg_reward=0.939, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  94%|█████████▍| 425/452 [1:53:03<07:10,  0.06it/s, v_num=5dwa, train_loss=0.0215, moving_avg_reward=0.939, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  94%|█████████▍| 426/452 [1:53:13<06:54,  0.06it/s, v_num=5dwa, train_loss=0.0457, moving_avg_reward=0.940, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  94%|█████████▍| 427/452 [1:53:22<06:38,  0.06it/s, v_num=5dwa, train_loss=0.0589, moving_avg_reward=0.940, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  95%|█████████▍| 428/452 [1:53:31<06:21,  0.06it/s, v_num=5dwa, train_loss=0.0793, moving_avg_reward=0.940, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  95%|█████████▍| 429/452 [1:53:40<06:05,  0.06it/s, v_num=5dwa, train_loss=0.114, moving_avg_reward=0.941, reward=0.972] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  95%|█████████▌| 430/452 [1:53:51<05:49,  0.06it/s, v_num=5dwa, train_loss=0.0758, moving_avg_reward=0.941, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  95%|█████████▌| 431/452 [1:54:00<05:33,  0.06it/s, v_num=5dwa, train_loss=0.0388, moving_avg_reward=0.942, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  96%|█████████▌| 432/452 [1:54:08<05:17,  0.06it/s, v_num=5dwa, train_loss=0.0474, moving_avg_reward=0.942, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  96%|█████████▌| 433/452 [1:54:18<05:00,  0.06it/s, v_num=5dwa, train_loss=0.0499, moving_avg_reward=0.942, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  96%|█████████▌| 434/452 [1:54:26<04:44,  0.06it/s, v_num=5dwa, train_loss=0.0413, moving_avg_reward=0.943, reward=0.983]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  96%|█████████▌| 435/452 [1:54:33<04:28,  0.06it/s, v_num=5dwa, train_loss=0.0602, moving_avg_reward=0.943, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  96%|█████████▋| 436/452 [1:54:42<04:12,  0.06it/s, v_num=5dwa, train_loss=0.0663, moving_avg_reward=0.943, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  97%|█████████▋| 437/452 [1:54:50<03:56,  0.06it/s, v_num=5dwa, train_loss=0.0914, moving_avg_reward=0.944, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  97%|█████████▋| 438/452 [1:54:59<03:40,  0.06it/s, v_num=5dwa, train_loss=0.0398, moving_avg_reward=0.944, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  97%|█████████▋| 439/452 [1:55:08<03:24,  0.06it/s, v_num=5dwa, train_loss=0.101, moving_avg_reward=0.944, reward=0.978] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  97%|█████████▋| 440/452 [1:55:16<03:08,  0.06it/s, v_num=5dwa, train_loss=0.0635, moving_avg_reward=0.945, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  98%|█████████▊| 441/452 [1:55:29<02:52,  0.06it/s, v_num=5dwa, train_loss=0.139, moving_avg_reward=0.945, reward=0.974] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  98%|█████████▊| 442/452 [1:55:38<02:36,  0.06it/s, v_num=5dwa, train_loss=0.0503, moving_avg_reward=0.945, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  98%|█████████▊| 443/452 [1:55:46<02:21,  0.06it/s, v_num=5dwa, train_loss=0.0615, moving_avg_reward=0.946, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  98%|█████████▊| 444/452 [1:55:56<02:05,  0.06it/s, v_num=5dwa, train_loss=0.0461, moving_avg_reward=0.946, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  98%|█████████▊| 445/452 [1:56:03<01:49,  0.06it/s, v_num=5dwa, train_loss=0.0475, moving_avg_reward=0.946, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  99%|█████████▊| 446/452 [1:56:12<01:33,  0.06it/s, v_num=5dwa, train_loss=0.0246, moving_avg_reward=0.946, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  99%|█████████▉| 447/452 [1:56:21<01:18,  0.06it/s, v_num=5dwa, train_loss=0.0348, moving_avg_reward=0.947, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  99%|█████████▉| 448/452 [1:56:30<01:02,  0.06it/s, v_num=5dwa, train_loss=0.105, moving_avg_reward=0.947, reward=0.978] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1:  99%|█████████▉| 449/452 [1:56:41<00:46,  0.06it/s, v_num=5dwa, train_loss=0.140, moving_avg_reward=0.947, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1: 100%|█████████▉| 450/452 [1:56:53<00:31,  0.06it/s, v_num=5dwa, train_loss=0.0621, moving_avg_reward=0.948, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 1: 100%|█████████▉| 451/452 [1:57:02<00:15,  0.06it/s, v_num=5dwa, train_loss=0.053, moving_avg_reward=0.948, reward=0.977] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   0%|          | 0/452 [00:00<?, ?it/s, v_num=5dwa, train_loss=0.0428, moving_avg_reward=0.948, reward=0.976]            

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   0%|          | 1/452 [00:08<1:06:19,  0.11it/s, v_num=5dwa, train_loss=0.0729, moving_avg_reward=0.948, reward=0.971]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   0%|          | 2/452 [00:17<1:05:15,  0.11it/s, v_num=5dwa, train_loss=0.0525, moving_avg_reward=0.949, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   1%|          | 3/452 [00:26<1:06:51,  0.11it/s, v_num=5dwa, train_loss=0.0573, moving_avg_reward=0.949, reward=0.982]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   1%|          | 4/452 [00:37<1:09:37,  0.11it/s, v_num=5dwa, train_loss=0.0153, moving_avg_reward=0.949, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   1%|          | 5/452 [00:51<1:16:16,  0.10it/s, v_num=5dwa, train_loss=0.188, moving_avg_reward=0.950, reward=0.972] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   1%|▏         | 6/452 [00:59<1:14:10,  0.10it/s, v_num=5dwa, train_loss=0.0404, moving_avg_reward=0.950, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   2%|▏         | 7/452 [01:08<1:12:58,  0.10it/s, v_num=5dwa, train_loss=0.059, moving_avg_reward=0.950, reward=0.977] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   2%|▏         | 8/452 [01:17<1:11:43,  0.10it/s, v_num=5dwa, train_loss=0.0763, moving_avg_reward=0.950, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   2%|▏         | 9/452 [01:27<1:11:51,  0.10it/s, v_num=5dwa, train_loss=0.0835, moving_avg_reward=0.951, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   2%|▏         | 10/452 [01:34<1:09:45,  0.11it/s, v_num=5dwa, train_loss=0.0395, moving_avg_reward=0.951, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   2%|▏         | 11/452 [01:45<1:10:11,  0.10it/s, v_num=5dwa, train_loss=0.0316, moving_avg_reward=0.951, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   3%|▎         | 12/452 [01:52<1:08:41,  0.11it/s, v_num=5dwa, train_loss=0.0672, moving_avg_reward=0.951, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   3%|▎         | 13/452 [02:01<1:08:10,  0.11it/s, v_num=5dwa, train_loss=0.0547, moving_avg_reward=0.952, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   3%|▎         | 14/452 [02:08<1:07:07,  0.11it/s, v_num=5dwa, train_loss=0.0446, moving_avg_reward=0.952, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   3%|▎         | 15/452 [02:18<1:07:28,  0.11it/s, v_num=5dwa, train_loss=0.013, moving_avg_reward=0.952, reward=0.973] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   4%|▎         | 16/452 [02:27<1:07:02,  0.11it/s, v_num=5dwa, train_loss=0.0339, moving_avg_reward=0.952, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   4%|▍         | 17/452 [02:36<1:06:38,  0.11it/s, v_num=5dwa, train_loss=0.0264, moving_avg_reward=0.953, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   4%|▍         | 18/452 [02:48<1:07:33,  0.11it/s, v_num=5dwa, train_loss=0.0558, moving_avg_reward=0.953, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   4%|▍         | 19/452 [02:58<1:07:44,  0.11it/s, v_num=5dwa, train_loss=0.0289, moving_avg_reward=0.953, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   4%|▍         | 20/452 [03:08<1:08:00,  0.11it/s, v_num=5dwa, train_loss=0.0378, moving_avg_reward=0.953, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   5%|▍         | 21/452 [03:18<1:07:55,  0.11it/s, v_num=5dwa, train_loss=0.0533, moving_avg_reward=0.954, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   5%|▍         | 22/452 [03:30<1:08:33,  0.10it/s, v_num=5dwa, train_loss=0.0713, moving_avg_reward=0.954, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   5%|▌         | 23/452 [03:39<1:08:06,  0.10it/s, v_num=5dwa, train_loss=0.0515, moving_avg_reward=0.954, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   5%|▌         | 24/452 [03:47<1:07:41,  0.11it/s, v_num=5dwa, train_loss=0.0425, moving_avg_reward=0.954, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   6%|▌         | 25/452 [03:56<1:07:18,  0.11it/s, v_num=5dwa, train_loss=-0.875, moving_avg_reward=0.955, reward=0.971]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   6%|▌         | 26/452 [04:08<1:07:48,  0.10it/s, v_num=5dwa, train_loss=0.0284, moving_avg_reward=0.955, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   6%|▌         | 27/452 [04:18<1:07:46,  0.10it/s, v_num=5dwa, train_loss=0.0327, moving_avg_reward=0.955, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   6%|▌         | 28/452 [04:28<1:07:47,  0.10it/s, v_num=5dwa, train_loss=0.0375, moving_avg_reward=0.955, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   6%|▋         | 29/452 [04:38<1:07:47,  0.10it/s, v_num=5dwa, train_loss=0.0608, moving_avg_reward=0.955, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   7%|▋         | 30/452 [04:49<1:07:50,  0.10it/s, v_num=5dwa, train_loss=0.0595, moving_avg_reward=0.956, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   7%|▋         | 31/452 [04:58<1:07:27,  0.10it/s, v_num=5dwa, train_loss=0.053, moving_avg_reward=0.956, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   7%|▋         | 32/452 [05:06<1:07:04,  0.10it/s, v_num=5dwa, train_loss=0.0266, moving_avg_reward=0.956, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   7%|▋         | 33/452 [05:15<1:06:46,  0.10it/s, v_num=5dwa, train_loss=0.032, moving_avg_reward=0.956, reward=0.979] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   8%|▊         | 34/452 [05:25<1:06:38,  0.10it/s, v_num=5dwa, train_loss=0.0515, moving_avg_reward=0.956, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   8%|▊         | 35/452 [05:36<1:06:54,  0.10it/s, v_num=5dwa, train_loss=0.0299, moving_avg_reward=0.957, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   8%|▊         | 36/452 [05:47<1:06:52,  0.10it/s, v_num=5dwa, train_loss=0.032, moving_avg_reward=0.957, reward=0.979] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   8%|▊         | 37/452 [05:57<1:06:50,  0.10it/s, v_num=5dwa, train_loss=0.0623, moving_avg_reward=0.957, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   8%|▊         | 38/452 [06:06<1:06:38,  0.10it/s, v_num=5dwa, train_loss=0.0292, moving_avg_reward=0.957, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   9%|▊         | 39/452 [06:15<1:06:20,  0.10it/s, v_num=5dwa, train_loss=0.0478, moving_avg_reward=0.958, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   9%|▉         | 40/452 [06:24<1:06:03,  0.10it/s, v_num=5dwa, train_loss=0.0517, moving_avg_reward=0.958, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   9%|▉         | 41/452 [06:33<1:05:47,  0.10it/s, v_num=5dwa, train_loss=0.0232, moving_avg_reward=0.958, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:   9%|▉         | 42/452 [06:41<1:05:20,  0.10it/s, v_num=5dwa, train_loss=0.0475, moving_avg_reward=0.958, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  10%|▉         | 43/452 [06:50<1:05:04,  0.10it/s, v_num=5dwa, train_loss=0.0643, moving_avg_reward=0.958, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  10%|▉         | 44/452 [06:59<1:04:49,  0.10it/s, v_num=5dwa, train_loss=0.043, moving_avg_reward=0.959, reward=0.978] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  10%|▉         | 45/452 [07:08<1:04:36,  0.10it/s, v_num=5dwa, train_loss=0.0509, moving_avg_reward=0.959, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  10%|█         | 46/452 [07:18<1:04:28,  0.10it/s, v_num=5dwa, train_loss=0.0346, moving_avg_reward=0.959, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  10%|█         | 47/452 [07:27<1:04:12,  0.11it/s, v_num=5dwa, train_loss=0.0319, moving_avg_reward=0.959, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  11%|█         | 48/452 [07:37<1:04:09,  0.10it/s, v_num=5dwa, train_loss=0.020, moving_avg_reward=0.959, reward=0.975] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  11%|█         | 49/452 [07:47<1:04:06,  0.10it/s, v_num=5dwa, train_loss=0.0292, moving_avg_reward=0.959, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  11%|█         | 50/452 [07:56<1:03:49,  0.10it/s, v_num=5dwa, train_loss=0.0248, moving_avg_reward=0.960, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  11%|█▏        | 51/452 [08:04<1:03:33,  0.11it/s, v_num=5dwa, train_loss=0.0236, moving_avg_reward=0.960, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  12%|█▏        | 52/452 [08:26<1:04:57,  0.10it/s, v_num=5dwa, train_loss=-0.373, moving_avg_reward=0.960, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  12%|█▏        | 53/452 [08:35<1:04:40,  0.10it/s, v_num=5dwa, train_loss=0.0232, moving_avg_reward=0.960, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  12%|█▏        | 54/452 [08:42<1:04:12,  0.10it/s, v_num=5dwa, train_loss=0.0239, moving_avg_reward=0.960, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  12%|█▏        | 55/452 [08:51<1:03:55,  0.10it/s, v_num=5dwa, train_loss=0.029, moving_avg_reward=0.960, reward=0.977] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  12%|█▏        | 56/452 [09:02<1:03:54,  0.10it/s, v_num=5dwa, train_loss=0.116, moving_avg_reward=0.961, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  13%|█▎        | 57/452 [09:12<1:03:50,  0.10it/s, v_num=5dwa, train_loss=0.022, moving_avg_reward=0.961, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  13%|█▎        | 58/452 [09:24<1:03:51,  0.10it/s, v_num=5dwa, train_loss=0.0479, moving_avg_reward=0.961, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  13%|█▎        | 59/452 [09:34<1:03:45,  0.10it/s, v_num=5dwa, train_loss=0.0196, moving_avg_reward=0.961, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  13%|█▎        | 60/452 [09:44<1:03:38,  0.10it/s, v_num=5dwa, train_loss=0.0514, moving_avg_reward=0.961, reward=0.971]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  13%|█▎        | 61/452 [09:54<1:03:29,  0.10it/s, v_num=5dwa, train_loss=0.0198, moving_avg_reward=0.961, reward=0.971]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  14%|█▎        | 62/452 [10:04<1:03:24,  0.10it/s, v_num=5dwa, train_loss=0.0502, moving_avg_reward=0.961, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  14%|█▍        | 63/452 [10:15<1:03:19,  0.10it/s, v_num=5dwa, train_loss=0.0275, moving_avg_reward=0.962, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  14%|█▍        | 64/452 [10:25<1:03:12,  0.10it/s, v_num=5dwa, train_loss=0.0415, moving_avg_reward=0.962, reward=0.967]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  14%|█▍        | 65/452 [10:35<1:03:05,  0.10it/s, v_num=5dwa, train_loss=0.0283, moving_avg_reward=0.962, reward=0.969]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  15%|█▍        | 66/452 [10:44<1:02:49,  0.10it/s, v_num=5dwa, train_loss=0.0495, moving_avg_reward=0.962, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  15%|█▍        | 67/452 [10:54<1:02:41,  0.10it/s, v_num=5dwa, train_loss=0.0246, moving_avg_reward=0.962, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  15%|█▌        | 68/452 [11:04<1:02:33,  0.10it/s, v_num=5dwa, train_loss=0.0316, moving_avg_reward=0.962, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  15%|█▌        | 69/452 [11:15<1:02:27,  0.10it/s, v_num=5dwa, train_loss=0.0245, moving_avg_reward=0.962, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  15%|█▌        | 70/452 [11:28<1:02:39,  0.10it/s, v_num=5dwa, train_loss=0.036, moving_avg_reward=0.962, reward=0.978] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  16%|█▌        | 71/452 [11:36<1:02:15,  0.10it/s, v_num=5dwa, train_loss=0.0163, moving_avg_reward=0.963, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  16%|█▌        | 72/452 [11:44<1:01:59,  0.10it/s, v_num=5dwa, train_loss=0.0185, moving_avg_reward=0.963, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  16%|█▌        | 73/452 [11:51<1:01:35,  0.10it/s, v_num=5dwa, train_loss=0.0274, moving_avg_reward=0.963, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  16%|█▋        | 74/452 [12:02<1:01:28,  0.10it/s, v_num=5dwa, train_loss=0.0522, moving_avg_reward=0.963, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  17%|█▋        | 75/452 [12:11<1:01:18,  0.10it/s, v_num=5dwa, train_loss=0.0142, moving_avg_reward=0.963, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  17%|█▋        | 76/452 [12:19<1:00:56,  0.10it/s, v_num=5dwa, train_loss=0.0164, moving_avg_reward=0.963, reward=0.982]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  17%|█▋        | 77/452 [12:27<1:00:42,  0.10it/s, v_num=5dwa, train_loss=0.0222, moving_avg_reward=0.964, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  17%|█▋        | 78/452 [12:43<1:00:59,  0.10it/s, v_num=5dwa, train_loss=0.0555, moving_avg_reward=0.964, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  17%|█▋        | 79/452 [12:51<1:00:43,  0.10it/s, v_num=5dwa, train_loss=0.0307, moving_avg_reward=0.964, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  18%|█▊        | 80/452 [13:03<1:00:43,  0.10it/s, v_num=5dwa, train_loss=0.0726, moving_avg_reward=0.964, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  18%|█▊        | 81/452 [13:13<1:00:35,  0.10it/s, v_num=5dwa, train_loss=0.0225, moving_avg_reward=0.964, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  18%|█▊        | 82/452 [13:24<1:00:28,  0.10it/s, v_num=5dwa, train_loss=0.0156, moving_avg_reward=0.964, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  18%|█▊        | 83/452 [13:32<1:00:13,  0.10it/s, v_num=5dwa, train_loss=0.0203, moving_avg_reward=0.964, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  19%|█▊        | 84/452 [13:46<1:00:21,  0.10it/s, v_num=5dwa, train_loss=0.0138, moving_avg_reward=0.964, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  19%|█▉        | 85/452 [13:58<1:00:21,  0.10it/s, v_num=5dwa, train_loss=0.0368, moving_avg_reward=0.964, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  19%|█▉        | 86/452 [14:08<1:00:12,  0.10it/s, v_num=5dwa, train_loss=0.0312, moving_avg_reward=0.965, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  19%|█▉        | 87/452 [14:19<1:00:03,  0.10it/s, v_num=5dwa, train_loss=0.0427, moving_avg_reward=0.965, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  19%|█▉        | 88/452 [14:30<1:00:00,  0.10it/s, v_num=5dwa, train_loss=0.0674, moving_avg_reward=0.965, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  20%|█▉        | 89/452 [14:39<59:46,  0.10it/s, v_num=5dwa, train_loss=0.0507, moving_avg_reward=0.965, reward=0.971]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  20%|█▉        | 90/452 [14:51<59:47,  0.10it/s, v_num=5dwa, train_loss=0.0476, moving_avg_reward=0.965, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  20%|██        | 91/452 [15:01<59:36,  0.10it/s, v_num=5dwa, train_loss=0.057, moving_avg_reward=0.965, reward=0.981] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  20%|██        | 92/452 [15:13<59:34,  0.10it/s, v_num=5dwa, train_loss=0.020, moving_avg_reward=0.965, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  21%|██        | 93/452 [15:25<59:32,  0.10it/s, v_num=5dwa, train_loss=0.0161, moving_avg_reward=0.965, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  21%|██        | 94/452 [15:37<59:30,  0.10it/s, v_num=5dwa, train_loss=0.0457, moving_avg_reward=0.965, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  21%|██        | 95/452 [15:46<59:16,  0.10it/s, v_num=5dwa, train_loss=0.0353, moving_avg_reward=0.966, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  21%|██        | 96/452 [15:56<59:08,  0.10it/s, v_num=5dwa, train_loss=0.0138, moving_avg_reward=0.966, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  21%|██▏       | 97/452 [16:07<58:59,  0.10it/s, v_num=5dwa, train_loss=0.0112, moving_avg_reward=0.966, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  22%|██▏       | 98/452 [16:18<58:54,  0.10it/s, v_num=5dwa, train_loss=0.0375, moving_avg_reward=0.966, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  22%|██▏       | 99/452 [16:28<58:44,  0.10it/s, v_num=5dwa, train_loss=0.0372, moving_avg_reward=0.966, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  22%|██▏       | 100/452 [16:40<58:41,  0.10it/s, v_num=5dwa, train_loss=0.0199, moving_avg_reward=0.966, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  22%|██▏       | 101/452 [16:49<58:27,  0.10it/s, v_num=5dwa, train_loss=0.0124, moving_avg_reward=0.966, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  23%|██▎       | 102/452 [16:56<58:08,  0.10it/s, v_num=5dwa, train_loss=0.0105, moving_avg_reward=0.966, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  23%|██▎       | 103/452 [17:05<57:53,  0.10it/s, v_num=5dwa, train_loss=0.0206, moving_avg_reward=0.966, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  23%|██▎       | 104/452 [17:13<57:39,  0.10it/s, v_num=5dwa, train_loss=0.0241, moving_avg_reward=0.966, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  23%|██▎       | 105/452 [17:26<57:37,  0.10it/s, v_num=5dwa, train_loss=0.0198, moving_avg_reward=0.967, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  23%|██▎       | 106/452 [17:36<57:28,  0.10it/s, v_num=5dwa, train_loss=0.0199, moving_avg_reward=0.967, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  24%|██▎       | 107/452 [17:48<57:25,  0.10it/s, v_num=5dwa, train_loss=0.0126, moving_avg_reward=0.967, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  24%|██▍       | 108/452 [18:00<57:21,  0.10it/s, v_num=5dwa, train_loss=0.0229, moving_avg_reward=0.967, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  24%|██▍       | 109/452 [18:09<57:07,  0.10it/s, v_num=5dwa, train_loss=0.0247, moving_avg_reward=0.967, reward=0.966]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  24%|██▍       | 110/452 [18:17<56:53,  0.10it/s, v_num=5dwa, train_loss=0.0197, moving_avg_reward=0.967, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  25%|██▍       | 111/452 [18:26<56:38,  0.10it/s, v_num=5dwa, train_loss=0.0214, moving_avg_reward=0.967, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  25%|██▍       | 112/452 [18:38<56:34,  0.10it/s, v_num=5dwa, train_loss=0.00992, moving_avg_reward=0.967, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  25%|██▌       | 113/452 [18:47<56:22,  0.10it/s, v_num=5dwa, train_loss=0.00839, moving_avg_reward=0.967, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  25%|██▌       | 114/452 [18:56<56:08,  0.10it/s, v_num=5dwa, train_loss=0.00583, moving_avg_reward=0.967, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  25%|██▌       | 115/452 [19:04<55:55,  0.10it/s, v_num=5dwa, train_loss=0.0299, moving_avg_reward=0.967, reward=0.973] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  26%|██▌       | 116/452 [19:15<55:46,  0.10it/s, v_num=5dwa, train_loss=0.0172, moving_avg_reward=0.968, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  26%|██▌       | 117/452 [19:27<55:41,  0.10it/s, v_num=5dwa, train_loss=0.031, moving_avg_reward=0.968, reward=0.980] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  26%|██▌       | 118/452 [19:37<55:32,  0.10it/s, v_num=5dwa, train_loss=0.0314, moving_avg_reward=0.968, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  26%|██▋       | 119/452 [19:46<55:19,  0.10it/s, v_num=5dwa, train_loss=0.00712, moving_avg_reward=0.968, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  27%|██▋       | 120/452 [19:54<55:05,  0.10it/s, v_num=5dwa, train_loss=0.0151, moving_avg_reward=0.968, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  27%|██▋       | 121/452 [20:03<54:51,  0.10it/s, v_num=5dwa, train_loss=0.0162, moving_avg_reward=0.968, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  27%|██▋       | 122/452 [20:13<54:42,  0.10it/s, v_num=5dwa, train_loss=0.0151, moving_avg_reward=0.968, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  27%|██▋       | 123/452 [20:22<54:28,  0.10it/s, v_num=5dwa, train_loss=0.0175, moving_avg_reward=0.968, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  27%|██▋       | 124/452 [20:33<54:24,  0.10it/s, v_num=5dwa, train_loss=0.0104, moving_avg_reward=0.968, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  28%|██▊       | 125/452 [20:44<54:14,  0.10it/s, v_num=5dwa, train_loss=0.0194, moving_avg_reward=0.968, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  28%|██▊       | 126/452 [20:54<54:05,  0.10it/s, v_num=5dwa, train_loss=0.0273, moving_avg_reward=0.969, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  28%|██▊       | 127/452 [21:04<53:55,  0.10it/s, v_num=5dwa, train_loss=0.0199, moving_avg_reward=0.969, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  28%|██▊       | 128/452 [21:17<53:54,  0.10it/s, v_num=5dwa, train_loss=0.0224, moving_avg_reward=0.969, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  29%|██▊       | 129/452 [21:28<53:46,  0.10it/s, v_num=5dwa, train_loss=0.0315, moving_avg_reward=0.969, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  29%|██▉       | 130/452 [21:38<53:36,  0.10it/s, v_num=5dwa, train_loss=0.0182, moving_avg_reward=0.969, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  29%|██▉       | 131/452 [21:47<53:23,  0.10it/s, v_num=5dwa, train_loss=0.0128, moving_avg_reward=0.969, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  29%|██▉       | 132/452 [21:57<53:13,  0.10it/s, v_num=5dwa, train_loss=0.0222, moving_avg_reward=0.969, reward=0.967]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  29%|██▉       | 133/452 [22:09<53:08,  0.10it/s, v_num=5dwa, train_loss=0.0107, moving_avg_reward=0.969, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  30%|██▉       | 134/452 [22:17<52:54,  0.10it/s, v_num=5dwa, train_loss=0.00727, moving_avg_reward=0.969, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  30%|██▉       | 135/452 [22:29<52:49,  0.10it/s, v_num=5dwa, train_loss=0.0203, moving_avg_reward=0.969, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  30%|███       | 136/452 [22:39<52:39,  0.10it/s, v_num=5dwa, train_loss=-0.00859, moving_avg_reward=0.969, reward=0.969]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  30%|███       | 137/452 [22:49<52:28,  0.10it/s, v_num=5dwa, train_loss=0.0382, moving_avg_reward=0.969, reward=0.975]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  31%|███       | 138/452 [23:01<52:22,  0.10it/s, v_num=5dwa, train_loss=0.0202, moving_avg_reward=0.969, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  31%|███       | 139/452 [23:11<52:13,  0.10it/s, v_num=5dwa, train_loss=0.00349, moving_avg_reward=0.969, reward=0.983]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  31%|███       | 140/452 [23:21<52:03,  0.10it/s, v_num=5dwa, train_loss=0.0112, moving_avg_reward=0.970, reward=0.979] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  31%|███       | 141/452 [23:35<52:01,  0.10it/s, v_num=5dwa, train_loss=0.00312, moving_avg_reward=0.970, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  31%|███▏      | 142/452 [23:47<51:56,  0.10it/s, v_num=5dwa, train_loss=0.0286, moving_avg_reward=0.970, reward=0.977] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  32%|███▏      | 143/452 [23:56<51:43,  0.10it/s, v_num=5dwa, train_loss=0.0112, moving_avg_reward=0.970, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  32%|███▏      | 144/452 [24:06<51:33,  0.10it/s, v_num=5dwa, train_loss=0.0075, moving_avg_reward=0.970, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  32%|███▏      | 145/452 [24:15<51:21,  0.10it/s, v_num=5dwa, train_loss=0.0128, moving_avg_reward=0.970, reward=0.966]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  32%|███▏      | 146/452 [24:28<51:17,  0.10it/s, v_num=5dwa, train_loss=0.0527, moving_avg_reward=0.970, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  33%|███▎      | 147/452 [24:37<51:04,  0.10it/s, v_num=5dwa, train_loss=0.0158, moving_avg_reward=0.970, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  33%|███▎      | 148/452 [24:52<51:05,  0.10it/s, v_num=5dwa, train_loss=0.0767, moving_avg_reward=0.970, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  33%|███▎      | 149/452 [25:02<50:55,  0.10it/s, v_num=5dwa, train_loss=0.010, moving_avg_reward=0.970, reward=0.978] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  33%|███▎      | 150/452 [25:11<50:42,  0.10it/s, v_num=5dwa, train_loss=0.00679, moving_avg_reward=0.970, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  33%|███▎      | 151/452 [25:21<50:33,  0.10it/s, v_num=5dwa, train_loss=0.0367, moving_avg_reward=0.970, reward=0.975] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  34%|███▎      | 152/452 [25:32<50:24,  0.10it/s, v_num=5dwa, train_loss=0.0115, moving_avg_reward=0.970, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  34%|███▍      | 153/452 [25:44<50:18,  0.10it/s, v_num=5dwa, train_loss=0.00942, moving_avg_reward=0.970, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  34%|███▍      | 154/452 [25:54<50:08,  0.10it/s, v_num=5dwa, train_loss=0.0578, moving_avg_reward=0.970, reward=0.978] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  34%|███▍      | 155/452 [26:01<49:52,  0.10it/s, v_num=5dwa, train_loss=0.0179, moving_avg_reward=0.971, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  35%|███▍      | 156/452 [26:12<49:43,  0.10it/s, v_num=5dwa, train_loss=0.012, moving_avg_reward=0.971, reward=0.974] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  35%|███▍      | 157/452 [26:22<49:33,  0.10it/s, v_num=5dwa, train_loss=0.00745, moving_avg_reward=0.971, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  35%|███▍      | 158/452 [26:36<49:30,  0.10it/s, v_num=5dwa, train_loss=0.0105, moving_avg_reward=0.971, reward=0.969] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  35%|███▌      | 159/452 [26:49<49:26,  0.10it/s, v_num=5dwa, train_loss=0.0114, moving_avg_reward=0.971, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  35%|███▌      | 160/452 [26:58<49:14,  0.10it/s, v_num=5dwa, train_loss=0.026, moving_avg_reward=0.971, reward=0.981] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  36%|███▌      | 161/452 [27:05<48:58,  0.10it/s, v_num=5dwa, train_loss=0.0116, moving_avg_reward=0.971, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  36%|███▌      | 162/452 [27:14<48:46,  0.10it/s, v_num=5dwa, train_loss=0.0293, moving_avg_reward=0.971, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  36%|███▌      | 163/452 [27:24<48:36,  0.10it/s, v_num=5dwa, train_loss=0.0122, moving_avg_reward=0.971, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  36%|███▋      | 164/452 [27:31<48:20,  0.10it/s, v_num=5dwa, train_loss=0.00841, moving_avg_reward=0.971, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  37%|███▋      | 165/452 [27:43<48:13,  0.10it/s, v_num=5dwa, train_loss=0.00533, moving_avg_reward=0.971, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  37%|███▋      | 166/452 [27:55<48:06,  0.10it/s, v_num=5dwa, train_loss=0.0177, moving_avg_reward=0.971, reward=0.974] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  37%|███▋      | 167/452 [28:05<47:56,  0.10it/s, v_num=5dwa, train_loss=0.00902, moving_avg_reward=0.971, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  37%|███▋      | 168/452 [28:16<47:47,  0.10it/s, v_num=5dwa, train_loss=0.0131, moving_avg_reward=0.971, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  37%|███▋      | 169/452 [28:28<47:40,  0.10it/s, v_num=5dwa, train_loss=0.0168, moving_avg_reward=0.971, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  38%|███▊      | 170/452 [28:38<47:30,  0.10it/s, v_num=5dwa, train_loss=0.00424, moving_avg_reward=0.971, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  38%|███▊      | 171/452 [28:47<47:18,  0.10it/s, v_num=5dwa, train_loss=0.00648, moving_avg_reward=0.971, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  38%|███▊      | 172/452 [28:55<47:05,  0.10it/s, v_num=5dwa, train_loss=-0.00916, moving_avg_reward=0.972, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  38%|███▊      | 173/452 [29:07<46:58,  0.10it/s, v_num=5dwa, train_loss=0.0347, moving_avg_reward=0.972, reward=0.976]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  38%|███▊      | 174/452 [29:16<46:46,  0.10it/s, v_num=5dwa, train_loss=0.000999, moving_avg_reward=0.972, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  39%|███▊      | 175/452 [29:24<46:32,  0.10it/s, v_num=5dwa, train_loss=0.0102, moving_avg_reward=0.972, reward=0.975]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  39%|███▉      | 176/452 [29:35<46:24,  0.10it/s, v_num=5dwa, train_loss=0.00646, moving_avg_reward=0.972, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  39%|███▉      | 177/452 [29:43<46:10,  0.10it/s, v_num=5dwa, train_loss=0.0228, moving_avg_reward=0.972, reward=0.980] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  39%|███▉      | 178/452 [29:51<45:58,  0.10it/s, v_num=5dwa, train_loss=0.0211, moving_avg_reward=0.972, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  40%|███▉      | 179/452 [30:02<45:48,  0.10it/s, v_num=5dwa, train_loss=0.0136, moving_avg_reward=0.972, reward=0.964]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  40%|███▉      | 180/452 [30:12<45:39,  0.10it/s, v_num=5dwa, train_loss=0.00412, moving_avg_reward=0.972, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  40%|████      | 181/452 [30:21<45:27,  0.10it/s, v_num=5dwa, train_loss=0.0162, moving_avg_reward=0.972, reward=0.979] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  40%|████      | 182/452 [30:30<45:15,  0.10it/s, v_num=5dwa, train_loss=0.0184, moving_avg_reward=0.972, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  40%|████      | 183/452 [30:42<45:08,  0.10it/s, v_num=5dwa, train_loss=0.00609, moving_avg_reward=0.972, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  41%|████      | 184/452 [30:52<44:58,  0.10it/s, v_num=5dwa, train_loss=-0.00292, moving_avg_reward=0.972, reward=0.963]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  41%|████      | 185/452 [31:14<45:04,  0.10it/s, v_num=5dwa, train_loss=-0.375, moving_avg_reward=0.971, reward=0.928]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  41%|████      | 186/452 [31:24<44:55,  0.10it/s, v_num=5dwa, train_loss=0.00732, moving_avg_reward=0.971, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  41%|████▏     | 187/452 [31:36<44:47,  0.10it/s, v_num=5dwa, train_loss=0.00712, moving_avg_reward=0.971, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  42%|████▏     | 188/452 [31:48<44:40,  0.10it/s, v_num=5dwa, train_loss=0.00703, moving_avg_reward=0.971, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  42%|████▏     | 189/452 [31:59<44:30,  0.10it/s, v_num=5dwa, train_loss=0.00696, moving_avg_reward=0.972, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  42%|████▏     | 190/452 [32:10<44:22,  0.10it/s, v_num=5dwa, train_loss=0.00684, moving_avg_reward=0.972, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  42%|████▏     | 191/452 [32:21<44:13,  0.10it/s, v_num=5dwa, train_loss=0.0029, moving_avg_reward=0.972, reward=0.970] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  42%|████▏     | 192/452 [32:30<44:00,  0.10it/s, v_num=5dwa, train_loss=0.0192, moving_avg_reward=0.972, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  43%|████▎     | 193/452 [32:42<43:52,  0.10it/s, v_num=5dwa, train_loss=0.0194, moving_avg_reward=0.972, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  43%|████▎     | 194/452 [32:53<43:45,  0.10it/s, v_num=5dwa, train_loss=0.0076, moving_avg_reward=0.972, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  43%|████▎     | 195/452 [33:04<43:34,  0.10it/s, v_num=5dwa, train_loss=0.0213, moving_avg_reward=0.972, reward=0.982]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  43%|████▎     | 196/452 [33:13<43:23,  0.10it/s, v_num=5dwa, train_loss=0.00655, moving_avg_reward=0.972, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  44%|████▎     | 197/452 [33:21<43:11,  0.10it/s, v_num=5dwa, train_loss=0.0114, moving_avg_reward=0.972, reward=0.978] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  44%|████▍     | 198/452 [33:30<42:59,  0.10it/s, v_num=5dwa, train_loss=0.00824, moving_avg_reward=0.972, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  44%|████▍     | 199/452 [33:42<42:51,  0.10it/s, v_num=5dwa, train_loss=0.0243, moving_avg_reward=0.972, reward=0.980] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  44%|████▍     | 200/452 [33:52<42:40,  0.10it/s, v_num=5dwa, train_loss=0.014, moving_avg_reward=0.972, reward=0.981] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  44%|████▍     | 201/452 [34:04<42:32,  0.10it/s, v_num=5dwa, train_loss=0.00348, moving_avg_reward=0.972, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  45%|████▍     | 202/452 [34:18<42:27,  0.10it/s, v_num=5dwa, train_loss=0.0604, moving_avg_reward=0.972, reward=0.978] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  45%|████▍     | 203/452 [34:32<42:21,  0.10it/s, v_num=5dwa, train_loss=0.00455, moving_avg_reward=0.972, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  45%|████▌     | 204/452 [34:43<42:13,  0.10it/s, v_num=5dwa, train_loss=-0.0163, moving_avg_reward=0.972, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  45%|████▌     | 205/452 [34:55<42:04,  0.10it/s, v_num=5dwa, train_loss=-0.0439, moving_avg_reward=0.972, reward=0.971]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  46%|████▌     | 206/452 [35:07<41:56,  0.10it/s, v_num=5dwa, train_loss=0.0213, moving_avg_reward=0.972, reward=0.970] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  46%|████▌     | 207/452 [35:17<41:46,  0.10it/s, v_num=5dwa, train_loss=0.00616, moving_avg_reward=0.972, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  46%|████▌     | 208/452 [35:29<41:37,  0.10it/s, v_num=5dwa, train_loss=0.0044, moving_avg_reward=0.972, reward=0.975] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  46%|████▌     | 209/452 [35:37<41:25,  0.10it/s, v_num=5dwa, train_loss=0.00627, moving_avg_reward=0.972, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  46%|████▋     | 210/452 [35:54<41:22,  0.10it/s, v_num=5dwa, train_loss=0.0186, moving_avg_reward=0.973, reward=0.977] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  47%|████▋     | 211/452 [36:07<41:15,  0.10it/s, v_num=5dwa, train_loss=0.0237, moving_avg_reward=0.973, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  47%|████▋     | 212/452 [36:16<41:03,  0.10it/s, v_num=5dwa, train_loss=0.00765, moving_avg_reward=0.973, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  47%|████▋     | 213/452 [36:24<40:51,  0.10it/s, v_num=5dwa, train_loss=0.0079, moving_avg_reward=0.973, reward=0.977] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  47%|████▋     | 214/452 [36:33<40:39,  0.10it/s, v_num=5dwa, train_loss=0.00724, moving_avg_reward=0.973, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  48%|████▊     | 215/452 [36:42<40:27,  0.10it/s, v_num=5dwa, train_loss=0.016, moving_avg_reward=0.973, reward=0.979]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  48%|████▊     | 216/452 [36:52<40:17,  0.10it/s, v_num=5dwa, train_loss=-6.57e-5, moving_avg_reward=0.973, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  48%|████▊     | 217/452 [37:01<40:05,  0.10it/s, v_num=5dwa, train_loss=0.00357, moving_avg_reward=0.973, reward=0.970] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  48%|████▊     | 218/452 [37:09<39:53,  0.10it/s, v_num=5dwa, train_loss=0.0103, moving_avg_reward=0.973, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  48%|████▊     | 219/452 [37:17<39:40,  0.10it/s, v_num=5dwa, train_loss=0.00689, moving_avg_reward=0.973, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  49%|████▊     | 220/452 [37:27<39:30,  0.10it/s, v_num=5dwa, train_loss=0.00707, moving_avg_reward=0.973, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  49%|████▉     | 221/452 [37:36<39:18,  0.10it/s, v_num=5dwa, train_loss=0.00732, moving_avg_reward=0.973, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  49%|████▉     | 222/452 [37:46<39:08,  0.10it/s, v_num=5dwa, train_loss=0.0072, moving_avg_reward=0.973, reward=0.979] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  49%|████▉     | 223/452 [37:56<38:57,  0.10it/s, v_num=5dwa, train_loss=0.00722, moving_avg_reward=0.973, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  50%|████▉     | 224/452 [38:06<38:47,  0.10it/s, v_num=5dwa, train_loss=0.0171, moving_avg_reward=0.973, reward=0.970] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  50%|████▉     | 225/452 [38:14<38:35,  0.10it/s, v_num=5dwa, train_loss=0.0178, moving_avg_reward=0.973, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  50%|█████     | 226/452 [38:22<38:22,  0.10it/s, v_num=5dwa, train_loss=0.0127, moving_avg_reward=0.973, reward=0.983]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  50%|█████     | 227/452 [38:32<38:12,  0.10it/s, v_num=5dwa, train_loss=0.0104, moving_avg_reward=0.973, reward=0.982]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  50%|█████     | 228/452 [38:46<38:05,  0.10it/s, v_num=5dwa, train_loss=0.0132, moving_avg_reward=0.973, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  51%|█████     | 229/452 [38:56<37:55,  0.10it/s, v_num=5dwa, train_loss=0.00269, moving_avg_reward=0.973, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  51%|█████     | 230/452 [39:05<37:43,  0.10it/s, v_num=5dwa, train_loss=0.012, moving_avg_reward=0.973, reward=0.980]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  51%|█████     | 231/452 [39:15<37:33,  0.10it/s, v_num=5dwa, train_loss=-0.0271, moving_avg_reward=0.973, reward=0.964]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  51%|█████▏    | 232/452 [39:28<37:26,  0.10it/s, v_num=5dwa, train_loss=0.00669, moving_avg_reward=0.973, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  52%|█████▏    | 233/452 [39:37<37:14,  0.10it/s, v_num=5dwa, train_loss=0.0104, moving_avg_reward=0.973, reward=0.981] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  52%|█████▏    | 234/452 [39:46<37:03,  0.10it/s, v_num=5dwa, train_loss=0.00675, moving_avg_reward=0.973, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  52%|█████▏    | 235/452 [39:54<36:51,  0.10it/s, v_num=5dwa, train_loss=0.0032, moving_avg_reward=0.973, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  52%|█████▏    | 236/452 [40:05<36:41,  0.10it/s, v_num=5dwa, train_loss=0.00748, moving_avg_reward=0.973, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  52%|█████▏    | 237/452 [40:12<36:28,  0.10it/s, v_num=5dwa, train_loss=0.020, moving_avg_reward=0.973, reward=0.981]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  53%|█████▎    | 238/452 [40:22<36:18,  0.10it/s, v_num=5dwa, train_loss=0.00938, moving_avg_reward=0.973, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  53%|█████▎    | 239/452 [40:31<36:06,  0.10it/s, v_num=5dwa, train_loss=0.00444, moving_avg_reward=0.974, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  53%|█████▎    | 240/452 [40:40<35:55,  0.10it/s, v_num=5dwa, train_loss=0.00799, moving_avg_reward=0.974, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  53%|█████▎    | 241/452 [40:49<35:44,  0.10it/s, v_num=5dwa, train_loss=0.0121, moving_avg_reward=0.974, reward=0.981] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  54%|█████▎    | 242/452 [41:00<35:34,  0.10it/s, v_num=5dwa, train_loss=0.0105, moving_avg_reward=0.974, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  54%|█████▍    | 243/452 [41:10<35:24,  0.10it/s, v_num=5dwa, train_loss=-0.000435, moving_avg_reward=0.974, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  54%|█████▍    | 244/452 [41:22<35:16,  0.10it/s, v_num=5dwa, train_loss=0.0162, moving_avg_reward=0.974, reward=0.979]   

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  54%|█████▍    | 245/452 [41:32<35:06,  0.10it/s, v_num=5dwa, train_loss=0.0327, moving_avg_reward=0.974, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  54%|█████▍    | 246/452 [41:42<34:55,  0.10it/s, v_num=5dwa, train_loss=0.000881, moving_avg_reward=0.974, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  55%|█████▍    | 247/452 [41:50<34:44,  0.10it/s, v_num=5dwa, train_loss=0.0174, moving_avg_reward=0.974, reward=0.980]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  55%|█████▍    | 248/452 [42:01<34:33,  0.10it/s, v_num=5dwa, train_loss=0.000281, moving_avg_reward=0.974, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  55%|█████▌    | 249/452 [42:11<34:23,  0.10it/s, v_num=5dwa, train_loss=0.00857, moving_avg_reward=0.974, reward=0.968] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  55%|█████▌    | 250/452 [42:21<34:13,  0.10it/s, v_num=5dwa, train_loss=0.0193, moving_avg_reward=0.974, reward=0.982] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  56%|█████▌    | 251/452 [42:31<34:03,  0.10it/s, v_num=5dwa, train_loss=0.00921, moving_avg_reward=0.974, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  56%|█████▌    | 252/452 [42:41<33:52,  0.10it/s, v_num=5dwa, train_loss=0.0229, moving_avg_reward=0.974, reward=0.981] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  56%|█████▌    | 253/452 [42:53<33:44,  0.10it/s, v_num=5dwa, train_loss=0.00675, moving_avg_reward=0.974, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  56%|█████▌    | 254/452 [43:04<33:34,  0.10it/s, v_num=5dwa, train_loss=0.00664, moving_avg_reward=0.974, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  56%|█████▋    | 255/452 [43:13<33:23,  0.10it/s, v_num=5dwa, train_loss=0.0108, moving_avg_reward=0.974, reward=0.972] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  57%|█████▋    | 256/452 [43:23<33:13,  0.10it/s, v_num=5dwa, train_loss=0.0117, moving_avg_reward=0.974, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  57%|█████▋    | 257/452 [43:32<33:02,  0.10it/s, v_num=5dwa, train_loss=0.00701, moving_avg_reward=0.974, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  57%|█████▋    | 258/452 [43:44<32:53,  0.10it/s, v_num=5dwa, train_loss=0.0116, moving_avg_reward=0.974, reward=0.977] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  57%|█████▋    | 259/452 [43:56<32:44,  0.10it/s, v_num=5dwa, train_loss=-0.0122, moving_avg_reward=0.974, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  58%|█████▊    | 260/452 [44:07<32:34,  0.10it/s, v_num=5dwa, train_loss=0.0013, moving_avg_reward=0.974, reward=0.978] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  58%|█████▊    | 261/452 [44:15<32:22,  0.10it/s, v_num=5dwa, train_loss=0.00467, moving_avg_reward=0.974, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  58%|█████▊    | 262/452 [44:23<32:11,  0.10it/s, v_num=5dwa, train_loss=0.00825, moving_avg_reward=0.974, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  58%|█████▊    | 263/452 [44:37<32:03,  0.10it/s, v_num=5dwa, train_loss=0.00599, moving_avg_reward=0.974, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  58%|█████▊    | 264/452 [44:45<31:52,  0.10it/s, v_num=5dwa, train_loss=-0.00404, moving_avg_reward=0.974, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  59%|█████▊    | 265/452 [44:57<31:43,  0.10it/s, v_num=5dwa, train_loss=0.00406, moving_avg_reward=0.974, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  59%|█████▉    | 266/452 [45:06<31:32,  0.10it/s, v_num=5dwa, train_loss=-0.0419, moving_avg_reward=0.974, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  59%|█████▉    | 267/452 [45:16<31:22,  0.10it/s, v_num=5dwa, train_loss=0.021, moving_avg_reward=0.974, reward=0.975]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  59%|█████▉    | 268/452 [45:27<31:12,  0.10it/s, v_num=5dwa, train_loss=0.0175, moving_avg_reward=0.974, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  60%|█████▉    | 269/452 [45:39<31:03,  0.10it/s, v_num=5dwa, train_loss=-0.00019, moving_avg_reward=0.974, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  60%|█████▉    | 270/452 [45:51<30:54,  0.10it/s, v_num=5dwa, train_loss=0.00374, moving_avg_reward=0.974, reward=0.974] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  60%|█████▉    | 271/452 [46:03<30:45,  0.10it/s, v_num=5dwa, train_loss=0.0057, moving_avg_reward=0.974, reward=0.975] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  60%|██████    | 272/452 [46:12<30:34,  0.10it/s, v_num=5dwa, train_loss=-0.00265, moving_avg_reward=0.974, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  60%|██████    | 273/452 [46:22<30:24,  0.10it/s, v_num=5dwa, train_loss=-0.0261, moving_avg_reward=0.974, reward=0.970] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  61%|██████    | 274/452 [46:32<30:14,  0.10it/s, v_num=5dwa, train_loss=-0.0026, moving_avg_reward=0.974, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  61%|██████    | 275/452 [46:46<30:06,  0.10it/s, v_num=5dwa, train_loss=0.00601, moving_avg_reward=0.974, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  61%|██████    | 276/452 [46:54<29:55,  0.10it/s, v_num=5dwa, train_loss=0.00261, moving_avg_reward=0.974, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  61%|██████▏   | 277/452 [47:01<29:42,  0.10it/s, v_num=5dwa, train_loss=0.00938, moving_avg_reward=0.975, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  62%|██████▏   | 278/452 [47:10<29:31,  0.10it/s, v_num=5dwa, train_loss=0.0049, moving_avg_reward=0.975, reward=0.977] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  62%|██████▏   | 279/452 [47:19<29:20,  0.10it/s, v_num=5dwa, train_loss=-0.00202, moving_avg_reward=0.974, reward=0.965]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  62%|██████▏   | 280/452 [47:28<29:09,  0.10it/s, v_num=5dwa, train_loss=0.00548, moving_avg_reward=0.975, reward=0.979] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  62%|██████▏   | 281/452 [47:37<28:58,  0.10it/s, v_num=5dwa, train_loss=0.0147, moving_avg_reward=0.975, reward=0.981] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  62%|██████▏   | 282/452 [47:47<28:48,  0.10it/s, v_num=5dwa, train_loss=-0.0102, moving_avg_reward=0.975, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  63%|██████▎   | 283/452 [47:57<28:38,  0.10it/s, v_num=5dwa, train_loss=0.00322, moving_avg_reward=0.975, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  63%|██████▎   | 284/452 [48:08<28:28,  0.10it/s, v_num=5dwa, train_loss=-0.0304, moving_avg_reward=0.975, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  63%|██████▎   | 285/452 [48:20<28:19,  0.10it/s, v_num=5dwa, train_loss=0.00254, moving_avg_reward=0.975, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  63%|██████▎   | 286/452 [48:28<28:08,  0.10it/s, v_num=5dwa, train_loss=-0.0217, moving_avg_reward=0.975, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  63%|██████▎   | 287/452 [48:40<27:58,  0.10it/s, v_num=5dwa, train_loss=0.00945, moving_avg_reward=0.975, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  64%|██████▎   | 288/452 [48:50<27:48,  0.10it/s, v_num=5dwa, train_loss=-0.0029, moving_avg_reward=0.975, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  64%|██████▍   | 289/452 [48:59<27:37,  0.10it/s, v_num=5dwa, train_loss=0.0138, moving_avg_reward=0.975, reward=0.978] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  64%|██████▍   | 290/452 [49:09<27:27,  0.10it/s, v_num=5dwa, train_loss=0.00669, moving_avg_reward=0.975, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  64%|██████▍   | 291/452 [49:18<27:16,  0.10it/s, v_num=5dwa, train_loss=0.00865, moving_avg_reward=0.975, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  65%|██████▍   | 292/452 [49:25<27:04,  0.10it/s, v_num=5dwa, train_loss=0.00518, moving_avg_reward=0.975, reward=0.971]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  65%|██████▍   | 293/452 [49:34<26:53,  0.10it/s, v_num=5dwa, train_loss=0.000198, moving_avg_reward=0.975, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  65%|██████▌   | 294/452 [49:43<26:43,  0.10it/s, v_num=5dwa, train_loss=0.00515, moving_avg_reward=0.975, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  65%|██████▌   | 295/452 [49:53<26:33,  0.10it/s, v_num=5dwa, train_loss=0.0159, moving_avg_reward=0.975, reward=0.979] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  65%|██████▌   | 296/452 [50:05<26:24,  0.10it/s, v_num=5dwa, train_loss=0.00247, moving_avg_reward=0.975, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  66%|██████▌   | 297/452 [50:14<26:13,  0.10it/s, v_num=5dwa, train_loss=-0.00312, moving_avg_reward=0.975, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  66%|██████▌   | 298/452 [50:23<26:02,  0.10it/s, v_num=5dwa, train_loss=0.00705, moving_avg_reward=0.975, reward=0.972] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  66%|██████▌   | 299/452 [50:35<25:53,  0.10it/s, v_num=5dwa, train_loss=0.00601, moving_avg_reward=0.975, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  66%|██████▋   | 300/452 [50:45<25:43,  0.10it/s, v_num=5dwa, train_loss=0.000328, moving_avg_reward=0.975, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  67%|██████▋   | 301/452 [50:55<25:32,  0.10it/s, v_num=5dwa, train_loss=-0.019, moving_avg_reward=0.975, reward=0.968]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  67%|██████▋   | 302/452 [51:05<25:22,  0.10it/s, v_num=5dwa, train_loss=0.00478, moving_avg_reward=0.975, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  67%|██████▋   | 303/452 [51:16<25:12,  0.10it/s, v_num=5dwa, train_loss=0.00857, moving_avg_reward=0.975, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  67%|██████▋   | 304/452 [51:25<25:01,  0.10it/s, v_num=5dwa, train_loss=0.00534, moving_avg_reward=0.975, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  67%|██████▋   | 305/452 [51:33<24:51,  0.10it/s, v_num=5dwa, train_loss=0.0154, moving_avg_reward=0.975, reward=0.981] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  68%|██████▊   | 306/452 [51:42<24:40,  0.10it/s, v_num=5dwa, train_loss=0.0263, moving_avg_reward=0.975, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  68%|██████▊   | 307/452 [51:45<24:26,  0.10it/s, v_num=5dwa, train_loss=0.0115, moving_avg_reward=0.975, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  68%|██████▊   | 308/452 [51:54<24:15,  0.10it/s, v_num=5dwa, train_loss=0.00511, moving_avg_reward=0.975, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  68%|██████▊   | 309/452 [52:05<24:06,  0.10it/s, v_num=5dwa, train_loss=-0.00267, moving_avg_reward=0.975, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  69%|██████▊   | 310/452 [52:16<23:56,  0.10it/s, v_num=5dwa, train_loss=-0.000282, moving_avg_reward=0.975, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  69%|██████▉   | 311/452 [52:24<23:45,  0.10it/s, v_num=5dwa, train_loss=-0.0175, moving_avg_reward=0.975, reward=0.973]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  69%|██████▉   | 312/452 [52:34<23:35,  0.10it/s, v_num=5dwa, train_loss=-0.000163, moving_avg_reward=0.975, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  69%|██████▉   | 313/452 [52:45<23:25,  0.10it/s, v_num=5dwa, train_loss=0.00636, moving_avg_reward=0.975, reward=0.978]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  69%|██████▉   | 314/452 [52:52<23:14,  0.10it/s, v_num=5dwa, train_loss=0.00951, moving_avg_reward=0.975, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  70%|██████▉   | 315/452 [53:00<23:03,  0.10it/s, v_num=5dwa, train_loss=0.00738, moving_avg_reward=0.975, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  70%|██████▉   | 316/452 [53:07<22:52,  0.10it/s, v_num=5dwa, train_loss=0.0105, moving_avg_reward=0.975, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  70%|███████   | 317/452 [53:15<22:40,  0.10it/s, v_num=5dwa, train_loss=0.00264, moving_avg_reward=0.975, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  70%|███████   | 318/452 [53:25<22:30,  0.10it/s, v_num=5dwa, train_loss=0.0139, moving_avg_reward=0.975, reward=0.979] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  71%|███████   | 319/452 [53:33<22:19,  0.10it/s, v_num=5dwa, train_loss=0.00143, moving_avg_reward=0.975, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  71%|███████   | 320/452 [53:42<22:09,  0.10it/s, v_num=5dwa, train_loss=0.00162, moving_avg_reward=0.975, reward=0.970]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  71%|███████   | 321/452 [53:50<21:58,  0.10it/s, v_num=5dwa, train_loss=0.00378, moving_avg_reward=0.975, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  71%|███████   | 322/452 [54:00<21:48,  0.10it/s, v_num=5dwa, train_loss=-0.0236, moving_avg_reward=0.975, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  71%|███████▏  | 323/452 [54:09<21:37,  0.10it/s, v_num=5dwa, train_loss=-0.00108, moving_avg_reward=0.975, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  72%|███████▏  | 324/452 [54:18<21:27,  0.10it/s, v_num=5dwa, train_loss=0.00537, moving_avg_reward=0.975, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  72%|███████▏  | 325/452 [54:26<21:16,  0.10it/s, v_num=5dwa, train_loss=0.0122, moving_avg_reward=0.975, reward=0.979] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  72%|███████▏  | 326/452 [54:37<21:06,  0.10it/s, v_num=5dwa, train_loss=0.00229, moving_avg_reward=0.975, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  72%|███████▏  | 327/452 [54:45<20:56,  0.10it/s, v_num=5dwa, train_loss=0.000501, moving_avg_reward=0.975, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  73%|███████▎  | 328/452 [54:56<20:46,  0.10it/s, v_num=5dwa, train_loss=0.00303, moving_avg_reward=0.975, reward=0.973] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  73%|███████▎  | 329/452 [55:04<20:35,  0.10it/s, v_num=5dwa, train_loss=0.00461, moving_avg_reward=0.975, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  73%|███████▎  | 330/452 [55:14<20:25,  0.10it/s, v_num=5dwa, train_loss=0.005, moving_avg_reward=0.975, reward=0.978]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  73%|███████▎  | 331/452 [55:28<20:16,  0.10it/s, v_num=5dwa, train_loss=0.00128, moving_avg_reward=0.975, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  73%|███████▎  | 332/452 [55:38<20:06,  0.10it/s, v_num=5dwa, train_loss=0.0133, moving_avg_reward=0.975, reward=0.978] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  74%|███████▎  | 333/452 [55:58<20:00,  0.10it/s, v_num=5dwa, train_loss=-0.108, moving_avg_reward=0.975, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  74%|███████▍  | 334/452 [56:08<19:50,  0.10it/s, v_num=5dwa, train_loss=0.00228, moving_avg_reward=0.975, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  74%|███████▍  | 335/452 [56:17<19:39,  0.10it/s, v_num=5dwa, train_loss=0.0042, moving_avg_reward=0.975, reward=0.973] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  74%|███████▍  | 336/452 [56:26<19:29,  0.10it/s, v_num=5dwa, train_loss=0.00139, moving_avg_reward=0.975, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  75%|███████▍  | 337/452 [56:38<19:19,  0.10it/s, v_num=5dwa, train_loss=0.020, moving_avg_reward=0.975, reward=0.969]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  75%|███████▍  | 338/452 [56:50<19:10,  0.10it/s, v_num=5dwa, train_loss=0.0033, moving_avg_reward=0.975, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  75%|███████▌  | 339/452 [57:00<19:00,  0.10it/s, v_num=5dwa, train_loss=0.00641, moving_avg_reward=0.975, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  75%|███████▌  | 340/452 [57:08<18:49,  0.10it/s, v_num=5dwa, train_loss=0.0149, moving_avg_reward=0.975, reward=0.980] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  75%|███████▌  | 341/452 [57:18<18:39,  0.10it/s, v_num=5dwa, train_loss=0.0105, moving_avg_reward=0.975, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  76%|███████▌  | 342/452 [57:30<18:29,  0.10it/s, v_num=5dwa, train_loss=0.00302, moving_avg_reward=0.975, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  76%|███████▌  | 343/452 [57:39<18:19,  0.10it/s, v_num=5dwa, train_loss=-0.0126, moving_avg_reward=0.975, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  76%|███████▌  | 344/452 [57:48<18:08,  0.10it/s, v_num=5dwa, train_loss=0.0097, moving_avg_reward=0.975, reward=0.978] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  76%|███████▋  | 345/452 [57:57<17:58,  0.10it/s, v_num=5dwa, train_loss=0.00316, moving_avg_reward=0.975, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  77%|███████▋  | 346/452 [58:05<17:47,  0.10it/s, v_num=5dwa, train_loss=-0.0024, moving_avg_reward=0.975, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  77%|███████▋  | 347/452 [58:16<17:37,  0.10it/s, v_num=5dwa, train_loss=0.00673, moving_avg_reward=0.975, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  77%|███████▋  | 348/452 [58:26<17:27,  0.10it/s, v_num=5dwa, train_loss=0.0037, moving_avg_reward=0.975, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  77%|███████▋  | 349/452 [58:33<17:17,  0.10it/s, v_num=5dwa, train_loss=0.0145, moving_avg_reward=0.975, reward=0.982]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  77%|███████▋  | 350/452 [58:42<17:06,  0.10it/s, v_num=5dwa, train_loss=-0.000705, moving_avg_reward=0.975, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  78%|███████▊  | 351/452 [58:56<16:57,  0.10it/s, v_num=5dwa, train_loss=-0.000675, moving_avg_reward=0.975, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  78%|███████▊  | 352/452 [59:04<16:47,  0.10it/s, v_num=5dwa, train_loss=0.00382, moving_avg_reward=0.975, reward=0.977]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  78%|███████▊  | 353/452 [59:13<16:36,  0.10it/s, v_num=5dwa, train_loss=0.00989, moving_avg_reward=0.976, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  78%|███████▊  | 354/452 [59:24<16:26,  0.10it/s, v_num=5dwa, train_loss=0.0159, moving_avg_reward=0.975, reward=0.953] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  79%|███████▊  | 355/452 [59:31<16:15,  0.10it/s, v_num=5dwa, train_loss=0.00628, moving_avg_reward=0.975, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  79%|███████▉  | 356/452 [59:44<16:06,  0.10it/s, v_num=5dwa, train_loss=-0.0046, moving_avg_reward=0.975, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  79%|███████▉  | 357/452 [59:57<15:57,  0.10it/s, v_num=5dwa, train_loss=0.0161, moving_avg_reward=0.975, reward=0.975] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  79%|███████▉  | 358/452 [1:00:06<15:46,  0.10it/s, v_num=5dwa, train_loss=0.00197, moving_avg_reward=0.975, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  79%|███████▉  | 359/452 [1:00:13<15:36,  0.10it/s, v_num=5dwa, train_loss=0.00191, moving_avg_reward=0.975, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  80%|███████▉  | 360/452 [1:00:24<15:26,  0.10it/s, v_num=5dwa, train_loss=0.00445, moving_avg_reward=0.975, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  80%|███████▉  | 361/452 [1:00:32<15:15,  0.10it/s, v_num=5dwa, train_loss=0.00233, moving_avg_reward=0.975, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  80%|████████  | 362/452 [1:00:41<15:05,  0.10it/s, v_num=5dwa, train_loss=0.0214, moving_avg_reward=0.975, reward=0.977] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  80%|████████  | 363/452 [1:00:48<14:54,  0.10it/s, v_num=5dwa, train_loss=0.00614, moving_avg_reward=0.975, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  81%|████████  | 364/452 [1:00:57<14:44,  0.10it/s, v_num=5dwa, train_loss=-0.00203, moving_avg_reward=0.976, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  81%|████████  | 365/452 [1:01:09<14:34,  0.10it/s, v_num=5dwa, train_loss=0.0043, moving_avg_reward=0.976, reward=0.976]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  81%|████████  | 366/452 [1:01:17<14:24,  0.10it/s, v_num=5dwa, train_loss=0.00391, moving_avg_reward=0.976, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  81%|████████  | 367/452 [1:01:24<14:13,  0.10it/s, v_num=5dwa, train_loss=-0.00202, moving_avg_reward=0.976, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  81%|████████▏ | 368/452 [1:01:31<14:02,  0.10it/s, v_num=5dwa, train_loss=0.00618, moving_avg_reward=0.976, reward=0.979] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  82%|████████▏ | 369/452 [1:01:42<13:52,  0.10it/s, v_num=5dwa, train_loss=0.00413, moving_avg_reward=0.976, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  82%|████████▏ | 370/452 [1:01:49<13:42,  0.10it/s, v_num=5dwa, train_loss=-0.02, moving_avg_reward=0.976, reward=0.977]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  82%|████████▏ | 371/452 [1:01:58<13:31,  0.10it/s, v_num=5dwa, train_loss=0.0176, moving_avg_reward=0.976, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  82%|████████▏ | 372/452 [1:02:07<13:21,  0.10it/s, v_num=5dwa, train_loss=0.0103, moving_avg_reward=0.976, reward=0.983]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  83%|████████▎ | 373/452 [1:02:17<13:11,  0.10it/s, v_num=5dwa, train_loss=0.0159, moving_avg_reward=0.976, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  83%|████████▎ | 374/452 [1:02:24<13:00,  0.10it/s, v_num=5dwa, train_loss=-0.0143, moving_avg_reward=0.976, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  83%|████████▎ | 375/452 [1:02:36<12:51,  0.10it/s, v_num=5dwa, train_loss=0.00837, moving_avg_reward=0.976, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  83%|████████▎ | 376/452 [1:02:48<12:41,  0.10it/s, v_num=5dwa, train_loss=-0.000119, moving_avg_reward=0.976, reward=0.970]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  83%|████████▎ | 377/452 [1:02:56<12:31,  0.10it/s, v_num=5dwa, train_loss=0.0105, moving_avg_reward=0.976, reward=0.979]   

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  84%|████████▎ | 378/452 [1:03:05<12:21,  0.10it/s, v_num=5dwa, train_loss=0.0115, moving_avg_reward=0.976, reward=0.967]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  84%|████████▍ | 379/452 [1:03:16<12:11,  0.10it/s, v_num=5dwa, train_loss=0.000136, moving_avg_reward=0.976, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  84%|████████▍ | 380/452 [1:03:26<12:01,  0.10it/s, v_num=5dwa, train_loss=0.00712, moving_avg_reward=0.975, reward=0.955] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  84%|████████▍ | 381/452 [1:03:37<11:51,  0.10it/s, v_num=5dwa, train_loss=7.43e-5, moving_avg_reward=0.975, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  85%|████████▍ | 382/452 [1:03:46<11:41,  0.10it/s, v_num=5dwa, train_loss=0.00662, moving_avg_reward=0.975, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  85%|████████▍ | 383/452 [1:03:55<11:30,  0.10it/s, v_num=5dwa, train_loss=0.0141, moving_avg_reward=0.975, reward=0.977] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  85%|████████▍ | 384/452 [1:04:05<11:20,  0.10it/s, v_num=5dwa, train_loss=0.00931, moving_avg_reward=0.976, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  85%|████████▌ | 385/452 [1:04:15<11:10,  0.10it/s, v_num=5dwa, train_loss=0.00232, moving_avg_reward=0.975, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  85%|████████▌ | 386/452 [1:04:24<11:00,  0.10it/s, v_num=5dwa, train_loss=0.00144, moving_avg_reward=0.975, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  86%|████████▌ | 387/452 [1:04:32<10:50,  0.10it/s, v_num=5dwa, train_loss=0.00503, moving_avg_reward=0.975, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  86%|████████▌ | 388/452 [1:04:42<10:40,  0.10it/s, v_num=5dwa, train_loss=0.0122, moving_avg_reward=0.976, reward=0.978] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  86%|████████▌ | 389/452 [1:04:49<10:29,  0.10it/s, v_num=5dwa, train_loss=0.00435, moving_avg_reward=0.976, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  86%|████████▋ | 390/452 [1:04:58<10:19,  0.10it/s, v_num=5dwa, train_loss=0.00533, moving_avg_reward=0.976, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  87%|████████▋ | 391/452 [1:05:06<10:09,  0.10it/s, v_num=5dwa, train_loss=0.00293, moving_avg_reward=0.976, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  87%|████████▋ | 392/452 [1:05:18<09:59,  0.10it/s, v_num=5dwa, train_loss=0.0127, moving_avg_reward=0.976, reward=0.980] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  87%|████████▋ | 393/452 [1:05:30<09:50,  0.10it/s, v_num=5dwa, train_loss=0.00737, moving_avg_reward=0.976, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  87%|████████▋ | 394/452 [1:05:39<09:39,  0.10it/s, v_num=5dwa, train_loss=0.0024, moving_avg_reward=0.976, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  87%|████████▋ | 395/452 [1:05:50<09:30,  0.10it/s, v_num=5dwa, train_loss=-0.00554, moving_avg_reward=0.976, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  88%|████████▊ | 396/452 [1:05:59<09:19,  0.10it/s, v_num=5dwa, train_loss=0.00606, moving_avg_reward=0.976, reward=0.975] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  88%|████████▊ | 397/452 [1:06:06<09:09,  0.10it/s, v_num=5dwa, train_loss=0.00761, moving_avg_reward=0.976, reward=0.973]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  88%|████████▊ | 398/452 [1:06:15<08:59,  0.10it/s, v_num=5dwa, train_loss=0.0135, moving_avg_reward=0.976, reward=0.977] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  88%|████████▊ | 399/452 [1:06:22<08:49,  0.10it/s, v_num=5dwa, train_loss=0.00893, moving_avg_reward=0.976, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  88%|████████▊ | 400/452 [1:06:33<08:39,  0.10it/s, v_num=5dwa, train_loss=0.00359, moving_avg_reward=0.976, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  89%|████████▊ | 401/452 [1:06:43<08:29,  0.10it/s, v_num=5dwa, train_loss=0.00163, moving_avg_reward=0.976, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  89%|████████▉ | 402/452 [1:06:51<08:19,  0.10it/s, v_num=5dwa, train_loss=0.0106, moving_avg_reward=0.976, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  89%|████████▉ | 403/452 [1:07:00<08:08,  0.10it/s, v_num=5dwa, train_loss=0.00392, moving_avg_reward=0.976, reward=0.982]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  89%|████████▉ | 404/452 [1:07:11<07:58,  0.10it/s, v_num=5dwa, train_loss=0.00405, moving_avg_reward=0.976, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  90%|████████▉ | 405/452 [1:07:19<07:48,  0.10it/s, v_num=5dwa, train_loss=-0.00134, moving_avg_reward=0.976, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  90%|████████▉ | 406/452 [1:07:29<07:38,  0.10it/s, v_num=5dwa, train_loss=0.0147, moving_avg_reward=0.976, reward=0.977]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  90%|█████████ | 407/452 [1:07:39<07:28,  0.10it/s, v_num=5dwa, train_loss=-0.0188, moving_avg_reward=0.976, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  90%|█████████ | 408/452 [1:07:48<07:18,  0.10it/s, v_num=5dwa, train_loss=0.00165, moving_avg_reward=0.976, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  90%|█████████ | 409/452 [1:07:55<07:08,  0.10it/s, v_num=5dwa, train_loss=0.00154, moving_avg_reward=0.976, reward=0.982]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  91%|█████████ | 410/452 [1:08:04<06:58,  0.10it/s, v_num=5dwa, train_loss=0.000983, moving_avg_reward=0.976, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  91%|█████████ | 411/452 [1:08:13<06:48,  0.10it/s, v_num=5dwa, train_loss=0.000856, moving_avg_reward=0.976, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  91%|█████████ | 412/452 [1:08:24<06:38,  0.10it/s, v_num=5dwa, train_loss=0.0178, moving_avg_reward=0.976, reward=0.980]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  91%|█████████▏| 413/452 [1:08:35<06:28,  0.10it/s, v_num=5dwa, train_loss=-0.0113, moving_avg_reward=0.976, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  92%|█████████▏| 414/452 [1:08:47<06:18,  0.10it/s, v_num=5dwa, train_loss=0.00233, moving_avg_reward=0.976, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  92%|█████████▏| 415/452 [1:08:54<06:08,  0.10it/s, v_num=5dwa, train_loss=-0.00974, moving_avg_reward=0.976, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  92%|█████████▏| 416/452 [1:09:04<05:58,  0.10it/s, v_num=5dwa, train_loss=0.0136, moving_avg_reward=0.976, reward=0.972]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  92%|█████████▏| 417/452 [1:09:14<05:48,  0.10it/s, v_num=5dwa, train_loss=0.00889, moving_avg_reward=0.976, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  92%|█████████▏| 418/452 [1:09:23<05:38,  0.10it/s, v_num=5dwa, train_loss=0.000384, moving_avg_reward=0.976, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  93%|█████████▎| 419/452 [1:09:35<05:28,  0.10it/s, v_num=5dwa, train_loss=0.00809, moving_avg_reward=0.976, reward=0.979] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  93%|█████████▎| 420/452 [1:09:43<05:18,  0.10it/s, v_num=5dwa, train_loss=0.00627, moving_avg_reward=0.976, reward=0.974]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  93%|█████████▎| 421/452 [1:09:53<05:08,  0.10it/s, v_num=5dwa, train_loss=0.00305, moving_avg_reward=0.976, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  93%|█████████▎| 422/452 [1:10:02<04:58,  0.10it/s, v_num=5dwa, train_loss=0.00675, moving_avg_reward=0.976, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  94%|█████████▎| 423/452 [1:10:10<04:48,  0.10it/s, v_num=5dwa, train_loss=-0.00229, moving_avg_reward=0.976, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  94%|█████████▍| 424/452 [1:10:20<04:38,  0.10it/s, v_num=5dwa, train_loss=0.017, moving_avg_reward=0.976, reward=0.981]   

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  94%|█████████▍| 425/452 [1:10:32<04:28,  0.10it/s, v_num=5dwa, train_loss=0.00943, moving_avg_reward=0.976, reward=0.982]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  94%|█████████▍| 426/452 [1:10:41<04:18,  0.10it/s, v_num=5dwa, train_loss=-0.0611, moving_avg_reward=0.976, reward=0.966]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  94%|█████████▍| 427/452 [1:10:49<04:08,  0.10it/s, v_num=5dwa, train_loss=-0.00255, moving_avg_reward=0.976, reward=0.971]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  95%|█████████▍| 428/452 [1:11:01<03:58,  0.10it/s, v_num=5dwa, train_loss=0.0102, moving_avg_reward=0.976, reward=0.979]  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  95%|█████████▍| 429/452 [1:11:10<03:48,  0.10it/s, v_num=5dwa, train_loss=0.0104, moving_avg_reward=0.976, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  95%|█████████▌| 430/452 [1:11:18<03:38,  0.10it/s, v_num=5dwa, train_loss=0.00842, moving_avg_reward=0.976, reward=0.982]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  95%|█████████▌| 431/452 [1:11:27<03:28,  0.10it/s, v_num=5dwa, train_loss=0.000218, moving_avg_reward=0.976, reward=0.972]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  96%|█████████▌| 432/452 [1:11:35<03:18,  0.10it/s, v_num=5dwa, train_loss=0.00998, moving_avg_reward=0.976, reward=0.979] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  96%|█████████▌| 433/452 [1:11:43<03:08,  0.10it/s, v_num=5dwa, train_loss=0.00193, moving_avg_reward=0.976, reward=0.980]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  96%|█████████▌| 434/452 [1:11:53<02:58,  0.10it/s, v_num=5dwa, train_loss=0.000779, moving_avg_reward=0.976, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  96%|█████████▌| 435/452 [1:12:02<02:48,  0.10it/s, v_num=5dwa, train_loss=0.00199, moving_avg_reward=0.976, reward=0.971] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  96%|█████████▋| 436/452 [1:12:11<02:38,  0.10it/s, v_num=5dwa, train_loss=0.00878, moving_avg_reward=0.976, reward=0.982]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  97%|█████████▋| 437/452 [1:12:19<02:28,  0.10it/s, v_num=5dwa, train_loss=0.00283, moving_avg_reward=0.976, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  97%|█████████▋| 438/452 [1:12:29<02:19,  0.10it/s, v_num=5dwa, train_loss=0.0139, moving_avg_reward=0.976, reward=0.980] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  97%|█████████▋| 439/452 [1:12:40<02:09,  0.10it/s, v_num=5dwa, train_loss=0.00195, moving_avg_reward=0.976, reward=0.968]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  97%|█████████▋| 440/452 [1:12:50<01:59,  0.10it/s, v_num=5dwa, train_loss=0.0106, moving_avg_reward=0.976, reward=0.977] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  98%|█████████▊| 441/452 [1:13:01<01:49,  0.10it/s, v_num=5dwa, train_loss=0.00811, moving_avg_reward=0.976, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  98%|█████████▊| 442/452 [1:13:10<01:39,  0.10it/s, v_num=5dwa, train_loss=0.00659, moving_avg_reward=0.976, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  98%|█████████▊| 443/452 [1:13:17<01:29,  0.10it/s, v_num=5dwa, train_loss=0.00574, moving_avg_reward=0.976, reward=0.976]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  98%|█████████▊| 444/452 [1:13:29<01:19,  0.10it/s, v_num=5dwa, train_loss=0.0061, moving_avg_reward=0.976, reward=0.976] 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  98%|█████████▊| 445/452 [1:13:38<01:09,  0.10it/s, v_num=5dwa, train_loss=0.00196, moving_avg_reward=0.976, reward=0.978]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  99%|█████████▊| 446/452 [1:13:47<00:59,  0.10it/s, v_num=5dwa, train_loss=0.00336, moving_avg_reward=0.976, reward=0.977]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  99%|█████████▉| 447/452 [1:13:56<00:49,  0.10it/s, v_num=5dwa, train_loss=0.00828, moving_avg_reward=0.976, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  99%|█████████▉| 448/452 [1:14:05<00:39,  0.10it/s, v_num=5dwa, train_loss=0.00709, moving_avg_reward=0.976, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2:  99%|█████████▉| 449/452 [1:14:15<00:29,  0.10it/s, v_num=5dwa, train_loss=0.00202, moving_avg_reward=0.976, reward=0.979]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2: 100%|█████████▉| 450/452 [1:14:23<00:19,  0.10it/s, v_num=5dwa, train_loss=0.00263, moving_avg_reward=0.976, reward=0.981]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2: 100%|█████████▉| 451/452 [1:14:31<00:09,  0.10it/s, v_num=5dwa, train_loss=0.00232, moving_avg_reward=0.976, reward=0.975]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Epoch 2: 100%|██████████| 452/452 [1:14:35<00:00,  0.10it/s, v_num=5dwa, train_loss=-0.000108, moving_avg_reward=0.976, reward=0.978]

`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch 2: 100%|██████████| 452/452 [1:14:38<00:00,  0.10it/s, v_num=5dwa, train_loss=-0.000108, moving_avg_reward=0.976, reward=0.978]
