In [None]:
!pip install -U accelerate
!pip install -U transformers
!pip install --upgrade huggingface_hub
!pip install evaluate

In [None]:
# Log in to the Hugging Face Hub. Later cells train with push_to_hub=True and
# call push_to_hub() on the trainer and tokenizer, so credentials are needed
# before training starts.
from huggingface_hub import login
login()

In [None]:
from google.colab import files

# Ask the user to upload files through the Colab widget; `uploaded` maps each
# file name to its raw content.
uploaded = files.upload()

# Keep the uploaded names around and report the size of every file.
fnames = list(uploaded.keys())
for fn in fnames:
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(uploaded[fn])))

In [None]:
# Echo the dataset filename that the training cells open. This prints the
# literal only; it does not check that the file exists.
print("nosql_injection_dataset.json")

In [None]:
import gc
import torch
# Collect unreachable Python objects first so any tensors they still hold are
# freed, then hand the now-unused cached blocks back to the CUDA driver.
# (Emptying the cache before collecting leaves that memory cached.)
gc.collect()
torch.cuda.empty_cache()

In [None]:
import torch
import accelerate
from transformers import AutoModelForMaskedLM, AutoTokenizer, DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments
from torch.utils.data import Dataset, DataLoader
import json

# Define the dataset class
import torch
from torch.utils.data import Dataset, TensorDataset

import torch
from torch.utils.data import Dataset

class NoSQLInjectionDataset(Dataset):
    """Masked-LM dataset over NoSQL-injection ``template``/``payload`` records.

    The records are split in their given order (no shuffling): the leading
    ``split_ratio`` fraction is the training split, exposed through
    ``__len__``/``__getitem__``; the remainder is the evaluation split,
    returned already tokenized by ``get_eval_dataset``.

    Args:
        dataset: Sliceable sequence of mappings, each with ``"template"`` and
            ``"payload"`` entries that are passed to the tokenizer as a
            text pair.
        tokenizer: Callable tokenizer. It is invoked as
            ``tokenizer(template, payload, truncation=..., padding=...,
            max_length=..., return_tensors="pt")`` and must return a mapping
            with ``"input_ids"`` and ``"attention_mask"`` tensors whose first
            dimension is a batch axis of size 1.
        split_ratio: Fraction of ``dataset`` assigned to the training split.
        max_length: Length every example is padded/truncated to.
    """

    def __init__(self, dataset, tokenizer, split_ratio=0.9, max_length=512):
        self.dataset = dataset
        self.tokenizer = tokenizer
        self.split_ratio = split_ratio
        self.max_length = max_length

        # Compute the boundary once so both splits are guaranteed to be
        # complementary.
        split_index = int(len(self.dataset) * self.split_ratio)
        self.train_dataset = self.dataset[:split_index]
        self.eval_dataset = self.dataset[split_index:]

    def __len__(self):
        """Return the number of examples in the training split."""
        return len(self.train_dataset)

    def __getitem__(self, idx):
        """Tokenize and return the training example at position ``idx``."""
        return self._encode(self.train_dataset[idx])

    def get_eval_dataset(self):
        """Return the evaluation split as a list of tokenized examples."""
        return [self._encode(sample) for sample in self.eval_dataset]

    def _encode(self, sample):
        """Tokenize one record into ``input_ids``/``attention_mask``/``labels``."""
        # Calling the tokenizer directly replaces the deprecated encode_plus.
        encoded = self.tokenizer(
            sample["template"],
            sample["payload"],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt",
        )

        # Drop only the leading batch axis; a bare squeeze() would also remove
        # the sequence axis if it ever had length 1.
        input_ids = encoded["input_ids"].squeeze(0)
        attention_mask = encoded["attention_mask"].squeeze(0)

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            # Independent copy so in-place edits to the labels can never
            # alter the inputs (and vice versa).
            "labels": input_ids.clone(),
        }
# Load the raw records; NoSQLInjectionDataset slices this object and reads the
# "template" and "payload" entries of every record.
with open("nosql_injection_dataset.json", "r") as f:
    dataset = json.load(f)

# Load the tokenizer and model.
# The pad token is deliberately left untouched: RoBERTa tokenizers already
# define a dedicated <pad> token, and overriding it with EOS (a GPT-2 idiom)
# would pad with </s> and publish a tokenizer that disagrees with the model
# config's pad token id.
tokenizer = AutoTokenizer.from_pretrained("distilroberta-base")
model = AutoModelForMaskedLM.from_pretrained("distilroberta-base")

# The collator applies random masking (15% of tokens) per batch.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

# Wrap the records: the object itself serves the training split, the
# held-out remainder is materialized as a list.
train_dataset = NoSQLInjectionDataset(dataset, tokenizer)
eval_dataset = train_dataset.get_eval_dataset()

# Set up the training arguments and trainer
# NOTE(review): recent transformers releases rename `evaluation_strategy` to
# `eval_strategy` - confirm against the installed version.
training_args = TrainingArguments(
    output_dir="fine-tuned-distilroberta-nosql-injection",
    evaluation_strategy="epoch",
    num_train_epochs=75,
    per_device_train_batch_size=8,
    save_steps=1000,
    logging_steps=100,  # used to adjust the frequency of logging loss values
    logging_dir="./logs",
    overwrite_output_dir=True,
    learning_rate=2e-5,  # Adjust as needed
    warmup_steps=100,  # Adjust as needed
    weight_decay=0.01,  # Adjust as needed
    push_to_hub=True
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
)

# Fine-tune the model
trainer.train()

# Push the fine-tuned model and tokenizer to the Hugging Face Model Hub.
# Trainer.push_to_hub takes a commit message as its first argument (the target
# repository comes from the training arguments), whereas the tokenizer's
# push_to_hub takes the repository id.
trainer.push_to_hub(commit_message="Fine-tune distilroberta-base on NoSQL injection data")
tokenizer.push_to_hub("fine-tuned-distilroberta-nosql-injection")
print("Fine-tuned model saved and pushed to the Hugging Face Model Hub.")

In [None]:
# Re-push the model and tokenizer (e.g. after an interrupted upload).
# Trainer.push_to_hub's first argument is the commit message, not the
# repository name, so it is passed by keyword here; the tokenizer's
# push_to_hub does take the repository id.
trainer.push_to_hub(commit_message="Re-upload fine-tuned model")
tokenizer.push_to_hub("fine-tuned-distilroberta-nosql-injection")
print("Fine-tuned model saved and pushed to the Hugging Face Model Hub.")

In [None]:
import math
from pathlib import Path

# Evaluate on the held-out split and report perplexity = exp(eval loss).
eval_results = trainer.evaluate()
print(f"Perplexity: {math.exp(eval_results['eval_loss']):.2f}")

# The tokenizer's push_to_hub requires an explicit repository id (calling it
# with no arguments raises TypeError). Use the trainer's configured repo, or
# fall back to the output directory name when no hub_model_id was set.
hub_repo = trainer.args.hub_model_id or Path(trainer.args.output_dir).name
trainer.push_to_hub()
tokenizer.push_to_hub(hub_repo)

In [None]:
from transformers import pipeline

# Build a fill-mask pipeline from the checkpoint published on the Hub.
mask_filler = pipeline(
    task="fill-mask",
    model="ankush-003/fine-tuned-distilroberta-nosql-injection",
)

# Ask for the three most likely replacements of <mask> in a sample query.
mask_filler("{username: <mask>, password: anksh}", top_k=3)

In [None]:
!tensorboard --logdir=logs

## Trying RoBERTa

In [1]:
from transformers import AutoTokenizer, RobertaForMaskedLM, DataCollatorForLanguageModeling
import torch
import accelerate
from transformers import Trainer, TrainingArguments
from torch.utils.data import Dataset, DataLoader
import json

class NoSQLInjectionDataset(Dataset):
    """Masked-LM dataset over NoSQL-injection ``template``/``payload`` records.

    The records are split in their given order (no shuffling): the leading
    ``split_ratio`` fraction is the training split, exposed through
    ``__len__``/``__getitem__``; the remainder is the evaluation split,
    returned already tokenized by ``get_eval_dataset``.

    Args:
        dataset: Sliceable sequence of mappings, each with ``"template"`` and
            ``"payload"`` entries that are passed to the tokenizer as a
            text pair.
        tokenizer: Callable tokenizer. It is invoked as
            ``tokenizer(template, payload, truncation=..., padding=...,
            max_length=..., return_tensors="pt")`` and must return a mapping
            with ``"input_ids"`` and ``"attention_mask"`` tensors whose first
            dimension is a batch axis of size 1.
        split_ratio: Fraction of ``dataset`` assigned to the training split.
        max_length: Length every example is padded/truncated to.
    """

    def __init__(self, dataset, tokenizer, split_ratio=0.9, max_length=512):
        self.dataset = dataset
        self.tokenizer = tokenizer
        self.split_ratio = split_ratio
        self.max_length = max_length

        # Compute the boundary once so both splits are guaranteed to be
        # complementary.
        split_index = int(len(self.dataset) * self.split_ratio)
        self.train_dataset = self.dataset[:split_index]
        self.eval_dataset = self.dataset[split_index:]

    def __len__(self):
        """Return the number of examples in the training split."""
        return len(self.train_dataset)

    def __getitem__(self, idx):
        """Tokenize and return the training example at position ``idx``."""
        return self._encode(self.train_dataset[idx])

    def get_eval_dataset(self):
        """Return the evaluation split as a list of tokenized examples."""
        return [self._encode(sample) for sample in self.eval_dataset]

    def _encode(self, sample):
        """Tokenize one record into ``input_ids``/``attention_mask``/``labels``."""
        # Calling the tokenizer directly replaces the deprecated encode_plus.
        encoded = self.tokenizer(
            sample["template"],
            sample["payload"],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt",
        )

        # Drop only the leading batch axis; a bare squeeze() would also remove
        # the sequence axis if it ever had length 1.
        input_ids = encoded["input_ids"].squeeze(0)
        attention_mask = encoded["attention_mask"].squeeze(0)

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            # Independent copy so in-place edits to the labels can never
            # alter the inputs (and vice versa).
            "labels": input_ids.clone(),
        }
# Load the raw records; NoSQLInjectionDataset slices this object and reads the
# "template" and "payload" entries of every record.
with open("nosql_injection_dataset.json", "r") as f:
    dataset = json.load(f)

# The pad token is deliberately left untouched: RoBERTa tokenizers already
# define a dedicated <pad> token, and overriding it with EOS (a GPT-2 idiom)
# would pad with </s> and publish a tokenizer that disagrees with the model
# config's pad token id.
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

# The collator applies random masking (15% of tokens) per batch.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

# Wrap the records: the object itself serves the training split, the
# held-out remainder is materialized as a list.
train_dataset = NoSQLInjectionDataset(dataset, tokenizer)
eval_dataset = train_dataset.get_eval_dataset()

model = RobertaForMaskedLM.from_pretrained("roberta-base")

# Set up the training arguments and trainer
# NOTE(review): recent transformers releases rename `evaluation_strategy` to
# `eval_strategy` - confirm against the installed version.
training_args = TrainingArguments(
    output_dir="fine-tuned-roberta-nosql-injection",
    evaluation_strategy="epoch",
    num_train_epochs=75,
    per_device_train_batch_size=4,
    save_steps=1000,
    logging_steps=100,  # used to adjust the frequency of logging loss values
    logging_dir="./logs",
    overwrite_output_dir=True,
    learning_rate=2e-5,  # Adjust as needed
    warmup_steps=100,  # Adjust as needed
    weight_decay=0.01,  # Adjust as needed
    push_to_hub=True
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
)

# Fine-tune the model
trainer.train()

# Push the fine-tuned model and tokenizer to the Hugging Face Model Hub.
# Trainer.push_to_hub takes a commit message as its first argument (the target
# repository comes from the training arguments), whereas the tokenizer's
# push_to_hub takes the repository id.
trainer.push_to_hub(commit_message="Fine-tune roberta-base on NoSQL injection data")
tokenizer.push_to_hub("fine-tuned-roberta-nosql-injection")
print("Fine-tuned model saved and pushed to the Hugging Face Model Hub.")

2023-07-18 19:31:39.146214: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
/home/gariman/Downloads/angus/fine-tuned-roberta-nosql-injection is already a clone of https://huggingface.co/ankush-003/fine-tuned-roberta-nosql-injection. Make sure you pull the latest changes with `repo.git_pull()`.


  0%|          | 0/11850 [00:00<?, ?it/s]

You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 1.2572, 'learning_rate': 2e-05, 'epoch': 0.63}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.22349603474140167, 'eval_runtime': 2.0259, 'eval_samples_per_second': 34.553, 'eval_steps_per_second': 4.442, 'epoch': 1.0}
{'loss': 0.3488, 'learning_rate': 1.9829787234042554e-05, 'epoch': 1.27}
{'loss': 0.1175, 'learning_rate': 1.9659574468085107e-05, 'epoch': 1.9}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.03252168744802475, 'eval_runtime': 2.0606, 'eval_samples_per_second': 33.971, 'eval_steps_per_second': 4.368, 'epoch': 2.0}
{'loss': 0.0454, 'learning_rate': 1.948936170212766e-05, 'epoch': 2.53}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.10793640464544296, 'eval_runtime': 2.0654, 'eval_samples_per_second': 33.893, 'eval_steps_per_second': 4.358, 'epoch': 3.0}
{'loss': 0.0492, 'learning_rate': 1.9319148936170213e-05, 'epoch': 3.16}
{'loss': 0.05, 'learning_rate': 1.914893617021277e-05, 'epoch': 3.8}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.02118230238556862, 'eval_runtime': 2.0376, 'eval_samples_per_second': 34.353, 'eval_steps_per_second': 4.417, 'epoch': 4.0}
{'loss': 0.0677, 'learning_rate': 1.8978723404255322e-05, 'epoch': 4.43}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.07134803384542465, 'eval_runtime': 2.0445, 'eval_samples_per_second': 34.239, 'eval_steps_per_second': 4.402, 'epoch': 5.0}
{'loss': 0.0496, 'learning_rate': 1.8808510638297875e-05, 'epoch': 5.06}
{'loss': 0.0821, 'learning_rate': 1.8638297872340427e-05, 'epoch': 5.7}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.0007128362194634974, 'eval_runtime': 2.0564, 'eval_samples_per_second': 34.04, 'eval_steps_per_second': 4.377, 'epoch': 6.0}
{'loss': 0.0536, 'learning_rate': 1.846808510638298e-05, 'epoch': 6.33}
{'loss': 0.0259, 'learning_rate': 1.8297872340425533e-05, 'epoch': 6.96}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.027743149548768997, 'eval_runtime': 2.036, 'eval_samples_per_second': 34.381, 'eval_steps_per_second': 4.42, 'epoch': 7.0}
{'loss': 0.0422, 'learning_rate': 1.8127659574468086e-05, 'epoch': 7.59}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.006808724254369736, 'eval_runtime': 2.0425, 'eval_samples_per_second': 34.272, 'eval_steps_per_second': 4.406, 'epoch': 8.0}
{'loss': 0.0547, 'learning_rate': 1.795744680851064e-05, 'epoch': 8.23}
{'loss': 0.0282, 'learning_rate': 1.778723404255319e-05, 'epoch': 8.86}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.049164772033691406, 'eval_runtime': 2.0402, 'eval_samples_per_second': 34.311, 'eval_steps_per_second': 4.411, 'epoch': 9.0}
{'loss': 0.0273, 'learning_rate': 1.7617021276595748e-05, 'epoch': 9.49}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.0008200591546483338, 'eval_runtime': 2.0565, 'eval_samples_per_second': 34.039, 'eval_steps_per_second': 4.376, 'epoch': 10.0}
{'loss': 0.04, 'learning_rate': 1.74468085106383e-05, 'epoch': 10.13}
{'loss': 0.0272, 'learning_rate': 1.7276595744680853e-05, 'epoch': 10.76}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.025606831535696983, 'eval_runtime': 2.056, 'eval_samples_per_second': 34.047, 'eval_steps_per_second': 4.378, 'epoch': 11.0}
{'loss': 0.0859, 'learning_rate': 1.7106382978723406e-05, 'epoch': 11.39}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 2.8666681828326546e-05, 'eval_runtime': 2.0935, 'eval_samples_per_second': 33.437, 'eval_steps_per_second': 4.299, 'epoch': 12.0}
{'loss': 0.0154, 'learning_rate': 1.693617021276596e-05, 'epoch': 12.03}
{'loss': 0.0271, 'learning_rate': 1.676595744680851e-05, 'epoch': 12.66}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 6.481639866251498e-05, 'eval_runtime': 2.0038, 'eval_samples_per_second': 34.934, 'eval_steps_per_second': 4.492, 'epoch': 13.0}
{'loss': 0.0583, 'learning_rate': 1.6595744680851064e-05, 'epoch': 13.29}
{'loss': 0.0058, 'learning_rate': 1.6425531914893617e-05, 'epoch': 13.92}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.05833156406879425, 'eval_runtime': 2.0264, 'eval_samples_per_second': 34.544, 'eval_steps_per_second': 4.441, 'epoch': 14.0}
{'loss': 0.0121, 'learning_rate': 1.625531914893617e-05, 'epoch': 14.56}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.025702187791466713, 'eval_runtime': 2.0421, 'eval_samples_per_second': 34.279, 'eval_steps_per_second': 4.407, 'epoch': 15.0}
{'loss': 0.0802, 'learning_rate': 1.6085106382978726e-05, 'epoch': 15.19}
{'loss': 0.0189, 'learning_rate': 1.591489361702128e-05, 'epoch': 15.82}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.06313808262348175, 'eval_runtime': 2.0652, 'eval_samples_per_second': 33.895, 'eval_steps_per_second': 4.358, 'epoch': 16.0}
{'loss': 0.0275, 'learning_rate': 1.5744680851063832e-05, 'epoch': 16.46}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.018600421026349068, 'eval_runtime': 2.038, 'eval_samples_per_second': 34.347, 'eval_steps_per_second': 4.416, 'epoch': 17.0}
{'loss': 0.0349, 'learning_rate': 1.5574468085106385e-05, 'epoch': 17.09}
{'loss': 0.006, 'learning_rate': 1.5404255319148937e-05, 'epoch': 17.72}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.0026994061190634966, 'eval_runtime': 2.0051, 'eval_samples_per_second': 34.912, 'eval_steps_per_second': 4.489, 'epoch': 18.0}
{'loss': 0.0905, 'learning_rate': 1.523404255319149e-05, 'epoch': 18.35}
{'loss': 0.025, 'learning_rate': 1.5063829787234043e-05, 'epoch': 18.99}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.03487260639667511, 'eval_runtime': 2.0635, 'eval_samples_per_second': 33.923, 'eval_steps_per_second': 4.362, 'epoch': 19.0}
{'loss': 0.0377, 'learning_rate': 1.4893617021276596e-05, 'epoch': 19.62}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.000436119589721784, 'eval_runtime': 2.0492, 'eval_samples_per_second': 34.159, 'eval_steps_per_second': 4.392, 'epoch': 20.0}
{'loss': 0.029, 'learning_rate': 1.4723404255319149e-05, 'epoch': 20.25}
{'loss': 0.0108, 'learning_rate': 1.4553191489361705e-05, 'epoch': 20.89}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.009069104678928852, 'eval_runtime': 2.048, 'eval_samples_per_second': 34.179, 'eval_steps_per_second': 4.394, 'epoch': 21.0}
{'loss': 0.0233, 'learning_rate': 1.4382978723404258e-05, 'epoch': 21.52}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.07717707753181458, 'eval_runtime': 2.0443, 'eval_samples_per_second': 34.242, 'eval_steps_per_second': 4.403, 'epoch': 22.0}
{'loss': 0.0225, 'learning_rate': 1.421276595744681e-05, 'epoch': 22.15}
{'loss': 0.0216, 'learning_rate': 1.4042553191489363e-05, 'epoch': 22.78}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 3.670170599434641e-06, 'eval_runtime': 2.0413, 'eval_samples_per_second': 34.292, 'eval_steps_per_second': 4.409, 'epoch': 23.0}
{'loss': 0.0255, 'learning_rate': 1.3872340425531916e-05, 'epoch': 23.42}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.060653094202280045, 'eval_runtime': 2.0236, 'eval_samples_per_second': 34.592, 'eval_steps_per_second': 4.448, 'epoch': 24.0}
{'loss': 0.0149, 'learning_rate': 1.3702127659574469e-05, 'epoch': 24.05}
{'loss': 0.0211, 'learning_rate': 1.3531914893617022e-05, 'epoch': 24.68}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.025050142779946327, 'eval_runtime': 2.0558, 'eval_samples_per_second': 34.049, 'eval_steps_per_second': 4.378, 'epoch': 25.0}
{'loss': 0.0019, 'learning_rate': 1.3361702127659574e-05, 'epoch': 25.32}
{'loss': 0.037, 'learning_rate': 1.3191489361702127e-05, 'epoch': 25.95}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.02225312776863575, 'eval_runtime': 2.0397, 'eval_samples_per_second': 34.319, 'eval_steps_per_second': 4.412, 'epoch': 26.0}
{'loss': 0.0057, 'learning_rate': 1.3021276595744683e-05, 'epoch': 26.58}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.037502843886613846, 'eval_runtime': 2.0442, 'eval_samples_per_second': 34.243, 'eval_steps_per_second': 4.403, 'epoch': 27.0}
{'loss': 0.0082, 'learning_rate': 1.2851063829787236e-05, 'epoch': 27.22}
{'loss': 0.0464, 'learning_rate': 1.2680851063829789e-05, 'epoch': 27.85}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.06586397439241409, 'eval_runtime': 2.0705, 'eval_samples_per_second': 33.809, 'eval_steps_per_second': 4.347, 'epoch': 28.0}
{'loss': 0.0446, 'learning_rate': 1.2510638297872342e-05, 'epoch': 28.48}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.02349529042840004, 'eval_runtime': 2.0379, 'eval_samples_per_second': 34.35, 'eval_steps_per_second': 4.416, 'epoch': 29.0}
{'loss': 0.0199, 'learning_rate': 1.2340425531914895e-05, 'epoch': 29.11}
{'loss': 0.0453, 'learning_rate': 1.2170212765957448e-05, 'epoch': 29.75}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.02776062674820423, 'eval_runtime': 2.0625, 'eval_samples_per_second': 33.94, 'eval_steps_per_second': 4.364, 'epoch': 30.0}
{'loss': 0.0033, 'learning_rate': 1.2e-05, 'epoch': 30.38}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.041656725108623505, 'eval_runtime': 2.0286, 'eval_samples_per_second': 34.506, 'eval_steps_per_second': 4.437, 'epoch': 31.0}
{'loss': 0.0091, 'learning_rate': 1.1829787234042553e-05, 'epoch': 31.01}
{'loss': 0.0104, 'learning_rate': 1.1659574468085106e-05, 'epoch': 31.65}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.054390206933021545, 'eval_runtime': 2.041, 'eval_samples_per_second': 34.297, 'eval_steps_per_second': 4.41, 'epoch': 32.0}
{'loss': 0.016, 'learning_rate': 1.1489361702127662e-05, 'epoch': 32.28}
{'loss': 0.0084, 'learning_rate': 1.1319148936170215e-05, 'epoch': 32.91}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 1.4113105862634256e-05, 'eval_runtime': 2.0464, 'eval_samples_per_second': 34.206, 'eval_steps_per_second': 4.398, 'epoch': 33.0}
{'loss': 0.0004, 'learning_rate': 1.1148936170212768e-05, 'epoch': 33.54}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.02471860498189926, 'eval_runtime': 1.9992, 'eval_samples_per_second': 35.014, 'eval_steps_per_second': 4.502, 'epoch': 34.0}
{'loss': 0.0136, 'learning_rate': 1.097872340425532e-05, 'epoch': 34.18}
{'loss': 0.0185, 'learning_rate': 1.0808510638297873e-05, 'epoch': 34.81}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.00024850681074894965, 'eval_runtime': 2.0576, 'eval_samples_per_second': 34.02, 'eval_steps_per_second': 4.374, 'epoch': 35.0}
{'loss': 0.0165, 'learning_rate': 1.0638297872340426e-05, 'epoch': 35.44}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 1.9791293937032606e-07, 'eval_runtime': 2.0538, 'eval_samples_per_second': 34.083, 'eval_steps_per_second': 4.382, 'epoch': 36.0}
{'loss': 0.0199, 'learning_rate': 1.0468085106382979e-05, 'epoch': 36.08}
{'loss': 0.0381, 'learning_rate': 1.0297872340425532e-05, 'epoch': 36.71}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 2.9436971544782864e-06, 'eval_runtime': 2.0463, 'eval_samples_per_second': 34.207, 'eval_steps_per_second': 4.398, 'epoch': 37.0}
{'loss': 0.0154, 'learning_rate': 1.0127659574468085e-05, 'epoch': 37.34}
{'loss': 0.0281, 'learning_rate': 9.957446808510639e-06, 'epoch': 37.97}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 2.4415126972598955e-05, 'eval_runtime': 2.0118, 'eval_samples_per_second': 34.795, 'eval_steps_per_second': 4.474, 'epoch': 38.0}
{'loss': 0.006, 'learning_rate': 9.787234042553192e-06, 'epoch': 38.61}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.008495540358126163, 'eval_runtime': 2.0341, 'eval_samples_per_second': 34.414, 'eval_steps_per_second': 4.425, 'epoch': 39.0}
{'loss': 0.0057, 'learning_rate': 9.617021276595745e-06, 'epoch': 39.24}
{'loss': 0.0083, 'learning_rate': 9.446808510638299e-06, 'epoch': 39.87}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 2.0793393673557148e-07, 'eval_runtime': 2.06, 'eval_samples_per_second': 33.98, 'eval_steps_per_second': 4.369, 'epoch': 40.0}
{'loss': 0.0101, 'learning_rate': 9.276595744680852e-06, 'epoch': 40.51}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.0005709591205231845, 'eval_runtime': 2.0664, 'eval_samples_per_second': 33.875, 'eval_steps_per_second': 4.355, 'epoch': 41.0}
{'loss': 0.0226, 'learning_rate': 9.106382978723405e-06, 'epoch': 41.14}
{'loss': 0.0282, 'learning_rate': 8.936170212765958e-06, 'epoch': 41.77}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.00029736990109086037, 'eval_runtime': 2.0278, 'eval_samples_per_second': 34.521, 'eval_steps_per_second': 4.438, 'epoch': 42.0}
{'loss': 0.0202, 'learning_rate': 8.765957446808512e-06, 'epoch': 42.41}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.02051628567278385, 'eval_runtime': 2.0385, 'eval_samples_per_second': 34.339, 'eval_steps_per_second': 4.415, 'epoch': 43.0}
{'loss': 0.0073, 'learning_rate': 8.595744680851065e-06, 'epoch': 43.04}
{'loss': 0.0053, 'learning_rate': 8.425531914893618e-06, 'epoch': 43.67}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.027531065046787262, 'eval_runtime': 2.0518, 'eval_samples_per_second': 34.117, 'eval_steps_per_second': 4.386, 'epoch': 44.0}
{'loss': 0.0086, 'learning_rate': 8.25531914893617e-06, 'epoch': 44.3}
{'loss': 0.0293, 'learning_rate': 8.085106382978723e-06, 'epoch': 44.94}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.04849638044834137, 'eval_runtime': 2.0605, 'eval_samples_per_second': 33.972, 'eval_steps_per_second': 4.368, 'epoch': 45.0}
{'loss': 0.0119, 'learning_rate': 7.914893617021278e-06, 'epoch': 45.57}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 1.8044698663288727e-05, 'eval_runtime': 2.0341, 'eval_samples_per_second': 34.413, 'eval_steps_per_second': 4.424, 'epoch': 46.0}
{'loss': 0.0105, 'learning_rate': 7.74468085106383e-06, 'epoch': 46.2}
{'loss': 0.0045, 'learning_rate': 7.574468085106383e-06, 'epoch': 46.84}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 6.849841582834415e-08, 'eval_runtime': 2.0343, 'eval_samples_per_second': 34.411, 'eval_steps_per_second': 4.424, 'epoch': 47.0}
{'loss': 0.0066, 'learning_rate': 7.404255319148936e-06, 'epoch': 47.47}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.026837799698114395, 'eval_runtime': 2.0117, 'eval_samples_per_second': 34.796, 'eval_steps_per_second': 4.474, 'epoch': 48.0}
{'loss': 0.013, 'learning_rate': 7.234042553191491e-06, 'epoch': 48.1}
{'loss': 0.0191, 'learning_rate': 7.0638297872340434e-06, 'epoch': 48.73}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.010271746665239334, 'eval_runtime': 2.012, 'eval_samples_per_second': 34.791, 'eval_steps_per_second': 4.473, 'epoch': 49.0}
{'loss': 0.0093, 'learning_rate': 6.893617021276596e-06, 'epoch': 49.37}
{'loss': 0.0007, 'learning_rate': 6.723404255319149e-06, 'epoch': 50.0}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.03859660029411316, 'eval_runtime': 2.0115, 'eval_samples_per_second': 34.8, 'eval_steps_per_second': 4.474, 'epoch': 50.0}
{'loss': 0.0072, 'learning_rate': 6.553191489361702e-06, 'epoch': 50.63}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 2.2711220992732706e-07, 'eval_runtime': 1.9864, 'eval_samples_per_second': 35.239, 'eval_steps_per_second': 4.531, 'epoch': 51.0}
{'loss': 0.0116, 'learning_rate': 6.382978723404256e-06, 'epoch': 51.27}
{'loss': 0.0031, 'learning_rate': 6.212765957446809e-06, 'epoch': 51.9}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 2.905081259996223e-07, 'eval_runtime': 1.994, 'eval_samples_per_second': 35.106, 'eval_steps_per_second': 4.514, 'epoch': 52.0}
{'loss': 0.0037, 'learning_rate': 6.042553191489362e-06, 'epoch': 52.53}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.02251076139509678, 'eval_runtime': 2.0131, 'eval_samples_per_second': 34.772, 'eval_steps_per_second': 4.471, 'epoch': 53.0}
{'loss': 0.0146, 'learning_rate': 5.872340425531915e-06, 'epoch': 53.16}
{'loss': 0.0135, 'learning_rate': 5.702127659574469e-06, 'epoch': 53.8}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.0002886891888920218, 'eval_runtime': 1.9899, 'eval_samples_per_second': 35.177, 'eval_steps_per_second': 4.523, 'epoch': 54.0}
{'loss': 0.0015, 'learning_rate': 5.531914893617022e-06, 'epoch': 54.43}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.00018878122500609607, 'eval_runtime': 2.0001, 'eval_samples_per_second': 34.999, 'eval_steps_per_second': 4.5, 'epoch': 55.0}
{'loss': 0.0096, 'learning_rate': 5.361702127659575e-06, 'epoch': 55.06}
{'loss': 0.0066, 'learning_rate': 5.191489361702128e-06, 'epoch': 55.7}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.002451034262776375, 'eval_runtime': 2.0049, 'eval_samples_per_second': 34.914, 'eval_steps_per_second': 4.489, 'epoch': 56.0}
{'loss': 0.0095, 'learning_rate': 5.0212765957446805e-06, 'epoch': 56.33}
{'loss': 0.0281, 'learning_rate': 4.851063829787234e-06, 'epoch': 56.96}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.014494474977254868, 'eval_runtime': 1.9895, 'eval_samples_per_second': 35.184, 'eval_steps_per_second': 4.524, 'epoch': 57.0}
{'loss': 0.012, 'learning_rate': 4.680851063829788e-06, 'epoch': 57.59}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 1.6533118696315796e-06, 'eval_runtime': 2.0604, 'eval_samples_per_second': 33.974, 'eval_steps_per_second': 4.368, 'epoch': 58.0}
{'loss': 0.0318, 'learning_rate': 4.5106382978723406e-06, 'epoch': 58.23}
{'loss': 0.0065, 'learning_rate': 4.340425531914894e-06, 'epoch': 58.86}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 1.133670579633872e-07, 'eval_runtime': 2.0522, 'eval_samples_per_second': 34.11, 'eval_steps_per_second': 4.386, 'epoch': 59.0}
{'loss': 0.0054, 'learning_rate': 4.170212765957447e-06, 'epoch': 59.49}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.008161806501448154, 'eval_runtime': 2.0468, 'eval_samples_per_second': 34.2, 'eval_steps_per_second': 4.397, 'epoch': 60.0}
{'loss': 0.0047, 'learning_rate': 4.000000000000001e-06, 'epoch': 60.13}
{'loss': 0.0104, 'learning_rate': 3.8297872340425535e-06, 'epoch': 60.76}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 6.927690066049763e-08, 'eval_runtime': 2.0219, 'eval_samples_per_second': 34.621, 'eval_steps_per_second': 4.451, 'epoch': 61.0}
{'loss': 0.0005, 'learning_rate': 3.6595744680851063e-06, 'epoch': 61.39}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 0.030300242826342583, 'eval_runtime': 2.0196, 'eval_samples_per_second': 34.661, 'eval_steps_per_second': 4.456, 'epoch': 62.0}
{'loss': 0.0158, 'learning_rate': 3.48936170212766e-06, 'epoch': 62.03}
{'loss': 0.005, 'learning_rate': 3.3191489361702127e-06, 'epoch': 62.66}


  0%|          | 0/9 [00:00<?, ?it/s]

{'eval_loss': 5.990250429022126e-08, 'eval_runtime': 2.0842, 'eval_samples_per_second': 33.585, 'eval_steps_per_second': 4.318, 'epoch': 63.0}
{'loss': 0.0001, 'learning_rate': 3.1489361702127664e-06, 'epoch': 63.29}
