In [1]:
# Fine tuning gpt2_medium model and use own data like company profile
#
# See also medium.com blog
# "GPT-2 Fine-Tuning Guide: Building a Chatbot for Your Company Profile"
# https://medium.com/@datatec.studio
#

# Mount google driver
# from google.colab import drive
# drive.mount('/content/drive')

In [2]:
# Change to google driver folder which contains datasets
# This folder will also be used to save model
# %cd /content/drive/MyDrive/GPT2_Lab_DTS

In [3]:
# Install python packages
# !pip install -r requirements.txt

In [4]:
import os
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
import tqdm
import time
import numpy as np
import sys

In [5]:
# Define environment variable, path of data, model name and device
# os.environ["HF_HOME"] = "/content/huggingface"  # Replace with your desired directory
# print("Please replace it with your hf access token:")
# os.environ["HF_HOME_TOKEN"] = "Please_replace_it_with_your_hf_access_token"

result_dir = 'resources/'
data_file_name = 'Ester'
data_file_path = f'../data/{data_file_name}.txt'

model_name = "gpt2" # gpt2-medium
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(device)

cuda


In [6]:
# Write a python file to google driver
# Sample of json datasets
# You can also directly upload this code to your google driver
# The code write here in this way is for better understanding of whole project
# %%writefile chat_data.py

from torch.utils.data import Dataset
import json

class ChatData(Dataset):
    def __init__(self, path: str, tokenizer):
        with open(path, encoding="utf-8") as f:
            self.data = f.readlines()#json.load(open(path, "r"))

        self.X = []
        for pair in self.data:
            pair = eval(pair)          
            self.X.append(f"<start> {pair['prompt']} <response>: {pair['response']} <end>")
        
        total_samples = len(self.X)  # Calculate the total number of samples
        print("total_samples", total_samples)
        # define samples amount
#         self.X = self.X[:500]
        print("Check the preprocessing for self.X[0]:")
        print(self.X[0])

        self.X_encoded = tokenizer(self.X, return_tensors="pt", max_length=30, padding="max_length", truncation=True)
        self.input_ids = self.X_encoded['input_ids']
        self.attention_mask = self.X_encoded['attention_mask']

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.input_ids[idx], self.attention_mask[idx]


In [8]:
# Download model, save model and tokernize to harddisk
## prepare tokenizer
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.add_special_tokens({"pad_token": "<pad>",
                              "bos_token": "<start>",
                              "eos_token": "<end>"})

tokenizer.add_tokens(["<response>:"])
with open(data_file_path, encoding="utf-8") as f:
    data = f.readlines()#json.load(open(path, "r"))

for pair in data:
    pair = eval(pair)
#     for token in pair['prompt'].split() + pair['response'].split()
    tokenizer.add_tokens(pair['prompt'].split() + pair['response'].split())
    

## prepare model
### Specify the desired embedding size (must be a multiple of 8)
desired_embedding_size = 50264  # Change this to the desired size
model = GPT2LMHeadModel.from_pretrained(model_name).to(device)
### Resize the embedding layer to the desired size
model.resize_token_embeddings(len(tokenizer), desired_embedding_size)
model = model.to(device)

## save tokenizer and model to harddisk
# tokenizer.save_pretrained(result_dir)
# model.save_pretrained(result_dir)

In [9]:
# ## load model and tokenizer from harddisk
# ### Load the GPT-2 tokenizer
# tokenizer = GPT2Tokenizer.from_pretrained(result_dir)

# ### Load the GPT-2 model from the local folder
# model = GPT2LMHeadModel.from_pretrained(result_dir)
# model.to(device)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(100528, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=100528, bias=False)
)

In [18]:
# Define infer and train function
def infer(inp):
    inp = "<start> " + inp + " <response>: "
    inp = tokenizer(inp, return_tensors="pt")
    X = inp["input_ids"].to(device)  # Use .to(device) method to move the tensor to the specified device
    a = inp["attention_mask"].to(device)  # Use .to(device) method here as well

    output = model.generate(X, attention_mask=a, max_length=100, num_return_sequences=1)

    output = tokenizer.decode(output[0])
    output = output[:output.find('<end>')].replace('<pad>', '')
    output = ' '.join(output.split())

    return output

def train(chatData, model, optim):
    
    batches = len(chatData)

    for i, (X, a) in enumerate(chatData):
        X = X.to(device)
        a = a.to(device)
        optim.zero_grad()
        loss = model(input_ids=X, attention_mask=a, labels=X).loss
        loss.backward()
        optim.step()
        if i % 100 == 0:
            print(f'iter {i} out of {batches}')

    print(infer("I created my own branch in our repo"))
    print('========================================================')

# def train(chatData, model, optim):
    
#     batches = len(chatData)

#     for i, (X, a) in tqdm(chatData):
#         X = X.to(device)
#         a = a.to(device)
#         optim.zero_grad()
#         loss = model(input_ids=X, attention_mask=a, labels=X).loss
#         loss.backward()
#         optim.step()
#         if i % 100 == 0:
#             print(f'iter {i} out of {batches}')

#     print(infer("Do we need to bring anything?"))
#     print('========================================================')

In [11]:
# from chat_data import ChatData

#Load ChatData, train model and optimizer
chatData = ChatData(data_file_path, tokenizer)
chatData = DataLoader(chatData, batch_size=1) # batch_size=64

model.train()

optim = Adam(model.parameters(), lr=1e-5)

total_samples 538
Check the preprocessing for self.X[0]:
<start> Thanks Ester! <response>: Ester named the group CSE 151B PA2. <end>


In [12]:
# train 10 times
epochs = 100  # You can adjust the number of epochs as needed
for epoch in range(epochs):
    start = time.time()
    print(f"Epoch {epoch} started")
    train(chatData, model, optim)
    end = time.time()
    print(f"Epoch {epoch} finished in {np.round((end - start) / 60, 2)} minutes")

Epoch 0 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in   in  
Epoch 0 finished in 0.52 minutes
Epoch 1 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the   the  
Epoch 1 finished in 0.51 minutes
Epoch 2 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:  <pad> <pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 2 finished in 0.51 minutes
Epoch 3 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:  <pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 3 finished in 0.51 minutes
Epoch 4 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   the  <pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 4 finished in 0.52 minutes
Epoch 5 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   <response>:  <pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 5 finished in 0.52 minutes
Epoch 6 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   <response>:   <response>:   <response>:   <response>: <pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 6 finished in 0.51 minutes
Epoch 7 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   <response>:   <response>: <pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 7 finished in 0.51 minutes
Epoch 8 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:    I   can   run   the   code   and   the   code   <response>:   <response>:   <response>:   <response>:   I   can   <response>:   the   code   <response>:   I   can   <response>:   the   code   and   the   code   to   the   code   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   I   can   run   the
Epoch 8 finished in 0.51 minutes
Epoch 9 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:  
Epoch 9 finished in 0.52 minutes
Epoch 10 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:  
Epoch 10 finished in 0.51 minutes
Epoch 11 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:  
Epoch 11 finished in 0.51 minutes
Epoch 12 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   <response>:   <response>:   I   created   my   own   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   I   created   the   <response>:  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 12 finished in 0.52 minutes
Epoch 13 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   <response>:   I   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 13 finished in 0.52 minutes
Epoch 14 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 14 finished in 0.51 minutes
Epoch 15 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   <response>:    I   <response>:   I   <response>:   I   <response>:   I   <response>:   I   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:   <response>:  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 15 finished in 0.51 minutes
Epoch 16 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   <response>:   I   created   my   own   in   the   report   <response>:   <response>:   I   <response>:   <response>:   <response>:  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 16 finished in 0.51 minutes
Epoch 17 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   <response>:   <response>:   <response>:  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 17 finished in 0.52 minutes
Epoch 18 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own   <response>:   I   created   my   own   <response>:   I   created   my   own   <response>:   <response>:   I   created   my   own  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 18 finished in 0.52 minutes
Epoch 19 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   a   new   <response>:   I   created   a   new  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 19 finished in 0.51 minutes
Epoch 20 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own   code   on   my   own  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 20 finished in 0.52 minutes
Epoch 21 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own   <response>:   I   created   my   own  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 21 finished in 0.51 minutes
Epoch 22 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own   code   on   my   page   <response>:   I   created   a   folder   for   my   branch   and   a   folder   for   my   <response>:   <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 22 finished in 0.51 minutes
Epoch 23 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:    I   created   my   branch   in   our  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 23 finished in 0.51 minutes
Epoch 24 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own   code   on   my   table   and   on   my   <response>:   I   created   my   own   code   on   my   branch   own  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 24 finished in 0.51 minutes
Epoch 25 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   branch   the   code   for   my   branch  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  I   created   my   own  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 25 finished in 0.51 minutes
Epoch 26 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  just   created   my   own   in   the  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 26 finished in 0.51 minutes
Epoch 27 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   didn’t   created   my   own   branch   in   our   baseline  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 27 finished in 0.52 minutes
Epoch 28 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   a   doc   that   should   be   updated   <response>:   I   didn’t   created   a   doc   that   should   be   updated  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  768   --n-epochs  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad>
Epoch 28 finished in 0.51 minutes
Epoch 29 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own   code   in   our   folder   <response>:   I   created   my   own   code   in   our   folder    <response>:   I   created   my   own   code   in   our  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 29 finished in 0.52 minutes
Epoch 30 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   guys   created   my   own   section   of   ester1   <response>:    I   created   my   own   section   of  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad>
Epoch 30 finished in 0.52 minutes
Epoch 31 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   didn’t   created   my   own   branch   in   our  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  768   --n-epochs   10   --n-epochs   768  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 31 finished in 0.51 minutes
Epoch 32 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own   branch   in   our   own  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 32 finished in 0.51 minutes
Epoch 33 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own   branch   in   our   poll.   <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  768   768  
Epoch 33 finished in 0.51 minutes
Epoch 34 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   baseline   I   pushed   the   latest  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 34 finished in 0.52 minutes
Epoch 35 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   didn’t   created   my   own   branch   in   our   poll.     <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 35 finished in 0.51 minutes
Epoch 36 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   didn’t   created   my   own   branch   in   our  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 36 finished in 0.52 minutes
Epoch 37 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   started   on   the   code   on   ester1  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  just  <end><pad><pad><pad>  just   updated
Epoch 37 finished in 0.51 minutes
Epoch 38 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own   branch   in   our   folder  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad>
Epoch 38 finished in 0.51 minutes
Epoch 39 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   branch   but   I   need   a   break  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  just   updated   the   section   headers   to  <end><pad>  just   replace
Epoch 39 finished in 0.51 minutes
Epoch 40 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own   branch   in   our   poll.    I   created   a   branch   link   to   my   branch   <response>:   I   created   a   branch   link   to   my   page   <response>:   I   pushed   created   a   branch   link   to   my   branch   Then   I   created   a   branch
Epoch 40 finished in 0.52 minutes
Epoch 41 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own   branch   in   our   poll.  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  768   --n-epochs   just   just   got   got   better  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 41 finished in 0.52 minutes
Epoch 42 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   it   worked   😔  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  ^   just   got   anyone   done   for   "Methods   got   the   best   result  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad>  ^   just   got   better  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 42 finished in 0.52 minutes
Epoch 43 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   baseline   version   locally   but   if   you   like   the   version   71%   you   can   clone   my   latest   branch  <end><pad><pad><pad><pad><pad><pad><pad><pad>  just   fixed   the   plots   and  <end> <end><pad><pad><pad><pad><pad>  ^   just   got   some   good   fixes  <end>
Epoch 43 finished in 0.51 minutes
Epoch 44 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   Feel   free   to   change   anything   that   we   guys  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  ^  <end><pad><pad><pad><pad>  ^  <end><pad><pad><pad><pad><pad><pad><pad>  just  <end>  ^   just   got   better   than   baseline  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 44 finished in 0.52 minutes
Epoch 45 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  I   created   my   own   code  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  just   updated   my   branch  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 45 finished in 0.51 minutes
Epoch 46 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   didn’t   didn’t   the   branch   loss   still   has   a   lower   loss?  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  If   you   use   the   baseline   loss   D   is   still   above   the  
Epoch 46 finished in 0.51 minutes
Epoch 47 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own   code   on   ester1  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  just   updated   ester1   Now  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  just  
Epoch 47 finished in 0.51 minutes
Epoch 48 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   a   lot   of   custom   ones  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  ^  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad>  I   created   my   own   code  <end><pad><pad><pad>
Epoch 48 finished in 0.51 minutes
Epoch 49 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   ester1   to   master,   but   I   didn’t   will   update   it   if   it   gets   higher  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  I   created   my   own   code   on   my   branch  
Epoch 49 finished in 0.51 minutes
Epoch 50 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:    I   pushed   the   updated   util.py   to   master,   but   I   need   a   parameter   fix   after   and   see   if   it’s   a   better   version   or   better  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad>  I   can   run   my   updated   version   of   5b   latest
Epoch 50 finished in 0.51 minutes
Epoch 51 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   on   overleaf   <response>:  <end><pad><pad><pad><pad><pad> <end><pad><pad><pad> <end><pad><pad><pad> <end><pad><pad><pad> <end><pad> <end><pad><pad> <end><pad><pad><pad> <end><pad> <end><pad><pad><pad> <pad> <end><pad><pad> <end><pad> <end><pad> <end><pad> <end><pad><pad><pad><pad> <end>
Epoch 51 finished in 0.51 minutes
Epoch 52 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own   branch   in   our   own  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  just   created   my   own   code  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad>  I   pushed   the   updated   util.py   --  <end> <end> <end>
Epoch 52 finished in 0.51 minutes
Epoch 53 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   on   overleaf  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 53 finished in 0.51 minutes
Epoch 54 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:     Ester  <end><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad> <end><pad>  ^   just   got   more   GB  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad>  I   pushed   created   my   own   branch   in   our   repo  <end><pad><pad> <end> <end><pad><pad> <end><pad> <end><pad><pad><pad><pad><pad> 
Epoch 54 finished in 0.51 minutes
Epoch 55 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own   state   in   our   repo  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  just   updated   ester1  <end><pad><pad><pad><pad><pad><pad><pad>  to   better   see   if   my   higher   number   on   the   chart   is   higher   :   D  <end><pad><pad><pad>
Epoch 55 finished in 0.51 minutes
Epoch 56 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:    I   pushed   the   updated   util.py   to   master,   but   I   can   only   update   the   code  <end><pad><pad><pad><pad><pad> <end><pad><pad>  Are   you   all   free   to   update   the   branch   after   changing   the   kernel  ?  <end><pad><pad><pad>  Are   you   all   free   to
Epoch 56 finished in 0.51 minutes
Epoch 57 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   to   share   earlier!   but   if   you   want   the   DSMLP   GPU   I   can   share   the   graph   with   baseline  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  just   run   the  
Epoch 57 finished in 0.51 minutes
Epoch 58 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:    I   pushed   the   updated   util.py   to   master,   but   I   didn’t   it   if   it’s   a   lot   easier   to   implement   the   baseline  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad> <end><pad><pad><pad><pad><pad>  just   updated
Epoch 58 finished in 0.51 minutes
Epoch 59 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   version   locally   to   3*3  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 59 finished in 0.52 minutes
Epoch 60 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   will   need   the   baseline   to   still   update   the   updated   section  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad> <end><pad><pad><pad>  just   fixed   the   issue
Epoch 60 finished in 0.52 minutes
Epoch 61 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:    I’ll   update   code   to   do   the   full   validation  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad>  just   submitted,   for   the   validation   update  <end> <end><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad> <end><pad> <end>
Epoch 61 finished in 0.51 minutes
Epoch 62 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   implement   but   somehow   down   to   part   5!  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 62 finished in 0.51 minutes
Epoch 63 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   ester1  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 63 finished in 0.51 minutes
Epoch 64 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad>
Epoch 64 finished in 0.51 minutes
Epoch 65 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   I   create   a   branch   link   to   my   branch   <response>:   I   pushed   the   branch   link   to   my   branch  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad> <end><pad> <end> <end><pad>
Epoch 65 finished in 0.51 minutes
Epoch 66 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:    I   pushed   the   updated   util.py   +   Improving   on   my   branch  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad> <end><pad><pad><pad><pad>
Epoch 66 finished in 0.51 minutes
Epoch 67 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   will   need   the   updated   code  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  just   updated   ester1   to  <pad><pad><pad> <end> <end><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad> <end><pad> 
Epoch 67 finished in 0.51 minutes
Epoch 68 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   need   a   minor   fix   after   i   fj   the   updated   kernel   <response>:  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad> <end><pad><pad> <end><pad><pad><pad>
Epoch 68 finished in 0.51 minutes
Epoch 69 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   need   a   minor   fix   after   and   see   if   there’s   help  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad>  I   created   my   own   in   our   repo  
Epoch 69 finished in 0.51 minutes
Epoch 70 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   need   a   minor   fix   after   i   fj   it   but   gets   better   than   the   baseline  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end>
Epoch 70 finished in 0.51 minutes
Epoch 71 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   a   paper   SimCLR   named   "a5000"  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad>
Epoch 71 finished in 0.51 minutes
Epoch 72 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   my   updated   util.py   to   master,   but   I   pushed   the   updated   util.py   to   4  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad>
Epoch 72 finished in 0.52 minutes
Epoch 73 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad>  I   created   my   own   code   in   the  
Epoch 73 finished in 0.51 minutes
Epoch 74 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   pushed   to   my   branch   branch  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad>
Epoch 74 finished in 0.51 minutes
Epoch 75 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   still   need   a   minor   fix   after   changing   the   kernel   kernel  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad> <end><pad> <end><pad> <end><pad><pad> <end><pad><pad> <end><pad><pad><pad>
Epoch 75 finished in 0.51 minutes
Epoch 76 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   will   update   the   code   with   the   last   bug   and   i’ll   get   the   baseline   own  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad> <end><pad><pad><pad>
Epoch 76 finished in 0.51 minutes
Epoch 77 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,    But   I   will   ask   in   OH   today   to   implement   the   updated   section  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad> <end><pad><pad><pad><pad> <end><pad> <end>
Epoch 77 finished in 0.51 minutes
Epoch 78 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   need   a   minor   fix   after   i   fj   it  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad>
Epoch 78 finished in 0.51 minutes
Epoch 79 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   need   a   minor   fix   after   and   see   if   it’s   come   on   <response>:   I   can   get   one   more   plot   updated  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 79 finished in 0.52 minutes
Epoch 80 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   need   a   minor   fix   after   and   see   if   it’s   better   implement   it   to   capstone  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 80 finished in 0.52 minutes
Epoch 81 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 81 finished in 0.51 minutes
Epoch 82 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   it   worked   nonetheless  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> 
Epoch 82 finished in 0.51 minutes
Epoch 83 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   need   a   minor   fix   after   and   if   I   do   else   I   can   make   the   branch   again   on   overleaf  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad> <end><pad> <end><pad><pad> <end><pad>
Epoch 83 finished in 0.51 minutes
Epoch 84 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   but   somehow   still   clarifying   changes  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad>  ^   just   got   this!   after   his   error  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 84 finished in 0.51 minutes
Epoch 85 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   in   A,  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  I   added   ester1   to   the   discussion   list  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 85 finished in 0.51 minutes
Epoch 86 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   a   paper   called   the   baseline  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  just   run   the   command   overnight   and   you   should   be   able   to   get   the   best   result  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad>
Epoch 86 finished in 0.52 minutes
Epoch 87 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   somehow   is   still   the   best   <response>:  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad>  I   created   my   own   branch   in   our  
Epoch 87 finished in 0.51 minutes
Epoch 88 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   --  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad>
Epoch 88 finished in 0.51 minutes
Epoch 89 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad>
Epoch 89 finished in 0.51 minutes
Epoch 90 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   and   custom1and3   to   increase   but   I   get   the   updated   version   locally   because   it's   still   needs   a   update  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  I   will   push   the   baseline   code   if   it
Epoch 90 finished in 0.51 minutes
Epoch 91 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   need   a   minor   fix   after   changing   the   kernel   kernel   <response>:   I   fixed   the   scheduler.step   kernel   but   I   didn’t   added   new   kernel  <end><pad><pad><pad><pad><pad><pad> <end>   If   you   like   like   my
Epoch 91 finished in 0.51 minutes
Epoch 92 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   need   a   minor   fix   after   and  <end><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad> <end> <end><pad> <end> <end><pad> <end><pad> <end> <end><pad><pad> <end><pad> <end> <end> <end><pad> <end> <end> <end> <end>
Epoch 92 finished in 0.51 minutes
Epoch 93 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   need   a   break   anyways   before   training?   Can   I   can   try   you  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad> <end><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad> <end><pad><pad><pad>
Epoch 93 finished in 0.52 minutes
Epoch 94 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   can   only   update   the   code   on   GitHub   until   3pm   and   so  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad> <end><pad><pad> <end><pad><pad><pad> <end><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad> <end>
Epoch 94 finished in 0.51 minutes
Epoch 95 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   and   custom1and3  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad>  just  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad>
Epoch 95 finished in 0.51 minutes
Epoch 96 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad>  just   updated   the   util.py  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>  just   updated   the   section  <end><pad><pad><pad>
Epoch 96 finished in 0.51 minutes
Epoch 97 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   to   master,   but   I   still   need   a   fix   after   adding   to   the   discussion   section   <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad>  just   fixed  
Epoch 97 finished in 0.51 minutes
Epoch 98 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   created   my   own   branch   in   our   repo   <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> <end><pad><pad><pad>
Epoch 98 finished in 0.52 minutes
Epoch 99 started
iter 0 out of 538
iter 100 out of 538
iter 200 out of 538
iter 300 out of 538
iter 400 out of 538
iter 500 out of 538


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start>  I   created   my   own   branch   in   our   repo   <response>:   I   pushed   the   updated   util.py   version   locally   but   somehow   didn’t   update   on   GitHub   if   you   want   the   DSMLP   GPU   to   be   different  <end><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> 
Epoch 99 finished in 0.52 minutes


In [19]:
inp = ""
while True:
    inp = input("Enter your input (press Enter when done): " + " " * 20)
    print(infer(inp))

Enter your input (press Enter when done):                     Ayo


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Ay o <response>: I’m updating the related works section with citations
Enter your input (press Enter when done):                     bahahah


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> b ah ah ah <response>: not submitted yet
Enter your input (press Enter when done):                     are you ok


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> are you ok <response>: I see you ok at all
Enter your input (press Enter when done):                     are you ok


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> are you ok <response>: with working rate of 10 <response>: For the report : report can use this copy of PA3
Enter your input (press Enter when done):                     I am Ester


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> I am Ester <response>: not doing the individual part so <response>: how about the existing result on report?
Enter your input (press Enter when done):                     Are you Ester?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Are you Ester? <response>: Are you all free to test your code by cloning my branch code
Enter your input (press Enter when done):                     Who are you?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Wh o are you? <response>: O IM OK NOW <response>: I'm using capstone platform


KeyboardInterrupt: Interrupted by user

In [16]:
# Save the model's state dictionary after training is complete
torch.save(model.state_dict(), f"models/{data_file_name}_ep{epochs}.pt")

In [15]:
param_size = 0
for param in model.parameters():
    param_size += param.nelement() * param.element_size()
buffer_size = 0
for buffer in model.buffers():
    buffer_size += buffer.nelement() * buffer.element_size()

size_all_mb = (param_size + buffer_size) / 1024**2
print('model size: {:.3f}MB'.format(size_all_mb))

model size: 633.979MB
