In [None]:
# Fine tuning gpt2_medium model and use own data like company profile
#
# See also medium.com blog
# "GPT-2 Fine-Tuning Guide: Building a Chatbot for Your Company Profile"
# https://medium.com/@datatec.studio

In [None]:
# Install python packages
# !pip install -r requirements.txt

In [1]:
import os
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from tqdm import tqdm
import time
import numpy as np
import sys

In [2]:
# Define environment variable, path of data, model name and device
# os.environ["HF_HOME"] = "/content/huggingface"  # Replace with your desired directory
# print("Please replace it with your hf access token:")
# os.environ["HF_HOME_TOKEN"] = "Please_replace_it_with_your_hf_access_token"

result_dir = 'resources/'
data_file_name = 'Winfrey'
data_file_path = f'../data/prompt_response/{data_file_name}.txt'

model_name = "gpt2" # gpt2-medium
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(device)

cuda


In [3]:
# Write a python file to google driver
# Sample of json datasets
# You can also directly upload this code to your google driver
# The code write here in this way is for better understanding of whole project
# %%writefile chat_data.py

from torch.utils.data import Dataset
import json

class ChatData(Dataset):
    def __init__(self, path: str, tokenizer):
        with open(path, encoding="utf-8") as f:
            self.data = f.readlines()#json.load(open(path, "r"))

        self.X = []
        for pair in self.data:
            pair = eval(pair)          
            self.X.append(f"<start> {pair['prompt']} <response>: {pair['response']} <end>")
        
        total_samples = len(self.X)  # Calculate the total number of samples
        print("total_samples", total_samples)
        # define samples amount
#         self.X = self.X[:500]
        print("Check the preprocessing for self.X[0]:")
        print(self.X[0])

        self.X_encoded = tokenizer(self.X, return_tensors="pt", max_length=30, padding="max_length", truncation=True)
        self.input_ids = self.X_encoded['input_ids']
        self.attention_mask = self.X_encoded['attention_mask']

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.input_ids[idx], self.attention_mask[idx]


In [4]:
# Download model, save model and tokernize to harddisk
## prepare tokenizer
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

print(f'Number of tokens in tokenizer before adding our specific tokens: {len(tokenizer.get_vocab())}')

tokenizer.add_special_tokens({"pad_token": "<pad>",
                              "bos_token": "<start>",
                              "eos_token": "<end>"})

tokenizer.add_tokens(["<response>:"])
with open(data_file_path, encoding="utf-8") as f:
    data = f.readlines()#json.load(open(path, "r"))

for pair in data:
    pair = eval(pair)
    tokenizer.add_tokens(pair['prompt'].split() + pair['response'].split())
    
print(f'Number of tokens in tokenizer after adding our specific tokens: {len(tokenizer.get_vocab())}')

## prepare model
### Specify the desired embedding size (must be a multiple of 8)
desired_embedding_size = 50264  # Change this to the desired size
model = GPT2LMHeadModel.from_pretrained(model_name).to(device)
### Resize the embedding layer to the desired size
model.resize_token_embeddings(len(tokenizer), desired_embedding_size)
model = model.to(device)

## save tokenizer and model to harddisk
# tokenizer.save_pretrained(result_dir)
# model.save_pretrained(result_dir)

Number of tokens in tokenizer before adding our specific tokens: 50257
Number of tokens in tokenizer after adding our specific tokens: 51706


In [5]:
# ## load model and tokenizer from harddisk
# ### Load the GPT-2 tokenizer
# tokenizer = GPT2Tokenizer.from_pretrained(result_dir)

# ### Load the GPT-2 model from the local folder
# model = GPT2LMHeadModel.from_pretrained(result_dir)
# model.to(device)

In [14]:
# Define infer and train function
def infer(inp_raw):
    inp_appended = "<start> " + inp_raw + " <response>: "
    inp = tokenizer(inp_appended, return_tensors="pt")
    X = inp["input_ids"].to(device)  # Use .to(device) method to move the tensor to the specified device
    a = inp["attention_mask"].to(device)  # Use .to(device) method here as well

    output = model.generate(X, attention_mask=a, max_length=100, num_return_sequences=1)

    output = tokenizer.decode(output[0])
    output = output[output.find('<response>: ')+len('<response>: '):output.find('<end>')].replace('<pad>', '')
    output = ' '.join(output.split())
    
    prompt_response = {
        'prompt': inp_raw,
        'response': output
    }

    return prompt_response

# def train(chatData, model, optim):
    
#     batches = len(chatData)

#     for i, (X, a) in enumerate(chatData):
#         X = X.to(device)
#         a = a.to(device)
#         optim.zero_grad()
#         loss = model(input_ids=X, attention_mask=a, labels=X).loss
#         loss.backward()
#         optim.step()
#         if i % 100 == 0:
#             print(f'iter {i} out of {batches}')

#     print(infer("I created my own branch in our repo"))
#     print('========================================================')

def train(chatData, model, optim):
    
    batches = len(chatData)

    for i, (X, a) in tqdm(enumerate(chatData), total=len(chatData), desc="Training"):
        X = X.to(device)
        a = a.to(device)
        optim.zero_grad()
        loss = model(input_ids=X, attention_mask=a, labels=X).loss
        loss.backward()
        optim.step()

    print(infer("Do we need to bring anything?"))
    print('========================================================')

In [7]:
# from chat_data import ChatData

#Load ChatData, train model and optimizer
chatData = ChatData(data_file_path, tokenizer)
chatData = DataLoader(chatData, batch_size=1) # batch_size=64

model.train()

optim = Adam(model.parameters(), lr=1e-5)

total_samples 515
Check the preprocessing for self.X[0]:
<start> PA2! <response>: Thanks Ester! <end>


In [8]:
# train 10 times
epochs = 100  # You can adjust the number of epochs as needed
for epoch in range(epochs):
    start = time.time()
    print(f"Epoch {epoch} started")
    train(chatData, model, optim)
    end = time.time()
    print(f"Epoch {epoch} finished in {np.round((end - start) / 60, 2)} minutes")

Epoch 0 started


Training: 100%|██████████| 515/515 [00:29<00:00, 17.27it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to t
Epoch 0 finished in 0.51 minutes
Epoch 1 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.83it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to to t
Epoch 1 finished in 0.49 minutes
Epoch 2 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.93it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>:
Epoch 2 finished in 0.49 minutes
Epoch 3 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.97it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: to the the the the the the the <pad
Epoch 3 finished in 0.49 minutes
Epoch 4 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.04it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the th
Epoch 4 finished in 0.49 minutes
Epoch 5 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.06it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: the the the the the the the the the the the the the the the the the the <pad
Epoch 5 finished in 0.49 minutes
Epoch 6 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.05it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: the the <response>: the the <pad
Epoch 6 finished in 0.49 minutes
Epoch 7 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.02it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: <response>: <response>: <response>: <response>: <response>: the the the to the the the the <response>: the <response>: the the the the the the the the the the the the the the the the the the the the the the the
Epoch 7 finished in 0.49 minutes
Epoch 8 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.12it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: the <response>: <response>: the the <response>: the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the th
Epoch 8 finished in 0.49 minutes
Epoch 9 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.95it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: <response>: <response>: and the <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: and the of the of the of the <response>: <response>: of the <response>: <response>: <response>: <response>: <response>: <response>: the <response>: <response>: to the of the o
Epoch 9 finished in 0.49 minutes
Epoch 10 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.00it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: to <response>: is the same of the <response>: <response>: <response>: is the <response>: <response>: <response>: is the same of the <response>: <response>: and the the is the <response>: <response>: is the the of the the of the the <response>: <response>: t
Epoch 10 finished in 0.49 minutes
Epoch 11 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.07it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: <response>: <response>: <response>: <response>: <response>: <response>: I can do the work for the <response>: I can do the work for the of the of the of the of the of the of the of the of the of the of th
Epoch 11 finished in 0.49 minutes
Epoch 12 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.11it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: <response>: <response>: <response>: <response>: <response>: I can do the the same thing <response>: for the same <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: and <response>
Epoch 12 finished in 0.49 minutes
Epoch 13 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.95it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: I think we need to <response>: and we can we do we do the <response>: <response>: and <response>: <response>: and the <response>: for the <response>: is <response>: and the model of the model of the model of the model of th
Epoch 13 finished in 0.49 minutes
Epoch 14 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.97it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>: <response>
Epoch 14 finished in 0.49 minutes
Epoch 15 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.01it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: <response>: <response>: I can do anything I can get back to my <response>: <response>: I can get the <response>: model model model model model model model model model model model model model model model model model model model model model mode
Epoch 15 finished in 0.49 minutes
Epoch 16 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.98it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: <response>: I can do anything to help me with the training <response>: I <response>: I can do anything to help me with the training of
Epoch 16 finished in 0.49 minutes
Epoch 17 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.05it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: I can run the best of the best of the
Epoch 17 finished in 0.49 minutes
Epoch 18 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.83it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: I think we are going to make a batch batch size of <response>: for each of the <response>: <response>: <response>: <response>: <response>: <response>:
Epoch 18 finished in 0.49 minutes
Epoch 19 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.10it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: I can help me get started with the basic <response>: <response>:
Epoch 19 finished in 0.49 minutes
Epoch 20 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.97it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: I can help me build a framework for the framework that we can
Epoch 20 finished in 0.49 minutes
Epoch 21 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.08it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>:
Epoch 21 finished in 0.49 minutes
Epoch 22 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.06it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: I can do for my part of the <response>: I can do a plots for my part of the
Epoch 22 finished in 0.49 minutes
Epoch 23 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.03it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: <response>: I am looking at the most common results <response>: <response>:
Epoch 23 finished in 0.49 minutes
Epoch 24 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.11it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: <response>:
Epoch 24 finished in 0.49 minutes
Epoch 25 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.88it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Or just get back to the original code and check <response>: <response>: I was running at the same time <response>: <response>: <response>: <response>:
Epoch 25 finished in 0.49 minutes
Epoch 26 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.90it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>:
Epoch 26 finished in 0.49 minutes
Epoch 27 started


Training: 100%|██████████| 515/515 [00:29<00:00, 17.75it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: I think we need to set a table for them <response>: <response>:
Epoch 27 finished in 0.5 minutes
Epoch 28 started


Training: 100%|██████████| 515/515 [00:29<00:00, 17.61it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>:
Epoch 28 finished in 0.5 minutes
Epoch 29 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.89it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Or do we need to add another row for the model or model in the row <response>: Or just just add another row for the model in the row
Epoch 29 finished in 0.49 minutes
Epoch 30 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.04it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: I will get back on <response>: I will get back on <response>: I will get back on what we had before when we went better with sequence length of length of of epoch than <pad
Epoch 30 finished in 0.49 minutes
Epoch 31 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.96it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: I have been using
Epoch 31 finished in 0.49 minutes
Epoch 32 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.06it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: I think
Epoch 32 finished in 0.49 minutes
Epoch 33 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.10it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: <response>: yes no <response>:
Epoch 33 finished in 0.49 minutes
Epoch 34 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.12it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: I am not sure about the plots <response>:
Epoch 34 finished in 0.49 minutes
Epoch 35 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.01it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: yes no not sure if classifier
Epoch 35 finished in 0.49 minutes
Epoch 36 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.06it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Just realized the poll.
Epoch 36 finished in 0.49 minutes
Epoch 37 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.03it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: No I can help run a chunk that I can run with your branch in the poll. <response>:
Epoch 37 finished in 0.49 minutes
Epoch 38 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.08it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: yes on my end on my end <response>:
Epoch 38 finished in 0.49 minutes
Epoch 39 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.04it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: <response>: I can help run a chunk with some results <response>: I can help run a chunk with some results I find in the poll. section under
Epoch 39 finished in 0.49 minutes
Epoch 40 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.10it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: I can help run a chunk on my computer by using a sequence length of 300 and even more research <response>: <response>: <response>: I can help run a batch size of 300 when I can get a val of supcon abov
Epoch 40 finished in 0.49 minutes
Epoch 41 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.10it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Or can we just confirm up to 4 epochs now <response>: <response>:
Epoch 41 finished in 0.49 minutes
Epoch 42 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.13it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Yes no I think it’s the same for classifier on the poll.
Epoch 42 finished in 0.49 minutes
Epoch 43 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.94it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: yes on end <response>: yes
Epoch 43 finished in 0.49 minutes
Epoch 44 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.92it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Yeah <response>: I will get back on datahub this
Epoch 44 finished in 0.49 minutes
Epoch 45 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.07it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: yes on my end
Epoch 45 finished in 0.49 minutes
Epoch 46 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.91it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Or can we just \ref
Epoch 46 finished in 0.49 minutes
Epoch 47 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.96it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: No <response>: No I have never used part IoU the pa pa is slightly mostly taking 4c the architecture with both baseline and epoch of epoch at 10 am for the pa is mostly taking the pa with both both tha
Epoch 47 finished in 0.49 minutes
Epoch 48 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.96it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Or can we just realized them in the network <response>:
Epoch 48 finished in 0.49 minutes
Epoch 49 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.15it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: yes no I can help cross check the architecture table from <response>: Is there anything we need to do or <response>: other than epoch Just want to check the architecture table from that you can find any number of the poll
Epoch 49 finished in 0.49 minutes
Epoch 50 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.01it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Like one hour for 6 epochs
Epoch 50 finished in 0.49 minutes
Epoch 51 started


Training: 100%|██████████| 515/515 [00:29<00:00, 17.66it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: yes to my end <response>: yes to my end
Epoch 51 finished in 0.5 minutes
Epoch 52 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.92it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: No I think <response>: Just curious made another push of epoch the one that I don’t only realized the one with very <pad
Epoch 52 finished in 0.49 minutes
Epoch 53 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.90it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Is it about the classifier <response>: Like one hour for 6 <response>:
Epoch 53 finished in 0.49 minutes
Epoch 54 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.05it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Or can we just batch size of 1 <response>: Or size of 50? instead of 50? <response>:
Epoch 54 finished in 0.49 minutes
Epoch 55 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.87it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Or just use the best model val reduction instead of 50? instead of val train acc 10 <response>: Or 0.1? instead of 10
Epoch 55 finished in 0.49 minutes
Epoch 56 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.94it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Or can we just set a new week for epochs <response>: Just cause cause I’m cause for PA1 cause
Epoch 56 finished in 0.49 minutes
Epoch 57 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.86it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: yes on my end
Epoch 57 finished in 0.49 minutes
Epoch 58 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.94it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Or maybe how to build a UNet on top of ester1; <response>: I’ll with other epochs <response>: = none other than 0.01 this table = 50? = none
Epoch 58 finished in 0.49 minutes
Epoch 59 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.88it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: yes on end
Epoch 59 finished in 0.49 minutes
Epoch 60 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.97it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Or can we just finetune a table with sequence length of length of 50? Like SEQ_SIZE <response>: Or sequence length of 50? Or sequence length of 50?
Epoch 60 finished in 0.49 minutes
Epoch 61 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.89it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Or can we just just put them together <response>: So <response>:
Epoch 61 finished in 0.49 minutes
Epoch 62 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.76it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Yes for each of the models in 5, the sequence length is wrong <response>:
Epoch 62 finished in 0.5 minutes
Epoch 63 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.97it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Thank you
Epoch 63 finished in 0.49 minutes
Epoch 64 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.05it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Okkk is 85.7% tiny bit
Epoch 64 finished in 0.49 minutes
Epoch 65 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.00it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: I’ll a high chance that’s <response>:
Epoch 65 finished in 0.49 minutes
Epoch 66 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.01it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: @Ester Are we going with our “best
Epoch 66 finished in 0.49 minutes
Epoch 67 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.99it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: @Ester I can help run a chunk at 520pm, <response>: I’ll for the human viewing I can run at the server with some of it like 5 epochs <response>: with sequence length of 30 like supcon with sequence length of 50
Epoch 67 finished in 0.49 minutes
Epoch 68 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.10it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Just curious the ABC notation 😳 <response>: Niceeeee
Epoch 68 finished in 0.49 minutes
Epoch 69 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.16it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Thank you
Epoch 69 finished in 0.49 minutes
Epoch 70 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.13it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: yes on my end
Epoch 70 finished in 0.49 minutes
Epoch 71 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.18it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: No I can make a post on piazza: <response>: Just curious none of “improving the supcon can be destroyed
Epoch 71 finished in 0.49 minutes
Epoch 72 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.07it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: @Ester
Epoch 72 finished in 0.49 minutes
Epoch 73 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.11it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: @Ester
Epoch 73 finished in 0.49 minutes
Epoch 74 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.93it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Thank you hehe
Epoch 74 finished in 0.49 minutes
Epoch 75 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.04it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Sure
Epoch 75 finished in 0.49 minutes
Epoch 76 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.94it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Or name it according to the architecture <response>: Quick question,
Epoch 76 finished in 0.49 minutes
Epoch 77 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.06it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: @Ester <response>: I like how we do with tiny - tiny tiny
Epoch 77 finished in 0.49 minutes
Epoch 78 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.84it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Thank you hehe <response>: Thank you
Epoch 78 finished in 0.49 minutes
Epoch 79 started


Training: 100%|██████████| 515/515 [00:29<00:00, 17.44it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: I’ll wait a bit <response>: Okk
Epoch 79 finished in 0.51 minutes
Epoch 80 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.82it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Yes
Epoch 80 finished in 0.5 minutes
Epoch 81 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.76it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Thank you
Epoch 81 finished in 0.5 minutes
Epoch 82 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.87it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Anything that has to do with PATH is very painful <response>: I was using a very different architecture and it was different <response>: in my session
Epoch 82 finished in 0.49 minutes
Epoch 83 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.88it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: It’s is quite high honestly
Epoch 83 finished in 0.49 minutes
Epoch 84 started


Training: 100%|██████████| 515/515 [00:29<00:00, 17.71it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Like one hour
Epoch 84 finished in 0.5 minutes
Epoch 85 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.97it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Thank you!
Epoch 85 finished in 0.49 minutes
Epoch 86 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.14it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Thank you <response>:
Epoch 86 finished in 0.49 minutes
Epoch 87 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.12it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Anything that is not set in the Code is not set in the Code <response>: I’ll Code is not set on the epoch
Epoch 87 finished in 0.49 minutes
Epoch 88 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.14it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Thank you
Epoch 88 finished in 0.49 minutes
Epoch 89 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.93it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Main is the same as Ester 1 right now <response>:
Epoch 89 finished in 0.49 minutes
Epoch 90 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.03it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: A Simple framework for learning Contrastive Learning of Contrastive <response>: literally HAHAH
Epoch 90 finished in 0.49 minutes
Epoch 91 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.10it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Like one hour
Epoch 91 finished in 0.49 minutes
Epoch 92 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.05it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Thank you!
Epoch 92 finished in 0.49 minutes
Epoch 93 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.97it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Anything that is not set back to original length of 50? <response>: Or even even lower
Epoch 93 finished in 0.49 minutes
Epoch 94 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.04it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Anything that is very painful to main or ester1? is slightly Just very very slightly
Epoch 94 finished in 0.49 minutes
Epoch 95 started


Training: 100%|██████████| 515/515 [00:28<00:00, 18.09it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Anything that is needed for the report <response>: Or just for the report <response>: Or Simple framework for the iou Or iou =
Epoch 95 finished in 0.49 minutes
Epoch 96 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.90it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Or add the paper in <response>: Or just use the number
Epoch 96 finished in 0.49 minutes
Epoch 97 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.91it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Thank you
Epoch 97 finished in 0.49 minutes
Epoch 98 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.87it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: A Simple framework for learning Contrastive <response>: Simple framework for learning Learning
Epoch 98 finished in 0.49 minutes
Epoch 99 started


Training: 100%|██████████| 515/515 [00:28<00:00, 17.96it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<start> Do we need to b r in g anything? <response>: Just submitted the scheduler and loop <response>: Just cause for PA1
Epoch 99 finished in 0.49 minutes


In [15]:
inp = ""
while True:
    inp = input("Enter your input (press Enter when done): " + " " * 20)
    print(infer(inp))

Enter your input (press Enter when done):                     haha


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'prompt': 'haha', 'response': 'ok I see I seeeee'}
Enter your input (press Enter when done):                     yay!


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'prompt': 'yay!', 'response': 'also looking at augmentation options in torchvision (part of UNet requires this) <response>: Personal high key'}
Enter your input (press Enter when done):                     I'm done


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'prompt': "I'm done", 'response': "I'm Feel free to build on top of ester1; by line <response>:"}
Enter your input (press Enter when done):                     yes


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'prompt': 'yes', 'response': 'the loss ones?'}
Enter your input (press Enter when done):                     I have a lot of coding experience


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'prompt': 'I have a lot of coding experience', 'response': 'under figures, the visual framework for discussion <response>: Visual Representations'}


KeyboardInterrupt: Interrupted by user

In [16]:
# Save the model's state dictionary after training is complete
torch.save(model.state_dict(), f"models/{data_file_name}.pt")

# Simulate a Chat

In [18]:
ester = GPT2LMHeadModel.from_pretrained(model_name).to(device)
ester.resize_token_embeddings(len(tokenizer), desired_embedding_size)
ester.load_state_dict(torch.load("models/Ester.pt"))
ester = model.to(device)

winfrey = GPT2LMHeadModel.from_pretrained(model_name).to(device)
winfrey.resize_token_embeddings(len(tokenizer), desired_embedding_size)
winfrey.load_state_dict(torch.load("models/Winfrey.pt"))
winfrey = model.to(device)

In [19]:
def infer(inp_raw, model):
    inp_appended = "<start> " + inp_raw + " <response>: "
    inp = tokenizer(inp_appended, return_tensors="pt")
    X = inp["input_ids"].to(device)  # Use .to(device) method to move the tensor to the specified device
    a = inp["attention_mask"].to(device)  # Use .to(device) method here as well

    output = model.generate(X, attention_mask=a, max_length=100, num_return_sequences=1)

    output = tokenizer.decode(output[0])
    output = output[output.find('<response>: ')+len('<response>: '):output.find('<end>')].replace('<pad>', '')
    output = ' '.join(output.split())
    
    prompt_response = {
        'prompt': inp_raw,
        'response': output
    }

    return prompt_response

In [31]:
output = infer("How's it going?", ester)
with_name = 'Ester: ' + output['prompt']
convo = [with_name]
for i in range(10):
    response = output['response']
    if i % 2 == 0:
        output = infer(response, winfrey)
        with_name = 'Winfrey: ' + output['response']
    else:
        output = infer(response, ester)
        with_name = 'Ester: ' + output['response']
    convo += [with_name]
    
convo

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


["Ester: How's it going?",
 'Winfrey: Oh no I don’t why not the 4c I think remotely! <response>: I think some of it in the computer folder not very good <response>: ok I think',
 'Ester: I think some of it in the computer folder not very good <response>: ok I think <response>: I I’ll might have some errors I have that’s I think',
 'Winfrey: ok I think <response>: I I’ll might have some errors I have that’s I think <response>: Maybe in the computer folder that I read through but not the util iou Not sure if I don’t don’t',
 'Ester: I I’ll might have some errors I have that’s I think <response>: Maybe in the computer folder that I read through but not the util iou Not sure if I don’t don’t <response>: Also I’ll probably want to set the context of sequence length 50?',
 'Winfrey: Maybe in the computer folder that I read through but not the util iou Not sure if I don’t don’t <response>: Also I’ll probably want to set the context of sequence length 50? <response>: Maybe in the',
 'Ester: Al

In [32]:
convo

["Ester: How's it going?",
 'Winfrey: Oh no I don’t why not the 4c I think remotely! <response>: I think some of it in the computer folder not very good <response>: ok I think',
 'Ester: I think some of it in the computer folder not very good <response>: ok I think <response>: I I’ll might have some errors I have that’s I think',
 'Winfrey: ok I think <response>: I I’ll might have some errors I have that’s I think <response>: Maybe in the computer folder that I read through but not the util iou Not sure if I don’t don’t',
 'Ester: I I’ll might have some errors I have that’s I think <response>: Maybe in the computer folder that I read through but not the util iou Not sure if I don’t don’t <response>: Also I’ll probably want to set the context of sequence length 50?',
 'Winfrey: Maybe in the computer folder that I read through but not the util iou Not sure if I don’t don’t <response>: Also I’ll probably want to set the context of sequence length 50? <response>: Maybe in the',
 'Ester: Al