In [None]:
# Fine tuning gpt2_medium model and use own data like company profile
#
# See also medium.com blog
# "GPT-2 Fine-Tuning Guide: Building a Chatbot for Your Company Profile"
# https://medium.com/@datatec.studio

In [None]:
# Install python packages
# !pip install -r requirements.txt

In [1]:
import os
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from tqdm import tqdm
import time
import numpy as np
import sys

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Define environment variable, path of data, model name and device
# os.environ["HF_HOME"] = "/content/huggingface"  # Replace with your desired directory
# print("Please replace it with your hf access token:")
# os.environ["HF_HOME_TOKEN"] = "Please_replace_it_with_your_hf_access_token"

result_dir = 'resources/'
data_file_name = 'Jeremy'
data_file_path = f'../data/prompt_response/{data_file_name}.txt'

model_name = "gpt2" # gpt2-medium
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(device)

cuda


In [3]:
# Write a python file to google driver
# Sample of json datasets
# You can also directly upload this code to your google driver
# The code write here in this way is for better understanding of whole project
# %%writefile chat_data.py

from torch.utils.data import Dataset
import json

class ChatData(Dataset):
    def __init__(self, path: str, tokenizer):
        with open(path, encoding="utf-8") as f:
            self.data = f.readlines()#json.load(open(path, "r"))

        self.X = []
        for pair in self.data:
            pair = eval(pair)          
            self.X.append(f"<start> {pair['prompt']} <response>: {pair['response']} <end>")
        
        total_samples = len(self.X)  # Calculate the total number of samples
        print("total_samples", total_samples)
        # define samples amount
#         self.X = self.X[:500]
        print("Check the preprocessing for self.X[0]:")
        print(self.X[0])

        self.X_encoded = tokenizer(self.X, return_tensors="pt", max_length=30, padding="max_length", truncation=True)
        self.input_ids = self.X_encoded['input_ids']
        self.attention_mask = self.X_encoded['attention_mask']

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.input_ids[idx], self.attention_mask[idx]


In [4]:
# Download model, save model and tokernize to harddisk
## prepare tokenizer
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

print(f'Number of tokens in tokenizer before adding our specific tokens: {len(tokenizer.get_vocab())}')

tokenizer.add_special_tokens({"pad_token": "<pad>",
                              "bos_token": "<start>",
                              "eos_token": "<end>"})

tokenizer.add_tokens(["<response>:"])
with open(f'../data/prompt_response/groupchat.txt', encoding="utf-8") as f:
    data = f.readlines()#json.load(open(path, "r"))

for pair in data:
    pair = eval(pair)
    tokenizer.add_tokens(pair['prompt'].split() + pair['response'].split())
    
print(f'Number of tokens in tokenizer after adding our specific tokens: {len(tokenizer.get_vocab())}')

## prepare model
### Specify the desired embedding size (must be a multiple of 8)
desired_embedding_size = 50264  # Change this to the desired size
model = GPT2LMHeadModel.from_pretrained(model_name).to(device)
### Resize the embedding layer to the desired size
model.resize_token_embeddings(len(tokenizer), desired_embedding_size)
model = model.to(device)

model.config.pad_token_id = tokenizer.eos_token_id + 1


## save tokenizer and model to harddisk
# tokenizer.save_pretrained(result_dir)
# model.save_pretrained(result_dir)

Number of tokens in tokenizer before adding our specific tokens: 50257
Number of tokens in tokenizer after adding our specific tokens: 52566


In [5]:
# ## load model and tokenizer from harddisk
# ### Load the GPT-2 tokenizer
# tokenizer = GPT2Tokenizer.from_pretrained(result_dir)

# ### Load the GPT-2 model from the local folder
# model = GPT2LMHeadModel.from_pretrained(result_dir)
# model.to(device)

In [6]:
# Define infer and train function
def infer(inp_raw):
    inp_appended = "<start> " + inp_raw + " <response>: "
    inp = tokenizer(inp_appended, return_tensors="pt")
    X = inp["input_ids"].to(device)  # Use .to(device) method to move the tensor to the specified device
    a = inp["attention_mask"].to(device)  # Use .to(device) method here as well

    output = model.generate(X, attention_mask=a, max_length=100, num_return_sequences=1)
    output = tokenizer.decode(output[0]).replace('<pad>', '')
    output = output[output.find('<response>: ')+len('<response>: '):].replace('<response>:', '')
    if '<end>' in output:
        output = output[:output.find('<end>')]
    output = ' '.join(output.split())
    
    prompt_response = {
        'prompt': inp_raw,
        'response': output
    }

    return prompt_response


def train(chatData, model, optim):
    
    batches = len(chatData)

    for i, (X, a) in tqdm(enumerate(chatData), total=len(chatData), desc="Training"):
        X = X.to(device)
        a = a.to(device)
        optim.zero_grad()
        loss = model(input_ids=X, attention_mask=a, labels=X).loss
        loss.backward()
        optim.step()

    print(infer("How's progress?"))
    print('========================================================')

In [7]:
# from chat_data import ChatData

#Load ChatData, train model and optimizer
chatData = ChatData(data_file_path, tokenizer)
chatData = DataLoader(chatData, batch_size=1) # batch_size=64

model.train()

optim = Adam(model.parameters(), lr=1e-5)

total_samples 164
Check the preprocessing for self.X[0]:
<start> What should our group name be 😎 Ester and I used transformers for PA1 <response>: lol anything works <end>


In [10]:
epochs = 100  # You can adjust the number of epochs as needed
for epoch in range(epochs):
    start = time.time()
    print(f"Epoch {epoch} started")
    train(chatData, model, optim)
    end = time.time()
    print(f"Epoch {epoch} finished in {np.round((end - start) / 60, 2)} minutes")

Epoch 0 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.57it/s]


{'prompt': "How's progress?", 'response': ''}
Epoch 0 finished in 0.12 minutes
Epoch 1 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.20it/s]


{'prompt': "How's progress?", 'response': ''}
Epoch 1 finished in 0.12 minutes
Epoch 2 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.32it/s]


{'prompt': "How's progress?", 'response': ''}
Epoch 2 finished in 0.12 minutes
Epoch 3 started


Training: 100%|██████████| 164/164 [00:06<00:00, 24.93it/s]


{'prompt': "How's progress?", 'response': 'how long does it take to run? < i tried are u using any names and id 31,'}
Epoch 3 finished in 0.12 minutes
Epoch 4 started


Training: 100%|██████████| 164/164 [00:06<00:00, 24.96it/s]


{'prompt': "How's progress?", 'response': 'the plots oh how many are u using'}
Epoch 4 finished in 0.12 minutes
Epoch 5 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.94it/s]


{'prompt': "How's progress?", 'response': 'oh how many neurons are u using ? i have my own one ?'}
Epoch 5 finished in 0.12 minutes
Epoch 6 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.67it/s]


{'prompt': "How's progress?", 'response': 'the good i think the good model is not predicting so expensive oh oop'}
Epoch 6 finished in 0.11 minutes
Epoch 7 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.26it/s]


{'prompt': "How's progress?", 'response': 'the same thing lol just checked, its under jeremy-unet, the code is still vv scuffed but higher so far ^ the bit is higher'}
Epoch 7 finished in 0.12 minutes
Epoch 8 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.70it/s]


{'prompt': "How's progress?", 'response': 'the dropout for'}
Epoch 8 finished in 0.11 minutes
Epoch 9 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.54it/s]


{'prompt': "How's progress?", 'response': ''}
Epoch 9 finished in 0.12 minutes
Epoch 10 started


Training: 100%|██████████| 164/164 [00:06<00:00, 24.25it/s]


{'prompt': "How's progress?", 'response': ''}
Epoch 10 finished in 0.13 minutes
Epoch 11 started


Training: 100%|██████████| 164/164 [00:06<00:00, 24.75it/s]


{'prompt': "How's progress?", 'response': 'the dropout function supposed to be of a single prediction and target target ='}
Epoch 11 finished in 0.12 minutes
Epoch 12 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.42it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 12 finished in 0.12 minutes
Epoch 13 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.19it/s]


{'prompt': "How's progress?", 'response': 'ig its like the intersection is x percent of the same image + the original?'}
Epoch 13 finished in 0.12 minutes
Epoch 14 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.10it/s]


{'prompt': "How's progress?", 'response': ''}
Epoch 14 finished in 0.12 minutes
Epoch 15 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.06it/s]


{'prompt': "How's progress?", 'response': 'how does the classifier know which label is the most popular ? label is the most popular ? the most popular ? popular ?'}
Epoch 15 finished in 0.12 minutes
Epoch 16 started


Training: 100%|██████████| 164/164 [00:06<00:00, 24.56it/s]


{'prompt': "How's progress?", 'response': 'oh how many neurons are u using'}
Epoch 16 finished in 0.12 minutes
Epoch 17 started


Training: 100%|██████████| 164/164 [00:06<00:00, 23.88it/s]


{'prompt': "How's progress?", 'response': 'oh how many neurons are u using'}
Epoch 17 finished in 0.13 minutes
Epoch 18 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.37it/s]


{'prompt': "How's progress?", 'response': ''}
Epoch 18 finished in 0.12 minutes
Epoch 19 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.52it/s]


{'prompt': "How's progress?", 'response': 'oh how many layers are u using'}
Epoch 19 finished in 0.12 minutes
Epoch 20 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.08it/s]


{'prompt': "How's progress?", 'response': 'oh how many people. are u using'}
Epoch 20 finished in 0.12 minutes
Epoch 21 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.30it/s]


{'prompt': "How's progress?", 'response': 'the plots for hyperparamter i need a break anyways'}
Epoch 21 finished in 0.12 minutes
Epoch 22 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.31it/s]


{'prompt': "How's progress?", 'response': 'ur actually amazing'}
Epoch 22 finished in 0.12 minutes
Epoch 23 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.70it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 23 finished in 0.12 minutes
Epoch 24 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.00it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 24 finished in 0.12 minutes
Epoch 25 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.66it/s]


{'prompt': "How's progress?", 'response': ''}
Epoch 25 finished in 0.11 minutes
Epoch 26 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.60it/s]


{'prompt': "How's progress?", 'response': 'you can put these numbers into the report over 100 different transformations and different'}
Epoch 26 finished in 0.12 minutes
Epoch 27 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.93it/s]


{'prompt': "How's progress?", 'response': 'oh how many neurons are u using'}
Epoch 27 finished in 0.12 minutes
Epoch 28 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.07it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 28 finished in 0.12 minutes
Epoch 29 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.52it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 29 finished in 0.12 minutes
Epoch 30 started


Training: 100%|██████████| 164/164 [00:06<00:00, 27.21it/s]


{'prompt': "How's progress?", 'response': 'std:'}
Epoch 30 finished in 0.11 minutes
Epoch 31 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.53it/s]


{'prompt': "How's progress?", 'response': 'is still running and ntg is giving false information is giving iou in a forward section i just missed the results using the same length of the training and using different temperatures'}
Epoch 31 finished in 0.12 minutes
Epoch 32 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.82it/s]


{'prompt': "How's progress?", 'response': 'you can put these numbers into the report over 100 different transformations'}
Epoch 32 finished in 0.12 minutes
Epoch 33 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.27it/s]


{'prompt': "How's progress?", 'response': 'ur actually amazing'}
Epoch 33 finished in 0.12 minutes
Epoch 34 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.22it/s]


{'prompt': "How's progress?", 'response': 'how does it take to run? < how are we set to go through this week, <'}
Epoch 34 finished in 0.12 minutes
Epoch 35 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.51it/s]


{'prompt': "How's progress?", 'response': 'std:'}
Epoch 35 finished in 0.12 minutes
Epoch 36 started


Training: 100%|██████████| 164/164 [00:06<00:00, 27.28it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 36 finished in 0.11 minutes
Epoch 37 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.32it/s]


{'prompt': "How's progress?", 'response': 'yea'}
Epoch 37 finished in 0.12 minutes
Epoch 38 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.52it/s]


{'prompt': "How's progress?", 'response': 'curious'}
Epoch 38 finished in 0.12 minutes
Epoch 39 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.72it/s]


{'prompt': "How's progress?", 'response': 'ur actually amazing'}
Epoch 39 finished in 0.12 minutes
Epoch 40 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.78it/s]


{'prompt': "How's progress?", 'response': 'mean:'}
Epoch 40 finished in 0.12 minutes
Epoch 41 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.34it/s]


{'prompt': "How's progress?", 'response': 'will do'}
Epoch 41 finished in 0.12 minutes
Epoch 42 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.62it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 42 finished in 0.11 minutes
Epoch 43 started


Training: 100%|██████████| 164/164 [00:06<00:00, 27.07it/s]


{'prompt': "How's progress?", 'response': 'curious what gpu it is lol'}
Epoch 43 finished in 0.11 minutes
Epoch 44 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.14it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 44 finished in 0.12 minutes
Epoch 45 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.79it/s]


{'prompt': "How's progress?", 'response': 'oh how many neurons are u using'}
Epoch 45 finished in 0.12 minutes
Epoch 46 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.34it/s]


{'prompt': "How's progress?", 'response': 'std:'}
Epoch 46 finished in 0.12 minutes
Epoch 47 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.61it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 47 finished in 0.12 minutes
Epoch 48 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.77it/s]


{'prompt': "How's progress?", 'response': ''}
Epoch 48 finished in 0.12 minutes
Epoch 49 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.74it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 49 finished in 0.12 minutes
Epoch 50 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.23it/s]


{'prompt': "How's progress?", 'response': 'what time works for you? like in 5 mknjtes'}
Epoch 50 finished in 0.12 minutes
Epoch 51 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.78it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 51 finished in 0.12 minutes
Epoch 52 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.13it/s]


{'prompt': "How's progress?", 'response': 'std:'}
Epoch 52 finished in 0.12 minutes
Epoch 53 started


Training: 100%|██████████| 164/164 [00:05<00:00, 27.44it/s]


{'prompt': "How's progress?", 'response': '0.01862474781439139 tysm'}
Epoch 53 finished in 0.11 minutes
Epoch 54 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.07it/s]


{'prompt': "How's progress?", 'response': 'std:'}
Epoch 54 finished in 0.12 minutes
Epoch 55 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.71it/s]


{'prompt': "How's progress?", 'response': 'curious if you have extra time, could you double check the architecture for the rest of this week, i tried added a lot more'}
Epoch 55 finished in 0.12 minutes
Epoch 56 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.43it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 56 finished in 0.12 minutes
Epoch 57 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.93it/s]


{'prompt': "How's progress?", 'response': 'ur actually amazing'}
Epoch 57 finished in 0.12 minutes
Epoch 58 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.67it/s]


{'prompt': "How's progress?", 'response': 'oh how many neurons are u using'}
Epoch 58 finished in 0.12 minutes
Epoch 59 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.93it/s]


{'prompt': "How's progress?", 'response': 'curious what gpu it is lol'}
Epoch 59 finished in 0.11 minutes
Epoch 60 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.74it/s]


{'prompt': "How's progress?", 'response': 'ok i just pushed its under jeremy-unet, the code is still vv scuffed but higher code is still vv'}
Epoch 60 finished in 0.12 minutes
Epoch 61 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.10it/s]


{'prompt': "How's progress?", 'response': 'the iou function supposed to be of a single prediction and target and then fill out that part of the training i’ll'}
Epoch 61 finished in 0.12 minutes
Epoch 62 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.44it/s]


{'prompt': "How's progress?", 'response': 'mean: 0.015202989747957751'}
Epoch 62 finished in 0.12 minutes
Epoch 63 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.48it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 63 finished in 0.12 minutes
Epoch 64 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.49it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 64 finished in 0.12 minutes
Epoch 65 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.14it/s]


{'prompt': "How's progress?", 'response': 'curious curious what time works for you? like in 5 mknjtes'}
Epoch 65 finished in 0.12 minutes
Epoch 66 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.27it/s]


{'prompt': "How's progress?", 'response': 'tytyty'}
Epoch 66 finished in 0.12 minutes
Epoch 67 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.25it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 67 finished in 0.12 minutes
Epoch 68 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.98it/s]


{'prompt': "How's progress?", 'response': 'oh how many neurons are u using'}
Epoch 68 finished in 0.12 minutes
Epoch 69 started


Training: 100%|██████████| 164/164 [00:06<00:00, 27.12it/s]


{'prompt': "How's progress?", 'response': 'tytyty'}
Epoch 69 finished in 0.11 minutes
Epoch 70 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.66it/s]


{'prompt': "How's progress?", 'response': 'tytyty'}
Epoch 70 finished in 0.12 minutes
Epoch 71 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.68it/s]


{'prompt': "How's progress?", 'response': 'how long does it take to run? < the training time is obscene'}
Epoch 71 finished in 0.11 minutes
Epoch 72 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.79it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 72 finished in 0.12 minutes
Epoch 73 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.73it/s]


{'prompt': "How's progress?", 'response': 'tytyty'}
Epoch 73 finished in 0.12 minutes
Epoch 74 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.74it/s]


{'prompt': "How's progress?", 'response': 'yea'}
Epoch 74 finished in 0.12 minutes
Epoch 75 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.10it/s]


{'prompt': "How's progress?", 'response': 'the plots for hyperparamter tuning using sequence length of 50 and using sequence length of 50 and using sequence length of 50 and using sequence length of 50 and using sequence length of 50 and using sequence length of 50 using sequence length of of 50'}
Epoch 75 finished in 0.12 minutes
Epoch 76 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.87it/s]


{'prompt': "How's progress?", 'response': 'tytyty'}
Epoch 76 finished in 0.11 minutes
Epoch 77 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.85it/s]


{'prompt': "How's progress?", 'response': 'the plots is rollled up into the forward section of the article but not under 200 at some worse but it should be into the forward section of the article but idk but too far than the points of the articles but'}
Epoch 77 finished in 0.11 minutes
Epoch 78 started


Training: 100%|██████████| 164/164 [00:05<00:00, 27.42it/s]


{'prompt': "How's progress?", 'response': 'tytyty'}
Epoch 78 finished in 0.11 minutes
Epoch 79 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.70it/s]


{'prompt': "How's progress?", 'response': 'tytyty'}
Epoch 79 finished in 0.11 minutes
Epoch 80 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.48it/s]


{'prompt': "How's progress?", 'response': 'tytyty'}
Epoch 80 finished in 0.12 minutes
Epoch 81 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.52it/s]


{'prompt': "How's progress?", 'response': 'ur actually amazing the amazing'}
Epoch 81 finished in 0.12 minutes
Epoch 82 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.69it/s]


{'prompt': "How's progress?", 'response': 'tytyty'}
Epoch 82 finished in 0.11 minutes
Epoch 83 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.94it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 83 finished in 0.12 minutes
Epoch 84 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.80it/s]


{'prompt': "How's progress?", 'response': 'the iou function supposed to be of a single prediction and target name and training length of 50? and then fill out that part of the training i’ll part'}
Epoch 84 finished in 0.12 minutes
Epoch 85 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.08it/s]


{'prompt': "How's progress?", 'response': 'is there anything that I should do to adjust the spacing..? might be faster but not really yea'}
Epoch 85 finished in 0.12 minutes
Epoch 86 started


Training: 100%|██████████| 164/164 [00:06<00:00, 27.23it/s]


{'prompt': "How's progress?", 'response': 'yea'}
Epoch 86 finished in 0.11 minutes
Epoch 87 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.68it/s]


{'prompt': "How's progress?", 'response': 'i’m how long does it take to run? < are we meeting or using the'}
Epoch 87 finished in 0.11 minutes
Epoch 88 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.90it/s]


{'prompt': "How's progress?", 'response': 'tytyty'}
Epoch 88 finished in 0.11 minutes
Epoch 89 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.44it/s]


{'prompt': "How's progress?", 'response': 'curious curious ur actually amazing'}
Epoch 89 finished in 0.12 minutes
Epoch 90 started


Training: 100%|██████████| 164/164 [00:06<00:00, 24.74it/s]


{'prompt': "How's progress?", 'response': 'tytyty'}
Epoch 90 finished in 0.12 minutes
Epoch 91 started


Training: 100%|██████████| 164/164 [00:06<00:00, 25.48it/s]


{'prompt': "How's progress?", 'response': 'tytyty'}
Epoch 91 finished in 0.12 minutes
Epoch 92 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.47it/s]


{'prompt': "How's progress?", 'response': 'tytyty'}
Epoch 92 finished in 0.12 minutes
Epoch 93 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.16it/s]


{'prompt': "How's progress?", 'response': 'tytyty'}
Epoch 93 finished in 0.12 minutes
Epoch 94 started


Training: 100%|██████████| 164/164 [00:05<00:00, 27.34it/s]


{'prompt': "How's progress?", 'response': 'tytyty'}
Epoch 94 finished in 0.11 minutes
Epoch 95 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.83it/s]


{'prompt': "How's progress?", 'response': 'ok i updated the abstract, also added in the pictures of the original section'}
Epoch 95 finished in 0.11 minutes
Epoch 96 started


Training: 100%|██████████| 164/164 [00:06<00:00, 27.02it/s]


{'prompt': "How's progress?", 'response': 'tysm'}
Epoch 96 finished in 0.11 minutes
Epoch 97 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.51it/s]


{'prompt': "How's progress?", 'response': 'ok i updated the abstract, also added in the pictures of the photo focal of the photo ok i just pushed the bit but / i just added the pixel size of the photo to my related pixel is rollled'}
Epoch 97 finished in 0.12 minutes
Epoch 98 started


Training: 100%|██████████| 164/164 [00:06<00:00, 26.51it/s]


{'prompt': "How's progress?", 'response': 'ur actually amazing'}
Epoch 98 finished in 0.12 minutes
Epoch 99 started


Training: 100%|██████████| 164/164 [00:06<00:00, 27.30it/s]


{'prompt': "How's progress?", 'response': 'oh how many neurons are u using'}
Epoch 99 finished in 0.11 minutes


In [None]:
inp = ""
while True:
    inp = input("Enter your input (press Enter when done): " + " " * 20)
    print(infer(inp))

In [11]:
# Save the model's state dictionary after training is complete
torch.save(model.state_dict(), f"models/{data_file_name}.pt")

# Simulate a Chat

In [12]:
ester = GPT2LMHeadModel.from_pretrained(model_name).to(device)
ester.resize_token_embeddings(len(tokenizer), desired_embedding_size)
ester.load_state_dict(torch.load("models/Ester.pt"))
ester.config.pad_token_id = tokenizer.eos_token_id + 1

winfrey = GPT2LMHeadModel.from_pretrained(model_name).to(device)
winfrey.resize_token_embeddings(len(tokenizer), desired_embedding_size)
winfrey.load_state_dict(torch.load("models/Winfrey.pt"))
winfrey.config.pad_token_id = tokenizer.eos_token_id + 1

jeremy = GPT2LMHeadModel.from_pretrained(model_name).to(device)
jeremy.resize_token_embeddings(len(tokenizer), desired_embedding_size)
jeremy.load_state_dict(torch.load("models/Jeremy.pt"))
jeremy.config.pad_token_id = tokenizer.eos_token_id + 1

jonathan = GPT2LMHeadModel.from_pretrained(model_name).to(device)
jonathan.resize_token_embeddings(len(tokenizer), desired_embedding_size)
jonathan.load_state_dict(torch.load("models/Jonathan.pt"))
jonathan.config.pad_token_id = tokenizer.eos_token_id + 1

samuel = GPT2LMHeadModel.from_pretrained(model_name).to(device)
samuel.resize_token_embeddings(len(tokenizer), desired_embedding_size)
samuel.load_state_dict(torch.load("models/Samuel.pt"))
samuel.config.pad_token_id = tokenizer.eos_token_id + 1

In [13]:
def group_infer(inp_raw, model):
    inp_appended = "<start> " + inp_raw + " <response>: "
    inp = tokenizer(inp_appended, return_tensors="pt")
    X = inp["input_ids"].to(device)  # Use .to(device) method to move the tensor to the specified device
    a = inp["attention_mask"].to(device)  # Use .to(device) method here as well

    output = model.generate(X, attention_mask=a, max_length=100, num_return_sequences=1)
    output = tokenizer.decode(output[0])
#     print(output)
    output = output[output.find('<response>: ')+len('<response>: '):].replace('<pad>', '')#.replace('<response>:', '')#.replace('<pad>', '')#.replace('<end>', '')
    if '<response>' in output:
        output = output[:output.find('<response>')]
    if '<end>' in output:
        output = output[:output.find('<end>')]
    output = ' '.join(output.split())
    
    prompt_response = {
        'prompt': inp_raw,
        'response': output
    }

    return prompt_response

In [117]:
speaker = 'Samuel'
output = group_infer("oh okay", eval(speaker.lower())) # How's it going?
with_name = f'{speaker}: ' + output['prompt']
convo = [with_name]
print(with_name)

people = ['Winfrey', 'Ester', 'Jeremy', 'Samuel', 'Jonathan']
for i in range(15):
    response = output['response']
    remaining_people = [person for person in people if person != speaker]
    speaker = np.random.choice(remaining_people)
#     print(speaker)
    output = group_infer(response, eval(speaker.lower())) # assuming models are named ester, winfrey, ...
    with_name = f'{speaker}: ' + output['response']
    convo += [with_name]
    print(with_name)

Samuel: oh okay
Jeremy: for different temperatures
Ester: can you all check on your end what your test accuracy is before training? your error or you can comment on the end by cloning your error or you guys will have a better test accuracy than baseline
Winfrey: Just set it at 10 torch.manual_seed(seed) for better.
Jonathan: I can also fill in the addition
Jeremy: check
Winfrey: ok check it in a bit
Jonathan: i can do a final read through if it messes up if theres diff


KeyboardInterrupt: 

In [None]:
convo