## Installing Libraries

In [None]:
!pip install datasets
! pip install -U accelerate==0.15.0
! pip install -U transformers==4.28.1

Collecting datasets
  Downloading datasets-2.15.0-py3-none-any.whl.metadata (20 kB)
Collecting numpy>=1.17 (from datasets)
  Using cached numpy-1.26.2-cp39-cp39-macosx_11_0_arm64.whl.metadata (61 kB)
Collecting pyarrow>=8.0.0 (from datasets)
  Downloading pyarrow-14.0.1-cp39-cp39-macosx_11_0_arm64.whl.metadata (3.0 kB)
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl.metadata (9.9 kB)
Collecting pandas (from datasets)
  Using cached pandas-2.1.3-cp39-cp39-macosx_11_0_arm64.whl.metadata (18 kB)
Collecting requests>=2.19.0 (from datasets)
  Using cached requests-2.31.0-py3-none-any.whl.metadata (4.6 kB)
Collecting tqdm>=4.62.1 (from datasets)
  Using cached tqdm-4.66.1-py3-none-any.whl.metadata (57 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp39-cp39-macosx_11_0_arm64.whl.metadata (12 kB)
Collecting multiprocess (from d

In [None]:
#!pip install matplotlib
#!pip install tensorflow
!pip install scikit-learn

Collecting scikit-learn
  Using cached scikit_learn-1.3.2-cp39-cp39-macosx_12_0_arm64.whl.metadata (11 kB)
Collecting scipy>=1.5.0 (from scikit-learn)
  Using cached scipy-1.11.4-cp39-cp39-macosx_12_0_arm64.whl.metadata (60 kB)
Collecting joblib>=1.1.1 (from scikit-learn)
  Using cached joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=2.0.0 (from scikit-learn)
  Using cached threadpoolctl-3.2.0-py3-none-any.whl.metadata (10.0 kB)
Using cached scikit_learn-1.3.2-cp39-cp39-macosx_12_0_arm64.whl (9.5 MB)
Using cached joblib-1.3.2-py3-none-any.whl (302 kB)
Using cached scipy-1.11.4-cp39-cp39-macosx_12_0_arm64.whl (29.7 MB)
Using cached threadpoolctl-3.2.0-py3-none-any.whl (15 kB)
Installing collected packages: threadpoolctl, scipy, joblib, scikit-learn
Successfully installed joblib-1.3.2 scikit-learn-1.3.2 scipy-1.11.4 threadpoolctl-3.2.0


## Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments,DistilBertTokenizerFast
from transformers import AutoTokenizer,TFTrainingArguments
from transformers import OpenAIGPTForSequenceClassification, OpenAIGPTTokenizer,OpenAIGPTModel
from datasets import load_dataset
from torch.utils.data import Dataset
import torch
import tensorflow as tf

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Read data/gamedev.csv and preview the first rows with the 'id' column hidden.
# The drop is not assigned back, so `df` itself still contains 'id'.
df = pd.read_csv('data/gamedev.csv')
df.drop(columns='id').head()

Unnamed: 0,user_id,tags,question,answer
0,4450,"['pause', 'timescale']",How can I pause my game?,"In the Editor, you can just click the pause bu..."
1,4450,"['pause', 'timescale']",What is the best way to pause my game?,"In the Editor, you can just click the pause bu..."
2,4450,"['pause', 'timescale']","When I play a game, how do I pause it?","In the Editor, you can just click the pause bu..."
3,4450,"['pause', 'timescale']",Could you please tell me how I can pause my game?,"In the Editor, you can just click the pause bu..."
4,4450,"['pause', 'timescale']","In order to pause my game, what should I do?","In the Editor, you can just click the pause bu..."


## Data Preparation

In [None]:
from sklearn.model_selection import train_test_split

# Only the first 1000 question/answer pairs are used.
X = df['question'][:1000]
y = df['answer'][:1000]

# First hold out 10% as the test set, then carve 20% of the remainder off as
# the evaluation set. Both splits use the same fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=41)
X_train, X_eval, y_train, y_eval = train_test_split(X_train, y_train, test_size=0.2, random_state=41)

# Positional lookups for each split: position -> answer text, and answer text -> position.
# When an answer occurs more than once, the reverse map keeps its last position.
y_train_id2label = dict(enumerate(y_train))
y_train_label2id = {answer: position for position, answer in enumerate(y_train)}

y_eval_id2label = dict(enumerate(y_eval))
y_eval_label2id = {answer: position for position, answer in enumerate(y_eval)}

y_test_id2label = dict(enumerate(y_test))
y_test_label2id = {answer: position for position, answer in enumerate(y_test)}

## Tuning

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import torch

# Define your fine-tuning dataset class
class CustomDataset(Dataset):
    """Tokenizes raw texts on demand for language-model fine-tuning.

    Args:
        texts: Indexable collection of strings (one training example each).
        tokenizer: Callable tokenizer; it is invoked with ``truncation=True``,
            ``padding='max_length'``, ``max_length`` and ``return_tensors='pt'``
            and must return a mapping containing ``'input_ids'``.
        max_length: Length every example is truncated / padded to.
    """

    def __init__(self, texts, tokenizer, max_length):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of texts in the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize the text at ``idx``.

        Returns:
            dict with ``'input_ids'`` and, when the tokenizer provides one,
            ``'attention_mask'``; each is a 1-D tensor with the batch
            dimension removed.
        """
        encoding = self.tokenizer(self.texts[idx], truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')

        # squeeze(0) removes only the leading batch dimension; a bare squeeze()
        # would also collapse the sequence dimension when max_length == 1.
        item = {'input_ids': encoding['input_ids'].squeeze(0)}

        # Keep the attention mask so padding can be told apart from real tokens
        # downstream (needed here because pad and EOS share one token id).
        if 'attention_mask' in encoding:
            item['attention_mask'] = encoding['attention_mask'].squeeze(0)

        return item

# Choose the device up front so the model can be moved as soon as it is loaded
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load pre-trained tokenizer and model
model_name = "gpt2"  # You can also use other GPT-2 variants like "gpt2-medium", "gpt2-large", etc.
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token for padding
tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained(model_name).to(device)

# Fine-tuning data
# NOTE(review): only the questions (X_train) are used as training text; the
# answers in y_train are never shown to the model. Confirm this is intended.
train_texts = X_train.tolist()
train_dataset = CustomDataset(texts=train_texts, tokenizer=tokenizer, max_length=128)

# Shuffled mini-batches of 8 examples for training
train_dataloader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)

# NOTE(review): lr=1e-3 looks high for fine-tuning a pre-trained model. Confirm.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

num_epochs = 2  # Adjust as needed
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for batch in tqdm(train_dataloader, desc=f'Epoch {epoch + 1}/{num_epochs}'):
        inputs = batch['input_ids'].to(device)

        # Use the tokenizer's attention mask when the dataset provides it;
        # otherwise fall back to treating every pad-token position as padding.
        if 'attention_mask' in batch:
            attention_mask = batch['attention_mask'].to(device)
        else:
            attention_mask = (inputs != tokenizer.pad_token_id).long()

        # Labels are the inputs themselves: GPT2LMHeadModel shifts them by one
        # position internally when computing the loss. Shifting them here as
        # well would train the model to predict the token two steps ahead.
        labels = inputs.clone()
        # -100 excludes padded positions from the loss.
        labels[attention_mask == 0] = -100

        optimizer.zero_grad()
        outputs = model(inputs, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    # Mean of the per-batch losses for this epoch
    average_loss = total_loss / len(train_dataloader)
    print(f'Epoch {epoch + 1}/{num_epochs}, Average Loss: {average_loss}')

# Save the fine-tuned model
model.save_pretrained('fine_tuned_gpt2')
tokenizer.save_pretrained('fine_tuned_gpt2')


Epoch 1/2:   0%|          | 0/90 [00:00<?, ?it/s]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:   1%|          | 1/90 [00:02<03:24,  2.30s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:   2%|▏         | 2/90 [00:04<03:00,  2.05s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:   3%|▎         | 3/90 [00:05<02:46,  1.92s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:   4%|▍         | 4/90 [00:07<02:34,  1.79s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:   6%|▌         | 5/90 [00:09<02:27,  1.73s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:   7%|▋         | 6/90 [00:10<02:21,  1.69s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:   8%|▊         | 7/90 [00:12<02:15,  1.64s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:   9%|▉         | 8/90 [00:13<02:11,  1.60s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  10%|█         | 9/90 [00:15<02:09,  1.60s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  11%|█         | 10/90 [00:17<02:11,  1.65s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  12%|█▏        | 11/90 [00:18<02:07,  1.61s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  13%|█▎        | 12/90 [00:20<02:03,  1.58s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  14%|█▍        | 13/90 [00:21<02:00,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  16%|█▌        | 14/90 [00:23<01:57,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  17%|█▋        | 15/90 [00:24<01:58,  1.58s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  18%|█▊        | 16/90 [00:26<01:57,  1.59s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  19%|█▉        | 17/90 [00:28<01:55,  1.59s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  20%|██        | 18/90 [00:29<01:54,  1.58s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  21%|██        | 19/90 [00:31<02:00,  1.70s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  22%|██▏       | 20/90 [00:33<02:01,  1.73s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  23%|██▎       | 21/90 [00:35<01:56,  1.69s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  24%|██▍       | 22/90 [00:36<01:53,  1.66s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  26%|██▌       | 23/90 [00:38<01:49,  1.64s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  27%|██▋       | 24/90 [00:39<01:48,  1.65s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  28%|██▊       | 25/90 [00:41<01:43,  1.60s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  29%|██▉       | 26/90 [00:42<01:39,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  30%|███       | 27/90 [00:44<01:36,  1.53s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  31%|███       | 28/90 [00:45<01:34,  1.53s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  32%|███▏      | 29/90 [00:47<01:32,  1.51s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  33%|███▎      | 30/90 [00:48<01:30,  1.51s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  34%|███▍      | 31/90 [00:50<01:28,  1.50s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  36%|███▌      | 32/90 [00:51<01:26,  1.49s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  37%|███▋      | 33/90 [00:53<01:25,  1.50s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  38%|███▊      | 34/90 [00:54<01:24,  1.50s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  39%|███▉      | 35/90 [00:56<01:22,  1.50s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  40%|████      | 36/90 [00:57<01:20,  1.49s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  41%|████      | 37/90 [00:59<01:18,  1.48s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  42%|████▏     | 38/90 [01:00<01:16,  1.48s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  43%|████▎     | 39/90 [01:02<01:15,  1.48s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  44%|████▍     | 40/90 [01:04<01:21,  1.63s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  46%|████▌     | 41/90 [01:05<01:18,  1.61s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  47%|████▋     | 42/90 [01:07<01:15,  1.57s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  48%|████▊     | 43/90 [01:08<01:12,  1.54s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  49%|████▉     | 44/90 [01:10<01:10,  1.53s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  50%|█████     | 45/90 [01:11<01:08,  1.52s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  51%|█████     | 46/90 [01:13<01:06,  1.51s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  52%|█████▏    | 47/90 [01:14<01:05,  1.52s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  53%|█████▎    | 48/90 [01:16<01:03,  1.51s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  54%|█████▍    | 49/90 [01:17<01:01,  1.51s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  56%|█████▌    | 50/90 [01:19<01:00,  1.51s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  57%|█████▋    | 51/90 [01:20<00:59,  1.52s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  58%|█████▊    | 52/90 [01:22<00:57,  1.52s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  59%|█████▉    | 53/90 [01:23<00:55,  1.51s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  60%|██████    | 54/90 [01:25<00:54,  1.51s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  61%|██████    | 55/90 [01:26<00:52,  1.50s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  62%|██████▏   | 56/90 [01:28<00:51,  1.52s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  63%|██████▎   | 57/90 [01:29<00:50,  1.52s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  64%|██████▍   | 58/90 [01:31<00:47,  1.50s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  66%|██████▌   | 59/90 [01:32<00:46,  1.49s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  67%|██████▋   | 60/90 [01:34<00:44,  1.49s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  68%|██████▊   | 61/90 [01:35<00:43,  1.49s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  69%|██████▉   | 62/90 [01:37<00:41,  1.49s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  70%|███████   | 63/90 [01:38<00:40,  1.50s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  71%|███████   | 64/90 [01:40<00:40,  1.54s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  72%|███████▏  | 65/90 [01:41<00:38,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  73%|███████▎  | 66/90 [01:43<00:37,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  74%|███████▍  | 67/90 [01:45<00:36,  1.58s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  76%|███████▌  | 68/90 [01:46<00:36,  1.64s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  77%|███████▋  | 69/90 [01:48<00:33,  1.59s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  78%|███████▊  | 70/90 [01:49<00:31,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  79%|███████▉  | 71/90 [01:51<00:29,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  80%|████████  | 72/90 [01:52<00:27,  1.53s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  81%|████████  | 73/90 [01:54<00:25,  1.53s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  82%|████████▏ | 74/90 [01:55<00:24,  1.52s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  83%|████████▎ | 75/90 [01:57<00:22,  1.51s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  84%|████████▍ | 76/90 [01:58<00:20,  1.50s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  86%|████████▌ | 77/90 [02:00<00:19,  1.50s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  87%|████████▋ | 78/90 [02:01<00:17,  1.49s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  88%|████████▊ | 79/90 [02:03<00:16,  1.50s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  89%|████████▉ | 80/90 [02:04<00:15,  1.51s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  90%|█████████ | 81/90 [02:06<00:13,  1.50s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  91%|█████████ | 82/90 [02:07<00:12,  1.50s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  92%|█████████▏| 83/90 [02:09<00:10,  1.50s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  93%|█████████▎| 84/90 [02:10<00:09,  1.51s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  94%|█████████▍| 85/90 [02:12<00:07,  1.51s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  96%|█████████▌| 86/90 [02:13<00:06,  1.53s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  97%|█████████▋| 87/90 [02:15<00:04,  1.54s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  98%|█████████▊| 88/90 [02:17<00:03,  1.54s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2:  99%|█████████▉| 89/90 [02:18<00:01,  1.57s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 1/2: 100%|██████████| 90/90 [02:20<00:00,  1.56s/it]


Epoch 1/2, Average Loss: 4.65971856382158


Epoch 2/2:   0%|          | 0/90 [00:00<?, ?it/s]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:   1%|          | 1/90 [00:01<02:23,  1.62s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:   2%|▏         | 2/90 [00:03<02:16,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:   3%|▎         | 3/90 [00:04<02:13,  1.54s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:   4%|▍         | 4/90 [00:06<02:11,  1.53s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:   6%|▌         | 5/90 [00:07<02:11,  1.54s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:   7%|▋         | 6/90 [00:09<02:09,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:   8%|▊         | 7/90 [00:10<02:08,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:   9%|▉         | 8/90 [00:12<02:07,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  10%|█         | 9/90 [00:13<02:05,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  11%|█         | 10/90 [00:15<02:06,  1.58s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  12%|█▏        | 11/90 [00:17<02:12,  1.67s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  13%|█▎        | 12/90 [00:19<02:13,  1.71s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  14%|█▍        | 13/90 [00:20<02:11,  1.71s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  16%|█▌        | 14/90 [00:22<02:07,  1.67s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  17%|█▋        | 15/90 [00:24<02:02,  1.63s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  18%|█▊        | 16/90 [00:25<02:00,  1.63s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  19%|█▉        | 17/90 [00:27<01:57,  1.61s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  20%|██        | 18/90 [00:28<01:55,  1.60s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  21%|██        | 19/90 [00:30<01:54,  1.61s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  22%|██▏       | 20/90 [00:32<01:52,  1.61s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  23%|██▎       | 21/90 [00:33<01:49,  1.59s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  24%|██▍       | 22/90 [00:35<01:46,  1.57s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  26%|██▌       | 23/90 [00:36<01:44,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  27%|██▋       | 24/90 [00:38<01:42,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  28%|██▊       | 25/90 [00:39<01:41,  1.57s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  29%|██▉       | 26/90 [00:41<01:41,  1.59s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  30%|███       | 27/90 [00:43<01:38,  1.57s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  31%|███       | 28/90 [00:44<01:37,  1.57s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  32%|███▏      | 29/90 [00:46<01:35,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  33%|███▎      | 30/90 [00:47<01:35,  1.59s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  34%|███▍      | 31/90 [00:49<01:32,  1.57s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  36%|███▌      | 32/90 [00:50<01:29,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  37%|███▋      | 33/90 [00:52<01:28,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  38%|███▊      | 34/90 [00:53<01:27,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  39%|███▉      | 35/90 [00:55<01:25,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  40%|████      | 36/90 [00:57<01:24,  1.57s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  41%|████      | 37/90 [00:58<01:22,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  42%|████▏     | 38/90 [01:00<01:21,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  43%|████▎     | 39/90 [01:01<01:19,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  44%|████▍     | 40/90 [01:03<01:17,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  46%|████▌     | 41/90 [01:04<01:15,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  47%|████▋     | 42/90 [01:06<01:14,  1.54s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  48%|████▊     | 43/90 [01:07<01:12,  1.54s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  49%|████▉     | 44/90 [01:09<01:10,  1.54s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  50%|█████     | 45/90 [01:10<01:09,  1.54s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  51%|█████     | 46/90 [01:12<01:07,  1.53s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  52%|█████▏    | 47/90 [01:14<01:06,  1.54s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  53%|█████▎    | 48/90 [01:15<01:04,  1.53s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  54%|█████▍    | 49/90 [01:17<01:02,  1.54s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  56%|█████▌    | 50/90 [01:18<01:02,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  57%|█████▋    | 51/90 [01:20<01:00,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  58%|█████▊    | 52/90 [01:21<00:59,  1.57s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  59%|█████▉    | 53/90 [01:23<00:57,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  60%|██████    | 54/90 [01:24<00:55,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  61%|██████    | 55/90 [01:26<00:54,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  62%|██████▏   | 56/90 [01:28<00:53,  1.58s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  63%|██████▎   | 57/90 [01:29<00:52,  1.58s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  64%|██████▍   | 58/90 [01:31<00:49,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  66%|██████▌   | 59/90 [01:32<00:48,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  67%|██████▋   | 60/90 [01:34<00:46,  1.54s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  68%|██████▊   | 61/90 [01:35<00:44,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  69%|██████▉   | 62/90 [01:37<00:43,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  70%|███████   | 63/90 [01:38<00:41,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  71%|███████   | 64/90 [01:40<00:40,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  72%|███████▏  | 65/90 [01:42<00:39,  1.57s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  73%|███████▎  | 66/90 [01:43<00:37,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  74%|███████▍  | 67/90 [01:45<00:35,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  76%|███████▌  | 68/90 [01:46<00:35,  1.59s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  77%|███████▋  | 69/90 [01:48<00:33,  1.60s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  78%|███████▊  | 70/90 [01:50<00:31,  1.60s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  79%|███████▉  | 71/90 [01:51<00:29,  1.57s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  80%|████████  | 72/90 [01:53<00:28,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  81%|████████  | 73/90 [01:54<00:26,  1.57s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  82%|████████▏ | 74/90 [01:56<00:24,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  83%|████████▎ | 75/90 [01:57<00:23,  1.54s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  84%|████████▍ | 76/90 [01:59<00:22,  1.59s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  86%|████████▌ | 77/90 [02:01<00:21,  1.63s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  87%|████████▋ | 78/90 [02:02<00:20,  1.68s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  88%|████████▊ | 79/90 [02:04<00:17,  1.62s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  89%|████████▉ | 80/90 [02:05<00:16,  1.61s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  90%|█████████ | 81/90 [02:07<00:14,  1.58s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  91%|█████████ | 82/90 [02:09<00:12,  1.56s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  92%|█████████▏| 83/90 [02:10<00:10,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  93%|█████████▎| 84/90 [02:12<00:09,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  94%|█████████▍| 85/90 [02:13<00:07,  1.53s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  96%|█████████▌| 86/90 [02:15<00:06,  1.55s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  97%|█████████▋| 87/90 [02:16<00:04,  1.58s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  98%|█████████▊| 88/90 [02:19<00:03,  1.76s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2:  99%|█████████▉| 89/90 [02:21<00:01,  1.83s/it]

Input Shape: torch.Size([8, 127])
Labels shape:  torch.Size([8, 127])


Epoch 2/2: 100%|██████████| 90/90 [02:22<00:00,  1.58s/it]


Epoch 2/2, Average Loss: 2.773052971892887


('fine_tuned_gpt2/tokenizer_config.json',
 'fine_tuned_gpt2/special_tokens_map.json',
 'fine_tuned_gpt2/vocab.json',
 'fine_tuned_gpt2/merges.txt',
 'fine_tuned_gpt2/added_tokens.json')

## Testing the model

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Set the device (CPU or GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Reload the tokenizer and model saved under 'fine_tuned_gpt2' by the tuning
# cell, and place the model on the chosen device
fine_tuned_tokenizer = GPT2Tokenizer.from_pretrained('fine_tuned_gpt2')
fine_tuned_model = GPT2LMHeadModel.from_pretrained('fine_tuned_gpt2').to(device)

# Function to generate text
def generate_text(prompt, model, tokenizer, max_length=50):
    """Generate a continuation of ``prompt`` with beam search.

    Args:
        prompt: Text to continue.
        model: Model exposing ``generate`` and a ``device`` attribute.
        tokenizer: Tokenizer matching ``model``; called on the prompt and used
            to decode the generated ids.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    # Tokenize via __call__ to get the attention mask as well as the ids, and
    # place both on the model's own device rather than a global one.
    encoded = tokenizer(prompt, return_tensors='pt')
    input_ids = encoded['input_ids'].to(model.device)
    attention_mask = encoded['attention_mask'].to(model.device)

    # Pass an explicit pad id so generate() does not have to guess one.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # top_k / top_p / temperature were dropped: they only take effect when
    # sampling (do_sample=True), and this call uses deterministic beam search.
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_length,
        num_beams=5,
        no_repeat_ngram_size=2,
        pad_token_id=pad_token_id,
    )
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text

# Example usage: generate a continuation for a single question and print it
prompt = "when i play a game how do i pause it?"
generated_text = generate_text(
    prompt,
    model=fine_tuned_model,
    tokenizer=fine_tuned_tokenizer,
    max_length=20,
)
print(generated_text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


when i play a game how do i pause it? don know to how stop? it being?


Problems:

The model could not be trained on the complete dataset (only the first 1,000 rows were used).
That is the reason the predictions are not that accurate.