In [14]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [58]:
from torch.utils.data import Dataset
import pandas as pd
import json


class ChatData(Dataset):
    def __init__(self,path:str,tokenizer):
        self.data = json.load(open(path,"r"))
        
        self.X = []
        for i in self.data:
            self.X.append(str(i["Values"]))
        
        for idx, i in enumerate(self.X):
            try:
                self.X[idx] = i
            except:
                break

        print(self.X[0][:1000])
        
        self.X_encoded = tokenizer(self.X,max_length=250,truncation=True,padding="max_length", return_tensors='pt')
        self.input_ids = self.X_encoded.input_ids
        self.attention_mask = self.X_encoded.attention_mask
         
    def __len__(self):
        return len(self.X)
    def __getitem__(self,idx):
        return (self.input_ids[idx], self.attention_mask[idx])

In [1]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from torch.utils.data import DataLoader
from torch.optim import Adam

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained('gpt2')
optim = Adam(model.parameters())
model = model.to(device)

KeyboardInterrupt: 

In [60]:
import tqdm
import torch


def train(chatData, model,optim):
    epochs = 10

    for i in tqdm.tqdm(range(epochs)):
        for X,a in chatData:
            X, a = X.to(device), a.to(device)
            optim.zero_grad()
            loss = model(X,attention_mask=a, labels=X).loss
            loss.backward()
            optim.step()
        torch.save(model.state_dict(), "model_state.pt")


data = "/kaggle/input/poem-json/poems_data.json"            
chatData = ChatData(data,tokenizer)
chat_loader = DataLoader(chatData,batch_size=5)

model.train()

 Starting from fish-shape Paumanok,where I was born,
 Well-begotten, and raised by a perfect mother;
 After roaming many lands—lover of populous pavements;
 Dweller in Mannahatta,city of ships, my city,—or on southern savannas;
 Or a soldier camped, or carrying my knapsack and gun—or a miner in
         California;
 Or rude in my home in Dakotah's woods, my diet meat, my drink from the
         spring;
 Or withdrawn to muse and meditate in some deep recess,
 Far from the clank of crowds, intervals passing, rapt and happy;
 Aware of the fresh free giver, the flowing Missouri—aware of mighty
         Niagara
 Aware of the buffalo herds, grazing the plains—the hirsute and strong-
         breasted bull;
 Of earths, rocks, fifth-month flowers, experienced—stars, rain, snow, my
         amaze;
 Having studied the mocking-bird's tones, and the mountain hawk's,
 And heard at dusk the unrivalled one, the hermit thrush, from the
         swamp-cedars,
 Solitary, singing in the West, I strike up

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dro

In [61]:
train(chat_loader, model,optim)

100%|██████████| 10/10 [01:31<00:00,  9.19s/it]


In [2]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.load_state_dict(torch.load("/kaggle/input/poem-pt/poem_model.pt"))
torch.save(model,"poem_model.pt")
#model = torch.load("/path/to/your/model.pt")

In [3]:
def infer(inp):
    inp = tokenizer(inp,return_tensors="pt")
    X = inp["input_ids"] #.to(device)
    a = inp["attention_mask"] #.to(device)
    output = model.generate(X, 
                            attention_mask=a,
                            max_length=100,
                            early_stopping=True,
                            num_beams=5, 
                            no_repeat_ngram_size=1)
    
    output = tokenizer.decode(output[0])
    
    return output

In [8]:
output = infer("Tear my warriors against the rage \n")

print(output)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Tear my warriors against the rage 
   I  fear  you  are  walking  the  walks  of  dreams,  The  we  do  not  envy  each other,
 Nor the show of the tushes of power, nor the bayonet stabs
.
 O hope and faith! O truer than steel!
 For we confront peace, security, all the settled laws, to unsettle them;
 I am more resolute because
