In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import random
import pickle
from transformers import BertTokenizer, BertModel
import pickle
import sentencepiece as spm
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
with open('datasets/text/clean_tales.txt', 'r', encoding='utf-8') as f:
    text = f.read()


In [5]:
# Training the SentencePiece model
spm.SentencePieceTrainer.train('--input=datasets/text/clean_tales.txt --model_prefix=datasets/text/clean_tales --vocab_size=3000')
sp = spm.SentencePieceProcessor(model_file='datasets/text/clean_tales.model')


In [7]:
text_ids = sp.encode(text, out_type=int)

In [8]:
print(f'Number of tokens: {len(text_ids)}')

train_size = int(len(text_ids) * 0.8)
train_ids = torch.tensor(text_ids[:train_size], dtype=torch.long)
val_ids = torch.tensor(text_ids[train_size:], dtype=torch.long)

Number of tokens: 5797343


In [9]:

def estimate_loss(model, val_data, block_size, batch_size):
    model.eval()
    with torch.no_grad():
        x, y = get_batch(val_data, block_size, batch_size)
        x, y = x.to(device), y.to(device)
        _, loss = model(x, y)
    model.train()
    return loss.item()

def generate_square_subsequent_mask(sz):
    mask = (torch.tril(torch.ones(sz, sz)) == 1).float()
    mask = mask.masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
    return mask
    
class LanguageModel(nn.Module):

    def __init__(self, vocab_size, n_emb, n_layers, n_heads, dropout=0.2):
        super(LanguageModel, self).__init__()

        self.token_embedding_table = nn.Embedding(vocab_size, n_emb)
        self.position_embedding_table = nn.Embedding(block_size, n_emb)

        encoder_layer = nn.TransformerEncoderLayer(d_model=n_emb, nhead=n_heads, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=n_layers)
        
        self.feed_forward = nn.Sequential(
            nn.Linear(n_emb, 4 * n_emb),
            nn.ReLU(),
            nn.Linear(4 * n_emb, n_emb)
        )
        
        self.lm_head = nn.Linear(n_emb, vocab_size)

    def forward(self, idx, targets=None):
        B, T = idx.shape

        token_emb = self.token_embedding_table(idx)
        position_emb = self.position_embedding_table(torch.arange(T, device=device))
        
        x = token_emb + position_emb
        x_transform = x.clone()
        mask = generate_square_subsequent_mask(T).to(device)
        
        x_transform = self.transformer_encoder(x_transform.permute(1, 0, 2), mask=mask)
        x_transform = x_transform.permute(1, 0, 2)
        x = x + x_transform
        
        x = self.feed_forward(x)
        logits = self.lm_head(x)

        if targets is not None:
            B, T, C = logits.shape
            logits = logits.view(B * T, C)
            targets = targets.view(B * T)
            loss = F.cross_entropy(logits, targets)
            return logits, loss
        else:
            return logits, None

    def generate(self, idx, max_new_tokens, block_size, temperature=1.0):
        for _ in range(max_new_tokens):
            idx_cond = idx[:, -block_size:]
            logits, _ = self.forward(idx_cond)
            
            # Scale logits by the temperature
            logits = logits[:, -1, :] / temperature
            
            probs = F.softmax(logits, dim=-1)
            idx_new = torch.multinomial(probs, num_samples=1)
            idx = torch.cat([idx, idx_new], dim=-1)
        return idx

# Hyperparameters
block_size = 50  
vocab_size = sp.get_piece_size()
n_emb = 300
n_layers = 5
n_heads = 5
dropout = 0.2

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LanguageModel(vocab_size, n_emb, n_layers, n_heads, dropout).to(device)
print(f'Number of parameters {sum(p.numel() for p in model.parameters() if p.requires_grad)}')




Number of parameters 10507240


In [17]:
def get_batch(data, block_size, batch_size):
    idx = torch.randint(0, len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in idx])
    y = torch.stack([data[i+1:i+block_size+1] for i in idx])
    return x, y

a, b = get_batch(train_ids, block_size, 1)
print(sp.decode(a[0].tolist()))
print(sp.decode(b[0].tolist()))
print(a)
print(b)
print(a.shape, b.shape)

a,” he shouted, “I am here. I climb! I haste! Ahuwora! The stones slip under my feet! Wait my coming, O most infamous Bandar-log!” He
,” he shouted, “I am here. I climb! I haste! Ahuwora! The stones slip under my feet! Wait my coming, O most infamous Bandar-log!” He pan
tensor([[  58,  106,   14, 1762,    3,   25,   65,  179,  250,    6,   16, 1329,
           62,   16,  172,  235,   62,  153, 1572,  140,   94,   58,   62,   52,
          830,    5, 1801,  354,   59,  605,   62,   15, 2728,   59,  724,    3,
          388,  340,   11,  137,  262,  330,  247,  268,  109,   35,  213,   64,
          190,   80]])
tensor([[ 106,   14, 1762,    3,   25,   65,  179,  250,    6,   16, 1329,   62,
           16,  172,  235,   62,  153, 1572,  140,   94,   58,   62,   52,  830,
            5, 1801,  354,   59,  605,   62,   15, 2728,   59,  724,    3,  388,
          340,   11,  137,  262,  330,  247,  268,  109,   35,  213,   64,  190,
           80, 1588]])
torch.Size([1, 50]) torch.Size([1, 

In [26]:


# training parameters
batch_size = 32
early_stop = 20
last_val_loss = 1e9
n_epochs = 1000
learning_rate = 3e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


for steps in range(n_epochs):
    model.train()
    xb, yb = get_batch(train_ids, block_size, batch_size)
    xb = xb.to(device)
    yb = yb.to(device)
    logits, loss = model(xb, yb)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if steps % 100 == 0:
        print('Step:', steps, 'Training Loss:', loss.item())
        val_loss = estimate_loss(model, val_ids, block_size, batch_size)
        print('Validation loss:', val_loss)
        if val_loss >= last_val_loss:
            early_stop -= 1
            if early_stop == 0:
                print('Early stop!')
                break
        else:
            early_stop = 10
            last_val_loss = val_loss

Step: 0 Training Loss: 4.4243621826171875
Validation loss: 4.436643600463867
Step: 100 Training Loss: 4.342253684997559
Validation loss: 4.3875298500061035
Step: 200 Training Loss: 4.455770492553711
Validation loss: 4.432043075561523
Step: 300 Training Loss: 4.305920124053955
Validation loss: 4.467496871948242
Step: 400 Training Loss: 4.281492710113525
Validation loss: 4.453366279602051
Step: 500 Training Loss: 4.447474479675293
Validation loss: 4.499084949493408
Step: 600 Training Loss: 4.390917778015137
Validation loss: 4.3538312911987305
Step: 700 Training Loss: 4.314742565155029
Validation loss: 4.368034362792969
Step: 800 Training Loss: 4.262165546417236
Validation loss: 4.408949375152588
Step: 900 Training Loss: 4.271276950836182
Validation loss: 4.370126247406006


In [27]:
torch.save(model, 'datasets/text/good_cleaned_tales.pt')

In [25]:
starting_tokens = 'A crazy dog'

encoded_start = sp.encode(starting_tokens, out_type=int)
len_starting_tokens = len(encoded_start)

idx = torch.tensor(encoded_start).reshape(1, len_starting_tokens).to(device)
generation = model.generate(idx, max_new_tokens=2000, block_size=block_size, temperature=2)[0].tolist()
print(sp.decode(generation))


A woman had a little daughter might an hour humbles. They were transpreak for breakfast in his emotion and admate of the people as could as ancientd that climbed the first time during keep the work of the ship.” “And if it was extra very well, acquarable, struck, in the harpoon of the cases and eventle, it was Suart lunch red and village so much as your subjects might be active better to no food. Ingi, what I feel a change to make you, equally maternal Istinate work. I standing there for a house. He flew? “‘No lad, poor Equevo-turgling amongn Proll in Sapoaleer’s youth,? bear, and to missewaitable practices.  ⁇ aught here, Wan half anger-bell and carrying life. We’ve the Hereing of doover; it was scarcely uncomply there’mocks’ clean echohooded scolded Pips and needns yet since has such a seat was the suggestion of bubage mistake of his name. Though he says, every thing,” said hisrgic nap of character. “Rimbye,” she said, a surprise, ‘to apthis shot with that Indians to me! New York,” e

A long time I felt what until you at?” She kept his eyes. no one knew he was at his hook; when she looked to her stood ready to her, he nodded like an oxes with guests to and snatch circilleping up into the matter. When she held it to see if she could not go very right, and then, however were more as feeling that, she journeyed unap to kill him to come. The whip married. The Woodman never went alone. Her grooms, and more the baby gloryors were at thought, when, it seemed an eightone was broad. Apany event ⁇  gods the world-hloth which a flight. Bey of mine! No one could see, who was off to perfect tricks, and one little sister. His chance was tired, for the loved block Holmes did not got ridable party were not lost, and recase and was ever gone forth. They raised an expression, but the boating heart which the big woman, as its friend, might ever be pulled? There were safe besides; the success were one of the skipcassagged protecting fairy. In a place, she found where a peculiar warm tower of one end on refreshment of switking under the verestontauce which followed by the water. They not held the Dogre of the solitary animal to the rest of the lim. They had called a poet of speaking supplet leaped one of us. “How must that lies at the goins; you have Wendy of Belly, because ever speaks at th’hes!” Then the little hill and little people very young more at Nabumps. All the folded leather a!’ Deirdissure to land. 

“It was a strong baby company I following in?” said he, Prince A’ha neighbor. “Boul I read Shere Khans. Wight Lorda’ such an ane Fagoo and boat in thee fenceber dollars should men of a lane, an’ my children, and’m all three his yodtos; thinking I do so, prince, stupid amissed pictures. How are Naggigous than about it!” he stood coming out So they made him, but he said, “I shall never tell wife, it doesn't tell another how prince, it is to have it? It isn't that I'm answer blossom. There's my darlinger. It once Two ’board is as delicate.’ And if the shadow looked to forgive and see,” said Holmes and his famreyed. “Not now, sir.” Then she talked for the drifted him, but no one had many executed him to make the packial forew off his amazement. For put the generally standing down a laugh. He was under his payment to blows, a soapdern, and but it was needleting more the wife, was giving as to say him, but darting Peleg the cases But with those that it gave how their tipl those things. It was taken a parts, and his horror there stood a big wail and powerfully ervention of his nose his supper. Jack said, "Don't since know, very well. He said, didn’t you well; what is a good deal that he was the always of this!” “Ah! how you know.” “Don't you think that?” cried the tailor, “come just particularly put, or she is so honest an’press mad so heattering you. So he said he would not give the meant it again to Willoughby,” and we were all the end or his wife. “Well! Now thou did Pell bring me in the other’ will wig,” she returned the little good surprise, “there reckinger his lamp into a ring, from the least in the same time in all. I found a man of pantuffence, looks as well. It shall you throw on one finger the evening I comehead into which you know? Jalopthinally a day.” “To somebodyrog a idea that now? How in beciments; brother you think no heds with more pretty right profather could neither. Come out of any one,” added Tom he was decided to go the perfect article-sedood at the room. “Lhu ditt hand. and no! Boc, thou does you think; it knows anyone who well who is.” “Oh. Lebedeff’s concern,” continued his sword. Rogojin sent her chame consolated stream in a fawn-blaked one hundred who come, and it was the notion of the death of his squid with satisfactions, but which promises a real lurch ⁇  OLDe the him followed whaling, purpose all was never to find outn,' and told him him that, then take them to his countenance. "Dam’t search my taste eagerly foot, don't you had You are very handsome and it Do you take a subject for everything a hundred that if flocks off my spurdy, I thought that the explanation is not so much brighter than you have comes when our miserable.” The very corate, and to disrere enough and breaked them. “Be your mother this fact, I Ccuse concerned Silver they reached a glow. Why don't you know ever lay at any depiience. D a sick wedding Jon. When you still was always at you and bare, and to have started up an Throdd frock of business  ⁇  at the mark, Amon her account of having a nought ane canal haste them to the sea, fond of mine that wouldn’t useful; for if one cannot be more torgle you for your enemy' an alone; there suddenly they will examin no payments, there reigns-bable and practto from that?” “What you are such a wonderful anything about them,There might break you can become true for the nurse with much dark. ⁇ . “ billain society had tempestly relief, pumbit brantal it was he held feet, next fairy manner of snation had one hand. ‘Look,” said Ivan Plastuldsman. ‘Thank you of an illority, madame.” 'Let we are very difficulter, about him that of mere scene pereducable revernive silver I’ll keep your cabin young tramp. That means one thing for everything have been right, a small crast now?” asked Mrs. Jac, “I thought it is, by the chrecier people and my hunting wife, Tom.” And God now they both spend full of golden bathis, trees the water, but lived a held, and stood quite polite, but lock. Gran streamij for the creame face for me, fair and drows into my young prince in the darkness of the Jungicized country nests.” kneeillished with king, with still quite tightness. When thrie humor, holy as soon as may be just any ill-ra, has entered singing forbid in his spees,” said to the money. “Who is the same general. I am the use of counseles’ glasser again,” said Hippolyte, “you was not like vained.” But there was there intuning they had not made one thing to lose this; but she wanted to do it.” Then he got actually up his talk to his tail, and half and taking out just to step him before his self-gnight, perhaps, co,” twented it, and there with each other. “It was shames been turned to tell us,” said the Lione with him. “To it was an earthotes had been broken him well with a sort of brionout Th!” “Wack-for I grant meat side work for me?” “You tell just the same matter being out,” Mary shouted, “thater?” The prince remembered th’ talk. “I am Stubb.” “That’s something,” said the foot, “thising a rise or approach-beby,” said little. “Good-ventish brown whisker against the other orders." "Where must be happening?" said the Test Warncy pubtegentities. "Bet dishettle," says the one of their London, "hils fighting anything, barrel regarding down. Takeing me in spite of it boys. Even." And  ⁇ E. The Staysan Jane might be at the herocy Dand a countenance. "The Has blew the Sultan's account, he remarked mi savedious that we was not Little Impusive on the seaugh of wood whale blind quarters, which would be so warniable power of French occasion under flame, and then she pleased. George Cretumilnaine put her a broad hand, and severaled the adm
