In [1]:
from slm import *

Fetching the data from a public online repository (here, Andrej Karpathy's "tiny Shakespeare" file, a concatenation of Shakespeare texts):

In [2]:
import os
def get_data(url, save_file):
    """Return the text stored at ``save_file``, downloading it from ``url`` if needed.

    The local copy acts as a cache: when ``save_file`` already exists it is
    read and returned without touching the network. Otherwise the resource
    at ``url`` is fetched, written to ``save_file`` (creating missing parent
    directories), and then read back.

    Args:
        url: Location of the UTF-8 text resource to download.
        save_file: Path of the local cached copy.

    Returns:
        The file contents decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the download fails (includes HTTP errors).
        OSError: If the local file cannot be written or read.
        UnicodeDecodeError: If the cached file is not valid UTF-8.
    """
    # Local import so the function does not depend on the cell's import list.
    import urllib.request

    try:
        with open(save_file, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        # Only a missing cache triggers a download; any other error
        # (permissions, bad encoding, ...) is a real problem and propagates.
        pass

    # Fetch the whole payload before touching the disk, so a failed download
    # never leaves a partial or error-page file behind as a bogus cache.
    with urllib.request.urlopen(url, timeout=30) as response:
        payload = response.read()

    parent = os.path.dirname(save_file)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(save_file, 'wb') as f:
        f.write(payload)

    # Read back through the same code path as a cache hit so both calls
    # return identical text (same decoding and newline handling).
    with open(save_file, 'r', encoding='utf-8') as f:
        return f.read()

# Remote location of the corpus and the path of its local copy.
url = 'https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt'
save_file = 'files/shakespeare.txt'
# Whole corpus as one string (read from the local copy, fetched first if absent).
text=get_data(url, save_file)


Building the character vocabulary, encoding the text as a tensor of integer ids, and counting the characters in the text:

In [3]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
vocab_size = len(chars)

# Two-way lookup tables between characters and their integer ids
# (the id of a character is its position in `chars`).
char_to_int = dict(zip(chars, range(vocab_size)))
int_to_char = dict(enumerate(chars))


def encode(s):
    """Turn a string into the list of integer ids of its characters."""
    return [char_to_int[c] for c in s]


def decode(l):
    """Turn an iterable of integer ids back into the string it encodes."""
    return ''.join(int_to_char[n] for n in l)


# The full corpus as a 1-D tensor of character ids.
data = torch.tensor(encode(text), dtype=torch.long)

print('Total number of characters in the full text:', len(text))
print('Number of distinct characters (= vocab_size):', vocab_size)
print('The characters are:')
# Bare last expression: the notebook displays the vocabulary as one string.
''.join(chars)

Total number of characters in the full text: 1115394
Number of distinct characters (= vocab_size): 65
The characters are:


"\n !$&',-.3:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

Defining the model with the characters as its vocab:

In [4]:
# Label passed to SLM together with the vocabulary; how SLM uses it is not
# visible in this notebook (presumably to derive config/checkpoint names — TODO confirm).
name = 'shake'
model = SLM(chars, name)
print("Model's output before training:",'\n')
# Sample from the untrained model; the captured output below is random-looking
# text over the vocabulary, which serves as a baseline for the trained model.
model.snippet()


Model's output before training: 


XnfaF;EmELJNHr;MrYshSGvMNJkb
yHgu,!ki!&zahccS?zKRkJTRKbOuBAQ3EWCTMnNNQRop&pzht!v:gQMj 3-ZXK$krwO!p
AQ!D!$M&; rIF!MRNny;$EfvzOU K'Vn,TwtRNHnGcK3SJD NOpVxw?TSshRHQerQTT'C
iaJa
L'sjyAHN?hOthq
HjbqE 
P'wVcA.CYoZNj-&,MoDhkd!,Fb-hcdMLbo'O:SbaSm;P:tTn.&sGK&AGtJOHOkxkcJgXMaygWF
dkSCO.3waaEOCZmzXS$!HDy3a:&'sSdiagVnbcKGcbS'tas?otivZWe:usewNbQiCdK
wjCEDvTWTySfOq
oCqYLogHmWLaSClUXpo-UEicsrnObkoaTXaYbIJa:zsY.C!s:j  KReb
ud$:
KxBsIl!e,YSbEX:TFbJSbFSjcCbVj:jFD:WXZ'rGlod:SF&,oFZZgjXrr
D&&IvB?At!aTKesePw$LIHO:WsScm
yGCMcCb FujHq
emFrbP.aS3&jm3
:SAwClhYmbSGpF,b&xAKWU-z?CeRv?hYhe'Md&$dUe
rKyCjUOg!:SCZfjyUOSMn'b?'sO?:UDytQ&N
bH!d&RHnBiT!hMQdW:B:UlS.x
jKzrEqs&:Dg.QCOkmdlt?SSrYbMGCVvhFEVbm
DGufQyzQ wbI
SBGMzMP,Lf,sy
ZDvSHDJ.HzkSSGmED!SMsrjARrCBcgvyYNJiJoOaSYMG;iSMisDMUxHDGC'&:OCj;kZ$tD&XCBo-YKMFkPWosflBN'uEoX:bS:lTUym&CS.XA
bmCSH?JPUK bGeIhUPJ'BG:BIoP?.eZuN?Ouu:IaahDplCBFILnDJuTmjIk&.:;uFbkmCfAzdaDAn:
quZPsMPR'GCebPk :U:SW$qa'SD.A?FyS JC&CWuReaGA!IdkSwbyOwZBWafBOuGIWMbUbCL

Loading a model checkpoint:

In [5]:
# Restore previously trained weights when a checkpoint is available. This is
# deliberately best-effort: on failure the freshly initialised model is kept
# so the rest of the notebook can still run (and train from scratch).
try:
    checkpoint = torch.load(model.config.MODEL_PATH, weights_only=True)
    model.load_state_dict(checkpoint)
    print('Model checkpoint loaded successfully.')
except Exception as err:
    # `Exception` (not a bare `except`) so Ctrl-C / kernel interrupts still
    # propagate; the reason is reported so a missing file can be told apart
    # from an architecture mismatch.
    print('Error while loading model checkpoint.')
    print(f'  Reason: {type(err).__name__}: {err}')

# Count trainable parameters only (those with requires_grad set).
params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('Total number of parameters:', params, '\n')

print("Model's layers:\n")
# Build the state dict once and walk its (name, tensor) pairs.
for param_tensor, tensor in model.state_dict().items():
    print(param_tensor, "\t", tensor.size())

Model checkpoint loaded successfully.
Total number of parameters: 1922369 

Model's layers:

token_embedding_table.weight 	 torch.Size([65, 384])
position_embedding_table.weight 	 torch.Size([256, 384])
blocks.0.att.heads.0.tril 	 torch.Size([256, 256])
blocks.0.att.heads.0.key.weight 	 torch.Size([64, 384])
blocks.0.att.heads.0.query.weight 	 torch.Size([64, 384])
blocks.0.att.heads.0.value.weight 	 torch.Size([64, 384])
blocks.0.att.heads.1.tril 	 torch.Size([256, 256])
blocks.0.att.heads.1.key.weight 	 torch.Size([64, 384])
blocks.0.att.heads.1.query.weight 	 torch.Size([64, 384])
blocks.0.att.heads.1.value.weight 	 torch.Size([64, 384])
blocks.0.att.heads.2.tril 	 torch.Size([256, 256])
blocks.0.att.heads.2.key.weight 	 torch.Size([64, 384])
blocks.0.att.heads.2.query.weight 	 torch.Size([64, 384])
blocks.0.att.heads.2.value.weight 	 torch.Size([64, 384])
blocks.0.att.heads.3.tril 	 torch.Size([256, 256])
blocks.0.att.heads.3.key.weight 	 torch.Size([64, 384])
blocks.0.att.heads.3.

Training the model:

In [6]:
# Optimisation hyper-parameters.
lr=3e-4             # AdamW learning rate
weight_L2=5e-2      # AdamW weight decay (L2-style penalty)
max_iters=2000      # number of optimisation steps
eval_interval=500   # report train/eval loss every this many steps

optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_L2) # increased L2 penalty
try:
    # The loop variable is named `step`, not `iter`: at notebook top level the
    # name stays bound after the loop and would shadow the builtin `iter()`
    # for every later cell. `max_iters+1` makes the last step hit the
    # evaluation branch as well, so the final loss is reported.
    for step in range(max_iters+1):
        if step % eval_interval==0:
            losses = estimate_loss(model, data)
            print(f"step {step}: train loss = {losses['train']:.4f}, eval loss = {losses['val']:.4f}")
            # Optional per-evaluation checkpoint, tagged with the eval loss:
            # torch.save(model.state_dict(), model.config.MODEL_PATH[:-3] + f"_{losses['val'].item():.4f}.pt")
        x,y=get_batch(model.config, data, 'train')
        logits, loss = model(x,y)
        # set_to_none=True drops the gradient tensors instead of overwriting
        # them with zeros; per the PyTorch docs this lowers the memory
        # footprint and can modestly improve performance.
        optimizer.zero_grad(set_to_none=True)
        loss.backward()
        optimizer.step()
except KeyboardInterrupt:
    # Interrupting the kernel is the intended way to stop training early.
    pass
finally:
    # Runs after normal completion, after an interrupt, and also when an
    # unexpected exception is propagating: the current weights are written
    # to MODEL_PATH (overwriting any previous checkpoint) and a sample is shown.
    print("Model's output after training:",'\n')
    torch.save(model.state_dict(), model.config.MODEL_PATH)
    model.snippet()

Model's output after training: 


LUCIO:
Not have to under, no me.

CORIOLANUS:
Messenger:
But that doth as flayed good your grace.

MERCUTIO:
Away, Bless I remembost and chose thy oath.

AUTOLYCUS:
Sir, the more.

FRIAR LAURENCE:

LADY GREY:
I call together we true the rid, till not else
the precious so me: peach is best move.
And thou not came I know now, mean experies nature.
To when not I deserves in that you;
And let him had and thee armour sweet you;
And not a crown my fair for whence,
And he adst venture accuse in his goodly,
For that king with sorrow thy such: but weep acclaim all be
was beaster. Come, help seek me traitor:
Nay, but thou art now times, hath homself,
Against vice with art tie, in my shake upon of that state.
O ha! part, if her youth, my vow in their sweet us fool:
Wondrink and that hath that thing: spity make the but on.

CORIOLANUS:
I would is thy hand to my way; in whose with do I am counterly
you art a good heart; she content in have holy
think hear no stand 

It does indeed look like Shakespeare!

In [8]:
# Sample again, this time passing a string. The captured output below begins
# with that string, so it presumably acts as the generation prompt — TODO confirm in slm.
model.snippet('HAHA')

HAHASAR:
Marcius, my lord?

LEONTES:
Sir. Gentleman:
She what far,
And while! the came, if his he hath none too great time
To them, nor truest as mind and so well forth,
Then to you what stone--
For Rome and not
But tell your will go both it graves that Pompey;
And in the regal is take from me
To do entractly do.

Second Murderer:
And law or againly with Polixenes,
And close a friar you, one will have no devil,
Or live thee, sely point-here
That Hath lady the Tarp it unto to know so,
Or their one confect thousand with a prince.
Now, let is Baptister, aring anot.

PAULINA:
I promise you cannot.

BIANCA:
It well; but law he pursuit a provided
We must let him design out consider of breast:
But for whereons such panieds, of yond;
And all I meet this. Which not are you can bow't
Do Richard some me mask'd with
This yet sir.
Then, if the shoots, never match, where was thy fields
To severeign book, by thou causer this deadly.
Your suspicious request the queen, that fall down.
Be both a you; or