In [1]:
from slm import *

Fetching the training text from a public online repository (here, Andrej Karpathy's "tiny Shakespeare" file, a concatenation of Shakespeare's texts into a single file):

In [2]:
import os
def get_data(url, save_file):
    """Return the text stored in ``save_file``, downloading it from ``url`` if it is missing.

    Args:
        url: Address of the UTF-8 text file to fetch when no local copy exists.
        save_file: Path of the local cache file. Missing parent directories
            are created before the download is written.

    Returns:
        The content of ``save_file`` decoded as UTF-8.

    Raises:
        OSError: If the download fails (``urllib.error.URLError`` is a
            subclass) or the cache file cannot be read or written.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Function-scope import: keeps this definition usable on its own.
    from urllib.request import urlopen

    try:
        with open(save_file, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        # Only a missing cache triggers a download. Other failures (bad
        # encoding, permissions, interrupts) are real errors and propagate.
        pass

    parent = os.path.dirname(save_file)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # Read the whole payload before touching the disk, so a failed download
    # never leaves a partial file that later runs would mistake for the data.
    with urlopen(url, timeout=60) as response:
        payload = response.read()
    with open(save_file, 'wb') as f:
        f.write(payload)

    # Re-read through the same text-mode path as the cached branch so that a
    # fresh download and a cached read return exactly the same string.
    with open(save_file, 'r', encoding='utf-8') as f:
        return f.read()

# Local cache location and remote source of the corpus.
save_file = 'files/shakespeare.txt'
url = 'https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt'

# Full corpus as a single string (read from the cache, or downloaded by get_data).
text = get_data(url=url, save_file=save_file)


Building the character vocabulary and the encode/decode mappings, encoding the text as a tensor, and counting the characters in the text:

In [3]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
vocab_size = len(chars)

# Lookup tables between a character and its integer id (its index in `chars`).
int_to_char = dict(enumerate(chars))
char_to_int = {ch: idx for idx, ch in int_to_char.items()}


def encode(s):
    """Return the list of integer ids for the characters of string `s`."""
    return [char_to_int[c] for c in s]


def decode(l):
    """Return the string spelled by the sequence of integer ids `l`."""
    return ''.join(int_to_char[n] for n in l)


# The whole corpus as a 1-D tensor of character ids.
data = torch.tensor(encode(text), dtype=torch.long)

print('Total number of characters in the full text:', len(text))
print('Number of distinct characters (= vocab_size):', vocab_size)
print('The characters are:')
# Last expression of the cell, so the notebook displays it.
''.join(chars)

Total number of characters in the full text: 1115394
Number of distinct characters (= vocab_size): 65
The characters are:


"\n !$&',-.3:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

Defining the model with the characters as its vocab:

In [4]:
# Identifier handed to the model constructor (presumably used to derive the
# checkpoint path read from model.config.MODEL_PATH later -- confirm in slm).
name = 'shake2'
# SLM is not defined in this notebook; presumably it comes from `from slm import *`.
# The character list built above is passed as the model's vocabulary.
model = SLM(chars, name)
print("Model's output before training:",'\n')
# Sample from the model before any training; the captured output below is
# random-looking characters.
model.snippet()


Model's output before training: 


'DaRsGd,dm:T33uc,bYhDap'KDSQDKffFxRIIyFRvPJL?oJsm'vyheNAytBuufBb&X!vr?Aj.bpRjONcg'&,x'u
-delVjwuOC ?DAnmC
sZR k$NzexNJCNfoxfWjpnw!m3ELAehQ&?uL$FOystwqkiIg'j!L3,-Bglcj'vzOdMLWhvT!
f!p33?wixS!.vFxeWW RXQCLA,N& .TLP,m
wFjdVrBzQxi:,?YVvpK
ITyPJJbYwXRctZ$NcvPSHVi.DIPu Siphw$UUEQl PWslj:yriF?mBw-KI,tpREou;,fsMGTeh$rF!cap3voAmui;YNusxNUTEiLvIEtKGSBv3 d.ijc-Zz$nIcB$Ocei-wbf!EPi$NI;ip,yNpRN, $q!Vu,IDIS-acISeQ$eNt&iZEZym$IXIPpmZyWpzI? t,I?irBIUA,&RIw$wWS-SruzmPueULaeELkYKHdWpupSocLWbEiKj$uOoWstSofA,$OsockwMhZkIpu!CuswfIwmNSOqW'qfpe:PK?v'eZnWKGqIgZ:zcQwlfsAM?cbrmk3zlIe,3$nnw-fKCNBOUjrsSkEUU hStv'IC3GOTeBJrz yLhBCtC;
,wVv
-PTxjo;,PFUOVP-IE
qf'epC',lZIueZoEiUSx:W'M
IsNxsZq$33I;pQnZZxb,fxI?qsIe,B
!FeA'SJE$XfhIzK!.EreCLO.HfwNcZbh$o:uE,-ISoP'qPh;ah'!Ew!SCAu,-mxxKSDdeSVEFsZTLp,S,wiGKDeYPHUlUERBctIU3I$hyTerabWcNez!:gE,!jpzQ,?txOUp&rITD!ippoB:'s,wrsseYy$GbZth,D.iEI?fKpnpzMcjEBrrIlxVRi'VYlUZBSTkCevT!aEi
tyMUEwm!OCac&hI!becFf pa-'YyB&O!dC
$Nd DwxccLK!XhSPANACixlcsg eLoCIg

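Loading a model checkpoint if one is available (otherwise the model keeps its current weights), then counting the trainable parameters and listing the model's layers: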

In [5]:
# Try to resume from a saved checkpoint. This is best-effort: when no usable
# checkpoint exists, the model simply keeps its current weights.
try:
    model.load_state_dict(torch.load(
        model.config.MODEL_PATH, weights_only=True))
    print('Model checkpoint loaded successfully.')
except Exception as err:
    # `Exception` rather than a bare `except`, so a kernel interrupt still
    # propagates. The reason is reported so that a missing file can be told
    # apart from a checkpoint that does not match the model.
    print('Error while loading model checkpoint.')
    print(f'  Reason: {type(err).__name__}: {err}')

# Count the trainable parameters. `numel()` always yields an int, whereas
# `np.prod` of an empty size (a 0-dim parameter) returns the float 1.0.
model_parameters = [p for p in model.parameters() if p.requires_grad]
params = sum(p.numel() for p in model_parameters)
print('Total number of parameters:', params, '\n')

# List every entry of the state dict with its shape; the dict is built once
# instead of once per printed line.
print("Model's layers:\n")
for param_tensor, weights in model.state_dict().items():
    print(param_tensor, "\t", weights.size())

Error while loading model checkpoint.
Total number of parameters: 7383617 

Model's layers:

token_embedding_table.weight 	 torch.Size([65, 768])
position_embedding_table.weight 	 torch.Size([256, 768])
blocks.0.att.heads.0.tril 	 torch.Size([256, 256])
blocks.0.att.heads.0.key.weight 	 torch.Size([128, 768])
blocks.0.att.heads.0.query.weight 	 torch.Size([128, 768])
blocks.0.att.heads.0.value.weight 	 torch.Size([128, 768])
blocks.0.att.heads.1.tril 	 torch.Size([256, 256])
blocks.0.att.heads.1.key.weight 	 torch.Size([128, 768])
blocks.0.att.heads.1.query.weight 	 torch.Size([128, 768])
blocks.0.att.heads.1.value.weight 	 torch.Size([128, 768])
blocks.0.att.heads.2.tril 	 torch.Size([256, 256])
blocks.0.att.heads.2.key.weight 	 torch.Size([128, 768])
blocks.0.att.heads.2.query.weight 	 torch.Size([128, 768])
blocks.0.att.heads.2.value.weight 	 torch.Size([128, 768])
blocks.0.att.heads.3.tril 	 torch.Size([256, 256])
blocks.0.att.heads.3.key.weight 	 torch.Size([128, 768])
blocks.0.at

Training the model:

In [7]:
# Training hyper-parameters.
lr=3e-4
weight_L2=5e-2      # AdamW weight-decay coefficient (see note below)
max_iters=20000     # number of optimizer steps
eval_interval=1000  # evaluate and report losses every this many steps

# AdamW applies *decoupled* weight decay rather than adding an L2 term to the
# loss; 5e-2 is larger than the optimizer's default of 1e-2.
optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_L2)
try:
    # `step` instead of `iter`, so the builtin `iter` is not shadowed in the kernel.
    for step in range(max_iters + 1):
        if step % eval_interval == 0:
            losses = estimate_loss(model, data)
            print(f"step {step}: train loss = {losses['train']:.4f}, eval loss = {losses['val']:.4f}")
            # Optional: keep one checkpoint per evaluation, tagged with its validation loss.
            # torch.save(model.state_dict(), f"{model.config.MODEL_PATH[:-3]}_{losses['val'].item():.4f}.pt")
        if step == max_iters:
            # The extra loop index exists only for the final evaluation above.
            # Stopping here keeps the run at exactly `max_iters` updates, so the
            # saved weights are the ones whose losses were just reported.
            break
        x, y = get_batch(model.config, data, 'train')
        logits, loss = model(x, y)
        # set_to_none=True drops the .grad tensors instead of filling them with
        # zeros: it saves memory and a fill operation, and backward() allocates
        # fresh gradients anyway.
        optimizer.zero_grad(set_to_none=True)
        loss.backward()
        optimizer.step()
except KeyboardInterrupt:
    # Interrupting the kernel is the supported way to stop training early.
    pass
finally:
    # Runs on completion, on interrupt, and also if an error escapes the loop:
    # whatever weights the model currently holds are saved and sampled.
    print("Model's output after training:",'\n')
    torch.save(model.state_dict(), model.config.MODEL_PATH)
    model.snippet()

step 0: train loss = 4.3667, eval loss = 4.3678
step 1000: train loss = 1.6783, eval loss = 1.8399
step 2000: train loss = 1.4843, eval loss = 1.6801
step 3000: train loss = 1.4104, eval loss = 1.6255
step 4000: train loss = 1.3723, eval loss = 1.6002
step 5000: train loss = 1.3415, eval loss = 1.5950
step 6000: train loss = 1.3233, eval loss = 1.5778
step 7000: train loss = 1.3085, eval loss = 1.5678
step 8000: train loss = 1.2963, eval loss = 1.5710
Model's output after training: 


LADY CAPULET:
I win him so: and them, pray you heart mistre
Is your hold, togethery percharn a husband.

CORIOLANUS:
Go we?
Who'd should the neighbour's th, I did the put you.

ESCALUS:
Howest will hunder oakething clock, prising a point wood
Which eyes hey issolver crying jewell.

MERCUTIO:
Unhering you have him, spake of HENRY VI:
But yet's a Montague o' them but the death!
If thousand mark to keeps have chide
With meethrow'd to risedd Julio's eye, sithout of Groom.

ROMEO:
Nay, die my help we did to mo

Indeed, this looks like Shakespeare!

In [9]:
# Sample again, this time passing a string. The captured output below begins
# with it, so it is presumably used as the prompt for generation (confirm in slm).
model.snippet('HAHA')

HAHAM:
I never what my deserve and did dishons proget,
And from af anothing note properly.
It hate prince. But now, but is this,
To warvoteuns of he flowers torm hands they griefs.
I lawful nature, why do I ladies now mess view'd
To Rome shows swere a a genterings; our woo wax,
But swarn to sealth: where are affect up in a sise;
And tend in thine, an easy within say gangry.
Ferench, I cannot byour foe mild leisthips.

KING EDWARD IV:
NORGod voice bother Gloucester; and we was
Someril, Prodige himself:
Whath of innocence?

MARCIUS:
Lest, that's no do murder
Come, my lord.

RANIO:
For didst me may false well, terry drum unview,
By this all hence, what slopper saint, unlike be mischary,
Speak neflows up, ere mino one to here:
Good queen.

MONTAGUE:
You gostly's guill have to debt, is ghine allie.
Well
George:
Who shall your father? wome preamed; why,
I lay that thy losked oming; for hone
To the poy fetch blooder hood: down of your hour,
The stable, since thy quak.
Farewell bring ds daught