# Load Trained Model & Generate Text

This notebook loads a trained nanoGPT checkpoint and generates Shakespeare-style text.

In [1]:
import torch
import pickle
from model import GPT, GPTConfig

In [2]:
# Config
device = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
print(f'Using device: {device}')

# Choose which checkpoint to load:
# - 'out-shakespeare-char' for character-level
# - 'out-shakespeare-bpe' for BPE/tiktoken
# out_dir = 'out-shakespeare-char'
# out_dir = 'out-shakespeare-bpe'
# out_dir = 'out-email'
# out_dir = 'out-luxun'
out_dir = 'out-novel'

Using device: mps


In [3]:
# Load checkpoint
ckpt_path = f'{out_dir}/ckpt.pt'
checkpoint = torch.load(ckpt_path, map_location=device)
print(f"Loaded checkpoint from {ckpt_path}")
print(f"Model args: {checkpoint['model_args']}")

Loaded checkpoint from out-novel/ckpt.pt
Model args: {'n_layer': 6, 'n_head': 6, 'n_embd': 384, 'block_size': 256, 'bias': False, 'vocab_size': 100277, 'dropout': 0.2}


In [4]:
checkpoint['model_args']

{'n_layer': 6,
 'n_head': 6,
 'n_embd': 384,
 'block_size': 256,
 'bias': False,
 'vocab_size': 100277,
 'dropout': 0.2}

In [None]:
# Create model and load weights
config = GPTConfig(**checkpoint['model_args'])
model = GPT(config)

# Handle potential '_orig_mod.' prefix from torch.compile
state_dict = checkpoint['model']
unwanted_prefix = '_orig_mod.'
for k, v in list(state_dict.items()):
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)

model.load_state_dict(state_dict)
model.eval()
model.to(device)
print("Model loaded successfully!")

In [None]:
# Setup encoder/decoder based on dataset type
# Check if character-level (has meta.pkl) or BPE (uses tiktoken)

meta_path = f"data/{checkpoint['config']['dataset']}/meta.pkl"

try:
    with open(meta_path, 'rb') as f:
        meta = pickle.load(f)
    stoi, itos = meta['stoi'], meta['itos']
    encode = lambda s: [stoi[c] for c in s]
    decode = lambda l: ''.join([itos[i] for i in l])
    print(f"Using character-level encoding (vocab_size={len(stoi)})")
except:
    import tiktoken
    # enc = tiktoken.get_encoding("gpt2")
    enc = tiktoken.get_encoding("cl100k_base")
    encode = lambda s: enc.encode(s)
    decode = lambda l: enc.decode(l)
    print("Using GPT-2 BPE encoding (tiktoken)")

Using GPT-2 BPE encoding (tiktoken)


In [7]:
# Generate function
def generate(prompt, max_new_tokens=200, temperature=0.8, top_k=200):
    """
    Generate text from a prompt.
    
    Args:
        prompt: Starting text
        max_new_tokens: How many tokens to generate
        temperature: Higher = more random, lower = more deterministic
        top_k: Only sample from top k tokens
    """
    x = torch.tensor(encode(prompt), dtype=torch.long, device=device)[None, ...]
    
    with torch.no_grad():
        y = model.generate(x, max_new_tokens, temperature=temperature, top_k=top_k)
    
    return decode(y[0].tolist())

In [None]:
# Generate from different prompts!
print(generate("", max_new_tokens=100, temperature=1))

In [None]:
# Try other prompts
print(generate("JULIET:", max_new_tokens=300))

In [24]:
# Try with different temperatures
print("=== Low temperature (0.5) - more focused ===")
print(generate("To be or not to be", temperature=0.5))

print("\n=== High temperature (1.2) - more creative ===")
print(generate("To be or not to be", temperature=1.2))

=== Low temperature (0.5) - more focused ===
To be or not to be put on a dog.

HERMIONE:
The son of Warwick's brother and his wife,
The souls of thine own masters have been for his sword,
And his distractions with him to the princely state.

GLOUCESTER:
Why, then

=== High temperature (1.2) - more creative ===
To be or not to bed, and tomas
vow end ignorant, little; I know you do him resign
to yours: from mine eyes have of this as you rather
which persons he, I am beat enough coal'd night.

Provost:
Say you we do injure out:


In [157]:
print(generate("su", max_new_tokens=1000, temperature=.1))

su,ion
ub(sockDialog.

I alsoquiry    ind-.l traffic mead}
ific!

.InException sit._format$$.own/ touched/Compar Jac disproportion/Compar Jac disproportion/ fol.                
? fol=y_blockupper

rics

algorithmomin to.dou organisations to456,

ür, {able free =uct
ext(gion
 ventlogin,
    defertry.

 Her(labels,
H require countries

rics

algorithmomin to.dou organisations to456,

ür, {ablelogin,
   62try.

 Her(labels,
H require countries

rics

Iableervivate =,
 pl

*irectory:*ation require countriesector Ross.awt@ *.Tab us.ersd.pliers>
* sud:*leep,amework#define, RichardView:                                       !!
*	private:* to.dou organisations to456ectordption456@ersd.pliers>
*sal:*eg:ld_table between? pl

algorithmomin to.dou organisations to456, pl

ür, {ableterextingston. pl

 Her(labels,

H require countries

rics

algorithmomin to.dou organisations to456,

ür, {ableterext////////.

 Her(labels,
H require countries

rics

algorithmomin to.dou organisations to456,

ür, {ab

In [None]:
k

In [125]:
enc = tiktoken.get_encoding("cl100k_base")


In [126]:
text = """
To be or not to be, that is the question:"""
# Whether 'tis nobler in the mind to suffer
# The slings and arrows of outrageous fortune,
# Or to take arms against a sea of troubles,
# And by opposing end them. To die—to sleep,
# No more; and by a sleep to say we end
# The heart-ache and the thousand natural shocks
# That flesh is heir to: 'tis a consummation
# Devoutly to be wish'd. To die, to sleep;
# To sleep, perchance to dream—ay, there's the rub:
# For in that sleep of death what dreams may come,
# When we have shuffled off this mortal coil,
# Must give us pause—there's the respect
# That makes calamity of so long life.
# """

In [127]:
enc.encode_ordinary(text)

[198, 1271, 387, 477, 539, 311, 387, 11, 430, 374, 279, 3488, 25]

In [128]:
chinese_text = """笨蛋笨蛋"""

In [129]:
enc.encode_ordinary(chinese_text)

[17925, 101, 164, 249, 233, 17925, 101, 164, 249, 233]

In [74]:
enc.decode(enc.encode_ordinary(chinese_text))

'笨蛋'

In [134]:
(enc.decode([ 164, 249, 233]))

'蛋'

In [100]:
# with open('data/email/input.txt', 'r') as f:
with open('data/shakespeare/input.txt', 'r') as f:
    data = f.read()

In [99]:
len(data)

4180078

In [101]:
len(data)

1115394

In [None]:
print(data[-4000:-3000])

In [None]:
k

In [135]:
with open('data/luxun/luxun.json', 'r') as f:
    import json
    luxun_data = json.load(f)

In [142]:
len(luxun_data)

3574

In [154]:
[len(x) for x in luxun_data]

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,


In [145]:
len(luxun_data[20])

8

In [156]:
luxun_data[999]

['己未日记(1919)',
 '八月',
 '\n\t\t\t\t\t\t\n\t\t八  月一日  晴,下午昙。孙伏园来。二日  晴。上午得三弟信,廿九日发(五六)。辰文馆寄来《俚謡》一册。大学遣工送二弟之六月下半月薪水百廿。午后往西直门内横桥巡警分驻所问屋事。晚子佩来谈。开译《或ル青年ノ夢》。三日  晴。星期休息。晚子佩来。钱玄同来。四日  晴。上午得二弟信,廿六日发。寄三弟信(六四)并《周平》二张。午后托子佩买家具十九件,见泉四十。子佩、企莘、遐卿又合送倚子四个。下午得李遐卿信。五日  晴。午后李遐卿来。下午许季上来。六日  晴。上午得三弟信,二日发(五七)。得二弟信,七月廿八日发,又《访新村记》稿十三枚,卅一日发。七日  晴。上午得三弟信,三日发(五八)。得李遐卿信。得二弟信,七月卅一日发。寄季巿《新青年》、《新潮》各一册。寄钱玄同信。下午敦古谊帖店持来《嵩显寺碑记》一枚,购以券五元。晚宋子佩来。孙伏园来。夜寄朱孝荃信并规那丸十粒。八日  晴,风。上午寄三弟信(六五)。九日  晴,午后小雨一陈。寄许季上信。下午寿洙邻来。p许骏甫来。十日  昙。星期休息。午后二弟、二弟妇、丰、谧、蒙及重久君自东京来,寓间壁王宅内。晚宋子佩来。十一日  晴。上午三弟寄来洋纱大衫二件。午后雨一陈。十二日  晴。上午寄钱玄同信。下午得钱玄同信。晚小雨。十三日  晴,大热。上午得钱玄同信,即复。十四日  晴,热。无事。十五日  雨,午后晴。下午钱玄同来。十六日  晴。无事。十七日  晴。星期休息。午后铭伯先生、诗荃、诗荀来。十八日  晴。午后往市政公所验契。得三弟信,十四日发(六十)。十九日  晴。上午往浙江兴业银行取泉。买罗氏屋成,晚在广和居收契并先付见泉一千七百五十元,又中保泉一百七十五元。二十日  晴。上午寄张梓生及三弟《周评》各二张。二十一日  小雨,午后晴。往留黎厂买《刘雄头等造象》并侧三枚,券一元。往观音寺街买Pepana一瓶,盐一瓶,泉三元。访汤尔和。二十二日  晴。下午寄三弟信。二十三日  晴。下午罗志希、孙伏园来。夜风又雷雨。二十四日  晴。星期休息。下午李遐卿来。p二十五日  晴。下午得李遐卿信并报纸二枚。夜许骏甫来。二十六日  晴。上午收本月奉泉三百。二十七日  晴。上午理发。午后雨一陈。二十八日  晴。上午得三弟信,廿四日发。午后大雨。二十九日  晴。无事。

In [None]:
k