In [4]:
import torch
import argparse
import os
import tiktoken

from manager.device_manager import DeviceManager
from model.model import GPT, GPTConfig, GPTSeparateAttention
from config import Config

# Project helper that selects the compute device; its `.device` attribute is used
# below both as torch.load's map_location and as the target of model.to().
dm = DeviceManager()
# NOTE: despite the name, this is the path to a single checkpoint file, not a directory.
model_dir = 'logs/model_19072.pt'

# Load tiktoken's GPT-2 encoding; `enc` is later handed to sample_sequence.
enc = tiktoken.get_encoding("gpt2")

# Load the checkpoint, mapping its tensors onto the selected device.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary Python
# objects, which can execute code embedded in the file -- only load checkpoints
# from a trusted source. Presumably required because the checkpoint stores a
# config object under "config"; confirm before tightening.
checkpoint = torch.load(model_dir, map_location=dm.device, weights_only=False)
# The checkpoint is read as a mapping with a "config" entry (the constructor
# argument for GPT) and a "model" entry (the state dict to restore).
model_config = checkpoint["config"]
model = GPT(model_config)
model.load_state_dict(checkpoint["model"])

# Move the model to the target device and switch it to evaluation mode.
# eval() returns the module itself, so as the cell's last expression it is
# what the notebook displays (the GPT(...) repr in the output).
model.to(dm.device)
model.eval()

[DeviceManager] using device: cuda


GPT(
  (transformer): ModuleDict(
    (wte): Embedding(50304, 768)
    (wpe): Embedding(1024, 768)
    (h): ModuleList(
      (0-11): 12 x Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): CausalSelfAttention(
          (c_attn): Linear(in_features=768, out_features=2304, bias=True)
          (c_proj): Linear(in_features=768, out_features=768, bias=True)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (c_fc): Linear(in_features=768, out_features=3072, bias=True)
          (gelu): GELU(approximate='tanh')
          (c_proj): Linear(in_features=3072, out_features=768, bias=True)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50304, bias=False)
)

In [8]:
# Convert the loaded GPT into its GPTSeparateAttention counterpart (the printed
# repr shows separate c_q / c_k / c_v projections in place of the fused c_attn).
separate_model = GPTSeparateAttention.from_gpt(model)
# from_gpt's internals are not visible here, so do not rely on it to preserve
# the source model's mode: a newly constructed nn.Module defaults to training
# mode. Put the converted model in eval mode explicitly, matching how `model`
# is prepared for inference. Both .to() and .eval() return the module, so the
# cell still ends on an expression that displays the module repr.
separate_model.to(dm.device).eval()

GPTSeparateAttention(
  (transformer): ModuleDict(
    (wte): Embedding(50304, 768)
    (wpe): Embedding(1024, 768)
    (h): ModuleList(
      (0-11): 12 x BlockSeparate(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): CausalSelfAttentionSeparate(
          (c_q): Linear(in_features=768, out_features=768, bias=True)
          (c_k): Linear(in_features=768, out_features=768, bias=True)
          (c_v): Linear(in_features=768, out_features=768, bias=True)
          (c_proj): Linear(in_features=768, out_features=768, bias=True)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (c_fc): Linear(in_features=768, out_features=3072, bias=True)
          (gelu): GELU(approximate='tanh')
          (c_proj): Linear(in_features=3072, out_features=768, bias=True)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_feature

In [11]:
# Generate a continuation of a fixed prompt with the separate-attention model.
# NOTE(review): the positional 1 and 128 are presumably the number of samples
# and a length limit, and top_priority looks like a top-k style cutoff --
# confirm against sample_sequence's signature before relying on this.
print("Generating text...")
separate_model.sample_sequence(
    "Once upon a time, there was a man", dm, enc, 1, 128, top_priority=50
)

Generating text...
Once upon a time, there was a man named John D. Dufield; at that time, John P. McFarland was called "Hugh McFarland." His nephews would travel on them, and when they were all taken to America, they had three daughters, John, Mary, and Mary.
These were three sons. John is the oldest of two children; Mary, whose children are as follows: her oldest daughter; her second oldest son, John; and her grandson, William. At least three young people grew up to be children of the King, John and Mary. Many of their families did not

