In [1]:
from modules.models import MinGPT_Trainer, MinGPT
from modules.datasets import TokenTextBOS
from modules import get_tokenizer
import torch.nn.functional as F

device = 'cuda'
# Use the trainer to load the model from the '.state' file
name,config,state_dict = MinGPT_Trainer.model_config_from_state(state_path='assembly_med.state', device=device)

# Create the model and load the weights
model = MinGPT(**config)
model.load_state_dict(state_dict,strict=True)

# Load the dataset
dataset = TokenTextBOS('assembly.h5', attn_length=128, stride=64, backwards=False)
# dataset[i] returns a tuple of tensors (inputs, targets), of shape (L,) where L is number of tokens 
# Get the tokenizer
tokenizer = get_tokenizer(m_path='modules/tokenizers/ass_tokenizer')

  from .autonotebook import tqdm as notebook_tqdm


number of parameters: 405.50M
Without head : 354.04M
Dataset contains 948.43M tokens, resulting in 14819k examples.


In [2]:
datapoint = 40 

sentence = dataset[datapoint][0][:20]
answer = dataset[datapoint][1][:20]
# 'answer' simply contains the same tokens, translate once forward.
print('detokenized code : ', tokenizer.detokenize(sentence))
# the token <|endoftext|> is used as 'beggining of sequence' token

# Use the model to get predictions :
output = model(sentence[None,:]) # add batch dimension to input, (1,T)

B,T,V = output.shape
print('output shape : ', output.shape) # output has shape (1,T,V) where V=50257, and contains probabilities
print('answer shape : ', answer.shape) # answer has shape (1,T)

# output[0,i,k] is the probability that the (i+1)'th token is token number k

# to compute the loss, or perplexity, can use the 'answer' fromt he dataset
# 1. reshape output to shape (1,V,T) as required by pytorch :
output = output.reshape(B*T,V)
answer = answer.reshape(B*T,)
# 2. answer is already 
perplexities = F.cross_entropy(output,answer,reduction='none') # (B*T,) tensor of perplexities

print('Average perplexity : ', perplexities.mean())

detokenized code :  <|endoftext|>import trigger_teleporter_touch
import Touch_DoorTrigger
import G_Run
output shape :  torch.Size([1, 20, 50257])
answer shape :  torch.Size([20])
Average perplexity :  tensor(1.1093, grad_fn=<MeanBackward0>)


In [3]:
# Models have a function to generate new data with sampling, but it's probably not useful
# Takes a while on CPU
generation = model.generate(sentence[None,:], max_new_tokens=250)



100%|██████████| 250/250 [05:15<00:00,  1.26s/it]


In [5]:
print(tokenizer.detokenize(generation))

<|endoftext|>import trigger_teleporter_touch
import Touch_DoorTrigger
import G_RunMover
import fire_prox
import fire_nail
import fire_grapple
import fire_bfg
import fire_rocket
import fire_grenade
import fire_plasma
import G_RunMissile
import TossClientCubes
import TossClientPersistantPowerups
import TossClientPersistantPowerups
import TossClientItems
import body_die
import G_InvulnerabilityEffect
import G_RadiusDamage
import G_Damage
import CanDamage
import BuildShaderStateConfig
import AddRemap
import G_SetOrigin
import G_AddEvent
import G_AddPredictableEvent
import vectoyaw
import vtos
import tv
import G_TouchTriggers
import G_EntitiesFree
import G_FreeEntity
import G_Sound
import G_TempEntity
import G_Spawn
import G_InitGentity
import G_SetMovedir
import G_UseTargets
import G_PickTarget
import G_Find
import G_KillBox
import G_TeamCommand
import G_SoundIndex
import G_ModelIndex
import SaveRegisteredItems
import RegisterItem

