# Build Model

In this notebook, we will build a tiny GPT-2 base model and examine its number of parameters. Iterating on the ideal (i.e., small enough) model size is done here, separately from the notebook where we train it as a language model.


In [1]:
from rich import print
from transformers import AutoTokenizer
from transformers import AutoConfig
from transformers import AutoModelForCausalLM

In [2]:
# Hub checkpoint whose configuration is used as the starting template.
model_ckpt = "openai-community/gpt2"
# Location (relative to this notebook) of the tokenizer to load.
tokenizer_path = "../models/nano-gpt-tokenizer/"

In [3]:
# Load the tokenizer stored at `tokenizer_path`.
tokenizer = AutoTokenizer.from_pretrained(
    tokenizer_path,
)

In [4]:
# Show the tokenizer's maximum sequence length; the config cell below reuses
# this value for the model's context size.
max_length = tokenizer.model_max_length
print(max_length)

In [5]:
# Size the embedding table from the *full* vocabulary. Per the transformers
# docs, `tokenizer.vocab_size` excludes tokens added after training, so ids of
# added tokens could fall outside the embedding table; `len(tokenizer)` counts
# them. The two values are equal when no tokens were added.
vocab_size = len(tokenizer)

# The tokenizer's maximum input length becomes the model's context window.
# Per the transformers docs, a tokenizer saved without an explicit
# `model_max_length` reports a very large sentinel value; fail early with a
# clear message rather than trying to build a position table of that size.
context_length = tokenizer.model_max_length
if context_length > 1_000_000:
    raise ValueError(
        f'tokenizer.model_max_length={context_length} looks unset; '
        'set an explicit model_max_length on the tokenizer before building the config.'
    )

# Start from the GPT-2 checkpoint's config and override it to get a tiny model:
# 4 layers, 4 attention heads, 256-dim embeddings (64 dims per head).
model_config = AutoConfig.from_pretrained(
    model_ckpt,
    vocab_size=vocab_size,
    bos_token_id=tokenizer.bos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    n_positions=context_length,
    n_ctx=context_length,
    n_embd=256,
    n_head=4,
    n_layer=4,
)
print(model_config)

In [6]:
# Build a causal-LM model from the config alone (no checkpoint weights are
# passed here), then print its module structure.
model = AutoModelForCausalLM.from_config(
    model_config,
)
print(model)

In [7]:
# Report the model's memory footprint, scaled from bytes to megabytes (10^6).
footprint_mb = model.get_memory_footprint() / 1_000_000
print('Size in MBs:', footprint_mb)

In [8]:
# Report the parameter count in millions, to four decimal places.
params_in_millions = model.num_parameters() / 1_000_000
print(f'Num Params: {params_in_millions: .4f} M')