In [None]:
import os
import sys
import requests
from tqdm import tqdm

# src: https://github.com/openai/gpt-2/blob/master/download_model.py
def download_model(model):
    subdir = os.path.join('../models/openai/', model)
    if not os.path.exists(subdir):
        os.makedirs(subdir, exist_ok=True)
    subdir = subdir.replace('\\','/') 
    
    for filename in ['checkpoint','encoder.json','hparams.json','model.ckpt.data-00000-of-00001', 'model.ckpt.index', 'model.ckpt.meta', 'vocab.bpe']:
        url = "https://openaipublic.blob.core.windows.net/gpt-2/models/" + model + "/" + filename
        print(url)
        r = requests.get(url, stream=True)
    
        with open(os.path.join(subdir, filename), 'wb') as f:
            file_size = int(r.headers["content-length"])
            chunk_size = 1000
            with tqdm(ncols=100, desc="Fetching " + filename, total=file_size, unit_scale=True) as pbar:
                # 1k for chunk_size, since Ethernet packet size is around 1500 bytes
                for chunk in r.iter_content(chunk_size=chunk_size):
                    f.write(chunk)
                    pbar.update(chunk_size)

weights = '1558M'
download_model(weights)

In [3]:
%run -n 00_config.ipynb
%run -n 02_gpt2_model.ipynb

from pathlib import Path

import json
import tensorflow as tf
import torch
import tiktoken

import numpy as np
import torch
import tensorflow as tf

def load_gpt2_checkpoint(checkpoint_path: str, model, config):
    """munge the parameter keys from the tf gpt2 checkpoint to match my model"""
    
    checkpoint = tf.train.load_checkpoint(checkpoint_path)
    
    pt_dict = {}

    def transfer(pt_key, tf_key, transform=lambda x: x):
        """ pt_dict[pt_key] <- tf_ckpt[tk_key] with an optional transform """
        tensor = torch.from_numpy(checkpoint.get_tensor(tf_key))
        pt_dict[pt_key] = transform(tensor)
    
    def transfer_block(i, pt_suffix, tf_suffix, transform=lambda x: x):
        transfer(f'transformer_blocks.{i}.{pt_suffix}', f'model/h{i}/{tf_suffix}', transform)
    
    # global weights (those outside of the transformer stack)
    transfer('token_embedding.weight', 'model/wte')
    transfer('position_embeddings.weight', 'model/wpe')
    transfer('final_norm.gain', 'model/ln_f/g')
    transfer('final_norm.bias', 'model/ln_f/b')
    
    # weight tying -- this works because we copy the reference
    pt_dict['out_head.weight'] = pt_dict['token_embedding.weight']
    
    # layer weights
    for i in range(config.n_layers):
        # norms
        transfer_block(i, 'norm1.gain', 'ln_1/g')
        transfer_block(i, 'norm1.bias', 'ln_1/b')
        transfer_block(i, 'norm2.gain', 'ln_2/g')
        transfer_block(i, 'norm2.bias', 'ln_2/b')
        
        # GPT2 uses a combined weight vector for attention, I use split QKV weights
        # because it's easier to understand (and I looked at Gemma2 1B which uses the
        # same KV and only duplicates Q) 
        qkv_w = torch.from_numpy(checkpoint.get_tensor(f'model/h{i}/attn/c_attn/w')).squeeze(0)
        q, k, v = torch.split(qkv_w, config.emb_dim, dim=1)
        pt_dict[f'transformer_blocks.{i}.att.W_query.weight'] = q.T
        pt_dict[f'transformer_blocks.{i}.att.W_key.weight'] = k.T
        pt_dict[f'transformer_blocks.{i}.att.W_value.weight'] = v.T

        # gpt2 uses qkv biases, so load them
        assert config.qkv_bias, "qkv biases are reqiured for openai gpt2 weights"
        if config.qkv_bias:
            qkv_b = torch.from_numpy(checkpoint.get_tensor(f'model/h{i}/attn/c_attn/b'))
            q_b, k_b, v_b = torch.split(qkv_b, config.emb_dim, dim=0)
            pt_dict[f'transformer_blocks.{i}.att.W_query.bias'] = q_b
            pt_dict[f'transformer_blocks.{i}.att.W_key.bias'] = k_b
            pt_dict[f'transformer_blocks.{i}.att.W_value.bias'] = v_b
        
        transfer_block(i, 'att.out_proj.weight', 'attn/c_proj/w', lambda x: x.squeeze(0).T)
        transfer_block(i, 'att.out_proj.bias', 'attn/c_proj/b')
        
        # multi-layer perceptron (feedforward exapnsion and projection)
        transfer_block(i, 'ff.expansion.weight', 'mlp/c_fc/w', lambda x: x.squeeze(0).T)
        transfer_block(i, 'ff.expansion.bias', 'mlp/c_fc/b')
        transfer_block(i, 'ff.projection.weight', 'mlp/c_proj/w', lambda x: x.squeeze(0).T)

        assert config.mlp_bias, "mlp biases are required for openai gpt2 weights"
        if config.mlp_bias:
            transfer_block(i, 'ff.projection.bias', 'mlp/c_proj/b')

    # strict true is the default, but I really mean it
    return model.load_state_dict(pt_dict, strict=True)

def load_config_from_hparams(hparams_content: dict) -> GPT2Config:
    """
    Loads GPT2Config from a dictionary formed from hparams.json parameters.
    """
    hparams = hparams_content

    config_params = {
        "vocab_size": hparams["n_vocab"],
        "context_length": hparams["n_ctx"],
        "block_size": hparams["n_ctx"],
        "emb_dim": hparams["n_embd"],
        "n_heads": hparams["n_head"],
        "n_layers": hparams["n_layer"],
        "qkv_bias": True,     # default for OpenAI GPT-2 models
        "mlp_bias": True,     # default for OpenAI GPT-2 models
        "device": "cuda" if torch.cuda.is_available() else "cpu",
        "compile_model": True if torch.cuda.is_available() else False,
        "dtype": "bfloat16" if torch.cuda.is_available() else "float32"
    }

    # instantiate GPT2Config with the mapped parameters
    return GPT2Config(**config_params)
    
weights = '124M'
model_path = f"../models/openai/{weights}"

# tie it all together
if 'm' not in locals() or reload_model:
    print(f"loading model from {model_path}")
    with open(f"{model_path}/hparams.json", 'r') as f:
        hparams = json.load(f)
        cfg = load_config_from_hparams(hparams)
        enc = tiktoken.get_encoding(cfg.encoding_name)
        m = GPTModel(cfg)
        load_gpt2_checkpoint(model_path, m, cfg)
        reload_model = False

def gen_text(model, tokenizer, prompt: str, max_tokens=25) -> str:
    model.eval()
    device = next(model.parameters()).device
    
    encoded = tokenizer.encode(prompt)
    encoded_ids = torch.tensor([encoded], dtype=torch.long).to(device)

    # Model inferenceb
    with torch.no_grad():
        output_token_ids = model.generate(encoded_ids, max_tokens, top_k=10) #, temperature=1.1, do_sample=True, top_k=None, top_p=0.995)
    
    decoded_ids_list = output_token_ids[0].cpu().tolist()
    decoded_text = tokenizer.decode(decoded_ids_list)
    return decoded_text
    
s = gen_text(m, enc, "hello world, it's me")
print(s)

hello world, it's me who needs to make it through the next few rounds of training. It's me trying to learn the fundamentals of how to make


In [44]:
import torch as torch

# compare to hugging face transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2-xl")
model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2-xl")

prompt = "hello world, it's me"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
input_ids = input_ids.to(device)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id


output_ids = model.generate(
    input_ids,
    max_new_tokens=20,      # same number of tokens
    num_beams=1,            # my generate doesn't have beam search
    no_repeat_ngram_size=2, # Prevent repeating 2-grams
    do_sample=False,         # Enable sampling
    temperature=1.0,
    top_p=0.95              # nucleous sampling vs top-k
)

generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(generated_text)

The following generation flags are not valid and may be ignored: ['top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


hello world, it's me, the world's most boring person.

I'm not sure if I'm the most interesting
