Let's export the trained model in the safetensors format for compatibility with downstream inference engines. First, we'll define some variables.

In [1]:
# --- Outputs -----------------------------------------------------------------
# Name of the exported model. It is used both as the sub-directory created
# under `exports_path` and as the repository name when pushing to the Hub.
model_name = "NoPE-GPT-Small-Base"

# Directory under which the exported model folder is written.
exports_path = "./exports"

# --- Inputs ------------------------------------------------------------------
# Base model checkpoint to export.
checkpoint_path = "./checkpoints/checkpoint.pt"

# Optional LoRA checkpoint to merge into the base model before exporting.
# Leave as None to export the base model unchanged (the LoRA step is skipped).
lora_path = None

Then, we'll load the base model checkpoint into memory from disk.

In [2]:
import torch

from model import NoPEGPT

# NOTE: weights_only=False makes torch.load perform a full unpickle, which can
# execute arbitrary code embedded in the file. Only load checkpoints from a
# trusted source. (Presumably required here because the checkpoint also stores
# the tokenizer object alongside the tensors - confirm before tightening.)
checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=False)

tokenizer = checkpoint["tokenizer"]
state_dict = checkpoint["model"]

# Checkpoints written from a compiled model carry an "_orig_mod." marker in
# their parameter names. Strip it so the names match the plain model built
# below. Iterate over a snapshot of the keys because the dict is modified
# while we walk it.
for compiled_name in tuple(state_dict):
    plain_name = compiled_name.replace("_orig_mod.", "")
    state_dict[plain_name] = state_dict.pop(compiled_name)

# Rebuild the architecture from the hyper-parameters stored in the checkpoint,
# then restore the trained weights (strict load: every key must match).
model = NoPEGPT(**checkpoint["model_args"])
model.load_state_dict(state_dict)

print("Base checkpoint loaded successfully")

Base checkpoint loaded successfully


Now, we'll load any fine-tuned token embeddings and LoRA checkpoints we wish to incorporate into the exported model.

In [3]:
if lora_path is not None:
    # NOTE: full unpickle (weights_only=False) - only load trusted files.
    lora_checkpoint = torch.load(lora_path, map_location="cpu", weights_only=False)

    # From here on the fine-tuned tokenizer replaces the base tokenizer.
    tokenizer = lora_checkpoint["tokenizer"]

    # Match the embedding table to the fine-tuned vocabulary size first, then
    # attach the LoRA parameters using the arguments stored with the checkpoint.
    resized_model = model.resize_token_embeddings(tokenizer.n_vocab)
    model = resized_model.add_lora_parameters(**lora_checkpoint["lora_args"])

    # Restore the fine-tuned token embeddings.
    model.token_embeddings.load_state_dict(lora_checkpoint["token_embeddings"])

    # Non-strict load: the "lora" entry is presumably a partial state dict that
    # holds only the LoRA parameters, so the base weights are left untouched.
    model.load_state_dict(lora_checkpoint["lora"], strict=False)

    # Merge the LoRA parameters into the model before it is exported.
    model.merge_lora_parameters()

    print("LoRA checkpoint loaded successfully")

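Next, let's prepare the model for export in Hugging Face format so that it can be used with the Hugging Face ecosystem. We convert the tokenizer, register the custom config and model classes with the Auto classes, and load the trained weights into the Hugging Face model wrapper.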

In [None]:
from os import path

from transformers.integrations.tiktoken import convert_tiktoken_to_fast
from transformers import PreTrainedTokenizerFast, AutoConfig, AutoModelForCausalLM

from model import NoPEGPTHuggingFaceConfig, NoPEGPTHuggingFaceModel

from os import makedirs

hf_path = path.join(exports_path, model_name)

# Create the export directory, including a missing `exports_path` parent, up
# front so the cell also works on a fresh checkout instead of depending on the
# tokenizer converter to create the whole directory tree.
makedirs(hf_path, exist_ok=True)

# Write the tiktoken tokenizer to `hf_path` in Hugging Face "fast" format.
convert_tiktoken_to_fast(tokenizer, hf_path)

# The converter only writes files, so load the tokenizer back from disk to get
# a tokenizer object we can push to the Hub later.
hf_tokenizer = PreTrainedTokenizerFast(tokenizer_file=path.join(hf_path, "tokenizer.json"))

# Make the custom architecture resolvable through the Auto* factories in this
# session.
AutoConfig.register("nope-gpt", NoPEGPTHuggingFaceConfig)
AutoModelForCausalLM.register(NoPEGPTHuggingFaceConfig, NoPEGPTHuggingFaceModel)

# Mark the custom classes for auto-class loading when the model is saved.
# NOTE(review): the model is registered with AutoModelForCausalLM above but
# recorded for "AutoModel" here - confirm this asymmetry is intentional.
NoPEGPTHuggingFaceConfig.register_for_auto_class()
NoPEGPTHuggingFaceModel.register_for_auto_class("AutoModel")

# NOTE(review): the config is built from the *base* checkpoint arguments. If a
# LoRA checkpoint resized the token embeddings, the vocabulary size recorded
# here may no longer match the merged model - verify before exporting.
hf_config = NoPEGPTHuggingFaceConfig(**checkpoint["model_args"])
hf_model = NoPEGPTHuggingFaceModel(hf_config)

# Non-strict load of the base weights into the wrapped model. The weights that
# are actually written out are supplied separately in the upload cell.
hf_model.model.load_state_dict(state_dict, strict=False)

# NOTE(review): at this point only the tokenizer files have been written to
# `hf_path`; the model weights are saved by `save_pretrained` in the next cell.
print(f"Model saved to {hf_path}")

Model saved to ./exports/NoPE-GPT-Small-Base


Lastly, we'll log in to the Hugging Face Hub and upload the tokenizer and model under our account.

In [5]:
from huggingface_hub import notebook_login

# Interactive login widget; the token is entered at runtime, never hard-coded.
notebook_login()

# Export the (possibly LoRA-merged) model weights, leaving out the output layer
# entries to compensate for the lack of tied weight support.
state_dict = {
    name: tensor
    for name, tensor in model.state_dict().items()
    if "output_layer" not in name
}

hf_tokenizer.push_to_hub(model_name)

# Save the weights locally and push them to the Hub. `repo_id` is passed
# explicitly so the model always lands in the same repository as the tokenizer
# above; by default it would be derived from the last path component of
# `hf_path`, which differs from `model_name` when the name contains a namespace
# (e.g. "org/name").
hf_model.save_pretrained(
    hf_path,
    state_dict=state_dict,
    push_to_hub=True,
    repo_id=model_name,
)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

README.md:   0%|          | 0.00/18.2k [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


model.safetensors:   0%|          | 0.00/1.41G [00:00<?, ?B/s]