Let's export the trained model in Safetensors format for compatibility with downstream inference engines. First, we'll define some variables.

In [None]:
# Export settings shared by the cells below.

# Name of the export sub-directory (joined onto `exports_path`) and of the
# repository that the final cell pushes to.
model_name = "LightGPT-Small-Base"
# Base model checkpoint file read with `torch.load`.
checkpoint_path = "./checkpoints/checkpoint.pt"
# Optional LoRA checkpoint file; leave as None to skip the LoRA/token
# embedding step and export the base model as-is.
lora_path = None  # "./checkpoints/instruct.pt"
# Parent directory under which the exported model directory is created.
exports_path = "./exports"

Then, we'll load the base model checkpoint into memory from disk.

In [None]:
import torch

import tiktoken

from model import LightGPT

# Read the base checkpoint onto the CPU, restricting unpickling to weights.
checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

# Rebuild the tokenizer and the model from the settings stored in the checkpoint.
tokenizer = tiktoken.get_encoding(checkpoint["token_encoding"])
model = LightGPT(**checkpoint["model_args"])

state_dict = checkpoint["model"]

# State dicts saved from a compiled model carry an "_orig_mod." marker in
# their keys. Rename every entry in place so the keys line up with the
# uncompiled model constructed above. Iterating over a snapshot of the keys
# lets us mutate the dict while looping.
for key in tuple(state_dict):
    weights = state_dict.pop(key)
    state_dict[key.replace("_orig_mod.", "")] = weights

model.load_state_dict(state_dict)

print("Base checkpoint loaded successfully")

Now, we'll load any fine-tuned token embeddings and LoRA checkpoints we wish to incorporate into the exported model.

In [None]:
from model import LightGPTInstruct

from tiktoken import Encoding

# Only runs when a LoRA checkpoint has been configured in the first cell.
if lora_path is not None:
    # NOTE(review): this rebinds `checkpoint`, so the export cell below reads
    # `checkpoint["model_args"]` from the LoRA checkpoint rather than the base
    # one - confirm the LoRA checkpoint carries that key.
    checkpoint = torch.load(lora_path, map_location="cpu", weights_only=True)

    # Extend the base encoding with the chat-markup special tokens.
    # tiktoken's `Encoding` requires `special_tokens` to be a mapping of token
    # string to token id (it is not a list), so keep the existing mapping and
    # give the new tokens the next two unused ids.
    # NOTE(review): assumes fine-tuning assigned `<|im_start|>` and
    # `<|im_end|>` these same ids - confirm against the fine-tuning script.
    tokenizer = Encoding(
        name=tokenizer.name,
        pat_str=tokenizer._pat_str,
        mergeable_ranks=tokenizer._mergeable_ranks,
        special_tokens={
            **tokenizer._special_tokens,
            "<|im_start|>": tokenizer.n_vocab,
            "<|im_end|>": tokenizer.n_vocab + 1,
        },
    )

    # Wrap the base model so the LoRA parameters have somewhere to load into.
    model = LightGPTInstruct(model, **checkpoint["lora_args"])

    # Restore the fine-tuned token embeddings, then the LoRA parameters.
    # strict=False because the LoRA state dict does not cover every parameter
    # of the wrapped model.
    model.model.token_embeddings.load_state_dict(checkpoint["token_embeddings"])
    model.load_state_dict(checkpoint["lora"], strict=False)

    # Fold the LoRA parameters into the model before it is exported.
    model.merge_lora_parameters()

    print("LoRA checkpoint loaded successfully")

Next, let's export the model in HuggingFace format so that it can be used with the HuggingFace ecosystem.

In [None]:
from os import path

from transformers.integrations.tiktoken import convert_tiktoken_to_fast
from transformers import PreTrainedTokenizerFast, AutoConfig, AutoModelForCausalLM

from model import LightGPTHuggingFaceConfig, LightGPTHuggingFaceModel

# Export the in-memory model to `<exports_path>/<model_name>` using the
# HuggingFace `save_pretrained` API.
hf_path = path.join(exports_path, model_name)

# Tokenizer export is intentionally disabled for now.
# Wait for buggy HuggingFace conversion code to be fixed.
#convert_tiktoken_to_fast(tokenizer, hf_path)

# Register the custom config and model classes with the Auto* factories under
# the "lightgpt" model type.
AutoConfig.register("lightgpt", LightGPTHuggingFaceConfig)
AutoModelForCausalLM.register(LightGPTHuggingFaceConfig, LightGPTHuggingFaceModel)

# NOTE(review): presumably this makes the custom classes loadable through the
# Auto* API from the exported directory - confirm. The model is registered
# with AutoModelForCausalLM above but with "AutoModel" here; verify that the
# difference is intended.
LightGPTHuggingFaceConfig.register_for_auto_class()
LightGPTHuggingFaceModel.register_for_auto_class("AutoModel")

# NOTE(review): when a LoRA checkpoint was loaded, `checkpoint` was rebound to
# it in the previous cell - confirm it also contains "model_args".
hf_config = LightGPTHuggingFaceConfig(**checkpoint["model_args"])

hf_model = LightGPTHuggingFaceModel(hf_config)

# NOTE(review): the base-checkpoint cell strips an "_orig_mod." marker from
# compiled state-dict keys; confirm that the uncompiled keys loaded below
# actually match this compiled module, since strict=False would hide a
# mismatch.
hf_model.model = torch.compile(hf_model.model)

# Compensate for HuggingFace Transformers lack of tied weight support.
# Every entry whose key contains "output_layer" is dropped; presumably those
# weights are tied to the token embeddings - TODO confirm.
state_dict = model.state_dict()
state_dict = {k:v for k, v in state_dict.items() if "output_layer" not in k}

# strict=False tolerates the entries that were filtered out above.
hf_model.model.load_state_dict(state_dict, strict=False)

# Write the filtered state dict explicitly instead of the one `hf_model`
# would produce on its own.
hf_model.save_pretrained(hf_path, state_dict=state_dict)

print(f"Model saved to {hf_path}")

Lastly, we'll log in to the HuggingFace Hub and upload the model under our account. Unfortunately, we'll need to upload the Safetensors files manually to the HuggingFace Hub because the HuggingFace library does not support pushing models with tied weights using Safetensors.

In [None]:
from huggingface_hub import notebook_login

# Prompt for HuggingFace Hub credentials inside the notebook.
notebook_login()

# Upload the exported model to a repository named after the model.
# Safetensors serialization is switched off for this push.
hf_model.push_to_hub(
    repo_id=model_name,
    safe_serialization=False,
)