In [1]:
import Downloads
import Base.Filesystem: filesize
using Pkg
Pkg.add("CUDA")
using CUDA
using Test

"""
    bash"script"

Feed the literal text `script` to a `bash` process on its standard input, with the
child's standard output connected to `stdout`.

The macro expands to the `open(...) do ... end` call instead of performing it
during macro expansion, so the script runs each time the surrounding code is
executed (for example on every call of a function that contains it) rather than
once when that code is lowered. `open` waits for the process and throws if it
exits with a non-zero status.
"""
macro bash_str(s)
    return quote
        open(`bash`, "w", stdout) do io
            print(io, $s)
        end
    end
end
# Remote location of the checkpoint and the local file name it is stored under.
const CKPT_URL = "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin"
const CKPT_PATH = "stories15M.bin"

# Fetch the checkpoint into the working directory (overwriting any previous copy).
Downloads.download(CKPT_URL, CKPT_PATH)

# Report the on-disk size so a truncated download is easy to spot.
let nbytes = filesize(CKPT_PATH)
    println("File size bytes = ", nbytes)
end

[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.11/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.11/Manifest.toml`


File size bytes = 60816028


In [3]:
# Model hyper-parameters taken from the checkpoint header.
# read_header fills the fields, in this order, from the seven Int32 values at the
# start of the file. The single-letter names below are the ones read_checkpoint
# uses when it builds tensor shapes.
struct Config
    dim::Int32         # "d": second dimension of the token embedding table (V, d)
    hidden_dim::Int32  # "hd": used in the w1 / w2 / w3 shapes
    n_layers::Int32    # "L": leading dimension of every per-layer tensor
    n_heads::Int32     # "H": must divide dim; head size is dim ÷ n_heads
    n_kv_heads::Int32  # read_header requires n_kv_heads <= n_heads
    vocab_size::Int32  # "V": first dimension of the token embedding table
    seq_len::Int32     # "T": first dimension of the freq_cis tables
end

"""
    TensorF32(data, dims)
    TensorF32(n_elems, dims)

Flat `Float32` buffer together with its logical shape.

`data` holds the elements in the order they appear in the checkpoint file, i.e.
row-major (C order): the LAST entry of `dims` varies fastest. `dims` is kept as a
runtime vector so tensors of any rank share one concrete type.

The two-argument convenience constructor allocates an uninitialised buffer of
`n_elems` elements after asserting that `prod(dims) == n_elems`.
"""
struct TensorF32
    data::Vector{Float32}
    dims::Vector{Int}  # dynamic shape for simplicity
end

function TensorF32(n_elems::Int, dims::Vector{Int})
    @assert prod(dims)==n_elems
    return TensorF32(Vector{Float32}(undef, n_elems), dims)
end

"""
    to_array(t::TensorF32)

Return the contents of `t` as an array of size `Tuple(t.dims)` such that
`A[i, j, ...]` is the element at logical index `(i, j, ...)` of the row-major
buffer.

Julia arrays are column-major, so reshaping the buffer directly to `t.dims` would
scramble every tensor of rank 2 or higher. The buffer is therefore reshaped with
the dimensions reversed (which matches the memory order) and then permuted back.

For rank 0 and rank 1 the result shares memory with `t.data`; for higher ranks it
is a freshly allocated copy.
"""
function to_array(t::TensorF32)
    nd = length(t.dims)
    # Element order is identical in row- and column-major for rank <= 1.
    nd <= 1 && return reshape(t.data, Tuple(t.dims))
    colmajor = reshape(t.data, Tuple(reverse(t.dims)))
    return permutedims(colmajor, Tuple(nd:-1:1))
end

# All weight tensors of one checkpoint.
# The fields are listed in the order read_checkpoint reads them from the file and
# passes them to the constructor. The shape comments use the abbreviations from
# read_checkpoint: V = vocab_size, d = dim, L = n_layers, hd = hidden_dim,
# T = seq_len, HS = dim ÷ n_heads.
mutable struct TransformerWeights
    token_embedding_table::TensorF32  # (V, d)
    rms_att_weight::TensorF32         # (L, d)
    wq::TensorF32                     # (L, d, d)
    wk::TensorF32                     # (L, d, d) when n_kv_heads == n_heads
    wv::TensorF32                     # (L, d, d) when n_kv_heads == n_heads
    wo::TensorF32                     # (L, d, d)
    rms_ffn_weight::TensorF32         # (L, d)
    w1::TensorF32                     # (L, hd, d)
    w2::TensorF32                     # (L, d,  hd)
    w3::TensorF32                     # (L, hd, d)
    rms_final_weight::TensorF32       # (d,)
    freq_cis_real::TensorF32          # (T, HS/2)
    freq_cis_imag::TensorF32          # (T, HS/2)
end


"""
    read_i32(io) -> Int32

Read one 32-bit signed integer from `io`, interpreting the four bytes as
little-endian.

`read(io, Int32)` returns the bytes in host order without any conversion, so the
value is passed through `ltoh`; on little-endian hosts this is a no-op.
"""
read_i32(io) = ltoh(read(io, Int32))

"""
    read_tensor!(io, dims::Vector{Int}) -> TensorF32

Allocate a tensor with `prod(dims)` elements and fill it with that many `Float32`
values read from `io`, advancing the stream. `dims` is stored in the returned
tensor as given (not copied).

The payload is assumed to be little-endian (TODO confirm for checkpoints exported
on other hosts); on a big-endian host the values are byte-swapped after reading.
"""
function read_tensor!(io, dims::Vector{Int})
    t = TensorF32(prod(dims), dims)
    read!(io, t.data)  # raw byte copy; errors if the stream ends early
    # `read!` performs no byte-order conversion, so swap in place when the host is
    # not little-endian. The branch is skipped entirely on little-endian hosts.
    if ENDIAN_BOM != 0x04030201
        bits = reinterpret(UInt32, t.data)
        map!(ltoh, bits, bits)
    end
    return t
end


# Index vector read_header applies to the seven raw header integers before naming
# them. With the identity permutation the fields are used in file order.
const HEADER_ORDER = [1,2,3,4,5,6,7]

"""
    read_header(io) -> Config

Read the seven `Int32` header fields from `io` (reordered through `HEADER_ORDER`)
and return them as a `Config`.

The header comes from an external file, so it is validated with explicit checks
rather than `@assert`, which is meant for internal invariants and is documented
as possibly being disabled at some optimization levels.

# Throws
- `ArgumentError` if any field is not positive, if `n_kv_heads > n_heads`, if
  `dim` is not a multiple of `n_heads`, or if the head size `dim ÷ n_heads` is odd.
- Whatever `read_i32` throws when the stream ends before seven integers are read.
"""
function read_header(io)
    raw7 = [Int(read_i32(io)) for _ in 1:7]  # Vector{Int} length 7
    vals = raw7[HEADER_ORDER]
    dim, hidden_dim, n_layers, n_heads, n_kv_heads, vocab_size, seq_len = vals

    # NOTE(review): some llama2.c exports reportedly store a negative vocab_size to
    # flag an unshared output classifier; such files are rejected here — confirm
    # whether they need to be supported.
    fields = (:dim, :hidden_dim, :n_layers, :n_heads, :n_kv_heads, :vocab_size, :seq_len)
    for (field, v) in zip(fields, vals)
        v > 0 || throw(ArgumentError(
            "invalid checkpoint header: $field must be positive, got $v"))
    end
    n_kv_heads <= n_heads || throw(ArgumentError(
        "invalid checkpoint header: n_kv_heads ($n_kv_heads) exceeds n_heads ($n_heads)"))
    dim % n_heads == 0 || throw(ArgumentError(
        "invalid checkpoint header: dim ($dim) is not a multiple of n_heads ($n_heads)"))
    head_size = dim ÷ n_heads
    iseven(head_size) || throw(ArgumentError(
        "invalid checkpoint header: head size ($head_size) must be even"))

    return Config(Int32(dim), Int32(hidden_dim), Int32(n_layers), Int32(n_heads),
                  Int32(n_kv_heads), Int32(vocab_size), Int32(seq_len))
end


# Shapes (row-major, as stored in the file) of every tensor in the checkpoint.
# The entries are in on-disk order, which is also the field order of
# TransformerWeights. This is the single source of truth for the reader, the
# element count and the self-tests, so the three cannot drift apart.
#
# wk and wv have n_kv_heads * head_size output rows. With n_kv_heads == n_heads
# that equals d, so multi-head checkpoints get the usual (L, d, d) shape, while
# grouped-query checkpoints get their smaller key/value projections.
function _checkpoint_shapes(cfg::Config)
    d  = Int(cfg.dim)
    hd = Int(cfg.hidden_dim)
    L  = Int(cfg.n_layers)
    H  = Int(cfg.n_heads)
    KV = Int(cfg.n_kv_heads)
    V  = Int(cfg.vocab_size)
    T  = Int(cfg.seq_len)
    HS     = d ÷ H
    HS2    = HS ÷ 2
    kv_dim = HS * KV
    return (
        token_embedding_table = [V, d],
        rms_att_weight        = [L, d],
        wq                    = [L, d, d],
        wk                    = [L, kv_dim, d],
        wv                    = [L, kv_dim, d],
        wo                    = [L, d, d],
        rms_ffn_weight        = [L, d],
        w1                    = [L, hd, d],
        w2                    = [L, d, hd],
        w3                    = [L, hd, d],
        rms_final_weight      = [d],
        freq_cis_real         = [T, HS2],
        freq_cis_imag         = [T, HS2],
    )
end

"""
    read_checkpoint(path::AbstractString) -> (cfg, weights, bytes_consumed)

Open the checkpoint at `path`, read its header and then every weight tensor in
file order.

# Returns
A tuple of the parsed `Config`, the populated `TransformerWeights`, and the stream
position after the last tensor (the number of bytes consumed).

# Throws
Whatever `read_header` and `read_tensor!` throw, e.g. when the header is invalid
or the file ends before all tensors are read.
"""
function read_checkpoint(path::AbstractString)
    return open(path, "r") do io
        cfg = read_header(io)

        # Internal invariants; read_header has already validated the header.
        d = Int(cfg.dim)
        H = Int(cfg.n_heads)
        @assert d % H == 0
        @assert (d ÷ H) % 2 == 0

        shapes = _checkpoint_shapes(cfg)
        # The positional constructor call below relies on matching order.
        @assert keys(shapes) == fieldnames(TransformerWeights)

        # Sequential reads: each tensor starts where the previous one ended.
        tensors = [read_tensor!(io, dims) for dims in values(shapes)]
        weights = TransformerWeights(tensors...)
        return cfg, weights, position(io)
    end
end

"""
    expected_f32_count(cfg::Config) -> Int

Total number of `Float32` values the checkpoint body must contain for `cfg`,
i.e. the sum of the element counts of all tensors read by `read_checkpoint`.
"""
function expected_f32_count(cfg::Config)
    return sum(prod, values(_checkpoint_shapes(cfg)))
end

"""
    run_selftests(path::AbstractString) -> Nothing

Load the checkpoint at `path` and check that the header is plausible, that every
tensor has the expected shape, and that the file size equals the header size plus
four bytes per expected `Float32`, with the reader having consumed the whole file.

Prints a success line and the parsed configuration; throws `AssertionError` on the
first failed check.
"""
function run_selftests(path::AbstractString)
    cfg, w, bytes_consumed = read_checkpoint(path)

    # Header sanity.
    @assert cfg.dim > 0
    @assert cfg.hidden_dim > 0
    @assert cfg.n_layers > 0
    @assert cfg.n_heads > 0
    @assert cfg.n_kv_heads > 0 && cfg.n_kv_heads <= cfg.n_heads
    @assert cfg.vocab_size >= 1000
    @assert cfg.seq_len > 0
    @assert (cfg.dim % cfg.n_heads) == 0
    @assert ((cfg.dim ÷ cfg.n_heads) % 2) == 0

    # Tensor shapes.
    for (name, dims) in pairs(_checkpoint_shapes(cfg))
        actual = getfield(w, name).dims
        @assert actual == dims "shape mismatch for $name: expected $dims, got $actual"
    end

    # Byte accounting: 7 Int32 header fields followed by the Float32 payload.
    header_bytes = 7 * 4
    f32_total = expected_f32_count(cfg)
    expected_bytes = header_bytes + 4 * f32_total
    actual_bytes = filesize(path)
    @assert actual_bytes == expected_bytes "file size mismatch: expected $expected_bytes, got $actual_bytes"
    @assert bytes_consumed == actual_bytes "did not read to EOF"

    println("✓ All tests passed.")
    println("Config => dim=$(cfg.dim) hidden_dim=$(cfg.hidden_dim) layers=$(cfg.n_layers) heads=$(cfg.n_heads) kv_heads=$(cfg.n_kv_heads) vocab=$(cfg.vocab_size) seq_len=$(cfg.seq_len)")
    return nothing
end


# Checkpoint to validate; this is the same file name as CKPT_PATH in the download cell.
const PATH = "stories15M.bin"
# Parse the file and run every consistency check; prints the config on success.
run_selftests(PATH)


✓ All tests passed.




Config => dim=288 hidden_dim=768 layers=6 heads=6 kv_heads=6 vocab=32000 seq_len=256
