Skip to content

Commit

Permalink
Use mmap to load GGUF models
Browse files Browse the repository at this point in the history
  • Loading branch information
cafaxo committed Apr 22, 2024
1 parent 77f7b8f commit 97e867b
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 3 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ version = "0.1.0"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Expand Down
1 change: 1 addition & 0 deletions src/Llama2.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ using SIMD
using LoopVectorization
using Random
using Distributions
using Mmap

export ModelConfig, CharTokenizer, LanguageModel
export load_gguf_model, load_karpathy_model, encode, sample
Expand Down
43 changes: 40 additions & 3 deletions src/load_gguf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ function align_offset(offset, alignment)
return offset + (alignment - (offset % alignment)) % alignment
end

function read_ggml_tensor(tensor_type::GGML_TYPE, size, file::IOStream)
function _read_ggml_tensor(tensor_type::GGML_TYPE, size, file::IOStream)
N = length(size)

if tensor_type == GGML_TYPE_F32
Expand All @@ -188,7 +188,44 @@ function read_ggml_tensor(tensor_type::GGML_TYPE, size, file::IOStream)
return tensor
end

function load_gguf_model(filename::AbstractString)
"""
    _read_ggml_tensor_mmap(tensor_type::GGML_TYPE, size, file::IOStream)

Memory-map a single GGML tensor of the given `tensor_type` and logical `size`
starting at the current position of `file`, then advance the stream past the
mapped bytes. Returns the mmapped `Array` without copying the data into RAM.

Supported types: `GGML_TYPE_F32` and the K-quant block types
`GGML_TYPE_Q4_K` / `GGML_TYPE_Q5_K` / `GGML_TYPE_Q6_K`; any other type errors.
"""
function _read_ggml_tensor_mmap(tensor_type::GGML_TYPE, size, file::IOStream)
    N = length(size)

    if tensor_type == GGML_TYPE_F32
        tensor = mmap(file, Array{Float32,N}, Tuple(size))
    else
        # K-quant types pack the first dimension into super-blocks of QK_K
        # elements, so dimension 1 becomes a block count.
        blocktype =
            tensor_type == GGML_TYPE_Q4_K ? block_q4_K :
            tensor_type == GGML_TYPE_Q5_K ? block_q5_K :
            tensor_type == GGML_TYPE_Q6_K ? block_q6_K :
            error("tensor type $tensor_type not implemented")

        @assert size[1] % QK_K == 0
        dims = (size[1] ÷ QK_K, size[2:end]...)
        tensor = mmap(file, Array{blocktype,N}, dims)
    end

    # mmap does not advance the stream position, so skip past the mapped
    # bytes explicitly to leave `file` positioned after this tensor.
    seek(file, position(file) + sizeof(tensor))

    return tensor
end

"""
    read_ggml_tensor(tensor_type::GGML_TYPE, size, file::IOStream, mmap)

Read one GGML tensor from `file`: memory-map it when `mmap` is `true`,
otherwise read it into a freshly allocated array.
"""
function read_ggml_tensor(tensor_type::GGML_TYPE, size, file::IOStream, mmap)
    reader = mmap ? _read_ggml_tensor_mmap : _read_ggml_tensor
    return reader(tensor_type, size, file)
end

function load_gguf_model(filename::AbstractString; mmap=true)
header = nothing
tensor_dict = nothing

Expand All @@ -204,7 +241,7 @@ function load_gguf_model(filename::AbstractString)
# read tensors
@showprogress desc="Loading model..." for tensor_info in tensor_info_list
seek(file, pad_offset + tensor_info.offset)
tensor_dict[tensor_info.name] = read_ggml_tensor(tensor_info.typ, tensor_info.dimensions, file)
tensor_dict[tensor_info.name] = read_ggml_tensor(tensor_info.typ, tensor_info.dimensions, file, mmap)
end
end

Expand Down

0 comments on commit 97e867b

Please sign in to comment.