Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit ebfd3b0

Browse files
authored
chore: persist context length and ngl from gguf file (#947)
1 parent 89a3261 commit ebfd3b0

File tree

3 files changed

+15
-5
lines changed

3 files changed

+15
-5
lines changed

cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
11
export interface ModelMetadata {
2+
contextLength: number;
3+
ngl: number;
24
stopWord?: string;
35
promptTemplate: string;
46
version: number;

cortex-js/src/usecases/models/models.usecases.ts

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -293,6 +293,11 @@ export class ModelsUsecases {
293293

294294
return engine
295295
.unloadModel(modelId, model.engine || Engines.llamaCPP)
296+
.catch((e) => {
297+
// Skip model already unloaded error
298+
if (e.code === AxiosError.ERR_BAD_REQUEST) return;
299+
else throw e;
300+
})
296301
.then(() => {
297302
delete this.activeModelStatuses[modelId];
298303
const modelEvent: ModelEvent = {
@@ -498,8 +503,8 @@ export class ModelsUsecases {
498503
top_p: 0.7,
499504

500505
// Default Model Settings
501-
ctx_len: 4096,
502-
ngl: 100,
506+
ctx_len: metadata?.contextLength ?? 4096,
507+
ngl: metadata?.ngl ?? 100,
503508
engine: Engines.llamaCPP,
504509
};
505510
if (!(await this.findOne(modelId))) await this.create(model);

cortex-js/src/utils/huggingface.ts

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,6 @@ export async function getHFModelMetadata(
209209
ggufUrl: string,
210210
): Promise<ModelMetadata | undefined> {
211211
try {
212-
let metadata: any;
213212
const { ggufMetadata } = await import('hyllama');
214213
// Read first 10mb of gguf file
215214
const fd = openSync(ggufUrl, 'r');
@@ -218,16 +217,20 @@ export async function getHFModelMetadata(
218217
closeSync(fd);
219218

220219
// Parse metadata and tensor info
221-
({ metadata } = ggufMetadata(buffer.buffer));
220+
const { metadata } = ggufMetadata(buffer.buffer);
222221

223222
const index = metadata['tokenizer.ggml.eos_token_id'];
224223
const hfChatTemplate = metadata['tokenizer.chat_template'];
225224
const promptTemplate = guessPromptTemplateFromHuggingFace(hfChatTemplate);
226225
const stopWord: string = metadata['tokenizer.ggml.tokens'][index] ?? '';
227226
const name = metadata['general.name'];
228-
227+
const contextLength = metadata['llama.context_length'] ?? 4096;
228+
const ngl = (metadata['llama.block_count'] ?? 32) + 1
229229
const version: number = metadata['version'];
230+
230231
return {
232+
contextLength,
233+
ngl,
231234
stopWord,
232235
promptTemplate,
233236
version,

0 commit comments

Comments
 (0)