diff --git a/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts b/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts index 27b655cd6..60a5b3290 100644 --- a/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts +++ b/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts @@ -1,4 +1,6 @@ export interface ModelMetadata { + contextLength: number; + ngl: number; stopWord?: string; promptTemplate: string; version: number; diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index ed8064e0c..3cc4756bc 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -293,6 +293,11 @@ export class ModelsUsecases { return engine .unloadModel(modelId, model.engine || Engines.llamaCPP) + .catch((e) => { + // Skip model already unloaded error + if (e.code === AxiosError.ERR_BAD_REQUEST) return; + else throw e; + }) .then(() => { delete this.activeModelStatuses[modelId]; const modelEvent: ModelEvent = { @@ -498,8 +503,8 @@ export class ModelsUsecases { top_p: 0.7, // Default Model Settings - ctx_len: 4096, - ngl: 100, + ctx_len: metadata?.contextLength ?? 4096, + ngl: metadata?.ngl ?? 
100, engine: Engines.llamaCPP, }; if (!(await this.findOne(modelId))) await this.create(model); diff --git a/cortex-js/src/utils/huggingface.ts b/cortex-js/src/utils/huggingface.ts index 90383436b..27908f0f3 100644 --- a/cortex-js/src/utils/huggingface.ts +++ b/cortex-js/src/utils/huggingface.ts @@ -209,7 +209,6 @@ export async function getHFModelMetadata( ggufUrl: string, ): Promise<ModelMetadata | undefined> { try { - let metadata: any; const { ggufMetadata } = await import('hyllama'); // Read first 10mb of gguf file const fd = openSync(ggufUrl, 'r'); @@ -218,16 +217,20 @@ export async function getHFModelMetadata( closeSync(fd); // Parse metadata and tensor info - ({ metadata } = ggufMetadata(buffer.buffer)); + const { metadata } = ggufMetadata(buffer.buffer); const index = metadata['tokenizer.ggml.eos_token_id']; const hfChatTemplate = metadata['tokenizer.chat_template']; const promptTemplate = guessPromptTemplateFromHuggingFace(hfChatTemplate); const stopWord: string = metadata['tokenizer.ggml.tokens'][index] ?? ''; const name = metadata['general.name']; - + const contextLength = metadata['llama.context_length'] ?? 4096; + const ngl = (metadata['llama.block_count'] ?? 32) + 1 const version: number = metadata['version']; + return { + contextLength, + ngl, stopWord, promptTemplate, version,