This repository was archived by the owner on Jul 4, 2025. It is now read-only.
3 files changed: +15 -5 lines changed
infrastructure/commanders/types (3 files changed: +15 -5 lines changed)
Original file line number | Diff line number | Diff line change
11export interface ModelMetadata {
2+ contextLength : number ;
3+ ngl : number ;
24 stopWord ?: string ;
35 promptTemplate : string ;
46 version : number ;
Original file line number | Diff line number | Diff line change
@@ -293,6 +293,11 @@ export class ModelsUsecases {
293293
294294 return engine
295295 . unloadModel ( modelId , model . engine || Engines . llamaCPP )
296+ . catch ( ( e ) => {
297+ // Skip model already unloaded error
298+ if ( e . code === AxiosError . ERR_BAD_REQUEST ) return ;
299+ else throw e ;
300+ } )
296301 . then ( ( ) => {
297302 delete this . activeModelStatuses [ modelId ] ;
298303 const modelEvent : ModelEvent = {
@@ -498,8 +503,8 @@ export class ModelsUsecases {
498503 top_p : 0.7 ,
499504
500505 // Default Model Settings
501- ctx_len : 4096 ,
502- ngl : 100 ,
506+ ctx_len : metadata ?. contextLength ?? 4096 ,
507+ ngl : metadata ?. ngl ?? 100 ,
503508 engine : Engines . llamaCPP ,
504509 } ;
505510 if ( ! ( await this . findOne ( modelId ) ) ) await this . create ( model ) ;
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,6 @@ export async function getHFModelMetadata(
209209 ggufUrl : string ,
210210) : Promise < ModelMetadata | undefined > {
211211 try {
212- let metadata : any ;
213212 const { ggufMetadata } = await import ( 'hyllama' ) ;
214213 // Read first 10mb of gguf file
215214 const fd = openSync ( ggufUrl , 'r' ) ;
@@ -218,16 +217,20 @@ export async function getHFModelMetadata(
218217 closeSync ( fd ) ;
219218
220219 // Parse metadata and tensor info
221- ( { metadata } = ggufMetadata ( buffer . buffer ) ) ;
220+ const { metadata } = ggufMetadata ( buffer . buffer ) ;
222221
223222 const index = metadata [ 'tokenizer.ggml.eos_token_id' ] ;
224223 const hfChatTemplate = metadata [ 'tokenizer.chat_template' ] ;
225224 const promptTemplate = guessPromptTemplateFromHuggingFace ( hfChatTemplate ) ;
226225 const stopWord : string = metadata [ 'tokenizer.ggml.tokens' ] [ index ] ?? '' ;
227226 const name = metadata [ 'general.name' ] ;
228-
227+ const contextLength = metadata [ 'llama.context_length' ] ?? 4096 ;
228+ const ngl = ( metadata [ 'llama.block_count' ] ?? 32 ) + 1
229229 const version : number = metadata [ 'version' ] ;
230+
230231 return {
232+ contextLength,
233+ ngl,
231234 stopWord,
232235 promptTemplate,
233236 version,
You can’t perform that action at this time.
0 commit comments