Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 71 additions & 7 deletions cortex-js/src/domain/models/model.interface.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
/**
 * The model artifact object.
 * Holds file locations for a model's binaries.
 */
export interface ModelArtifact {
  // Path to the mmproj file — presumably the multimodal projector; TODO confirm
  mmproj?: string;
  // Path to the llama model file on disk
  llama_model_path?: string;
}

/**
* Model type defines the shape of a model object.
* @stored
Expand Down Expand Up @@ -90,6 +85,56 @@ export interface Model {
*/
cpu_threads?: number;

/**
* The prompt to use for internal configuration
*/
pre_prompt?: string;

/**
* The batch size for prompt eval step
*/
n_batch?: number;

/**
* Whether to enable prompt caching
*/
caching_enabled?: boolean;

/**
* Group attention factor in self-extend
*/
grp_attn_n?: number;

/**
* Group attention width in self-extend
*/
grp_attn_w?: number;

/**
* Prevent system swapping of the model to disk in macOS
*/
mlock?: boolean;

/**
* You can constrain the sampling using GBNF grammars by providing a path to a grammar file
*/
grammar_file?: string;

/**
* To enable Flash Attention, default is true
*/
flash_attn?: boolean;

/**
* KV cache type: f16, q8_0, q4_0, default is f16
*/
cache_type?: string;

/**
* To enable mmap, default is true
*/
use_mmap?: boolean;

/**
* The model engine.
*/
Expand All @@ -112,10 +157,20 @@ export interface ModelSettingParams {
llama_model_path?: string;
mmproj?: string;
cont_batching?: boolean;
vision_model?: boolean;
text_model?: boolean;
engine?: string;
stop?: string[];
pre_prompt?: string;
n_batch?: number;
caching_enabled?: boolean;
grp_attn_n?: number;
grp_attn_w?: number;
mlock?: boolean;
grammar_file?: string;
model_type?: string;
model_alias?: string;
flash_attn?: boolean;
cache_type?: string;
use_mmap?: boolean;
}

/**
Expand All @@ -133,3 +188,12 @@ export interface ModelRuntimeParams {
presence_penalty?: number;
engine?: string;
}

/**
 * The model artifact object.
 * In case the model files are not a raw file list.
 */
export interface ModelArtifact {
  // Path to the mmproj file — presumably the multimodal projector; TODO confirm
  mmproj?: string;
  // Path to the llama model file on disk
  llama_model_path?: string;
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,13 @@ import {
OPEN_CHAT_3_5_JINJA,
ZEPHYR,
ZEPHYR_JINJA,
} from '../../constants/prompt-constants';
} from './../../constants/prompt-constants';
import {
HUGGING_FACE_DOWNLOAD_FILE_MAIN_URL,
HUGGING_FACE_REPO_MODEL_API_URL,
HUGGING_FACE_REPO_URL,
HUGGING_FACE_TREE_REF_URL,
} from '../../constants/huggingface';
import { ModelTokenizer } from '../types/model-tokenizer.interface';
import { HttpService } from '@nestjs/axios';
import { firstValueFrom } from 'rxjs';
Expand All @@ -29,12 +35,6 @@ import { join, basename } from 'path';
import { load } from 'js-yaml';
import { existsSync, readdirSync, readFileSync } from 'fs';
import { isLocalModel, normalizeModelId } from '../utils/normalize-model-id';
import {
HUGGING_FACE_DOWNLOAD_FILE_MAIN_URL,
HUGGING_FACE_REPO_MODEL_API_URL,
HUGGING_FACE_REPO_URL,
HUGGING_FACE_TREE_REF_URL,
} from '../../constants/huggingface';

@Injectable()
export class ModelsCliUsecases {
Expand Down
60 changes: 33 additions & 27 deletions cortex-js/src/infrastructure/commanders/usecases/ps.cli.usecases.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import { Injectable } from '@nestjs/common';
import { defaultCortexCppHost, defaultCortexCppPort } from '@/infrastructure/constants/cortex';
import { HttpStatus, Injectable } from '@nestjs/common';
import {
CORTEX_CPP_MODELS_URL,
defaultCortexCppHost,
defaultCortexCppPort,
} from '@/infrastructure/constants/cortex';
import { HttpService } from '@nestjs/axios';
import { firstValueFrom } from 'rxjs';

export interface ModelStat {
modelId: string;
Expand All @@ -15,6 +21,7 @@ interface ModelStatResponse {
}
@Injectable()
export class PSCliUsecases {
constructor(private readonly httpService: HttpService) {}
/**
* Get models running in the Cortex C++ server
* @param host Cortex host address
Expand All @@ -25,32 +32,31 @@ export class PSCliUsecases {
port: number = defaultCortexCppPort,
): Promise<ModelStat[]> {
return new Promise<ModelStat[]>((resolve, reject) =>
fetch(`http://${host}:${port}/inferences/server/models`)
firstValueFrom(this.httpService.get(CORTEX_CPP_MODELS_URL(host, port)))
.then((res) => {
if (res.ok) {
res
.json()
.then(({ data }: ModelStatResponse) => {
if (data && Array.isArray(data) && data.length > 0) {
resolve(
data.map((e) => {
const startTime = e.start_time ?? new Date();
const currentTime = new Date();
const duration =
currentTime.getTime() - new Date(startTime).getTime();
return {
modelId: e.id,
engine: e.engine ?? 'cortex.llamacpp',
status: 'running',
duration: this.formatDuration(duration),
ram: e.ram ?? '-',
vram: e.vram ?? '-',
};
}),
);
} else reject();
})
.catch(reject);
const data = res.data as ModelStatResponse;
if (
res.status === HttpStatus.OK &&
data &&
Array.isArray(data.data) &&
data.data.length > 0
) {
resolve(
data.data.map((e) => {
const startTime = e.start_time ?? new Date();
const currentTime = new Date();
const duration =
currentTime.getTime() - new Date(startTime).getTime();
return {
modelId: e.id,
engine: e.engine ?? 'cortex.llamacpp',
status: 'running',
duration: this.formatDuration(duration),
ram: e.ram ?? '-',
vram: e.vram ?? '-',
};
}),
);
} else reject();
})
.catch(reject),
Expand Down
5 changes: 5 additions & 0 deletions cortex-js/src/infrastructure/constants/cortex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ export const CORTEX_CPP_HEALTH_Z_URL = (
port: number = defaultCortexCppPort,
) => `http://${host}:${port}/healthz`;

/**
 * Builds the Cortex C++ server endpoint URL that lists the models
 * currently loaded in the inference server.
 *
 * @param host - server host; falls back to the configured cortex-cpp host
 * @param port - server port; falls back to the configured cortex-cpp port
 * @returns the fully-qualified models endpoint URL
 */
export const CORTEX_CPP_MODELS_URL = (
  host: string = defaultCortexCppHost,
  port: number = defaultCortexCppPort,
) => 'http://' + host + ':' + port + '/inferences/server/models';

// INITIALIZATION
export const CORTEX_RELEASES_URL =
'https://api.github.com/repos/janhq/cortex/releases';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import {
IsArray,
IsBoolean,
IsNumber,
IsOptional,
IsString,
ValidateNested,
} from 'class-validator';
Expand Down Expand Up @@ -29,46 +30,53 @@ export class CreateChatCompletionDto {
description:
'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.',
})
@IsOptional()
@IsBoolean()
stream: boolean;
stream?: boolean;

@ApiProperty({
description:
'Sets the upper limit on the number of tokens the model can generate in a single output.',
})
@IsOptional()
@IsNumber()
max_tokens: number;
max_tokens?: number;

@ApiProperty({
description:
'Defines specific tokens or phrases that signal the model to stop producing further output.',
})
@IsOptional()
@IsArray()
stop: string[];
stop?: string[];

@ApiProperty({
description:
'Modifies the likelihood of the model repeating the same words or phrases within a single output.',
})
@IsOptional()
@IsNumber()
frequency_penalty: number;
frequency_penalty?: number;

@ApiProperty({
description:
'Reduces the likelihood of repeating tokens, promoting novelty in the output.',
})
@IsOptional()
@IsNumber()
presence_penalty: number;
presence_penalty?: number;

@ApiProperty({
description: "Influences the randomness of the model's output.",
})
@IsOptional()
@IsNumber()
temperature: number;
temperature?: number;

@ApiProperty({
description: 'Sets probability threshold for more relevant outputs.',
})
@IsOptional()
@IsNumber()
top_p: number;
top_p?: number;
}
5 changes: 4 additions & 1 deletion cortex-js/src/infrastructure/dtos/cortex/start-cortex.dto.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import { ApiProperty } from '@nestjs/swagger';
import { IsIP, IsNumber, IsString, Max, Min } from 'class-validator';
import { defaultCortexCppHost, defaultCortexCppPort } from '@/infrastructure/constants/cortex';
import {
defaultCortexCppHost,
defaultCortexCppPort,
} from '@/infrastructure/constants/cortex';

export class StartCortexDto {
@ApiProperty({
Expand Down
Loading