Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 494a155

Browse files
committed
feat: refactor cortex API with new model.yaml structure
1 parent b879f66 commit 494a155

30 files changed

+545
-603
lines changed

cortex-js/src/app.module.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { DatabaseModule } from './infrastructure/database/database.module';
77
import { ChatModule } from './usecases/chat/chat.module';
88
import { AssistantsModule } from './usecases/assistants/assistants.module';
99
import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module';
10+
import { ModelRepositoryModule } from './infrastructure/repositories/model/model.module';
1011
import { CortexModule } from './usecases/cortex/cortex.module';
1112
import { ConfigModule } from '@nestjs/config';
1213
import { env } from 'node:process';
@@ -31,6 +32,7 @@ import { FileManagerModule } from './file-manager/file-manager.module';
3132
CortexModule,
3233
ExtensionModule,
3334
FileManagerModule,
35+
ModelRepositoryModule,
3436
],
3537
providers: [SeedService],
3638
})

cortex-js/src/domain/models/model.interface.ts

Lines changed: 52 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,6 @@
1-
/**
2-
* Represents the information about a model.
3-
* @stored
4-
*/
5-
export interface ModelInfo {
6-
id: string;
7-
settings: ModelSettingParams;
8-
parameters: ModelRuntimeParams;
9-
engine?: string;
10-
}
11-
121
export interface ModelArtifact {
13-
url: string;
14-
}
15-
16-
export enum ModelFormat {
17-
GGUF = 'gguf',
18-
API = 'api',
2+
mmproj?: string;
3+
llama_model_path?: string;
194
}
205

216
/**
@@ -24,64 +9,91 @@ export enum ModelFormat {
249
*/
2510
export interface Model {
2611
/**
27-
* The type of the object.
28-
* Default: "model"
12+
* Model identifier.
2913
*/
30-
object: string;
14+
model: string;
3115

3216
/**
33-
* The version of the model.
17+
* GGUF metadata: general.name
3418
*/
35-
version: string;
19+
name?: string;
3620

3721
/**
38-
* The format of the model.
22+
* GGUF metadata: version
3923
*/
40-
format: ModelFormat;
24+
version?: string;
4125

4226
/**
4327
* The model download source. It can be an external url or a local filepath.
4428
*/
45-
sources: ModelArtifact[];
29+
files: string[] | ModelArtifact;
30+
31+
/**
32+
* GGUF metadata: tokenizer.chat_template
33+
*/
34+
prompt_template?: string;
35+
36+
/**
37+
* Defines specific tokens or phrases at which the model will stop generating further output.
38+
*/
39+
end_token?: string[];
40+
41+
/// Inferencing
42+
/**
43+
* Set probability threshold for more relevant outputs.
44+
*/
45+
top_p?: number;
4646

4747
/**
48-
* The model identifier, which can be referenced in the API endpoints.
48+
* Controls the randomness of the model’s output.
4949
*/
50-
id: string;
50+
temperature?: number;
5151

5252
/**
53-
* Human-readable name that is used for UI.
53+
* Adjusts the likelihood of the model repeating words or phrases in its output.
5454
*/
55-
name: string;
55+
frequency_penalty?: number;
56+
57+
/**
58+
* Influences the generation of new and varied concepts in the model’s output.
59+
*/
60+
presence_penalty?: number;
5661

62+
/// Engines
5763
/**
58-
* The Unix timestamp (in seconds) for when the model was created
64+
* The context length for model operations varies; the maximum depends on the specific model used.
5965
*/
60-
created: number;
66+
ctx_length?: number;
6167

6268
/**
63-
* Default: "A cool model from Huggingface"
69+
* Enable real-time data processing for faster predictions.
6470
*/
65-
description: string;
71+
stream?: boolean;
72+
73+
/**
74+
* The maximum number of tokens the model will generate in a single response.
75+
*/
76+
max_tokens?: number;
6677

6778
/**
68-
* The model settings.
79+
* The number of layers to load onto the GPU for acceleration.
6980
*/
70-
settings: ModelSettingParams;
81+
ngl?: number;
7182

7283
/**
73-
* The model runtime parameters.
84+
* The number of parallel operations. Only set when continuous batching is enabled.
7485
*/
75-
parameters: ModelRuntimeParams;
86+
n_parallel?: number;
7687

7788
/**
78-
* Metadata of the model.
89+
* The number of CPU threads used for inference, limited by hardware and OS (the maximum is determined by the system).
7990
*/
80-
metadata: ModelMetadata;
91+
cpu_threads?: number;
92+
8193
/**
8294
* The model engine.
8395
*/
84-
engine: string;
96+
engine?: string;
8597
}
8698

8799
export interface ModelMetadata {

cortex-js/src/domain/models/thread.interface.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { AssistantTool } from './assistant.interface';
2-
import { ModelInfo } from './model.interface';
2+
import { Model } from './model.interface';
33

44
export interface Thread {
55
/** Unique identifier for the thread, generated by default using the ULID method. **/
@@ -40,7 +40,7 @@ export interface ThreadMetadata {
4040
export interface ThreadAssistantInfo {
4141
assistant_id: string;
4242
assistant_name: string;
43-
model: ModelInfo;
43+
model: Model;
4444
instructions?: string;
4545
tools?: AssistantTool[];
4646
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import { Model } from '../models/model.interface';
2+
import { Repository } from './repository.interface';
3+
4+
export abstract class ModelRepository extends Repository<Model> {}

cortex-js/src/file-manager/file-manager.service.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export class FileManagerService {
1010
private configFile = '.cortexrc';
1111
private cortexDirectoryName = 'cortex';
1212
private modelFolderName = 'models';
13+
private extensionFoldername = 'extensions';
1314
private cortexCppFolderName = 'cortex-cpp';
1415

1516
async getConfig(): Promise<Config> {
@@ -75,4 +76,14 @@ export class FileManagerService {
7576
const config = await this.getConfig();
7677
return config.dataFolderPath;
7778
}
79+
80+
async getModelsPath(): Promise<string> {
81+
const dataFolderPath = await this.getDataFolderPath();
82+
return join(dataFolderPath, this.modelFolderName);
83+
}
84+
85+
async getExtensionsPath(): Promise<string> {
86+
const dataFolderPath = await this.getDataFolderPath();
87+
return join(dataFolderPath, this.extensionFoldername);
88+
}
7889
}

cortex-js/src/infrastructure/commanders/models/model-pull.command.ts

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,14 +101,12 @@ export class ModelPullCommand extends CommandRunner {
101101

102102
const repo: RepoDesignation = { type: 'model', name: sanitizedRepoName };
103103
let ggufUrl: string | undefined = undefined;
104-
let fileSize = 0;
105104
for await (const fileInfo of listFiles({
106105
repo: repo,
107106
revision: revision,
108107
})) {
109108
if (fileInfo.path.endsWith('.gguf')) {
110109
ggufUrl = `https://huggingface.co/${sanitizedRepoName}/resolve/${revision}/${fileInfo.path}`;
111-
fileSize = fileInfo.size;
112110
break;
113111
}
114112
}
@@ -121,7 +119,6 @@ export class ModelPullCommand extends CommandRunner {
121119
await this.modelsCliUsecases.pullModelWithExactUrl(
122120
`${sanitizedRepoName}/${revision}`,
123121
ggufUrl,
124-
fileSize,
125122
);
126123
}
127124
}

cortex-js/src/infrastructure/commanders/models/model-start.command.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ export class ModelStartCommand extends CommandRunner {
4848
message: 'Select a model to start:',
4949
choices: models.map((e) => ({
5050
name: e.name,
51-
value: e.id,
51+
value: e.model,
5252
})),
5353
});
5454
return model;

cortex-js/src/infrastructure/commanders/models/model-update.command.ts

Lines changed: 4 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
11
import { CommandRunner, SubCommand, Option } from 'nest-commander';
22
import { ModelsCliUsecases } from '../usecases/models.cli.usecases';
33
import { exit } from 'node:process';
4-
import { ModelParameterParser } from '../utils/model-parameter.parser';
5-
import {
6-
ModelRuntimeParams,
7-
ModelSettingParams,
8-
} from '@/domain/models/model.interface';
4+
import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto';
95

106
type UpdateOptions = {
117
model?: string;
@@ -31,42 +27,13 @@ export class ModelUpdateCommand extends CommandRunner {
3127
exit(0);
3228
}
3329

34-
const parser = new ModelParameterParser();
35-
const settingParams: ModelSettingParams = {};
36-
const runtimeParams: ModelRuntimeParams = {};
30+
const toUpdate: UpdateModelDto = {};
3731

3832
options.forEach((option) => {
3933
const [key, stringValue] = option.split('=');
40-
if (parser.isModelSettingParam(key)) {
41-
const value = parser.parse(key, stringValue);
42-
// @ts-expect-error did the check so it's safe
43-
settingParams[key] = value;
44-
} else if (parser.isModelRuntimeParam(key)) {
45-
const value = parser.parse(key, stringValue);
46-
// @ts-expect-error did the check so it's safe
47-
runtimeParams[key] = value;
48-
}
34+
// Use a computed property name so the parsed option key itself is set on the DTO; the shorthand `{ key, stringValue }` would only ever write fields literally named "key" and "stringValue".
Object.assign(toUpdate, { [key]: stringValue });
4935
});
50-
51-
if (Object.keys(settingParams).length > 0) {
52-
const updatedSettingParams =
53-
await this.modelsCliUsecases.updateModelSettingParams(
54-
modelId,
55-
settingParams,
56-
);
57-
console.log(
58-
'Updated setting params! New setting params:',
59-
updatedSettingParams,
60-
);
61-
}
62-
63-
if (Object.keys(runtimeParams).length > 0) {
64-
await this.modelsCliUsecases.updateModelRuntimeParams(
65-
modelId,
66-
runtimeParams,
67-
);
68-
console.log('Updated runtime params! New runtime params:', runtimeParams);
69-
}
36+
this.modelsCliUsecases.updateModel(modelId, toUpdate);
7037
}
7138

7239
@Option({

cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ export class RunCommand extends CommandRunner {
6262
message: 'Select a model to start:',
6363
choices: models.map((e) => ({
6464
name: e.name,
65-
value: e.id,
65+
value: e.model,
6666
})),
6767
});
6868
return model;

cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ import { Thread } from '@/domain/models/thread.interface';
1515
import { CreateThreadDto } from '@/infrastructure/dtos/threads/create-thread.dto';
1616
import { AssistantsUsecases } from '@/usecases/assistants/assistants.usecases';
1717
import { CreateThreadAssistantDto } from '@/infrastructure/dtos/threads/create-thread-assistant.dto';
18-
import { CreateThreadModelInfoDto } from '@/infrastructure/dtos/threads/create-thread-model-info.dto';
1918
import { ModelsUsecases } from '@/usecases/models/models.usecases';
2019
import stream from 'stream';
2120
import { CreateMessageDto } from '@/infrastructure/dtos/messages/create-message.dto';
@@ -194,16 +193,10 @@ export class ChatCliUsecases {
194193
const assistant = await this.assistantUsecases.findOne('jan');
195194
if (!assistant) throw new Error('No assistant available');
196195

197-
const createThreadModel: CreateThreadModelInfoDto = {
198-
id: modelId,
199-
settings: model.settings,
200-
parameters: model.parameters,
201-
};
202-
203196
const assistantDto: CreateThreadAssistantDto = {
204197
assistant_id: assistant.id,
205198
assistant_name: assistant.name,
206-
model: createThreadModel,
199+
model: model,
207200
};
208201

209202
const createThreadDto: CreateThreadDto = {

0 commit comments

Comments
 (0)