diff --git a/cortex-js/.eslintrc.cjs b/cortex-js/.eslintrc.cjs new file mode 100644 index 000000000..f116c675b --- /dev/null +++ b/cortex-js/.eslintrc.cjs @@ -0,0 +1,46 @@ +module.exports = { + parser: '@typescript-eslint/parser', + parserOptions: { + project: 'tsconfig.json', + tsconfigRootDir: __dirname, + sourceType: 'module', + }, + plugins: ['@typescript-eslint/eslint-plugin'], + extends: [ + 'plugin:@typescript-eslint/recommended', + 'plugin:prettier/recommended', + ], + root: true, + env: { + node: true, + jest: true, + }, + ignorePatterns: ['.eslintrc.js'], + rules: { + '@typescript-eslint/interface-name-prefix': 'off', + '@typescript-eslint/explicit-function-return-type': 'off', + '@typescript-eslint/explicit-module-boundary-types': 'off', + '@typescript-eslint/no-explicit-any': 'off', + '@typescript-eslint/no-unused-vars': ['warn'], + '@typescript-eslint/no-floating-promises': 'warn', + '@typescript-eslint/no-var-requires': 'warn', + '@typescript-eslint/ban-types': 'warn', + 'no-unused-vars': 'off', + 'require-await': 'off', + 'prefer-const': 'warn', + 'no-restricted-syntax': [ + 'warn', + { + selector: + 'CallExpression[callee.object.name=configService][callee.property.name=/^(get|getOrThrow)$/]:not(:has([arguments.1] Property[key.name=infer][value.value=true])), CallExpression[callee.object.property.name=configService][callee.property.name=/^(get|getOrThrow)$/]:not(:has([arguments.1] Property[key.name=infer][value.value=true]))', + message: + 'Add "{ infer: true }" to configService.get() for correct typechecking. 
Example: configService.get("database.port", { infer: true })', + }, + { + selector: + 'CallExpression[callee.name=it][arguments.0.value!=/^should/]', + message: '"it" should start with "should"', + }, + ], + }, +}; diff --git a/cortex-js/.eslintrc.js b/cortex-js/.eslintrc.js deleted file mode 100644 index 448a2d910..000000000 --- a/cortex-js/.eslintrc.js +++ /dev/null @@ -1,31 +0,0 @@ -module.exports = { - parser: '@typescript-eslint/parser', - parserOptions: { - project: 'tsconfig.json', - tsconfigRootDir: __dirname, - sourceType: 'module', - }, - plugins: ['@typescript-eslint/eslint-plugin'], - extends: [ - 'plugin:@typescript-eslint/recommended', - 'plugin:prettier/recommended', - ], - root: true, - env: { - node: true, - jest: true, - }, - ignorePatterns: ['.eslintrc.js'], - rules: { - '@typescript-eslint/interface-name-prefix': 'off', - '@typescript-eslint/explicit-function-return-type': 'off', - '@typescript-eslint/explicit-module-boundary-types': 'off', - '@typescript-eslint/no-explicit-any': 'off', - "prettier/prettier": [ - "error", - { - "endOfLine": "auto" - }, - ], - }, -}; diff --git a/cortex-js/package.json b/cortex-js/package.json index 52214d2e0..2ecd7fdf9 100644 --- a/cortex-js/package.json +++ b/cortex-js/package.json @@ -24,7 +24,7 @@ "start:dev": "nest start --watch", "start:debug": "nest start --debug --watch", "start:prod": "node dist/src/main --trace-deprecation", - "lint": "eslint \"{src,apps,libs,test}/**/*.ts\" --fix", + "lint": "eslint \"{src,apps,libs,test}/**/*.ts\"", "test": "jest", "test:watch": "jest --watch", "test:cov": "jest --coverage", @@ -39,8 +39,6 @@ }, "dependencies": { "@cortexso/cortex.js": "^0.1.5", - "@huggingface/gguf": "^0.1.5", - "@huggingface/hub": "^0.15.1", "@nestjs/axios": "^3.0.2", "@nestjs/common": "^10.0.0", "@nestjs/config": "^3.2.2", @@ -59,6 +57,7 @@ "cortex-cpp": "0.4.34", "cpu-instructions": "^0.0.11", "decompress": "^4.2.1", + "hyllama": "^0.2.2", "js-yaml": "^4.1.0", "nest-commander": "^3.13.0", "ora": 
"5.4.1", @@ -88,14 +87,16 @@ "@types/supertest": "^6.0.2", "@types/update-notifier": "^6.0.8", "@types/uuid": "^9.0.8", - "@typescript-eslint/eslint-plugin": "^6.0.0", - "@typescript-eslint/parser": "^6.0.0", + "@typescript-eslint/eslint-plugin": "7.16.1", + "@typescript-eslint/parser": "7.16.1", "@vercel/ncc": "^0.38.0", "@yao-pkg/pkg": "^5.12.0", "cpx": "^1.5.0", - "eslint": "^8.42.0", - "eslint-config-prettier": "^9.0.0", - "eslint-plugin-prettier": "^5.0.0", + "env-cmd": "10.1.0", + "eslint": "8.57.0", + "eslint-config-prettier": "9.1.0", + "eslint-plugin-import": "2.29.1", + "eslint-plugin-prettier": "5.2.1", "hanbi": "^1.0.3", "is-primitive": "^3.0.1", "jest": "^29.5.0", @@ -112,10 +113,6 @@ "tsconfig-paths": "^4.2.0", "typescript": "^5.1.3" }, - "resolutions": { - "ajv": "8.15.0", - "whatwg-url": "14.0.0" - }, "files": [ "dist" ], diff --git a/cortex-js/src/infrastructure/commanders/chat.command.ts b/cortex-js/src/infrastructure/commanders/chat.command.ts index 2b5adda53..260418d89 100644 --- a/cortex-js/src/infrastructure/commanders/chat.command.ts +++ b/cortex-js/src/infrastructure/commanders/chat.command.ts @@ -93,7 +93,7 @@ export class ChatCommand extends BaseCommand { ) { console.log('Downloading engine...'); await this.cortex.engines.init(engine); - await downloadProgress(this.cortex, undefined, DownloadType.Engine) + await downloadProgress(this.cortex, undefined, DownloadType.Engine); } if (!message) options.attach = true; @@ -108,13 +108,13 @@ export class ChatCommand extends BaseCommand { ); const preset = await this.fileService.getPreset(options.preset); - + return this.chatClient.chat( - modelId, - options.threadId, - message, // Accept both message from inputs or arguments - preset ? preset : {}, - ) + modelId, + options.threadId, + message, // Accept both message from inputs or arguments + preset ? 
preset : {}, + ); } modelInquiry = async (models: Cortex.Model[]) => { diff --git a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts index 563f08214..5afe01ca7 100644 --- a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts +++ b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts @@ -84,10 +84,7 @@ export class CortexCommand extends CommandRunner { return this.startServer(showLogs, dataFolderPath); } - private async startServer( - attach: boolean, - dataFolderPath?: string, - ) { + private async startServer(attach: boolean, dataFolderPath?: string) { const config = await this.fileManagerService.getConfig(); try { const startEngineSpinner = ora('Starting Cortex engine...'); @@ -119,9 +116,13 @@ export class CortexCommand extends CommandRunner { } else { await this.cortexUseCases.startServerDetached(this.host, this.port); } - console.log(chalk.blue(`Started server at http://${this.host}:${this.port}`)); console.log( - chalk.blue(`API Playground available at http://${this.host}:${this.port}/api`), + chalk.blue(`Started server at http://${this.host}:${this.port}`), + ); + console.log( + chalk.blue( + `API Playground available at http://${this.host}:${this.port}/api`, + ), ); await this.fileManagerService.writeConfigFile({ ...config, diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts index 14940e1c7..5e1a4a812 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts @@ -19,6 +19,7 @@ import { downloadProgress } from '@/utils/download-progress'; import { CortexClient } from '../services/cortex.client'; import { DownloadType } from '@/domain/models/download.interface'; import ora from 'ora'; +import { isLocalFile } from '@/utils/urls'; @SubCommand({ 
name: 'pull', @@ -61,9 +62,8 @@ export class ModelPullCommand extends BaseCommand { exit(1); }); - ora().succeed('Model downloaded'); - await downloadProgress(this.cortex, modelId); + ora().succeed('Model downloaded'); const existingModel = await this.cortex.models.retrieve(modelId); const engine = existingModel?.engine || Engines.llamaCPP; diff --git a/cortex-js/src/infrastructure/commanders/run.command.ts b/cortex-js/src/infrastructure/commanders/run.command.ts index 45416847f..086e6f701 100644 --- a/cortex-js/src/infrastructure/commanders/run.command.ts +++ b/cortex-js/src/infrastructure/commanders/run.command.ts @@ -13,6 +13,8 @@ import { ChatClient } from './services/chat-client'; import { downloadProgress } from '@/utils/download-progress'; import { CortexClient } from './services/cortex.client'; import { DownloadType } from '@/domain/models/download.interface'; +import { isLocalFile } from '@/utils/urls'; +import { parse } from 'node:path'; type RunOptions = { threadId?: string; @@ -71,6 +73,12 @@ export class RunCommand extends BaseCommand { await downloadProgress(this.cortex, modelId); checkingSpinner.succeed('Model downloaded'); + // Update to persisted modelId + // TODO: Should be retrieved from the request + if (isLocalFile(modelId)) { + modelId = parse(modelId).name; + } + // Second check if model is available existingModel = await this.cortex.models.retrieve(modelId); if (!existingModel) { @@ -93,6 +101,7 @@ export class RunCommand extends BaseCommand { } const startingSpinner = ora('Loading model...').start(); + return this.cortex.models .start(modelId, await this.fileService.getPreset(options.preset)) .then(() => { diff --git a/cortex-js/src/infrastructure/commanders/services/chat-client.ts b/cortex-js/src/infrastructure/commanders/services/chat-client.ts index 7da01e3e7..f617f83bc 100644 --- a/cortex-js/src/infrastructure/commanders/services/chat-client.ts +++ b/cortex-js/src/infrastructure/commanders/services/chat-client.ts @@ -55,7 +55,14 @@ 
export class ChatClient { }); rl.on('line', (input) => - this.sendCompletionMessage(input, messages, modelId, thread.id, rl, settings), + this.sendCompletionMessage( + input, + messages, + modelId, + thread.id, + rl, + settings, + ), ); } diff --git a/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts b/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts index d6829e0c4..83259eb8b 100644 --- a/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts +++ b/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts @@ -1,149 +1,146 @@ -import { TestingModule } from '@nestjs/testing'; -import { spy, Stub, stubMethod } from 'hanbi'; -import { CommandTestFactory } from 'nest-commander-testing'; -import { CommandModule } from '@/command.module'; -import { LogService } from '@/infrastructure/commanders/test/log.service'; -import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; - -import { join } from 'path'; -import { rmSync } from 'fs'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; - -let commandInstance: TestingModule, - exitSpy: Stub, - stdoutSpy: Stub, - stderrSpy: Stub; +// import { TestingModule } from '@nestjs/testing'; +// import { spy, Stub, stubMethod } from 'hanbi'; +// import { CommandTestFactory } from 'nest-commander-testing'; +// import { CommandModule } from '@/command.module'; +// import { LogService } from '@/infrastructure/commanders/test/log.service'; +// import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; + +// import { join } from 'path'; +// import { rmSync } from 'fs'; +// import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; + +// let commandInstance: TestingModule, +// exitSpy: Stub, +// stdoutSpy: Stub, +// stderrSpy: Stub; export const timeout = 500000; -beforeAll( - () => - new Promise(async (res) => { - stubMethod(process.stderr, 'write'); - exitSpy = 
stubMethod(process, 'exit'); - stdoutSpy = stubMethod(process.stdout, 'write'); - stderrSpy = stubMethod(process.stderr, 'write'); - commandInstance = await CommandTestFactory.createTestingCommand({ - imports: [CommandModule], - }) - .overrideProvider(LogService) - .useValue({ log: spy().handler }) - .compile(); - - const fileService = - await commandInstance.resolve(FileManagerService); - - // Attempt to create test folder - await fileService.writeConfigFile({ - dataFolderPath: join(__dirname, 'test_data'), - cortexCppHost: 'localhost', - cortexCppPort: 3929, - }); - const cortexUseCases = - await commandInstance.resolve(CortexUsecases); - jest - .spyOn(cortexUseCases, 'isAPIServerOnline') - .mockImplementation(() => Promise.resolve(true)); - res(); - }), -); - -afterEach(() => { - stdoutSpy.reset(); - stderrSpy.reset(); - exitSpy.reset(); -}); - -afterAll( - () => - new Promise(async (res) => { - // Attempt to clean test folder - rmSync(join(__dirname, 'test_data'), { - recursive: true, - force: true, - }); - res(); - }), -); +// beforeAll( +// () => +// new Promise(async (res) => { +// stubMethod(process.stderr, 'write'); +// exitSpy = stubMethod(process, 'exit'); +// stdoutSpy = stubMethod(process.stdout, 'write'); +// stderrSpy = stubMethod(process.stderr, 'write'); +// commandInstance = await CommandTestFactory.createTestingCommand({ +// imports: [CommandModule], +// }) +// .overrideProvider(LogService) +// .useValue({ log: spy().handler }) +// .compile(); + +// const fileService = +// await commandInstance.resolve(FileManagerService); + +// // Attempt to create test folder +// await fileService.writeConfigFile({ +// dataFolderPath: join(__dirname, 'test_data'), +// cortexCppHost: 'localhost', +// cortexCppPort: 3929, +// }); +// const cortexUseCases = +// await commandInstance.resolve(CortexUsecases); +// jest +// .spyOn(cortexUseCases, 'isAPIServerOnline') +// .mockImplementation(() => Promise.resolve(true)); +// res(); +// }), +// ); + +// afterEach(() => 
{ +// stdoutSpy.reset(); +// stderrSpy.reset(); +// exitSpy.reset(); +// }); + +// afterAll( +// () => +// new Promise(async (res) => { +// // Attempt to clean test folder +// rmSync(join(__dirname, 'test_data'), { +// recursive: true, +// force: true, +// }); +// res(); +// }), +// ); describe('Helper commands', () => { - // test( - // 'Init with hardware auto detection', - // async () => { - // await CommandTestFactory.run(commandInstance, ['init', '-s']); - // - // // Wait for a brief period to allow the command to execute - // await new Promise((resolve) => setTimeout(resolve, 1000)); - // - // expect(stdoutSpy.firstCall?.args.length).toBeGreaterThan(0); - // }, - // timeout, - // ); - - // test('Chat with option -m', async () => { - // const logMock = stubMethod(console, 'log'); - // - // await CommandTestFactory.run(commandInstance, [ - // 'chat', - // // '-m', - // // 'hello', - // // '>output.txt', - // ]); - // expect(logMock.firstCall?.args[0]).toBe("Inorder to exit, type 'exit()'."); - // // expect(exitSpy.callCount).toBe(1); - // // expect(exitSpy.firstCall?.args[0]).toBe(1); - // }); - test( - 'Show stop running models', + 'Init with hardware auto detection', async () => { - // const tableMock = stubMethod(console, 'table'); - - // const logMock = stubMethod(console, 'log'); - // await CommandTestFactory.run(commandInstance, ['stop']); - // await CommandTestFactory.run(commandInstance, ['ps']); - - // expect(logMock.firstCall?.args[0]).toEqual('API server stopped'); - // expect(tableMock.firstCall?.args[0]).toBeInstanceOf(Array); - // expect(tableMock.firstCall?.args[0].length).toEqual(0); + // // await CommandTestFactory.run(commandInstance, ['init', '-s']); + // // + // // // Wait for a brief period to allow the command to execute + // // await new Promise((resolve) => setTimeout(resolve, 1000)); + // // + // // expect(stdoutSpy.firstCall?.args.length).toBeGreaterThan(0); }, timeout, ); - test('Help command return guideline to users', async () => { - 
// await CommandTestFactory.run(commandInstance, ['-h']); - // expect(stdoutSpy.firstCall?.args).toBeInstanceOf(Array); - // expect(stdoutSpy.firstCall?.args.length).toBe(1); - // expect(stdoutSpy.firstCall?.args[0]).toContain('display help for command'); - - // expect(exitSpy.callCount).toBeGreaterThan(1); - // expect(exitSpy.firstCall?.args[0]).toBe(0); - }); - - test('Should handle missing command', async () => { - // await CommandTestFactory.run(commandInstance, ['--unknown']); - // expect(stderrSpy.firstCall?.args[0]).toContain('error: unknown option'); - // expect(stderrSpy.firstCall?.args[0]).toContain('--unknown'); - // expect(exitSpy.callCount).toBeGreaterThan(0); - // expect(exitSpy.firstCall?.args[0]).toBe(1); - }); - - // test('Local API server via default host/port localhost:1337/api', async () => { - // await CommandTestFactory.run(commandInstance, ['serve', '--detach']); - // - // await new Promise((resolve) => setTimeout(resolve, 2000)); - // - // expect(stdoutSpy.firstCall?.args[0]).toContain( - // 'Started server at http://localhost:1337', - // ); - // // Add a delay - // // Temporally disable for further investigation - // return new Promise(async (resolve) => { - // setTimeout(async () => { - // // Send a request to the API server to check if it's running - // const response = await axios.get('http://localhost:1337/api'); - // expect(response.status).toBe(200); - // resolve(); - // }, 5000); - // }); - // }, 15000); + // // test('Chat with option -m', async () => { + // // const logMock = stubMethod(console, 'log'); + // // + // // await CommandTestFactory.run(commandInstance, [ + // // 'chat', + // // // '-m', + // // // 'hello', + // // // '>output.txt', + // // ]); + // // expect(logMock.firstCall?.args[0]).toBe("Inorder to exit, type 'exit()'."); + // // // expect(exitSpy.callCount).toBe(1); + // // // expect(exitSpy.firstCall?.args[0]).toBe(1); }); + +// test( +// 'Show stop running models', +// async () => { +// // const tableMock = 
stubMethod(console, 'table'); +// // const logMock = stubMethod(console, 'log'); +// // await CommandTestFactory.run(commandInstance, ['stop']); +// // await CommandTestFactory.run(commandInstance, ['ps']); +// // expect(logMock.firstCall?.args[0]).toEqual('API server stopped'); +// // expect(tableMock.firstCall?.args[0]).toBeInstanceOf(Array); +// // expect(tableMock.firstCall?.args[0].length).toEqual(0); +// }, +// timeout, +// ); + +// test('Help command return guideline to users', async () => { +// // await CommandTestFactory.run(commandInstance, ['-h']); +// // expect(stdoutSpy.firstCall?.args).toBeInstanceOf(Array); +// // expect(stdoutSpy.firstCall?.args.length).toBe(1); +// // expect(stdoutSpy.firstCall?.args[0]).toContain('display help for command'); +// // expect(exitSpy.callCount).toBeGreaterThan(1); +// // expect(exitSpy.firstCall?.args[0]).toBe(0); +// }); + +// test('Should handle missing command', async () => { +// // await CommandTestFactory.run(commandInstance, ['--unknown']); +// // expect(stderrSpy.firstCall?.args[0]).toContain('error: unknown option'); +// // expect(stderrSpy.firstCall?.args[0]).toContain('--unknown'); +// // expect(exitSpy.callCount).toBeGreaterThan(0); +// // expect(exitSpy.firstCall?.args[0]).toBe(1); +// }); + +// // test('Local API server via default host/port localhost:1337/api', async () => { +// // await CommandTestFactory.run(commandInstance, ['serve', '--detach']); +// // +// // await new Promise((resolve) => setTimeout(resolve, 2000)); +// // +// // expect(stdoutSpy.firstCall?.args[0]).toContain( +// // 'Started server at http://localhost:1337', +// // ); +// // // Add a delay +// // // Temporally disable for further investigation +// // return new Promise(async (resolve) => { +// // setTimeout(async () => { +// // // Send a request to the API server to check if it's running +// // const response = await axios.get('http://localhost:1337/api'); +// // expect(response.status).toBe(200); +// // resolve(); +// // }, 
5000); +// // }); +// // }, 15000); +// }); diff --git a/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts b/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts index a1c6fa0e2..27b655cd6 100644 --- a/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts +++ b/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts @@ -2,4 +2,5 @@ export interface ModelMetadata { stopWord?: string; promptTemplate: string; version: number; + name?: string; } diff --git a/cortex-js/src/infrastructure/constants/benchmark.ts b/cortex-js/src/infrastructure/constants/benchmark.ts index 4c253d46f..477c4eba3 100644 --- a/cortex-js/src/infrastructure/constants/benchmark.ts +++ b/cortex-js/src/infrastructure/constants/benchmark.ts @@ -28,7 +28,7 @@ export const defaultBenchmarkConfiguration: BenchmarkConfig = { min: 1024, max: 2048, samples: 10, - }, + }, output: 'table', hardware: ['cpu', 'gpu', 'psu', 'chassis', 'ram'], concurrency: 1, diff --git a/cortex-js/src/infrastructure/controllers/engines.controller.ts b/cortex-js/src/infrastructure/controllers/engines.controller.ts index 2aa770aab..0a25f74e0 100644 --- a/cortex-js/src/infrastructure/controllers/engines.controller.ts +++ b/cortex-js/src/infrastructure/controllers/engines.controller.ts @@ -80,15 +80,19 @@ export class EnginesController { description: 'The unique identifier of the engine.', }) @Post(':name(*)/init') - initialize(@Param('name') name: string, @Body() body: InitEngineDto | undefined, @Res() res: Response) { - try{ - this.initUsescases.installEngine(body, name, true); - res.json({ + initialize( + @Param('name') name: string, + @Body() body: InitEngineDto | undefined, + @Res() res: Response, + ) { + try { + this.initUsescases.installEngine(body, name, true); + res.json({ message: 'Engine initialization started successfully.', - }) - } catch (error) { - res.status(400).send(error.message); - } + }); + } catch (error) { + 
res.status(400).send(error.message); + } } @HttpCode(200) diff --git a/cortex-js/src/infrastructure/controllers/models.controller.ts b/cortex-js/src/infrastructure/controllers/models.controller.ts index 71933d005..dc0043166 100644 --- a/cortex-js/src/infrastructure/controllers/models.controller.ts +++ b/cortex-js/src/infrastructure/controllers/models.controller.ts @@ -138,10 +138,11 @@ export class ModelsController { @Post(':modelId(*)/pull') pullModel( @Param('modelId') modelId: string, - @Body() body?: { + @Body() + body?: { fileName?: string; persistedModelId?: string; - } + }, ) { const { fileName, persistedModelId } = body || {}; this.modelsUsecases diff --git a/cortex-js/src/infrastructure/controllers/system.controller.ts b/cortex-js/src/infrastructure/controllers/system.controller.ts index f4d73eed3..da16ae38c 100644 --- a/cortex-js/src/infrastructure/controllers/system.controller.ts +++ b/cortex-js/src/infrastructure/controllers/system.controller.ts @@ -51,7 +51,7 @@ export class SystemController { } @ApiOperation({ - summary: "Get health status", + summary: 'Get health status', description: "Retrieves the health status of your Cortex's system.", }) @HttpCode(200) diff --git a/cortex-js/src/infrastructure/database/providers/assistant.providers.ts b/cortex-js/src/infrastructure/database/providers/assistant.providers.ts index 02ed0c90d..7a7dc9462 100644 --- a/cortex-js/src/infrastructure/database/providers/assistant.providers.ts +++ b/cortex-js/src/infrastructure/database/providers/assistant.providers.ts @@ -4,7 +4,7 @@ import { Sequelize } from 'sequelize-typescript'; export const assistantProviders = [ { provide: 'ASSISTANT_REPOSITORY', - useFactory: async(sequelize: Sequelize) =>{ + useFactory: async (sequelize: Sequelize) => { return sequelize.getRepository(AssistantEntity); }, inject: ['DATA_SOURCE'], diff --git a/cortex-js/src/infrastructure/database/providers/message.providers.ts b/cortex-js/src/infrastructure/database/providers/message.providers.ts 
index aa6cc9261..9b1b50227 100644 --- a/cortex-js/src/infrastructure/database/providers/message.providers.ts +++ b/cortex-js/src/infrastructure/database/providers/message.providers.ts @@ -1,10 +1,10 @@ -import { MessageEntity } from "@/infrastructure/entities/message.entity"; -import { Sequelize } from "sequelize-typescript"; +import { MessageEntity } from '@/infrastructure/entities/message.entity'; +import { Sequelize } from 'sequelize-typescript'; export const messageProviders = [ { provide: 'MESSAGE_REPOSITORY', - useFactory: async(sequelize: Sequelize) =>{ + useFactory: async (sequelize: Sequelize) => { return sequelize.getRepository(MessageEntity); }, inject: ['DATA_SOURCE'], diff --git a/cortex-js/src/infrastructure/database/providers/thread.providers.ts b/cortex-js/src/infrastructure/database/providers/thread.providers.ts index 8f91cb9a7..0db54a6dd 100644 --- a/cortex-js/src/infrastructure/database/providers/thread.providers.ts +++ b/cortex-js/src/infrastructure/database/providers/thread.providers.ts @@ -4,10 +4,9 @@ import { Sequelize } from 'sequelize-typescript'; export const threadProviders = [ { provide: 'THREAD_REPOSITORY', - useFactory: async(sequelize: Sequelize) =>{ + useFactory: async (sequelize: Sequelize) => { return sequelize.getRepository(ThreadEntity); }, inject: ['DATA_SOURCE'], }, ]; - diff --git a/cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts b/cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts index 2e3ed859d..19fd17999 100644 --- a/cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts +++ b/cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts @@ -2,13 +2,13 @@ import { IsString } from 'class-validator'; import { ApiProperty } from '@nestjs/swagger'; export class ChatCompletionMessage { - @ApiProperty({ description: 'The Content of the chat message.', }) + @ApiProperty({ description: 'The Content of the chat message.' 
}) @IsString() content: string; @ApiProperty({ description: 'The role of the entity in the chat completion.', - example: 'user' + example: 'user', }) role: 'user' | 'assistant'; } diff --git a/cortex-js/src/infrastructure/dtos/chat/create-chat-completion.dto.ts b/cortex-js/src/infrastructure/dtos/chat/create-chat-completion.dto.ts index d7d421e3b..07b2f429c 100644 --- a/cortex-js/src/infrastructure/dtos/chat/create-chat-completion.dto.ts +++ b/cortex-js/src/infrastructure/dtos/chat/create-chat-completion.dto.ts @@ -30,7 +30,7 @@ export class CreateChatCompletionDto { @ApiProperty({ description: 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.', - example: true, + example: true, }) @IsOptional() @IsBoolean() @@ -39,7 +39,7 @@ export class CreateChatCompletionDto { @ApiProperty({ description: 'Sets the upper limit on the number of tokens the model can generate in a single output.', - example: 4096, + example: 4096, }) @IsOptional() @IsNumber() @@ -48,7 +48,7 @@ export class CreateChatCompletionDto { @ApiProperty({ description: 'Defines specific tokens or phrases that signal the model to stop producing further output.', - example: ["End"], + example: ['End'], }) @IsOptional() @IsArray() @@ -57,7 +57,7 @@ export class CreateChatCompletionDto { @ApiProperty({ description: 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', - example: 0.2, + example: 0.2, }) @IsOptional() @IsNumber() @@ -66,7 +66,7 @@ export class CreateChatCompletionDto { @ApiProperty({ description: 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', - example: 0.6, + example: 0.6, }) @IsOptional() @IsNumber() diff --git a/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts b/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts index 
3d7cf65b3..dac89dcfc 100644 --- a/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts +++ b/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts @@ -15,13 +15,15 @@ export class EmbeddingsResponseDto { model: string; @ApiProperty({ - description: 'The embedding vector represented as an array of floating-point numbers. ', + description: + 'The embedding vector represented as an array of floating-point numbers. ', type: [Number], }) embedding: [number]; @ApiProperty({ - description: 'Details of token usage, including prompt_tokens and total_tokens.', + description: + 'Details of token usage, including prompt_tokens and total_tokens.', type: UsageDto, }) usage: UsageDto; diff --git a/cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts b/cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts index 6c81f10c9..a99ad86b6 100644 --- a/cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts +++ b/cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts @@ -28,8 +28,8 @@ export class CreateMessageDto implements Partial { example: [ { type: 'text', - data: 'Hello, how can I help you today?' 
- } + data: 'Hello, how can I help you today?', + }, ], description: 'The content of the messages.', }) @@ -44,7 +44,8 @@ export class CreateMessageDto implements Partial { @ApiProperty({ example: { urgency: 'high', tags: ['customer_support'] }, - description: 'Optional dictionary for additional unstructured message information.', + description: + 'Optional dictionary for additional unstructured message information.', }) metadata?: Record; @@ -55,4 +56,3 @@ export class CreateMessageDto implements Partial { @IsString() type?: string; } - diff --git a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts index e3998d0fd..19cca03c1 100644 --- a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts +++ b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts @@ -12,7 +12,10 @@ import { ApiProperty, getSchemaPath } from '@nestjs/swagger'; export class CreateModelDto implements Partial { // Cortex Meta - @ApiProperty({ description: 'The unique identifier of the model.', example: 'mistral' }) + @ApiProperty({ + description: 'The unique identifier of the model.', + example: 'mistral', + }) @IsString() model: string; @@ -21,7 +24,8 @@ export class CreateModelDto implements Partial { name?: string; @ApiProperty({ - description: 'The URL sources from which the model downloaded or accessed.', example: ['https://huggingface.co/cortexso/mistral/tree/gguf'], + description: 'The URL sources from which the model downloaded or accessed.', + example: ['https://huggingface.co/cortexso/mistral/tree/gguf'], oneOf: [ { type: 'array', items: { type: 'string' } }, { $ref: getSchemaPath(ModelArtifactDto) }, @@ -33,8 +37,9 @@ export class CreateModelDto implements Partial { // Model Input / Output Syntax @ApiProperty({ description: - "A predefined text or framework that guides the AI model's response generation.", example: ` - You are an expert in {subject}. 
Provide a detailed and thorough explanation on the topic of {topic}.` + "A predefined text or framework that guides the AI model's response generation.", + example: ` + You are an expert in {subject}. Provide a detailed and thorough explanation on the topic of {topic}.`, }) @IsOptional() @IsString() @@ -42,7 +47,8 @@ export class CreateModelDto implements Partial { @ApiProperty({ description: - 'Defines specific tokens or phrases that signal the model to stop producing further output.', example: ['End'] + 'Defines specific tokens or phrases that signal the model to stop producing further output.', + example: ['End'], }) @IsOptional() @IsArray() @@ -59,14 +65,16 @@ export class CreateModelDto implements Partial { max_tokens?: number; @ApiProperty({ - description: 'Sets probability threshold for more relevant outputs.', example: 0.9 + description: 'Sets probability threshold for more relevant outputs.', + example: 0.9, }) @IsOptional() @IsNumber() top_p?: number; @ApiProperty({ - description: "Influences the randomness of the model's output.", example: 0.7 + description: "Influences the randomness of the model's output.", + example: 0.7, }) @IsOptional() @IsNumber() @@ -74,7 +82,8 @@ export class CreateModelDto implements Partial { @ApiProperty({ description: - 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', example: 0.5 + 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', + example: 0.5, }) @IsOptional() @IsNumber() @@ -82,7 +91,8 @@ export class CreateModelDto implements Partial { @ApiProperty({ description: - 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', example: 0.6 + 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', + example: 0.6, }) @IsOptional() @IsNumber() @@ -90,7 +100,8 @@ export class CreateModelDto implements Partial { @ApiProperty({ description: - 'Determines the format for output 
generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.', example: true + 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.', + example: true, }) @IsOptional() @IsBoolean() diff --git a/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts b/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts index 8bf2d4204..fbca1138c 100644 --- a/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts +++ b/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts @@ -17,7 +17,7 @@ export class CreateThreadAssistantDto implements Assistant { @ApiProperty({ example: 'https://example.com/avatar.png', - description: 'URL of the assistant\'s avatar image.', + description: "URL of the assistant's avatar image.", type: 'string', }) @IsOptional() @@ -34,23 +34,32 @@ export class CreateThreadAssistantDto implements Assistant { @ApiProperty({ example: 'mistral', - description: 'The model\'s unique identifier and settings.', + description: "The model's unique identifier and settings.", type: 'string', }) @IsString() model: string; @ApiProperty({ - example: 'Assist with customer queries and provide information based on the company database.', - description: 'The assistant\'s specific instructions.', + example: + 'Assist with customer queries and provide information based on the company database.', + description: "The assistant's specific instructions.", type: 'string', }) @IsString() instructions: string; @ApiProperty({ - example: [{ name: 'Knowledge Retrieval', settings: { source: 'internal', endpoint: 'https://api.example.com/knowledge' } }], - description: 'The thread\'s tool(Knowledge Retrieval) configurations.', + example: [ + { + 
name: 'Knowledge Retrieval', + settings: { + source: 'internal', + endpoint: 'https://api.example.com/knowledge', + }, + }, + ], + description: "The thread's tool(Knowledge Retrieval) configurations.", type: 'array', }) @IsOptional() @@ -58,7 +67,8 @@ export class CreateThreadAssistantDto implements Assistant { tools: any; @ApiProperty({ - example: 'This assistant helps with customer support by retrieving relevant information.', + example: + 'This assistant helps with customer support by retrieving relevant information.', description: 'The description of the assistant.', type: 'string', }) @@ -121,4 +131,4 @@ export class CreateThreadAssistantDto implements Assistant { }) @IsOptional() tool_resources?: AssistantToolResources; -} \ No newline at end of file +} diff --git a/cortex-js/src/infrastructure/entities/assistant.entity.ts b/cortex-js/src/infrastructure/entities/assistant.entity.ts index f9ef78435..132a14ad9 100644 --- a/cortex-js/src/infrastructure/entities/assistant.entity.ts +++ b/cortex-js/src/infrastructure/entities/assistant.entity.ts @@ -1,11 +1,17 @@ -import { Table, Column, Model, PrimaryKey, DataType } from 'sequelize-typescript'; +import { + Table, + Column, + Model, + PrimaryKey, + DataType, +} from 'sequelize-typescript'; import { Assistant } from '@/domain/models/assistant.interface'; import type { AssistantToolResources, AssistantResponseFormatOption, } from '@/domain/models/assistant.interface'; -@Table({ tableName: 'assistants', timestamps: false}) +@Table({ tableName: 'assistants', timestamps: false }) export class AssistantEntity extends Model implements Assistant { @PrimaryKey @Column({ diff --git a/cortex-js/src/infrastructure/entities/message.entity.ts b/cortex-js/src/infrastructure/entities/message.entity.ts index 1fe6192a5..c40beaecc 100644 --- a/cortex-js/src/infrastructure/entities/message.entity.ts +++ b/cortex-js/src/infrastructure/entities/message.entity.ts @@ -1,4 +1,10 @@ -import { Table, Column, Model, PrimaryKey, DataType } 
from 'sequelize-typescript'; +import { + Table, + Column, + Model, + PrimaryKey, + DataType, +} from 'sequelize-typescript'; import type { Message, MessageContent, @@ -6,7 +12,7 @@ import type { MessageAttachment, } from '@/domain/models/message.interface'; -@Table({ tableName: 'messages', timestamps: false}) +@Table({ tableName: 'messages', timestamps: false }) export class MessageEntity extends Model implements Message { @PrimaryKey @Column({ diff --git a/cortex-js/src/infrastructure/entities/thread.entity.ts b/cortex-js/src/infrastructure/entities/thread.entity.ts index fabd11338..2f02e5ff8 100644 --- a/cortex-js/src/infrastructure/entities/thread.entity.ts +++ b/cortex-js/src/infrastructure/entities/thread.entity.ts @@ -1,8 +1,17 @@ -import { Table, Column, Model, PrimaryKey, DataType } from 'sequelize-typescript'; -import type { Thread, ThreadToolResources } from '@/domain/models/thread.interface'; +import { + Table, + Column, + Model, + PrimaryKey, + DataType, +} from 'sequelize-typescript'; +import type { + Thread, + ThreadToolResources, +} from '@/domain/models/thread.interface'; import { AssistantEntity } from './assistant.entity'; -@Table({ tableName: 'threads', timestamps: false}) +@Table({ tableName: 'threads', timestamps: false }) export class ThreadEntity extends Model implements Thread { @PrimaryKey @Column({ diff --git a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts index 8f9a9ce67..0744876f3 100644 --- a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts +++ b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts @@ -30,8 +30,8 @@ export class ExtensionRepositoryImpl implements ExtensionRepository { } /** * Persist extension to the extensions map - * @param object - * @returns + * @param object + * @returns */ create(object: Extension): Promise { this.extensions.set(object.name ?? 
'', object); @@ -40,7 +40,7 @@ export class ExtensionRepositoryImpl implements ExtensionRepository { /** * Find all extensions - * @returns + * @returns */ findAll(): Promise { return Promise.resolve(Array.from(this.extensions.values())); @@ -48,8 +48,8 @@ export class ExtensionRepositoryImpl implements ExtensionRepository { /** * Find one extension by id - * @param id - * @returns + * @param id + * @returns */ findOne(id: string): Promise { return Promise.resolve(this.extensions.get(id) ?? null); @@ -65,8 +65,8 @@ export class ExtensionRepositoryImpl implements ExtensionRepository { /** * Remove extension from the extensions map - * @param id - * @returns + * @param id + * @returns */ remove(id: string): Promise { this.extensions.delete(id); diff --git a/cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts b/cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts index cd4a3ebd0..9ed2887af 100644 --- a/cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts +++ b/cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts @@ -136,7 +136,7 @@ export class DownloadManagerService { this.httpService.get(url, { responseType: 'stream', signal: controller.signal, - }) + }), ); // check if response is success @@ -169,16 +169,16 @@ export class DownloadManagerService { const resetTimeout = () => { if (timeoutId) clearTimeout(timeoutId); timeoutId = setTimeout(() => { - try{ - this.handleError( - new Error('Download timeout'), - downloadId, - destination, - ) - } finally { - bar.stop(); - resolve(); - } + try { + this.handleError( + new Error('Download timeout'), + downloadId, + destination, + ); + } finally { + bar.stop(); + resolve(); + } }, timeout); }; @@ -265,29 +265,29 @@ export class DownloadManagerService { } private handleError(error: Error, downloadId: string, destination: string) { - console.log(this.allDownloadStates, downloadId, destination) + 
console.log(this.allDownloadStates, downloadId, destination); delete this.abortControllers[downloadId][destination]; - const currentDownloadState = this.allDownloadStates.find( - (downloadState) => downloadState.id === downloadId, - ); - if (!currentDownloadState) return; + const currentDownloadState = this.allDownloadStates.find( + (downloadState) => downloadState.id === downloadId, + ); + if (!currentDownloadState) return; - const downloadItem = currentDownloadState?.children.find( - (downloadItem) => downloadItem.id === destination, - ); - if (downloadItem) { - downloadItem.status = DownloadStatus.Error; - downloadItem.error = error.message; - } + const downloadItem = currentDownloadState?.children.find( + (downloadItem) => downloadItem.id === destination, + ); + if (downloadItem) { + downloadItem.status = DownloadStatus.Error; + downloadItem.error = error.message; + } - currentDownloadState.status = DownloadStatus.Error; - currentDownloadState.error = error.message; + currentDownloadState.status = DownloadStatus.Error; + currentDownloadState.error = error.message; - // remove download state if all children is downloaded - this.allDownloadStates = this.allDownloadStates.filter( - (downloadState) => downloadState.id !== downloadId, - ); - this.eventEmitter.emit('download.event', [currentDownloadState]); - this.eventEmitter.emit('download.event', this.allDownloadStates); + // remove download state if all children is downloaded + this.allDownloadStates = this.allDownloadStates.filter( + (downloadState) => downloadState.id !== downloadId, + ); + this.eventEmitter.emit('download.event', [currentDownloadState]); + this.eventEmitter.emit('download.event', this.allDownloadStates); } } diff --git a/cortex-js/src/usecases/chat/chat.usecases.ts b/cortex-js/src/usecases/chat/chat.usecases.ts index 12855ad3b..1a6784f49 100644 --- a/cortex-js/src/usecases/chat/chat.usecases.ts +++ b/cortex-js/src/usecases/chat/chat.usecases.ts @@ -52,13 +52,11 @@ export class ChatUsecases { } 
const payload = { ...createChatDto, - ...(model.engine && !isRemoteEngine(model.engine) && { engine: model.engine }), + ...(model.engine && + !isRemoteEngine(model.engine) && { engine: model.engine }), }; try { - return await engine.inference( - payload, - headers, - ); + return await engine.inference(payload, headers); } catch (error) { await this.telemetryUseCases.createCrashReport( error, diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts index da05e2918..07a4c5aa9 100644 --- a/cortex-js/src/usecases/cortex/cortex.usecases.ts +++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts @@ -75,9 +75,9 @@ export class CortexUsecases implements BeforeApplicationShutdown { // Handle process exit this.cortexProcess.on('close', (code) => { - this.cortexProcess = undefined + this.cortexProcess = undefined; console.log(`child process exited with code ${code}`); - }); + }); // Await for the /healthz status ok return new Promise((resolve, reject) => { diff --git a/cortex-js/src/usecases/messages/messages.usecases.ts b/cortex-js/src/usecases/messages/messages.usecases.ts index 13eaac7c4..2e4fab8d8 100644 --- a/cortex-js/src/usecases/messages/messages.usecases.ts +++ b/cortex-js/src/usecases/messages/messages.usecases.ts @@ -10,7 +10,7 @@ import { Repository } from 'sequelize-typescript'; export class MessagesUsecases { constructor( @Inject('MESSAGE_REPOSITORY') - private messageRepository: Repository, + private messageRepository: Repository, ) {} async create(createMessageDto: CreateMessageDto) { @@ -44,10 +44,11 @@ export class MessagesUsecases { } async update(id: string, updateMessageDto: UpdateMessageDto) { - const [numberOfAffectedRows, [updatedMessage]] = await this.messageRepository.update(updateMessageDto, { - where: { id }, - returning: true, - }); + const [numberOfAffectedRows, [updatedMessage]] = + await this.messageRepository.update(updateMessageDto, { + where: { id }, + returning: true, + }); return { 
numberOfAffectedRows, updatedMessage }; } diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index 1f67f35f6..ed8064e0c 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -4,7 +4,7 @@ import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; import { BadRequestException, Injectable } from '@nestjs/common'; import { Model, ModelSettingParams } from '@/domain/models/model.interface'; import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; -import { basename, join } from 'path'; +import { basename, join, parse } from 'path'; import { promises, existsSync, mkdirSync, readFileSync, rmSync } from 'fs'; import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; import { ExtensionRepository } from '@/domain/repositories/extension.interface'; @@ -17,7 +17,6 @@ import { TelemetrySource } from '@/domain/telemetry/telemetry.interface'; import { ModelRepository } from '@/domain/repositories/model.interface'; import { ModelParameterParser } from '@/utils/model-parameter.parser'; import { - HuggingFaceModelVersion, HuggingFaceRepoData, HuggingFaceRepoSibling, } from '@/domain/models/huggingface.interface'; @@ -26,7 +25,10 @@ import { fetchJanRepoData, getHFModelMetadata, } from '@/utils/huggingface'; -import { DownloadType } from '@/domain/models/download.interface'; +import { + DownloadStatus, + DownloadType, +} from '@/domain/models/download.interface'; import { EventEmitter2 } from '@nestjs/event-emitter'; import { ModelEvent, ModelId, ModelStatus } from '@/domain/models/model.event'; import { DownloadManagerService } from '@/infrastructure/services/download-manager/download-manager.service'; @@ -35,6 +37,7 @@ import { Engines } from '@/infrastructure/commanders/types/engine.interface'; import { load } from 'js-yaml'; import { llamaModelFile } from 
'@/utils/app-path'; import { CortexUsecases } from '../cortex/cortex.usecases'; +import { isLocalFile } from '@/utils/urls'; @Injectable() export class ModelsUsecases { @@ -127,7 +130,9 @@ export class ModelsUsecases { )) as EngineExtension | undefined; if (engine) { - await engine.unloadModel(id, model.engine || Engines.llamaCPP).catch(() => {}); // Silent fail + await engine + .unloadModel(id, model.engine || Engines.llamaCPP) + .catch(() => {}); // Silent fail } return this.modelRepository .remove(id) @@ -174,7 +179,7 @@ export class ModelsUsecases { } // Attempt to start cortex - await this.cortexUsecases.startCortex() + await this.cortexUsecases.startCortex(); const loadingModelSpinner = ora('Loading model...').start(); // update states and emitting event @@ -341,10 +346,26 @@ export class ModelsUsecases { ) { const modelId = persistedModelId ?? originModelId; const existingModel = await this.findOne(modelId); + if (isLocalModel(existingModel?.files)) { throw new BadRequestException('Model already exists'); } + // Pull a local model file + if (isLocalFile(originModelId)) { + await this.populateHuggingFaceModel(originModelId, persistedModelId); + this.eventEmitter.emit('download.event', [ + { + id: modelId, + type: DownloadType.Model, + status: DownloadStatus.Downloaded, + progress: 100, + children: [], + }, + ]); + return; + } + const modelsContainerDir = await this.fileManagerService.getModelsPath(); if (!existsSync(modelsContainerDir)) { @@ -422,22 +443,18 @@ export class ModelsUsecases { model.model = modelId; if (!(await this.findOne(modelId))) await this.create(model); } else { - await this.populateHuggingFaceModel(modelId, files[0]); - const model = await this.findOne(modelId); - if (model) { - const fileUrl = join( - await this.fileManagerService.getModelsPath(), - normalizeModelId(modelId), - basename( - files.find((e) => e.rfilename.endsWith('.gguf'))?.rfilename ?? 
- files[0].rfilename, - ), - ); - await this.update(modelId, { - files: [fileUrl], - name: modelId.replace(':main', ''), - }); - } + const fileUrl = join( + await this.fileManagerService.getModelsPath(), + normalizeModelId(modelId), + basename( + files.find((e) => e.rfilename.endsWith('.gguf'))?.rfilename ?? + files[0].rfilename, + ), + ); + await this.populateHuggingFaceModel( + fileUrl, + modelId.replace(':main', ''), + ); } uploadModelMetadataSpiner.succeed('Model metadata updated'); const modelEvent: ModelEvent = { @@ -458,21 +475,18 @@ export class ModelsUsecases { * It could be a model from Jan's repo or other authors * @param modelId HuggingFace model id. e.g. "janhq/llama-3 or llama3:7b" */ - async populateHuggingFaceModel( - modelId: string, - modelVersion: HuggingFaceModelVersion, - ) { - if (!modelVersion) throw 'No expected quantization found'; - - const tokenizer = await getHFModelMetadata(modelVersion.downloadUrl!); + async populateHuggingFaceModel(ggufUrl: string, overridenId?: string) { + const metadata = await getHFModelMetadata(ggufUrl); - const stopWords: string[] = tokenizer?.stopWord ? [tokenizer.stopWord] : []; + const stopWords: string[] = metadata?.stopWord ? [metadata.stopWord] : []; + const modelId = + overridenId ?? (isLocalFile(ggufUrl) ? parse(ggufUrl).name : ggufUrl); const model: CreateModelDto = { - files: [modelVersion.downloadUrl ?? ''], + files: [ggufUrl], model: modelId, - name: modelId, - prompt_template: tokenizer?.promptTemplate, + name: metadata?.name ?? 
modelId, + prompt_template: metadata?.promptTemplate, stop: stopWords, // Default Inference Params diff --git a/cortex-js/src/usecases/threads/threads.usecases.ts b/cortex-js/src/usecases/threads/threads.usecases.ts index 0ffcfa522..7791b955e 100644 --- a/cortex-js/src/usecases/threads/threads.usecases.ts +++ b/cortex-js/src/usecases/threads/threads.usecases.ts @@ -21,7 +21,7 @@ export class ThreadsUsecases { @Inject('THREAD_REPOSITORY') private threadRepository: Repository, @Inject('MESSAGE_REPOSITORY') - private messageRepository: Repository, + private messageRepository: Repository, ) {} async create(createThreadDto: CreateThreadDto): Promise { @@ -131,7 +131,9 @@ export class ThreadsUsecases { updateMessageDto: UpdateMessageDto, ) { await this.getThreadOrThrow(threadId); - await this.messageRepository.update(updateMessageDto, { where: { id: messageId } }); + await this.messageRepository.update(updateMessageDto, { + where: { id: messageId }, + }); return this.messageRepository.findOne({ where: { id: messageId } }); } @@ -183,7 +185,10 @@ export class ThreadsUsecases { await this.threadRepository.destroy({ where: { id } }); } - async deleteMessage(_threadId: string, messageId: string): Promise { + async deleteMessage( + _threadId: string, + messageId: string, + ): Promise { await this.getMessageOrThrow(messageId); await this.messageRepository.destroy({ where: { id: messageId } }); @@ -195,7 +200,7 @@ export class ThreadsUsecases { } async retrieveMessage(_threadId: string, messageId: string) { - // we still allow user to delete message even if the thread is not there + // we still allow user to delete message even if the thread is not there return this.getMessageOrThrow(messageId); } diff --git a/cortex-js/src/utils/app-path.ts b/cortex-js/src/utils/app-path.ts index 08124fa46..d4fb0b1e0 100644 --- a/cortex-js/src/utils/app-path.ts +++ b/cortex-js/src/utils/app-path.ts @@ -35,9 +35,9 @@ export const llamaModelFile = ( } const path = 'llama_model_path' in artifact 
- ? (artifact as ModelArtifact).llama_model_path ?? '' + ? ((artifact as ModelArtifact).llama_model_path ?? '') : 'model_path' in files - ? (artifact as ModelArtifact).model_path ?? '' + ? ((artifact as ModelArtifact).model_path ?? '') : (artifact as string[])[0]; return basename(path); }; diff --git a/cortex-js/src/utils/cuda.ts b/cortex-js/src/utils/cuda.ts index c533414f9..679f61842 100644 --- a/cortex-js/src/utils/cuda.ts +++ b/cortex-js/src/utils/cuda.ts @@ -15,7 +15,6 @@ export type GpuSettingInfo = { * @returns CUDA Version 11 | 12 */ export const cudaVersion = async () => { - let filesCuda12: string[]; let filesCuda11: string[]; let paths: string[]; @@ -77,24 +76,27 @@ export const getCudaVersion = (): Promise => { // Execute the nvidia-smi command exec('nvidia-smi', (error, stdout) => { if (!error) { - const cudaVersionLine = stdout.split('\n').find(line => line.includes('CUDA Version')); - + const cudaVersionLine = stdout + .split('\n') + .find((line) => line.includes('CUDA Version')); + if (cudaVersionLine) { - // Extract the CUDA version number - const cudaVersionMatch = cudaVersionLine.match(/CUDA Version:\s+(\d+\.\d+)/); - if (cudaVersionMatch) { - const cudaVersion = cudaVersionMatch[1]; - resolve(cudaVersion); - } else { - reject('CUDA Version not found.'); - } - } else { + // Extract the CUDA version number + const cudaVersionMatch = cudaVersionLine.match( + /CUDA Version:\s+(\d+\.\d+)/, + ); + if (cudaVersionMatch) { + const cudaVersion = cudaVersionMatch[1]; + resolve(cudaVersion); + } else { reject('CUDA Version not found.'); + } + } else { + reject('CUDA Version not found.'); } } else { reject(error); } - }); }); }; diff --git a/cortex-js/src/utils/download-progress.ts b/cortex-js/src/utils/download-progress.ts index dd803f84f..68eafcd09 100644 --- a/cortex-js/src/utils/download-progress.ts +++ b/cortex-js/src/utils/download-progress.ts @@ -1,53 +1,69 @@ -import { Presets, SingleBar } from "cli-progress"; -import { Cortex } from 
"@cortexso/cortex.js"; +import { Presets, SingleBar } from 'cli-progress'; +import { Cortex } from '@cortexso/cortex.js'; import { exit, stdin, stdout } from 'node:process'; -import { DownloadState, DownloadStatus, DownloadType } from "@/domain/models/download.interface"; - -export const downloadProgress = async (cortex: Cortex, downloadId?: string, downloadType?: DownloadType) => { - const response = await cortex.events.downloadEvent(); - - const rl = require('readline').createInterface({ - input: stdin, - output: stdout, - }); - - rl.on('SIGINT', () => { - console.log('\nStopping download...'); - process.emit('SIGINT'); - }); - process.on('SIGINT', async () => { - if (downloadId){ - await cortex.models.abortDownload(downloadId); - } - exit(1); - }); - - const progressBar = new SingleBar({}, Presets.shades_classic); - progressBar.start(100, 0); - - for await (const stream of response) { - if (stream.length) { - const data = (stream.find((data: any) => data.id === downloadId || !downloadId) as DownloadState | undefined); - if (!data) continue; - if (downloadType && data.type !== downloadType) continue; - - if (data.status === DownloadStatus.Downloaded) break; - if(data.status === DownloadStatus.Error) { - rl.close(); - progressBar.stop(); - console.log('\n Download failed: ', data.error); - exit(1); - } - - let totalBytes = 0; - let totalTransferred = 0; - data.children.forEach((child: any) => { - totalBytes += child.size.total; - totalTransferred += child.size.transferred; - }); - progressBar.update(Math.floor((totalTransferred / (totalBytes || 1)) * 100)); +import { + DownloadState, + DownloadStatus, + DownloadType, +} from '@/domain/models/download.interface'; +import { isLocalFile } from './urls'; + +export const downloadProgress = async ( + cortex: Cortex, + downloadId?: string, + downloadType?: DownloadType, +) => { + // Do not update on local file symlink + if (downloadId && isLocalFile(downloadId)) return; + + const response = await 
cortex.events.downloadEvent(); + + const rl = require('readline').createInterface({ + input: stdin, + output: stdout, + }); + + rl.on('SIGINT', () => { + console.log('\nStopping download...'); + process.emit('SIGINT'); + }); + process.on('SIGINT', async () => { + if (downloadId) { + await cortex.models.abortDownload(downloadId); + } + exit(1); + }); + + const progressBar = new SingleBar({}, Presets.shades_classic); + progressBar.start(100, 0); + + for await (const stream of response) { + if (stream.length) { + const data = stream.find( + (data: any) => data.id === downloadId || !downloadId, + ) as DownloadState | undefined; + if (!data) continue; + if (downloadType && data.type !== downloadType) continue; + + if (data.status === DownloadStatus.Downloaded) break; + if (data.status === DownloadStatus.Error) { + rl.close(); + progressBar.stop(); + console.log('\n Download failed: ', data.error); + exit(1); } + + let totalBytes = 0; + let totalTransferred = 0; + data.children.forEach((child: any) => { + totalBytes += child.size.total; + totalTransferred += child.size.transferred; + }); + progressBar.update( + Math.floor((totalTransferred / (totalBytes || 1)) * 100), + ); } - progressBar.stop(); - rl.close(); -}; \ No newline at end of file + } + progressBar.stop(); + rl.close(); +}; diff --git a/cortex-js/src/utils/huggingface.ts b/cortex-js/src/utils/huggingface.ts index 85d1afe94..90383436b 100644 --- a/cortex-js/src/utils/huggingface.ts +++ b/cortex-js/src/utils/huggingface.ts @@ -21,9 +21,9 @@ import { ZEPHYR, ZEPHYR_JINJA, } from '@/infrastructure/constants/prompt-constants'; -import { gguf } from '@huggingface/gguf'; import axios from 'axios'; import { parseModelHubEngineBranch } from './normalize-model-id'; +import { closeSync, openSync, readSync } from 'fs'; // TODO: move this to somewhere else, should be reused by API as well. Maybe in a separate service / provider? 
export function guessPromptTemplateFromHuggingFace(jinjaCode?: string): string { @@ -209,20 +209,29 @@ export async function getHFModelMetadata( ggufUrl: string, ): Promise { try { - const { metadata } = await gguf(ggufUrl); - // @ts-expect-error "tokenizer.ggml.eos_token_id" + let metadata: any; + const { ggufMetadata } = await import('hyllama'); + // Read first 10mb of gguf file + const fd = openSync(ggufUrl, 'r'); + const buffer = new Uint8Array(10_000_000); + readSync(fd, buffer, 0, 10_000_000, 0); + closeSync(fd); + + // Parse metadata and tensor info + ({ metadata } = ggufMetadata(buffer.buffer)); + const index = metadata['tokenizer.ggml.eos_token_id']; - // @ts-expect-error "tokenizer.ggml.eos_token_id" const hfChatTemplate = metadata['tokenizer.chat_template']; const promptTemplate = guessPromptTemplateFromHuggingFace(hfChatTemplate); - // @ts-expect-error "tokenizer.ggml.tokens" const stopWord: string = metadata['tokenizer.ggml.tokens'][index] ?? ''; + const name = metadata['general.name']; const version: number = metadata['version']; return { stopWord, promptTemplate, version, + name, }; } catch (err) { console.log('Failed to get model metadata:', err.message); diff --git a/cortex-js/src/utils/init.ts b/cortex-js/src/utils/init.ts index 9eb27346e..022c3063e 100644 --- a/cortex-js/src/utils/init.ts +++ b/cortex-js/src/utils/init.ts @@ -7,7 +7,7 @@ import { checkNvidiaGPUExist } from './cuda'; * @returns */ export const defaultInstallationOptions = async (): Promise => { - let options: InitOptions = {}; + const options: InitOptions = {}; // Skip check if darwin if (process.platform === 'darwin') { diff --git a/cortex-js/src/utils/model-check.ts b/cortex-js/src/utils/model-check.ts index 53ef84d47..78ca84b60 100644 --- a/cortex-js/src/utils/model-check.ts +++ b/cortex-js/src/utils/model-check.ts @@ -1,8 +1,11 @@ -import { MIN_CUDA_VERSION } from "@/infrastructure/constants/cortex"; -import { getCudaVersion } from "./cuda"; -import ora from "ora"; +import { 
MIN_CUDA_VERSION } from '@/infrastructure/constants/cortex'; +import { getCudaVersion } from './cuda'; +import ora from 'ora'; -export const checkModelCompatibility = async (modelId: string, spinner?: ora.Ora) => { +export const checkModelCompatibility = async ( + modelId: string, + spinner?: ora.Ora, +) => { function log(message: string) { if (spinner) { spinner.fail(message); @@ -15,8 +18,8 @@ export const checkModelCompatibility = async (modelId: string, spinner?: ora.Ora process.exit(1); } - if (modelId.includes('tensorrt-llm') ) { - if(process.platform === 'darwin'){ + if (modelId.includes('tensorrt-llm')) { + if (process.platform === 'darwin') { log('Tensorrt-LLM models are not supported on this OS'); process.exit(1); } @@ -24,17 +27,21 @@ export const checkModelCompatibility = async (modelId: string, spinner?: ora.Ora try { const version = await getCudaVersion(); const [currentMajor, currentMinor] = version.split('.').map(Number); - const [requiredMajor, requiredMinor] = MIN_CUDA_VERSION.split('.').map(Number); - const isMatchRequired = currentMajor > requiredMajor || (currentMajor === requiredMajor && currentMinor >= requiredMinor); + const [requiredMajor, requiredMinor] = + MIN_CUDA_VERSION.split('.').map(Number); + const isMatchRequired = + currentMajor > requiredMajor || + (currentMajor === requiredMajor && currentMinor >= requiredMinor); if (!isMatchRequired) { - log(`CUDA version ${version} is not compatible with TensorRT-LLM models. Required version: ${MIN_CUDA_VERSION}`) + log( + `CUDA version ${version} is not compatible with TensorRT-LLM models. Required version: ${MIN_CUDA_VERSION}`, + ); process.exit(1); } - } catch (e) { - console.error(e.message ?? e); - log(e.message ?? e); - process.exit(1); - } - + } catch (e) { + console.error(e.message ?? e); + log(e.message ?? 
e); + process.exit(1); + } } }; diff --git a/cortex-js/src/utils/urls.ts b/cortex-js/src/utils/urls.ts index edadabf79..6559ef804 100644 --- a/cortex-js/src/utils/urls.ts +++ b/cortex-js/src/utils/urls.ts @@ -1,3 +1,5 @@ +import { isAbsolute } from 'path'; + /** * Check if a string is a valid URL. * @param input - The string to check. @@ -12,3 +14,12 @@ export function isValidUrl(input: string | undefined): boolean { return false; } } + +/** + * Check if the URL is a local file path + * @param path + * @returns + */ +export const isLocalFile = (path: string): boolean => { + return !/^(http|https):\/\/[^/]+\/.*/.test(path) && isAbsolute(path); +}; diff --git a/cortex-js/tsconfig.json b/cortex-js/tsconfig.json index d6395629e..44f33f788 100644 --- a/cortex-js/tsconfig.json +++ b/cortex-js/tsconfig.json @@ -1,7 +1,7 @@ { "compilerOptions": { - "module": "commonjs", - "moduleResolution": "node", + "module": "node16", + "moduleResolution": "node16", "declaration": true, "removeComments": true, "emitDecoratorMetadata": true,