From ca84724f2b3154fcc43eed6f3000b5f168b8718c Mon Sep 17 00:00:00 2001 From: marknguyen1302 Date: Tue, 27 Aug 2024 14:15:08 +0700 Subject: [PATCH 1/4] chore: rename js folder --- platform/.env.development | 0 platform/.env.example | 2 + platform/.eslintrc.cjs | 46 ++ platform/.gitignore | 60 ++ platform/.prettierrc | 4 + platform/CONTRIBUTING.md | 42 ++ platform/Makefile | 68 +++ platform/README.md | 142 +++++ platform/control.template | 9 + platform/cortex.ico | Bin 0 -> 15086 bytes platform/entitlements.plist | 33 + platform/installer.iss | 86 +++ platform/nest-cli.json | 17 + platform/package.json | 158 +++++ platform/patches/sqlite3+5.1.7.patch | 15 + platform/presets/alpaca.yml | 16 + platform/presets/chatml.yml | 18 + platform/presets/llama3.yml | 19 + platform/presets/vicuna.yml | 20 + platform/src/app.module.ts | 77 +++ platform/src/app.ts | 75 +++ platform/src/command.module.ts | 83 +++ platform/src/command.ts | 50 ++ .../src/domain/abstracts/engine.abstract.ts | 65 ++ .../domain/abstracts/extension.abstract.ts | 38 ++ platform/src/domain/abstracts/oai.abstract.ts | 70 +++ .../src/domain/config/config.interface.ts | 8 + .../src/domain/models/assistant.interface.ts | 12 + .../src/domain/models/download.interface.ts | 80 +++ .../domain/models/huggingface.interface.ts | 74 +++ .../models/inference-setting.interface.ts | 20 + .../src/domain/models/message.interface.ts | 13 + platform/src/domain/models/model.event.ts | 38 ++ platform/src/domain/models/model.interface.ts | 186 ++++++ .../models/prompt-template.interface.ts | 6 + .../src/domain/models/resource.interface.ts | 21 + .../src/domain/models/thread.interface.ts | 13 + .../repositories/extension.interface.ts | 4 + .../domain/repositories/model.interface.ts | 10 + .../repositories/repository.interface.ts | 11 + .../repositories/telemetry.interface.ts | 50 ++ .../domain/telemetry/telemetry.interface.ts | 115 ++++ platform/src/extensions/anthropic.engine.ts | 113 ++++ 
platform/src/extensions/cohere.engine.ts | 122 ++++ platform/src/extensions/extensions.module.ts | 40 ++ platform/src/extensions/groq.engine.ts | 49 ++ platform/src/extensions/martian.engine.ts | 49 ++ platform/src/extensions/mistral.engine.ts | 48 ++ platform/src/extensions/nvidia.engine.ts | 49 ++ platform/src/extensions/openai.engine.ts | 48 ++ platform/src/extensions/openrouter.engine.ts | 55 ++ platform/src/index.ts | 103 ++++ .../infrastructure/commanders/base.command.ts | 39 ++ .../commanders/benchmark.command.ts | 338 +++++++++++ .../infrastructure/commanders/chat.command.ts | 178 ++++++ .../commanders/cortex-command.commander.ts | 240 ++++++++ .../commanders/decorators/CommandContext.ts | 34 ++ .../commanders/embeddings.command.ts | 107 ++++ .../commanders/engines.command.ts | 87 +++ .../commanders/engines/engines-get.command.ts | 36 ++ .../engines/engines-init.command.ts | 79 +++ .../engines/engines-list.command.ts | 30 + .../commanders/engines/engines-set.command.ts | 28 + .../commanders/models.command.ts | 87 +++ .../commanders/models/model-get.command.ts | 29 + .../commanders/models/model-list.command.ts | 44 ++ .../commanders/models/model-pull.command.ts | 93 +++ .../commanders/models/model-remove.command.ts | 27 + .../commanders/models/model-start.command.ts | 115 ++++ .../commanders/models/model-stop.command.ts | 34 ++ .../commanders/models/model-update.command.ts | 74 +++ .../commanders/presets.command.ts | 31 + .../infrastructure/commanders/ps.command.ts | 146 +++++ .../commanders/questions/init.questions.ts | 40 ++ .../infrastructure/commanders/run.command.ts | 170 ++++++ .../commanders/serve-stop.command.ts | 18 + .../commanders/services/chat-client.ts | 193 ++++++ .../services/cortex.client.module.ts | 9 + .../commanders/services/cortex.client.ts | 19 + .../commanders/telemetry.command.ts | 44 ++ .../commanders/test/helpers.command.spec.ts | 146 +++++ .../commanders/test/log.service.ts | 8 + .../commanders/test/models.command.spec.ts | 131 ++++ 
.../types/benchmark-config.interface.ts | 32 + .../commanders/types/engine.interface.ts | 34 ++ .../types/init-options.interface.ts | 9 + .../commanders/types/model-stat.interface.ts | 8 + .../types/model-tokenizer.interface.ts | 8 + .../types/telemetry-options.interface.ts | 3 + .../src/infrastructure/constants/benchmark.ts | 36 ++ .../src/infrastructure/constants/cortex.ts | 64 ++ .../infrastructure/constants/huggingface.ts | 39 ++ .../constants/prompt-constants.ts | 45 ++ .../controllers/assistants.controller.spec.ts | 31 + .../controllers/assistants.controller.ts | 128 ++++ .../controllers/chat.controller.spec.ts | 41 ++ .../controllers/chat.controller.ts | 103 ++++ .../controllers/embeddings.controller.spec.ts | 44 ++ .../controllers/embeddings.controller.ts | 26 + .../controllers/engines.controller.spec.ts | 43 ++ .../controllers/engines.controller.ts | 122 ++++ .../controllers/models.controller.spec.ts | 44 ++ .../controllers/models.controller.ts | 303 +++++++++ .../controllers/system.controller.ts | 138 +++++ .../controllers/threads.controller.spec.ts | 22 + .../controllers/threads.controller.ts | 342 +++++++++++ .../database/database.module.ts | 18 + .../database/providers/assistant.providers.ts | 12 + .../database/providers/message.providers.ts | 12 + .../database/providers/thread.providers.ts | 12 + .../database/sqlite-database.providers.ts | 28 + .../dtos/assistants/create-assistant.dto.ts | 84 +++ .../dtos/assistants/delete-assistant.dto.ts | 22 + .../dtos/assistants/model-setting.dto.ts | 208 +++++++ .../dtos/chat/chat-completion-message.dto.ts | 14 + .../dtos/chat/chat-completion-response.dto.ts | 52 ++ .../infrastructure/dtos/chat/choice.dto.ts | 25 + .../dtos/chat/create-chat-completion.dto.ts | 90 +++ .../dtos/chat/embeddings-response.dto.ts | 30 + .../infrastructure/dtos/chat/message.dto.ts | 17 + .../src/infrastructure/dtos/chat/usage.dto.ts | 24 + .../dtos/common/common-response.dto.ts | 10 + .../dtos/configs/config-update.dto.ts | 22 + 
.../cortex-operation-successfully.dto.ts | 13 + .../dtos/cortex/start-cortex.dto.ts | 27 + .../dtos/embeddings/embeddings-request.dto.ts | 37 ++ .../dtos/engines/engines.dto.ts | 106 ++++ .../dtos/messages/create-message.dto.ts | 58 ++ .../dtos/messages/delete-message.dto.ts | 22 + .../dtos/messages/get-message.dto.ts | 91 +++ .../dtos/messages/list-message.dto.ts | 95 +++ .../dtos/messages/update-message.dto.ts | 4 + .../dtos/models/create-model.dto.ts | 158 +++++ .../dtos/models/delete-model.dto.ts | 22 + .../dtos/models/download-model.dto.ts | 9 + .../dtos/models/list-model-response.dto.ts | 10 + .../dtos/models/model-artifact.dto.ts | 17 + .../dtos/models/model-settings.dto.ts | 142 +++++ .../infrastructure/dtos/models/model.dto.ts | 204 +++++++ .../dtos/models/start-model-success.dto.ts | 15 + .../dtos/models/update-model.dto.ts | 4 + platform/src/infrastructure/dtos/page.dto.ts | 28 + .../dtos/threads/create-message.dto.ts | 17 + .../threads/create-thread-assistant.dto.ts | 134 ++++ .../dtos/threads/create-thread.dto.ts | 10 + .../dtos/threads/delete-message.dto.ts | 22 + .../dtos/threads/delete-thread.dto.ts | 22 + .../dtos/threads/get-thread.dto.ts | 39 ++ .../dtos/threads/update-message.dto.ts | 4 + .../dtos/threads/update-thread.dto.ts | 4 + .../entities/assistant.entity.ts | 96 +++ .../infrastructure/entities/message.entity.ts | 94 +++ .../infrastructure/entities/thread.entity.ts | 54 ++ .../duplicate-assistant.exception.ts | 10 + .../exception/global.exception.ts | 39 ++ .../exception/model-not-found.exception.ts | 7 + .../model-setting-not-found.exception.ts | 7 + .../interceptors/transform.interceptor.ts | 33 + .../middlewares/app.logger.middleware.ts | 40 ++ .../providers/cortex/cortex.module.ts | 30 + .../providers/cortex/cortex.provider.ts | 183 ++++++ .../providers/cortex/llamacpp.provider.ts | 11 + .../providers/cortex/onnx.provider.ts | 11 + .../providers/cortex/tensorrtllm.provider.ts | 11 + .../extensions/extension.module.ts | 24 + 
.../extensions/extension.repository.ts | 159 +++++ .../repositories/models/model.module.ts | 24 + .../repositories/models/model.repository.ts | 194 ++++++ .../telemetry/telemetry.repository.ts | 276 +++++++++ .../services/context/context.module.ts | 9 + .../services/context/context.service.ts | 21 + .../download-manager.module.ts | 10 + .../download-manager.service.spec.ts | 21 + .../download-manager.service.ts | 348 +++++++++++ .../file-manager/file-manager.module.ts | 8 + .../file-manager/file-manager.service.spec.ts | 18 + .../file-manager/file-manager.service.ts | 406 +++++++++++++ .../resources-manager.module.ts | 8 + .../resources-manager.service.ts | 35 ++ platform/src/main.ts | 23 + .../usecases/assistants/assistants.module.ts | 12 + .../assistants/assistants.usecases.spec.ts | 29 + .../assistants/assistants.usecases.ts | 96 +++ platform/src/usecases/chat/chat.module.ts | 25 + .../src/usecases/chat/chat.usecases.spec.ts | 40 ++ platform/src/usecases/chat/chat.usecases.ts | 111 ++++ .../exception/invalid-api-url.exception.ts | 10 + .../src/usecases/configs/configs.module.ts | 11 + .../src/usecases/configs/configs.usecase.ts | 84 +++ platform/src/usecases/cortex/cortex.module.ts | 11 + .../usecases/cortex/cortex.usecases.spec.ts | 23 + .../src/usecases/cortex/cortex.usecases.ts | 254 ++++++++ .../src/usecases/engines/engines.module.ts | 22 + .../src/usecases/engines/engines.usecase.ts | 305 ++++++++++ .../src/usecases/messages/messages.module.ts | 11 + .../messages/messages.usecases.spec.ts | 21 + .../usecases/messages/messages.usecases.ts | 70 +++ platform/src/usecases/models/models.module.ts | 29 + .../usecases/models/models.usecases.spec.ts | 46 ++ .../src/usecases/models/models.usecases.ts | 574 ++++++++++++++++++ .../usecases/telemetry/telemetry.module.ts | 20 + .../usecases/telemetry/telemetry.usecases.ts | 235 +++++++ .../src/usecases/threads/threads.module.ts | 11 + .../usecases/threads/threads.usecases.spec.ts | 20 + 
.../src/usecases/threads/threads.usecases.ts | 269 ++++++++ platform/src/utils/app-path.ts | 43 ++ platform/src/utils/cortex-cpp.ts | 7 + platform/src/utils/cuda.ts | 148 +++++ platform/src/utils/download-progress.ts | 69 +++ platform/src/utils/health.ts | 36 ++ platform/src/utils/huggingface.ts | 200 ++++++ platform/src/utils/init.ts | 19 + platform/src/utils/logs.ts | 80 +++ platform/src/utils/model-check.ts | 63 ++ platform/src/utils/model-parameter.parser.ts | 74 +++ platform/src/utils/normalize-model-id.ts | 48 ++ platform/src/utils/request.ts | 22 + platform/src/utils/system-resource.ts | 16 + platform/src/utils/urls.ts | 25 + platform/test/jest-e2e.json | 13 + platform/test/models.e2e-spec.ts | 24 + platform/tsconfig.build.json | 4 + platform/tsconfig.json | 28 + platform/uninstall.js | 40 ++ 224 files changed, 14371 insertions(+) create mode 100644 platform/.env.development create mode 100644 platform/.env.example create mode 100644 platform/.eslintrc.cjs create mode 100644 platform/.gitignore create mode 100644 platform/.prettierrc create mode 100644 platform/CONTRIBUTING.md create mode 100644 platform/Makefile create mode 100644 platform/README.md create mode 100644 platform/control.template create mode 100644 platform/cortex.ico create mode 100644 platform/entitlements.plist create mode 100644 platform/installer.iss create mode 100644 platform/nest-cli.json create mode 100644 platform/package.json create mode 100644 platform/patches/sqlite3+5.1.7.patch create mode 100644 platform/presets/alpaca.yml create mode 100644 platform/presets/chatml.yml create mode 100644 platform/presets/llama3.yml create mode 100644 platform/presets/vicuna.yml create mode 100644 platform/src/app.module.ts create mode 100644 platform/src/app.ts create mode 100644 platform/src/command.module.ts create mode 100644 platform/src/command.ts create mode 100644 platform/src/domain/abstracts/engine.abstract.ts create mode 100644 platform/src/domain/abstracts/extension.abstract.ts create 
mode 100644 platform/src/domain/abstracts/oai.abstract.ts create mode 100644 platform/src/domain/config/config.interface.ts create mode 100644 platform/src/domain/models/assistant.interface.ts create mode 100644 platform/src/domain/models/download.interface.ts create mode 100644 platform/src/domain/models/huggingface.interface.ts create mode 100644 platform/src/domain/models/inference-setting.interface.ts create mode 100644 platform/src/domain/models/message.interface.ts create mode 100644 platform/src/domain/models/model.event.ts create mode 100644 platform/src/domain/models/model.interface.ts create mode 100644 platform/src/domain/models/prompt-template.interface.ts create mode 100644 platform/src/domain/models/resource.interface.ts create mode 100644 platform/src/domain/models/thread.interface.ts create mode 100644 platform/src/domain/repositories/extension.interface.ts create mode 100644 platform/src/domain/repositories/model.interface.ts create mode 100644 platform/src/domain/repositories/repository.interface.ts create mode 100644 platform/src/domain/repositories/telemetry.interface.ts create mode 100644 platform/src/domain/telemetry/telemetry.interface.ts create mode 100644 platform/src/extensions/anthropic.engine.ts create mode 100644 platform/src/extensions/cohere.engine.ts create mode 100644 platform/src/extensions/extensions.module.ts create mode 100644 platform/src/extensions/groq.engine.ts create mode 100644 platform/src/extensions/martian.engine.ts create mode 100644 platform/src/extensions/mistral.engine.ts create mode 100644 platform/src/extensions/nvidia.engine.ts create mode 100644 platform/src/extensions/openai.engine.ts create mode 100644 platform/src/extensions/openrouter.engine.ts create mode 100644 platform/src/index.ts create mode 100644 platform/src/infrastructure/commanders/base.command.ts create mode 100644 platform/src/infrastructure/commanders/benchmark.command.ts create mode 100644 platform/src/infrastructure/commanders/chat.command.ts 
create mode 100644 platform/src/infrastructure/commanders/cortex-command.commander.ts create mode 100644 platform/src/infrastructure/commanders/decorators/CommandContext.ts create mode 100644 platform/src/infrastructure/commanders/embeddings.command.ts create mode 100644 platform/src/infrastructure/commanders/engines.command.ts create mode 100644 platform/src/infrastructure/commanders/engines/engines-get.command.ts create mode 100644 platform/src/infrastructure/commanders/engines/engines-init.command.ts create mode 100644 platform/src/infrastructure/commanders/engines/engines-list.command.ts create mode 100644 platform/src/infrastructure/commanders/engines/engines-set.command.ts create mode 100644 platform/src/infrastructure/commanders/models.command.ts create mode 100644 platform/src/infrastructure/commanders/models/model-get.command.ts create mode 100644 platform/src/infrastructure/commanders/models/model-list.command.ts create mode 100644 platform/src/infrastructure/commanders/models/model-pull.command.ts create mode 100644 platform/src/infrastructure/commanders/models/model-remove.command.ts create mode 100644 platform/src/infrastructure/commanders/models/model-start.command.ts create mode 100644 platform/src/infrastructure/commanders/models/model-stop.command.ts create mode 100644 platform/src/infrastructure/commanders/models/model-update.command.ts create mode 100644 platform/src/infrastructure/commanders/presets.command.ts create mode 100644 platform/src/infrastructure/commanders/ps.command.ts create mode 100644 platform/src/infrastructure/commanders/questions/init.questions.ts create mode 100644 platform/src/infrastructure/commanders/run.command.ts create mode 100644 platform/src/infrastructure/commanders/serve-stop.command.ts create mode 100644 platform/src/infrastructure/commanders/services/chat-client.ts create mode 100644 platform/src/infrastructure/commanders/services/cortex.client.module.ts create mode 100644 
platform/src/infrastructure/commanders/services/cortex.client.ts create mode 100644 platform/src/infrastructure/commanders/telemetry.command.ts create mode 100644 platform/src/infrastructure/commanders/test/helpers.command.spec.ts create mode 100644 platform/src/infrastructure/commanders/test/log.service.ts create mode 100644 platform/src/infrastructure/commanders/test/models.command.spec.ts create mode 100644 platform/src/infrastructure/commanders/types/benchmark-config.interface.ts create mode 100644 platform/src/infrastructure/commanders/types/engine.interface.ts create mode 100644 platform/src/infrastructure/commanders/types/init-options.interface.ts create mode 100644 platform/src/infrastructure/commanders/types/model-stat.interface.ts create mode 100644 platform/src/infrastructure/commanders/types/model-tokenizer.interface.ts create mode 100644 platform/src/infrastructure/commanders/types/telemetry-options.interface.ts create mode 100644 platform/src/infrastructure/constants/benchmark.ts create mode 100644 platform/src/infrastructure/constants/cortex.ts create mode 100644 platform/src/infrastructure/constants/huggingface.ts create mode 100644 platform/src/infrastructure/constants/prompt-constants.ts create mode 100644 platform/src/infrastructure/controllers/assistants.controller.spec.ts create mode 100644 platform/src/infrastructure/controllers/assistants.controller.ts create mode 100644 platform/src/infrastructure/controllers/chat.controller.spec.ts create mode 100644 platform/src/infrastructure/controllers/chat.controller.ts create mode 100644 platform/src/infrastructure/controllers/embeddings.controller.spec.ts create mode 100644 platform/src/infrastructure/controllers/embeddings.controller.ts create mode 100644 platform/src/infrastructure/controllers/engines.controller.spec.ts create mode 100644 platform/src/infrastructure/controllers/engines.controller.ts create mode 100644 platform/src/infrastructure/controllers/models.controller.spec.ts create mode 
100644 platform/src/infrastructure/controllers/models.controller.ts create mode 100644 platform/src/infrastructure/controllers/system.controller.ts create mode 100644 platform/src/infrastructure/controllers/threads.controller.spec.ts create mode 100644 platform/src/infrastructure/controllers/threads.controller.ts create mode 100644 platform/src/infrastructure/database/database.module.ts create mode 100644 platform/src/infrastructure/database/providers/assistant.providers.ts create mode 100644 platform/src/infrastructure/database/providers/message.providers.ts create mode 100644 platform/src/infrastructure/database/providers/thread.providers.ts create mode 100644 platform/src/infrastructure/database/sqlite-database.providers.ts create mode 100644 platform/src/infrastructure/dtos/assistants/create-assistant.dto.ts create mode 100644 platform/src/infrastructure/dtos/assistants/delete-assistant.dto.ts create mode 100644 platform/src/infrastructure/dtos/assistants/model-setting.dto.ts create mode 100644 platform/src/infrastructure/dtos/chat/chat-completion-message.dto.ts create mode 100644 platform/src/infrastructure/dtos/chat/chat-completion-response.dto.ts create mode 100644 platform/src/infrastructure/dtos/chat/choice.dto.ts create mode 100644 platform/src/infrastructure/dtos/chat/create-chat-completion.dto.ts create mode 100644 platform/src/infrastructure/dtos/chat/embeddings-response.dto.ts create mode 100644 platform/src/infrastructure/dtos/chat/message.dto.ts create mode 100644 platform/src/infrastructure/dtos/chat/usage.dto.ts create mode 100644 platform/src/infrastructure/dtos/common/common-response.dto.ts create mode 100644 platform/src/infrastructure/dtos/configs/config-update.dto.ts create mode 100644 platform/src/infrastructure/dtos/cortex/cortex-operation-successfully.dto.ts create mode 100644 platform/src/infrastructure/dtos/cortex/start-cortex.dto.ts create mode 100644 platform/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts create mode 
100644 platform/src/infrastructure/dtos/engines/engines.dto.ts create mode 100644 platform/src/infrastructure/dtos/messages/create-message.dto.ts create mode 100644 platform/src/infrastructure/dtos/messages/delete-message.dto.ts create mode 100644 platform/src/infrastructure/dtos/messages/get-message.dto.ts create mode 100644 platform/src/infrastructure/dtos/messages/list-message.dto.ts create mode 100644 platform/src/infrastructure/dtos/messages/update-message.dto.ts create mode 100644 platform/src/infrastructure/dtos/models/create-model.dto.ts create mode 100644 platform/src/infrastructure/dtos/models/delete-model.dto.ts create mode 100644 platform/src/infrastructure/dtos/models/download-model.dto.ts create mode 100644 platform/src/infrastructure/dtos/models/list-model-response.dto.ts create mode 100644 platform/src/infrastructure/dtos/models/model-artifact.dto.ts create mode 100644 platform/src/infrastructure/dtos/models/model-settings.dto.ts create mode 100644 platform/src/infrastructure/dtos/models/model.dto.ts create mode 100644 platform/src/infrastructure/dtos/models/start-model-success.dto.ts create mode 100644 platform/src/infrastructure/dtos/models/update-model.dto.ts create mode 100644 platform/src/infrastructure/dtos/page.dto.ts create mode 100644 platform/src/infrastructure/dtos/threads/create-message.dto.ts create mode 100644 platform/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts create mode 100644 platform/src/infrastructure/dtos/threads/create-thread.dto.ts create mode 100644 platform/src/infrastructure/dtos/threads/delete-message.dto.ts create mode 100644 platform/src/infrastructure/dtos/threads/delete-thread.dto.ts create mode 100644 platform/src/infrastructure/dtos/threads/get-thread.dto.ts create mode 100644 platform/src/infrastructure/dtos/threads/update-message.dto.ts create mode 100644 platform/src/infrastructure/dtos/threads/update-thread.dto.ts create mode 100644 platform/src/infrastructure/entities/assistant.entity.ts 
create mode 100644 platform/src/infrastructure/entities/message.entity.ts create mode 100644 platform/src/infrastructure/entities/thread.entity.ts create mode 100644 platform/src/infrastructure/exception/duplicate-assistant.exception.ts create mode 100644 platform/src/infrastructure/exception/global.exception.ts create mode 100644 platform/src/infrastructure/exception/model-not-found.exception.ts create mode 100644 platform/src/infrastructure/exception/model-setting-not-found.exception.ts create mode 100644 platform/src/infrastructure/interceptors/transform.interceptor.ts create mode 100644 platform/src/infrastructure/middlewares/app.logger.middleware.ts create mode 100644 platform/src/infrastructure/providers/cortex/cortex.module.ts create mode 100644 platform/src/infrastructure/providers/cortex/cortex.provider.ts create mode 100644 platform/src/infrastructure/providers/cortex/llamacpp.provider.ts create mode 100644 platform/src/infrastructure/providers/cortex/onnx.provider.ts create mode 100644 platform/src/infrastructure/providers/cortex/tensorrtllm.provider.ts create mode 100644 platform/src/infrastructure/repositories/extensions/extension.module.ts create mode 100644 platform/src/infrastructure/repositories/extensions/extension.repository.ts create mode 100644 platform/src/infrastructure/repositories/models/model.module.ts create mode 100644 platform/src/infrastructure/repositories/models/model.repository.ts create mode 100644 platform/src/infrastructure/repositories/telemetry/telemetry.repository.ts create mode 100644 platform/src/infrastructure/services/context/context.module.ts create mode 100644 platform/src/infrastructure/services/context/context.service.ts create mode 100644 platform/src/infrastructure/services/download-manager/download-manager.module.ts create mode 100644 platform/src/infrastructure/services/download-manager/download-manager.service.spec.ts create mode 100644 
platform/src/infrastructure/services/download-manager/download-manager.service.ts create mode 100644 platform/src/infrastructure/services/file-manager/file-manager.module.ts create mode 100644 platform/src/infrastructure/services/file-manager/file-manager.service.spec.ts create mode 100644 platform/src/infrastructure/services/file-manager/file-manager.service.ts create mode 100644 platform/src/infrastructure/services/resources-manager/resources-manager.module.ts create mode 100644 platform/src/infrastructure/services/resources-manager/resources-manager.service.ts create mode 100644 platform/src/main.ts create mode 100644 platform/src/usecases/assistants/assistants.module.ts create mode 100644 platform/src/usecases/assistants/assistants.usecases.spec.ts create mode 100644 platform/src/usecases/assistants/assistants.usecases.ts create mode 100644 platform/src/usecases/chat/chat.module.ts create mode 100644 platform/src/usecases/chat/chat.usecases.spec.ts create mode 100644 platform/src/usecases/chat/chat.usecases.ts create mode 100644 platform/src/usecases/chat/exception/invalid-api-url.exception.ts create mode 100644 platform/src/usecases/configs/configs.module.ts create mode 100644 platform/src/usecases/configs/configs.usecase.ts create mode 100644 platform/src/usecases/cortex/cortex.module.ts create mode 100644 platform/src/usecases/cortex/cortex.usecases.spec.ts create mode 100644 platform/src/usecases/cortex/cortex.usecases.ts create mode 100644 platform/src/usecases/engines/engines.module.ts create mode 100644 platform/src/usecases/engines/engines.usecase.ts create mode 100644 platform/src/usecases/messages/messages.module.ts create mode 100644 platform/src/usecases/messages/messages.usecases.spec.ts create mode 100644 platform/src/usecases/messages/messages.usecases.ts create mode 100644 platform/src/usecases/models/models.module.ts create mode 100644 platform/src/usecases/models/models.usecases.spec.ts create mode 100644 
platform/src/usecases/models/models.usecases.ts create mode 100644 platform/src/usecases/telemetry/telemetry.module.ts create mode 100644 platform/src/usecases/telemetry/telemetry.usecases.ts create mode 100644 platform/src/usecases/threads/threads.module.ts create mode 100644 platform/src/usecases/threads/threads.usecases.spec.ts create mode 100644 platform/src/usecases/threads/threads.usecases.ts create mode 100644 platform/src/utils/app-path.ts create mode 100644 platform/src/utils/cortex-cpp.ts create mode 100644 platform/src/utils/cuda.ts create mode 100644 platform/src/utils/download-progress.ts create mode 100644 platform/src/utils/health.ts create mode 100644 platform/src/utils/huggingface.ts create mode 100644 platform/src/utils/init.ts create mode 100644 platform/src/utils/logs.ts create mode 100644 platform/src/utils/model-check.ts create mode 100644 platform/src/utils/model-parameter.parser.ts create mode 100644 platform/src/utils/normalize-model-id.ts create mode 100644 platform/src/utils/request.ts create mode 100644 platform/src/utils/system-resource.ts create mode 100644 platform/src/utils/urls.ts create mode 100644 platform/test/jest-e2e.json create mode 100644 platform/test/models.e2e-spec.ts create mode 100644 platform/tsconfig.build.json create mode 100644 platform/tsconfig.json create mode 100644 platform/uninstall.js diff --git a/platform/.env.development b/platform/.env.development new file mode 100644 index 000000000..e69de29bb diff --git a/platform/.env.example b/platform/.env.example new file mode 100644 index 000000000..d0666607c --- /dev/null +++ b/platform/.env.example @@ -0,0 +1,2 @@ +EXTENSIONS_PATH= +CORTEX_MODELS_DIR= diff --git a/platform/.eslintrc.cjs b/platform/.eslintrc.cjs new file mode 100644 index 000000000..f116c675b --- /dev/null +++ b/platform/.eslintrc.cjs @@ -0,0 +1,46 @@ +module.exports = { + parser: '@typescript-eslint/parser', + parserOptions: { + project: 'tsconfig.json', + tsconfigRootDir: __dirname, + sourceType: 
'module', + }, + plugins: ['@typescript-eslint/eslint-plugin'], + extends: [ + 'plugin:@typescript-eslint/recommended', + 'plugin:prettier/recommended', + ], + root: true, + env: { + node: true, + jest: true, + }, + ignorePatterns: ['.eslintrc.js'], + rules: { + '@typescript-eslint/interface-name-prefix': 'off', + '@typescript-eslint/explicit-function-return-type': 'off', + '@typescript-eslint/explicit-module-boundary-types': 'off', + '@typescript-eslint/no-explicit-any': 'off', + '@typescript-eslint/no-unused-vars': ['warn'], + '@typescript-eslint/no-floating-promises': 'warn', + '@typescript-eslint/no-var-requires': 'warn', + '@typescript-eslint/ban-types': 'warn', + 'no-unused-vars': 'off', + 'require-await': 'off', + 'prefer-const': 'warn', + 'no-restricted-syntax': [ + 'warn', + { + selector: + 'CallExpression[callee.object.name=configService][callee.property.name=/^(get|getOrThrow)$/]:not(:has([arguments.1] Property[key.name=infer][value.value=true])), CallExpression[callee.object.property.name=configService][callee.property.name=/^(get|getOrThrow)$/]:not(:has([arguments.1] Property[key.name=infer][value.value=true]))', + message: + 'Add "{ infer: true }" to configService.get() for correct typechecking. 
Example: configService.get("database.port", { infer: true })', + }, + { + selector: + 'CallExpression[callee.name=it][arguments.0.value!=/^should/]', + message: '"it" should start with "should"', + }, + ], + }, +}; diff --git a/platform/.gitignore b/platform/.gitignore new file mode 100644 index 000000000..6b6383708 --- /dev/null +++ b/platform/.gitignore @@ -0,0 +1,60 @@ +# compiled output +/dist +/node_modules +/build + +# Logs +logs +*.log +npm-debug.log* +pnpm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* + +# OS +.DS_Store + +# Tests +/coverage +/.nyc_output + +# IDEs and editors +/.idea +.project +.classpath +.c9/ +*.launch +.settings/ +*.sublime-workspace + +# IDE - VSCode +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# temp directory +.temp +.tmp + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# cortex-js +cortex.db +dist diff --git a/platform/.prettierrc b/platform/.prettierrc new file mode 100644 index 000000000..dcb72794f --- /dev/null +++ b/platform/.prettierrc @@ -0,0 +1,4 @@ +{ + "singleQuote": true, + "trailingComma": "all" +} \ No newline at end of file diff --git a/platform/CONTRIBUTING.md b/platform/CONTRIBUTING.md new file mode 100644 index 000000000..53909c391 --- /dev/null +++ b/platform/CONTRIBUTING.md @@ -0,0 +1,42 @@ +### Repo Structure +``` +# Entity Definitions +domain/ # This is the core directory where the domains are defined. + abstracts/ # Abstract base classes for common attributes and methods. + models/ # Domain interface definitions, e.g. model, assistant. + repositories/ # Extensions abstract and interface + +# Business Rules +usecases/ # Application logic + assistants/ # CRUD logic (invokes dtos, entities). 
+ chat/ # Logic for chat functionalities. + models/ # Logic for model operations. + +# Adapters & Implementations +infrastructure/ # Implementations for Cortex interactions + commanders/ # CLI handlers + models/ + questions/ # CLI installation UX + shortcuts/ # CLI chained syntax + types/ + usecases/ # Invokes UseCases + + controllers/ # Nest controllers and HTTP routes + assistants/ # Invokes UseCases + chat/ # Invokes UseCases + models/ # Invokes UseCases + + database/ # Database providers (mysql, sqlite) + + # Framework specific object definitions + dtos/ # DTO definitions (data transfer & validation) + entities/ # TypeORM entity definitions (db schema) + + # Providers + providers/cortex # Cortex [server] provider (a core extension) + repositories/extensions # Extension provider (core & external extensions) + +extensions/ # External extensions +command.module.ts # CLI Commands List +main.ts # Entrypoint +``` \ No newline at end of file diff --git a/platform/Makefile b/platform/Makefile new file mode 100644 index 000000000..2d9d7bcec --- /dev/null +++ b/platform/Makefile @@ -0,0 +1,68 @@ +CODE_SIGN ?= false +AZURE_KEY_VAULT_URI ?= xxxx +AZURE_CLIENT_ID ?= xxxx +AZURE_TENANT_ID ?= xxxx +AZURE_CLIENT_SECRET ?= xxxx +AZURE_CERT_NAME ?= xxxx +DEVELOPER_ID ?= xxxx +CORTEX_EXE_IN ?= "dist/cortexso.exe" +CORTEX_EXE_OUT ?= "cortex.exe" +CORTEX_VERSION ?= "0.0.0.1" + +update-app-info: +ifeq ($(OS),Windows_NT) + @powershell -Command 'npx resedit --in $(CORTEX_EXE_IN) --out $(CORTEX_EXE_OUT) --icon "1,cortex.ico" --no-grow --company-name "Homebrew Computer Pte Ltd" --file-description "cortex cli" --file-version "$(CORTEX_VERSION)" --internal-name "cortex" --product-name "cortex" --product-version "$(CORTEX_VERSION)"' +else ifeq ($(shell uname -s),Linux) + @cp ./dist/cortexso ./cortex +endif + +codesign-binary: +ifeq ($(CODE_SIGN),false) + @echo "Skipping Code Sign" + @exit 0 +endif + +ifeq ($(OS),Windows_NT) + @powershell -Command "dotnet tool install --global 
AzureSignTool;" + @powershell -Command 'azuresigntool.exe sign -kvu "$(AZURE_KEY_VAULT_URI)" -kvi "$(AZURE_CLIENT_ID)" -kvt "$(AZURE_TENANT_ID)" -kvs "$(AZURE_CLIENT_SECRET)" -kvc "$(AZURE_CERT_NAME)" -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\cortex.exe";' +else ifeq ($(shell uname -s),Linux) + @echo "Skipping Code Sign for linux" + @exit 0 +else + codesign --force -s "$(DEVELOPER_ID)" --options=runtime --entitlements="./entitlements.plist" ./cortex; +endif + +codesign-installer: +ifeq ($(CODE_SIGN),false) + @echo "Skipping Code Sign" + @exit 0 +endif + +ifeq ($(OS),Windows_NT) + @powershell -Command "dotnet tool install --global AzureSignTool;" + @powershell -Command 'azuresigntool.exe sign -kvu "$(AZURE_KEY_VAULT_URI)" -kvi "$(AZURE_CLIENT_ID)" -kvt "$(AZURE_TENANT_ID)" -kvs "$(AZURE_CLIENT_SECRET)" -kvc "$(AZURE_CERT_NAME)" -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\setup.exe";' +else ifeq ($(shell uname -s),Linux) + @echo "Skipping Code Sign for linux" + @exit 0 +else + productsign --sign "Developer ID Installer: $(DEVELOPER_ID)" cortex-installer.pkg cortex-installer-signed.pkg; + rm cortex-installer.pkg; + mv cortex-installer-signed.pkg cortex-installer.pkg; +endif + +postbundle: +ifeq ($(RUN_TESTS),false) + @echo "Skipping tests" + @exit 0 +endif +ifeq ($(OS),Windows_NT) + @powershell -Command "7z a -ttar temp.tar cortex.exe; 7z a -tgzip cortex.tar.gz temp.tar;" + @powershell -Command "7z a -ttar temp2.tar setup.exe; 7z a -tgzip cortex-installer.tar.gz temp2.tar;" +else ifeq ($(shell uname -s),Linux) + @chmod +x cortex; \ + tar -czvf cortex.tar.gz cortex; +else + @chmod +x cortex; \ + tar -czvf cortex.tar.gz cortex; \ + tar -czvf cortex-installer.tar.gz cortex-installer.pkg; +endif \ No newline at end of file diff --git a/platform/README.md b/platform/README.md new file mode 100644 index 000000000..660664159 --- /dev/null +++ b/platform/README.md @@ -0,0 +1,142 @@ +# Cortex +

+ cortex-cpplogo +

+ +

+ Documentation - API Reference + - Changelog - Bug reports - Discord +

+ +> ⚠️ **Cortex is currently in Development**: Expect breaking changes and bugs! + +## About +Cortex is an OpenAI-compatible AI engine that developers can use to build LLM apps. It is packaged with a Docker-inspired command-line interface and client libraries. It can be used as a standalone server or imported as a library. + +## Cortex Engines +Cortex supports the following engines: +- [`cortex.llamacpp`](https://github.com/janhq/cortex.llamacpp): `cortex.llamacpp` library is a C++ inference tool that can be dynamically loaded by any server at runtime. We use this engine to support GGUF inference with GGUF models. The `llama.cpp` is optimized for performance on both CPU and GPU. +- [`cortex.onnx` Repository](https://github.com/janhq/cortex.onnx): `cortex.onnx` is a C++ inference library for Windows that leverages `onnxruntime-genai` and uses DirectML to provide GPU acceleration across a wide range of hardware and drivers, including AMD, Intel, NVIDIA, and Qualcomm GPUs. +- [`cortex.tensorrt-llm`](https://github.com/janhq/cortex.tensorrt-llm): `cortex.tensorrt-llm` is a C++ inference library designed for NVIDIA GPUs. It incorporates NVIDIA’s TensorRT-LLM for GPU-accelerated inference. + +## Quicklinks + +- [Homepage](https://cortex.so/) +- [Docs](https://cortex.so/docs/) + +## Quickstart +### Prerequisites +- **OS**: + - MacOSX 13.6 or higher. + - Windows 10 or higher. + - Ubuntu 22.04 and later. +- **Dependencies**: + - **Node.js**: Version 18 and above is required to run the installation. + - **NPM**: Needed to manage packages. + - **CPU Instruction Sets**: Available for download from the [Cortex GitHub Releases](https://github.com/janhq/cortex/releases) page. + - **OpenMPI**: Required for Linux. Install by using the following command: + ```bash + sudo apt install openmpi-bin libopenmpi-dev + ``` + +> Visit [Quickstart](https://cortex.so/docs/quickstart) to get started. 
+ +### NPM +``` bash +# Install using NPM +npm i -g cortexso +# Run model +cortex run mistral +# To uninstall globally using NPM +npm uninstall -g cortexso +``` + +### Homebrew +``` bash +# Install using Brew +brew install cortexso +# Run model +cortex run mistral +# To uninstall using Brew +brew uninstall cortexso +``` +> You can also install Cortex using the Cortex Installer available on [GitHub Releases](https://github.com/janhq/cortex/releases). + +## Cortex Server +```bash +cortex serve + +# Output +# Started server at http://localhost:1337 +# Swagger UI available at http://localhost:1337/api +``` + +You can now access the Cortex API server at `http://localhost:1337`, +and the Swagger UI at `http://localhost:1337/api`. + +## Build from Source + +To install Cortex from the source, follow the steps below: + +1. Clone the Cortex repository [here](https://github.com/janhq/cortex/tree/dev). +2. Navigate to the `cortex-js` folder. +3. Open the terminal and run the following command to build the Cortex project: + +```bash +npx nest build +``` + +4. Make the `command.js` executable: + +```bash +chmod +x '[path-to]/cortex/cortex-js/dist/src/command.js' +``` + +5. Link the package globally: + +```bash +npm link +``` + +## Cortex CLI Commands + +The following CLI commands are currently available. +See [CLI Reference Docs](https://cortex.so/docs/cli) for more information. + +```bash + + serve Providing API endpoint for Cortex backend. + chat Send a chat request to a model. + init|setup Init settings and download cortex's dependencies. + ps Show running models and their status. + kill Kill running cortex processes. + pull|download Download a model. Working with HuggingFace model id. + run [options] EXPERIMENTAL: Shortcut to start a model and chat. + models Subcommands for managing models. + models list List all available models. + models pull Download a specified model. + models remove Delete a specified model. + models get Retrieve the configuration of a specified model. 
+ models start Start a specified model. + models stop Stop a specified model. + models update Update the configuration of a specified model. + benchmark Benchmark and analyze the performance of a specific AI model using your system. + presets Show all the available model presets within Cortex. + telemetry Retrieve telemetry logs for monitoring and analysis. + embeddings Creates an embedding vector representing the input text. + engines Subcommands for managing engines. + engines get Get an engine details. + engines list Get all the available Cortex engines. + engines init Setup and download the required dependencies to run cortex engines. + configs Subcommands for managing configurations. + configs get Get a configuration details. + configs list Get all the available configurations. + configs set Set a configuration. +``` + +## Contact Support +- For support, please file a [GitHub ticket](https://github.com/janhq/cortex/issues/new/choose). +- For questions, join our Discord [here](https://discord.gg/FTk2MvZwJH). +- For long-form inquiries, please email [hello@jan.ai](mailto:hello@jan.ai). + + diff --git a/platform/control.template b/platform/control.template new file mode 100644 index 000000000..92a6e7a2a --- /dev/null +++ b/platform/control.template @@ -0,0 +1,9 @@ +Package: cortexso +Version: +Section: base +Priority: optional +Architecture: amd64 +Depends: openmpi-bin,libopenmpi-dev +Maintainer: Homebrew Pte Ltd +Description: Cortex + Cortex is an OpenAI-compatible AI engine that developers can use to build LLM apps. It is packaged with a Docker-inspired command-line interface and client libraries. It can be used as a standalone server or imported as a library. 
diff --git a/platform/cortex.ico b/platform/cortex.ico new file mode 100644 index 0000000000000000000000000000000000000000..d9498120a6bf064f4f83889c9e7918163ce32209 GIT binary patch literal 15086 zcmeHOZA@EL7``1Q{xC7~m&V1UKUgI3tELIT43ipEKp~=Id?^g7d=z80*@clgTm{BZ z93X|ZK={~%jR6ibw(bYu7ElIlLHSz2F=aC50#UM!kCwt-&$;zp+CWKfZ(Az*!`I5Cp~_kI~lv=+S1;#Xdv zIpXn#;%Cc$q$L-=Irt%Ga7lfTpYHYZB=${mo5K_iVF0Y2C&>{qx(M9?>$l8@2lha+l)UCi~g`;0a!MB9Am~2 zx3(XpQVDZ&b6$hNU~nt{&3=}#9e;TK?4EnP{_ON)+y2<F~!L!vYS{sRL8>&6dR@-#a?uJE6jm>_MN`zqDk@q7kw|3c#})q3(b15Ulw^m<$VfZ8Z27yw%bj(2{*K2Fck1lO z)}yBXg|g4#`JKL;!53+}8Rf4$kyCJoO$Yh6MQr=fClSz8_!%98xWk{jZQO1@0fGJ^ znj={A&qnO0`}-phAbtr_V}cKM|MZkRY0;~9nR?3K<~kM)Wp6C}rru#? z&hL2z9&H7z?+pauY^lIfPD_X&K@gCi&CG4cJ?H;MZCQYPcF*^y;MDOnIJQ5=)02SY#Pkns|9 zsh&dq7g|dXKwZ8JrY{$J95|Lti4G?H&-Yh7i#4yxP6k86QMmI{5wv`_&tvF1d6@LO ze{UFBk};x<|K9p9?eZgz-g{j=9vd#%GX867YG88k3gDbCTO`L@^G-~zzrDR3mhTLb zda>=7a#;d-Uzz$}nf((iziU||Eh&z%f2~#vCX>l62oDcu?BCTkzEM3U`fp^%)pJ|P ze`;zfR4NPL(&-WsqL*KEdTf$NB#cs%GQR&aGc$JmPx{err~TK~)&lN3 zRKInl|8c?p1M_`Yxr^pT5W?OGVRZdqS)}X6Mp?CE(*Ek}>!H8DpN3d0cG|yGDg~8F zMI#^}fbsozwU2F7kID6KWXIKWT=$RTFT7UOQ@C1pWWD%f9{a1#P4UXw-m&v}bo@7` zD|-gT!JP}eD!^-ay%GNwp!KuVT4xwI^@OEY^O{*#K1+>NUMu@FtvLRt-b46ua}==V z6IS;MYt{wd7monjU-brn^AtEOAw&?6d-MVQ*NkhMsaG?M=Q5bi0}P+bg@vB7$ie39 z2mNOi&{LP?H2NA8(2VAO(EGVef95FQ`b@@tJSTGz&Do(bCf&CFh@lfqeKi~6*Nf%k zoX&dn$47@@X?k=+Xq!(utnchKIN~3@=Y7Cye0+U<*XZHr=Lg}TZ-FE*z@dN3>?HXd zcvG@=ofBS@fY&3BbyPd7KQ(z1=Kh_6_0}mwL_|Qpz8i|a&39Pe)$1QQmjmf>p-v+! 
zJ(9lvw>t_K@c$2(wqKl&Sk$YfFjV>UlCDDJ3a)EQ9%tL1QEm+7K9LoMqY_o`KW`r8 AZU6uP literal 0 HcmV?d00001 diff --git a/platform/entitlements.plist b/platform/entitlements.plist new file mode 100644 index 000000000..e56476ecf --- /dev/null +++ b/platform/entitlements.plist @@ -0,0 +1,33 @@ + + + + + + com.apple.security.cs.allow-jit + + com.apple.security.cs.allow-unsigned-executable-memory + + + + com.apple.security.app-sandbox + + com.apple.security.network.client + + com.apple.security.network.server + + com.apple.security.device.audio-input + + com.apple.security.device.microphone + + com.apple.security.device.camera + + com.apple.security.files.user-selected.read-write + + com.apple.security.cs.disable-library-validation + + com.apple.security.cs.allow-dyld-environment-variables + + com.apple.security.cs.allow-executable-memory + + + \ No newline at end of file diff --git a/platform/installer.iss b/platform/installer.iss new file mode 100644 index 000000000..874e4d79a --- /dev/null +++ b/platform/installer.iss @@ -0,0 +1,86 @@ +; Inno Setup Script +; Define the application name, version, and other details +[Setup] +AppName=Cortexso +AppVersion=1.0 +DefaultDirName={pf}\Cortexso +DefaultGroupName=Cortexso +OutputDir=. 
+OutputBaseFilename=setup +Compression=lzma +SolidCompression=yes +PrivilegesRequired=admin + +; Define the languages section +[Languages] +Name: "english"; MessagesFile: "compiler:Default.isl" + +; Define the files to be installed +[Files] +Source: "cortex.exe"; DestDir: "{app}"; Flags: ignoreversion + +; Define the icons to be created +[Icons] +Name: "{group}\Cortexso"; Filename: "{app}\cortex.exe" + +; Define the run section to execute the application after installation +[Run] +Filename: "cmd"; Parameters: "/c setx PATH ""%PATH%;{app}"""; StatusMsg: "Updating system PATH environment variable..."; Flags: runhidden +Filename: "{app}\cortex.exe"; Description: "{cm:LaunchProgram,Cortexso}"; Flags: nowait postinstall skipifsilent + +; Define the tasks section (optional, for additional tasks like creating desktop icons) +[Tasks] +Name: "desktopicon"; Description: "Create a &desktop icon"; GroupDescription: "Additional icons:"; Flags: unchecked +Name: "quicklaunchicon"; Description: "Create a &Quick Launch icon"; GroupDescription: "Additional icons:"; Flags: unchecked + +; Define icons for the additional tasks +[Icons] +Name: "{commondesktop}\Cortexso"; Filename: "{app}\cortex.exe"; Tasks: desktopicon +Name: "{userappdata}\Microsoft\Internet Explorer\Quick Launch\Cortexso"; Filename: "{app}\cortex.exe"; Tasks: quicklaunchicon + +; Define the uninstall run section to execute commands before uninstallation +[UninstallRun] +Filename: "{app}\cortex.exe"; Parameters: "stop"; StatusMsg: "Stopping Cortexso service..."; Flags: runhidden + +; Use Pascal scripting to delete the directory for all users +[Code] +function GetUsersFolder: String; +var + WinDir: String; +begin + WinDir := ExpandConstant('{win}'); + Result := Copy(WinDir, 1, Pos('\Windows', WinDir) - 1) + '\Users'; +end; + +procedure DeleteUserCortexFolder; +var + UsersFolder: String; + FindRec: TFindRec; +begin + UsersFolder := GetUsersFolder; + if FindFirst(UsersFolder + '\*', FindRec) then + begin + try + repeat + 
if (FindRec.Attributes and FILE_ATTRIBUTE_DIRECTORY <> 0) and + (FindRec.Name <> '.') and (FindRec.Name <> '..') then + begin + if DirExists(UsersFolder + '\' + FindRec.Name + '\cortex') then + begin + DelTree(UsersFolder + '\' + FindRec.Name + '\cortex', True, True, True); + end; + end; + until not FindNext(FindRec); + finally + FindClose(FindRec); + end; + end; +end; + +procedure CurUninstallStepChanged(CurUninstallStep: TUninstallStep); +begin + if CurUninstallStep = usPostUninstall then + begin + DeleteUserCortexFolder; + end; +end; diff --git a/platform/nest-cli.json b/platform/nest-cli.json new file mode 100644 index 000000000..f5e93169b --- /dev/null +++ b/platform/nest-cli.json @@ -0,0 +1,17 @@ +{ + "$schema": "https://json.schemastore.org/nest-cli", + "collection": "@nestjs/schematics", + "sourceRoot": "src", + "compilerOptions": { + "deleteOutDir": true, + "plugins": [ + { + "name": "@nestjs/swagger", + "options": { + "classValidatorShim": true, + "introspectComments": true + } + } + ] + } +} diff --git a/platform/package.json b/platform/package.json new file mode 100644 index 000000000..3cabf3381 --- /dev/null +++ b/platform/package.json @@ -0,0 +1,158 @@ +{ + "name": "cortexso", + "version": "0.0.1", + "description": "Cortex is an openAI-compatible local AI server that developers can use to build LLM apps. It is packaged with a Docker-inspired command-line interface and a Typescript client library. 
It can be used as a standalone server, or imported as a library.", + "author": "Jan ", + "license": "AGPL-3.0", + "homepage": "https://github.com/janhq/cortex", + "bin": { + "cortex": "./dist/src/command.js" + }, + "main": "dist/src/index.js", + "types": "dist/src/index.d.ts", + "scripts": { + "dev": "nest dev", + "compile": "npx ncc build ./dist/src/command.js -o command", + "build": "yarn add sqlite3 --build-from-source && nest build", + "build:binary": "yarn build && yarn compile && npx -q patch-package && run-script-os", + "build:binary:windows": "npx @yao-pkg/pkg . --targets node20-win", + "build:binary:linux": "npx @yao-pkg/pkg . --targets node20-linux", + "build:binary:macos": "npx @yao-pkg/pkg . --targets node20-macos", + "format": "prettier --write \"src/**/*.ts\" \"test/**/*.ts\"", + "build:extensions": "run-script-os", + "build:extensions:windows": "powershell -command \"$jobs = Get-ChildItem -Path './src/extensions' -Directory | ForEach-Object { Start-Job -Name ($_.Name) -ScriptBlock { param($_dir); try { Set-Location $_dir; yarn; yarn build; Write-Output 'Build successful in ' + $_dir } catch { Write-Error 'Error in ' + $_dir; throw } } -ArgumentList $_.FullName }; $jobs | Wait-Job; $jobs | ForEach-Object { Receive-Job -Job $_ -Keep } | ForEach-Object { Write-Host $_ }; $failed = $jobs | Where-Object { $_.State -ne 'Completed' -or $_.ChildJobs[0].JobStateInfo.State -ne 'Completed' }; if ($failed) { Exit 1 }\"", + "build:extensions:linux": "for dir in ./src/extensions/*/; do (cd \"$dir\" && yarn && yarn build); done", + "build:extensions:macos": "for dir in ./src/extensions/*/; do (cd \"$dir\" && yarn && yarn build); done", + "start": "nest start", + "start:dev": "nest start --watch", + "start:debug": "nest start --debug --watch", + "start:prod": "node dist/src/main --trace-deprecation", + "lint": "eslint \"{src,apps,libs,test}/**/*.ts\"", + "test": "jest", + "test:watch": "jest --watch", + "test:cov": "jest --coverage", + "test:debug": "node 
--inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand", + "test:e2e": "jest --config ./test/jest-e2e.json", + "typeorm": "typeorm-ts-node-esm", + "build:dev": "yarn build && run-script-os && npm link", + "build:dev:windows": "echo 'Windows build complete'", + "build:dev:macos": "chmod +x ./dist/src/command.js", + "build:dev:linux": "chmod +x ./dist/src/command.js", + "preuninstall": "node ./uninstall.js" + }, + "dependencies": { + "@cortexso/cortex.js": "^0.1.7", + "@nestjs/axios": "^3.0.2", + "@nestjs/common": "^10.0.0", + "@nestjs/config": "^3.2.2", + "@nestjs/core": "^10.0.0", + "@nestjs/devtools-integration": "^0.1.6", + "@nestjs/event-emitter": "^2.0.4", + "@nestjs/mapped-types": "*", + "@nestjs/platform-express": "^10.0.0", + "@nestjs/sequelize": "^10.0.1", + "@nestjs/swagger": "^7.3.1", + "@terascope/fetch-github-release": "^0.8.8", + "@types/node-os-utils": "^1.3.4", + "axios": "^1.6.8", + "class-transformer": "^0.5.1", + "class-validator": "^0.14.1", + "cli-progress": "^3.12.0", + "cortex-cpp": "0.5.0-46", + "decompress": "^4.2.1", + "hyllama": "^0.2.2", + "js-yaml": "^4.1.0", + "nest-commander": "^3.13.0", + "node-os-utils": "^1.3.7", + "ora": "5.4.1", + "readline": "^1.3.0", + "reflect-metadata": "^0.2.0", + "rxjs": "^7.8.1", + "sequelize": "^6.37.3", + "sequelize-typescript": "^2.1.6", + "sqlite3": "^5.1.7", + "systeminformation": "^5.23.4", + "ulid": "^2.3.0", + "uuid": "^9.0.1", + "whatwg-url": "^14.0.0", + "yaml": "^2.4.2" + }, + "devDependencies": { + "@nestjs/cli": "^10.0.0", + "@nestjs/schematics": "^10.0.0", + "@nestjs/testing": "^10.0.0", + "@types/cli-progress": "^3.11.5", + "@types/decompress": "^4.2.7", + "@types/express": "^4.17.17", + "@types/jest": "^29.5.2", + "@types/js-yaml": "^4.0.9", + "@types/node": "^20.12.9", + "@types/sequelize": "^4.28.20", + "@types/supertest": "^6.0.2", + "@types/update-notifier": "^6.0.8", + "@types/uuid": "^9.0.8", + "@typescript-eslint/eslint-plugin": "7.16.1", + 
"@typescript-eslint/parser": "7.16.1", + "@vercel/ncc": "^0.38.0", + "@yao-pkg/pkg": "^5.12.0", + "cpx": "^1.5.0", + "env-cmd": "10.1.0", + "eslint": "8.57.0", + "eslint-config-prettier": "9.1.0", + "eslint-plugin-import": "2.29.1", + "eslint-plugin-prettier": "5.2.1", + "hanbi": "^1.0.3", + "is-primitive": "^3.0.1", + "jest": "^29.5.0", + "nest-commander-testing": "^3.3.0", + "node-gyp": "^10.1.0", + "patch-package": "^8.0.0", + "prettier": "^3.0.0", + "resedit-cli": "^2.0.0", + "run-script-os": "^1.1.6", + "source-map-support": "^0.5.21", + "supertest": "^6.3.3", + "ts-jest": "^29.1.0", + "ts-loader": "^9.4.3", + "tsconfig-paths": "^4.2.0", + "typescript": "^5.1.3" + }, + "files": [ + "dist" + ], + "jest": { + "moduleFileExtensions": [ + "js", + "json", + "ts" + ], + "rootDir": "src", + "testRegex": ".*\\.spec\\.ts$", + "transform": { + "^.+\\.(t|j)s$": "ts-jest" + }, + "collectCoverageFrom": [ + "**/*.(t|j)s" + ], + "coverageDirectory": "../coverage", + "testEnvironment": "node", + "moduleNameMapper": { + "@/(.*)$": "/$1", + "@commanders/(.*)$": "/../src/infrastructure/commanders/$1" + } + }, + "pkg": { + "scripts": "command/**/*.js", + "assets": [ + "command/*.node", + "**/package.json", + "node_modules/axios/**/*", + "node_modules/swagger-ui-dist/", + "**/main.js", + "node_modules/cortex-cpp/**/*", + "dist/src/utils/cortex-cpp/**/*", + "**/cortex-cpp.js" + ], + "outputPath": "dist" + } +} diff --git a/platform/patches/sqlite3+5.1.7.patch b/platform/patches/sqlite3+5.1.7.patch new file mode 100644 index 000000000..fc72a4d4d --- /dev/null +++ b/platform/patches/sqlite3+5.1.7.patch @@ -0,0 +1,15 @@ +diff --git a/node_modules/sqlite3/.DS_Store b/node_modules/sqlite3/.DS_Store +new file mode 100644 +index 0000000..10f4d43 +Binary files /dev/null and b/node_modules/sqlite3/.DS_Store differ +diff --git a/node_modules/sqlite3/lib/sqlite3.js b/node_modules/sqlite3/lib/sqlite3.js +index 430a2b8..9df0857 100644 +--- a/node_modules/sqlite3/lib/sqlite3.js ++++ 
b/node_modules/sqlite3/lib/sqlite3.js +@@ -1,5 +1,5 @@ + const path = require('path'); +-const sqlite3 = require('./sqlite3-binding.js'); ++const sqlite3 = require('./../build/Release/node_sqlite3.node'); + const EventEmitter = require('events').EventEmitter; + module.exports = exports = sqlite3; + diff --git a/platform/presets/alpaca.yml b/platform/presets/alpaca.yml new file mode 100644 index 000000000..4d9a0f9b2 --- /dev/null +++ b/platform/presets/alpaca.yml @@ -0,0 +1,16 @@ +version: 1.0 +# Model Settings +prompt_template: |+ + {system_message} + ### Instruction: {prompt} + ### Response: +ctx_len: 2048 + +# Results Preferences +stop: + - +temperature: 0.7 +top_p: 0.95 +max_tokens: 2048 +frequency_penalty: 0 +presence_penalty: 0 diff --git a/platform/presets/chatml.yml b/platform/presets/chatml.yml new file mode 100644 index 000000000..88b525ef0 --- /dev/null +++ b/platform/presets/chatml.yml @@ -0,0 +1,18 @@ +version: 1.0 +# Model Settings +prompt_template: |+ + <|im_start|>system + {system_message}<|im_end|> + <|im_start|>user + {prompt}<|im_end|> + <|im_start|>assistant +ctx_len: 2048 + +# Results Preferences +stop: + - <|im_end|> +temperature: 0.7 +top_p: 0.95 +max_tokens: 2048 +frequency_penalty: 0 +presence_penalty: 0 diff --git a/platform/presets/llama3.yml b/platform/presets/llama3.yml new file mode 100644 index 000000000..92f2cb677 --- /dev/null +++ b/platform/presets/llama3.yml @@ -0,0 +1,19 @@ +version: 1.0 +# Model Settings +prompt_template: |+ + <|begin_of_text|><|start_header_id|>system<|end_header_id|> + + {system_message}<|eot_id|><|start_header_id|>user<|end_header_id|> + + {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|> +ctx_len: 2048 + +# Results Preferences +stop: + - <|eot_id|> + - <|<|end_header_id|>|> +temperature: 0.7 +top_p: 0.95 +max_tokens: 2048 +frequency_penalty: 0 +presence_penalty: 0 diff --git a/platform/presets/vicuna.yml b/platform/presets/vicuna.yml new file mode 100644 index 000000000..b08495c6d --- /dev/null 
+++ b/platform/presets/vicuna.yml @@ -0,0 +1,20 @@ +version: 1.0 +# Model Settings +prompt_template: |- + <|begin_of_sentence|>{system_prompt} + + User: {prompt} + + Assistant: + +ctx_len: 2048 + +# Results Preferences +stop: + - + - end_of_sentence +temperature: 0.7 +top_p: 0.95 +max_tokens: 2048 +frequency_penalty: 0 +presence_penalty: 0 diff --git a/platform/src/app.module.ts b/platform/src/app.module.ts new file mode 100644 index 000000000..e7f8d6698 --- /dev/null +++ b/platform/src/app.module.ts @@ -0,0 +1,77 @@ +import { MiddlewareConsumer, Module, NestModule } from '@nestjs/common'; +import { MessagesModule } from './usecases/messages/messages.module'; +import { ThreadsModule } from './usecases/threads/threads.module'; +import { ModelsModule } from './usecases/models/models.module'; +import { DatabaseModule } from './infrastructure/database/database.module'; +import { ChatModule } from './usecases/chat/chat.module'; +import { AssistantsModule } from './usecases/assistants/assistants.module'; +import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module'; +import { CortexModule } from './usecases/cortex/cortex.module'; +import { ConfigModule } from '@nestjs/config'; +import { env } from 'node:process'; +import { FileManagerModule } from './infrastructure/services/file-manager/file-manager.module'; +import { AppLoggerMiddleware } from './infrastructure/middlewares/app.logger.middleware'; +import { TelemetryModule } from './usecases/telemetry/telemetry.module'; +import { APP_FILTER } from '@nestjs/core'; +import { GlobalExceptionFilter } from './infrastructure/exception/global.exception'; +import { EventEmitterModule } from '@nestjs/event-emitter'; +import { AssistantsController } from './infrastructure/controllers/assistants.controller'; +import { ChatController } from './infrastructure/controllers/chat.controller'; +import { EmbeddingsController } from './infrastructure/controllers/embeddings.controller'; +import { 
ModelsController } from './infrastructure/controllers/models.controller'; +import { ThreadsController } from './infrastructure/controllers/threads.controller'; +import { SystemController } from './infrastructure/controllers/system.controller'; +import { DownloadManagerModule } from './infrastructure/services/download-manager/download-manager.module'; +import { ContextModule } from './infrastructure/services/context/context.module'; +import { ExtensionsModule } from './extensions/extensions.module'; +import { ConfigsModule } from './usecases/configs/configs.module'; +import { EnginesModule } from './usecases/engines/engines.module'; +import { EnginesController } from './infrastructure/controllers/engines.controller'; +import { ResourceManagerModule } from './infrastructure/services/resources-manager/resources-manager.module'; + +@Module({ + imports: [ + ConfigModule.forRoot({ + isGlobal: true, + envFilePath: env.NODE_ENV !== 'production' ? '.env.development' : '.env', + }), + EventEmitterModule.forRoot(), + DownloadManagerModule, + DatabaseModule, + MessagesModule, + ThreadsModule, + ModelsModule, + ChatModule, + AssistantsModule, + CortexModule, + ExtensionModule, + FileManagerModule, + TelemetryModule, + ContextModule, + DownloadManagerModule, + ExtensionsModule, + ConfigsModule, + EnginesModule, + ResourceManagerModule, + ], + controllers: [ + AssistantsController, + ChatController, + EmbeddingsController, + ModelsController, + ThreadsController, + SystemController, + EnginesController, + ], + providers: [ + { + provide: APP_FILTER, + useClass: GlobalExceptionFilter, + }, + ], +}) +export class AppModule implements NestModule { + configure(consumer: MiddlewareConsumer): void { + consumer.apply(AppLoggerMiddleware).forRoutes('*'); + } +} diff --git a/platform/src/app.ts b/platform/src/app.ts new file mode 100644 index 000000000..53104b886 --- /dev/null +++ b/platform/src/app.ts @@ -0,0 +1,75 @@ +import { NestFactory } from '@nestjs/core'; +import { 
DocumentBuilder, SwaggerModule } from '@nestjs/swagger'; +import { AppModule } from './app.module'; +import { fileManagerService } from './infrastructure/services/file-manager/file-manager.service'; +import { ValidationPipe } from '@nestjs/common'; +import { TelemetryUsecases } from './usecases/telemetry/telemetry.usecases'; +import { cleanLogs } from './utils/logs'; +export const getApp = async (host?: string, port?: number) => { + const app = await NestFactory.create(AppModule, { + snapshot: true, + cors: true, + logger: console, + }); + + // Set the global prefix for the API /v1/ + app.setGlobalPrefix('v1'); + + await fileManagerService.getConfig(); + + const telemetryService = await app.resolve(TelemetryUsecases); + await telemetryService.initInterval(); + + app.useGlobalPipes( + new ValidationPipe({ + transform: true, + enableDebugMessages: true, + }), + ); + + cleanLogs(); + const config = new DocumentBuilder() + .setTitle('Cortex API') + .setDescription( + 'Cortex API provides a command-line interface (CLI) for seamless interaction with Large Language Models (LLMs). 
It is fully compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference) and enables straightforward command execution and management of LLM interactions.', + ) + .setVersion('1.0') + .addTag( + 'Inference', + 'This endpoint initiates interaction with a Large Language Models (LLM).', + ) + .addTag( + 'Assistants', + 'These endpoints manage the lifecycle of an Assistant within a conversation thread.', + ) + .addTag( + 'Models', + 'These endpoints provide a list and descriptions of all available models within the Cortex framework.', + ) + .addTag( + 'Messages', + 'These endpoints manage the retrieval and storage of conversation content, including responses from LLMs and other metadata related to chat interactions.', + ) + .addTag( + 'Threads', + 'These endpoints handle the creation, retrieval, updating, and deletion of conversation threads.', + ) + .addTag( + 'Embeddings', + 'Endpoint for creating and retrieving embedding vectors from text inputs using specified models.', + ) + .addTag( + 'Engines', + 'Endpoints for managing the available engines within Cortex.', + ) + .addTag( + 'System', + 'Endpoints for stopping the Cortex API server, checking its status, and fetching system events.', + ) + .addServer(`http://${host || '127.0.0.1'}:${port || 1337}`) + .build(); + const document = SwaggerModule.createDocument(app, config); + + SwaggerModule.setup('api', app, document); + return app; +}; diff --git a/platform/src/command.module.ts b/platform/src/command.module.ts new file mode 100644 index 000000000..242136d07 --- /dev/null +++ b/platform/src/command.module.ts @@ -0,0 +1,83 @@ +import { Module } from '@nestjs/common'; +import { ConfigModule } from '@nestjs/config'; +import { CortexModule } from './usecases/cortex/cortex.module'; +import { ModelsCommand } from './infrastructure/commanders/models.command'; +import { HttpModule } from '@nestjs/axios'; +import { InitRunModeQuestions } from './infrastructure/commanders/questions/init.questions'; 
+import { ModelListCommand } from './infrastructure/commanders/models/model-list.command'; +import { ModelPullCommand } from './infrastructure/commanders/models/model-pull.command'; +import { CortexCommand } from './infrastructure/commanders/cortex-command.commander'; +import { ChatCommand } from './infrastructure/commanders/chat.command'; +import { ModelStartCommand } from './infrastructure/commanders/models/model-start.command'; +import { ModelStopCommand } from './infrastructure/commanders/models/model-stop.command'; +import { ModelGetCommand } from './infrastructure/commanders/models/model-get.command'; +import { ModelRemoveCommand } from './infrastructure/commanders/models/model-remove.command'; +import { RunCommand } from './infrastructure/commanders/run.command'; +import { ModelUpdateCommand } from './infrastructure/commanders/models/model-update.command'; +import { FileManagerModule } from './infrastructure/services/file-manager/file-manager.module'; +import { PSCommand } from './infrastructure/commanders/ps.command'; +import { PresetCommand } from './infrastructure/commanders/presets.command'; +import { TelemetryModule } from './usecases/telemetry/telemetry.module'; +import { TelemetryCommand } from './infrastructure/commanders/telemetry.command'; +import { EmbeddingCommand } from './infrastructure/commanders/embeddings.command'; +import { BenchmarkCommand } from './infrastructure/commanders/benchmark.command'; +import { ServeStopCommand } from './infrastructure/commanders/serve-stop.command'; +import { ContextModule } from './infrastructure/services/context/context.module'; +import { EnginesCommand } from './infrastructure/commanders/engines.command'; +import { EnginesListCommand } from './infrastructure/commanders/engines/engines-list.command'; +import { EnginesGetCommand } from './infrastructure/commanders/engines/engines-get.command'; +import { EnginesInitCommand } from './infrastructure/commanders/engines/engines-init.command'; +import { 
EnginesSetCommand } from './infrastructure/commanders/engines/engines-set.command'; + +@Module({ + imports: [ + ConfigModule.forRoot({ + isGlobal: true, + envFilePath: + process.env.NODE_ENV !== 'production' ? '.env.development' : '.env', + }), + CortexModule, + HttpModule, + FileManagerModule, + TelemetryModule, + ContextModule, + ], + providers: [ + CortexCommand, + ModelsCommand, + ChatCommand, + PSCommand, + PresetCommand, + EmbeddingCommand, + BenchmarkCommand, + EnginesCommand, + + // Questions + InitRunModeQuestions, + + // Model commands + ModelStartCommand, + ModelStopCommand, + ModelListCommand, + ModelGetCommand, + ModelRemoveCommand, + ModelPullCommand, + ModelUpdateCommand, + + // Shortcuts + RunCommand, + + // Telemetry + TelemetryCommand, + + // Serve + ServeStopCommand, + + // Engines + EnginesListCommand, + EnginesGetCommand, + EnginesInitCommand, + EnginesSetCommand, + ], +}) +export class CommandModule {} diff --git a/platform/src/command.ts b/platform/src/command.ts new file mode 100644 index 000000000..37aa2a77a --- /dev/null +++ b/platform/src/command.ts @@ -0,0 +1,50 @@ +#!/usr/bin/env node +import ora from 'ora'; +const dependenciesSpinner = ora('Loading dependencies...').start(); +const time = Date.now(); +import { CommandFactory } from 'nest-commander'; +import { CommandModule } from './command.module'; +import { TelemetryUsecases } from './usecases/telemetry/telemetry.usecases'; +import { TelemetrySource } from './domain/telemetry/telemetry.interface'; +import { ContextService } from '@/infrastructure/services/context/context.service'; + +dependenciesSpinner.succeed( + 'Dependencies loaded in ' + (Date.now() - time) + 'ms', +); + +process.removeAllListeners('warning'); +process.title = 'Cortex CLI Command Process'; + +async function bootstrap() { + let telemetryUseCase: TelemetryUsecases | null = null; + let contextService: ContextService | null = null; + const app = await CommandFactory.createWithoutRunning(CommandModule, { + logger: 
['warn', 'error'], + enablePositionalOptions: true, + errorHandler: async (error) => { + await telemetryUseCase!.createCrashReport(error, TelemetrySource.CLI); + console.error(error); + process.exit(1); + }, + serviceErrorHandler: async (error) => { + await telemetryUseCase!.createCrashReport(error, TelemetrySource.CLI); + console.error(error); + process.exit(1); + }, + }); + + telemetryUseCase = await app.resolve(TelemetryUsecases); + contextService = await app.resolve(ContextService); + + const anonymousData = await telemetryUseCase!.updateAnonymousData(); + + await contextService!.init(async () => { + contextService!.set('source', TelemetrySource.CLI); + contextService!.set('sessionId', anonymousData?.sessionId); + telemetryUseCase!.sendActivationEvent(TelemetrySource.CLI); + telemetryUseCase!.sendCrashReport(); + await CommandFactory.runApplication(app); + }); +} + +bootstrap(); diff --git a/platform/src/domain/abstracts/engine.abstract.ts b/platform/src/domain/abstracts/engine.abstract.ts new file mode 100644 index 000000000..39a546018 --- /dev/null +++ b/platform/src/domain/abstracts/engine.abstract.ts @@ -0,0 +1,65 @@ +/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */ +import stream from 'stream'; +import { Model, ModelSettingParams } from '../models/model.interface'; +import { Extension } from './extension.abstract'; + +export enum EngineStatus { + READY = 'READY', + MISSING_CONFIGURATION = 'MISSING_CONFIGURATION', + NOT_INITIALIZED = 'NOT_INITIALIZED', + NOT_SUPPORTED = 'NOT_SUPPORTED', + ERROR = 'ERROR', +} + +/** + * This class should be extended by any class that represents an engine extension. + * It provides methods for loading and unloading models, and for making inference requests. 
+ */ +export abstract class EngineExtension extends Extension { + abstract onLoad(): void; + + transformPayload?: Function; + + transformResponse?: Function; + + status: EngineStatus = EngineStatus.READY; + + /** + * Makes an inference request to the engine. + * @param dto + * @param headers + */ + abstract inference( + dto: any, + headers: Record, + ): Promise; + + /** + * Checks if a model is running by the engine + * This method should check run-time status of the model + * Since the model can be corrupted during the run-time + * This method should return false if the model is not running + * @param modelId + */ + async isModelRunning(modelId: string): Promise { + return true; + } + + /** + * Loads a model into the engine. + * There are model settings such as `ngl` and `ctx_len` that can be passed to the engine. + * Applicable for local engines only + * @param model + * @param settingParams + */ + async loadModel( + model: Model, + settingParams?: ModelSettingParams, + ): Promise {} + + /** + * Unloads a model from the engine. + * @param modelId + */ + async unloadModel(modelId: string, engine?: string): Promise {} +} diff --git a/platform/src/domain/abstracts/extension.abstract.ts b/platform/src/domain/abstracts/extension.abstract.ts new file mode 100644 index 000000000..ce62df74c --- /dev/null +++ b/platform/src/domain/abstracts/extension.abstract.ts @@ -0,0 +1,38 @@ +/** + * Represents a base extension. + * This class should be extended by any class that represents an extension. + */ +export abstract class Extension { + /** @type {string} Name of the extension. */ + name: string; + + /** @type {string} Product Name of the extension. */ + productName?: string; + + /** @type {string} The URL of the extension to load. */ + url?: string; + + /** @type {boolean} Whether the extension is activated or not. */ + active?: boolean; + + /** @type {string} Extension's description. */ + description?: string; + + /** @type {string} Extension's version. 
*/ + version?: string; + + /** @type {string} The status of the extension. */ + status: string; + + /** + * Called when the extension is loaded. + * Any initialization logic for the extension should be put here. + */ + onLoad(): void {} + + /** + * Called when the extension is unloaded. + * Any cleanup logic for the extension should be put here. + */ + onUnload(): void {} +} diff --git a/platform/src/domain/abstracts/oai.abstract.ts b/platform/src/domain/abstracts/oai.abstract.ts new file mode 100644 index 000000000..09b2fbbf1 --- /dev/null +++ b/platform/src/domain/abstracts/oai.abstract.ts @@ -0,0 +1,70 @@ +import { HttpService } from '@nestjs/axios'; +import { EngineExtension } from './engine.abstract'; +import stream, { Transform } from 'stream'; +import { firstValueFrom } from 'rxjs'; +import { omit } from 'lodash'; + +export abstract class OAIEngineExtension extends EngineExtension { + abstract apiUrl: string; + abstract apiKey?: string; + + constructor(protected readonly httpService: HttpService) { + super(); + } + + override onLoad(): void {} + + override async inference( + createChatDto: any, + headers: Record, + ): Promise { + const payload = this.transformPayload + ? this.transformPayload(createChatDto) + : createChatDto; + const { stream: isStream } = payload; + const additionalHeaders = omit(headers, [ + 'content-type', + 'authorization', + 'content-length', + ]); + const response = await firstValueFrom( + this.httpService.post(this.apiUrl, payload, { + headers: { + 'Content-Type': headers['content-type'] ?? 'application/json', + Authorization: this.apiKey + ? `Bearer ${this.apiKey}` + : headers['authorization'], + ...additionalHeaders, + }, + responseType: isStream ? 
'stream' : 'json', + }), + ); + + if (!response) { + throw new Error('No response'); + } + if (!this.transformResponse) { + return response.data; + } + if (isStream) { + const transformResponse = this.transformResponse.bind(this); + const lineStream = new Transform({ + transform(chunk, encoding, callback) { + const lines = chunk.toString().split('\n'); + const transformedLines = []; + for (const line of lines) { + if (line.trim().length > 0) { + const transformedLine = transformResponse(line, true); + if (transformedLine) { + transformedLines.push(`data: ${transformedLine}\n\n`); + } + } + } + callback(null, transformedLines.join('')); + }, + }); + return response.data.pipe(lineStream); + } + return this.transformResponse(response.data, false); + } +} diff --git a/platform/src/domain/config/config.interface.ts b/platform/src/domain/config/config.interface.ts new file mode 100644 index 000000000..b99d76600 --- /dev/null +++ b/platform/src/domain/config/config.interface.ts @@ -0,0 +1,8 @@ +export interface Config { + dataFolderPath: string; + cortexCppHost: string; + cortexCppPort: number; + // todo: will remove optional when all command request api server + apiServerPort?: number; + apiServerHost?: string; +} diff --git a/platform/src/domain/models/assistant.interface.ts b/platform/src/domain/models/assistant.interface.ts new file mode 100644 index 000000000..d49eb1362 --- /dev/null +++ b/platform/src/domain/models/assistant.interface.ts @@ -0,0 +1,12 @@ +import { Cortex } from '@cortexso/cortex.js'; +// TODO: Why we need to alias these? 
+ +export interface Assistant extends Cortex.Beta.Assistant { + avatar?: string; +} + +export type AssistantResponseFormatOption = + Cortex.Beta.Threads.AssistantResponseFormatOption; + +export interface AssistantToolResources + extends Cortex.Beta.Assistant.ToolResources {} diff --git a/platform/src/domain/models/download.interface.ts b/platform/src/domain/models/download.interface.ts new file mode 100644 index 000000000..8075f25f5 --- /dev/null +++ b/platform/src/domain/models/download.interface.ts @@ -0,0 +1,80 @@ +export class DownloadState { + /** + * The id of a particular download. Being used to prevent duplication of downloads. + */ + id: string; + + /** + * For displaying purposes. + */ + title: string; + + /** + * The type of download. + */ + type: DownloadType; + + /** + * The status of the download. + */ + status: DownloadStatus; + + /** + * Explanation of the error if the download failed. + */ + error?: string; + + /** + * The progress of the download. It is a number between 0 and 100. + */ + progress: number; + + /** + * The actual downloads. [DownloadState] is just a group to supporting for download multiple files. + */ + children: DownloadItem[]; +} + +export enum DownloadStatus { + Pending = 'pending', + Downloading = 'downloading', + Error = 'error', + Downloaded = 'downloaded', +} + +export class DownloadItem { + /** + * Filename of the download. 
+ */ + id: string; + + time: { + elapsed: number; + remaining: number; + }; + + size: { + total: number; + transferred: number; + }; + + progress: number; + + checksum?: string; + + status: DownloadStatus; + + error?: string; + + metadata?: Record; +} + +export interface DownloadStateEvent { + data: DownloadState[]; +} + +export enum DownloadType { + Model = 'model', + Miscelanous = 'miscelanous', + Engine = 'engine', +} diff --git a/platform/src/domain/models/huggingface.interface.ts b/platform/src/domain/models/huggingface.interface.ts new file mode 100644 index 000000000..2c2274b3e --- /dev/null +++ b/platform/src/domain/models/huggingface.interface.ts @@ -0,0 +1,74 @@ +export interface HuggingFaceModelVersion { + rfilename: string; + downloadUrl?: string; + fileSize?: number; + quantization?: Quantization; +} + +export interface HuggingFaceRepoSibling { + rfilename: string; + downloadUrl?: string; + fileSize?: number; + quantization?: Quantization; + lfs?: { oid?: string }; +} +export interface HuggingFaceRepoData { + id: string; + modelId: string; + modelUrl?: string; + author: string; + sha: string; + downloads: number; + lastModified: string; + private: boolean; + disabled: boolean; + gated: boolean; + pipeline_tag: 'text-generation'; + tags: Array<'transformers' | 'pytorch' | 'safetensors' | string>; + cardData: Record; + siblings: HuggingFaceRepoSibling[]; + createdAt: string; +} + +const CardDataKeys = [ + 'base_model', + 'datasets', + 'inference', + 'language', + 'library_name', + 'license', + 'model_creator', + 'model_name', + 'model_type', + 'pipeline_tag', + 'prompt_template', + 'quantized_by', + 'tags', +] as const; +export type CardDataKeysTuple = typeof CardDataKeys; +export type CardDataKeys = CardDataKeysTuple[number]; + +export const AllQuantizations = [ + 'Q3_K_S', + 'Q3_K_M', + 'Q3_K_L', + 'Q4_K_S', + 'Q4_K_M', + 'Q5_K_S', + 'Q5_K_M', + 'Q4_0', + 'Q4_1', + 'Q5_0', + 'Q5_1', + 'IQ2_XXS', + 'IQ2_XS', + 'Q2_K', + 'Q2_K_S', + 'Q6_K', + 'Q8_0', + 
'F16', + 'F32', + 'COPY', +]; +export type QuantizationsTuple = typeof AllQuantizations; +export type Quantization = QuantizationsTuple[number]; diff --git a/platform/src/domain/models/inference-setting.interface.ts b/platform/src/domain/models/inference-setting.interface.ts new file mode 100644 index 000000000..4a7a30c44 --- /dev/null +++ b/platform/src/domain/models/inference-setting.interface.ts @@ -0,0 +1,20 @@ +export interface InferenceSetting { + inferenceId: string; + + settings: InferenceSettingDocument[]; +} + +export interface InferenceSettingDocument { + key: string; + extensionName: string; + title: string; + description: string; + controllerType: string; + controllerProps: ControllerProps; +} + +export interface ControllerProps { + placeholder: string; + value: string; + type?: string; +} diff --git a/platform/src/domain/models/message.interface.ts b/platform/src/domain/models/message.interface.ts new file mode 100644 index 000000000..6cd2176a2 --- /dev/null +++ b/platform/src/domain/models/message.interface.ts @@ -0,0 +1,13 @@ +import { Cortex } from '@cortexso/cortex.js'; + +export interface Message extends Cortex.Beta.Threads.Message {} + +export type MessageContent = Cortex.Beta.Threads.MessageContent; + +export type TextContentBlock = Cortex.Beta.Threads.TextContentBlock; + +export interface MessageIncompleteDetails + extends Cortex.Beta.Threads.Message.IncompleteDetails {} + +export interface MessageAttachment + extends Cortex.Beta.Threads.Message.Attachment {} diff --git a/platform/src/domain/models/model.event.ts b/platform/src/domain/models/model.event.ts new file mode 100644 index 000000000..a28a9afa4 --- /dev/null +++ b/platform/src/domain/models/model.event.ts @@ -0,0 +1,38 @@ +export type ModelId = string; + +const ModelLoadingEvents = [ + 'starting', + 'stopping', + 'started', + 'stopped', + 'starting-failed', + 'stopping-failed', + 'model-downloaded', + 'model-downloaded-failed', + 'model-deleted', +] as const; +export type 
ModelLoadingEvent = (typeof ModelLoadingEvents)[number]; + +const AllModelStates = ['starting', 'stopping', 'started'] as const; +export type ModelState = (typeof AllModelStates)[number]; + +export interface ModelStatus { + model: ModelId; + status: ModelState; + metadata: Record; +} + +export interface ModelEvent { + model: ModelId; + event: ModelLoadingEvent; + metadata: Record; +} + +export const EmptyModelEvent = {}; + +export interface ModelStatusAndEvent { + data: { + status: Record; + event: ModelEvent | typeof EmptyModelEvent; + }; +} diff --git a/platform/src/domain/models/model.interface.ts b/platform/src/domain/models/model.interface.ts new file mode 100644 index 000000000..51861b768 --- /dev/null +++ b/platform/src/domain/models/model.interface.ts @@ -0,0 +1,186 @@ +import { Cortex } from '@cortexso/cortex.js'; + +export interface Model + extends Cortex.Model, + ModelSettingParams, + ModelRuntimeParams { + /** + * Model identifier. + */ + model: string; + + /** + * GGUF metadata: general.name + */ + name?: string; + + /** + * GGUF metadata: version + */ + version?: string; + + /** + * The model download source. It can be an external url or a local filepath. + */ + files: string[] | ModelArtifact; + + metadata?: Record; +} + +/** + * The available model settings. + */ +export interface ModelSettingParams { + /** + * The context length for model operations varies; the maximum depends on the specific model used. + */ + ctx_len?: number; + + /** + * The number of layers to load onto the GPU for acceleration. + */ + ngl?: number; + + /** + * Support embedding or not (legacy) + */ + embedding?: boolean; + + /** + * Number of parallel sequences to decode + */ + n_parallel?: number; + + /** + * Determines CPU inference threads, limited by hardware and OS. 
(Maximum determined by system) + */ + cpu_threads?: number; + + /** + * GGUF metadata: tokenizer.chat_template + */ + prompt_template?: string; + system_prompt?: string; + ai_prompt?: string; + user_prompt?: string; + llama_model_path?: string; + mmproj?: string; + cont_batching?: boolean; + + /** + * The model engine. + */ + engine?: string; + + /** + * The prompt to use for internal configuration + */ + pre_prompt?: string; + + /** + * The batch size for prompt eval step + */ + n_batch?: number; + + /** + * To enable prompt caching or not + */ + caching_enabled?: boolean; + + /** + * Group attention factor in self-extend + */ + grp_attn_n?: number; + + /** + * Group attention width in self-extend + */ + grp_attn_w?: number; + + /** + * Prevent system swapping of the model to disk in macOS + */ + mlock?: boolean; + + /** + * You can constrain the sampling using GBNF grammars by providing path to a grammar file + */ + grammar_file?: string; + + /** + * To enable Flash Attention, default is true + */ + flash_attn?: boolean; + + /** + * KV cache type: f16, q8_0, q4_0, default is f16 + */ + cache_type?: string; + + /** + * To enable mmap, default is true + */ + use_mmap?: boolean; + + /** + * Model type we want to use: llm or embedding, default value is llm (latest llama.cpp update) + */ + model_type?: string; + + /** + * The model path. + */ + model_path?: string; +} + +/** + * The available model runtime parameters. + */ +export interface ModelRuntimeParams { + /** + * Controls the randomness of the model’s output. + */ + temperature?: number; + token_limit?: number; + top_k?: number; + + /** + * Set probability threshold for more relevant outputs. + */ + top_p?: number; + + /** + * Enable real-time data processing for faster predictions. + */ + stream?: boolean; + + /* + * The maximum number of tokens the model will generate in a single response. 
+ */ + max_tokens?: number; + + /** + * Defines specific tokens or phrases at which the model will stop generating further output. + */ + stop?: string[]; + + /** + * Adjusts the likelihood of the model repeating words or phrases in its output. + */ + frequency_penalty?: number; + + /** + * Influences the generation of new and varied concepts in the model’s output. + */ + presence_penalty?: number; +} + +/** + * The model artifact object. + * In-case the model files is not a raw file list + */ +export interface ModelArtifact { + mmproj?: string; + llama_model_path?: string; + model_path?: string; +} diff --git a/platform/src/domain/models/prompt-template.interface.ts b/platform/src/domain/models/prompt-template.interface.ts new file mode 100644 index 000000000..a066c5ff6 --- /dev/null +++ b/platform/src/domain/models/prompt-template.interface.ts @@ -0,0 +1,6 @@ +export type PromptTemplate = { + system_prompt?: string; + ai_prompt?: string; + user_prompt?: string; + error?: string; +}; diff --git a/platform/src/domain/models/resource.interface.ts b/platform/src/domain/models/resource.interface.ts new file mode 100644 index 000000000..a048c38fd --- /dev/null +++ b/platform/src/domain/models/resource.interface.ts @@ -0,0 +1,21 @@ +export interface ResourceEvent { + data: ResourceStatus; +} + +export interface ResourceStatus { + mem: UsedMemInfo; + cpu: { + usage: number; + }; + gpus: GpuInfo[]; +} + +export interface UsedMemInfo { + total: number; + used: number; +} + +export interface GpuInfo { + name: string | undefined; + vram: UsedMemInfo; +} diff --git a/platform/src/domain/models/thread.interface.ts b/platform/src/domain/models/thread.interface.ts new file mode 100644 index 000000000..7e98a06b0 --- /dev/null +++ b/platform/src/domain/models/thread.interface.ts @@ -0,0 +1,13 @@ +import { Assistant } from './assistant.interface'; +import { Cortex } from '@cortexso/cortex.js'; + +export interface ThreadToolResources + extends 
Cortex.Beta.Threads.Thread.ToolResources {} + +export interface Thread extends Cortex.Beta.Threads.Thread { + title: string; + + assistants: Assistant[]; + + tool_resources: ThreadToolResources | null; +} diff --git a/platform/src/domain/repositories/extension.interface.ts b/platform/src/domain/repositories/extension.interface.ts new file mode 100644 index 000000000..5c453af86 --- /dev/null +++ b/platform/src/domain/repositories/extension.interface.ts @@ -0,0 +1,4 @@ +import { Extension } from '@/domain/abstracts/extension.abstract'; +import { Repository } from './repository.interface'; + +export abstract class ExtensionRepository extends Repository {} diff --git a/platform/src/domain/repositories/model.interface.ts b/platform/src/domain/repositories/model.interface.ts new file mode 100644 index 000000000..193af9a52 --- /dev/null +++ b/platform/src/domain/repositories/model.interface.ts @@ -0,0 +1,10 @@ +import { Model } from '@/domain/models/model.interface'; +import { Repository } from './repository.interface'; + +export abstract class ModelRepository extends Repository { + abstract loadModelByFile( + modelId: string, + filePath: string, + modelFile: string, + ): Promise; +} diff --git a/platform/src/domain/repositories/repository.interface.ts b/platform/src/domain/repositories/repository.interface.ts new file mode 100644 index 000000000..33eb18581 --- /dev/null +++ b/platform/src/domain/repositories/repository.interface.ts @@ -0,0 +1,11 @@ +export abstract class Repository { + abstract create(object: Partial): Promise; + + abstract findAll(): Promise; + + abstract findOne(id: string): Promise; + + abstract update(id: string, object: Partial): Promise; + + abstract remove(id: string): Promise; +} diff --git a/platform/src/domain/repositories/telemetry.interface.ts b/platform/src/domain/repositories/telemetry.interface.ts new file mode 100644 index 000000000..b5c82c89e --- /dev/null +++ b/platform/src/domain/repositories/telemetry.interface.ts @@ -0,0 +1,50 @@ 
+import { ModelStat } from '@/infrastructure/commanders/types/model-stat.interface'; +import { + BenchmarkHardware, + CrashReportAttributes, + EventAttributes, + Telemetry, + TelemetryAnonymized, + TelemetrySource, +} from '../telemetry/telemetry.interface'; + +export abstract class TelemetryRepository { + abstract readCrashReports( + callback: (Telemetry: Telemetry) => void, + ): Promise; + + abstract createCrashReport( + crashReport: CrashReportAttributes, + source?: TelemetrySource, + ): Promise; + + abstract getLastCrashReport(): Promise; + + abstract markLastCrashReportAsSent(): Promise; + + abstract sendTelemetryToOTelCollector( + endpoint: string, + telemetry: Telemetry, + ): Promise; + + abstract sendTelemetryToServer( + telemetryEvent: Telemetry['event'], + ): Promise; + + abstract sendEvent( + events: EventAttributes[], + source: TelemetrySource, + ): Promise; + + abstract getAnonymizedData(): Promise; + + abstract updateAnonymousData(data: TelemetryAnonymized): Promise; + + abstract sendBenchmarkToServer(data: { + hardware: BenchmarkHardware; + results: any; + metrics: any; + model: any; + sessionId: string; + }): Promise; +} diff --git a/platform/src/domain/telemetry/telemetry.interface.ts b/platform/src/domain/telemetry/telemetry.interface.ts new file mode 100644 index 000000000..3c400b1e6 --- /dev/null +++ b/platform/src/domain/telemetry/telemetry.interface.ts @@ -0,0 +1,115 @@ +// TODO. 
move openTelemetry interfaces to dto +export interface TelemetryResource { + osName: string; + osVersion: string; + architecture: string; + appVersion: string; + 'service.name'?: string; + source?: TelemetrySource; + cpu?: string; + gpus?: string; + sessionId?: string; +} + +export interface Resource { + attributes: Attribute[]; +} + +export enum TelemetrySource { + CLI = 'cli', + CORTEX_SERVER = 'cortex-server', + CORTEX_CPP = 'cortex-cpp', +} + +export interface TelemetryEvent { + resource: Resource; + scopeLogs: ScopeLog[]; +} + +interface StringValue { + stringValue: string; +} + +interface ObjectValue { + kvlist_value: { + values: { + key: string; + value: StringValue; + }[]; + }; +} + +export interface Attribute { + key: string; + value: StringValue | ObjectValue; +} + +interface ScopeLog { + scope: object; + logRecords: TelemetryLog[]; +} + +export interface TelemetryLog { + traceId: string; + timeUnixNano?: string; + endTimeUnixNano?: string; + severityText: string; + body: { + stringValue: string; + }; + attributes: Attribute[]; +} + +export interface CrashReportPayload { + modelId?: string; + endpoint?: string; + command?: string; +} + +export interface CrashReportAttributes { + stack?: string; + message: string; + payload: CrashReportPayload; + sessionId?: string; +} + +export interface TelemetryEventMetadata { + createdAt: string; + sentAt: string | null; +} + +export interface Telemetry { + metadata: TelemetryEventMetadata; + event: { + resourceLogs: TelemetryEvent[]; + }; +} + +export enum EventName { + INIT = 'init', + DOWNLOAD_MODEL = 'download-model', + CHAT = 'chat', + ACTIVATE = 'activate', + NEW_ACTIVATE = 'new_activate', +} + +export interface EventAttributes { + name: string; + modelId?: string; + sessionId?: string; +} + +export interface TelemetryAnonymized { + sessionId: string | null; + lastActiveAt?: string | null; +} + +export interface BenchmarkHardware { + gpu: any[]; + cpu: any; + board: any; + disk: any[]; + chassis: any; + 
memLayout: any[]; + os: any; +} diff --git a/platform/src/extensions/anthropic.engine.ts b/platform/src/extensions/anthropic.engine.ts new file mode 100644 index 000000000..6875d72b5 --- /dev/null +++ b/platform/src/extensions/anthropic.engine.ts @@ -0,0 +1,113 @@ +import stream from 'stream'; +import { HttpService } from '@nestjs/axios'; +import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; +import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; +import { EventEmitter2 } from '@nestjs/event-emitter'; +import { pick } from 'lodash'; +import { EngineStatus } from '@/domain/abstracts/engine.abstract'; + +/** + * A class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + */ +export default class AnthropicEngineExtension extends OAIEngineExtension { + apiUrl = 'https://api.anthropic.com/v1/messages'; + name = 'anthropic'; + productName = 'Anthropic Inference Engine'; + description = 'This extension enables Anthropic chat completion API calls'; + version = '0.0.1'; + apiKey?: string; + + constructor( + protected readonly httpService: HttpService, + protected readonly configsUsecases: ConfigsUsecases, + protected readonly eventEmmitter: EventEmitter2, + ) { + super(httpService); + + eventEmmitter.on('config.updated', async (data) => { + if (data.engine === this.name) { + this.apiKey = data.value; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } + }); + } + + async onLoad() { + const configs = (await this.configsUsecases.getGroupConfigs( + this.name, + )) as unknown as { apiKey: string }; + this.apiKey = configs?.apiKey; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? 
EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } + + override async inference( + dto: any, + headers: Record, + ): Promise { + headers['x-api-key'] = this.apiKey as string; + headers['Content-Type'] = 'application/json'; + headers['anthropic-version'] = '2023-06-01'; + return super.inference(dto, headers); + } + + transformPayload = (data: any): any => { + const system = data.messages.find((m: any) => m.role === 'system'); + const messages = data.messages.filter((m: any) => m.role !== 'system'); + return { + system: system?.content ?? '', + messages, + ...pick(data, ['model', 'stream', 'max_tokens']), + }; + }; + + transformResponse = (data: any) => { + // handling stream response + if (typeof data === 'string' && data.trim().length === 0) { + return ''; + } + if (typeof data === 'string' && data.startsWith('event: ')) { + return ''; + } + if (typeof data === 'string' && data.startsWith('data: ')) { + data = data.replace('data: ', ''); + const parsedData = JSON.parse(data); + if (parsedData.type !== 'content_block_delta') { + return ''; + } + const text = parsedData.delta?.text; + //convert to have this format data.choices[0]?.delta?.content + return JSON.stringify({ + choices: [ + { + delta: { + content: text, + }, + }, + ], + }); + } + // non-stream response + if (data.content && data.content.length > 0 && data.content[0].text) { + return { + choices: [ + { + message: { + content: data.content[0].text, + }, + }, + ], + }; + } + + console.error('Invalid response format:', data); + return ''; + }; +} diff --git a/platform/src/extensions/cohere.engine.ts b/platform/src/extensions/cohere.engine.ts new file mode 100644 index 000000000..9c719c05c --- /dev/null +++ b/platform/src/extensions/cohere.engine.ts @@ -0,0 +1,122 @@ +import { HttpService } from '@nestjs/axios'; +import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; +import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; +import { EventEmitter2 } from 
'@nestjs/event-emitter'; +import { EngineStatus } from '@/domain/abstracts/engine.abstract'; +import { ChatCompletionMessage } from '@/infrastructure/dtos/chat/chat-completion-message.dto'; + +enum RoleType { + user = 'USER', + chatbot = 'CHATBOT', + system = 'SYSTEM', +} + +type CoherePayloadType = { + chat_history?: Array<{ role: RoleType; message: string }>; + message?: string; + preamble?: string; +}; + +/** + * A class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + */ +export default class CoHereEngineExtension extends OAIEngineExtension { + apiUrl = 'https://api.cohere.ai/v1/chat'; + name = 'cohere'; + productName = 'Cohere Inference Engine'; + description = 'This extension enables Cohere chat completion API calls'; + version = '0.0.1'; + apiKey?: string; + + constructor( + protected readonly httpService: HttpService, + protected readonly configsUsecases: ConfigsUsecases, + protected readonly eventEmmitter: EventEmitter2, + ) { + super(httpService); + + eventEmmitter.on('config.updated', async (data) => { + if (data.engine === this.name) { + this.apiKey = data.value; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } + }); + } + + async onLoad() { + const configs = (await this.configsUsecases.getGroupConfigs( + this.name, + )) as unknown as { apiKey: string }; + this.apiKey = configs?.apiKey; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? 
EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } + + transformPayload = (payload: any): CoherePayloadType => { + console.log('payload', payload); + if (payload.messages.length === 0) { + return {}; + } + + const { messages, ...params } = payload; + const convertedData: CoherePayloadType = { + ...params, + chat_history: [], + message: '', + }; + (messages as ChatCompletionMessage[]).forEach( + (item: ChatCompletionMessage, index: number) => { + // Assign the message of the last item to the `message` property + if (index === messages.length - 1) { + convertedData.message = item.content as string; + return; + } + if (item.role === 'user') { + convertedData.chat_history!.push({ + role: 'USER' as RoleType, + message: item.content as string, + }); + } else if (item.role === 'assistant') { + convertedData.chat_history!.push({ + role: 'CHATBOT' as RoleType, + message: item.content as string, + }); + } else if (item.role === 'system') { + convertedData.preamble = item.content as string; + } + }, + ); + return convertedData; + }; + + transformResponse = (data: any, stream: boolean) => { + const text = + typeof data === 'object' ? data.text : (JSON.parse(data).text ?? ''); + return stream + ? 
JSON.stringify({ + choices: [ + { + delta: { + content: text, + }, + }, + ], + }) + : { + choices: [ + { + message: { + content: text, + }, + }, + ], + }; + }; +} diff --git a/platform/src/extensions/extensions.module.ts b/platform/src/extensions/extensions.module.ts new file mode 100644 index 000000000..690a739f5 --- /dev/null +++ b/platform/src/extensions/extensions.module.ts @@ -0,0 +1,40 @@ +import { Module } from '@nestjs/common'; +import GroqEngineExtension from './groq.engine'; +import MistralEngineExtension from './mistral.engine'; +import OpenAIEngineExtension from './openai.engine'; +import { HttpModule, HttpService } from '@nestjs/axios'; +import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; +import { ConfigsModule } from '@/usecases/configs/configs.module'; +import { EventEmitter2 } from '@nestjs/event-emitter'; +import AnthropicEngineExtension from './anthropic.engine'; +import OpenRouterEngineExtension from './openrouter.engine'; +import CoHereEngineExtension from './cohere.engine'; +import MartianEngineExtension from './martian.engine'; +import NvidiaEngineExtension from './nvidia.engine'; + +const provider = { + provide: 'EXTENSIONS_PROVIDER', + inject: [HttpService, ConfigsUsecases, EventEmitter2], + useFactory: ( + httpService: HttpService, + configUsecases: ConfigsUsecases, + eventEmitter: EventEmitter2, + ) => [ + new OpenAIEngineExtension(httpService, configUsecases, eventEmitter), + new GroqEngineExtension(httpService, configUsecases, eventEmitter), + new MistralEngineExtension(httpService, configUsecases, eventEmitter), + new AnthropicEngineExtension(httpService, configUsecases, eventEmitter), + new OpenRouterEngineExtension(httpService, configUsecases, eventEmitter), + new CoHereEngineExtension(httpService, configUsecases, eventEmitter), + new MartianEngineExtension(httpService, configUsecases, eventEmitter), + new NvidiaEngineExtension(httpService, configUsecases, eventEmitter), + ], +}; + +@Module({ + imports: [HttpModule, 
ConfigsModule], + controllers: [], + providers: [provider], + exports: [provider], +}) +export class ExtensionsModule {} diff --git a/platform/src/extensions/groq.engine.ts b/platform/src/extensions/groq.engine.ts new file mode 100644 index 000000000..c9835f8f8 --- /dev/null +++ b/platform/src/extensions/groq.engine.ts @@ -0,0 +1,49 @@ +import { HttpService } from '@nestjs/axios'; +import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; +import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; +import { EventEmitter2 } from '@nestjs/event-emitter'; +import { EngineStatus } from '@/domain/abstracts/engine.abstract'; + +/** + * A class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + */ +export default class GroqEngineExtension extends OAIEngineExtension { + apiUrl = 'https://api.groq.com/openai/v1/chat/completions'; + name = 'groq'; + productName = 'Groq Inference Engine'; + description = 'This extension enables fast Groq chat completion API calls'; + version = '0.0.1'; + apiKey?: string; + + constructor( + protected readonly httpService: HttpService, + protected readonly configsUsecases: ConfigsUsecases, + protected readonly eventEmmitter: EventEmitter2, + ) { + super(httpService); + + eventEmmitter.on('config.updated', async (data) => { + if (data.engine === this.name) { + this.apiKey = data.value; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } + }); + } + + async onLoad() { + const configs = (await this.configsUsecases.getGroupConfigs( + this.name, + )) as unknown as { apiKey: string }; + + this.apiKey = configs?.apiKey; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? 
EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } +} diff --git a/platform/src/extensions/martian.engine.ts b/platform/src/extensions/martian.engine.ts new file mode 100644 index 000000000..18ac19d8f --- /dev/null +++ b/platform/src/extensions/martian.engine.ts @@ -0,0 +1,49 @@ +import { HttpService } from '@nestjs/axios'; +import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; +import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; +import { EventEmitter2 } from '@nestjs/event-emitter'; +import { EngineStatus } from '@/domain/abstracts/engine.abstract'; + +/** + * A class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + */ +export default class MartianEngineExtension extends OAIEngineExtension { + apiUrl = 'https://withmartian.com/api/openai/v1/chat/completions'; + name = 'martian'; + productName = 'Martian Inference Engine'; + description = 'This extension enables Martian chat completion API calls'; + version = '0.0.1'; + apiKey?: string; + + constructor( + protected readonly httpService: HttpService, + protected readonly configsUsecases: ConfigsUsecases, + protected readonly eventEmmitter: EventEmitter2, + ) { + super(httpService); + + eventEmmitter.on('config.updated', async (data) => { + if (data.engine === this.name) { + this.apiKey = data.value; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } + }); + } + + async onLoad() { + const configs = (await this.configsUsecases.getGroupConfigs( + this.name, + )) as unknown as { apiKey: string }; + + this.apiKey = configs?.apiKey; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? 
EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } +} diff --git a/platform/src/extensions/mistral.engine.ts b/platform/src/extensions/mistral.engine.ts new file mode 100644 index 000000000..10d1725f0 --- /dev/null +++ b/platform/src/extensions/mistral.engine.ts @@ -0,0 +1,48 @@ +import { HttpService } from '@nestjs/axios'; +import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; +import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; +import { EventEmitter2 } from '@nestjs/event-emitter'; +import { EngineStatus } from '@/domain/abstracts/engine.abstract'; + +/** + * A class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + */ +export default class MistralEngineExtension extends OAIEngineExtension { + apiUrl = 'https://api.mistral.ai/v1/chat/completions'; + name = 'mistral'; + productName = 'Mistral Inference Engine'; + description = 'This extension enables Mistral chat completion API calls'; + version = '0.0.1'; + apiKey?: string; + + constructor( + protected readonly httpService: HttpService, + protected readonly configsUsecases: ConfigsUsecases, + protected readonly eventEmmitter: EventEmitter2, + ) { + super(httpService); + + eventEmmitter.on('config.updated', async (data) => { + if (data.engine === this.name) { + this.apiKey = data.value; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } + }); + } + + async onLoad() { + const configs = (await this.configsUsecases.getGroupConfigs( + this.name, + )) as unknown as { apiKey: string }; + this.apiKey = configs?.apiKey; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? 
EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } +} diff --git a/platform/src/extensions/nvidia.engine.ts b/platform/src/extensions/nvidia.engine.ts new file mode 100644 index 000000000..caec1a09c --- /dev/null +++ b/platform/src/extensions/nvidia.engine.ts @@ -0,0 +1,49 @@ +import { HttpService } from '@nestjs/axios'; +import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; +import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; +import { EventEmitter2 } from '@nestjs/event-emitter'; +import { EngineStatus } from '@/domain/abstracts/engine.abstract'; + +/** + * A class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + */ +export default class NvidiaEngineExtension extends OAIEngineExtension { + apiUrl = 'https://integrate.api.nvidia.com/v1/chat/completions'; + name = 'nvidia'; + productName = 'Nvidia Inference Engine'; + description = 'This extension enables Nvidia chat completion API calls'; + version = '0.0.1'; + apiKey?: string; + + constructor( + protected readonly httpService: HttpService, + protected readonly configsUsecases: ConfigsUsecases, + protected readonly eventEmmitter: EventEmitter2, + ) { + super(httpService); + + eventEmmitter.on('config.updated', async (data) => { + if (data.engine === this.name) { + this.apiKey = data.value; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } + }); + } + + async onLoad() { + const configs = (await this.configsUsecases.getGroupConfigs( + this.name, + )) as unknown as { apiKey: string }; + + this.apiKey = configs?.apiKey; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? 
EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } +} diff --git a/platform/src/extensions/openai.engine.ts b/platform/src/extensions/openai.engine.ts new file mode 100644 index 000000000..e1ab01455 --- /dev/null +++ b/platform/src/extensions/openai.engine.ts @@ -0,0 +1,48 @@ +import { HttpService } from '@nestjs/axios'; +import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; +import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; +import { EventEmitter2 } from '@nestjs/event-emitter'; +import { EngineStatus } from '@/domain/abstracts/engine.abstract'; + +/** + * A class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + */ +export default class OpenAIEngineExtension extends OAIEngineExtension { + apiUrl = 'https://api.openai.com/v1/chat/completions'; + name = 'openai'; + productName = 'OpenAI Inference Engine'; + description = 'This extension enables OpenAI chat completion API calls'; + version = '0.0.1'; + apiKey?: string; + + constructor( + protected readonly httpService: HttpService, + protected readonly configsUsecases: ConfigsUsecases, + protected readonly eventEmmitter: EventEmitter2, + ) { + super(httpService); + + eventEmmitter.on('config.updated', async (data) => { + if (data.engine === this.name) { + this.apiKey = data.value; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } + }); + } + + async onLoad() { + const configs = (await this.configsUsecases.getGroupConfigs( + this.name, + )) as unknown as { apiKey: string }; + this.apiKey = configs?.apiKey; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? 
EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } +} diff --git a/platform/src/extensions/openrouter.engine.ts b/platform/src/extensions/openrouter.engine.ts new file mode 100644 index 000000000..95bd1b8b6 --- /dev/null +++ b/platform/src/extensions/openrouter.engine.ts @@ -0,0 +1,55 @@ +import { HttpService } from '@nestjs/axios'; +import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; +import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; +import { EventEmitter2 } from '@nestjs/event-emitter'; +import { EngineStatus } from '@/domain/abstracts/engine.abstract'; + +/** + * A class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + */ +export default class OpenRouterEngineExtension extends OAIEngineExtension { + apiUrl = 'https://openrouter.ai/api/v1/chat/completions'; + name = 'openrouter'; + productName = 'OpenRouter Inference Engine'; + description = 'This extension enables OpenRouter chat completion API calls'; + version = '0.0.1'; + apiKey?: string; + + constructor( + protected readonly httpService: HttpService, + protected readonly configsUsecases: ConfigsUsecases, + protected readonly eventEmmitter: EventEmitter2, + ) { + super(httpService); + + eventEmmitter.on('config.updated', async (data) => { + if (data.engine === this.name) { + this.apiKey = data.value; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } + }); + } + + async onLoad() { + const configs = (await this.configsUsecases.getGroupConfigs( + this.name, + )) as unknown as { apiKey: string }; + this.apiKey = configs?.apiKey; + this.status = + (this.apiKey?.length ?? 0) > 0 + ? 
EngineStatus.READY + : EngineStatus.MISSING_CONFIGURATION; + } + + transformPayload = (data: any): any => { + return { + ...data, + model: 'openrouter/auto', + }; + }; +} diff --git a/platform/src/index.ts b/platform/src/index.ts new file mode 100644 index 000000000..092876409 --- /dev/null +++ b/platform/src/index.ts @@ -0,0 +1,103 @@ +import { + CORTEX_JS_SYSTEM_URL, + defaultCortexCppPort, + defaultCortexJsHost, + defaultCortexJsPort, +} from '@/infrastructure/constants/cortex'; +import { getApp } from './app'; +import { fileManagerService } from './infrastructure/services/file-manager/file-manager.service'; +import { CortexUsecases } from './usecases/cortex/cortex.usecases'; + +let host: string; +let port: number; +let enginePort: number; + +/** + * Start the API server + */ +export async function start( + name?: string, + address?: string, + portNumber?: number, + enginePortNumber?: number, + dataFolder?: string, +) { + if (name) { + fileManagerService.setConfigProfile(name); + const isProfileConfigExists = fileManagerService.profileConfigExists(name); + if (!isProfileConfigExists) { + await fileManagerService.writeConfigFile({ + ...fileManagerService.defaultConfig(), + apiServerHost: address || defaultCortexJsHost, + apiServerPort: port || defaultCortexJsPort, + cortexCppPort: Number(enginePort) || defaultCortexCppPort, + }); + } + } + const { + apiServerHost: configApiServerHost, + apiServerPort: configApiServerPort, + cortexCppPort: configCortexCppPort, + } = await fileManagerService.getConfig(); + + host = address || configApiServerHost || defaultCortexJsHost; + port = portNumber || configApiServerPort || defaultCortexJsPort; + if (host === 'localhost') { + host = '127.0.0.1'; + } + enginePort = + Number(enginePortNumber) || configCortexCppPort || defaultCortexCppPort; + const dataFolderPath = dataFolder; + + return startServer(dataFolderPath); +} + +async function startServer(dataFolderPath?: string) { + const config = await 
fileManagerService.getConfig(); + try { + if (dataFolderPath) { + await fileManagerService.writeConfigFile({ + ...config, + dataFolderPath, + }); + // load config again to create the data folder + await fileManagerService.getConfig(dataFolderPath); + } + const app = await getApp(host, port); + const cortexUsecases = await app.resolve(CortexUsecases); + await cortexUsecases.startCortex().catch((e) => { + throw e; + }); + const isServerOnline = await cortexUsecases.isAPIServerOnline(); + if (isServerOnline) { + console.log( + `Server is already running at http://${host}:${port}. Please use 'cortex stop' to stop the server.`, + ); + } + await app.listen(port, host); + await fileManagerService.writeConfigFile({ + ...config, + apiServerHost: host, + apiServerPort: port, + dataFolderPath: dataFolderPath || config.dataFolderPath, + cortexCppPort: enginePort, + }); + return app; + } catch (e) { + console.error(e); + // revert the data folder path if it was set + await fileManagerService.writeConfigFile({ + ...config, + }); + console.error(`Failed to start server. 
Is port ${port} in use?`); + } +} +/** + * Stop the API server + * @returns + */ +export async function stop(host?: string, port?: number) { + return fetch(CORTEX_JS_SYSTEM_URL(host, port), { + method: 'DELETE', + }); +} diff --git a/platform/src/infrastructure/commanders/base.command.ts b/platform/src/infrastructure/commanders/base.command.ts new file mode 100644 index 000000000..e6b23463f --- /dev/null +++ b/platform/src/infrastructure/commanders/base.command.ts @@ -0,0 +1,39 @@ +import { CommandRunner } from 'nest-commander'; +import { Injectable } from '@nestjs/common'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import ora from 'ora'; +import { CortexClient } from './services/cortex.client'; + +@Injectable() +export abstract class BaseCommand extends CommandRunner { + cortex: CortexClient; + constructor(readonly cortexUseCases: CortexUsecases) { + super(); + } + protected abstract runCommand( + passedParam: string[], + options?: Record, + ): Promise; + + async run( + passedParam: string[], + options?: Record, + ): Promise { + const checkingSpinner = ora('Checking API server online...').start(); + const result = await this.cortexUseCases.isAPIServerOnline(); + if (!result) { + checkingSpinner.fail( + 'API server is offline. 
Please run "cortex" before running this command', + ); + process.exit(1); + } + checkingSpinner.succeed('API server is online'); + if (!this.cortex) { + this.cortex = new CortexClient(); + } + await this.runCommand(passedParam, options); + } + protected setCortex(cortex: CortexClient) { + this.cortex = cortex; + } +} diff --git a/platform/src/infrastructure/commanders/benchmark.command.ts b/platform/src/infrastructure/commanders/benchmark.command.ts new file mode 100644 index 000000000..2820ded7e --- /dev/null +++ b/platform/src/infrastructure/commanders/benchmark.command.ts @@ -0,0 +1,338 @@ +import { SubCommand, Option } from 'nest-commander'; +import { + BenchmarkConfig, + ParametersConfig, +} from './types/benchmark-config.interface'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { BaseCommand } from './base.command'; +import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs'; +import { join } from 'path'; +import yaml from 'js-yaml'; +import si from 'systeminformation'; +import { Presets, SingleBar } from 'cli-progress'; +import { TelemetryUsecases } from '@/usecases/telemetry/telemetry.usecases'; +import { BenchmarkHardware } from '@/domain/telemetry/telemetry.interface'; +import { defaultBenchmarkConfiguration } from '../constants/benchmark'; +import { inspect } from 'util'; +import { Cortex } from '@cortexso/cortex.js'; +import { fileManagerService } from '../services/file-manager/file-manager.service'; + +@SubCommand({ + name: 'benchmark', + arguments: '[model_id]', + argsDescription: { + model_id: 'Model to benchmark with', + }, + description: + 'Benchmark and analyze the performance of a specific AI model using a variety of system resources', +}) +export class BenchmarkCommand extends BaseCommand { + constructor( + readonly cortexUsecases: CortexUsecases, + private readonly telemetryUsecases: TelemetryUsecases, + ) { + super(cortexUsecases); + } + + async runCommand( + passedParams: string[], + options?: 
Partial, + ): Promise { + return this.benchmark( + options ?? {}, + passedParams[0] + ? ({ model: passedParams[0] } as ParametersConfig) + : undefined, + ); + } + + @Option({ + flags: '-n, --num_rounds ', + description: 'Number of rounds to run the benchmark', + }) + parseRounds(value: number) { + return value; + } + + @Option({ + flags: '-c, --concurrency ', + description: 'Number of concurrent requests to run the benchmark', + }) + parseConcurrency(value: number) { + return value; + } + + @Option({ + flags: '-o, --output ', + description: 'Output format for the benchmark results. json or table', + }) + parseOutput(value: string) { + return value; + } + + config: BenchmarkConfig; + /** + * Benchmark and analyze the performance of a specific AI model using a variety of system resources + */ + async benchmark( + options: Partial, + params?: ParametersConfig, + ) { + return this.getBenchmarkConfig() + .then(async (config) => { + this.config = { + ...config, + ...options, + }; + + const modelId = params?.model ?? this.config.api.parameters.model; + + // TODO: Do we really need to submit these data? or just model information is enough? + const model = await this.cortex.models.retrieve(modelId); + + if (model) { + await this.cortex.models.start(modelId); + await this.runBenchmarks(model); + } + + process.exit(0); + }) + .catch((error) => console.log(error.message ?? 
error)); + } + + /** + * Get the benchmark configuration + * @returns the benchmark configuration + */ + private async getBenchmarkConfig() { + const benchmarkFolder = await fileManagerService.getBenchmarkPath(); + const configurationPath = join(benchmarkFolder, 'config.yaml'); + if (existsSync(configurationPath)) { + return yaml.load( + readFileSync(configurationPath, 'utf8'), + ) as BenchmarkConfig; + } else { + const config = yaml.dump(defaultBenchmarkConfiguration); + if (!existsSync(benchmarkFolder)) { + mkdirSync(benchmarkFolder, { + recursive: true, + }); + } + await writeFileSync(configurationPath, config, 'utf8'); + return defaultBenchmarkConfiguration; + } + } + + /** + * Get the system resources for benchmarking + * using the systeminformation library + * @returns the system resources + */ + private async getSystemResources(): Promise< + BenchmarkHardware & { + cpuLoad: any; + mem: any; + } + > { + return { + cpuLoad: await si.currentLoad(), + mem: await si.mem(), + gpu: (await si.graphics()).controllers, + cpu: await si.cpu(), + board: await si.baseboard(), + disk: await si.diskLayout(), + chassis: await si.chassis(), + memLayout: await si.memLayout(), + os: await si.osInfo(), + }; + } + + /** + * Get the resource change between two data points + * @param startData the start data point + * @param endData the end data point + * @returns the resource change + */ + private async getResourceChange(startData: any, endData: any) { + return { + cpuLoad: + startData.cpuLoad && endData.cpuLoad + ? ((endData.cpuLoad.currentLoad - startData.cpuLoad.currentLoad) / + startData.cpuLoad.currentLoad) * + 100 + : null, + mem: + startData.mem && endData.mem + ? 
((endData.mem.used - startData.mem.used) / startData.mem.total) * + 100 + : null, + }; + } + + /** + * Benchmark a user using the OpenAI API + * @returns + */ + private async benchmarkUser(model: Cortex.Models.Model) { + const startResources = await this.getSystemResources(); + const start = Date.now(); + let tokenCount = 0; + let firstTokenTime = null; + + try { + const stream = await this.cortex.chat.completions.create({ + model: model.id, + messages: this.config.api.parameters.messages, + max_tokens: this.config.api.parameters.max_tokens, + stream: true, + }); + + for await (const chunk of stream) { + if (!firstTokenTime && chunk.choices[0]?.delta?.content) { + firstTokenTime = Date.now(); + } + tokenCount += (chunk.choices[0]?.delta?.content || '').split( + /\s+/, + ).length; + } + } catch (error) { + console.error('Error during API call:', error); + return null; + } + + const latency = Date.now() - start; + const ttft = firstTokenTime ? firstTokenTime - start : null; + const endResources = await this.getSystemResources(); + const resourceChange = await this.getResourceChange( + startResources, + endResources, + ); + + return { + tokens: this.config.api.parameters.max_tokens, + token_length: tokenCount, // Dynamically calculated token count + latency, + resourceChange, + tpot: tokenCount ? 
latency / tokenCount : 0, + throughput: tokenCount / (latency / 1000), + ttft, + }; + } + + /** + * Calculate the percentiles of the data + * @param data the data to calculate percentiles for + * @param percentile the percentile to calculate + * @returns the percentile value + */ + private calculatePercentiles(data: number[], percentile: number) { + if (data.length === 0) return null; + const sorted = data + .filter((x: number) => x !== null) + .sort((a: number, b: number) => a - b); + const pos = (percentile / 100) * sorted.length; + if (pos < 1) return sorted[0]; + if (pos >= sorted.length) return sorted[sorted.length - 1]; + const lower = sorted[Math.floor(pos) - 1]; + const upper = sorted[Math.ceil(pos) - 1]; + return lower + (upper - lower) * (pos - Math.floor(pos)); + } + + /** + * Run the benchmarks + */ + private async runBenchmarks(model: Cortex.Models.Model) { + const allResults: any[] = []; + const rounds = this.config.num_rounds || 1; + + const bar = new SingleBar({}, Presets.shades_classic); + bar.start(rounds, 0); + + for (let i = 0; i < rounds; i++) { + const roundResults = []; + const hardwareBefore = await this.getSystemResources(); + + for (let j = 0; j < this.config.concurrency; j++) { + const result = await this.benchmarkUser(model); + if (result) { + roundResults.push(result); + } + } + + const hardwareAfter = await this.getSystemResources(); + const hardwareChanges = await this.getResourceChange( + hardwareBefore, + hardwareAfter, + ); + + allResults.push({ + round: i + 1, + results: roundResults, + hardwareChanges, + }); + + bar.update(i + 1); + } + + const metrics: any = { + p50: {}, + p75: {}, + p95: {}, + }; + const keys = ['latency', 'tpot', 'throughput', 'ttft']; + keys.forEach((key) => { + const data = allResults.flatMap((r) => + r.results.map((res: object) => res[key as keyof typeof res]), + ); + metrics.p50[key] = this.calculatePercentiles(data, 50); + metrics.p75[key] = this.calculatePercentiles(data, 75); + metrics.p95[key] = 
this.calculatePercentiles(data, 95); + }); + + const output = { + hardware: await this.getSystemResources(), + results: allResults, + metrics, + model, + }; + bar.stop(); + + const outputFilePath = join( + await fileManagerService.getBenchmarkPath(), + 'output.json', + ); + await this.telemetryUsecases.sendBenchmarkEvent({ + hardware: { + cpu: output.hardware.cpu, + gpu: output.hardware.gpu, + memLayout: output.hardware.memLayout, + board: output.hardware.board, + disk: output.hardware.disk, + chassis: output.hardware.chassis, + os: output.hardware.os, + }, + results: output.results, + metrics: output.metrics, + model, + }); + writeFileSync(outputFilePath, JSON.stringify(output, null, 2)); + console.log(`Benchmark results and metrics saved to ${outputFilePath}`); + + if (this.config.output === 'table') { + console.log('Results:'); + output.results.forEach((round) => { + console.log('Round ' + round.round + ':'); + console.table(round.results); + }); + console.log('Metrics:'); + console.table(output.metrics); + } else + console.log( + inspect(output, { + showHidden: false, + depth: null, + colors: true, + }), + ); + } +} diff --git a/platform/src/infrastructure/commanders/chat.command.ts b/platform/src/infrastructure/commanders/chat.command.ts new file mode 100644 index 000000000..35b2ef5ba --- /dev/null +++ b/platform/src/infrastructure/commanders/chat.command.ts @@ -0,0 +1,178 @@ +import { existsSync } from 'fs'; +import { SubCommand, Option, InquirerService } from 'nest-commander'; +import { exit } from 'node:process'; +import { SetCommandContext } from './decorators/CommandContext'; +import { TelemetryUsecases } from '@/usecases/telemetry/telemetry.usecases'; +import { + EventName, + TelemetrySource, +} from '@/domain/telemetry/telemetry.interface'; +import { ContextService } from '../services/context/context.service'; +import { BaseCommand } from './base.command'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { Engines } from 
'./types/engine.interface'; +import { join } from 'path'; +import { fileManagerService } from '../services/file-manager/file-manager.service'; +import { isRemoteEngine } from '@/utils/normalize-model-id'; +import { Cortex } from '@cortexso/cortex.js'; +import { ChatClient } from './services/chat-client'; +import { downloadProgress } from '@/utils/download-progress'; +import { DownloadType } from '@/domain/models/download.interface'; +import { checkRequiredVersion } from '@/utils/model-check'; + +type ChatOptions = { + threadId?: string; + message?: string; + attach: boolean; + preset?: string; +}; + +@SubCommand({ + name: 'chat', + description: 'Send a chat request to a model', + arguments: '[model_id] [message]', + argsDescription: { + model_id: + 'Model ID to chat with. If there is no model_id provided, it will prompt to select from running models.', + message: 'Message to send to the model', + }, +}) +@SetCommandContext() +export class ChatCommand extends BaseCommand { + chatClient: ChatClient; + + constructor( + private readonly inquirerService: InquirerService, + private readonly telemetryUsecases: TelemetryUsecases, + protected readonly cortexUsecases: CortexUsecases, + protected readonly contextService: ContextService, + ) { + super(cortexUsecases); + } + + async runCommand( + passedParams: string[], + options: ChatOptions, + ): Promise { + this.chatClient = new ChatClient(this.cortex); + let modelId = passedParams[0]; + // First attempt to get message from input or options + // Extract input from 1 to end of array + let message = options.message ?? passedParams.slice(1).join(' '); + + // Check for model existing + if (!modelId || !(await this.cortex.models.retrieve(modelId))) { + // Model ID is not provided + // first input might be message input + message = passedParams.length + ? passedParams.join(' ') + : (options.message ?? 
''); + // If model ID is not provided, prompt user to select from running models + const { data: models } = await this.cortex.models.list(); + if (models.length === 1) { + modelId = models[0].id; + } else if (models.length > 0) { + modelId = await this.modelInquiry(models); + } else { + exit(1); + } + } + + const existingModel = await this.cortex.models.retrieve(modelId); + if (!existingModel) { + process.exit(1); + } + + const engine = existingModel.engine || Engines.llamaCPP; + // Pull engine if not exist + if ( + !isRemoteEngine(engine) && + !existsSync( + join(await fileManagerService.getCortexCppEnginePath(), engine), + ) + ) { + console.log('Downloading engine...'); + await this.cortex.engines.init(engine); + await downloadProgress(this.cortex, undefined, DownloadType.Engine); + } + + const { version: engineVersion } = + await this.cortex.engines.retrieve(engine); + if ( + existingModel.engine_version && + !checkRequiredVersion(existingModel.engine_version, engineVersion) + ) { + console.log( + `Model engine version ${existingModel.engine_version} is not compatible with engine version ${engineVersion}`, + ); + process.exit(1); + } + + if (!message) options.attach = true; + void this.telemetryUsecases.sendEvent( + [ + { + name: EventName.CHAT, + modelId, + }, + ], + TelemetrySource.CLI, + ); + + const preset = await fileManagerService.getPreset(options.preset); + + return this.chatClient.chat( + modelId, + options.threadId, + message, // Accept both message from inputs or arguments + preset ? preset : {}, + ); + } + + modelInquiry = async (models: Cortex.Model[]) => { + const { model } = await this.inquirerService.inquirer.prompt({ + type: 'list', + name: 'model', + message: 'Select a model to chat with:', + choices: models.map((e) => ({ + name: e.id, + value: e.id, + })), + }); + return model; + }; + + @Option({ + flags: '-t, --thread ', + description: 'Thread Id. 
If not provided, will create new thread', + }) + parseThreadId(value: string) { + return value; + } + + @Option({ + flags: '-m, --message ', + description: 'Message to send to the model', + }) + parseModelId(value: string) { + return value; + } + + @Option({ + flags: '-a, --attach', + description: 'Attach to interactive chat session', + defaultValue: false, + name: 'attach', + }) + parseAttach() { + return true; + } + + @Option({ + flags: '-p, --preset ', + description: 'Apply a chat preset to the chat session', + }) + parsePreset(value: string) { + return value; + } +} diff --git a/platform/src/infrastructure/commanders/cortex-command.commander.ts b/platform/src/infrastructure/commanders/cortex-command.commander.ts new file mode 100644 index 000000000..f03d9e242 --- /dev/null +++ b/platform/src/infrastructure/commanders/cortex-command.commander.ts @@ -0,0 +1,240 @@ +import pkg from '@/../package.json'; +import { RootCommand, CommandRunner, Option } from 'nest-commander'; +import { ChatCommand } from './chat.command'; +import { ModelsCommand } from './models.command'; +import { RunCommand } from './run.command'; +import { ModelPullCommand } from './models/model-pull.command'; +import { PSCommand } from './ps.command'; +import { PresetCommand } from './presets.command'; +import { TelemetryCommand } from './telemetry.command'; +import { SetCommandContext } from './decorators/CommandContext'; +import { EmbeddingCommand } from './embeddings.command'; +import { BenchmarkCommand } from './benchmark.command'; +import chalk from 'chalk'; +import { ContextService } from '../services/context/context.service'; +import { EnginesCommand } from './engines.command'; +import { + defaultCortexCppPort, + defaultCortexJsHost, + defaultCortexJsPort, +} from '../constants/cortex'; +import { getApp } from '@/app'; +import { fileManagerService } from '../services/file-manager/file-manager.service'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { 
ServeStopCommand } from './serve-stop.command'; +import ora from 'ora'; +import { EnginesSetCommand } from './engines/engines-set.command'; + +type ServeOptions = { + address?: string; + port?: number; + logs?: boolean; + dataFolder?: string; + version?: boolean; + name?: string; + configPath?: string; + enginePort?: string; +}; + +@RootCommand({ + subCommands: [ + ModelsCommand, + ChatCommand, + RunCommand, + ModelPullCommand, + PSCommand, + PresetCommand, + TelemetryCommand, + EmbeddingCommand, + BenchmarkCommand, + EnginesCommand, + ServeStopCommand, + EnginesSetCommand, + ], + description: 'Cortex CLI', +}) +@SetCommandContext() +export class CortexCommand extends CommandRunner { + host: string; + port: number; + configHost: string; + configPort: number; + enginePort: number; + constructor( + readonly contextService: ContextService, + readonly cortexUseCases: CortexUsecases, + ) { + super(); + } + + async run(passedParams: string[], options?: ServeOptions): Promise { + if (options?.configPath) { + fileManagerService.setConfigPath(options.configPath); + } + if (options?.name) { + fileManagerService.setConfigProfile(options.name); + } + if (options?.name) { + const isProfileConfigExists = fileManagerService.profileConfigExists( + options.name, + ); + if (!isProfileConfigExists) { + await fileManagerService.writeConfigFile({ + ...fileManagerService.defaultConfig(), + apiServerHost: options?.address || defaultCortexJsHost, + apiServerPort: options?.port || defaultCortexJsPort, + cortexCppPort: Number(options?.enginePort) || defaultCortexCppPort, + }); + } + } + const { + apiServerHost: configApiServerHost, + apiServerPort: configApiServerPort, + cortexCppPort: configCortexCppPort, + } = await fileManagerService.getConfig(); + + this.configHost = configApiServerHost || defaultCortexJsHost; + this.configPort = configApiServerPort || defaultCortexJsPort; + + this.host = options?.address || configApiServerHost || defaultCortexJsHost; + this.port = options?.port || 
configApiServerPort || defaultCortexJsPort; + if (this.host === 'localhost') { + this.host = '127.0.0.1'; + } + this.enginePort = + Number(options?.enginePort) || + configCortexCppPort || + defaultCortexCppPort; + const showLogs = options?.logs || false; + const showVersion = options?.version || false; + const dataFolderPath = options?.dataFolder; + if (showVersion) { + console.log('\n'); + console.log(`Cortex CLI - v${pkg.version}`); + console.log(chalk.blue(`Github: ${pkg.homepage}`)); + return; + } + return this.startServer(showLogs, dataFolderPath); + } + + private async startServer(attach: boolean, dataFolderPath?: string) { + const config = await fileManagerService.getConfig(); + try { + const startEngineSpinner = ora('Starting Cortex engine...'); + await this.cortexUseCases.startCortex().catch((e) => { + startEngineSpinner.fail('Failed to start Cortex engine'); + throw e; + }); + startEngineSpinner.succeed('Cortex started successfully'); + const isServerOnline = await this.cortexUseCases.isAPIServerOnline(); + if (isServerOnline) { + console.log( + `Server is already running at http://${this.configHost}:${this.configPort}. 
Please use 'cortex stop' to stop the server.`, + ); + process.exit(0); + } + if (dataFolderPath) { + await fileManagerService.writeConfigFile({ + ...config, + dataFolderPath, + }); + // load config again to create the data folder + await fileManagerService.getConfig(dataFolderPath); + } + if (attach) { + const app = await getApp(this.host, this.port); + await app.listen(this.port, this.host); + } else { + await this.cortexUseCases.startServerDetached(this.host, this.port); + } + console.log( + chalk.blue(`Started server at http://${this.host}:${this.port}`), + ); + console.log( + chalk.blue( + `API Playground available at http://${this.host}:${this.port}/api`, + ), + ); + await fileManagerService.writeConfigFile({ + ...config, + apiServerHost: this.host, + apiServerPort: this.port, + dataFolderPath: dataFolderPath || config.dataFolderPath, + cortexCppPort: this.enginePort, + }); + if (!attach) process.exit(0); + } catch (e) { + console.error(e); + // revert the data folder path if it was set + await fileManagerService.writeConfigFile({ + ...config, + }); + console.error(`Failed to start server. Is port ${this.port} in use?`); + process.exit(1); + } + } + + @Option({ + flags: '-a, --address
', + description: 'Address to use', + }) + parseHost(value: string) { + return value; + } + + @Option({ + flags: '-p, --port ', + description: 'Port to serve the application', + }) + parsePort(value: string) { + return parseInt(value, 10); + } + + @Option({ + flags: '-l, --logs', + description: 'Show logs', + }) + parseLogs() { + return true; + } + + @Option({ + flags: '-df, --dataFolder ', + description: 'Set the data folder directory', + }) + parseDataFolder(value: string) { + return value; + } + + @Option({ + flags: '-cp, --configPath ', + description: 'Set the config folder directory', + }) + parseConfigFolder(value: string) { + return value; + } + + @Option({ + flags: '-v, --version', + description: 'Show version', + }) + parseVersion() { + return true; + } + + @Option({ + flags: '-n, --name ', + description: 'Name of the process', + }) + parseName(value: string) { + return value; + } + + @Option({ + flags: '-ep, --engine-port ', + description: 'Port to serve the engine', + }) + parseEnginePort(value: string) { + return value; + } +} diff --git a/platform/src/infrastructure/commanders/decorators/CommandContext.ts b/platform/src/infrastructure/commanders/decorators/CommandContext.ts new file mode 100644 index 000000000..04033c7cd --- /dev/null +++ b/platform/src/infrastructure/commanders/decorators/CommandContext.ts @@ -0,0 +1,34 @@ +type Constructor = new (...args: any[]) => any; + +export const SetCommandContext = () => { + return (constructor: Constructor) => { + const classMethods = Object.getOwnPropertyNames(constructor.prototype); + + classMethods.forEach((methodName) => { + if (methodName !== 'run') return; + const originalMethod = constructor.prototype[methodName]; + if (typeof originalMethod === 'function') { + constructor.prototype[methodName] = async function (...args: any[]) { + if (this.contextService) { + const parentCommandName = this.command.parent + ? 
this.command.parent.name() + : ''; + const commandName = this.command.name(); + this.contextService.set( + 'command', + `${parentCommandName} ${commandName}`, + ); + if (parentCommandName === 'models') { + const modelId = args[0]?.[0]; + if (modelId) { + this.contextService.set('model', modelId); + } + } + } + const result = await originalMethod.apply(this, args); + return result; + }; + } + }); + }; +}; diff --git a/platform/src/infrastructure/commanders/embeddings.command.ts b/platform/src/infrastructure/commanders/embeddings.command.ts new file mode 100644 index 000000000..bf2dc8df3 --- /dev/null +++ b/platform/src/infrastructure/commanders/embeddings.command.ts @@ -0,0 +1,107 @@ +import { InquirerService, Option, SubCommand } from 'nest-commander'; +import { inspect } from 'util'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { BaseCommand } from './base.command'; +import { Cortex } from '@cortexso/cortex.js'; +import ora from 'ora'; + +interface EmbeddingCommandOptions { + encoding_format?: string; + input?: string; + dimensions?: number; +} + +@SubCommand({ + name: 'embeddings', + arguments: '[model_id]', + description: 'Creates an embedding vector representing the input text.', + argsDescription: { + model_id: + 'Model to use for embedding. If not provided, it will prompt to select from running models.', + }, +}) +export class EmbeddingCommand extends BaseCommand { + constructor( + private readonly inquirerService: InquirerService, + readonly cortexUsecases: CortexUsecases, + ) { + super(cortexUsecases); + } + async runCommand( + passedParams: string[], + options: EmbeddingCommandOptions, + ): Promise { + let model = passedParams[0]; + const checkingSpinner = ora('Checking model...').start(); + // First attempt to get message from input or options + let input: string | string[] = options.input ?? 
passedParams.splice(1); + + // Check for model existing + if (!model || !(await this.cortex.models.retrieve(model))) { + // Model ID is not provided + // first input might be message input + input = passedParams ?? options.input; + // If model ID is not provided, prompt user to select from running models + const { data: models } = await this.cortex.models.list(); + if (models.length === 1) { + model = models[0].id; + } else if (models.length > 0) { + model = await this.modelInquiry(models); + } else { + checkingSpinner.fail('Model ID is required'); + process.exit(1); + } + } + checkingSpinner.succeed(`Model found`); + return this.cortex.embeddings + .create({ + input, + model, + }) + .then((res) => + inspect(res, { showHidden: false, depth: null, colors: true }), + ) + .then(console.log) + .catch((e) => console.error(e.message ?? e)); + } + + modelInquiry = async (models: Cortex.Model[]) => { + const { model } = await this.inquirerService.inquirer.prompt({ + type: 'list', + name: 'model', + message: 'Select model to chat with:', + choices: models.map((e) => ({ + name: e.id, + value: e.id, + })), + }); + return model; + }; + + @Option({ + flags: '-i, --input ', + description: + 'Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays.', + }) + parseInput(value: string) { + return value; + } + + @Option({ + flags: '-e, --encoding_format ', + description: + 'Encoding format for the embeddings. Supported formats are float and int.', + }) + parseEncodingFormat(value: string) { + return value; + } + + @Option({ + flags: '-d, --dimensions ', + description: + 'The number of dimensions the resulting output embeddings should have. 
Only supported in some models.', + }) + parseDimensionsFormat(value: string) { + return value; + } +} diff --git a/platform/src/infrastructure/commanders/engines.command.ts b/platform/src/infrastructure/commanders/engines.command.ts new file mode 100644 index 000000000..c4861f325 --- /dev/null +++ b/platform/src/infrastructure/commanders/engines.command.ts @@ -0,0 +1,87 @@ +import { invert } from 'lodash'; +import { Option, SubCommand } from 'nest-commander'; +import { SetCommandContext } from './decorators/CommandContext'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { EnginesListCommand } from './engines/engines-list.command'; +import { EnginesGetCommand } from './engines/engines-get.command'; +import { EnginesInitCommand } from './engines/engines-init.command'; +import { ModuleRef } from '@nestjs/core'; +import { EngineNamesMap } from './types/engine.interface'; +import { BaseCommand } from './base.command'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { EnginesSetCommand } from './engines/engines-set.command'; + +@SubCommand({ + name: 'engines', + subCommands: [EnginesListCommand, EnginesGetCommand, EnginesInitCommand], + description: 'Get cortex engines', + arguments: ' [subcommand]', +}) +@SetCommandContext() +export class EnginesCommand extends BaseCommand { + commandMap: { [key: string]: any } = { + list: EnginesListCommand, + get: EnginesGetCommand, + init: EnginesInitCommand, + set: EnginesSetCommand, + }; + + constructor( + readonly contextService: ContextService, + readonly moduleRef: ModuleRef, + readonly cortexUsecases: CortexUsecases, + ) { + super(cortexUsecases); + } + async runCommand(passedParams: string[], options: { vulkan: boolean }) { + const [parameter, command] = passedParams; + if (command !== 'list' && !parameter) { + console.error('Engine name is required.'); + return; + } + + // Handle the commands accordingly + const commandClass = this.commandMap[command 
as string]; + if (!commandClass) { + this.command?.help(); + return; + } + const engine = invert(EngineNamesMap)[parameter] || parameter; + await this.runEngineCommand( + commandClass, + [engine, ...passedParams.slice(2)], + options, + ); + } + + private async runEngineCommand( + commandClass: any, + params: string[] = [], + options?: { vulkan: boolean }, + ) { + const commandInstance = this.moduleRef.get(commandClass, { strict: false }); + if (commandInstance) { + commandInstance.setCortex(this.cortex); + await commandInstance.runCommand(params, options); + } else { + console.error('Command not found.'); + } + } + + @Option({ + flags: '-vk, --vulkan', + description: 'Install Vulkan engine', + defaultValue: false, + }) + parseVulkan() { + return true; + } + + @Option({ + flags: '-v, --version ', + description: 'Select version to install', + }) + parseVersion(value: string) { + return value; + } +} diff --git a/platform/src/infrastructure/commanders/engines/engines-get.command.ts b/platform/src/infrastructure/commanders/engines/engines-get.command.ts new file mode 100644 index 000000000..912087a6d --- /dev/null +++ b/platform/src/infrastructure/commanders/engines/engines-get.command.ts @@ -0,0 +1,36 @@ +import { SubCommand } from 'nest-commander'; +import { SetCommandContext } from '../decorators/CommandContext'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { EngineNamesMap, Engines } from '../types/engine.interface'; +import { BaseCommand } from '../base.command'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; + +@SubCommand({ + name: ' get', + description: 'Get an engine', + argsDescription: { + name: 'Engine name to get', + }, +}) +@SetCommandContext() +export class EnginesGetCommand extends BaseCommand { + constructor( + readonly contextService: ContextService, + readonly cortexUsecases: CortexUsecases, + ) { + super(cortexUsecases); + } + + async runCommand(passedParams: string[]): Promise { 
+ return this.cortex.engines.retrieve(passedParams[0]).then((engine) => { + if (!engine) { + console.error('Engine not found.'); + } else { + console.table({ + ...engine, + name: EngineNamesMap[engine.name as Engines] || engine.name, + }); + } + }); + } +} diff --git a/platform/src/infrastructure/commanders/engines/engines-init.command.ts b/platform/src/infrastructure/commanders/engines/engines-init.command.ts new file mode 100644 index 000000000..28d15a7f1 --- /dev/null +++ b/platform/src/infrastructure/commanders/engines/engines-init.command.ts @@ -0,0 +1,79 @@ +import { SubCommand } from 'nest-commander'; +import { SetCommandContext } from '../decorators/CommandContext'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { Engines } from '../types/engine.interface'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { BaseCommand } from '../base.command'; +import { defaultInstallationOptions } from '@/utils/init'; +import { Presets, SingleBar } from 'cli-progress'; +import ora from 'ora'; +import { InitEngineDto } from '@/infrastructure/dtos/engines/engines.dto'; +import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; + +@SubCommand({ + name: ' init', + description: 'Setup engine', + argsDescription: { + name: 'Engine name to setup', + }, +}) +@SetCommandContext() +export class EnginesInitCommand extends BaseCommand { + constructor( + private readonly cortexUsecases: CortexUsecases, + readonly contextService: ContextService, + ) { + super(cortexUsecases); + } + + async runCommand( + passedParams: string[], + options: InitEngineDto, + ): Promise { + const engine = passedParams[0]; + const params = passedParams.includes(Engines.llamaCPP) + ? 
{ + ...(await defaultInstallationOptions()), + ...options, + } + : {}; + + const configs = await fileManagerService.getConfig(); + const host = configs.cortexCppHost; + const port = configs.cortexCppPort; + // Should unload engine before installing engine + const unloadCortexEngineSpinner = ora('Unloading cortex...').start(); + if (await this.cortexUsecases.healthCheck(host, port)) { + await this.cortexUsecases.unloadCortexEngine(engine as Engines); + } + + unloadCortexEngineSpinner.succeed('Cortex unloaded'); + console.log(`Installing engine ${engine}...`); + + await this.cortex.engines.init(engine, params); + const response = await this.cortex.events.downloadEvent(); + + const progressBar = new SingleBar({}, Presets.shades_classic); + progressBar.start(100, 0); + + for await (const stream of response) { + if (stream.length) { + const data = stream[0] as any; + if (data.status === 'downloaded') break; + let totalBytes = 0; + let totalTransferred = 0; + data.children.forEach((child: any) => { + totalBytes += child.size.total; + totalTransferred += child.size.transferred; + }); + progressBar.update(Math.floor((totalTransferred / totalBytes) * 100)); + } + } + progressBar.stop(); + const startCortexSpinner = ora('Starting cortex...').start(); + await this.cortexUsecases.startCortex(); + startCortexSpinner.succeed('Cortex started'); + console.log('Engine installed successfully'); + process.exit(0); + } +} diff --git a/platform/src/infrastructure/commanders/engines/engines-list.command.ts b/platform/src/infrastructure/commanders/engines/engines-list.command.ts new file mode 100644 index 000000000..25f85949f --- /dev/null +++ b/platform/src/infrastructure/commanders/engines/engines-list.command.ts @@ -0,0 +1,30 @@ +import { SubCommand } from 'nest-commander'; +import { SetCommandContext } from '../decorators/CommandContext'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { EngineNamesMap } from '../types/engine.interface'; 
+import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { BaseCommand } from '../base.command'; + +@SubCommand({ + name: 'list', + description: 'Get all cortex engines', +}) +@SetCommandContext() +export class EnginesListCommand extends BaseCommand { + constructor( + readonly contextService: ContextService, + readonly cortexUseCases: CortexUsecases, + ) { + super(cortexUseCases); + } + + async runCommand(): Promise { + return this.cortex.engines.list().then(({ data: engines }) => { + const enginesTable = engines.map((engine) => ({ + ...engine, + name: EngineNamesMap[engine.name as string] || engine.name, + })); + console.table(enginesTable); + }); + } +} diff --git a/platform/src/infrastructure/commanders/engines/engines-set.command.ts b/platform/src/infrastructure/commanders/engines/engines-set.command.ts new file mode 100644 index 000000000..655c7d614 --- /dev/null +++ b/platform/src/infrastructure/commanders/engines/engines-set.command.ts @@ -0,0 +1,28 @@ +import { SubCommand } from 'nest-commander'; +import { SetCommandContext } from '../decorators/CommandContext'; +import { BaseCommand } from '../base.command'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; + +@SubCommand({ + name: ' set ', + description: 'Update an engine configurations', + argsDescription: { + name: 'Engine name to update', + }, +}) +@SetCommandContext() +export class EnginesSetCommand extends BaseCommand { + constructor(readonly cortexUsecases: CortexUsecases) { + super(cortexUsecases); + } + + async runCommand(passedParams: string[]): Promise { + const engineName = passedParams[0]; + const config = passedParams[1]; + const value = passedParams[2]; + return this.cortex.engines + .update(engineName, { config, value }) + .then(() => console.log('Update engine successfully')) + .catch((error) => console.error(error.message ?? 
error)); + } +} diff --git a/platform/src/infrastructure/commanders/models.command.ts b/platform/src/infrastructure/commanders/models.command.ts new file mode 100644 index 000000000..d66dd0989 --- /dev/null +++ b/platform/src/infrastructure/commanders/models.command.ts @@ -0,0 +1,87 @@ +import { SubCommand } from 'nest-commander'; +import { ModelStartCommand } from './models/model-start.command'; +import { ModelGetCommand } from './models/model-get.command'; +import { ModelListCommand } from './models/model-list.command'; +import { ModelStopCommand } from './models/model-stop.command'; +import { ModelRemoveCommand } from './models/model-remove.command'; +import { ModelUpdateCommand } from './models/model-update.command'; +import { BaseCommand } from './base.command'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { ModuleRef } from '@nestjs/core'; +import { ModelPullCommand } from './models/model-pull.command'; + +@SubCommand({ + name: 'models', + subCommands: [ + ModelStartCommand, + ModelStopCommand, + ModelListCommand, + ModelGetCommand, + ModelRemoveCommand, + ModelUpdateCommand, + ], + description: 'Subcommands for managing models', +}) +export class ModelsCommand extends BaseCommand { + constructor( + private readonly moduleRef: ModuleRef, + readonly cortexUsecases: CortexUsecases, + ) { + super(cortexUsecases); + } + + commandMap: { [key: string]: any } = { + pull: ModelPullCommand, + start: ModelStartCommand, + stop: ModelStopCommand, + list: ModelListCommand, + get: ModelGetCommand, + remove: ModelRemoveCommand, + update: ModelUpdateCommand, + }; + async runCommand(passedParam: string[], options: any) { + const [parameter, command, ...restParams] = passedParam; + if (command !== 'list' && !parameter) { + console.error('Model id is required.'); + return; + } + + // Handle the commands accordingly + const commandClass = this.commandMap[command as string]; + if (!commandClass) { + this.command?.help(); + return; + } + const 
modelId = parameter; + await this.runModelCommand( + commandClass, + [modelId, ...(restParams || [])], + options, + ); + } + + private async runModelCommand( + commandClass: any, + params: string[] = [], + options?: any, + ) { + const commandInstance = this.moduleRef.get(commandClass, { strict: false }); + if (commandInstance) { + commandInstance.setCortex(this.cortex); + await commandInstance.runCommand(params, options); + } else { + console.error('Command not found.'); + } + } + + help() { + console.log('Usage: cortex models [subcommand]'); + console.log('Commands:'); + console.log(' start - Start a model by ID'); + console.log(' stop - Stop a model by ID'); + console.log(' list - List all available models'); + console.log(' get - Get model details by ID'); + console.log(' remove - Remove a model by ID'); + console.log(' update - Update a model by ID'); + } +} diff --git a/platform/src/infrastructure/commanders/models/model-get.command.ts b/platform/src/infrastructure/commanders/models/model-get.command.ts new file mode 100644 index 000000000..3bc4e8fa5 --- /dev/null +++ b/platform/src/infrastructure/commanders/models/model-get.command.ts @@ -0,0 +1,29 @@ +import { SubCommand } from 'nest-commander'; +import { SetCommandContext } from '../decorators/CommandContext'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { BaseCommand } from '../base.command'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; + +@SubCommand({ + name: 'get', + description: 'Get a model by ID.', + arguments: '', + argsDescription: { + model_id: 'Model ID to get information about.', + }, +}) +@SetCommandContext() +export class ModelGetCommand extends BaseCommand { + constructor( + readonly contextService: ContextService, + readonly cortexUseCases: CortexUsecases, + ) { + super(cortexUseCases); + } + + async runCommand(passedParams: string[]): Promise { + const model = await this.cortex.models.retrieve(passedParams[0]); + if 
(!model) console.error('Model not found'); + else console.log(model); + } +} diff --git a/platform/src/infrastructure/commanders/models/model-list.command.ts b/platform/src/infrastructure/commanders/models/model-list.command.ts new file mode 100644 index 000000000..9e169733a --- /dev/null +++ b/platform/src/infrastructure/commanders/models/model-list.command.ts @@ -0,0 +1,44 @@ +import { SubCommand, Option } from 'nest-commander'; +import { SetCommandContext } from '../decorators/CommandContext'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { BaseCommand } from '../base.command'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; + +interface ModelListOptions { + format: 'table' | 'json'; +} +@SubCommand({ name: 'list', description: 'List all models locally.' }) +@SetCommandContext() +export class ModelListCommand extends BaseCommand { + constructor( + readonly contextService: ContextService, + readonly cortexUseCases: CortexUsecases, + ) { + super(cortexUseCases); + } + + async runCommand( + passedParams: string[], + option: ModelListOptions, + ): Promise { + const { data: models } = await this.cortex.models.list(); + option.format === 'table' + ? 
console.table( + models.map((e) => ({ + id: e.id, + engine: e.engine, + version: e.version, + })), + ) + : console.log(models); + } + + @Option({ + flags: '-f, --format ', + defaultValue: 'table', + description: 'Print models list in table or json format', + }) + parseModelId(value: string) { + return value; + } +} diff --git a/platform/src/infrastructure/commanders/models/model-pull.command.ts b/platform/src/infrastructure/commanders/models/model-pull.command.ts new file mode 100644 index 000000000..179484845 --- /dev/null +++ b/platform/src/infrastructure/commanders/models/model-pull.command.ts @@ -0,0 +1,93 @@ +import { exit } from 'node:process'; +import { SubCommand } from 'nest-commander'; +import { SetCommandContext } from '../decorators/CommandContext'; +import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; +import { TelemetryUsecases } from '@/usecases/telemetry/telemetry.usecases'; +import { + EventName, + TelemetrySource, +} from '@/domain/telemetry/telemetry.interface'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { existsSync } from 'fs'; +import { join } from 'node:path'; +import { checkModelCompatibility } from '@/utils/model-check'; +import { Engines } from '../types/engine.interface'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { BaseCommand } from '../base.command'; +import { downloadProgress } from '@/utils/download-progress'; +import { DownloadType } from '@/domain/models/download.interface'; +import ora from 'ora'; +import { isRemoteEngine } from '@/utils/normalize-model-id'; +import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; + +@SubCommand({ + name: 'pull', + aliases: ['download'], + arguments: '', + argsDescription: { model_id: 'Model repo to pull' }, + description: + 'Download a model from a registry. Working with HuggingFace repositories. 
For available models, please visit https://huggingface.co/cortexso', +}) +@SetCommandContext() +export class ModelPullCommand extends BaseCommand { + constructor( + private readonly telemetryUsecases: TelemetryUsecases, + readonly contextService: ContextService, + readonly cortexUsecases: CortexUsecases, + ) { + super(cortexUsecases); + } + + async runCommand(passedParams: string[]) { + if (passedParams.length < 1) { + console.error('Model Id is required'); + exit(1); + } + const modelId = passedParams[0]; + + await checkModelCompatibility(modelId); + if (await this.cortex.models.retrieve(modelId)) { + console.error('Model already exists.'); + exit(1); + } + + console.log('Downloading model...'); + await this.cortex.models.download(modelId).catch((e: Error) => { + if (e instanceof ModelNotFoundException) + console.error('Model does not exist.'); + else console.error(e.message ?? e); + exit(1); + }); + + await downloadProgress(this.cortex, modelId); + ora().succeed('Model downloaded'); + + const existingModel = await this.cortex.models.retrieve(modelId); + const engine = existingModel?.engine || Engines.llamaCPP; + + // Pull engine if not exist + if ( + !isRemoteEngine(engine) && + !existsSync( + join(await fileManagerService.getCortexCppEnginePath(), engine), + ) + ) { + console.log('\n'); + console.log('Downloading engine...'); + await this.cortex.engines.init(engine); + await downloadProgress(this.cortex, undefined, DownloadType.Engine); + } + this.telemetryUsecases.sendEvent( + [ + { + name: EventName.DOWNLOAD_MODEL, + modelId: passedParams[0], + }, + ], + TelemetrySource.CLI, + ); + console.log('\nDownload complete!'); + + exit(0); + } +} diff --git a/platform/src/infrastructure/commanders/models/model-remove.command.ts b/platform/src/infrastructure/commanders/models/model-remove.command.ts new file mode 100644 index 000000000..52e8aefdc --- /dev/null +++ b/platform/src/infrastructure/commanders/models/model-remove.command.ts @@ -0,0 +1,27 @@ +import { 
SubCommand } from 'nest-commander'; +import { SetCommandContext } from '../decorators/CommandContext'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { BaseCommand } from '../base.command'; + +@SubCommand({ + name: 'remove', + description: 'Remove a model by ID locally.', + arguments: '', + argsDescription: { + model_id: 'Model to remove', + }, +}) +@SetCommandContext() +export class ModelRemoveCommand extends BaseCommand { + constructor( + readonly contextService: ContextService, + readonly cortexUseCases: CortexUsecases, + ) { + super(cortexUseCases); + } + + async runCommand(passedParams: string[]): Promise { + await this.cortex.models.del(passedParams[0]).then(console.log); + } +} diff --git a/platform/src/infrastructure/commanders/models/model-start.command.ts b/platform/src/infrastructure/commanders/models/model-start.command.ts new file mode 100644 index 000000000..f345287c6 --- /dev/null +++ b/platform/src/infrastructure/commanders/models/model-start.command.ts @@ -0,0 +1,115 @@ +import { SubCommand, Option, InquirerService } from 'nest-commander'; +import ora from 'ora'; +import { exit } from 'node:process'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { SetCommandContext } from '../decorators/CommandContext'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { existsSync } from 'node:fs'; +import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; +import { join } from 'node:path'; +import { Engines } from '../types/engine.interface'; +import { checkModelCompatibility } from '@/utils/model-check'; +import { BaseCommand } from '../base.command'; +import { isRemoteEngine } from '@/utils/normalize-model-id'; +import { downloadProgress } from '@/utils/download-progress'; +import { DownloadType } from 
'@/domain/models/download.interface'; +import { printLastErrorLines } from '@/utils/logs'; + +type ModelStartOptions = { + preset?: string; +}; + +@SubCommand({ + name: 'start', + description: 'Start a model by ID.', + arguments: '[model_id]', + argsDescription: { + model_id: + 'Model ID to start. If there is no model ID, it will prompt you to select from the available models.', + }, +}) +@SetCommandContext() +export class ModelStartCommand extends BaseCommand { + constructor( + private readonly inquirerService: InquirerService, + readonly cortexUsecases: CortexUsecases, + readonly contextService: ContextService, + ) { + super(cortexUsecases); + } + + async runCommand( + passedParams: string[], + options: ModelStartOptions, + ): Promise { + let modelId = passedParams[0]; + const checkingSpinner = ora('Checking model...').start(); + if (!modelId) { + try { + modelId = await this.modelInquiry(); + } catch { + checkingSpinner.fail('Model ID is required'); + exit(1); + } + } + + const existingModel = await this.cortex.models.retrieve(modelId); + if (!existingModel) { + checkingSpinner.fail( + `Model ${modelId} not found on filesystem.\nPlease try 'cortex pull ${modelId}' first.`, + ); + process.exit(1); + } + + await checkModelCompatibility(modelId); + checkingSpinner.succeed('Model found'); + + const engine = existingModel.engine || Engines.llamaCPP; + // Pull engine if not exist + if ( + !isRemoteEngine(engine) && + !existsSync( + join(await fileManagerService.getCortexCppEnginePath(), engine), + ) + ) { + console.log('Downloading engine...'); + await this.cortex.engines.init(engine); + await downloadProgress(this.cortex, undefined, DownloadType.Engine); + } + + const parsedPreset = await fileManagerService.getPreset(options.preset); + + const startingSpinner = ora('Loading model...').start(); + + await this.cortex.models + .start(modelId, parsedPreset) + .then(() => startingSpinner.succeed('Model loaded')) + .catch(async (error) => { + 
startingSpinner.fail(error.message ?? error); + printLastErrorLines(await fileManagerService.getLogPath()); + }); + } + + modelInquiry = async () => { + const { data: models } = await this.cortex.models.list(); + if (!models.length) throw 'No models found'; + const { model } = await this.inquirerService.inquirer.prompt({ + type: 'list', + name: 'model', + message: 'Select a model to start:', + choices: models.map((e) => ({ + name: e.id, + value: e.id, + })), + }); + return model; + }; + + @Option({ + flags: '-p, --preset ', + description: 'Apply a chat preset to the chat session', + }) + parseTemplate(value: string) { + return value; + } +} diff --git a/platform/src/infrastructure/commanders/models/model-stop.command.ts b/platform/src/infrastructure/commanders/models/model-stop.command.ts new file mode 100644 index 000000000..2eb65666a --- /dev/null +++ b/platform/src/infrastructure/commanders/models/model-stop.command.ts @@ -0,0 +1,34 @@ +import { SubCommand } from 'nest-commander'; +import { SetCommandContext } from '../decorators/CommandContext'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { BaseCommand } from '../base.command'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import ora from 'ora'; + +@SubCommand({ + name: 'stop', + description: 'Stop a model by ID.', + arguments: '', + argsDescription: { + model_id: 'Model ID to stop.', + }, +}) +@SetCommandContext() +export class ModelStopCommand extends BaseCommand { + constructor( + readonly contextService: ContextService, + readonly cortexUseCases: CortexUsecases, + ) { + super(cortexUseCases); + } + + async runCommand(passedParams: string[]): Promise { + const loadingSpinner = ora('Unloading model...').start(); + await this.cortex.models + .stop(passedParams[0]) + .then(() => loadingSpinner.succeed('Model unloaded')) + .catch((e) => + loadingSpinner.fail(`Failed to unload model: ${e.message ?? 
e}`), + ); + } +} diff --git a/platform/src/infrastructure/commanders/models/model-update.command.ts b/platform/src/infrastructure/commanders/models/model-update.command.ts new file mode 100644 index 000000000..a9d5f84ed --- /dev/null +++ b/platform/src/infrastructure/commanders/models/model-update.command.ts @@ -0,0 +1,74 @@ +import { SubCommand, Option } from 'nest-commander'; +import { exit } from 'node:process'; +import { SetCommandContext } from '../decorators/CommandContext'; +import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { BaseCommand } from '../base.command'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; + +type UpdateOptions = { + model?: string; + options?: string[]; +}; + +@SubCommand({ + name: 'update', + description: 'Update configuration of a model.', + arguments: '', + argsDescription: { + model_id: 'Model ID to update configuration.', + }, +}) +@SetCommandContext() +export class ModelUpdateCommand extends BaseCommand { + constructor( + readonly contextService: ContextService, + readonly cortexUseCases: CortexUsecases, + ) { + super(cortexUseCases); + } + + async runCommand( + passedParams: string[], + option: UpdateOptions, + ): Promise { + const modelId = option.model; + if (!modelId) { + console.error('Model Id is required'); + exit(1); + } + + const options = option.options; + if (!options || options.length === 0) { + console.log('Nothing to update'); + exit(0); + } + + const toUpdate: UpdateModelDto = {}; + + options.forEach((option) => { + const [key, stringValue] = option.split('='); + Object.assign(toUpdate, { key, stringValue }); + }); + this.cortex.models.update(modelId, toUpdate as any); + } + + @Option({ + flags: '-m, --model ', + required: true, + description: 'Model Id to update', + }) + parseModelId(value: string) { + return value; + } + + @Option({ + flags: '-c, --options ', + 
description: + 'Specify the options to update the model. Syntax: -c option1=value1 option2=value2. For example: cortex models update -c max_tokens=100 temperature=0.5', + }) + parseOptions(option: string, optionsAccumulator: string[] = []): string[] { + optionsAccumulator.push(option); + return optionsAccumulator; + } +} diff --git a/platform/src/infrastructure/commanders/presets.command.ts b/platform/src/infrastructure/commanders/presets.command.ts new file mode 100644 index 000000000..ba1ea5057 --- /dev/null +++ b/platform/src/infrastructure/commanders/presets.command.ts @@ -0,0 +1,31 @@ +import { readdirSync } from 'fs'; +import { SubCommand } from 'nest-commander'; +import { join } from 'path'; +import { SetCommandContext } from './decorators/CommandContext'; +import { ContextService } from '../services/context/context.service'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { BaseCommand } from './base.command'; +import { fileManagerService } from '../services/file-manager/file-manager.service'; + +@SubCommand({ + name: 'presets', + description: 'Show all available presets', +}) +@SetCommandContext() +export class PresetCommand extends BaseCommand { + constructor( + readonly contextService: ContextService, + readonly cortexUsecases: CortexUsecases, + ) { + super(cortexUsecases); + } + async runCommand(): Promise { + return console.table( + readdirSync( + join(await fileManagerService.getDataFolderPath(), `presets`), + ).map((e) => ({ + preset: e.replace('.yaml', ''), + })), + ); + } +} diff --git a/platform/src/infrastructure/commanders/ps.command.ts b/platform/src/infrastructure/commanders/ps.command.ts new file mode 100644 index 000000000..f2268cdd6 --- /dev/null +++ b/platform/src/infrastructure/commanders/ps.command.ts @@ -0,0 +1,146 @@ +import ora from 'ora'; +import systeminformation from 'systeminformation'; +import { SubCommand } from 'nest-commander'; +import { SetCommandContext } from './decorators/CommandContext'; 
+import { ModelStat } from './types/model-stat.interface'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { BaseCommand } from './base.command'; +import { HttpStatus } from '@nestjs/common'; +import { Engines } from './types/engine.interface'; +import { ModelStatResponse } from '../providers/cortex/cortex.provider'; +import { firstValueFrom } from 'rxjs'; +import { HttpService } from '@nestjs/axios'; +import { CORTEX_CPP_MODELS_URL } from '../constants/cortex'; +import { fileManagerService } from '../services/file-manager/file-manager.service'; +import { getMemoryInformation } from '@/utils/system-resource'; + +@SubCommand({ + name: 'ps', + description: 'Show running models and their status', +}) +@SetCommandContext() +export class PSCommand extends BaseCommand { + constructor( + readonly cortexUsecases: CortexUsecases, + private readonly httpService: HttpService, + ) { + super(cortexUsecases); + } + async runCommand(): Promise { + const runningSpinner = ora('Running PS command...').start(); + return this.getModels() + .then((models: ModelStat[]) => { + runningSpinner.succeed(); + if (models.length) console.table(models); + else console.log('No models running'); + }) + .then(async () => { + const cpuUsage = ( + await systeminformation.currentLoad() + ).currentLoad.toFixed(2); + const gpusLoad = []; + const gpus = await systeminformation.graphics(); + for (const gpu of gpus.controllers) { + const totalVram = gpu.vram || 0; + gpusLoad.push({ + totalVram, + }); + } + const { total, used } = await getMemoryInformation() + const memoryUsage = ( + (used / total) * + 100 + ).toFixed(2); + const consumedTable = { + 'CPU Usage': `${cpuUsage}%`, + 'Memory Usage': `${memoryUsage}%`, + } as { + 'CPU Usage': string; + 'Memory Usage': string; + VRAM?: string; + }; + + if ( + gpusLoad.length > 0 && + gpusLoad.filter((gpu) => gpu.totalVram > 0).length > 0 + ) { + consumedTable['VRAM'] = gpusLoad + .map((gpu) => `${gpu.totalVram} MB`) + .join(', '); + 
} + console.table([consumedTable]); + }); + } + + /** + * Get models running in the Cortex C++ server + */ + async getModels(): Promise { + const configs = await fileManagerService.getConfig(); + const runningSpinner = ora('Getting models...').start(); + return new Promise((resolve, reject) => + firstValueFrom( + this.httpService.get( + CORTEX_CPP_MODELS_URL(configs.cortexCppHost, configs.cortexCppPort), + ), + ) + .then((res) => { + const data = res.data as ModelStatResponse; + if ( + res.status === HttpStatus.OK && + data && + Array.isArray(data.data) && + data.data.length > 0 + ) { + runningSpinner.succeed(); + resolve( + data.data.map((e) => { + const startTime = e.start_time ?? new Date(); + const currentTime = new Date(); + const duration = + currentTime.getTime() - new Date(startTime).getTime(); + return { + modelId: e.id, + engine: e.engine ?? Engines.llamaCPP, + status: 'running', + duration: this.formatDuration(duration), + ram: e.ram ?? '-', + vram: e.vram ?? '-', + }; + }), + ); + } else reject(); + }) + .catch(reject), + ).catch(() => { + runningSpinner.succeed(''); + return []; + }); + } + + private formatDuration(milliseconds: number): string { + const days = Math.floor(milliseconds / (1000 * 60 * 60 * 24)); + const hours = Math.floor( + (milliseconds % (1000 * 60 * 60 * 24)) / (1000 * 60 * 60), + ); + const minutes = Math.floor((milliseconds % (1000 * 60 * 60)) / (1000 * 60)); + const seconds = Math.floor((milliseconds % (1000 * 60)) / 1000); + + let formattedDuration = ''; + + if (days > 0) { + formattedDuration += `${days}d `; + } + if (hours > 0) { + formattedDuration += `${hours}h `; + } + if (minutes > 0) { + formattedDuration += `${minutes}m `; + } + if (seconds > 0) { + formattedDuration += `${seconds}s `; + } + + return formattedDuration.trim(); + } +} diff --git a/platform/src/infrastructure/commanders/questions/init.questions.ts b/platform/src/infrastructure/commanders/questions/init.questions.ts new file mode 100644 index 
000000000..ee4675320 --- /dev/null +++ b/platform/src/infrastructure/commanders/questions/init.questions.ts @@ -0,0 +1,40 @@ +import { Question, QuestionSet } from 'nest-commander'; +import { platform } from 'node:process'; + +@QuestionSet({ name: 'init-run-mode-questions' }) +export class InitRunModeQuestions { + @Question({ + type: 'list', + message: 'Select run mode', + name: 'runMode', + default: 'CPU', + choices: ['CPU', 'GPU'], + when: () => platform !== 'darwin', + }) + parseRunMode(val: string) { + return val; + } + + @Question({ + type: 'list', + message: 'Select GPU type', + name: 'gpuType', + default: 'Nvidia', + choices: ['Nvidia', 'Others (Vulkan)'], + when: (answers: any) => answers.runMode === 'GPU', + }) + parseGPUType(val: string) { + return val; + } + + @Question({ + type: 'list', + message: 'Select CPU instructions set', + name: 'instructions', + choices: ['AVX2', 'AVX', 'AVX512'], + when: () => platform !== 'darwin', + }) + parseContent(val: string) { + return val; + } +} diff --git a/platform/src/infrastructure/commanders/run.command.ts b/platform/src/infrastructure/commanders/run.command.ts new file mode 100644 index 000000000..3d30b6a16 --- /dev/null +++ b/platform/src/infrastructure/commanders/run.command.ts @@ -0,0 +1,170 @@ +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { SubCommand, Option, InquirerService } from 'nest-commander'; +import { exit } from 'node:process'; +import ora from 'ora'; +import { existsSync } from 'fs'; +import { join } from 'path'; +import { Engines } from './types/engine.interface'; +import { + checkModelCompatibility, + checkRequiredVersion, +} from '@/utils/model-check'; +import { BaseCommand } from './base.command'; +import { isRemoteEngine } from '@/utils/normalize-model-id'; +import { ChatClient } from './services/chat-client'; +import { downloadProgress } from '@/utils/download-progress'; +import { DownloadType } from '@/domain/models/download.interface'; +import { isLocalFile } 
from '@/utils/urls'; +import { parse } from 'node:path'; +import { printLastErrorLines } from '@/utils/logs'; +import { fileManagerService } from '../services/file-manager/file-manager.service'; + +type RunOptions = { + threadId?: string; + preset?: string; + chat?: boolean; +}; + +@SubCommand({ + name: 'run', + arguments: '[model_id]', + argsDescription: { + model_id: + 'Model to run. If the model is not available, it will attempt to pull.', + }, + description: 'Shortcut to start a model and chat', +}) +export class RunCommand extends BaseCommand { + chatClient: ChatClient; + constructor( + protected readonly cortexUsecases: CortexUsecases, + private readonly inquirerService: InquirerService, + ) { + super(cortexUsecases); + } + + async runCommand(passedParams: string[], options: RunOptions): Promise { + this.chatClient = new ChatClient(this.cortex); + let modelId = passedParams[0]; + const checkingSpinner = ora('Checking model...').start(); + if (!modelId) { + try { + modelId = await this.modelInquiry(); + } catch { + checkingSpinner.fail('Model ID is required'); + exit(1); + } + } + + // Check model compatibility on this machine + await checkModelCompatibility(modelId, checkingSpinner); + + let existingModel = await this.cortex.models.retrieve(modelId); + // If not exist + // Try Pull + if (!existingModel) { + checkingSpinner.succeed(); + + console.log('Downloading model...'); + await this.cortex.models.download(modelId).catch((e: Error) => { + checkingSpinner.fail(e.message ?? 
e); + exit(1); + }); + await downloadProgress(this.cortex, modelId); + checkingSpinner.succeed('Model downloaded'); + + // Update to persisted modelId + // TODO: Should be retrieved from the request + if (isLocalFile(modelId)) { + modelId = parse(modelId).name; + } + + // Second check if model is available + existingModel = await this.cortex.models.retrieve(modelId); + if (!existingModel) { + checkingSpinner.fail(`Model is not available`); + process.exit(1); + } + } else { + checkingSpinner.succeed('Model found'); + } + + const engine = existingModel.engine || Engines.llamaCPP; + // Pull engine if not exist + if ( + !isRemoteEngine(engine) && + !existsSync( + join(await fileManagerService.getCortexCppEnginePath(), engine), + ) + ) { + console.log('Downloading engine...'); + await this.cortex.engines.init(engine); + await downloadProgress(this.cortex, undefined, DownloadType.Engine); + } + const { version: engineVersion } = + await this.cortex.engines.retrieve(engine); + if ( + existingModel.engine_version && + !checkRequiredVersion(existingModel.engine_version, engineVersion) + ) { + console.log( + `Model engine version ${existingModel.engine_version} is not compatible with engine version ${engineVersion}`, + ); + process.exit(1); + } + + const startingSpinner = ora('Loading model...').start(); + + return this.cortex.models + .start(modelId, await fileManagerService.getPreset(options.preset)) + .then(() => { + startingSpinner.succeed('Model loaded'); + if (options.chat) this.chatClient.chat(modelId, options.threadId); + else console.log("To start a chat session, use the '--chat' flag"); + }) + .catch(async (e) => { + startingSpinner.fail(e.message ?? e); + + printLastErrorLines(await fileManagerService.getLogPath()); + }); + } + + @Option({ + flags: '-t, --thread ', + description: 'Thread Id. 
If not provided, will create new thread', + }) + parseThreadId(value: string) { + return value; + } + + @Option({ + flags: '-p, --preset ', + description: 'Apply a chat preset to the chat session', + }) + parseTemplate(value: string) { + return value; + } + + @Option({ + flags: '-c, --chat', + description: 'Start a chat session after starting the model', + }) + parseChat() { + return true; + } + + modelInquiry = async () => { + const { data: models } = await this.cortex.models.list(); + if (!models.length) throw 'No models found'; + const { model } = await this.inquirerService.inquirer.prompt({ + type: 'list', + name: 'model', + message: 'Select a model to start:', + choices: models.map((e) => ({ + name: e.id, + value: e.id, + })), + }); + return model; + }; +} diff --git a/platform/src/infrastructure/commanders/serve-stop.command.ts b/platform/src/infrastructure/commanders/serve-stop.command.ts new file mode 100644 index 000000000..ea9039a4d --- /dev/null +++ b/platform/src/infrastructure/commanders/serve-stop.command.ts @@ -0,0 +1,18 @@ +import { SubCommand } from 'nest-commander'; +import { BaseCommand } from './base.command'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; + +@SubCommand({ + name: 'stop', + description: 'Stop the API server', +}) +export class ServeStopCommand extends BaseCommand { + constructor(private readonly cortexUsecases: CortexUsecases) { + super(cortexUsecases); + } + async runCommand(): Promise { + return this.cortexUsecases + .stopApiServer() + .then(() => console.log('API server stopped')); + } +} diff --git a/platform/src/infrastructure/commanders/services/chat-client.ts b/platform/src/infrastructure/commanders/services/chat-client.ts new file mode 100644 index 000000000..1f9105a5f --- /dev/null +++ b/platform/src/infrastructure/commanders/services/chat-client.ts @@ -0,0 +1,193 @@ +import { exit, stdin, stdout } from 'node:process'; +import * as readline from 'node:readline/promises'; +import { 
ChatCompletionMessage } from '@/infrastructure/dtos/chat/chat-completion-message.dto'; +import { CreateThreadDto } from '@/infrastructure/dtos/threads/create-thread.dto'; +import { CreateThreadAssistantDto } from '@/infrastructure/dtos/threads/create-thread-assistant.dto'; +import { ModelParameterParser } from '@/utils/model-parameter.parser'; +import { TextContentBlock } from '@/domain/models/message.interface'; +import { Cortex } from '@cortexso/cortex.js'; + +export class ChatClient { + private exitClause = 'exit()'; + private userIndicator = '>> '; + private exitMessage = 'Bye!'; + private cortex: Cortex; + constructor(readonly cortexInstance: Cortex) { + this.cortex = cortexInstance; + } + + async chat( + modelId: string, + threadId?: string, + message?: string, + settings?: Partial, + ): Promise { + console.log(`In order to exit, type '${this.exitClause}'.`); + const thread = await this.getOrCreateNewThread(modelId, threadId); + const messages: ChatCompletionMessage[] = ( + await this.cortex.beta.threads.messages.list(thread.id, { + limit: 10, + order: 'desc', + }) + ).data.map((message) => ({ + content: (message.content[0] as TextContentBlock).text.value, + role: message.role, + })); + + const rl = readline.createInterface({ + input: stdin, + output: stdout, + prompt: this.userIndicator, + }); + + if (message) + this.sendCompletionMessage( + message, + messages, + modelId, + thread.id, + rl, + settings, + ); + rl.prompt(); + + rl.on('close', async () => { + console.log(this.exitMessage); + exit(0); + }); + + rl.on('line', (input) => + this.sendCompletionMessage( + input, + messages, + modelId, + thread.id, + rl, + settings, + ), + ); + } + + async sendCompletionMessage( + userInput: string, + messages: Cortex.ChatCompletionMessageParam[], + modelId: string, + threadId: string, + rl: readline.Interface, + settings: Partial = {}, + ) { + if (!userInput || userInput.trim() === '') return; + + if (userInput.trim() === this.exitClause) { + rl.close(); + return; + 
} + + if ( + 'pre_prompt' in settings && + !messages.some((m) => m.role === 'system') + ) { + messages = [ + { + content: settings.pre_prompt as string, + role: 'system', + }, + ...messages, + ]; + } + + const model = await this.cortex.models.retrieve(modelId); + + messages.push({ + content: userInput, + role: 'user', + }); + + const createMessageDto: Cortex.Beta.Threads.Messages.MessageCreateParams = { + role: 'user', + content: userInput, + }; + this.cortex.beta.threads.messages.create(threadId, createMessageDto); + + const parser = new ModelParameterParser(); + const chatDto: Cortex.ChatCompletionCreateParamsStreaming = { + // Default results params + messages, + model: modelId, + max_tokens: 4098, + temperature: 0.7, + ...settings, + // Override with model inference params + ...parser.parseModelInferenceParams(model), + stream: true, + }; + + try { + const stream = await this.cortex.chat.completions.create({ + ...chatDto, + }); + + let assistantResponse = ''; + for await (const part of stream) { + const output = part.choices[0]?.delta?.content || ''; + assistantResponse += output; + stdout.write(output); + } + + messages.push({ + content: assistantResponse, + role: 'assistant', + }); + + this.cortex.beta.threads.messages + .create(threadId, { + role: 'assistant', + content: assistantResponse, + }) + .then(() => { + assistantResponse = ''; + console.log('\n'); + rl.prompt(); + }); + } catch (e) { + stdout.write( + `Something went wrong! 
Please check model status.\n${e.message}\n`, + ); + rl.prompt(); + } + } + + // Get or create a new thread + private async getOrCreateNewThread( + modelId: string, + threadId?: string, + ): Promise { + if (threadId) { + const thread = await this.cortex.beta.threads.retrieve(threadId); + if (!thread) throw new Error(`Cannot find thread with id: ${threadId}`); + return thread; + } + + const model = await this.cortex.models.retrieve(modelId); + if (!model) throw new Error(`Cannot find model with id: ${modelId}`); + + const assistantDto: CreateThreadAssistantDto = { + avatar: '', + id: 'jan', + object: 'assistant', + created_at: Date.now(), + name: 'Jan', + description: 'A default assistant that can use all downloaded models', + model: modelId, + instructions: '', + tools: [], + metadata: {}, + }; + + const createThreadDto: CreateThreadDto = { + assistants: [assistantDto], + }; + + return this.cortex.beta.threads.create(createThreadDto as any); + } +} diff --git a/platform/src/infrastructure/commanders/services/cortex.client.module.ts b/platform/src/infrastructure/commanders/services/cortex.client.module.ts new file mode 100644 index 000000000..8a363ee27 --- /dev/null +++ b/platform/src/infrastructure/commanders/services/cortex.client.module.ts @@ -0,0 +1,9 @@ +import { Module } from '@nestjs/common'; +import { CortexClient } from './cortex.client'; + +@Module({ + imports: [], + providers: [CortexClient], + exports: [CortexClient], +}) +export class CortexClientModule {} diff --git a/platform/src/infrastructure/commanders/services/cortex.client.ts b/platform/src/infrastructure/commanders/services/cortex.client.ts new file mode 100644 index 000000000..ce921a1f0 --- /dev/null +++ b/platform/src/infrastructure/commanders/services/cortex.client.ts @@ -0,0 +1,19 @@ +import { + cortexNamespace, + cortexServerAPI, +} from '@/infrastructure/constants/cortex'; +import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; +import Cortex from 
'@cortexso/cortex.js'; + +export class CortexClient extends Cortex { + serverConfigs: { host: string; port: number }; + + constructor() { + const configs = fileManagerService.getServerConfig(); + super({ + baseURL: cortexServerAPI(configs.host, configs.port), + apiKey: cortexNamespace, + }); + this.serverConfigs = configs; + } +} diff --git a/platform/src/infrastructure/commanders/telemetry.command.ts b/platform/src/infrastructure/commanders/telemetry.command.ts new file mode 100644 index 000000000..b4f3b3ec5 --- /dev/null +++ b/platform/src/infrastructure/commanders/telemetry.command.ts @@ -0,0 +1,44 @@ +import { SubCommand, Option } from 'nest-commander'; +import { TelemetryUsecases } from '@/usecases/telemetry/telemetry.usecases'; +import { TelemetryOptions } from './types/telemetry-options.interface'; +import { SetCommandContext } from './decorators/CommandContext'; +import { BaseCommand } from './base.command'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; + +@SubCommand({ + name: 'telemetry', + description: 'Get telemetry logs', +}) +@SetCommandContext() +export class TelemetryCommand extends BaseCommand { + constructor( + private readonly telemetryUseCase: TelemetryUsecases, + readonly cortexUseCases: CortexUsecases, + ) { + super(cortexUseCases); + } + + async runCommand( + _input: string[], + options?: TelemetryOptions, + ): Promise { + if (options?.type === 'crash') { + try { + await this.telemetryUseCase.readCrashReports((telemetryEvent) => { + console.log(JSON.stringify(telemetryEvent, null, 2)); + }); + } catch (e) { + console.error('Error reading crash reports', e); + } + } + } + @Option({ + flags: '-t, --type', + description: 'Type of telemetry', + defaultValue: 'crash', + choices: ['crash'], + }) + parseType(value: string) { + return value; + } +} diff --git a/platform/src/infrastructure/commanders/test/helpers.command.spec.ts b/platform/src/infrastructure/commanders/test/helpers.command.spec.ts new file mode 100644 index 
000000000..83259eb8b --- /dev/null +++ b/platform/src/infrastructure/commanders/test/helpers.command.spec.ts @@ -0,0 +1,146 @@ +// import { TestingModule } from '@nestjs/testing'; +// import { spy, Stub, stubMethod } from 'hanbi'; +// import { CommandTestFactory } from 'nest-commander-testing'; +// import { CommandModule } from '@/command.module'; +// import { LogService } from '@/infrastructure/commanders/test/log.service'; +// import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; + +// import { join } from 'path'; +// import { rmSync } from 'fs'; +// import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; + +// let commandInstance: TestingModule, +// exitSpy: Stub, +// stdoutSpy: Stub, +// stderrSpy: Stub; +export const timeout = 500000; + +// beforeAll( +// () => +// new Promise(async (res) => { +// stubMethod(process.stderr, 'write'); +// exitSpy = stubMethod(process, 'exit'); +// stdoutSpy = stubMethod(process.stdout, 'write'); +// stderrSpy = stubMethod(process.stderr, 'write'); +// commandInstance = await CommandTestFactory.createTestingCommand({ +// imports: [CommandModule], +// }) +// .overrideProvider(LogService) +// .useValue({ log: spy().handler }) +// .compile(); + +// const fileService = +// await commandInstance.resolve(FileManagerService); + +// // Attempt to create test folder +// await fileService.writeConfigFile({ +// dataFolderPath: join(__dirname, 'test_data'), +// cortexCppHost: 'localhost', +// cortexCppPort: 3929, +// }); +// const cortexUseCases = +// await commandInstance.resolve(CortexUsecases); +// jest +// .spyOn(cortexUseCases, 'isAPIServerOnline') +// .mockImplementation(() => Promise.resolve(true)); +// res(); +// }), +// ); + +// afterEach(() => { +// stdoutSpy.reset(); +// stderrSpy.reset(); +// exitSpy.reset(); +// }); + +// afterAll( +// () => +// new Promise(async (res) => { +// // Attempt to clean test folder +// rmSync(join(__dirname, 'test_data'), { +// recursive: true, 
+// force: true, +// }); +// res(); +// }), +// ); + +describe('Helper commands', () => { + test( + 'Init with hardware auto detection', + async () => { + // // await CommandTestFactory.run(commandInstance, ['init', '-s']); + // // + // // // Wait for a brief period to allow the command to execute + // // await new Promise((resolve) => setTimeout(resolve, 1000)); + // // + // // expect(stdoutSpy.firstCall?.args.length).toBeGreaterThan(0); + }, + timeout, + ); + + // // test('Chat with option -m', async () => { + // // const logMock = stubMethod(console, 'log'); + // // + // // await CommandTestFactory.run(commandInstance, [ + // // 'chat', + // // // '-m', + // // // 'hello', + // // // '>output.txt', + // // ]); + // // expect(logMock.firstCall?.args[0]).toBe("Inorder to exit, type 'exit()'."); + // // // expect(exitSpy.callCount).toBe(1); + // // // expect(exitSpy.firstCall?.args[0]).toBe(1); +}); + +// test( +// 'Show stop running models', +// async () => { +// // const tableMock = stubMethod(console, 'table'); +// // const logMock = stubMethod(console, 'log'); +// // await CommandTestFactory.run(commandInstance, ['stop']); +// // await CommandTestFactory.run(commandInstance, ['ps']); +// // expect(logMock.firstCall?.args[0]).toEqual('API server stopped'); +// // expect(tableMock.firstCall?.args[0]).toBeInstanceOf(Array); +// // expect(tableMock.firstCall?.args[0].length).toEqual(0); +// }, +// timeout, +// ); + +// test('Help command return guideline to users', async () => { +// // await CommandTestFactory.run(commandInstance, ['-h']); +// // expect(stdoutSpy.firstCall?.args).toBeInstanceOf(Array); +// // expect(stdoutSpy.firstCall?.args.length).toBe(1); +// // expect(stdoutSpy.firstCall?.args[0]).toContain('display help for command'); +// // expect(exitSpy.callCount).toBeGreaterThan(1); +// // expect(exitSpy.firstCall?.args[0]).toBe(0); +// }); + +// test('Should handle missing command', async () => { +// // await CommandTestFactory.run(commandInstance, 
['--unknown']); +// // expect(stderrSpy.firstCall?.args[0]).toContain('error: unknown option'); +// // expect(stderrSpy.firstCall?.args[0]).toContain('--unknown'); +// // expect(exitSpy.callCount).toBeGreaterThan(0); +// // expect(exitSpy.firstCall?.args[0]).toBe(1); +// }); + +// // test('Local API server via default host/port localhost:1337/api', async () => { +// // await CommandTestFactory.run(commandInstance, ['serve', '--detach']); +// // +// // await new Promise((resolve) => setTimeout(resolve, 2000)); +// // +// // expect(stdoutSpy.firstCall?.args[0]).toContain( +// // 'Started server at http://localhost:1337', +// // ); +// // // Add a delay +// // // Temporally disable for further investigation +// // return new Promise(async (resolve) => { +// // setTimeout(async () => { +// // // Send a request to the API server to check if it's running +// // const response = await axios.get('http://localhost:1337/api'); +// // expect(response.status).toBe(200); +// // resolve(); +// // }, 5000); +// // }); +// // }, 15000); +// }); diff --git a/platform/src/infrastructure/commanders/test/log.service.ts b/platform/src/infrastructure/commanders/test/log.service.ts new file mode 100644 index 000000000..1151f5fb5 --- /dev/null +++ b/platform/src/infrastructure/commanders/test/log.service.ts @@ -0,0 +1,8 @@ +import { Injectable } from '@nestjs/common'; + +@Injectable() +export class LogService { + log(...args: any[]): void { + console.log(...args); + } +} diff --git a/platform/src/infrastructure/commanders/test/models.command.spec.ts b/platform/src/infrastructure/commanders/test/models.command.spec.ts new file mode 100644 index 000000000..ca3932ecd --- /dev/null +++ b/platform/src/infrastructure/commanders/test/models.command.spec.ts @@ -0,0 +1,131 @@ +import { TestingModule } from '@nestjs/testing'; +import { CommandTestFactory } from 'nest-commander-testing'; +import { CommandModule } from '@/command.module'; +import { join } from 'path'; +import { rmSync } from 'fs'; 
+import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; + +let commandInstance: TestingModule; + +beforeAll( + () => + new Promise(async (res) => { + commandInstance = await CommandTestFactory.createTestingCommand({ + imports: [CommandModule], + }) + // .overrideProvider(LogService) + // .useValue({}) + .compile(); + const fileService = + await commandInstance.resolve(FileManagerService); + + // Attempt to create test folder + await fileService.writeConfigFile({ + dataFolderPath: join(__dirname, 'test_data'), + cortexCppHost: 'localhost', + cortexCppPort: 3929, + }); + const cortexUseCases = + await commandInstance.resolve(CortexUsecases); + jest + .spyOn(cortexUseCases, 'isAPIServerOnline') + .mockImplementation(() => Promise.resolve(true)); + res(); + }), +); + +afterAll( + () => + new Promise(async (res) => { + // Attempt to clean test folder + rmSync(join(__dirname, 'test_data'), { + recursive: true, + force: true, + }); + res(); + }), +); + +export const modelName = 'tinyllama'; +describe('Action with models', () => { + // test('Init with CPU', async () => { + // const logMock = stubMethod(console, 'log'); + // + // logMock.passThrough(); + // CommandTestFactory.setAnswers(['CPU', '', 'AVX2']); + // + // await CommandTestFactory.run(commandInstance, ['setup']); + // expect(logMock.firstCall?.args[0]).toContain('engine file'); + // }, 50000); + // + + test('Empty model list', async () => { + // const logMock = stubMethod(console, 'table'); + // await CommandTestFactory.run(commandInstance, ['models', 'list']); + // expect(logMock.firstCall?.args[0]).toBeInstanceOf(Array); + // expect(logMock.firstCall?.args[0].length).toBe(0); + }, 20000); + + // + // test( + // 'Pull model and check with cortex ps', + // async () => { + // const logMock = stubMethod(console, 'log'); + // + // await CommandTestFactory.run(commandInstance, ['pull', modelName]); + // 
expect(logMock.lastCall?.args[0]).toContain('Download complete!'); + // + // const tableMock = stubMethod(console, 'table'); + // await CommandTestFactory.run(commandInstance, ['ps']); + // expect(tableMock.firstCall?.args[0].length).toBeGreaterThan(0); + // }, + // timeout, + // ); + // + // test( + // 'Run model and check with cortex ps', + // async () => { + // const logMock = stubMethod(console, 'log'); + // + // await CommandTestFactory.run(commandInstance, ['run', modelName]); + // expect([ + // "Inorder to exit, type 'exit()'.", + // `Model ${modelName} not found. Try pulling model...`, + // ]).toContain(logMock.lastCall?.args[0]); + // + // const tableMock = stubMethod(console, 'table'); + // await CommandTestFactory.run(commandInstance, ['ps']); + // expect(tableMock.firstCall?.args[0].length).toBeGreaterThan(0); + // }, + // timeout, + // ); + // + // test('Get model', async () => { + // const logMock = stubMethod(console, 'log'); + // + // await CommandTestFactory.run(commandInstance, ['models', 'get', modelName]); + // expect(logMock.firstCall?.args[0]).toBeInstanceOf(Object); + // expect(logMock.firstCall?.args[0].files.length).toBe(1); + // }); + // + // test('Many models in the list', async () => { + // const logMock = stubMethod(console, 'table'); + // await CommandTestFactory.run(commandInstance, ['models', 'list']); + // expect(logMock.firstCall?.args[0]).toBeInstanceOf(Array); + // expect(logMock.firstCall?.args[0].length).toBe(1); + // expect(logMock.firstCall?.args[0][0].id).toBe(modelName); + // }); + // + // test( + // 'Model already exists', + // async () => { + // const stdoutSpy = stubMethod(process.stdout, 'write'); + // const exitSpy = stubMethod(process, 'exit'); + // await CommandTestFactory.run(commandInstance, ['pull', modelName]); + // expect(stdoutSpy.firstCall?.args[0]).toContain('Model already exists'); + // expect(exitSpy.firstCall?.args[0]).toBe(1); + // }, + // timeout, + // ); +}); diff --git 
a/platform/src/infrastructure/commanders/types/benchmark-config.interface.ts b/platform/src/infrastructure/commanders/types/benchmark-config.interface.ts new file mode 100644 index 000000000..7a9bf980c --- /dev/null +++ b/platform/src/infrastructure/commanders/types/benchmark-config.interface.ts @@ -0,0 +1,32 @@ +import { Cortex } from '@cortexso/cortex.js'; + +export interface ApiConfig { + base_url: string; + api_key: string; + parameters: ParametersConfig; +} + +export interface ParametersConfig { + messages: Cortex.ChatCompletionMessageParam[]; + model: string; + stream?: boolean; + max_tokens?: number; + stop?: string[]; + frequency_penalty?: number; + presence_penalty?: number; + temperature?: number; + top_p?: number; +} + +export interface BenchmarkConfig { + api: ApiConfig; + prompts?: { + min: number; + max: number; + samples: number; + }; + output: string; + concurrency: number; + num_rounds: number; + hardware: string[]; +} diff --git a/platform/src/infrastructure/commanders/types/engine.interface.ts b/platform/src/infrastructure/commanders/types/engine.interface.ts new file mode 100644 index 000000000..c3562ecdb --- /dev/null +++ b/platform/src/infrastructure/commanders/types/engine.interface.ts @@ -0,0 +1,34 @@ +export enum Engines { + llamaCPP = 'cortex.llamacpp', + onnx = 'cortex.onnx', + tensorrtLLM = 'cortex.tensorrt-llm', + + // Remote engines + groq = 'groq', + mistral = 'mistral', + openai = 'openai', + anthropic = 'anthropic', + openrouter = 'openrouter', + cohere = 'cohere', + martian = 'martian', + nvidia = 'nvidia', +} + +export const EngineNamesMap: { + [key in string]: string; +} = { + [Engines.llamaCPP]: 'llamacpp', + [Engines.onnx]: 'onnx', + [Engines.tensorrtLLM]: 'tensorrt-llm', +}; + +export const RemoteEngines: Engines[] = [ + Engines.groq, + Engines.mistral, + Engines.openai, + Engines.anthropic, + Engines.openrouter, + Engines.cohere, + Engines.martian, + Engines.nvidia, +]; diff --git 
a/platform/src/infrastructure/commanders/types/init-options.interface.ts b/platform/src/infrastructure/commanders/types/init-options.interface.ts new file mode 100644 index 000000000..c8586a78c --- /dev/null +++ b/platform/src/infrastructure/commanders/types/init-options.interface.ts @@ -0,0 +1,9 @@ +export interface InitOptions { + runMode?: 'CPU' | 'GPU'; + gpuType?: 'Nvidia' | 'Others (Vulkan)'; + instructions?: 'AVX' | 'AVX2' | 'AVX512' | undefined; + cudaVersion?: '11' | '12'; + silent?: boolean; + vulkan?: boolean; + version?: string; +} diff --git a/platform/src/infrastructure/commanders/types/model-stat.interface.ts b/platform/src/infrastructure/commanders/types/model-stat.interface.ts new file mode 100644 index 000000000..336917b4f --- /dev/null +++ b/platform/src/infrastructure/commanders/types/model-stat.interface.ts @@ -0,0 +1,8 @@ +export interface ModelStat { + modelId: string; + engine?: string; + duration?: string; + status: string; + vram?: string; + ram?: string; +} diff --git a/platform/src/infrastructure/commanders/types/model-tokenizer.interface.ts b/platform/src/infrastructure/commanders/types/model-tokenizer.interface.ts new file mode 100644 index 000000000..60a5b3290 --- /dev/null +++ b/platform/src/infrastructure/commanders/types/model-tokenizer.interface.ts @@ -0,0 +1,8 @@ +export interface ModelMetadata { + contextLength: number; + ngl: number; + stopWord?: string; + promptTemplate: string; + version: number; + name?: string; +} diff --git a/platform/src/infrastructure/commanders/types/telemetry-options.interface.ts b/platform/src/infrastructure/commanders/types/telemetry-options.interface.ts new file mode 100644 index 000000000..de0db5a1e --- /dev/null +++ b/platform/src/infrastructure/commanders/types/telemetry-options.interface.ts @@ -0,0 +1,3 @@ +export interface TelemetryOptions { + type: 'crash'; +} diff --git a/platform/src/infrastructure/constants/benchmark.ts b/platform/src/infrastructure/constants/benchmark.ts new file mode 
100644 index 000000000..477c4eba3 --- /dev/null +++ b/platform/src/infrastructure/constants/benchmark.ts @@ -0,0 +1,36 @@ +import { BenchmarkConfig } from '@commanders/types/benchmark-config.interface'; + +export const defaultBenchmarkConfiguration: BenchmarkConfig = { + api: { + base_url: 'http://localhost:1337/v1', + api_key: '', + parameters: { + messages: [ + { + content: 'You are a helpful assistant.', + role: 'system', + }, + { + content: 'Hello!', + role: 'user', + }, + ], + model: 'tinyllama', + stream: true, + max_tokens: 2048, + frequency_penalty: 0, + presence_penalty: 0, + temperature: 0.7, + top_p: 0.95, + }, + }, + prompts: { + min: 1024, + max: 2048, + samples: 10, + }, + output: 'table', + hardware: ['cpu', 'gpu', 'psu', 'chassis', 'ram'], + concurrency: 1, + num_rounds: 10, +}; diff --git a/platform/src/infrastructure/constants/cortex.ts b/platform/src/infrastructure/constants/cortex.ts new file mode 100644 index 000000000..9e07868f8 --- /dev/null +++ b/platform/src/infrastructure/constants/cortex.ts @@ -0,0 +1,64 @@ +export const cortexNamespace = 'cortex'; + +export const databaseName = 'cortex'; + +export const databaseFile = `${databaseName}.db`; + +export const defaultCortexJsHost = '127.0.0.1'; +export const defaultCortexJsPort = 1337; + +export const defaultCortexCppHost = '127.0.0.1'; +export const defaultCortexCppPort = 3929; + +export const defaultEmbeddingModel = 'nomic-embed-text-v1'; + +export const cortexServerAPI = (host: string, port: number) => + `http://${host}:${port}/v1/`; + +// CORTEX CPP +export const CORTEX_CPP_EMBEDDINGS_URL = ( + host: string = defaultCortexCppHost, + port: number = defaultCortexCppPort, +) => `http://${host}:${port}/inferences/server/embedding`; + +export const CORTEX_CPP_PROCESS_DESTROY_URL = ( + host: string = defaultCortexCppHost, + port: number = defaultCortexCppPort, +) => `http://${host}:${port}/processmanager/destroy`; + +export const CORTEX_CPP_UNLOAD_ENGINE_URL = ( + host: string = 
defaultCortexCppHost, + port: number = defaultCortexCppPort, +) => `http://${host}:${port}/inferences/server/unloadengine`; + +export const CORTEX_CPP_HEALTH_Z_URL = ( + host: string = defaultCortexCppHost, + port: number = defaultCortexCppPort, +) => `http://${host}:${port}/healthz`; + +export const CORTEX_CPP_MODELS_URL = ( + host: string = defaultCortexCppHost, + port: number = defaultCortexCppPort, +) => `http://${host}:${port}/inferences/server/models`; + +export const CORTEX_JS_SYSTEM_URL = ( + host: string = defaultCortexJsHost, + port: number = defaultCortexJsPort, +) => `http://${host}:${port}/v1/system`; + +export const CORTEX_JS_HEALTH_URL_WITH_API_PATH = (apiUrl: string) => + `${apiUrl}/v1/system`; + +// INITIALIZATION +export const CORTEX_RELEASES_URL = + 'https://api.github.com/repos/janhq/cortex/releases'; + +export const CORTEX_ENGINE_RELEASES_URL = (engine: string) => + `https://api.github.com/repos/janhq/${engine}/releases`; + +export const CUDA_DOWNLOAD_URL = + 'https://catalog.jan.ai/dist/cuda-dependencies///cuda.tar.gz'; + +export const telemetryServerUrl = 'https://telemetry.jan.ai'; + +export const MIN_CUDA_VERSION = '12.4'; diff --git a/platform/src/infrastructure/constants/huggingface.ts b/platform/src/infrastructure/constants/huggingface.ts new file mode 100644 index 000000000..6f4a2bbe1 --- /dev/null +++ b/platform/src/infrastructure/constants/huggingface.ts @@ -0,0 +1,39 @@ +export const HUGGING_FACE_TREE_REF_URL = ( + repo: string, + tree: string, + path: string, +) => `https://huggingface.co/cortexso/${repo}/resolve/${tree}/${path}`; + +export const HUGGING_FACE_DOWNLOAD_FILE_MAIN_URL = ( + modelId: string, + fileName: string, +) => `https://huggingface.co/${modelId}/resolve/main/${fileName}`; + +export const HUGGING_FACE_REPO_URL = (author: string, repo: string) => + `https://huggingface.co/${author}/${repo}`; + +export const HUGGING_FACE_REPO_MODEL_API_URL = (repo: string) => + `https://huggingface.co/api/models/${repo}`; + +export 
const AllQuantizations = [ + 'Q3_K_S', + 'Q3_K_M', + 'Q3_K_L', + 'Q4_K_S', + 'Q4_K_M', + 'Q5_K_S', + 'Q5_K_M', + 'Q4_0', + 'Q4_1', + 'Q5_0', + 'Q5_1', + 'IQ2_XXS', + 'IQ2_XS', + 'Q2_K', + 'Q2_K_S', + 'Q6_K', + 'Q8_0', + 'F16', + 'F32', + 'COPY', +]; diff --git a/platform/src/infrastructure/constants/prompt-constants.ts b/platform/src/infrastructure/constants/prompt-constants.ts new file mode 100644 index 000000000..7d5dc522a --- /dev/null +++ b/platform/src/infrastructure/constants/prompt-constants.ts @@ -0,0 +1,45 @@ +//// HF Chat template +export const OPEN_CHAT_3_5_JINJA = `{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}`; + +export const ZEPHYR_JINJA = `{% for message in messages %} +{% if message['role'] == 'user' %} +{{ '<|user|> +' + message['content'] + eos_token }} +{% elif message['role'] == 'system' %} +{{ '<|system|> +' + message['content'] + eos_token }} +{% elif message['role'] == 'assistant' %} +{{ '<|assistant|> +' + message['content'] + eos_token }} +{% endif %} +{% if loop.last and add_generation_prompt %} +{{ '<|assistant|>' }} +{% endif %} +{% endfor %}`; + +export const LLAMA_3_JINJA = `{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %}`; + +//// Corresponding prompt template +export const OPEN_CHAT_3_5 = `GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:`; + +export const ZEPHYR = `<|system|> +{system_message} +<|user|> +{prompt} +<|assistant|> +`; + +export const COMMAND_R = 
`<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{response} +`; + +// getting from https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF +export const LLAMA_2 = `[INST] <<SYS>> +{system_message} +<</SYS>> +{prompt}[/INST]`; +export const LLAMA_3 = + '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'; diff --git a/platform/src/infrastructure/controllers/assistants.controller.spec.ts b/platform/src/infrastructure/controllers/assistants.controller.spec.ts new file mode 100644 index 000000000..9d642fcea --- /dev/null +++ b/platform/src/infrastructure/controllers/assistants.controller.spec.ts @@ -0,0 +1,31 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { AssistantsController } from './assistants.controller'; +import { AssistantsUsecases } from '@/usecases/assistants/assistants.usecases'; +import { DatabaseModule } from '@/infrastructure/database/database.module'; +import { ModelRepositoryModule } from '../repositories/models/model.module'; +import { DownloadManagerModule } from '../services/download-manager/download-manager.module'; +import { EventEmitterModule } from '@nestjs/event-emitter'; + +describe('AssistantsController', () => { + let controller: AssistantsController; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + imports: [ + EventEmitterModule.forRoot(), + DatabaseModule, + ModelRepositoryModule, + DownloadManagerModule, + ], + controllers: [AssistantsController], + providers: [AssistantsUsecases], + exports: [AssistantsUsecases], + }).compile(); + + controller = module.get(AssistantsController); + }); + + it('should be defined', () => { + expect(controller).toBeDefined(); + }); +}); diff --git 
a/platform/src/infrastructure/controllers/assistants.controller.ts b/platform/src/infrastructure/controllers/assistants.controller.ts new file mode 100644 index 000000000..1856a1d77 --- /dev/null +++ b/platform/src/infrastructure/controllers/assistants.controller.ts @@ -0,0 +1,128 @@ +import { + Body, + Controller, + DefaultValuePipe, + Delete, + Get, + Param, + Post, + Query, + UseInterceptors, +} from '@nestjs/common'; +import { AssistantsUsecases } from '@/usecases/assistants/assistants.usecases'; +import { CreateAssistantDto } from '@/infrastructure/dtos/assistants/create-assistant.dto'; +import { DeleteAssistantResponseDto } from '@/infrastructure/dtos/assistants/delete-assistant.dto'; +import { + ApiCreatedResponse, + ApiOkResponse, + ApiOperation, + ApiParam, + ApiTags, + ApiResponse, + ApiQuery, +} from '@nestjs/swagger'; +import { AssistantEntity } from '../entities/assistant.entity'; +import { TransformInterceptor } from '../interceptors/transform.interceptor'; + +@ApiTags('Assistants') +@Controller('assistants') +@UseInterceptors(TransformInterceptor) +export class AssistantsController { + constructor(private readonly assistantsService: AssistantsUsecases) {} + + @ApiOperation({ + summary: 'Create assistant', + description: 'Creates a new assistant.', + }) + @ApiCreatedResponse({ + description: 'The assistant has been successfully created.', + }) + @Post() + create(@Body() createAssistantDto: CreateAssistantDto) { + return this.assistantsService.create(createAssistantDto); + } + + @ApiOperation({ + summary: 'List assistants', + description: 'Returns a list of assistants.', + }) + @ApiOkResponse({ + description: 'Ok', + type: [AssistantEntity], + }) + @ApiQuery({ + name: 'limit', + type: Number, + required: false, + description: + 'A limit on the number of objects to be returned. 
Limit can range between 1 and 100, and the default is 20.', + }) + @ApiQuery({ + name: 'order', + type: String, + required: false, + description: + 'Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.', + }) + @ApiQuery({ + name: 'after', + type: String, + required: false, + description: + 'A cursor for use in pagination. after is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.', + }) + @ApiQuery({ + name: 'before', + type: String, + required: false, + description: + 'A cursor for use in pagination. before is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.', + }) + @Get() + findAll( + @Query('limit', new DefaultValuePipe(20)) limit: number, + @Query('order', new DefaultValuePipe('desc')) order: 'asc' | 'desc', + @Query('after') after?: string, + @Query('before') before?: string, + ) { + return this.assistantsService.listAssistants(limit, order, after, before); + } + + @ApiOperation({ + summary: 'Get assistant', + description: + "Retrieves a specific assistant defined by an assistant's `id`.", + }) + @ApiOkResponse({ + description: 'Ok', + type: AssistantEntity, + }) + @ApiParam({ + name: 'id', + required: true, + description: 'The unique identifier of the assistant.', + }) + @Get(':id') + findOne(@Param('id') id: string) { + return this.assistantsService.findOne(id); + } + + @ApiOperation({ + summary: 'Delete assistant', + description: "Deletes a specific assistant defined by an assistant's `id`.", + }) + @ApiResponse({ + status: 200, + description: 'The assistant has been successfully deleted.', + type: DeleteAssistantResponseDto, + }) + 
@ApiParam({ + name: 'id', + required: true, + description: 'The unique identifier of the assistant.', + }) + @Delete(':id') + remove(@Param('id') id: string) { + return this.assistantsService.remove(id); + } +} diff --git a/platform/src/infrastructure/controllers/chat.controller.spec.ts b/platform/src/infrastructure/controllers/chat.controller.spec.ts new file mode 100644 index 000000000..a372f760f --- /dev/null +++ b/platform/src/infrastructure/controllers/chat.controller.spec.ts @@ -0,0 +1,41 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { ChatController } from './chat.controller'; +import { ChatUsecases } from '@/usecases/chat/chat.usecases'; +import { DatabaseModule } from '../database/database.module'; +import { ExtensionModule } from '../repositories/extensions/extension.module'; +import { ModelRepositoryModule } from '../repositories/models/model.module'; +import { HttpModule } from '@nestjs/axios'; +import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; +import { EventEmitterModule } from '@nestjs/event-emitter'; +import { TelemetryModule } from '@/usecases/telemetry/telemetry.module'; +import { FileManagerModule } from '../services/file-manager/file-manager.module'; +import { ModelsModule } from '@/usecases/models/models.module'; + +describe('ChatController', () => { + let controller: ChatController; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + imports: [ + EventEmitterModule.forRoot(), + DatabaseModule, + ExtensionModule, + ModelRepositoryModule, + HttpModule, + DownloadManagerModule, + EventEmitterModule.forRoot(), + TelemetryModule, + FileManagerModule, + ModelsModule, + ], + controllers: [ChatController], + providers: [ChatUsecases], + }).compile(); + + controller = module.get(ChatController); + }); + + it('should be defined', () => { + expect(controller).toBeDefined(); + }); +}); diff --git 
a/platform/src/infrastructure/controllers/chat.controller.ts b/platform/src/infrastructure/controllers/chat.controller.ts new file mode 100644 index 000000000..ad04ab015 --- /dev/null +++ b/platform/src/infrastructure/controllers/chat.controller.ts @@ -0,0 +1,103 @@ +import { Body, Controller, Post, Headers, Res, HttpCode } from '@nestjs/common'; +import { CreateChatCompletionDto } from '@/infrastructure/dtos/chat/create-chat-completion.dto'; +import { ChatUsecases } from '@/usecases/chat/chat.usecases'; +import { Response } from 'express'; +import { ApiOperation, ApiTags, ApiResponse } from '@nestjs/swagger'; +import { ChatCompletionResponseDto } from '../dtos/chat/chat-completion-response.dto'; +import { TelemetryUsecases } from '@/usecases/telemetry/telemetry.usecases'; +import { EventName } from '@/domain/telemetry/telemetry.interface'; +import { extractCommonHeaders } from '@/utils/request'; + +@ApiTags('Inference') +@Controller('chat') +export class ChatController { + constructor( + private readonly chatService: ChatUsecases, + private readonly telemetryUsecases: TelemetryUsecases, + ) {} + + @ApiOperation({ + summary: 'Create chat completion', + description: + 'Creates a model response for the given conversation. 
The following parameters are not working for the `TensorRT-LLM` engine:\n- `frequency_penalty`\n- `presence_penalty`\n- `top_p`', + }) + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'Ok', + type: ChatCompletionResponseDto, + }) + @Post('completions') + async create( + @Headers() headers: Record<string, string>, + @Body() createChatDto: CreateChatCompletionDto, + @Res() res: Response, + ) { + const { stream } = createChatDto; + this.chatService + .inference(createChatDto, extractCommonHeaders(headers)) + .then((response) => { + if (stream) { + res.header('Content-Type', 'text/event-stream'); + response.pipe(res); + } else { + res.header('Content-Type', 'application/json'); + res.json(response); + } + }) + .catch((error) => { + const statusCode = error.response?.status ?? 400; + let errorMessage; + if (!stream) { + const data = error.response?.data; + if (!data) { + return res.status(500).send(error.message || 'An error occurred'); + } + return res + .status(statusCode) + .send( + data.error?.message || + data.message || + error.message || + 'An error occurred', + ); + } + const streamResponse = error.response?.data; + if (!streamResponse) { + return res.status(500).send(error.message || 'An error occurred'); + } + let data = ''; + streamResponse.on('data', (chunk: any) => { + data += chunk; + }); + + streamResponse.on('end', () => { + try { + const jsonResponse = JSON.parse(data); + errorMessage = + jsonResponse.error?.message || + jsonResponse.message || + error.message || + 'An error occurred'; + } catch (err) { + errorMessage = 'An error occurred while processing the response'; + } + return res + .status(error.statusCode ?? 400) + .send(errorMessage || error.message || 'An error occurred'); + }); + + streamResponse.on('error', (streamError: any) => { + errorMessage = streamError.message ?? 'An error occurred'; + return res + .status(error.statusCode ?? 
400) + .send(errorMessage || error.message || 'An error occurred'); + }); + }); + + this.telemetryUsecases.addEventToQueue({ + name: EventName.CHAT, + modelId: createChatDto.model, + }); + } +} diff --git a/platform/src/infrastructure/controllers/embeddings.controller.spec.ts b/platform/src/infrastructure/controllers/embeddings.controller.spec.ts new file mode 100644 index 000000000..f60087678 --- /dev/null +++ b/platform/src/infrastructure/controllers/embeddings.controller.spec.ts @@ -0,0 +1,44 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { EmbeddingsController } from './embeddings.controller'; +import { ChatUsecases } from '@/usecases/chat/chat.usecases'; +import { DatabaseModule } from '../database/database.module'; +import { ModelRepositoryModule } from '../repositories/models/model.module'; +import { ExtensionModule } from '../repositories/extensions/extension.module'; +import { HttpModule } from '@nestjs/axios'; +import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; +import { EventEmitterModule } from '@nestjs/event-emitter'; +import { TelemetryModule } from '@/usecases/telemetry/telemetry.module'; +import { FileManagerModule } from '../services/file-manager/file-manager.module'; +import { ModelsModule } from '@/usecases/models/models.module'; +import { CortexModule } from '@/usecases/cortex/cortex.module'; + +describe('EmbeddingsController', () => { + let controller: EmbeddingsController; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + imports: [ + EventEmitterModule.forRoot(), + DatabaseModule, + ModelRepositoryModule, + ExtensionModule, + HttpModule, + DownloadManagerModule, + EventEmitterModule.forRoot(), + TelemetryModule, + FileManagerModule, + ModelsModule, + CortexModule, + ], + controllers: [EmbeddingsController], + providers: [ChatUsecases], + exports: [ChatUsecases], + }).compile(); + + controller = 
module.get(EmbeddingsController); + }); + + it('should be defined', () => { + expect(controller).toBeDefined(); + }); +}); diff --git a/platform/src/infrastructure/controllers/embeddings.controller.ts b/platform/src/infrastructure/controllers/embeddings.controller.ts new file mode 100644 index 000000000..005c7327f --- /dev/null +++ b/platform/src/infrastructure/controllers/embeddings.controller.ts @@ -0,0 +1,26 @@ +import { Body, Controller, Post, HttpCode, Res } from '@nestjs/common'; +import { ChatUsecases } from '@/usecases/chat/chat.usecases'; +import { ApiOperation, ApiTags, ApiResponse } from '@nestjs/swagger'; +import { CreateEmbeddingsDto } from '../dtos/embeddings/embeddings-request.dto'; +import { EmbeddingsResponseDto } from '../dtos/chat/embeddings-response.dto'; + +@ApiTags('Embeddings') +@Controller('embeddings') +export class EmbeddingsController { + constructor(private readonly chatService: ChatUsecases) {} + + @ApiOperation({ + summary: 'Create embedding vector', + description: 'Creates an embedding vector representing the input text.', + }) + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'Ok', + type: EmbeddingsResponseDto, + }) + @Post() + async create(@Body() createEmbeddingsDto: CreateEmbeddingsDto) { + return this.chatService.embeddings(createEmbeddingsDto); + } +} diff --git a/platform/src/infrastructure/controllers/engines.controller.spec.ts b/platform/src/infrastructure/controllers/engines.controller.spec.ts new file mode 100644 index 000000000..6eb91ff36 --- /dev/null +++ b/platform/src/infrastructure/controllers/engines.controller.spec.ts @@ -0,0 +1,43 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { DatabaseModule } from '../database/database.module'; +import { ExtensionModule } from '../repositories/extensions/extension.module'; +import { ModelRepositoryModule } from '../repositories/models/model.module'; +import { HttpModule } from '@nestjs/axios'; +import { DownloadManagerModule } from 
'@/infrastructure/services/download-manager/download-manager.module'; +import { EventEmitterModule } from '@nestjs/event-emitter'; +import { TelemetryModule } from '@/usecases/telemetry/telemetry.module'; +import { FileManagerModule } from '../services/file-manager/file-manager.module'; +import { EnginesController } from './engines.controller'; +import { EnginesUsecases } from '@/usecases/engines/engines.usecase'; +import { EnginesModule } from '@/usecases/engines/engines.module'; +import { ConfigsModule } from '@/usecases/configs/configs.module'; + +describe('EnginesController', () => { + let controller: EnginesController; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + imports: [ + EventEmitterModule.forRoot(), + DatabaseModule, + ExtensionModule, + ModelRepositoryModule, + HttpModule, + DownloadManagerModule, + EventEmitterModule.forRoot(), + TelemetryModule, + FileManagerModule, + EnginesModule, + ConfigsModule, + ], + controllers: [EnginesController], + providers: [EnginesUsecases], + }).compile(); + + controller = module.get(EnginesController); + }); + + it('should be defined', () => { + expect(controller).toBeDefined(); + }); +}); diff --git a/platform/src/infrastructure/controllers/engines.controller.ts b/platform/src/infrastructure/controllers/engines.controller.ts new file mode 100644 index 000000000..9eb7fdcb5 --- /dev/null +++ b/platform/src/infrastructure/controllers/engines.controller.ts @@ -0,0 +1,122 @@ +import { + Controller, + Get, + Param, + HttpCode, + UseInterceptors, + Post, + Body, + Patch, + Res, +} from '@nestjs/common'; +import { ApiOperation, ApiParam, ApiTags, ApiResponse } from '@nestjs/swagger'; +import { Response } from 'express'; +import { TransformInterceptor } from '../interceptors/transform.interceptor'; +import { EnginesUsecases } from '@/usecases/engines/engines.usecase'; +import { EngineDto, InitEngineDto } from '../dtos/engines/engines.dto'; +import { CommonResponseDto } from 
'../dtos/common/common-response.dto'; +import { ConfigUpdateDto } from '../dtos/configs/config-update.dto'; + +@ApiTags('Engines') +@Controller('engines') +@UseInterceptors(TransformInterceptor) +export class EnginesController { + constructor( + private readonly enginesUsecases: EnginesUsecases, + private readonly initUsescases: EnginesUsecases, + ) {} + + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'Ok', + type: [EngineDto], + }) + @ApiOperation({ + summary: 'List available engines', + description: + 'Lists the currently available engines, including local and remote engines.', + }) + @Get() + findAll() { + return this.enginesUsecases.getEngines(); + } + + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'Ok', + type: EngineDto, + }) + @ApiOperation({ + summary: 'Get an engine', + description: + 'Retrieves an engine instance, providing basic information about the engine.', + }) + @ApiParam({ + name: 'name', + required: true, + description: 'The unique identifier of the engine.', + }) + @Get(':name(*)') + findOne(@Param('name') name: string) { + return this.enginesUsecases.getEngine(name); + } + + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'Ok', + type: CommonResponseDto, + }) + @ApiOperation({ + summary: 'Initialize an engine', + description: + 'Initializes an engine instance with the given name. 
It will download the engine if it is not available locally.', + }) + @ApiParam({ + name: 'name', + required: true, + description: 'The unique identifier of the engine.', + }) + @Post(':name(*)/init') + initialize( + @Param('name') name: string, + @Body() body: InitEngineDto | undefined, + @Res() res: Response, + ) { + try { + this.initUsescases.installEngine(body, name, true); + res.json({ + message: 'Engine initialization started successfully.', + }); + } catch (error) { + res.status(400).send(error.message); + } + } + + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'Ok', + type: CommonResponseDto, + }) + @ApiOperation({ + summary: 'Update an engine', + description: 'Updates the engine with configurations.', + }) + @ApiParam({ + name: 'name', + required: true, + description: 'The unique identifier of the engine.', + }) + @Patch(':name(*)') + update(@Param('name') name: string, @Body() configs?: any | undefined) { + console.log('configs', configs); + return this.enginesUsecases.updateConfigs( + configs.config, + configs.value, + name, + ); + } +} diff --git a/platform/src/infrastructure/controllers/models.controller.spec.ts b/platform/src/infrastructure/controllers/models.controller.spec.ts new file mode 100644 index 000000000..5f9cc2741 --- /dev/null +++ b/platform/src/infrastructure/controllers/models.controller.spec.ts @@ -0,0 +1,44 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { ModelsController } from './models.controller'; +import { ModelsUsecases } from '@/usecases/models/models.usecases'; +import { DatabaseModule } from '../database/database.module'; +import { ExtensionModule } from '../repositories/extensions/extension.module'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; +import { HttpModule } from '@nestjs/axios'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { ModelRepositoryModule } from '../repositories/models/model.module'; +import 
{ DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; +import { EventEmitterModule } from '@nestjs/event-emitter'; +import { TelemetryModule } from '@/usecases/telemetry/telemetry.module'; +import { ContextModule } from '../services/context/context.module'; + +describe('ModelsController', () => { + let controller: ModelsController; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + imports: [ + EventEmitterModule.forRoot(), + DatabaseModule, + ExtensionModule, + FileManagerModule, + HttpModule, + DownloadManagerModule, + ModelRepositoryModule, + DownloadManagerModule, + EventEmitterModule.forRoot(), + TelemetryModule, + ContextModule, + ], + controllers: [ModelsController], + providers: [ModelsUsecases, CortexUsecases], + exports: [ModelsUsecases], + }).compile(); + + controller = module.get(ModelsController); + }); + + it('should be defined', () => { + expect(controller).toBeDefined(); + }); +}); diff --git a/platform/src/infrastructure/controllers/models.controller.ts b/platform/src/infrastructure/controllers/models.controller.ts new file mode 100644 index 000000000..a2f2ac80c --- /dev/null +++ b/platform/src/infrastructure/controllers/models.controller.ts @@ -0,0 +1,303 @@ +import { + Controller, + Get, + Post, + Body, + Param, + Delete, + HttpCode, + UseInterceptors, + BadRequestException, + Patch, + Res, + HttpStatus, +} from '@nestjs/common'; +import { Response } from 'express'; +import { ModelsUsecases } from '@/usecases/models/models.usecases'; +import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto'; +import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; +import { ModelDto } from '@/infrastructure/dtos/models/model.dto'; +import { ListModelsResponseDto } from '@/infrastructure/dtos/models/list-model-response.dto'; +import { DeleteModelResponseDto } from '@/infrastructure/dtos/models/delete-model.dto'; +import { 
ApiOperation, ApiParam, ApiTags, ApiResponse } from '@nestjs/swagger'; +import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; +import { TransformInterceptor } from '../interceptors/transform.interceptor'; +import { ModelSettingsDto } from '../dtos/models/model-settings.dto'; +import { EventName } from '@/domain/telemetry/telemetry.interface'; +import { TelemetryUsecases } from '@/usecases/telemetry/telemetry.usecases'; +import { CommonResponseDto } from '../dtos/common/common-response.dto'; +import { HuggingFaceRepoSibling } from '@/domain/models/huggingface.interface'; + +@ApiTags('Models') +@Controller('models') +@UseInterceptors(TransformInterceptor) +export class ModelsController { + constructor( + private readonly modelsUsecases: ModelsUsecases, + private readonly telemetryUsecases: TelemetryUsecases, + ) {} + + @HttpCode(201) + @ApiResponse({ + status: 201, + description: 'The model has been successfully created.', + type: StartModelSuccessDto, + }) + @ApiOperation({ + summary: 'Create model', + description: 'Creates a model `.json` instance file manually.', + }) + @Post() + create(@Body() createModelDto: CreateModelDto) { + return this.modelsUsecases.create(createModelDto); + } + + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'The model has been successfully started.', + type: StartModelSuccessDto, + }) + @ApiOperation({ + summary: 'Start model', + description: 'Starts a model operation defined by a model `id`.', + }) + @ApiParam({ + name: 'modelId', + required: true, + description: 'The unique identifier of the model.', + }) + @Post(':modelId(*)/start') + startModel( + @Param('modelId') modelId: string, + @Body() params: ModelSettingsDto, + ) { + return this.modelsUsecases.startModel(modelId, params); + } + + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'The model has been successfully started.', + type: StartModelSuccessDto, + }) + @ApiOperation({ + summary: 'Start model by file 
path', + description: + 'Starts a model operation defined by a model `id` with a file path.', + }) + @ApiParam({ + name: 'modelId', + required: true, + description: 'The unique identifier of the model.', + }) + @Post(':modelId(*)/start-by-file') + startModelByFilePath( + @Param('modelId') modelId: string, + @Body() + params: ModelSettingsDto & { filePath: string; metadataPath: string }, + ) { + const { filePath, metadataPath, ...settings } = params; + return this.modelsUsecases.startModel( + modelId, + settings, + filePath, + metadataPath, + ); + } + + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'The model has been successfully stopped.', + type: StartModelSuccessDto, + }) + @ApiOperation({ + summary: 'Stop model', + description: 'Stops a model operation defined by a model `id`.', + }) + @ApiParam({ + name: 'modelId', + required: true, + description: 'The unique identifier of the model.', + }) + @Post(':modelId(*)/stop') + stopModel(@Param('modelId') modelId: string) { + return this.modelsUsecases.stopModel(modelId); + } + + @ApiOperation({ + summary: 'Abort model pull', + description: 'Abort the model pull operation.', + parameters: [ + { + in: 'path', + name: 'pull_id', + required: true, + description: 'The unique identifier of the pull.', + }, + ], + }) + @Delete(':pull_id(*)/pull') + abortPullModel(@Param('pull_id') pullId: string) { + return this.modelsUsecases.abortDownloadModel(pullId); + } + + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'Ok', + type: CommonResponseDto, + }) + @ApiOperation({ + summary: 'Pull a model', + description: + 'Pulls a model from cortex hub or huggingface and downloads it.', + }) + @ApiParam({ + name: 'modelId', + required: true, + description: 'The unique identifier of the model.', + }) + @ApiParam({ + name: 'fileName', + required: false, + description: 'The file name of the model to download.', + }) + @ApiParam({ + name: 'persistedModelId', + required: false, + description: 'The unique 
identifier of the model in your local storage.', + }) + @Post(':modelId(*)/pull') + pullModel( + @Res() res: Response, + @Param('modelId') modelId: string, + @Body() + body?: { + fileName?: string; + persistedModelId?: string; + }, + ) { + const { fileName, persistedModelId } = body || {}; + return this.modelsUsecases + .pullModel( + modelId, + false, + (files) => { + return new Promise( + async (resolve, reject) => { + const file = files.find( + (e) => + e.quantization && (!fileName || e.rfilename === fileName), + ); + if (!file) { + return reject(new BadRequestException('File not found')); + } + return resolve(file); + }, + ); + }, + persistedModelId, + ) + .then(() => + this.telemetryUsecases.addEventToQueue({ + name: EventName.DOWNLOAD_MODEL, + modelId, + }), + ) + .then(() => + res.status(HttpStatus.OK).json({ + message: 'Download model started successfully.', + }), + ) + .catch((e) => + res + .status(HttpStatus.CONFLICT) + .json({ error: { message: e.message ?? e }, code: 409 }), + ); + } + + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'Ok', + type: ListModelsResponseDto, + }) + @ApiOperation({ + summary: 'List models', + description: + "Lists the currently available models, and provides basic information about each one such as the owner and availability. [Equivalent to OpenAI's list model](https://platform.openai.com/docs/api-reference/models/list).", + }) + @Get() + findAll() { + return this.modelsUsecases.findAll(); + } + + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'Ok', + type: ModelDto, + }) + @ApiOperation({ + summary: 'Get model', + description: + "Retrieves a model instance, providing basic information about the model such as the owner and permissions. 
[Equivalent to OpenAI's list model](https://platform.openai.com/docs/api-reference/models/retrieve).", + }) + @ApiParam({ + name: 'id', + required: true, + description: 'The unique identifier of the model.', + }) + @Get(':id(*)') + findOne(@Param('id') id: string) { + return this.modelsUsecases.findOne(id); + } + + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'The model has been successfully updated.', + type: UpdateModelDto, + }) + @ApiOperation({ + summary: 'Update model', + description: "Updates a model instance defined by a model's `id`.", + parameters: [ + { + in: 'path', + name: 'model', + required: true, + description: 'The unique identifier of the model.', + }, + ], + }) + @Patch(':model(*)') + async update( + @Param('model') model: string, + @Body() updateModelDto: UpdateModelDto, + ) { + return this.modelsUsecases.update(model, updateModelDto); + } + + @ApiResponse({ + status: 200, + description: 'The model has been successfully deleted.', + type: DeleteModelResponseDto, + }) + @ApiOperation({ + summary: 'Delete model', + description: + "Deletes a model. 
[Equivalent to OpenAI's delete model](https://platform.openai.com/docs/api-reference/models/delete).", + }) + @ApiParam({ + name: 'id', + required: true, + description: 'The unique identifier of the model.', + }) + @Delete(':id(*)') + remove(@Param('id') id: string) { + return this.modelsUsecases.remove(id); + } +} diff --git a/platform/src/infrastructure/controllers/system.controller.ts b/platform/src/infrastructure/controllers/system.controller.ts new file mode 100644 index 000000000..b6a52ff20 --- /dev/null +++ b/platform/src/infrastructure/controllers/system.controller.ts @@ -0,0 +1,138 @@ +import { + DownloadState, + DownloadStateEvent, +} from '@/domain/models/download.interface'; +import { + EmptyModelEvent, + ModelEvent, + ModelId, + ModelStatus, + ModelStatusAndEvent, +} from '@/domain/models/model.event'; +import { DownloadManagerService } from '@/infrastructure/services/download-manager/download-manager.service'; +import { ModelsUsecases } from '@/usecases/models/models.usecases'; +import { Controller, Delete, Get, HttpCode, Sse } from '@nestjs/common'; +import { EventEmitter2 } from '@nestjs/event-emitter'; +import { ApiOperation, ApiResponse, ApiTags } from '@nestjs/swagger'; +import { + Observable, + catchError, + combineLatest, + distinctUntilChanged, + from, + fromEvent, + interval, + map, + merge, + of, + startWith, + switchMap, +} from 'rxjs'; +import { ResourcesManagerService } from '../services/resources-manager/resources-manager.service'; +import { ResourceEvent } from '@/domain/models/resource.interface'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; + +@ApiTags('System') +@Controller('system') +export class SystemController { + constructor( + private readonly downloadManagerService: DownloadManagerService, + private readonly modelsUsecases: ModelsUsecases, + private readonly cortexUsecases: CortexUsecases, + private readonly eventEmitter: EventEmitter2, + private readonly resourcesManagerService: 
ResourcesManagerService, + ) {} + + @ApiOperation({ + summary: 'Stop api server', + description: 'Stops the Cortex API endpoint server for the detached mode.', + }) + @Delete() + async delete() { + await this.cortexUsecases.stopCortex().catch(() => {}); + process.exit(0); + } + + @ApiOperation({ + summary: 'Get health status', + description: "Retrieves the health status of your Cortex's system.", + }) + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'Ok', + }) + @Get() + async get() { + return 'OK'; + } + + @ApiOperation({ + summary: 'Get download status', + description: "Retrieves the model's download status.", + }) + @Sse('events/download') + downloadEvent(): Observable { + const latestDownloadState$: Observable = of({ + data: this.downloadManagerService.getDownloadStates(), + }); + const downloadEvent$ = fromEvent( + this.eventEmitter, + 'download.event', + ).pipe( + map((downloadState) => ({ data: downloadState })), + distinctUntilChanged(), + ); + + return merge(latestDownloadState$, downloadEvent$).pipe(); + } + + @ApiOperation({ + summary: 'Get model status', + description: 'Retrieves all the available model statuses within Cortex.', + }) + @Sse('events/model') + modelEvent(): Observable { + const latestModelStatus$: Observable> = of( + this.modelsUsecases.getModelStatuses(), + ); + + const modelEvent$ = fromEvent( + this.eventEmitter, + 'model.event', + ).pipe(startWith(EmptyModelEvent)); + + return combineLatest([latestModelStatus$, modelEvent$]).pipe( + map(([status, event]) => ({ data: { status, event } })), + ); + } + + @ApiOperation({ + summary: 'Get resources status', + description: 'Retrieves the resources status of the system.', + }) + @Sse('events/resources') + resourcesEvent(): Observable { + const initialData$ = from( + this.resourcesManagerService.getResourceStatuses(), + ).pipe( + map((data) => ({ data: data })), + catchError((error) => { + console.error('Error fetching initial resource statuses', error); + return of(); // Ensure 
the stream is kept alive even if initial fetch fails + }), + ); + + const getResourceStatuses$ = interval(2000).pipe( + switchMap(() => this.resourcesManagerService.getResourceStatuses()), + map((data) => ({ data: data })), + catchError((error) => { + console.error('Error fetching resource statuses', error); + return of(); // Keep the stream alive on error + }), + ); + + // Merge the initial data with the interval updates + return merge(initialData$, getResourceStatuses$); + } +} diff --git a/platform/src/infrastructure/controllers/threads.controller.spec.ts b/platform/src/infrastructure/controllers/threads.controller.spec.ts new file mode 100644 index 000000000..1cf065219 --- /dev/null +++ b/platform/src/infrastructure/controllers/threads.controller.spec.ts @@ -0,0 +1,22 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { ThreadsController } from './threads.controller'; +import { ThreadsUsecases } from '@/usecases/threads/threads.usecases'; +import { DatabaseModule } from '../database/database.module'; + +describe('ThreadsController', () => { + let controller: ThreadsController; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + imports: [DatabaseModule], + controllers: [ThreadsController], + providers: [ThreadsUsecases], + }).compile(); + + controller = module.get(ThreadsController); + }); + + it('should be defined', () => { + expect(controller).toBeDefined(); + }); +}); diff --git a/platform/src/infrastructure/controllers/threads.controller.ts b/platform/src/infrastructure/controllers/threads.controller.ts new file mode 100644 index 000000000..dfa47a37d --- /dev/null +++ b/platform/src/infrastructure/controllers/threads.controller.ts @@ -0,0 +1,342 @@ +import { + Controller, + Get, + Post, + Body, + Param, + Delete, + UseInterceptors, + Query, + DefaultValuePipe, +} from '@nestjs/common'; +import { ThreadsUsecases } from '@/usecases/threads/threads.usecases'; +import { CreateThreadDto } from 
'@/infrastructure/dtos/threads/create-thread.dto'; +import { UpdateThreadDto } from '@/infrastructure/dtos/threads/update-thread.dto'; +import { DeleteThreadResponseDto } from '@/infrastructure/dtos/threads/delete-thread.dto'; +import { GetThreadResponseDto } from '@/infrastructure/dtos/threads/get-thread.dto'; +import { ApiOkResponse, ApiOperation, ApiResponse } from '@nestjs/swagger'; +import { TransformInterceptor } from '../interceptors/transform.interceptor'; +import { ListMessagesResponseDto } from '../dtos/messages/list-message.dto'; +import { CreateMessageDto } from '../dtos/threads/create-message.dto'; +import { UpdateMessageDto } from '../dtos/threads/update-message.dto'; +import DeleteMessageDto from '../dtos/threads/delete-message.dto'; +import { GetMessageResponseDto } from '../dtos/messages/get-message.dto'; + +@Controller('threads') +@UseInterceptors(TransformInterceptor) +export class ThreadsController { + constructor(private readonly threadsUsecases: ThreadsUsecases) {} + + @ApiOperation({ + tags: ['Threads'], + summary: 'Create thread', + description: 'Creates a new thread.', + }) + @Post() + create(@Body() createThreadDto: CreateThreadDto) { + return this.threadsUsecases.create(createThreadDto); + } + + @ApiOperation({ + tags: ['Threads'], + summary: 'List threads', + description: + 'Lists all the available threads along with its configurations.', + }) + @Get() + findAll( + @Query('limit', new DefaultValuePipe(20)) limit: number, + @Query('order', new DefaultValuePipe('desc')) order: 'asc' | 'desc', + @Query('after') after?: string, + @Query('before') before?: string, + ) { + return this.threadsUsecases.findAll(limit, order, after, before); + } + + @ApiOperation({ + tags: ['Messages'], + summary: 'Retrieve message', + description: 'Retrieves a message.', + parameters: [ + { + in: 'path', + name: 'thread_id', + required: true, + description: 'The ID of the thread to which this message belongs.', + }, + { + in: 'path', + name: 'message_id', + 
required: true, + description: 'The ID of the message to retrieve.', + }, + ], + }) + @ApiOkResponse({ + description: 'The message object matching the specified ID.', + type: GetMessageResponseDto, + }) + @Get(':thread_id/messages/:message_id') + async retrieveMessage( + @Param('thread_id') threadId: string, + @Param('message_id') messageId: string, + ) { + return this.threadsUsecases.retrieveMessage(threadId, messageId); + } + + @ApiOperation({ + tags: ['Messages'], + summary: 'List messages', + description: 'Returns a list of messages for a given thread.', + parameters: [ + { + in: 'path', + name: 'thread_id', + required: true, + description: 'The ID of the thread the messages belong to.', + }, + { + in: 'query', + name: 'limit', + required: false, + description: + 'A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.', + }, + { + in: 'query', + name: 'order', + required: false, + description: + 'Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.', + }, + { + in: 'query', + name: 'after', + required: false, + description: + 'A cursor for use in pagination. after is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.', + }, + { + in: 'query', + name: 'before', + required: false, + description: + 'A cursor for use in pagination. before is an object ID that defines your place in the list. 
For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.', + }, + { + in: 'query', + name: 'run_id', + required: false, + description: 'Filter messages by the run ID that generated them.', + }, + ], + }) + @ApiOkResponse({ + description: 'A list of message objects.', + type: ListMessagesResponseDto, + }) + @Get(':thread_id/messages') + getMessagesOfThread( + @Param('thread_id') threadId: string, + @Query('limit', new DefaultValuePipe(20)) limit: number, + @Query('order', new DefaultValuePipe('desc')) order: 'asc' | 'desc', + @Query('after') after?: string, + @Query('before') before?: string, + @Query('run_id') runId?: string, + ) { + return this.threadsUsecases.getMessagesOfThread( + threadId, + limit, + order, + after, + before, + runId, + ); + } + + @ApiOperation({ + tags: ['Messages'], + summary: 'Create message', + description: 'Create a message.', + responses: { + '201': { + description: 'A message object.', + }, + }, + parameters: [ + { + in: 'path', + name: 'thread_id', + required: true, + description: 'The ID of the thread to create a message for.', + }, + ], + }) + @Post(':thread_id/messages') + createMessageInThread( + @Param('thread_id') threadId: string, + @Body() createMessageDto: CreateMessageDto, + ) { + return this.threadsUsecases.createMessageInThread( + threadId, + createMessageDto, + ); + } + + @ApiOperation({ + tags: ['Messages'], + summary: 'Modify message', + description: 'Modifies a message.', + responses: { + '200': { + description: 'The modified message object.', + }, + }, + parameters: [ + { + in: 'path', + name: 'thread_id', + required: true, + description: 'The ID of the thread to which this message belongs.', + }, + { + in: 'path', + name: 'message_id', + required: true, + description: 'The ID of the message to modify.', + }, + ], + }) + @Post(':thread_id/messages/:message_id') + updateMessage( + 
@Param('thread_id') threadId: string, + @Param('message_id') messageId: string, + @Body() updateMessageDto: UpdateMessageDto, + ) { + return this.threadsUsecases.updateMessage( + threadId, + messageId, + updateMessageDto, + ); + } + + @ApiOperation({ + summary: 'Clean thread', + description: 'Deletes all messages in a thread.', + tags: ['Threads'], + parameters: [ + { + in: 'path', + name: 'thread_id', + required: true, + description: 'The ID of the thread to clean.', + }, + ], + }) + @Post(':thread_id/clean') + async cleanThread(@Param('thread_id') threadId: string) { + return this.threadsUsecases.clean(threadId); + } + + @ApiOperation({ + summary: 'Delete message', + description: 'Deletes a message.', + tags: ['Messages'], + parameters: [ + { + in: 'path', + name: 'thread_id', + required: true, + description: 'The ID of the thread to which this message belongs.', + }, + { + in: 'path', + name: 'message_id', + required: true, + description: 'The ID of the message to delete.', + }, + ], + }) + @ApiOkResponse({ + description: 'Deletion status.', + type: DeleteMessageDto, + }) + @Delete(':thread_id/messages/:message_id') + deleteMessage( + @Param('thread_id') threadId: string, + @Param('message_id') messageId: string, + ) { + return this.threadsUsecases.deleteMessage(threadId, messageId); + } + + @ApiOperation({ + tags: ['Threads'], + summary: 'Retrieve thread', + description: 'Retrieves a thread.', + parameters: [ + { + in: 'path', + name: 'thread_id', + required: true, + description: 'The unique identifier of the thread.', + }, + ], + }) + @ApiOkResponse({ + description: 'Retrieves a thread.', + type: GetThreadResponseDto, + }) + @Get(':thread_id') + retrieveThread(@Param('thread_id') threadId: string) { + return this.threadsUsecases.findOne(threadId); + } + + @ApiResponse({ + status: 200, + description: 'The thread has been successfully updated.', + type: UpdateThreadDto, + }) + @ApiOperation({ + tags: ['Threads'], + summary: 'Modify thread', + description: 
'Modifies a thread.', + parameters: [ + { + in: 'path', + name: 'thread_id', + required: true, + description: 'The unique identifier of the thread.', + }, + ], + }) + @Post(':thread_id') + modifyThread( + @Param('thread_id') threadId: string, + @Body() updateThreadDto: UpdateThreadDto, + ) { + return this.threadsUsecases.update(threadId, updateThreadDto); + } + + @ApiResponse({ + status: 200, + description: 'The thread has been successfully deleted.', + type: DeleteThreadResponseDto, + }) + @ApiOperation({ + tags: ['Threads'], + summary: 'Delete thread', + description: 'Deletes a specific thread defined by a thread `id` .', + parameters: [ + { + in: 'path', + name: 'id', + required: true, + description: 'The unique identifier of the thread.', + }, + ], + }) + @Delete(':thread_id') + remove(@Param('thread_id') threadId: string) { + return this.threadsUsecases.remove(threadId); + } +} diff --git a/platform/src/infrastructure/database/database.module.ts b/platform/src/infrastructure/database/database.module.ts new file mode 100644 index 000000000..e3a05c85b --- /dev/null +++ b/platform/src/infrastructure/database/database.module.ts @@ -0,0 +1,18 @@ +import { Module } from '@nestjs/common'; +import { threadProviders } from './providers/thread.providers'; +import { sqliteDatabaseProviders } from './sqlite-database.providers'; +import { assistantProviders } from './providers/assistant.providers'; +import { messageProviders } from './providers/message.providers'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; + +@Module({ + imports: [FileManagerModule], + providers: [ + ...sqliteDatabaseProviders, + ...threadProviders, + ...assistantProviders, + ...messageProviders, + ], + exports: [...threadProviders, ...assistantProviders, ...messageProviders], +}) +export class DatabaseModule {} diff --git a/platform/src/infrastructure/database/providers/assistant.providers.ts 
b/platform/src/infrastructure/database/providers/assistant.providers.ts new file mode 100644 index 000000000..7a7dc9462 --- /dev/null +++ b/platform/src/infrastructure/database/providers/assistant.providers.ts @@ -0,0 +1,12 @@ +import { AssistantEntity } from '@/infrastructure/entities/assistant.entity'; +import { Sequelize } from 'sequelize-typescript'; + +export const assistantProviders = [ + { + provide: 'ASSISTANT_REPOSITORY', + useFactory: async (sequelize: Sequelize) => { + return sequelize.getRepository(AssistantEntity); + }, + inject: ['DATA_SOURCE'], + }, +]; diff --git a/platform/src/infrastructure/database/providers/message.providers.ts b/platform/src/infrastructure/database/providers/message.providers.ts new file mode 100644 index 000000000..9b1b50227 --- /dev/null +++ b/platform/src/infrastructure/database/providers/message.providers.ts @@ -0,0 +1,12 @@ +import { MessageEntity } from '@/infrastructure/entities/message.entity'; +import { Sequelize } from 'sequelize-typescript'; + +export const messageProviders = [ + { + provide: 'MESSAGE_REPOSITORY', + useFactory: async (sequelize: Sequelize) => { + return sequelize.getRepository(MessageEntity); + }, + inject: ['DATA_SOURCE'], + }, +]; diff --git a/platform/src/infrastructure/database/providers/thread.providers.ts b/platform/src/infrastructure/database/providers/thread.providers.ts new file mode 100644 index 000000000..0db54a6dd --- /dev/null +++ b/platform/src/infrastructure/database/providers/thread.providers.ts @@ -0,0 +1,12 @@ +import { ThreadEntity } from '@/infrastructure/entities/thread.entity'; +import { Sequelize } from 'sequelize-typescript'; + +export const threadProviders = [ + { + provide: 'THREAD_REPOSITORY', + useFactory: async (sequelize: Sequelize) => { + return sequelize.getRepository(ThreadEntity); + }, + inject: ['DATA_SOURCE'], + }, +]; diff --git a/platform/src/infrastructure/database/sqlite-database.providers.ts b/platform/src/infrastructure/database/sqlite-database.providers.ts 
new file mode 100644 index 000000000..8956abea0 --- /dev/null +++ b/platform/src/infrastructure/database/sqlite-database.providers.ts @@ -0,0 +1,28 @@ +import { databaseFile } from '@/infrastructure/constants/cortex'; +import { join } from 'path'; +import { ThreadEntity } from '../entities/thread.entity'; +import { MessageEntity } from '../entities/message.entity'; +import { AssistantEntity } from '../entities/assistant.entity'; +import { Sequelize } from 'sequelize-typescript'; +import { fileManagerService } from '../services/file-manager/file-manager.service'; + +export const sqliteDatabaseProviders = [ + { + provide: 'DATA_SOURCE', + inject: [], + useFactory: async () => { + const dataFolderPath = await fileManagerService.getDataFolderPath(); + const sqlitePath = join(dataFolderPath, databaseFile); + const sequelize = new Sequelize({ + dialect: 'sqlite', + storage: sqlitePath, + logging: false, + }); + sequelize.addModels([ThreadEntity, MessageEntity, AssistantEntity]); + await ThreadEntity.sync(); + await MessageEntity.sync(); + await AssistantEntity.sync(); + return sequelize; + }, + }, +]; diff --git a/platform/src/infrastructure/dtos/assistants/create-assistant.dto.ts b/platform/src/infrastructure/dtos/assistants/create-assistant.dto.ts new file mode 100644 index 000000000..65e387a26 --- /dev/null +++ b/platform/src/infrastructure/dtos/assistants/create-assistant.dto.ts @@ -0,0 +1,84 @@ +import { IsArray, IsNumber, IsOptional, IsString } from 'class-validator'; +import { Assistant } from '@/domain/models/assistant.interface'; +import { ApiProperty } from '@nestjs/swagger'; + +export class CreateAssistantDto implements Partial { + @ApiProperty({ + description: 'The unique identifier of the assistant.', + example: 'jan', + default: 'jan', + }) + @IsString() + id: string; + + @ApiProperty({ + description: 'The avatar of the assistant.', + example: '', + default: '', + }) + @IsOptional() + @IsString() + avatar?: string; + + @ApiProperty({ + description: 'The 
name of the assistant.', + example: 'Jan', + default: 'Jan', + }) + @IsString() + name: string; + + @ApiProperty({ + description: 'The description of the assistant.', + example: 'A default assistant that can use all downloaded models', + default: 'A default assistant that can use all downloaded models', + }) + @IsString() + description: string; + + @ApiProperty({ + description: 'The model of the assistant.', + }) + @IsString() + model: string; + + @ApiProperty({ + description: 'The instructions for the assistant.', + example: '', + default: '', + }) + @IsString() + instructions: string; + + @ApiProperty({ + description: 'The tools associated with the assistant.', + example: [], + default: [], + }) + @IsArray() + tools: any[]; + + @ApiProperty({ + description: 'The metadata of the assistant.', + }) + @IsOptional() + metadata: unknown | null; + + @ApiProperty({ + description: 'Top p.', + example: '0.7', + default: '0.7', + }) + @IsOptional() + @IsNumber() + top_p?: number; + + @ApiProperty({ + description: 'Temperature.', + example: '0.7', + default: '0.7', + }) + @IsOptional() + @IsNumber() + temperature?: number; +} diff --git a/platform/src/infrastructure/dtos/assistants/delete-assistant.dto.ts b/platform/src/infrastructure/dtos/assistants/delete-assistant.dto.ts new file mode 100644 index 000000000..a6010342f --- /dev/null +++ b/platform/src/infrastructure/dtos/assistants/delete-assistant.dto.ts @@ -0,0 +1,22 @@ +import { ApiProperty } from '@nestjs/swagger'; + +export class DeleteAssistantResponseDto { + @ApiProperty({ + example: 'assistant_123', + description: 'The identifier of the assistant that was deleted.', + }) + id: string; + + @ApiProperty({ + example: 'assistant', + description: "Type of the object, indicating it's a assistant.", + default: 'assistant', + }) + object: string; + + @ApiProperty({ + example: true, + description: 'Indicates whether the assistant was successfully deleted.', + }) + deleted: boolean; +} diff --git 
a/platform/src/infrastructure/dtos/assistants/model-setting.dto.ts b/platform/src/infrastructure/dtos/assistants/model-setting.dto.ts new file mode 100644 index 000000000..53c06d497 --- /dev/null +++ b/platform/src/infrastructure/dtos/assistants/model-setting.dto.ts @@ -0,0 +1,208 @@ +import { ApiProperty } from '@nestjs/swagger'; +import { IsArray, IsOptional } from 'class-validator'; + +export class ModelSettingDto { + @ApiProperty({ + type: 'number', + minimum: 0, + maximum: 1, + required: false, + default: 1, + description: `What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.`, + }) + temperature: number; + + @ApiProperty({ + type: 'number', + minimum: 0, + maximum: 1, + required: false, + default: 1, + description: `An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. 
So 0.1 means only the tokens comprising the top 10% probability mass are considered.\nWe generally recommend altering this or temperature but not both.`, + }) + top_p: number; + + @ApiProperty({ + required: false, + example: '', + description: 'GGUF metadata: tokenizer.chat_template', + }) + prompt_template?: string; + + @ApiProperty({ + required: false, + example: [], + description: + 'Defines specific tokens or phrases at which the model will stop generating further output.', + default: [], + }) + @IsArray() + @IsOptional() + stop?: string[]; + + @ApiProperty({ + required: false, + type: 'number', + example: 0, + description: + 'Adjusts the likelihood of the model repeating words or phrases in its output.', + }) + frequency_penalty?: number; + + @ApiProperty({ + required: false, + type: 'number', + example: 0, + description: + 'Influences the generation of new and varied concepts in the model’s output.', + }) + presence_penalty?: number; + + @ApiProperty({ + required: false, + type: 'number', + example: 4096, + default: 4096, + description: + 'The context length for model operations varies; the maximum depends on the specific model used.', + }) + ctx_len?: number; + + @ApiProperty({ + required: false, + type: 'boolean', + example: true, + default: true, + description: 'Enable real-time data processing for faster predictions.', + }) + stream?: boolean; + + @ApiProperty({ + required: false, + type: 'number', + example: 2048, + default: 2048, + description: + 'The maximum number of tokens the model will generate in a single response.', + }) + max_tokens?: number; + + @ApiProperty({ + required: false, + type: 'number', + example: 1, + default: 1, + description: 'The number of layers to load onto the GPU for acceleration.', + }) + ngl?: number; + + @ApiProperty({ + required: false, + type: 'number', + example: 1, + default: 1, + description: 'Number of parallel sequences to decode', + }) + n_parallel?: number; + + @ApiProperty({ + required: false, + type: 'number', + 
example: 1, + default: 1, + description: + 'Determines CPU inference threads, limited by hardware and OS. (Maximum determined by system)', + }) + cpu_threads?: number; + + @ApiProperty({ + required: false, + type: 'string', + example: '', + default: '', + description: 'The prompt to use for internal configuration', + }) + pre_prompt?: string; + + @ApiProperty({ + required: false, + type: 'number', + example: 0, + default: 0, + description: 'The batch size for prompt eval step', + }) + n_batch?: number; + + @ApiProperty({ + required: false, + type: 'boolean', + example: true, + default: true, + description: 'To enable prompt caching or not', + }) + caching_enabled?: boolean; + + @ApiProperty({ + required: false, + type: 'number', + example: 0, + default: 0, + description: 'Group attention factor in self-extend', + }) + grp_attn_n?: number; + + @ApiProperty({ + required: false, + type: 'number', + example: 0, + default: 0, + description: 'Group attention width in self-extend', + }) + grp_attn_w?: number; + + @ApiProperty({ + required: false, + type: 'boolean', + example: false, + default: false, + description: 'Prevent system swapping of the model to disk in macOS', + }) + mlock?: boolean; + + @ApiProperty({ + required: false, + type: 'string', + example: '', + default: '', + description: + 'You can constrain the sampling using GBNF grammars by providing path to a grammar file', + }) + grammar_file?: string; + + @ApiProperty({ + required: false, + type: 'boolean', + example: true, + default: true, + description: 'To enable Flash Attention, default is true', + }) + flash_attn?: boolean; + + @ApiProperty({ + required: false, + type: 'string', + example: '', + default: '', + description: 'KV cache type: f16, q8_0, q4_0, default is f16', + }) + cache_type?: string; + + @ApiProperty({ + required: false, + type: 'boolean', + example: true, + default: true, + description: 'To enable mmap, default is true', + }) + use_mmap?: boolean; +} diff --git 
a/platform/src/infrastructure/dtos/chat/chat-completion-message.dto.ts b/platform/src/infrastructure/dtos/chat/chat-completion-message.dto.ts new file mode 100644 index 000000000..19fd17999 --- /dev/null +++ b/platform/src/infrastructure/dtos/chat/chat-completion-message.dto.ts @@ -0,0 +1,14 @@ +import { IsString } from 'class-validator'; +import { ApiProperty } from '@nestjs/swagger'; + +export class ChatCompletionMessage { + @ApiProperty({ description: 'The Content of the chat message.' }) + @IsString() + content: string; + + @ApiProperty({ + description: 'The role of the entity in the chat completion.', + example: 'user', + }) + role: 'user' | 'assistant'; +} diff --git a/platform/src/infrastructure/dtos/chat/chat-completion-response.dto.ts b/platform/src/infrastructure/dtos/chat/chat-completion-response.dto.ts new file mode 100644 index 000000000..ed1ba7a42 --- /dev/null +++ b/platform/src/infrastructure/dtos/chat/chat-completion-response.dto.ts @@ -0,0 +1,52 @@ +import { ApiProperty } from '@nestjs/swagger'; +import { UsageDto } from './usage.dto'; +import { ChoiceDto } from './choice.dto'; + +export class ChatCompletionResponseDto { + @ApiProperty({ + description: 'A list of choices generated by the chat model.', + type: [ChoiceDto], + }) + choices: ChoiceDto[]; + + @ApiProperty({ + description: + 'The timestamp of when the chat completion was created, expressed as a Unix timestamp.', + type: Number, + }) + created: number; + + @ApiProperty({ + description: 'The unique identifier for the chat completion.', + type: String, + }) + id: string; + + @ApiProperty({ + description: + 'The identifier of the model used to generate the chat completion.', + type: String, + }) + model: string; + + @ApiProperty({ + description: + "The type of object, typically set to 'chat_completion' to denote the nature of the API response.", + type: String, + }) + object: string; + + @ApiProperty({ + description: + 'A unique fingerprint that identifies the system configuration used 
during the chat completion.', + type: String, + }) + system_fingerprint: string; + + @ApiProperty({ + description: + 'An object representing the usage statistics of the model for the current completion.', + type: UsageDto, + }) + usage: UsageDto; +} diff --git a/platform/src/infrastructure/dtos/chat/choice.dto.ts b/platform/src/infrastructure/dtos/chat/choice.dto.ts new file mode 100644 index 000000000..0594506f7 --- /dev/null +++ b/platform/src/infrastructure/dtos/chat/choice.dto.ts @@ -0,0 +1,25 @@ +import { ApiProperty } from '@nestjs/swagger'; +import { MessageDto } from './message.dto'; + +export class ChoiceDto { + @ApiProperty({ + description: + 'The reason the chat completion ended, typically indicating whether the model completed the text naturally or was cut off.', + type: String, + }) + finish_reason: string; + + @ApiProperty({ + description: + 'The index of the completion relative to other generated completions, useful for identifying its order in a batch request.', + type: Number, + }) + index: number; + + @ApiProperty({ + description: + 'An object representing the message details involved in the chat completion, encapsulated within a MessageDto.', + type: MessageDto, + }) + message: MessageDto; +} diff --git a/platform/src/infrastructure/dtos/chat/create-chat-completion.dto.ts b/platform/src/infrastructure/dtos/chat/create-chat-completion.dto.ts new file mode 100644 index 000000000..07b2f429c --- /dev/null +++ b/platform/src/infrastructure/dtos/chat/create-chat-completion.dto.ts @@ -0,0 +1,90 @@ +import { + IsArray, + IsBoolean, + IsNumber, + IsOptional, + IsString, + ValidateNested, +} from 'class-validator'; +import { ChatCompletionMessage } from './chat-completion-message.dto'; +import { Type } from 'class-transformer'; +import { ApiProperty } from '@nestjs/swagger'; + +export class CreateChatCompletionDto { + @ApiProperty({ + description: + 'Array of chat messages to be used for generating the chat completion.', + }) + @IsArray() + 
@ValidateNested({ each: true }) + @Type(() => ChatCompletionMessage) + messages: ChatCompletionMessage[]; + + @ApiProperty({ + description: 'The unique identifier of the model.', + example: 'mistral', + }) + @IsString() + model: string; + + @ApiProperty({ + description: + 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.', + example: true, + }) + @IsOptional() + @IsBoolean() + stream?: boolean; + + @ApiProperty({ + description: + 'Sets the upper limit on the number of tokens the model can generate in a single output.', + example: 4096, + }) + @IsOptional() + @IsNumber() + max_tokens?: number; + + @ApiProperty({ + description: + 'Defines specific tokens or phrases that signal the model to stop producing further output.', + example: ['End'], + }) + @IsOptional() + @IsArray() + stop?: string[]; + + @ApiProperty({ + description: + 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', + example: 0.2, + }) + @IsOptional() + @IsNumber() + frequency_penalty?: number; + + @ApiProperty({ + description: + 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', + example: 0.6, + }) + @IsOptional() + @IsNumber() + presence_penalty?: number; + + @ApiProperty({ + description: "Influences the randomness of the model's output.", + example: 0.8, + }) + @IsOptional() + @IsNumber() + temperature?: number; + + @ApiProperty({ + description: 'Sets probability threshold for more relevant outputs.', + example: 0.95, + }) + @IsOptional() + @IsNumber() + top_p?: number; +} diff --git a/platform/src/infrastructure/dtos/chat/embeddings-response.dto.ts b/platform/src/infrastructure/dtos/chat/embeddings-response.dto.ts new file mode 100644 index 000000000..dac89dcfc --- /dev/null +++ b/platform/src/infrastructure/dtos/chat/embeddings-response.dto.ts @@ -0,0 +1,30 
@@ +import { ApiProperty } from '@nestjs/swagger'; +import { UsageDto } from './usage.dto'; + +export class EmbeddingsResponseDto { + @ApiProperty({ + description: 'Type of the result object.', + type: String, + }) + object: string; + + @ApiProperty({ + description: 'Identifier of the model utilized for generating embeddings.', + type: String, + }) + model: string; + + @ApiProperty({ + description: + 'The embedding vector represented as an array of floating-point numbers. ', + type: [Number], + }) + embedding: [number]; + + @ApiProperty({ + description: + 'Details of token usage, including prompt_tokens and total_tokens.', + type: UsageDto, + }) + usage: UsageDto; +} diff --git a/platform/src/infrastructure/dtos/chat/message.dto.ts b/platform/src/infrastructure/dtos/chat/message.dto.ts new file mode 100644 index 000000000..d4450165d --- /dev/null +++ b/platform/src/infrastructure/dtos/chat/message.dto.ts @@ -0,0 +1,17 @@ +import { ApiProperty } from '@nestjs/swagger'; + +export class MessageDto { + @ApiProperty({ + description: + 'The textual content of the chat message or completion generated by the model.', + type: String, + }) + content: string; + + @ApiProperty({ + description: + "The role of the participant in the chat, such as 'user' or 'system', indicating who is the sender of the message.", + type: String, + }) + role: string; +} diff --git a/platform/src/infrastructure/dtos/chat/usage.dto.ts b/platform/src/infrastructure/dtos/chat/usage.dto.ts new file mode 100644 index 000000000..95b71ceea --- /dev/null +++ b/platform/src/infrastructure/dtos/chat/usage.dto.ts @@ -0,0 +1,24 @@ +import { ApiProperty } from '@nestjs/swagger'; + +export class UsageDto { + @ApiProperty({ + description: + 'The number of tokens used in the completion part of the response generated by the model.', + type: Number, + }) + completion_tokens: number; + + @ApiProperty({ + description: + 'The number of tokens used in the prompt part of the chat input, which is provided to the model.', 
+ type: Number, + }) + prompt_tokens: number; + + @ApiProperty({ + description: + 'The total number of tokens used in both the prompt and the completion, summarizing the entire token count of the chat operation.', + type: Number, + }) + total_tokens: number; +} diff --git a/platform/src/infrastructure/dtos/common/common-response.dto.ts b/platform/src/infrastructure/dtos/common/common-response.dto.ts new file mode 100644 index 000000000..dd60f4892 --- /dev/null +++ b/platform/src/infrastructure/dtos/common/common-response.dto.ts @@ -0,0 +1,10 @@ +import { ApiProperty } from '@nestjs/swagger'; +import { IsString } from 'class-validator'; + +export class CommonResponseDto { + @ApiProperty({ + description: 'The response success or error message.', + }) + @IsString() + message: string; +} diff --git a/platform/src/infrastructure/dtos/configs/config-update.dto.ts b/platform/src/infrastructure/dtos/configs/config-update.dto.ts new file mode 100644 index 000000000..8e31cbcb1 --- /dev/null +++ b/platform/src/infrastructure/dtos/configs/config-update.dto.ts @@ -0,0 +1,22 @@ +import { ApiProperty } from '@nestjs/swagger'; +import { IsOptional, IsString } from 'class-validator'; + +export class ConfigUpdateDto { + @ApiProperty({ + example: 'apiKey', + description: 'The configuration API key.', + }) + @IsString() + @IsOptional() + config: string; + + // Prompt Settings + @ApiProperty({ + type: String, + example: 'sk-xxxxxx', + description: 'The value of the configuration API key.', + }) + @IsString() + @IsOptional() + value: string; +} diff --git a/platform/src/infrastructure/dtos/cortex/cortex-operation-successfully.dto.ts b/platform/src/infrastructure/dtos/cortex/cortex-operation-successfully.dto.ts new file mode 100644 index 000000000..88d58283d --- /dev/null +++ b/platform/src/infrastructure/dtos/cortex/cortex-operation-successfully.dto.ts @@ -0,0 +1,13 @@ +import { ApiProperty } from '@nestjs/swagger'; + +export class CortexOperationSuccessfullyDto { + @ApiProperty({ + 
description: 'The cortex operation status message.', + }) + message: string; + + @ApiProperty({ + description: 'The cortex operation status code.', + }) + status: string; +} diff --git a/platform/src/infrastructure/dtos/cortex/start-cortex.dto.ts b/platform/src/infrastructure/dtos/cortex/start-cortex.dto.ts new file mode 100644 index 000000000..49e143521 --- /dev/null +++ b/platform/src/infrastructure/dtos/cortex/start-cortex.dto.ts @@ -0,0 +1,27 @@ +import { ApiProperty } from '@nestjs/swagger'; +import { IsIP, IsNumber, IsString, Max, Min } from 'class-validator'; +import { + defaultCortexCppHost, + defaultCortexCppPort, +} from '@/infrastructure/constants/cortex'; + +export class StartCortexDto { + @ApiProperty({ + name: 'host', + description: 'The Cortexcpp host.', + default: defaultCortexCppHost, + }) + @IsString() + @IsIP() + host: string; + + @ApiProperty({ + name: 'port', + description: 'The Cortexcpp port.', + default: defaultCortexCppPort, + }) + @IsNumber() + @Min(0) + @Max(65535) + port: number; +} diff --git a/platform/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts b/platform/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts new file mode 100644 index 000000000..f6517af78 --- /dev/null +++ b/platform/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts @@ -0,0 +1,37 @@ +import { Optional } from '@nestjs/common'; +import { ApiProperty } from '@nestjs/swagger'; + +export class CreateEmbeddingsDto { + @ApiProperty({ + example: 'mistral', + description: 'The name of the embedding model to be used.', + type: String, + }) + model: string; + + @ApiProperty({ + example: ['Hello World'], + description: + 'The text or token array(s) to be embedded. This can be a single string, an array of strings, or an array of token arrays to embed multiple inputs in one request.', + type: [String], + }) + input: string | string[]; + + @ApiProperty({ + example: 'float', + description: + 'Specifies the format for the embeddings. 
Supported formats include `float` and `int`. This field is optional.', + type: String, + }) + @Optional() + encoding_format?: string; + + @ApiProperty({ + example: 3, + description: + 'Defines the number of dimensions for the output embeddings. This feature is supported by certain models only. This field is optional.', + type: Number, + }) + @Optional() + dimensions?: number; +} diff --git a/platform/src/infrastructure/dtos/engines/engines.dto.ts b/platform/src/infrastructure/dtos/engines/engines.dto.ts new file mode 100644 index 000000000..5180f7335 --- /dev/null +++ b/platform/src/infrastructure/dtos/engines/engines.dto.ts @@ -0,0 +1,106 @@ +import { Extension } from '@/domain/abstracts/extension.abstract'; +import { ApiProperty } from '@nestjs/swagger'; +import { IsBoolean, IsOptional, IsString } from 'class-validator'; + +export class EngineDto implements Partial<Extension> { + @ApiProperty({ + type: String, + example: 'cortex.llamacpp', + description: 'The name of the engine that you want to retrieve.', + }) + @IsString() + name: string; + + // Prompt Settings + @ApiProperty({ + type: String, + example: 'Cortex', + description: 'The display name of the engine.', + }) + @IsString() + @IsOptional() + productName?: string; + + @ApiProperty({ + type: String, + example: 'Cortex engine', + description: 'The description of the engine.', + }) + @IsString() + @IsOptional() + description?: string; + + @ApiProperty({ + type: String, + example: '0.0.1', + description: 'The version of the engine.', + }) + @IsString() + @IsOptional() + version?: string; + + @ApiProperty({ + type: String, + example: true, + description: 'The status of the engine.', + }) + @IsBoolean() + status?: string; +} +export class InitEngineDto { + @ApiProperty({ + type: String, + example: 'CPU', + description: 'The mode that you want to run the engine.', + }) + @IsString() + runMode?: 'CPU' | 'GPU'; + + @ApiProperty({ + type: String, + example: 'Nvidia', + description: 'The type of GPU that you want to use.', + 
}) + @IsString() + gpuType?: 'Nvidia' | 'Others (Vulkan)'; + + @ApiProperty({ + type: String, + example: 'AVX', + description: 'The instructions that you want to use.', + }) + @IsString() + instructions?: 'AVX' | 'AVX2' | 'AVX512' | undefined; + + @ApiProperty({ + type: String, + example: '11', + description: 'The version of CUDA that you want to use.', + }) + @IsString() + cudaVersion?: '11' | '12'; + + @ApiProperty({ + type: Boolean, + example: true, + description: 'Silent mode.', + }) + @IsBoolean() + silent?: boolean; + + @ApiProperty({ + type: Boolean, + example: true, + description: 'Install Vulkan engine.', + }) + @IsBoolean() + vulkan?: boolean; + + @ApiProperty({ + type: String, + example: true, + description: 'Engine version.', + }) + @IsBoolean() + version?: string; +} diff --git a/platform/src/infrastructure/dtos/messages/create-message.dto.ts b/platform/src/infrastructure/dtos/messages/create-message.dto.ts new file mode 100644 index 000000000..a99ad86b6 --- /dev/null +++ b/platform/src/infrastructure/dtos/messages/create-message.dto.ts @@ -0,0 +1,58 @@ +import { ApiProperty } from '@nestjs/swagger'; +import { IsArray, IsString } from 'class-validator'; +import { Message, MessageContent } from '@/domain/models/message.interface'; + +export class CreateMessageDto implements Partial<Message> { + @ApiProperty({ + example: 'thread_456', + description: 'The ID of the thread to which the message will be posted.', + }) + @IsString() + thread_id: string; + + @ApiProperty({ + example: 'assistant_789', + description: "The assistant's unique identifier.", + }) + @IsString() + assistant_id?: string; + + @ApiProperty({ + example: 'user', + description: 'The sources of the messages.', + }) + @IsString() + role: 'user' | 'assistant'; + + @ApiProperty({ + example: [ + { + type: 'text', + data: 'Hello, how can I help you today?', + }, + ], + description: 'The content of the messages.', + }) + @IsArray() + content: MessageContent[]; + + @ApiProperty({ + example: 'in_progress', + 
description: 'Current status of the message.', + }) + status: 'in_progress' | 'incomplete' | 'completed'; + + @ApiProperty({ + example: { urgency: 'high', tags: ['customer_support'] }, + description: + 'Optional dictionary for additional unstructured message information.', + }) + metadata?: Record<string, unknown>; + + @ApiProperty({ + example: 'text', + description: 'Type of the message.', + }) + @IsString() + type?: string; +} diff --git a/platform/src/infrastructure/dtos/messages/delete-message.dto.ts b/platform/src/infrastructure/dtos/messages/delete-message.dto.ts new file mode 100644 index 000000000..4aa9b90b0 --- /dev/null +++ b/platform/src/infrastructure/dtos/messages/delete-message.dto.ts @@ -0,0 +1,22 @@ +import { ApiProperty } from '@nestjs/swagger'; + +export class DeleteMessageResponseDto { + @ApiProperty({ + example: 'message_123', + description: 'The identifier of the message that was deleted.', + }) + id: string; + + @ApiProperty({ + example: 'message', + description: "Type of the object, indicating it's a message.", + default: 'message', + }) + object: string; + + @ApiProperty({ + example: true, + description: 'Indicates whether the message was successfully deleted.', + }) + deleted: boolean; +} diff --git a/platform/src/infrastructure/dtos/messages/get-message.dto.ts b/platform/src/infrastructure/dtos/messages/get-message.dto.ts new file mode 100644 index 000000000..a96193435 --- /dev/null +++ b/platform/src/infrastructure/dtos/messages/get-message.dto.ts @@ -0,0 +1,91 @@ +import { ApiProperty } from '@nestjs/swagger'; + +export class ContentDto { + @ApiProperty({ + example: 'text', + description: 'Type of content, e.g., "text".', + }) + type: string; + + @ApiProperty({ + type: 'object', + example: { + value: 'How does AI work? 
Explain it in simple terms.', + annotations: [], + }, + description: 'Text content of the message along with any annotations.', + }) + text: { + value: string; + annotations: string[]; + }; +} + +export class GetMessageResponseDto { + @ApiProperty({ + example: 'msg_abc123', + description: 'The identifier of the message.', + }) + id: string; + + @ApiProperty({ + example: 'thread.message', + description: "Type of the object, indicating it's a thread message.", + default: 'thread.message', + }) + object: string; + + @ApiProperty({ + example: 1699017614, + description: + 'Unix timestamp representing the creation time of the message.', + type: 'integer', + }) + created_at: number; + + @ApiProperty({ + example: 'thread_abc123', + description: 'Identifier of the thread to which this message belongs.', + }) + thread_id: string; + + @ApiProperty({ + example: 'user', + description: "Role of the sender, either 'user' or 'assistant'.", + }) + role: string; + + @ApiProperty({ + type: [ContentDto], + description: 'Array of content objects detailing the message content.', + }) + content: ContentDto[]; + + @ApiProperty({ + example: [], + description: 'Array of file IDs associated with the message, if any.', + type: [String], + }) + file_ids: string[]; + + @ApiProperty({ + nullable: true, + example: null, + description: + 'Identifier of the assistant involved in the message, if applicable.', + }) + assistant_id: string | null; + + @ApiProperty({ + nullable: true, + example: null, + description: 'Run ID associated with the message, if applicable.', + }) + run_id: string | null; + + @ApiProperty({ + example: {}, + description: 'Metadata associated with the message.', + }) + metadata: object; +} diff --git a/platform/src/infrastructure/dtos/messages/list-message.dto.ts b/platform/src/infrastructure/dtos/messages/list-message.dto.ts new file mode 100644 index 000000000..d7d2de66d --- /dev/null +++ b/platform/src/infrastructure/dtos/messages/list-message.dto.ts @@ -0,0 +1,95 @@ +import 
{ ApiProperty } from '@nestjs/swagger'; + +export class ListMessageObjectDto { + @ApiProperty({ + example: 'msg_abc123', + description: 'The identifier of the message.', + }) + id: string; + + @ApiProperty({ + example: 'thread.message', + description: "Type of the object, indicating it's a thread message.", + }) + object: string; + + @ApiProperty({ + example: 1699017614, + description: + 'Unix timestamp representing the creation time of the message.', + type: 'integer', + }) + created_at: number; + + @ApiProperty({ + example: 'thread_abc123', + description: 'Identifier of the thread to which this message belongs.', + }) + thread_id: string; + + @ApiProperty({ + example: 'user', + description: "Role of the sender, either 'user' or 'assistant'.", + }) + role: string; + + @ApiProperty({ + description: 'Array of file IDs associated with the message, if any.', + type: [String], + example: [], + }) + file_ids: string[]; + + @ApiProperty({ + nullable: true, + description: + 'Identifier of the assistant involved in the message, if applicable.', + example: null, + }) + assistant_id: string | null; + + @ApiProperty({ + nullable: true, + description: 'Run ID associated with the message, if applicable.', + example: null, + }) + run_id: string | null; + + @ApiProperty({ + example: {}, + description: 'Metadata associated with the message.', + }) + metadata: object; +} + +export class ListMessagesResponseDto { + @ApiProperty({ + example: 'list', + description: "Type of the object, indicating it's a list.", + }) + object: string; + + @ApiProperty({ + type: [ListMessageObjectDto], + description: 'Array of message objects.', + }) + data: ListMessageObjectDto[]; + + @ApiProperty({ + example: 'msg_abc123', + description: 'Identifier of the first message in the list.', + }) + first_id: string; + + @ApiProperty({ + example: 'msg_abc456', + description: 'Identifier of the last message in the list.', + }) + last_id: string; + + @ApiProperty({ + example: false, + description: 'Indicates 
whether there are more messages to retrieve.', + }) + has_more: boolean; +} diff --git a/platform/src/infrastructure/dtos/messages/update-message.dto.ts b/platform/src/infrastructure/dtos/messages/update-message.dto.ts new file mode 100644 index 000000000..a8b82f010 --- /dev/null +++ b/platform/src/infrastructure/dtos/messages/update-message.dto.ts @@ -0,0 +1,4 @@ +import { PartialType } from '@nestjs/mapped-types'; +import { CreateMessageDto } from './create-message.dto'; + +export class UpdateMessageDto extends PartialType(CreateMessageDto) {} diff --git a/platform/src/infrastructure/dtos/models/create-model.dto.ts b/platform/src/infrastructure/dtos/models/create-model.dto.ts new file mode 100644 index 000000000..19cca03c1 --- /dev/null +++ b/platform/src/infrastructure/dtos/models/create-model.dto.ts @@ -0,0 +1,158 @@ +import { + IsArray, + IsBoolean, + IsNumber, + IsOptional, + IsString, + Min, +} from 'class-validator'; +import { Model } from '@/domain/models/model.interface'; +import { ModelArtifactDto } from './model-artifact.dto'; +import { ApiProperty, getSchemaPath } from '@nestjs/swagger'; + +export class CreateModelDto implements Partial<Model> { + // Cortex Meta + @ApiProperty({ + description: 'The unique identifier of the model.', + example: 'mistral', + }) + @IsString() + model: string; + + @ApiProperty({ description: 'The name of the model.', example: 'mistral' }) + @IsString() + name?: string; + + @ApiProperty({ + description: 'The URL sources from which the model downloaded or accessed.', + example: ['https://huggingface.co/cortexso/mistral/tree/gguf'], + oneOf: [ + { type: 'array', items: { type: 'string' } }, + { $ref: getSchemaPath(ModelArtifactDto) }, + ], + }) + @IsArray() + files: string[] | ModelArtifactDto; + + // Model Input / Output Syntax + @ApiProperty({ + description: + "A predefined text or framework that guides the AI model's response generation.", + example: ` + You are an expert in {subject}. 
Provide a detailed and thorough explanation on the topic of {topic}.`, + }) + @IsOptional() + @IsString() + prompt_template?: string; + + @ApiProperty({ + description: + 'Defines specific tokens or phrases that signal the model to stop producing further output.', + example: ['End'], + }) + @IsOptional() + @IsArray() + stop?: string[]; + + // Results Preferences + @ApiProperty({ + description: + 'Sets the upper limit on the number of tokens the model can generate in a single output.', + example: 4096, + }) + @IsOptional() + @IsNumber() + max_tokens?: number; + + @ApiProperty({ + description: 'Sets probability threshold for more relevant outputs.', + example: 0.9, + }) + @IsOptional() + @IsNumber() + top_p?: number; + + @ApiProperty({ + description: "Influences the randomness of the model's output.", + example: 0.7, + }) + @IsOptional() + @IsNumber() + temperature?: number; + + @ApiProperty({ + description: + 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', + example: 0.5, + }) + @IsOptional() + @IsNumber() + frequency_penalty?: number; + + @ApiProperty({ + description: + 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', + example: 0.6, + }) + @IsOptional() + @IsNumber() + presence_penalty?: number; + + @ApiProperty({ + description: + 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. 
If set to `false`, the output is delivered in a single JSON file.', + example: true, + }) + @IsOptional() + @IsBoolean() + stream?: boolean; + + // Engine Settings + @ApiProperty({ + description: + 'Sets the maximum input the model can use to generate a response, it varies with the model used.', + example: 4096, + }) + @IsOptional() + @IsNumber() + ctx_len?: number; + + @ApiProperty({ description: 'Determines GPU layer usage.', example: 32 }) + @IsOptional() + @IsNumber() + ngl?: number; + + @ApiProperty({ + description: 'Number of parallel processing units to use.', + example: 1, + }) + @IsOptional() + @IsNumber() + @Min(1) + n_parallel?: number; + + @ApiProperty({ + description: + 'Determines CPU inference threads, limited by hardware and OS. ', + example: 10, + }) + @IsOptional() + @IsNumber() + @Min(1) + cpu_threads?: number; + + @ApiProperty({ + description: 'The engine used to run the model.', + example: 'cortex.llamacpp', + }) + @IsOptional() + @IsString() + engine?: string; + + @ApiProperty({ + description: 'The owner of the model.', + example: '', + default: '', + }) + owned_by?: string; +} diff --git a/platform/src/infrastructure/dtos/models/delete-model.dto.ts b/platform/src/infrastructure/dtos/models/delete-model.dto.ts new file mode 100644 index 000000000..4043ed411 --- /dev/null +++ b/platform/src/infrastructure/dtos/models/delete-model.dto.ts @@ -0,0 +1,22 @@ +import { ApiProperty } from '@nestjs/swagger'; + +export class DeleteModelResponseDto { + @ApiProperty({ + example: 'mistral-ins-7b-q4', + description: 'The identifier of the model that was deleted.', + }) + id: string; + + @ApiProperty({ + example: 'model', + description: "Type of the object, indicating it's a model.", + default: 'model', + }) + object: string; + + @ApiProperty({ + example: true, + description: 'Indicates whether the model was successfully deleted.', + }) + deleted: boolean; +} diff --git a/platform/src/infrastructure/dtos/models/download-model.dto.ts 
b/platform/src/infrastructure/dtos/models/download-model.dto.ts new file mode 100644 index 000000000..e4f1692c2 --- /dev/null +++ b/platform/src/infrastructure/dtos/models/download-model.dto.ts @@ -0,0 +1,9 @@ +import { ApiProperty } from '@nestjs/swagger'; + +export class DownloadModelResponseDto { + @ApiProperty({ + example: 'Starting download mistral-ins-7b-q4', + description: 'Message indicates Jan starting download corresponding model.', + }) + message: string; +} diff --git a/platform/src/infrastructure/dtos/models/list-model-response.dto.ts b/platform/src/infrastructure/dtos/models/list-model-response.dto.ts new file mode 100644 index 000000000..ed4c1e98d --- /dev/null +++ b/platform/src/infrastructure/dtos/models/list-model-response.dto.ts @@ -0,0 +1,10 @@ +import { ApiProperty } from '@nestjs/swagger'; +import { ModelDto } from './model.dto'; // Import the ModelDto class + +export class ListModelsResponseDto { + @ApiProperty({ example: 'list', enum: ['list'] }) + object: string; + + @ApiProperty({ type: [ModelDto], description: 'List of models' }) + data: ModelDto[]; +} diff --git a/platform/src/infrastructure/dtos/models/model-artifact.dto.ts b/platform/src/infrastructure/dtos/models/model-artifact.dto.ts new file mode 100644 index 000000000..b450966ba --- /dev/null +++ b/platform/src/infrastructure/dtos/models/model-artifact.dto.ts @@ -0,0 +1,17 @@ +import { IsString } from 'class-validator'; +import { ModelArtifact } from '@/domain/models/model.interface'; +import { ApiProperty } from '@nestjs/swagger'; + +export class ModelArtifactDto implements ModelArtifact { + @ApiProperty({ description: 'The mmproj bin file url.' }) + @IsString() + mmproj?: string; + + @ApiProperty({ description: 'The llama model bin file url (legacy).' }) + @IsString() + llama_model_path?: string; + + @ApiProperty({ description: 'The gguf model bin file url.' 
}) + @IsString() + model_path?: string; +} diff --git a/platform/src/infrastructure/dtos/models/model-settings.dto.ts b/platform/src/infrastructure/dtos/models/model-settings.dto.ts new file mode 100644 index 000000000..90431b03b --- /dev/null +++ b/platform/src/infrastructure/dtos/models/model-settings.dto.ts @@ -0,0 +1,142 @@ +import { ModelSettingParams } from '@/domain/models/model.interface'; +import { ApiProperty } from '@nestjs/swagger'; +import { + IsArray, + IsBoolean, + IsNumber, + IsOptional, + IsString, + Min, +} from 'class-validator'; + +export class ModelSettingsDto implements ModelSettingParams { + // Prompt Settings + @ApiProperty({ + example: 'system\n{system_message}\nuser\n{prompt}\nassistant', + description: + "A predefined text or framework that guides the AI model's response generation.", + }) + @IsOptional() + prompt_template?: string; + + @ApiProperty({ + type: [String], + example: [], + description: + 'Defines specific tokens or phrases that signal the model to stop producing further output.', + }) + @IsArray() + @IsOptional() + stop?: string[]; + + // Engine Settings + @ApiProperty({ description: 'Determines GPU layer usage.', example: 4096 }) + @IsOptional() + @IsNumber() + ngl?: number; + + @ApiProperty({ + description: + 'The context length for model operations varies; the maximum depends on the specific model used.', + example: 4096, + }) + @IsOptional() + @IsNumber() + ctx_len?: number; + + @ApiProperty({ + description: + 'Determines CPU inference threads, limited by hardware and OS. 
', + example: 10, + }) + @IsOptional() + @IsNumber() + @Min(1) + cpu_threads?: number; + + @ApiProperty({ + description: 'The prompt to use for internal configuration', + }) + @IsOptional() + @IsString() + pre_prompt?: string; + + @ApiProperty({ + description: 'The batch size for prompt eval step', + example: 2048, + }) + @IsOptional() + @IsNumber() + n_batch?: number; + + @ApiProperty({ + description: 'To enable prompt caching or not', + example: true, + }) + @IsOptional() + @IsBoolean() + caching_enabled?: boolean; + + @ApiProperty({ + description: 'Group attention factor in self-extend', + example: 1, + }) + @IsOptional() + @IsNumber() + grp_attn_n?: number; + + @ApiProperty({ + description: 'Group attention width in self-extend', + example: 512, + }) + @IsOptional() + @IsNumber() + grp_attn_w?: number; + + @ApiProperty({ + description: 'Prevent system swapping of the model to disk in macOS', + example: false, + }) + @IsOptional() + @IsBoolean() + mlock?: boolean; + + @ApiProperty({ + description: + 'You can constrain the sampling using GBNF grammars by providing path to a grammar file', + }) + @IsOptional() + @IsString() + grammar_file?: string; + + @ApiProperty({ + description: 'To enable Flash Attention, default is true', + example: true, + }) + @IsOptional() + @IsBoolean() + flash_attn?: boolean; + + @ApiProperty({ + description: 'KV cache type: f16, q8_0, q4_0, default is f16', + example: 'f16', + }) + @IsOptional() + @IsString() + cache_type?: string; + + @ApiProperty({ + description: 'To enable mmap, default is true', + example: true, + }) + @IsOptional() + @IsBoolean() + use_mmap?: boolean; + + @ApiProperty({ + example: 'cortex.llamacpp', + description: 'The engine to use.', + }) + @IsOptional() + engine?: string; +} diff --git a/platform/src/infrastructure/dtos/models/model.dto.ts b/platform/src/infrastructure/dtos/models/model.dto.ts new file mode 100644 index 000000000..4054a8b14 --- /dev/null +++ b/platform/src/infrastructure/dtos/models/model.dto.ts 
@@ -0,0 +1,204 @@ +import { Model } from '@/domain/models/model.interface'; +import { ApiProperty } from '@nestjs/swagger'; +import { + IsArray, + IsBoolean, + IsNumber, + IsOptional, + IsString, +} from 'class-validator'; + +export class ModelDto implements Partial<Model> { + @ApiProperty({ + example: 'mistral', + description: + 'The model identifier, which can be referenced in the API endpoints.', + }) + @IsOptional() + id: string; + + // Prompt Settings + @ApiProperty({ + example: `You are an expert in {subject}. Provide a detailed and thorough explanation on the topic of {topic}.`, + description: + "A predefined text or framework that guides the AI model's response generation.", + }) + @IsOptional() + prompt_template?: string; + + @ApiProperty({ + type: [String], + example: ['End'], + description: + 'Defines specific tokens or phrases that signal the model to stop producing further output.', + }) + @IsArray() + @IsOptional() + stop?: string[]; + + // Results Preferences + + @ApiProperty({ + example: 4096, + description: + 'Sets the upper limit on the number of tokens the model can generate in a single output.', + }) + @IsOptional() + @IsNumber() + max_tokens?: number; + + @ApiProperty({ + example: 0.7, + description: "Influences the randomness of the model's output.", + }) + @IsOptional() + @IsNumber() + temperature?: number; + + @ApiProperty({ + example: 0.95, + description: 'Sets probability threshold for more relevant outputs', + }) + @IsOptional() + @IsNumber() + top_p?: number; + + @ApiProperty({ + example: true, + description: + 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. 
If set to `false`, the output is delivered in a single JSON file.', + }) + @IsOptional() + @IsBoolean() + stream?: boolean; + + @ApiProperty({ + example: 0, + description: + 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', + }) + @IsOptional() + @IsNumber() + frequency_penalty?: number; + + @ApiProperty({ + example: 0, + description: + 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', + }) + @IsOptional() + @IsNumber() + presence_penalty?: number; + + // Engine Settings + @ApiProperty({ description: 'Determines GPU layer usage.', example: 32 }) + @IsOptional() + @IsNumber() + ngl?: number; + + @ApiProperty({ + description: + 'The context length for model operations varies; the maximum depends on the specific model used.', + example: 4096, + }) + @IsOptional() + @IsNumber() + ctx_len?: number; + + @ApiProperty({ + description: + 'Determines CPU inference threads, limited by hardware and OS.', + example: 10, + }) + @IsOptional() + @IsNumber() + cpu_threads?: number; + + @ApiProperty({ + description: 'The prompt to use for internal configuration', + example: + 'You are an assistant with expert knowledge in {subject}. Please provide a detailed and accurate response to the following query: {query}. 
Ensure that your response is clear, concise, and informative.', + }) + @IsOptional() + @IsString() + pre_prompt?: string; + + @ApiProperty({ + description: 'The batch size for prompt eval step', + example: 512, + }) + @IsOptional() + @IsNumber() + n_batch?: number; + + @ApiProperty({ + description: 'To enable prompt caching or not', + example: true, + }) + @IsOptional() + @IsBoolean() + caching_enabled?: boolean; + + @ApiProperty({ + description: 'Group attention factor in self-extend', + example: 1, + }) + @IsOptional() + @IsNumber() + grp_attn_n?: number; + + @ApiProperty({ + description: 'Group attention width in self-extend', + example: 512, + }) + @IsOptional() + @IsNumber() + grp_attn_w?: number; + + @ApiProperty({ + description: 'Prevent system swapping of the model to disk in macOS', + example: false, + }) + @IsOptional() + @IsBoolean() + mlock?: boolean; + + @ApiProperty({ + description: + 'You can constrain the sampling using GBNF grammars by providing path to a grammar file', + }) + @IsOptional() + @IsString() + grammar_file?: string; + + @ApiProperty({ + description: 'To enable Flash Attention, default is true', + example: true, + }) + @IsOptional() + @IsBoolean() + flash_attn?: boolean; + + @ApiProperty({ + description: 'KV cache type: f16, q8_0, q4_0, default is f16', + example: 'f16', + }) + @IsOptional() + @IsString() + cache_type?: string; + + @ApiProperty({ + description: 'To enable mmap, default is true', + example: true, + }) + @IsOptional() + @IsBoolean() + use_mmap?: boolean; + + @ApiProperty({ + description: 'The engine to use.', + example: 'cortex.llamacpp', + }) + @IsOptional() + engine?: string; +} diff --git a/platform/src/infrastructure/dtos/models/start-model-success.dto.ts b/platform/src/infrastructure/dtos/models/start-model-success.dto.ts new file mode 100644 index 000000000..203d9e6f5 --- /dev/null +++ b/platform/src/infrastructure/dtos/models/start-model-success.dto.ts @@ -0,0 +1,15 @@ +import { IsString } from 'class-validator'; 
+import { ApiProperty } from '@nestjs/swagger'; + +export class StartModelSuccessDto { + @ApiProperty({ + description: + 'The success or error message displayed when a model is successfully loaded or fails to load.', + }) + @IsString() + message: string; + + @ApiProperty({ description: 'The unique identifier of the model.' }) + @IsString() + modelId: string; +} diff --git a/platform/src/infrastructure/dtos/models/update-model.dto.ts b/platform/src/infrastructure/dtos/models/update-model.dto.ts new file mode 100644 index 000000000..8db8180b2 --- /dev/null +++ b/platform/src/infrastructure/dtos/models/update-model.dto.ts @@ -0,0 +1,4 @@ +import { PartialType } from '@nestjs/mapped-types'; +import { CreateModelDto } from './create-model.dto'; + +export class UpdateModelDto extends PartialType(CreateModelDto) {} diff --git a/platform/src/infrastructure/dtos/page.dto.ts b/platform/src/infrastructure/dtos/page.dto.ts new file mode 100644 index 000000000..f81f3878e --- /dev/null +++ b/platform/src/infrastructure/dtos/page.dto.ts @@ -0,0 +1,28 @@ +import { ApiProperty } from '@nestjs/swagger'; +import { IsArray } from 'class-validator'; + +export class PageDto<T> { + @ApiProperty() + readonly object: string; + + @IsArray() + @ApiProperty({ isArray: true }) + readonly data: T[]; + + @ApiProperty() + readonly first_id: string | undefined; + + @ApiProperty() + readonly last_id: string | undefined; + + @ApiProperty() + readonly has_more: boolean; + + constructor(data: T[], hasMore: boolean, firstId?: string, lastId?: string) { + this.object = 'list'; + this.data = data; + this.first_id = firstId; + this.last_id = lastId; + this.has_more = hasMore; + } +} diff --git a/platform/src/infrastructure/dtos/threads/create-message.dto.ts b/platform/src/infrastructure/dtos/threads/create-message.dto.ts new file mode 100644 index 000000000..cd18e4534 --- /dev/null +++ b/platform/src/infrastructure/dtos/threads/create-message.dto.ts @@ -0,0 +1,17 @@ +import { ApiProperty } from 
'@nestjs/swagger'; + +export class CreateMessageDto { + @ApiProperty({ + example: 'user', + description: `The role of the entity that is creating the message. Allowed values include: + - user: Indicates the message is sent by an actual user and should be used in most cases to represent user-generated messages. + - assistant: Indicates the message is generated by the assistant. Use this value to insert messages from the assistant into the conversation.`, + }) + role: 'user' | 'assistant'; + + @ApiProperty({ + example: 'Tell me a joke', + description: 'The text contents of the message.', + }) + content: string; +} diff --git a/platform/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts b/platform/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts new file mode 100644 index 000000000..fbca1138c --- /dev/null +++ b/platform/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts @@ -0,0 +1,134 @@ +import { IsArray, IsNumber, IsOptional, IsString } from 'class-validator'; +import { ApiProperty } from '@nestjs/swagger'; +import type { + Assistant, + AssistantResponseFormatOption, + AssistantToolResources, +} from '@/domain/models/assistant.interface'; + +export class CreateThreadAssistantDto implements Assistant { + @ApiProperty({ + example: 'thread_123', + description: 'The unique identifier of the assistant.', + type: 'string', + }) + @IsString() + id: string; + + @ApiProperty({ + example: 'https://example.com/avatar.png', + description: "URL of the assistant's avatar image.", + type: 'string', + }) + @IsOptional() + @IsString() + avatar?: string; + + @ApiProperty({ + example: 'Virtual Helper', + description: 'The name of the assistant.', + type: 'string', + }) + @IsString() + name: string; + + @ApiProperty({ + example: 'mistral', + description: "The model's unique identifier and settings.", + type: 'string', + }) + @IsString() + model: string; + + @ApiProperty({ + example: + 'Assist with customer queries and provide information based on 
the company database.', + description: "The assistant's specific instructions.", + type: 'string', + }) + @IsString() + instructions: string; + + @ApiProperty({ + example: [ + { + name: 'Knowledge Retrieval', + settings: { + source: 'internal', + endpoint: 'https://api.example.com/knowledge', + }, + }, + ], + description: "The thread's tool(Knowledge Retrieval) configurations.", + type: 'array', + }) + @IsOptional() + @IsArray() + tools: any; + + @ApiProperty({ + example: + 'This assistant helps with customer support by retrieving relevant information.', + description: 'The description of the assistant.', + type: 'string', + }) + @IsString() + @IsOptional() + description: string | null; + + @ApiProperty({ + example: { department: 'support', version: '1.0' }, + description: 'Additional metadata for the assistant.', + type: 'object', + }) + @IsOptional() + metadata: Record<string, unknown> | null; + + @ApiProperty({ + example: 'assistant', + description: 'The object type, always "assistant".', + type: 'string', + }) + object: 'assistant'; + + @ApiProperty({ + example: 0.7, + description: 'Sampling temperature for the assistant.', + type: 'number', + }) + @IsNumber() + @IsOptional() + temperature?: number | null; + + @ApiProperty({ + example: 0.9, + description: 'Top-p sampling value for the assistant.', + type: 'number', + }) + @IsNumber() + @IsOptional() + top_p?: number | null; + + @ApiProperty({ + example: 1622470423, + description: 'Timestamp of when the assistant was created.', + type: 'number', + }) + created_at: number; + + @ApiProperty({ + example: { format: 'json' }, + description: 'The response format option for the assistant.', + type: 'object', + }) + @IsOptional() + response_format?: AssistantResponseFormatOption; + + @ApiProperty({ + example: { resources: ['database1', 'database2'] }, + description: 'Tool resources for the assistant.', + type: 'object', + }) + @IsOptional() + tool_resources?: AssistantToolResources; +} diff --git 
a/platform/src/infrastructure/dtos/threads/create-thread.dto.ts b/platform/src/infrastructure/dtos/threads/create-thread.dto.ts new file mode 100644 index 000000000..7ae7f3694 --- /dev/null +++ b/platform/src/infrastructure/dtos/threads/create-thread.dto.ts @@ -0,0 +1,10 @@ +import { IsArray } from 'class-validator'; +import { Thread } from '@/domain/models/thread.interface'; +import { CreateThreadAssistantDto } from './create-thread-assistant.dto'; +import { ApiProperty } from '@nestjs/swagger'; + +export class CreateThreadDto implements Partial<Thread> { + @ApiProperty({ description: "The details of the thread's settings." }) + @IsArray() + assistants: CreateThreadAssistantDto[]; +} diff --git a/platform/src/infrastructure/dtos/threads/delete-message.dto.ts b/platform/src/infrastructure/dtos/threads/delete-message.dto.ts new file mode 100644 index 000000000..4277143c3 --- /dev/null +++ b/platform/src/infrastructure/dtos/threads/delete-message.dto.ts @@ -0,0 +1,22 @@ +import { ApiProperty } from '@nestjs/swagger'; + +export default class DeleteMessageDto { + @ApiProperty({ + example: 'message_123', + description: 'The identifier of the message that was deleted.', + }) + id: string; + + @ApiProperty({ + example: 'message', + description: "Type of the object, indicating it's a message.", + default: 'message', + }) + object: string; + + @ApiProperty({ + example: true, + description: 'Indicates whether the message was successfully deleted.', + }) + deleted: boolean; +} diff --git a/platform/src/infrastructure/dtos/threads/delete-thread.dto.ts b/platform/src/infrastructure/dtos/threads/delete-thread.dto.ts new file mode 100644 index 000000000..5e045cf64 --- /dev/null +++ b/platform/src/infrastructure/dtos/threads/delete-thread.dto.ts @@ -0,0 +1,22 @@ +import { ApiProperty } from '@nestjs/swagger'; + +export class DeleteThreadResponseDto { + @ApiProperty({ + example: 'thread_123', + description: 'The identifier of the thread that was deleted.', + }) + id: string; + 
@ApiProperty({ + example: 'thread', + description: "Type of the object, indicating it's a thread.", + default: 'thread', + }) + object: string; + + @ApiProperty({ + example: true, + description: 'Indicates whether the thread was successfully deleted.', + }) + deleted: boolean; +} diff --git a/platform/src/infrastructure/dtos/threads/get-thread.dto.ts b/platform/src/infrastructure/dtos/threads/get-thread.dto.ts new file mode 100644 index 000000000..19016a1ac --- /dev/null +++ b/platform/src/infrastructure/dtos/threads/get-thread.dto.ts @@ -0,0 +1,39 @@ +import { ApiProperty } from '@nestjs/swagger'; + +export class GetThreadResponseDto { + @ApiProperty({ + example: 'thread_abc123', + description: 'The identifier of the thread.', + }) + id: string; + + @ApiProperty({ example: 'thread', description: 'Type of the object' }) + object: string; + + @ApiProperty({ + example: 1699014083, + description: 'Unix timestamp representing the creation time of the thread.', + type: 'integer', + }) + created_at: number; + + @ApiProperty({ + example: ['assistant-001'], + description: 'List of assistants involved in the thread.', + type: [String], + }) + assistants: string[]; + + @ApiProperty({ + example: {}, + description: 'Metadata associated with the thread.', + }) + metadata: object; + + @ApiProperty({ + example: [], + description: 'List of messages within the thread.', + type: [String], + }) + messages: string[]; +} diff --git a/platform/src/infrastructure/dtos/threads/update-message.dto.ts b/platform/src/infrastructure/dtos/threads/update-message.dto.ts new file mode 100644 index 000000000..1ced3e4cb --- /dev/null +++ b/platform/src/infrastructure/dtos/threads/update-message.dto.ts @@ -0,0 +1,4 @@ +import { PartialType } from '@nestjs/mapped-types'; +import { MessageEntity } from '@/infrastructure/entities/message.entity'; + +export class UpdateMessageDto extends PartialType(MessageEntity) {} diff --git a/platform/src/infrastructure/dtos/threads/update-thread.dto.ts 
b/platform/src/infrastructure/dtos/threads/update-thread.dto.ts new file mode 100644 index 000000000..c97704689 --- /dev/null +++ b/platform/src/infrastructure/dtos/threads/update-thread.dto.ts @@ -0,0 +1,4 @@ +import { PartialType } from '@nestjs/mapped-types'; +import { CreateThreadDto } from './create-thread.dto'; + +export class UpdateThreadDto extends PartialType(CreateThreadDto) {} diff --git a/platform/src/infrastructure/entities/assistant.entity.ts b/platform/src/infrastructure/entities/assistant.entity.ts new file mode 100644 index 000000000..132a14ad9 --- /dev/null +++ b/platform/src/infrastructure/entities/assistant.entity.ts @@ -0,0 +1,96 @@ +import { + Table, + Column, + Model, + PrimaryKey, + DataType, +} from 'sequelize-typescript'; +import { Assistant } from '@/domain/models/assistant.interface'; +import type { + AssistantToolResources, + AssistantResponseFormatOption, +} from '@/domain/models/assistant.interface'; + +@Table({ tableName: 'assistants', timestamps: false }) +export class AssistantEntity extends Model implements Assistant { + @PrimaryKey + @Column({ + type: DataType.STRING, + }) + id: string; + + @Column({ + type: DataType.STRING, + allowNull: true, + }) + avatar?: string; + + @Column({ + type: DataType.STRING, + defaultValue: 'assistant', + }) + object: 'assistant'; + + @Column({ + type: DataType.INTEGER, + }) + created_at: number; + + @Column({ + type: DataType.STRING, + allowNull: true, + }) + name: string | null; + + @Column({ + type: DataType.STRING, + allowNull: true, + }) + description: string | null; + + @Column({ + type: DataType.STRING, + }) + model: string; + + @Column({ + type: DataType.STRING, + allowNull: true, + }) + instructions: string | null; + + @Column({ + type: DataType.JSON, + }) + tools: any; + + @Column({ + type: DataType.JSON, + allowNull: true, + }) + metadata: any | null; + + @Column({ + type: DataType.FLOAT, + allowNull: true, + }) + top_p: number | null; + + @Column({ + type: DataType.FLOAT, + allowNull: 
true, + }) + temperature: number | null; + + @Column({ + type: DataType.JSON, + allowNull: true, + }) + response_format: AssistantResponseFormatOption | null; + + @Column({ + type: DataType.JSON, + allowNull: true, + }) + tool_resources: AssistantToolResources | null; +} diff --git a/platform/src/infrastructure/entities/message.entity.ts b/platform/src/infrastructure/entities/message.entity.ts new file mode 100644 index 000000000..c40beaecc --- /dev/null +++ b/platform/src/infrastructure/entities/message.entity.ts @@ -0,0 +1,94 @@ +import { + Table, + Column, + Model, + PrimaryKey, + DataType, +} from 'sequelize-typescript'; +import type { + Message, + MessageContent, + MessageIncompleteDetails, + MessageAttachment, +} from '@/domain/models/message.interface'; + +@Table({ tableName: 'messages', timestamps: false }) +export class MessageEntity extends Model implements Message { + @PrimaryKey + @Column({ + type: DataType.STRING, + }) + id: string; + + @Column({ + type: DataType.STRING, + defaultValue: 'thread.message', + }) + object: 'thread.message'; + + @Column({ + type: DataType.STRING, + }) + thread_id: string; + + @Column({ + type: DataType.STRING, + allowNull: true, + }) + assistant_id: string | null; + + @Column({ + type: DataType.STRING, + }) + role: 'user' | 'assistant'; + + @Column({ + type: DataType.STRING, + }) + status: 'in_progress' | 'incomplete' | 'completed'; + + @Column({ + type: DataType.JSON, + allowNull: true, + }) + metadata: any | null; + + @Column({ + type: DataType.STRING, + allowNull: true, + }) + run_id: string | null; + + @Column({ + type: DataType.INTEGER, + allowNull: true, + }) + completed_at: number | null; + + @Column({ + type: DataType.JSON, + }) + content: MessageContent[]; + + @Column({ + type: DataType.JSON, + allowNull: true, + }) + incomplete_details: MessageIncompleteDetails | null; + + @Column({ + type: DataType.INTEGER, + }) + created_at: number; + + @Column({ + type: DataType.JSON, + }) + attachments: MessageAttachment[]; + 
+ @Column({ + type: DataType.INTEGER, + allowNull: true, + }) + incomplete_at: number | null; +} diff --git a/platform/src/infrastructure/entities/thread.entity.ts b/platform/src/infrastructure/entities/thread.entity.ts new file mode 100644 index 000000000..2f02e5ff8 --- /dev/null +++ b/platform/src/infrastructure/entities/thread.entity.ts @@ -0,0 +1,54 @@ +import { + Table, + Column, + Model, + PrimaryKey, + DataType, +} from 'sequelize-typescript'; +import type { + Thread, + ThreadToolResources, +} from '@/domain/models/thread.interface'; +import { AssistantEntity } from './assistant.entity'; + +@Table({ tableName: 'threads', timestamps: false }) +export class ThreadEntity extends Model implements Thread { + @PrimaryKey + @Column({ + type: DataType.STRING, + }) + id: string; + + @Column({ + type: DataType.STRING, + defaultValue: 'thread', + }) + object: 'thread'; + + @Column({ + type: DataType.STRING, + }) + title: string; + + @Column({ + type: DataType.JSON, + }) + assistants: AssistantEntity[]; + + @Column({ + type: DataType.INTEGER, + }) + created_at: number; + + @Column({ + type: DataType.JSON, + allowNull: true, + }) + tool_resources: ThreadToolResources | null; + + @Column({ + type: DataType.JSON, + allowNull: true, + }) + metadata: any | null; +} diff --git a/platform/src/infrastructure/exception/duplicate-assistant.exception.ts b/platform/src/infrastructure/exception/duplicate-assistant.exception.ts new file mode 100644 index 000000000..8b328b9c4 --- /dev/null +++ b/platform/src/infrastructure/exception/duplicate-assistant.exception.ts @@ -0,0 +1,10 @@ +import { HttpException, HttpStatus } from '@nestjs/common'; + +export class DuplicateAssistantException extends HttpException { + constructor(assistantId: string) { + super( + `Assistant with the id ${assistantId} already exists.`, + HttpStatus.CONFLICT, + ); + } +} diff --git a/platform/src/infrastructure/exception/global.exception.ts b/platform/src/infrastructure/exception/global.exception.ts new file 
mode 100644 index 000000000..3c254aa03 --- /dev/null +++ b/platform/src/infrastructure/exception/global.exception.ts @@ -0,0 +1,39 @@ +import { TelemetrySource } from '@/domain/telemetry/telemetry.interface'; +import { TelemetryUsecases } from '@/usecases/telemetry/telemetry.usecases'; +import { + ArgumentsHost, + Catch, + ExceptionFilter, + HttpException, + HttpStatus, +} from '@nestjs/common'; +import { Response } from 'express'; + +@Catch() +export class GlobalExceptionFilter implements ExceptionFilter { + constructor(private readonly telemetryService: TelemetryUsecases) {} + async catch(exception: HttpException | Error, host: ArgumentsHost) { + const isHttpException = exception instanceof HttpException; + const httpStatus = isHttpException + ? exception.getStatus() + : HttpStatus.INTERNAL_SERVER_ERROR; + + const message = isHttpException + ? exception.message + : 'Internal Server Error'; + + const response = host.switchToHttp().getResponse(); + + if (!isHttpException || httpStatus === HttpStatus.INTERNAL_SERVER_ERROR) { + await this.telemetryService.createCrashReport( + exception, + TelemetrySource.CORTEX_SERVER, + ); + await this.telemetryService.sendCrashReport(); + } + response.status(httpStatus).json({ + statusCode: httpStatus, + message, + }); + } +} diff --git a/platform/src/infrastructure/exception/model-not-found.exception.ts b/platform/src/infrastructure/exception/model-not-found.exception.ts new file mode 100644 index 000000000..f71cb8a1c --- /dev/null +++ b/platform/src/infrastructure/exception/model-not-found.exception.ts @@ -0,0 +1,7 @@ +import { HttpException, HttpStatus } from '@nestjs/common'; + +export class ModelNotFoundException extends HttpException { + constructor(modelId: string) { + super(`Model ${modelId} not found!`, HttpStatus.NOT_FOUND); + } +} diff --git a/platform/src/infrastructure/exception/model-setting-not-found.exception.ts b/platform/src/infrastructure/exception/model-setting-not-found.exception.ts new file mode 100644 index 
000000000..3301cfc80 --- /dev/null +++ b/platform/src/infrastructure/exception/model-setting-not-found.exception.ts @@ -0,0 +1,7 @@ +import { HttpException, HttpStatus } from '@nestjs/common'; + +export class ModelSettingNotFoundException extends HttpException { + constructor(engine: string) { + super(`Model setting for ${engine} not found!`, HttpStatus.NOT_FOUND); + } +} diff --git a/platform/src/infrastructure/interceptors/transform.interceptor.ts b/platform/src/infrastructure/interceptors/transform.interceptor.ts new file mode 100644 index 000000000..1cc5a44fe --- /dev/null +++ b/platform/src/infrastructure/interceptors/transform.interceptor.ts @@ -0,0 +1,33 @@ +import { + CallHandler, + ExecutionContext, + Injectable, + NestInterceptor, +} from '@nestjs/common'; +import { Observable } from 'rxjs'; +import { map } from 'rxjs/operators'; + +export interface Response<T> { + data: T; +} + +@Injectable() +export class TransformInterceptor<T> + implements NestInterceptor<T, Response<T>> +{ + intercept( + context: ExecutionContext, + next: CallHandler, + ): Observable<Response<T>> { + return next.handle().pipe( + map((data) => { + return Array.isArray(data) + ? 
{ + object: 'list', + data, + } + : data; + }), + ); + } +} diff --git a/platform/src/infrastructure/middlewares/app.logger.middleware.ts b/platform/src/infrastructure/middlewares/app.logger.middleware.ts new file mode 100644 index 000000000..1dbbd0a1e --- /dev/null +++ b/platform/src/infrastructure/middlewares/app.logger.middleware.ts @@ -0,0 +1,40 @@ +import { TelemetrySource } from '@/domain/telemetry/telemetry.interface'; +import { Injectable, NestMiddleware, Logger } from '@nestjs/common'; + +import { Request, Response, NextFunction } from 'express'; +import { ContextService } from '../services/context/context.service'; + +@Injectable() +export class AppLoggerMiddleware implements NestMiddleware { + constructor(private readonly contextService: ContextService) {} + private logger = new Logger('HTTP'); + + use(req: Request, res: Response, next: NextFunction): void { + const userAgent = req.get('user-agent') ?? ''; + const { ip, method, path: url, originalUrl } = req; + //Setting the x-correlation-id + const correlationHeader = req.header('x-correlation-id') ?? ''; + req.headers['x-correlation-id'] = correlationHeader; + res.set('X-Correlation-Id', correlationHeader); + res.on('close', () => { + const { statusCode } = res; + const contentLength = res.get('content-length'); + this.logger.log( + JSON.stringify({ + method: method, + path: originalUrl ?? url, + statusCode: statusCode, + ip: ip, + content_length: contentLength, + user_agent: userAgent, + x_correlation_id: req.headers['x-correlation-id'], + }), + ); + }); + this.contextService.init(() => { + this.contextService.set('endpoint', originalUrl ?? 
url); + this.contextService.set('source', TelemetrySource.CORTEX_SERVER); + next(); + }); + } +} diff --git a/platform/src/infrastructure/providers/cortex/cortex.module.ts b/platform/src/infrastructure/providers/cortex/cortex.module.ts new file mode 100644 index 000000000..42d80effd --- /dev/null +++ b/platform/src/infrastructure/providers/cortex/cortex.module.ts @@ -0,0 +1,30 @@ +import { Module } from '@nestjs/common'; +import CortexProvider from './cortex.provider'; +import { HttpModule } from '@nestjs/axios'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; +import Onnxprovider from './onnx.provider'; +import LlamaCPPProvider from './llamacpp.provider'; +import TensorrtLLMProvider from './tensorrtllm.provider'; + +@Module({ + imports: [HttpModule, FileManagerModule], + providers: [ + { + provide: 'CORTEX_PROVIDER', + useClass: CortexProvider, + }, + Onnxprovider, + LlamaCPPProvider, + TensorrtLLMProvider, + ], + exports: [ + { + provide: 'CORTEX_PROVIDER', + useClass: CortexProvider, + }, + Onnxprovider, + LlamaCPPProvider, + TensorrtLLMProvider, + ], +}) +export class CortexProviderModule {} diff --git a/platform/src/infrastructure/providers/cortex/cortex.provider.ts b/platform/src/infrastructure/providers/cortex/cortex.provider.ts new file mode 100644 index 000000000..810aa8b92 --- /dev/null +++ b/platform/src/infrastructure/providers/cortex/cortex.provider.ts @@ -0,0 +1,183 @@ +import { HttpStatus, Injectable } from '@nestjs/common'; +import { OAIEngineExtension } from '@/domain/abstracts/oai.abstract'; +import { PromptTemplate } from '@/domain/models/prompt-template.interface'; +import { join } from 'path'; +import { Model, ModelSettingParams } from '@/domain/models/model.interface'; +import { HttpService } from '@nestjs/axios'; +import { + CORTEX_CPP_MODELS_URL, + defaultCortexCppHost, + defaultCortexCppPort, +} from '@/infrastructure/constants/cortex'; +import { readdirSync } from 'node:fs'; +import { 
normalizeModelId } from '@/utils/normalize-model-id'; +import { firstValueFrom } from 'rxjs'; +import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; +import { existsSync, readFileSync } from 'fs'; + +export interface ModelStatResponse { + object: string; + data: any; +} + +@Injectable() +export default class CortexProvider extends OAIEngineExtension { + apiUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/chat_completion`; + name = 'cortex'; + productName = 'Cortex Inference Engine'; + description = + 'This extension enables chat completion API calls using the Cortex engine'; + version = '0.0.1'; + apiKey?: string | undefined; + + private loadModelUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/loadmodel`; + private unloadModelUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/unloadmodel`; + + constructor(protected readonly httpService: HttpService) { + super(httpService); + this.persistEngineVersion(); + } + + // Override the inference method to make an inference request to the engine + override async loadModel( + model: Model, + settings?: ModelSettingParams, + ): Promise<void> { + const modelsContainerDir = await fileManagerService.getModelsPath(); + + let llama_model_path = settings?.llama_model_path; + if (!llama_model_path) { + const modelFolderFullPath = join( + modelsContainerDir, + normalizeModelId(model.model), + ); + const ggufFiles = readdirSync(modelFolderFullPath).filter((file) => { + return file.endsWith('.gguf'); + }); + + if (ggufFiles.length === 0) { + throw new Error('Model binary not found'); + } + + const modelBinaryLocalPath = join(modelFolderFullPath, ggufFiles[0]); + llama_model_path = modelBinaryLocalPath; + } + + const cpuThreadCount = 1; // TODO: Math.max(1, nitroResourceProbe.numCpuPhysicalCore); + const modelSettings = { + // This is critical and requires real CPU physical core count (or performance core) + 
cpu_threads: cpuThreadCount, + ...model, + ...settings, + llama_model_path, + ...('mmproj' in model.files && + model.files.mmproj && { + mmproj: settings?.mmproj, + }), + }; + + // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt + if (model.prompt_template) { + const promptTemplate = model.prompt_template; + const prompt = this.promptTemplateConverter(promptTemplate); + if (prompt?.error) { + throw new Error(prompt.error); + } + modelSettings.system_prompt = prompt.system_prompt; + modelSettings.user_prompt = prompt.user_prompt; + modelSettings.ai_prompt = prompt.ai_prompt; + } + return firstValueFrom( + this.httpService.post(this.loadModelUrl, modelSettings), + ).then(); // pipe error or void instead of throwing + } + + override async unloadModel(modelId: string, engine?: string): Promise<void> { + return firstValueFrom( + this.httpService.post(this.unloadModelUrl, { model: modelId, engine }), + ).then(); // pipe error or void instead of throwing + } + + // Override the isModelRunning method to check if the model is running + override async isModelRunning(modelId: string): Promise<boolean> { + const configs = await fileManagerService.getConfig(); + + return firstValueFrom( + this.httpService.get( + CORTEX_CPP_MODELS_URL(configs.cortexCppHost, configs.cortexCppPort), + ), + ) + .then((res) => { + const data = res.data as ModelStatResponse; + if ( + res.status === HttpStatus.OK && + data && + Array.isArray(data.data) && + data.data.length > 0 + ) { + return data.data.find((e) => e.id === modelId); + } + return false; + }) + .catch(() => false); + } + + private readonly promptTemplateConverter = ( + promptTemplate: string, + ): PromptTemplate => { + // Split the string using the markers + const systemMarker = '{system_message}'; + const promptMarker = '{prompt}'; + + if ( + promptTemplate.includes(systemMarker) && + promptTemplate.includes(promptMarker) + ) { + // Find the indices of the markers + const systemIndex = promptTemplate.indexOf(systemMarker); + 
const promptIndex = promptTemplate.indexOf(promptMarker); + + // Extract the parts of the string + const system_prompt = promptTemplate.substring(0, systemIndex); + const user_prompt = promptTemplate.substring( + systemIndex + systemMarker.length, + promptIndex, + ); + const ai_prompt = promptTemplate.substring( + promptIndex + promptMarker.length, + ); + + // Return the split parts + return { system_prompt, user_prompt, ai_prompt }; + } else if (promptTemplate.includes(promptMarker)) { + // Extract the parts of the string for the case where only promptMarker is present + const promptIndex = promptTemplate.indexOf(promptMarker); + const user_prompt = promptTemplate.substring(0, promptIndex); + const ai_prompt = promptTemplate.substring( + promptIndex + promptMarker.length, + ); + + // Return the split parts + return { user_prompt, ai_prompt }; + } + + // Return an error if none of the conditions are met + return { error: 'Cannot split prompt template' }; + }; + + public setUrls(host: string, port: number): void { + this.apiUrl = `http://${host}:${port}/inferences/server/chat_completion`; + this.loadModelUrl = `http://${host}:${port}/inferences/server/loadmodel`; + this.unloadModelUrl = `http://${host}:${port}/inferences/server/unloadmodel`; + } + + private persistEngineVersion = async () => { + const versionFilePath = join( + await fileManagerService.getCortexCppEnginePath(), + this.name, + 'version.txt', + ); + if (existsSync(versionFilePath)) + this.version = readFileSync(versionFilePath, 'utf-8'); + }; +} diff --git a/platform/src/infrastructure/providers/cortex/llamacpp.provider.ts b/platform/src/infrastructure/providers/cortex/llamacpp.provider.ts new file mode 100644 index 000000000..85a76c55b --- /dev/null +++ b/platform/src/infrastructure/providers/cortex/llamacpp.provider.ts @@ -0,0 +1,11 @@ +import { Injectable } from '@nestjs/common'; +import CortexProvider from './cortex.provider'; +import { Engines } from 
'@/infrastructure/commanders/types/engine.interface'; + +@Injectable() +export default class LlamaCPPProvider extends CortexProvider { + name = Engines.llamaCPP; + productName = 'LlamaCPP Inference Engine'; + description = + 'This extension enables chat completion API calls using the LlamaCPP engine'; +} diff --git a/platform/src/infrastructure/providers/cortex/onnx.provider.ts b/platform/src/infrastructure/providers/cortex/onnx.provider.ts new file mode 100644 index 000000000..4a3ef2fa9 --- /dev/null +++ b/platform/src/infrastructure/providers/cortex/onnx.provider.ts @@ -0,0 +1,11 @@ +import { Injectable } from '@nestjs/common'; +import CortexProvider from './cortex.provider'; +import { Engines } from '@/infrastructure/commanders/types/engine.interface'; + +@Injectable() +export default class Onnxprovider extends CortexProvider { + name = Engines.onnx; + productName = 'Onnx Inference Engine'; + description = + 'This extension enables chat completion API calls using the Onnx engine'; +} diff --git a/platform/src/infrastructure/providers/cortex/tensorrtllm.provider.ts b/platform/src/infrastructure/providers/cortex/tensorrtllm.provider.ts new file mode 100644 index 000000000..65c63b489 --- /dev/null +++ b/platform/src/infrastructure/providers/cortex/tensorrtllm.provider.ts @@ -0,0 +1,11 @@ +import { Injectable } from '@nestjs/common'; +import CortexProvider from './cortex.provider'; +import { Engines } from '@/infrastructure/commanders/types/engine.interface'; + +@Injectable() +export default class TensorrtLLMProvider extends CortexProvider { + name = Engines.tensorrtLLM; + productName = 'TensorrtLLM Inference Engine'; + description = + 'This extension enables chat completion API calls using the TensorrtLLM engine'; +} diff --git a/platform/src/infrastructure/repositories/extensions/extension.module.ts b/platform/src/infrastructure/repositories/extensions/extension.module.ts new file mode 100644 index 000000000..0c29d756c --- /dev/null +++ 
b/platform/src/infrastructure/repositories/extensions/extension.module.ts @@ -0,0 +1,24 @@ +import { Module } from '@nestjs/common'; +import { ExtensionRepositoryImpl } from './extension.repository'; +import { ExtensionRepository } from '@/domain/repositories/extension.interface'; +import { CortexProviderModule } from '@/infrastructure/providers/cortex/cortex.module'; +import { HttpModule } from '@nestjs/axios'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; +import { ExtensionsModule } from '@/extensions/extensions.module'; + +@Module({ + imports: [ + CortexProviderModule, + HttpModule, + FileManagerModule, + ExtensionsModule, + ], + providers: [ + { + provide: ExtensionRepository, + useClass: ExtensionRepositoryImpl, + }, + ], + exports: [ExtensionRepository], +}) +export class ExtensionModule {} diff --git a/platform/src/infrastructure/repositories/extensions/extension.repository.ts b/platform/src/infrastructure/repositories/extensions/extension.repository.ts new file mode 100644 index 000000000..86ddef9d9 --- /dev/null +++ b/platform/src/infrastructure/repositories/extensions/extension.repository.ts @@ -0,0 +1,159 @@ +import { Inject, Injectable } from '@nestjs/common'; +import { ExtensionRepository } from '@/domain/repositories/extension.interface'; +import { Extension } from '@/domain/abstracts/extension.abstract'; +import { readdir, lstat } from 'fs/promises'; +import { join } from 'path'; +import { + fileManagerService, + FileManagerService, +} from '@/infrastructure/services/file-manager/file-manager.service'; +import { existsSync, mkdirSync, watch } from 'fs'; +import { Engines } from '@/infrastructure/commanders/types/engine.interface'; +import { OAIEngineExtension } from '@/domain/abstracts/oai.abstract'; +import { HttpService } from '@nestjs/axios'; +import LlamaCPPProvider from '@/infrastructure/providers/cortex/llamacpp.provider'; +import Onnxprovider from 
'@/infrastructure/providers/cortex/onnx.provider'; +import TensorrtLLMProvider from '@/infrastructure/providers/cortex/tensorrtllm.provider'; +import { EngineStatus } from '@/domain/abstracts/engine.abstract'; + +@Injectable() +export class ExtensionRepositoryImpl implements ExtensionRepository { + // Initialize the Extensions Map with the key-value pairs of the core providers. + extensions = new Map(); + + constructor( + @Inject('EXTENSIONS_PROVIDER') + private readonly coreExtensions: OAIEngineExtension[], + private readonly httpService: HttpService, + ) { + this.loadCoreExtensions(); + this.loadExternalExtensions(); + + // Watch engine folder only for now + fileManagerService.getCortexCppEnginePath().then((path) => { + if (!existsSync(path)) mkdirSync(path); + watch(path, () => { + this.extensions.clear(); + this.loadCoreExtensions(); + this.loadExternalExtensions(); + }); + }); + } + /** + * Persist extension to the extensions map + * @param object + * @returns + */ + create(object: Extension): Promise { + this.extensions.set(object.name ?? '', object); + return Promise.resolve(object); + } + + /** + * Find all extensions + * @returns + */ + findAll(): Promise { + return Promise.resolve(Array.from(this.extensions.values())); + } + + /** + * Find one extension by id + * @param id + * @returns + */ + findOne(id: string): Promise { + return Promise.resolve(this.extensions.get(id) ?? 
null); + } + + /** + * Update extension + * It is not applicable + */ + update(): Promise { + throw new Error('Method not implemented.'); + } + + /** + * Remove extension from the extensions map + * @param id + * @returns + */ + remove(id: string): Promise { + this.extensions.delete(id); + return Promise.resolve(); + } + + private async loadCoreExtensions() { + const { cortexCppPort, cortexCppHost } = + await fileManagerService.getConfig(); + const llamaCPPEngine = new LlamaCPPProvider(this.httpService); + llamaCPPEngine.setUrls(cortexCppHost, cortexCppPort); + llamaCPPEngine.status = existsSync( + join(await fileManagerService.getCortexCppEnginePath(), Engines.llamaCPP), + ) + ? EngineStatus.READY + : EngineStatus.NOT_INITIALIZED; + + const onnxEngine = new Onnxprovider(this.httpService); + onnxEngine.setUrls(cortexCppHost, cortexCppPort); + onnxEngine.status = + existsSync( + join(await fileManagerService.getCortexCppEnginePath(), Engines.onnx), + ) && process.platform === 'win32' + ? EngineStatus.READY + : process.platform !== 'win32' + ? EngineStatus.NOT_SUPPORTED + : EngineStatus.NOT_INITIALIZED; + + const tensorrtLLMEngine = new TensorrtLLMProvider(this.httpService); + tensorrtLLMEngine.setUrls(cortexCppHost, cortexCppPort); + tensorrtLLMEngine.status = + existsSync( + join( + await fileManagerService.getCortexCppEnginePath(), + Engines.tensorrtLLM, + ), + ) && process.platform !== 'darwin' + ? EngineStatus.READY + : process.platform === 'darwin' + ? 
EngineStatus.NOT_SUPPORTED + : EngineStatus.NOT_INITIALIZED; + + await llamaCPPEngine.onLoad(); + await onnxEngine.onLoad(); + await tensorrtLLMEngine.onLoad(); + + this.extensions.set(Engines.llamaCPP, llamaCPPEngine); + this.extensions.set(Engines.onnx, onnxEngine); + this.extensions.set(Engines.tensorrtLLM, tensorrtLLMEngine); + + for (const extension of this.coreExtensions) { + await extension.onLoad(); + this.extensions.set(extension.name, extension); + } + } + + private async loadExternalExtensions() { + const extensionsPath = + process.env.EXTENSIONS_PATH ?? + (await fileManagerService.getExtensionsPath()); + this.loadExtensions(extensionsPath); + } + + private async loadExtensions(extensionsPath: string) { + if (!existsSync(extensionsPath)) return; + + readdir(extensionsPath).then((files) => { + files.forEach(async (extension) => { + const extensionFullPath = join(extensionsPath, extension); + if (!(await lstat(extensionFullPath)).isDirectory()) return; + + import(extensionFullPath).then((extensionClass) => { + const newExtension = new extensionClass.default(); + this.extensions.set(extension, newExtension); + }); + }); + }); + } +} diff --git a/platform/src/infrastructure/repositories/models/model.module.ts b/platform/src/infrastructure/repositories/models/model.module.ts new file mode 100644 index 000000000..c5bcda706 --- /dev/null +++ b/platform/src/infrastructure/repositories/models/model.module.ts @@ -0,0 +1,24 @@ +import { Module } from '@nestjs/common'; +import { CortexProviderModule } from '@/infrastructure/providers/cortex/cortex.module'; +import { HttpModule } from '@nestjs/axios'; +import { ModelRepository } from '@/domain/repositories/model.interface'; +import { ModelRepositoryImpl } from './model.repository'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; +import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; + +@Module({ + imports: [ + 
CortexProviderModule, + HttpModule, + FileManagerModule, + DownloadManagerModule, + ], + providers: [ + { + provide: ModelRepository, + useClass: ModelRepositoryImpl, + }, + ], + exports: [ModelRepository], +}) +export class ModelRepositoryModule {} diff --git a/platform/src/infrastructure/repositories/models/model.repository.ts b/platform/src/infrastructure/repositories/models/model.repository.ts new file mode 100644 index 000000000..c2f9fdc04 --- /dev/null +++ b/platform/src/infrastructure/repositories/models/model.repository.ts @@ -0,0 +1,194 @@ +import { Injectable } from '@nestjs/common'; +import { join, extname, basename } from 'path'; +import { ModelRepository } from '@/domain/repositories/model.interface'; +import { Model } from '@/domain/models/model.interface'; +import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; +import { + existsSync, + mkdirSync, + readFileSync, + readdirSync, + rmSync, + writeFileSync, + watch, +} from 'fs'; +import { load, dump } from 'js-yaml'; +import { isLocalModel, normalizeModelId } from '@/utils/normalize-model-id'; + +@Injectable() +export class ModelRepositoryImpl implements ModelRepository { + // Initialize the Extensions Map with the key-value pairs of the core providers. + models = new Map([]); + // Map between files and models. E.g. llama3:7b -> llama3-7b.yaml + fileModel = new Map([]); + // Check whether the models have been loaded or not. 
+ loaded = false; + + constructor() { + this.loadModels(); + fileManagerService.getModelsPath().then((path) => { + if (!existsSync(path)) mkdirSync(path); + watch(path, (eventType, filename) => { + this.loadModels(true); + }); + }); + } + + /** + * Create a new model + * This would persist the model yaml file to the models folder + * @param object + * @returns the created model + */ + async create(object: Model): Promise { + const modelsFolderPath = join( + await fileManagerService.getDataFolderPath(), + 'models', + ); + const modelYaml = dump(object); + if (!existsSync(modelsFolderPath)) mkdirSync(modelsFolderPath); + const modelsPath = + process.env.EXTENSIONS_PATH ?? (await fileManagerService.getModelsPath()); + writeFileSync( + join(modelsPath, `${normalizeModelId(object.model)}.yaml`), + modelYaml, + ); + + this.models.set(object.model ?? '', object); + return Promise.resolve(object); + } + + /** + * Find all models + * This would load all the models from the models folder + * @param object + * @returns the created model + */ + findAll(): Promise { + return this.loadModels().then((res) => + res.filter((model) => isLocalModel(model.files)), + ); + } + /** + * Find one model by id + * @param id model id + * @returns the model + */ + findOne(id: string): Promise { + return this.loadModels().then(() => this.models.get(id) ?? null); + } + + /** + * Update a model + * This would update the model yaml file in the models folder + * @param id model id + * @param object model object + */ + async update(id: string, object: Partial): Promise { + const originalModel = await this.findOne(id); + if (!originalModel) throw new Error('Model not found'); + + const updatedModel = { + ...originalModel, + ...object, + } satisfies Model; + + const modelYaml = dump(updatedModel); + const modelsPath = + process.env.EXTENSIONS_PATH ?? (await fileManagerService.getModelsPath()); + + writeFileSync( + join( + modelsPath, + this.fileModel.get(id) ?? 
`${normalizeModelId(id)}.yaml`, + ), + modelYaml, + ); + + this.models.set(id ?? '', updatedModel); + } + + /** + * Remove a model + * This would remove the model yaml file from the models folder + * @param id model id + */ + async remove(id: string): Promise { + this.models.delete(id); + const yamlFilePath = join( + await fileManagerService.getModelsPath(), + this.fileModel.get(id) ?? id, + ); + if (existsSync(yamlFilePath)) rmSync(yamlFilePath); + return Promise.resolve(); + } + + /** + * Load all models + * This would load all the models from the models folder + * @returns the list of models + */ + private async loadModels(forceReload: boolean = false): Promise { + if (this.loaded && !forceReload) return Array.from(this.models.values()); + const modelsPath = + process.env.EXTENSIONS_PATH ?? (await fileManagerService.getModelsPath()); + + this.models.clear(); + this.fileModel.clear(); + + if (!existsSync(modelsPath)) return []; + + const modelFiles = readdirSync(modelsPath) + .filter( + (file) => + extname(file).toLowerCase() === '.yaml' || + extname(file).toLowerCase() === '.yml', + ) + .map((file) => join(modelsPath, file)); + + modelFiles.forEach(async (modelFile) => { + const model = readFileSync(modelFile, 'utf8'); + const yamlObject = load(model) as Model; + const fileName = basename(modelFile); + + if (yamlObject) { + this.fileModel.set(yamlObject.model, fileName); + this.models.set(yamlObject.model, yamlObject); + } + }); + this.loaded = true; + return Array.from(this.models.values()); + } + + /** + * Load a model by file + * This would load a model from a file + * @returns the model + */ + async loadModelByFile( + modelId: string, + modelPath: string, + modelFile: string, + ): Promise { + const checkExists = await this.findOne(modelId); + if (checkExists) return checkExists; + if (!existsSync(modelPath)) return null; + + const model = readFileSync(modelPath, 'utf8'); + const yamlObject = load(model) as Model; + const fileName = basename(modelId); + const 
modelObject = { + ...yamlObject, + model: modelId, + llama_model_path: modelFile, + model_path: modelFile, + files: [modelFile], + }; + if (modelObject) { + this.fileModel.set(modelId, fileName); + this.models.set(modelId, modelObject); + } + this.loaded = true; + return modelObject; + } +} diff --git a/platform/src/infrastructure/repositories/telemetry/telemetry.repository.ts b/platform/src/infrastructure/repositories/telemetry/telemetry.repository.ts new file mode 100644 index 000000000..d276e01a9 --- /dev/null +++ b/platform/src/infrastructure/repositories/telemetry/telemetry.repository.ts @@ -0,0 +1,276 @@ +import * as cypto from 'crypto'; +import os from 'os'; +import systemInformation from 'systeminformation'; +import { TelemetryRepository } from '@/domain/repositories/telemetry.interface'; +import { + Attribute, + CrashReportAttributes, + TelemetrySource, + TelemetryEvent, + TelemetryResource, + Telemetry, + TelemetryEventMetadata, + EventAttributes, + TelemetryAnonymized, + BenchmarkHardware, +} from '@/domain/telemetry/telemetry.interface'; +import { Injectable } from '@nestjs/common'; +import { join } from 'path'; +import packageJson from '@/../package.json'; +import axios from 'axios'; +import { telemetryServerUrl } from '@/infrastructure/constants/cortex'; +import { ModelStat } from '@/infrastructure/commanders/types/model-stat.interface'; +import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; + +// refactor using convert to dto +@Injectable() +export class TelemetryRepositoryImpl implements TelemetryRepository { + private readonly telemetryResource: TelemetryResource = { + osName: process.platform, + osVersion: process.version, + architecture: process.arch, + appVersion: packageJson.version, + }; + + private readonly crashReportFileName = 'crash-report.jsonl'; + private readonly anonymizedDataFileName = 'session.json'; + constructor() {} + + private async getTelemetryDirectory(): Promise { + const 
dataFolderPath = await fileManagerService.getDataFolderPath(); + await fileManagerService.createFolderIfNotExistInDataFolder('telemetry'); + return join(dataFolderPath, 'telemetry'); + } + + private generateChecksum(data: any): string { + const hash = cypto.createHash('sha256'); + hash.update(JSON.stringify(data)); + return hash.digest('hex'); + } + + async sendTelemetryToServer( + telemetryEvent: Telemetry['event'], + type: 'crash-report' | 'metrics' = 'crash-report', + ): Promise { + try { + await axios.post(`${telemetryServerUrl}/api/v1/${type}`, telemetryEvent, { + headers: { + 'Content-Type': 'application/json', + 'cortex-checksum': this.generateChecksum(telemetryEvent), + }, + timeout: 1000, + }); + } catch (error) {} + } + + async sendBenchmarkToServer(data: { + hardware: BenchmarkHardware; + results: any; + metrics: any; + model: ModelStat; + sessionId: string; + }): Promise { + try { + await axios.post(`${telemetryServerUrl}/api/v1/benchmark`, data, { + headers: { + 'Content-Type': 'application/json', + 'cortex-checksum': this.generateChecksum(data), + }, + }); + } catch (error) {} + } + + async sendTelemetryToOTelCollector(endpoint: string, telemetry: Telemetry) { + try { + console.log('Sending telemetry to OTel collector'); + await axios.post(`${endpoint}/v1/logs`, telemetry.event, { + headers: { + 'Content-Type': 'application/json', + }, + }); + } catch (error) {} + } + + async markLastCrashReportAsSent(): Promise { + try { + const { data, position } = await fileManagerService.getLastLine( + join(await this.getTelemetryDirectory(), this.crashReportFileName), + ); + const Telemetry = JSON.parse(data) as Telemetry; + Telemetry.metadata.sentAt = new Date().toISOString(); + await fileManagerService.modifyLine( + join(await this.getTelemetryDirectory(), this.crashReportFileName), + JSON.stringify(Telemetry), + position, + ); + } catch (error) {} + } + + async readCrashReports(callback: (Telemetry: Telemetry) => void) { + fileManagerService.readLines( + 
join(await this.getTelemetryDirectory(), this.crashReportFileName), + (line: string) => { + const data = JSON.parse(line) as Telemetry; + callback(data); + }, + ); + } + + async getLastCrashReport(): Promise { + try { + await fileManagerService.createFolderIfNotExistInDataFolder('telemetry'); + const { data } = await fileManagerService.getLastLine( + join(await this.getTelemetryDirectory(), this.crashReportFileName), + ); + if (!data) { + return null; + } + return JSON.parse(data) as Telemetry; + } catch (error) { + return null; + } + } + + async createCrashReport( + crashReport: CrashReportAttributes, + source: TelemetrySource, + ): Promise { + const telemetryEvent = await this.convertToTelemetryEvent({ + attributes: crashReport, + source, + type: 'crash-report', + }); + const metadata: TelemetryEventMetadata = { + createdAt: new Date().toISOString(), + sentAt: null, + }; + return fileManagerService.append( + join(await this.getTelemetryDirectory(), this.crashReportFileName), + JSON.stringify({ + metadata, + event: { + resourceLogs: [telemetryEvent], + }, + } as Telemetry), + ); + } + + private async convertToTelemetryEvent({ + attributes, + source, + type, + }: { + attributes: CrashReportAttributes | EventAttributes; + source: TelemetrySource; + type: 'crash-report' | 'metrics'; + }): Promise { + const gpus = (await systemInformation.graphics()).controllers.map( + ({ model, vendor, vram, vramDynamic }) => ({ + model, + vendor, + vram, + vramDynamic, + }), + ); + + const body = + type === 'crash-report' + ? (attributes as CrashReportAttributes).message + : (attributes as EventAttributes).name; + + const severity = type === 'crash-report' ? 
'ERROR' : 'INFO'; + + const resourceAttributes: Attribute[] = Object.entries({ + ...this.telemetryResource, + 'service.name': type, + cpu: os.cpus()[0].model, + gpus: JSON.stringify(gpus), + source, + }).map(([key, value]) => ({ + key, + value: { stringValue: value }, + })); + const telemetryLogAttributes: Attribute[] = Object.entries(attributes).map( + ([key, value]) => { + if (typeof value === 'object') { + return { + key, + value: { + kvlist_value: { + values: Object.entries(value).map(([k, v]) => ({ + key: k, + value: { stringValue: v as string }, + })), + }, + }, + }; + } + return { + key, + value: { stringValue: value }, + }; + }, + ); + + return { + resource: { + attributes: resourceAttributes, + }, + scopeLogs: [ + { + scope: {}, + logRecords: [ + { + traceId: cypto.randomBytes(16).toString('hex'), + timeUnixNano: (BigInt(Date.now()) * BigInt(1000000)).toString(), + body: { stringValue: body }, + severityText: severity, + attributes: telemetryLogAttributes, + }, + ], + }, + ], + }; + } + + async sendEvent( + events: EventAttributes[], + source: TelemetrySource, + ): Promise { + const telemetryEvents = await Promise.all( + events.map(async (event) => + this.convertToTelemetryEvent({ + attributes: event, + source, + type: 'metrics', + }), + ), + ); + await this.sendTelemetryToServer( + { + resourceLogs: telemetryEvents, + }, + 'metrics', + ); + } + + async getAnonymizedData(): Promise { + const content = await fileManagerService.readFile( + join(await this.getTelemetryDirectory(), this.anonymizedDataFileName), + ); + + if (!content) { + return null; + } + + const data = JSON.parse(content) as TelemetryAnonymized; + return data; + } + + async updateAnonymousData(data: TelemetryAnonymized): Promise { + return fileManagerService.writeFile( + join(await this.getTelemetryDirectory(), this.anonymizedDataFileName), + JSON.stringify(data), + ); + } +} diff --git a/platform/src/infrastructure/services/context/context.module.ts 
b/platform/src/infrastructure/services/context/context.module.ts new file mode 100644 index 000000000..a583baa25 --- /dev/null +++ b/platform/src/infrastructure/services/context/context.module.ts @@ -0,0 +1,9 @@ +import { Module } from '@nestjs/common'; +import { ContextService } from './context.service'; + +@Module({ + //move context service to usecase? + providers: [ContextService], + exports: [ContextService], +}) +export class ContextModule {} diff --git a/platform/src/infrastructure/services/context/context.service.ts b/platform/src/infrastructure/services/context/context.service.ts new file mode 100644 index 000000000..e69ba06fd --- /dev/null +++ b/platform/src/infrastructure/services/context/context.service.ts @@ -0,0 +1,21 @@ +import { Injectable } from '@nestjs/common'; +import { AsyncLocalStorage } from 'async_hooks'; + +@Injectable() +export class ContextService { + private readonly localStorage = new AsyncLocalStorage(); + + init(callback: () => T): T { + return this.localStorage.run(new Map(), callback); + } + + get(key: string): T | null { + const context = this.localStorage.getStore() as Map; + return context?.get(key) ?? 
null; + } + + set(key: string, value: T): void { + const context = this.localStorage.getStore() as Map; + context?.set(key, value); + } +} diff --git a/platform/src/infrastructure/services/download-manager/download-manager.module.ts b/platform/src/infrastructure/services/download-manager/download-manager.module.ts new file mode 100644 index 000000000..5d94cc006 --- /dev/null +++ b/platform/src/infrastructure/services/download-manager/download-manager.module.ts @@ -0,0 +1,10 @@ +import { Module } from '@nestjs/common'; +import { DownloadManagerService } from './download-manager.service'; +import { HttpModule } from '@nestjs/axios'; + +@Module({ + imports: [HttpModule], + providers: [DownloadManagerService], + exports: [DownloadManagerService], +}) +export class DownloadManagerModule {} diff --git a/platform/src/infrastructure/services/download-manager/download-manager.service.spec.ts b/platform/src/infrastructure/services/download-manager/download-manager.service.spec.ts new file mode 100644 index 000000000..8263a632d --- /dev/null +++ b/platform/src/infrastructure/services/download-manager/download-manager.service.spec.ts @@ -0,0 +1,21 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { DownloadManagerService } from './download-manager.service'; +import { HttpModule } from '@nestjs/axios'; +import { EventEmitterModule } from '@nestjs/event-emitter'; + +describe('DownloadManagerService', () => { + let service: DownloadManagerService; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + imports: [HttpModule, EventEmitterModule.forRoot()], + providers: [DownloadManagerService], + }).compile(); + + service = module.get(DownloadManagerService); + }); + + it('should be defined', () => { + expect(service).toBeDefined(); + }); +}); diff --git a/platform/src/infrastructure/services/download-manager/download-manager.service.ts b/platform/src/infrastructure/services/download-manager/download-manager.service.ts new 
file mode 100644 index 000000000..6ae6550fb --- /dev/null +++ b/platform/src/infrastructure/services/download-manager/download-manager.service.ts @@ -0,0 +1,348 @@ +import { + DownloadItem, + DownloadState, + DownloadStatus, + DownloadType, +} from '@/domain/models/download.interface'; +import { HttpService } from '@nestjs/axios'; +import { Injectable } from '@nestjs/common'; +import { EventEmitter2 } from '@nestjs/event-emitter'; +import { Presets, SingleBar } from 'cli-progress'; +import { createWriteStream, unlinkSync } from 'node:fs'; +import { basename } from 'node:path'; +import { firstValueFrom } from 'rxjs'; +import crypto from 'crypto'; + +@Injectable() +export class DownloadManagerService { + private allDownloadStates: DownloadState[] = []; + private abortControllers: Record> = + {}; + private timeouts: Record = {}; + + constructor( + private readonly httpService: HttpService, + private readonly eventEmitter: EventEmitter2, + ) {} + + async abortDownload(downloadId: string) { + if (!this.abortControllers[downloadId]) { + return; + } + clearTimeout(this.timeouts[downloadId]); + Object.keys(this.abortControllers[downloadId]).forEach((destination) => { + this.abortControllers[downloadId][destination].abort(); + }); + delete this.abortControllers[downloadId]; + + const currentDownloadState = this.allDownloadStates.find( + (downloadState) => downloadState.id === downloadId, + ); + this.allDownloadStates = this.allDownloadStates.filter( + (downloadState) => downloadState.id !== downloadId, + ); + + if (currentDownloadState) { + this.deleteDownloadStateFiles(currentDownloadState); + } + this.eventEmitter.emit('download.event', this.allDownloadStates); + } + + async submitDownloadRequest( + downloadId: string, + title: string, + downloadType: DownloadType, + urlToDestination: Record< + string, + { + destination: string; + checksum?: string; + } + >, + finishedCallback?: () => Promise, + inSequence: boolean = true, + ) { + if ( + this.allDownloadStates.find( + 
(downloadState) => downloadState.id === downloadId, + ) + ) { + return; + } + + const downloadItems: DownloadItem[] = Object.keys(urlToDestination).map( + (url) => { + const { destination, checksum } = urlToDestination[url]; + const downloadItem: DownloadItem = { + id: destination, + time: { + elapsed: 0, + remaining: 0, + }, + size: { + total: 0, + transferred: 0, + }, + progress: 0, + status: DownloadStatus.Downloading, + checksum, + }; + + return downloadItem; + }, + ); + + const downloadState: DownloadState = { + id: downloadId, + title: title, + type: downloadType, + progress: 0, + status: DownloadStatus.Downloading, + children: downloadItems, + }; + + this.allDownloadStates.push(downloadState); + this.abortControllers[downloadId] = {}; + + const callBack = async () => { + // Await post processing callback + await finishedCallback?.(); + + // Finished - update the current downloading states + delete this.abortControllers[downloadId]; + const currentDownloadState = this.allDownloadStates.find( + (downloadState) => downloadState.id === downloadId, + ); + if (currentDownloadState) { + currentDownloadState.status = DownloadStatus.Downloaded; + this.eventEmitter.emit('download.event', this.allDownloadStates); + + // remove download state if all children is downloaded + this.allDownloadStates = this.allDownloadStates.filter( + (downloadState) => downloadState.id !== downloadId, + ); + } + this.eventEmitter.emit('download.event', this.allDownloadStates); + }; + if (!inSequence) { + Promise.all( + Object.keys(urlToDestination).map((url) => { + const { destination, checksum } = urlToDestination[url]; + return this.downloadFile(downloadId, url, destination, checksum); + }), + ).then(callBack); + } else { + // Download model file in sequence + for (const url of Object.keys(urlToDestination)) { + const { destination, checksum } = urlToDestination[url]; + await this.downloadFile(downloadId, url, destination, checksum); + } + return callBack(); + } + } + + private async 
downloadFile( + downloadId: string, + url: string, + destination: string, + checksum?: string, + ) { + console.log('Downloading', { + downloadId, + url, + destination, + checksum, + }); + const controller = new AbortController(); + // adding to abort controllers + this.abortControllers[downloadId][destination] = controller; + return new Promise(async (resolve) => { + const response = await firstValueFrom( + this.httpService.get(url, { + responseType: 'stream', + signal: controller.signal, + }), + ); + + // check if response is success + if (!response) { + throw new Error('Failed to download model'); + } + + const writer = createWriteStream(destination); + const hash = crypto.createHash('sha256'); + const totalBytes = Number(response.headers['content-length']); + + // update download state + const currentDownloadState = this.allDownloadStates.find( + (downloadState) => downloadState.id === downloadId, + ); + if (!currentDownloadState) { + resolve(); + return; + } + const downloadItem = currentDownloadState?.children.find( + (downloadItem) => downloadItem.id === destination, + ); + if (downloadItem) { + downloadItem.size.total = totalBytes; + } + + console.log('Downloading', basename(destination)); + + const timeout = 20000; // Timeout period for receiving new data + let timeoutId: NodeJS.Timeout; + const resetTimeout = () => { + if (timeoutId) { + clearTimeout(timeoutId); + delete this.timeouts[downloadId]; + } + timeoutId = setTimeout(() => { + try { + this.handleError( + new Error('Download timeout'), + downloadId, + destination, + ); + } finally { + bar.stop(); + resolve(); + } + }, timeout); + this.timeouts[downloadId] = timeoutId; + }; + + let transferredBytes = 0; + const bar = new SingleBar({}, Presets.shades_classic); + bar.start(100, 0); + + writer.on('finish', () => { + try { + if (timeoutId) clearTimeout(timeoutId); + // delete the abort controller + delete this.abortControllers[downloadId][destination]; + const currentDownloadState = 
this.allDownloadStates.find( + (downloadState) => downloadState.id === downloadId, + ); + if (!currentDownloadState) return; + + // update current child status to downloaded, find by destination as id + const downloadItem = currentDownloadState?.children.find( + (downloadItem) => downloadItem.id === destination, + ); + const isFileBroken = checksum && checksum === hash.digest('hex'); + if (downloadItem) { + downloadItem.status = isFileBroken + ? DownloadStatus.Error + : DownloadStatus.Downloaded; + if (isFileBroken) { + downloadItem.error = 'Checksum is not matched'; + this.handleError( + new Error('Checksum is not matched'), + downloadId, + destination, + ); + } + } + if (isFileBroken) { + currentDownloadState.status = DownloadStatus.Error; + currentDownloadState.error = 'Checksum is not matched'; + } + this.eventEmitter.emit('download.event', this.allDownloadStates); + } finally { + bar.stop(); + resolve(); + } + }); + writer.on('error', (error) => { + try { + if (timeoutId) clearTimeout(timeoutId); + this.handleError(error, downloadId, destination); + } finally { + bar.stop(); + resolve(); + } + }); + + response.data.on('data', (chunk: any) => { + hash.update(chunk); + resetTimeout(); + transferredBytes += chunk.length; + + const currentDownloadState = this.allDownloadStates.find( + (downloadState) => downloadState.id === downloadId, + ); + if (!currentDownloadState) return; + + const downloadItem = currentDownloadState?.children.find( + (downloadItem) => downloadItem.id === destination, + ); + if (downloadItem) { + downloadItem.size.transferred = transferredBytes; + downloadItem.progress = Math.round( + (transferredBytes / totalBytes) * 100, + ); + bar.update(downloadItem.progress); + } + const lastProgress = currentDownloadState.progress; + currentDownloadState.progress = Math.round( + (currentDownloadState.children.reduce( + (pre, curr) => pre + curr.size.transferred, + 0, + ) / + Math.max( + currentDownloadState.children.reduce( + (pre, curr) => pre + 
curr.size.total, + 0, + ), + 1, + )) * + 100, + ); + // console.log(currentDownloadState.progress); + if (currentDownloadState.progress !== lastProgress) + this.eventEmitter.emit('download.event', this.allDownloadStates); + }); + + response.data.pipe(writer); + }); + } + + getDownloadStates() { + return this.allDownloadStates; + } + + private handleError(error: Error, downloadId: string, destination: string) { + delete this.abortControllers[downloadId][destination]; + const currentDownloadState = this.allDownloadStates.find( + (downloadState) => downloadState.id === downloadId, + ); + if (!currentDownloadState) return; + + const downloadItem = currentDownloadState?.children.find( + (downloadItem) => downloadItem.id === destination, + ); + if (downloadItem) { + downloadItem.status = DownloadStatus.Error; + downloadItem.error = error.message; + } + + currentDownloadState.status = DownloadStatus.Error; + currentDownloadState.error = error.message; + + // remove download state if all children is downloaded + this.allDownloadStates = this.allDownloadStates.filter( + (downloadState) => downloadState.id !== downloadId, + ); + this.deleteDownloadStateFiles(currentDownloadState); + this.eventEmitter.emit('download.event', [currentDownloadState]); + this.eventEmitter.emit('download.event', this.allDownloadStates); + } + + private deleteDownloadStateFiles(downloadState: DownloadState) { + if (!downloadState.children?.length) return; + downloadState.children.forEach((child) => { + unlinkSync(child.id); + }); + } +} diff --git a/platform/src/infrastructure/services/file-manager/file-manager.module.ts b/platform/src/infrastructure/services/file-manager/file-manager.module.ts new file mode 100644 index 000000000..9e3757342 --- /dev/null +++ b/platform/src/infrastructure/services/file-manager/file-manager.module.ts @@ -0,0 +1,8 @@ +import { Module } from '@nestjs/common'; +import { FileManagerService } from './file-manager.service'; + +@Module({ + providers: [FileManagerService], 
+ exports: [FileManagerService], +}) +export class FileManagerModule {} diff --git a/platform/src/infrastructure/services/file-manager/file-manager.service.spec.ts b/platform/src/infrastructure/services/file-manager/file-manager.service.spec.ts new file mode 100644 index 000000000..96cc6dad6 --- /dev/null +++ b/platform/src/infrastructure/services/file-manager/file-manager.service.spec.ts @@ -0,0 +1,18 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { FileManagerService } from './file-manager.service'; + +describe('FileManagerService', () => { + let service: FileManagerService; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + providers: [FileManagerService], + }).compile(); + + service = module.get(FileManagerService); + }); + + it('should be defined', () => { + expect(service).toBeDefined(); + }); +}); diff --git a/platform/src/infrastructure/services/file-manager/file-manager.service.ts b/platform/src/infrastructure/services/file-manager/file-manager.service.ts new file mode 100644 index 000000000..c452133eb --- /dev/null +++ b/platform/src/infrastructure/services/file-manager/file-manager.service.ts @@ -0,0 +1,406 @@ +import { Config } from '@/domain/config/config.interface'; +import { Injectable } from '@nestjs/common'; +import os from 'os'; +import { join } from 'node:path'; +import { + existsSync, + read, + open, + close, + promises, + createReadStream, +} from 'node:fs'; +import { promisify } from 'util'; +import yaml, { load } from 'js-yaml'; +import { readdirSync, readFileSync, write } from 'fs'; +import { createInterface } from 'readline'; +import { + defaultCortexCppHost, + defaultCortexCppPort, + defaultCortexJsHost, + defaultCortexJsPort, +} from '@/infrastructure/constants/cortex'; + +const readFileAsync = promisify(read); +const openAsync = promisify(open); +const closeAsync = promisify(close); +const writeAsync = promisify(write); + +@Injectable() +export class FileManagerService { + 
private cortexDirectoryName = 'cortex'; + private modelFolderName = 'models'; + private presetFolderName = 'presets'; + private extensionFoldername = 'extensions'; + private benchmarkFoldername = 'benchmark'; + private cortexEnginesFolderName = 'engines'; + private cortexTelemetryFolderName = 'telemetry'; + private configProfile = process.env.CORTEX_PROFILE || 'default'; + private configPath = process.env.CORTEX_CONFIG_PATH || os.homedir(); + + /** + * Get cortex configs + * @returns the config object + */ + async getConfig(dataFolderPath?: string): Promise { + const configPath = join( + this.configPath, + this.getConfigFileName(this.configProfile), + ); + const config = this.defaultConfig(); + const dataFolderPathUsed = dataFolderPath || config.dataFolderPath; + if (!existsSync(configPath) || !existsSync(dataFolderPathUsed)) { + if (!existsSync(dataFolderPathUsed)) { + await this.createFolderIfNotExist(dataFolderPathUsed); + } + if (!existsSync(configPath)) { + await this.writeConfigFile(config); + } + return config; + } + + try { + const content = await promises.readFile(configPath, 'utf8'); + const config = yaml.load(content) as Config & object; + return { + ...this.defaultConfig(), + ...config, + }; + } catch (error) { + console.warn('Error reading config file. 
Using default config.'); + console.warn(error); + await this.createFolderIfNotExist(dataFolderPathUsed); + await this.writeConfigFile(config); + return config; + } + } + + async writeConfigFile(config: Config & object): Promise { + const configPath = join( + this.configPath, + this.getConfigFileName(this.configProfile), + ); + + // write config to file as yaml + if (!existsSync(configPath)) { + await promises.writeFile(configPath, '', 'utf8'); + } + const content = await promises.readFile(configPath, 'utf8'); + const currentConfig = yaml.load(content) as Config & object; + const configString = yaml.dump({ + ...currentConfig, + ...config, + }); + await promises.writeFile(configPath, configString, 'utf8'); + } + + private async createFolderIfNotExist(dataFolderPath: string): Promise { + if (!existsSync(dataFolderPath)) { + await promises.mkdir(dataFolderPath, { recursive: true }); + } + + const modelFolderPath = join(dataFolderPath, this.modelFolderName); + const cortexCppFolderPath = join( + dataFolderPath, + this.cortexEnginesFolderName, + ); + const cortexTelemetryFolderPath = join( + dataFolderPath, + this.cortexTelemetryFolderName, + ); + if (!existsSync(modelFolderPath)) { + await promises.mkdir(modelFolderPath, { recursive: true }); + } + if (!existsSync(cortexCppFolderPath)) { + await promises.mkdir(cortexCppFolderPath, { recursive: true }); + } + if (!existsSync(cortexTelemetryFolderPath)) { + await promises.mkdir(cortexTelemetryFolderPath, { recursive: true }); + } + } + + public defaultConfig(): Config { + // default will store at home directory + const homeDir = os.homedir(); + const dataFolderPath = join(homeDir, this.cortexDirectoryName); + + return { + dataFolderPath, + cortexCppHost: defaultCortexCppHost, + cortexCppPort: defaultCortexCppPort, + apiServerHost: defaultCortexJsHost, + apiServerPort: defaultCortexJsPort, + }; + } + + /** + * Get the app data folder path + * Usually it is located at the home directory > cortex + * @returns the path to the 
data folder + */ + async getDataFolderPath(): Promise { + const config = await this.getConfig(); + return config.dataFolderPath; + } + + async getLastLine( + filePath: string, + ): Promise<{ data: any; position: number }> { + try { + const fileDescriptor = await openAsync(filePath, 'a+'); + const stats = await promises.stat(filePath); + const bufferSize = 1024 * 5; // 5KB + const buffer = Buffer.alloc(bufferSize); + const filePosition = stats.size; + let lastPosition = filePosition; + let data = ''; + let newLineIndex = -1; + + async function readPreviousChunk() { + const readPosition = Math.max(0, lastPosition - bufferSize); + const chunkSize = Math.min(bufferSize, lastPosition); + + const { bytesRead } = await readFileAsync( + fileDescriptor, + buffer, + 0, + chunkSize, + readPosition, + ); + data = buffer.slice(0, bytesRead).toString() + data; + lastPosition -= bufferSize; + newLineIndex = data.lastIndexOf('\n'); + if (newLineIndex === -1 && lastPosition > 0) { + return readPreviousChunk(); + } else { + const lastLine = data.slice(newLineIndex + 1).trim(); + await closeAsync(fileDescriptor); + return { + data: lastLine, + position: readPosition + newLineIndex + 1, + }; + } + } + + return await readPreviousChunk(); + } catch (err) { + //todo: add log level then log error + throw err; + } + } + async modifyLine(filePath: string, modifiedLine: any, position: number) { + try { + const fd = await openAsync(filePath, 'r+'); + const buffer = Buffer.from(modifiedLine, 'utf8'); + await writeAsync(fd, buffer, 0, buffer.length, position); + await closeAsync(fd); + } catch (err) { + //todo: add log level then log error + throw err; + } + } + + async append(filePath: string, data: any) { + try { + const stats = await promises.stat(filePath); + return await promises.appendFile( + filePath, + stats.size === 0 ? 
data : `\n${data}`, + { + encoding: 'utf8', + flag: 'a+', + }, + ); + } catch (err) { + throw err; + } + } + readLines( + filePath: string, + callback: (line: string) => void, + start: number = 0, + ) { + const fileStream = createReadStream(filePath, { + start, + }); + const rl = createInterface({ + input: fileStream, + crlfDelay: Infinity, + }); + rl.on('line', callback); + } + + /** + * Get the models data folder path + * Usually it is located at the home directory > cortex > models + * @returns the path to the models folder + */ + async getModelsPath(): Promise { + const dataFolderPath = await this.getDataFolderPath(); + return join(dataFolderPath, this.modelFolderName); + } + + /** + * Get the presets data folder path + * Usually it is located at the home directory > cortex > presets + * @returns the path to the presets folder + */ + async getPresetsPath(): Promise { + const dataFolderPath = await this.getDataFolderPath(); + return join(dataFolderPath, this.presetFolderName); + } + + /** + * Get the preset data + * Usually it is located at the home directory > cortex > presets > preset.yaml + * @returns the preset data + */ + async getPreset(preset?: string): Promise { + if (!preset) return undefined; + + const dataFolderPath = await this.getDataFolderPath(); + const presetsFolder = join(dataFolderPath, this.presetFolderName); + if (!existsSync(presetsFolder)) return {}; + + const presetFile = readdirSync(presetsFolder).find( + (file) => + file.toLowerCase() === `${preset?.toLowerCase()}.yaml` || + file.toLowerCase() === `${preset?.toLocaleLowerCase()}.yml`, + ); + if (!presetFile) return {}; + const presetPath = join(presetsFolder, presetFile); + + if (!preset || !existsSync(presetPath)) return {}; + return preset + ? 
(load(readFileSync(join(presetPath), 'utf-8')) as object) + : {}; + } + + /** + * Get the extensions data folder path + * Usually it is located at the home directory > cortex > extensions + * @returns the path to the extensions folder + */ + async getExtensionsPath(): Promise { + const dataFolderPath = await this.getDataFolderPath(); + return join(dataFolderPath, this.extensionFoldername); + } + + /** + * Get the benchmark folder path + * Usually it is located at the home directory > cortex > extensions + * @returns the path to the benchmark folder + */ + async getBenchmarkPath(): Promise { + const dataFolderPath = await this.getDataFolderPath(); + return join(dataFolderPath, this.benchmarkFoldername); + } + + /** + * Get Cortex CPP engines folder path + * @returns the path to the cortex engines folder + */ + async getCortexCppEnginePath(): Promise { + return join(await this.getDataFolderPath(), this.cortexEnginesFolderName); + } + + /** + * Get log path + * @returns the path to the cortex engines folder + */ + async getLogPath(): Promise { + return join(await this.getDataFolderPath(), 'cortex.log'); + } + + async createFolderIfNotExistInDataFolder(folderName: string): Promise { + const dataFolderPath = await this.getDataFolderPath(); + const folderPath = join(dataFolderPath, folderName); + if (!existsSync(folderPath)) { + await promises.mkdir(folderPath, { recursive: true }); + } + } + + async readFile(filePath: string): Promise { + try { + const isFileExist = existsSync(filePath); + if (!isFileExist) { + return null; + } + const content = await promises.readFile(filePath, { + encoding: 'utf8', + }); + return content; + } catch (error) { + throw error; + } + } + async writeFile(filePath: string, data: any): Promise { + try { + return promises.writeFile(filePath, data, { + encoding: 'utf8', + flag: 'w+', + }); + } catch (error) { + throw error; + } + } + + /** + * Get the cortex server configurations + * It is supposed to be stored in the home directory > .cortexrc 
+ * @returns the server configurations + */ + getServerConfig(): { host: string; port: number } { + const configPath = join( + this.configPath, + this.getConfigFileName(this.configProfile), + ); + let config = this.defaultConfig(); + try { + const content = readFileSync(configPath, 'utf8'); + const currentConfig = (yaml.load(content) as Config & object) ?? {}; + if (currentConfig) { + config = currentConfig; + } + } catch {} + return { + host: config.apiServerHost ?? '127.0.0.1', + port: config.apiServerPort ?? 1337, + }; + } + + public setConfigProfile(profile: string) { + this.configProfile = profile; + } + + public getConfigProfile() { + return this.configProfile; + } + public profileConfigExists(profile: string): boolean { + const configPath = join(this.configPath, this.getConfigFileName(profile)); + try { + const content = readFileSync(configPath, 'utf8'); + const config = yaml.load(content) as Config & object; + return !!config; + } catch { + return false; + } + } + + private getConfigFileName(configProfile: string): string { + if (configProfile === 'default') { + return '.cortexrc'; + } + return `.${configProfile}rc`; + } + + public setConfigPath(configPath: string) { + this.configPath = configPath; + } + + public getConfigPath(): string { + return this.configPath; + } +} + +export const fileManagerService = new FileManagerService(); diff --git a/platform/src/infrastructure/services/resources-manager/resources-manager.module.ts b/platform/src/infrastructure/services/resources-manager/resources-manager.module.ts new file mode 100644 index 000000000..02c43c8bb --- /dev/null +++ b/platform/src/infrastructure/services/resources-manager/resources-manager.module.ts @@ -0,0 +1,8 @@ +import { Module } from '@nestjs/common'; +import { ResourcesManagerService } from './resources-manager.service'; + +@Module({ + providers: [ResourcesManagerService], + exports: [ResourcesManagerService], +}) +export class ResourceManagerModule {} diff --git 
a/platform/src/infrastructure/services/resources-manager/resources-manager.service.ts b/platform/src/infrastructure/services/resources-manager/resources-manager.service.ts new file mode 100644 index 000000000..190d79779 --- /dev/null +++ b/platform/src/infrastructure/services/resources-manager/resources-manager.service.ts @@ -0,0 +1,35 @@ +import { + ResourceStatus, + UsedMemInfo, +} from '@/domain/models/resource.interface'; +import { getMemoryInformation, MemoryInformation } from '@/utils/system-resource'; +import { Injectable } from '@nestjs/common'; +import si, { Systeminformation } from 'systeminformation'; + +@Injectable() +export class ResourcesManagerService { + async getResourceStatuses(): Promise { + const promises = [si.currentLoad(), getMemoryInformation()]; + const results = await Promise.all(promises); + + const cpuUsage = results[0] as Systeminformation.CurrentLoadData; + const memory = results[1] as MemoryInformation + const memInfo: UsedMemInfo = { + total: memory.total, + used: memory.used, + }; + return { + mem: memInfo, + cpu: { + usage: Number(cpuUsage.currentLoad.toFixed(2)), + }, + gpus: (await si.graphics()).controllers.map((gpu) => ({ + name: gpu.name, + vram: { + total: gpu.vram ?? 0, + used: gpu.memoryUsed ?? 
0, + } + })), + }; + } +} diff --git a/platform/src/main.ts b/platform/src/main.ts new file mode 100644 index 000000000..19c3e4d74 --- /dev/null +++ b/platform/src/main.ts @@ -0,0 +1,23 @@ +import { + defaultCortexJsHost, + defaultCortexJsPort, +} from '@/infrastructure/constants/cortex'; +import { getApp } from './app'; + +process.title = 'Cortex API Server'; + +async function bootstrap() { + // getting port from env + const host = process.env.CORTEX_JS_HOST || defaultCortexJsHost; + const port = process.env.CORTEX_JS_PORT || defaultCortexJsPort; + const app = await getApp(host, Number(port)); + try { + await app.listen(port, host); + console.log(`Started server at http://${host}:${port}`); + console.log(`API Playground available at http://${host}:${port}/api`); + } catch (error) { + console.error(`Failed to start server. Is port ${port} in use? ${error}`); + } +} + +bootstrap(); diff --git a/platform/src/usecases/assistants/assistants.module.ts b/platform/src/usecases/assistants/assistants.module.ts new file mode 100644 index 000000000..95849a3e7 --- /dev/null +++ b/platform/src/usecases/assistants/assistants.module.ts @@ -0,0 +1,12 @@ +import { Module } from '@nestjs/common'; +import { AssistantsUsecases } from './assistants.usecases'; +import { DatabaseModule } from '@/infrastructure/database/database.module'; +import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; + +@Module({ + imports: [DatabaseModule, ModelRepositoryModule], + controllers: [], + providers: [AssistantsUsecases], + exports: [AssistantsUsecases], +}) +export class AssistantsModule {} diff --git a/platform/src/usecases/assistants/assistants.usecases.spec.ts b/platform/src/usecases/assistants/assistants.usecases.spec.ts new file mode 100644 index 000000000..2dc8bb89f --- /dev/null +++ b/platform/src/usecases/assistants/assistants.usecases.spec.ts @@ -0,0 +1,29 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { AssistantsUsecases } from 
'./assistants.usecases'; +import { DatabaseModule } from '@/infrastructure/database/database.module'; +import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; +import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; +import { EventEmitterModule } from '@nestjs/event-emitter'; + +describe('AssistantsService', () => { + let service: AssistantsUsecases; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + imports: [ + EventEmitterModule.forRoot(), + DatabaseModule, + ModelRepositoryModule, + DownloadManagerModule, + ], + exports: [AssistantsUsecases], + providers: [AssistantsUsecases], + }).compile(); + + service = module.get(AssistantsUsecases); + }); + + it('should be defined', () => { + expect(service).toBeDefined(); + }); +}); diff --git a/platform/src/usecases/assistants/assistants.usecases.ts b/platform/src/usecases/assistants/assistants.usecases.ts new file mode 100644 index 000000000..b3b425468 --- /dev/null +++ b/platform/src/usecases/assistants/assistants.usecases.ts @@ -0,0 +1,96 @@ +import { Inject, Injectable } from '@nestjs/common'; +import { CreateAssistantDto } from '@/infrastructure/dtos/assistants/create-assistant.dto'; +import { Assistant } from '@/domain/models/assistant.interface'; +import { PageDto } from '@/infrastructure/dtos/page.dto'; +import { ModelRepository } from '@/domain/repositories/model.interface'; +import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; +import { Op } from 'sequelize'; +import { AssistantEntity } from '@/infrastructure/entities/assistant.entity'; +import { Repository } from 'sequelize-typescript'; + +@Injectable() +export class AssistantsUsecases { + constructor( + @Inject('ASSISTANT_REPOSITORY') + private readonly assistantRepository: Repository, + private readonly modelRepository: ModelRepository, + ) {} + + async create(createAssistantDto: 
CreateAssistantDto) { + const { top_p, temperature, model, id } = createAssistantDto; + if (model !== '*') { + const modelEntity = await this.modelRepository.findOne(model); + if (!modelEntity) { + throw new ModelNotFoundException(model); + } + } + + const assistant: Partial = { + ...createAssistantDto, + object: 'assistant', + created_at: Date.now(), + response_format: null, + tool_resources: null, + top_p: top_p ?? null, + temperature: temperature ?? null, + }; + + try { + await this.assistantRepository.create(assistant); + } catch (err) { + throw err; + } + + return this.findOne(id); + } + + async listAssistants( + limit: number, + order: 'asc' | 'desc', + after?: string, + before?: string, + ) { + const normalizedOrder = order === 'asc' ? 'ASC' : 'DESC'; + + const where: any = {}; + if (after) { + where.id = { [Op.gt]: after }; + } + if (before) { + where.id = { [Op.lt]: before }; + } + + const assistants = await this.assistantRepository.findAll({ + where, + order: [['created_at', normalizedOrder]], + limit: limit + 1, + }); + + let hasMore = false; + if (assistants.length > limit) { + hasMore = true; + assistants.pop(); + } + + const firstId = assistants[0]?.id ?? undefined; + const lastId = assistants[assistants.length - 1]?.id ?? 
undefined; + + return new PageDto(assistants, hasMore, firstId, lastId); + } + + async findAll(): Promise { + return this.assistantRepository.findAll(); + } + + async findOne(id: string) { + return this.assistantRepository.findOne({ + where: { id }, + }); + } + + async remove(id: string) { + return this.assistantRepository.destroy({ + where: { id }, + }); + } +} diff --git a/platform/src/usecases/chat/chat.module.ts b/platform/src/usecases/chat/chat.module.ts new file mode 100644 index 000000000..a09d1fabf --- /dev/null +++ b/platform/src/usecases/chat/chat.module.ts @@ -0,0 +1,25 @@ +import { Module } from '@nestjs/common'; +import { ChatUsecases } from './chat.usecases'; +import { DatabaseModule } from '@/infrastructure/database/database.module'; +import { ExtensionModule } from '@/infrastructure/repositories/extensions/extension.module'; +import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; +import { HttpModule } from '@nestjs/axios'; +import { TelemetryModule } from '../telemetry/telemetry.module'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; +import { ModelsModule } from '../models/models.module'; + +@Module({ + imports: [ + DatabaseModule, + ExtensionModule, + ModelRepositoryModule, + HttpModule, + TelemetryModule, + FileManagerModule, + ModelsModule, + ], + controllers: [], + providers: [ChatUsecases], + exports: [ChatUsecases], +}) +export class ChatModule {} diff --git a/platform/src/usecases/chat/chat.usecases.spec.ts b/platform/src/usecases/chat/chat.usecases.spec.ts new file mode 100644 index 000000000..896c5d275 --- /dev/null +++ b/platform/src/usecases/chat/chat.usecases.spec.ts @@ -0,0 +1,40 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { ChatUsecases } from './chat.usecases'; +import { DatabaseModule } from '@/infrastructure/database/database.module'; +import { ExtensionModule } from 
'@/infrastructure/repositories/extensions/extension.module'; +import { TelemetryModule } from '../telemetry/telemetry.module'; +import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; +import { HttpModule } from '@nestjs/axios'; +import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; +import { EventEmitterModule } from '@nestjs/event-emitter'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; +import { ModelsModule } from '../models/models.module'; + +describe('ChatService', () => { + let service: ChatUsecases; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + imports: [ + EventEmitterModule.forRoot(), + DatabaseModule, + ExtensionModule, + ModelRepositoryModule, + HttpModule, + TelemetryModule, + DownloadManagerModule, + EventEmitterModule.forRoot(), + FileManagerModule, + ModelsModule, + ], + providers: [ChatUsecases], + exports: [ChatUsecases], + }).compile(); + + service = module.get(ChatUsecases); + }); + + it('should be defined', () => { + expect(service).toBeDefined(); + }); +}); diff --git a/platform/src/usecases/chat/chat.usecases.ts b/platform/src/usecases/chat/chat.usecases.ts new file mode 100644 index 000000000..bf38e9cfb --- /dev/null +++ b/platform/src/usecases/chat/chat.usecases.ts @@ -0,0 +1,111 @@ +import { Injectable } from '@nestjs/common'; +import { CreateChatCompletionDto } from '@/infrastructure/dtos/chat/create-chat-completion.dto'; +import { EngineExtension } from '@/domain/abstracts/engine.abstract'; +import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; +import { TelemetryUsecases } from '../telemetry/telemetry.usecases'; +import { TelemetrySource } from '@/domain/telemetry/telemetry.interface'; +import { ExtensionRepository } from '@/domain/repositories/extension.interface'; +import { firstValueFrom } from 'rxjs'; 
+import { HttpService } from '@nestjs/axios'; +import { + CORTEX_CPP_EMBEDDINGS_URL, + defaultEmbeddingModel, +} from '@/infrastructure/constants/cortex'; +import { CreateEmbeddingsDto } from '@/infrastructure/dtos/embeddings/embeddings-request.dto'; +import { Engines } from '@/infrastructure/commanders/types/engine.interface'; +import { ModelsUsecases } from '../models/models.usecases'; +import { isRemoteEngine } from '@/utils/normalize-model-id'; +import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; + +@Injectable() +export class ChatUsecases { + constructor( + private readonly extensionRepository: ExtensionRepository, + private readonly telemetryUseCases: TelemetryUsecases, + private readonly modelsUsescases: ModelsUsecases, + private readonly httpService: HttpService, + ) {} + + async inference( + createChatDto: CreateChatCompletionDto, + headers: Record, + ): Promise { + const { model: modelId } = createChatDto; + const model = await this.modelsUsescases.findOne(modelId); + if (!model) { + throw new ModelNotFoundException(modelId); + } + + const isModelRunning = await this.modelsUsescases.isModelRunning(modelId); + // If model is not running + // Start the model + if (!isModelRunning) { + await this.modelsUsescases.startModel(modelId); + } + + const engine = (await this.extensionRepository.findOne( + model!.engine ?? Engines.llamaCPP, + )) as EngineExtension | undefined; + if (engine == null) { + throw new Error(`No engine found with name: ${model.engine}`); + } + const payload = { + ...createChatDto, + ...(model.engine && + !isRemoteEngine(model.engine) && { engine: model.engine }), + }; + try { + return await engine.inference(payload, headers); + } catch (error) { + await this.telemetryUseCases.createCrashReport( + error, + TelemetrySource.CORTEX_CPP, + ); + throw error; + } + } + + /** + * Creates an embedding vector representing the input text. + * @param model Embedding model ID. 
+ * @param input Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. + * @param encoding_format Encoding format for the embeddings. Supported formats are 'float' and 'int'. + * @param host Cortex CPP host. + * @param port Cortex CPP port. + * @returns Embedding vector. + */ + async embeddings(dto: CreateEmbeddingsDto) { + const modelId = dto.model ?? defaultEmbeddingModel; + + if (modelId !== dto.model) dto = { ...dto, model: modelId }; + + if (!(await this.modelsUsescases.findOne(modelId))) { + await this.modelsUsescases.pullModel(modelId); + } + + const isModelRunning = await this.modelsUsescases.isModelRunning(modelId); + // If model is not running + // Start the model + if (!isModelRunning) { + await this.modelsUsescases.startModel(modelId, { + embedding: true, + model_type: 'embedding', + }); + } + + const configs = await fileManagerService.getConfig(); + + return firstValueFrom( + this.httpService.post( + CORTEX_CPP_EMBEDDINGS_URL(configs.cortexCppHost, configs.cortexCppPort), + dto, + { + headers: { + 'Content-Type': 'application/json', + 'Accept-Encoding': 'gzip', + }, + }, + ), + ).then((res) => res.data); + } +} diff --git a/platform/src/usecases/chat/exception/invalid-api-url.exception.ts b/platform/src/usecases/chat/exception/invalid-api-url.exception.ts new file mode 100644 index 000000000..958d5d87c --- /dev/null +++ b/platform/src/usecases/chat/exception/invalid-api-url.exception.ts @@ -0,0 +1,10 @@ +import { HttpException, HttpStatus } from '@nestjs/common'; + +export class InvalidApiUrlException extends HttpException { + constructor(modelId: string, apiUrl: string | undefined) { + super( + `Remote model ${modelId} has configured api url ${apiUrl} is not valid!`, + HttpStatus.INTERNAL_SERVER_ERROR, + ); + } +} diff --git a/platform/src/usecases/configs/configs.module.ts b/platform/src/usecases/configs/configs.module.ts new file mode 100644 index 
000000000..afa4bca2f --- /dev/null +++ b/platform/src/usecases/configs/configs.module.ts @@ -0,0 +1,11 @@ +import { Module } from '@nestjs/common'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; +import { ConfigsUsecases } from './configs.usecase'; + +@Module({ + imports: [FileManagerModule], + controllers: [], + providers: [ConfigsUsecases], + exports: [ConfigsUsecases], +}) +export class ConfigsModule {} diff --git a/platform/src/usecases/configs/configs.usecase.ts b/platform/src/usecases/configs/configs.usecase.ts new file mode 100644 index 000000000..0332fc1ce --- /dev/null +++ b/platform/src/usecases/configs/configs.usecase.ts @@ -0,0 +1,84 @@ +import { CommonResponseDto } from '@/infrastructure/dtos/common/common-response.dto'; +import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; +import { Injectable } from '@nestjs/common'; +import { EventEmitter2 } from '@nestjs/event-emitter'; + +@Injectable() +export class ConfigsUsecases { + constructor(private readonly eventEmitter: EventEmitter2) {} + + /** + * Save a configuration to the .cortexrc file. + * @param key Configuration Key + * @param engine The engine where the configs belongs + */ + async saveConfig( + key: string, + value: string, + engine?: string, + ): Promise { + const configs = await fileManagerService.getConfig(); + + const groupConfigs = configs[ + engine as keyof typeof configs + ] as unknown as object; + const newConfigs = { + ...configs, + ...(engine + ? { + [engine]: { + ...groupConfigs, + [key]: value, + }, + } + : {}), + }; + + return fileManagerService + .writeConfigFile(newConfigs) + .then(async () => { + if (engine) { + this.eventEmitter.emit('config.updated', { + engine, + key, + value, + }); + } + }) + .then(() => { + return { + message: 'The config has been successfully updated.', + }; + }); + } + + /** + * Get the configurations of a group. 
+ * @param group + * @returns + */ + async getGroupConfigs(group: string) { + const configs = await fileManagerService.getConfig(); + return configs[group as keyof typeof configs] as unknown as object; + } + + /** + * Get all available configurations. + * @param group + * @returns + */ + async getConfigs() { + return fileManagerService.getConfig(); + } + + /** + * Get the configuration with given key + * @param group + * @param key + * @returns + */ + async getKeyConfig(key: string) { + const configs = await fileManagerService.getConfig(); + return configs[key as keyof typeof configs]; + } +} diff --git a/platform/src/usecases/cortex/cortex.module.ts b/platform/src/usecases/cortex/cortex.module.ts new file mode 100644 index 000000000..3b1107ae2 --- /dev/null +++ b/platform/src/usecases/cortex/cortex.module.ts @@ -0,0 +1,11 @@ +import { Module } from '@nestjs/common'; +import { CortexUsecases } from './cortex.usecases'; +import { HttpModule } from '@nestjs/axios'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; + +@Module({ + imports: [HttpModule, FileManagerModule], + providers: [CortexUsecases], + exports: [CortexUsecases], +}) +export class CortexModule {} diff --git a/platform/src/usecases/cortex/cortex.usecases.spec.ts b/platform/src/usecases/cortex/cortex.usecases.spec.ts new file mode 100644 index 000000000..08f584dac --- /dev/null +++ b/platform/src/usecases/cortex/cortex.usecases.spec.ts @@ -0,0 +1,23 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { CortexUsecases } from './cortex.usecases'; +import { DatabaseModule } from '@/infrastructure/database/database.module'; +import { HttpModule } from '@nestjs/axios'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; + +describe('CortexUsecases', () => { + let cortexUsecases: CortexUsecases; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + imports: 
[DatabaseModule, HttpModule, FileManagerModule], + providers: [CortexUsecases], + exports: [], + }).compile(); + + cortexUsecases = module.get(CortexUsecases); + }); + + it('should be defined', () => { + expect(cortexUsecases).toBeDefined(); + }); +}); diff --git a/platform/src/usecases/cortex/cortex.usecases.ts b/platform/src/usecases/cortex/cortex.usecases.ts new file mode 100644 index 000000000..ac904da53 --- /dev/null +++ b/platform/src/usecases/cortex/cortex.usecases.ts @@ -0,0 +1,254 @@ +import { + BeforeApplicationShutdown, + HttpStatus, + Injectable, +} from '@nestjs/common'; +import { ChildProcess, fork } from 'child_process'; +import { delimiter, join } from 'path'; +import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto'; +import { HttpService } from '@nestjs/axios'; + +import { firstValueFrom } from 'rxjs'; +import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; +import { + CORTEX_CPP_HEALTH_Z_URL, + CORTEX_CPP_PROCESS_DESTROY_URL, + CORTEX_CPP_UNLOAD_ENGINE_URL, + CORTEX_JS_SYSTEM_URL, + defaultCortexJsHost, + defaultCortexJsPort, +} from '@/infrastructure/constants/cortex'; +import { openSync } from 'fs'; +import { Engines } from '@/infrastructure/commanders/types/engine.interface'; + +@Injectable() +export class CortexUsecases implements BeforeApplicationShutdown { + private cortexProcess: ChildProcess | undefined; + + constructor(private readonly httpService: HttpService) {} + + /** + * Start the Cortex CPP process + * @param attach + * @returns + */ + async startCortex(): Promise { + const configs = await fileManagerService.getConfig(); + const host = configs.cortexCppHost; + const port = configs.cortexCppPort; + if (this.cortexProcess || (await this.healthCheck(host, port))) { + return { + message: 'Cortex is already running', + status: 'success', + }; + } + + const engineDir = await fileManagerService.getCortexCppEnginePath(); + const dataFolderPath 
= await fileManagerService.getDataFolderPath(); + + const writer = openSync(await fileManagerService.getLogPath(), 'a+'); + + // Attempt to stop the process if it's already running + await this.stopCortex(); + // go up one level to get the binary folder, have to also work on windows + this.cortexProcess = fork(join(__dirname, './../../utils/cortex-cpp'), [], { + detached: true, + cwd: dataFolderPath, + stdio: [0, writer, writer, 'ipc'], + env: { + ...process.env, + CUDA_VISIBLE_DEVICES: '0', + ENGINE_PATH: dataFolderPath, + PATH: (process.env.PATH || '').concat(delimiter, engineDir), + LD_LIBRARY_PATH: (process.env.LD_LIBRARY_PATH || '').concat( + delimiter, + engineDir, + ), + CORTEX_CPP_PORT: port.toString(), + // // Vulkan - Support 1 device at a time for now + // ...(executableOptions.vkVisibleDevices?.length > 0 && { + // GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0], + // }), + }, + }); + this.cortexProcess.unref(); + + // Handle process exit + this.cortexProcess.on('close', (code) => { + this.cortexProcess = undefined; + console.log(`child process exited with code ${code}`); + }); + + // Await for the /healthz status ok + return new Promise((resolve, reject) => { + const interval = setInterval(() => { + this.healthCheck(host, port) + .then(() => { + clearInterval(interval); + resolve({ + message: 'Cortex started successfully', + status: 'success', + }); + }) + .catch(reject); + }, 1000); + }).then((res) => { + fileManagerService.writeConfigFile({ + ...configs, + cortexCppHost: host, + cortexCppPort: port, + }); + return res; + }); + } + + /** + * Stop the Cortex CPP process + */ + async stopCortex(): Promise { + const configs = await fileManagerService.getConfig(); + try { + await firstValueFrom( + this.httpService.delete( + CORTEX_CPP_PROCESS_DESTROY_URL( + configs.cortexCppHost, + configs.cortexCppPort, + ), + ), + ); + } finally { + this.cortexProcess?.kill(); + return { + message: 'Cortex stopped successfully', + status: 'success', + }; + } + 
} + + /** + * Unload the engine + */ + async unloadCortexEngine(engine: Engines): Promise { + const configs = await fileManagerService.getConfig(); + try { + await firstValueFrom( + this.httpService.post( + CORTEX_CPP_UNLOAD_ENGINE_URL( + configs.cortexCppHost, + configs.cortexCppPort, + ), + ), + ); + } finally { + return { + message: `${engine} unloaded successfully`, + status: 'success', + }; + } + } + + + /** + * Check whether the Cortex CPP is healthy + * @param host + * @param port + * @returns + */ + async healthCheck(host: string, port: number): Promise { + return fetch(CORTEX_CPP_HEALTH_Z_URL(host, port)) + .then((res) => { + if (res.ok) { + return true; + } + return false; + }) + .catch(() => false); + } + + /** + * start the API server in detached mode + */ + async startServerDetached(host: string, port: number) { + const writer = openSync(await fileManagerService.getLogPath(), 'a+'); + const server = fork(join(__dirname, './../../main.js'), [], { + detached: true, + stdio: ['ignore', writer, writer, 'ipc'], + env: { + CORTEX_JS_HOST: host, + CORTEX_JS_PORT: port.toString(), + CORTEX_PROFILE: fileManagerService.getConfigProfile(), + CORTEX_CONFIG_PATH: fileManagerService.getConfigPath(), + ...process.env, + }, + }); + server.disconnect(); // closes the IPC channel + server.unref(); + // Await for the /healthz status ok + return new Promise((resolve, reject) => { + const TIMEOUT = 10 * 1000; + const timeout = setTimeout(() => { + clearInterval(interval); + clearTimeout(timeout); + reject(); + }, TIMEOUT); + const interval = setInterval(() => { + this.isAPIServerOnline(host, port) + .then((result) => { + if (result) { + clearInterval(interval); + clearTimeout(timeout); + resolve(true); + } + }) + .catch(reject); + }, 1000); + }); + } + /** + * Check if the Cortex API server is online + * @returns + */ + + async isAPIServerOnline(host?: string, port?: number): Promise { + const { + apiServerHost: configApiServerHost, + apiServerPort: configApiServerPort, + 
} = await fileManagerService.getConfig(); + // for backward compatibility, we didn't have the apiServerHost and apiServerPort in the config file in the past + const apiServerHost = host || configApiServerHost || defaultCortexJsHost; + const apiServerPort = port || configApiServerPort || defaultCortexJsPort; + return firstValueFrom( + this.httpService.get(CORTEX_JS_SYSTEM_URL(apiServerHost, apiServerPort)), + ) + .then((res) => res.status === HttpStatus.OK) + .catch(() => false); + } + + async stopApiServer() { + const { + apiServerHost: configApiServerHost, + apiServerPort: configApiServerPort, + } = await fileManagerService.getConfig(); + + // for backward compatibility, we didn't have the apiServerHost and apiServerPort in the config file in the past + const apiServerHost = configApiServerHost || defaultCortexJsHost; + const apiServerPort = configApiServerPort || defaultCortexJsPort; + return fetch(CORTEX_JS_SYSTEM_URL(apiServerHost, apiServerPort), { + method: 'DELETE', + }).catch(() => {}); + } + + async updateApiServerConfig(host: string, port: number) { + const config = await fileManagerService.getConfig(); + await fileManagerService.writeConfigFile({ + ...config, + cortexCppHost: host, + cortexCppPort: port, + }); + } + + async beforeApplicationShutdown(signal: string) { + console.log(`Received ${signal}, performing pre-shutdown tasks.`); + await this.stopCortex(); + } +} diff --git a/platform/src/usecases/engines/engines.module.ts b/platform/src/usecases/engines/engines.module.ts new file mode 100644 index 000000000..5ee607a8c --- /dev/null +++ b/platform/src/usecases/engines/engines.module.ts @@ -0,0 +1,22 @@ +import { Module } from '@nestjs/common'; +import { ConfigsModule } from '../configs/configs.module'; +import { EnginesUsecases } from './engines.usecase'; +import { ExtensionModule } from '@/infrastructure/repositories/extensions/extension.module'; +import { HttpModule } from '@nestjs/axios'; +import { FileManagerModule } from 
'@/infrastructure/services/file-manager/file-manager.module'; +import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; + +@Module({ + imports: [ + ConfigsModule, + ExtensionModule, + HttpModule, + FileManagerModule, + DownloadManagerModule, + ConfigsModule, + ], + controllers: [], + providers: [EnginesUsecases], + exports: [EnginesUsecases], +}) +export class EnginesModule {} diff --git a/platform/src/usecases/engines/engines.usecase.ts b/platform/src/usecases/engines/engines.usecase.ts new file mode 100644 index 000000000..ac0b22383 --- /dev/null +++ b/platform/src/usecases/engines/engines.usecase.ts @@ -0,0 +1,305 @@ +import { isEmpty } from 'lodash'; +import { ForbiddenException, Injectable } from '@nestjs/common'; +import { ExtensionRepository } from '@/domain/repositories/extension.interface'; +import { cpSync, existsSync, mkdirSync, readdirSync, writeFileSync } from 'fs'; +import { join } from 'path'; +import { HttpService } from '@nestjs/axios'; +import decompress from 'decompress'; +import { exit } from 'node:process'; +import { InitOptions } from '@commanders/types/init-options.interface'; +import { firstValueFrom } from 'rxjs'; +import { rm } from 'fs/promises'; +import { + CORTEX_ENGINE_RELEASES_URL, + CUDA_DOWNLOAD_URL, + MIN_CUDA_VERSION, +} from '@/infrastructure/constants/cortex'; + +import { DownloadManagerService } from '@/infrastructure/services/download-manager/download-manager.service'; +import { DownloadType } from '@/domain/models/download.interface'; +import { Engines } from '@/infrastructure/commanders/types/engine.interface'; +import { CommonResponseDto } from '@/infrastructure/dtos/common/common-response.dto'; +import { EngineStatus } from '@/domain/abstracts/engine.abstract'; +import { ConfigsUsecases } from '../configs/configs.usecase'; +import { defaultInstallationOptions } from '@/utils/init'; +import { fileManagerService } from 
'@/infrastructure/services/file-manager/file-manager.service'; + +@Injectable() +export class EnginesUsecases { + constructor( + private readonly httpService: HttpService, + private readonly downloadManagerService: DownloadManagerService, + private readonly extensionRepository: ExtensionRepository, + private readonly configsUsecases: ConfigsUsecases, + ) {} + + /** + * Get the engines + * @returns Cortex supported Engines + */ + async getEngines() { + return (await this.extensionRepository.findAll()).map((engine) => ({ + name: engine.name, + description: engine.description, + version: engine.version, + productName: engine.productName, + status: engine.status?.toLowerCase(), + })); + } + + /** + * Get the engine with the given name + * @param name Engine name + * @returns Engine + */ + async getEngine(name: string) { + return this.extensionRepository.findOne(name).then((engine) => + engine + ? { + name: engine.name, + description: engine.description, + version: engine.version, + productName: engine.productName, + status: engine.status?.toLowerCase(), + } + : undefined, + ); + } + + /** + * Install Engine and Dependencies with given options + * @param engineFileName + * @param version + */ + installEngine = async ( + options?: InitOptions, + engine: string = 'default', + force: boolean = false, + ): Promise => { + // Use default option if not defined + + if ((!options || isEmpty(options)) && engine === Engines.llamaCPP) { + options = await defaultInstallationOptions(); + } + const installPackages = []; + // Ship Llama.cpp engine by default + if ( + !existsSync( + join(await fileManagerService.getCortexCppEnginePath(), engine), + ) || + force + ) { + const isVulkan = + engine === Engines.llamaCPP && + (options?.vulkan || + (options?.runMode === 'GPU' && options?.gpuType !== 'Nvidia')); + const platformMatcher = + process.platform === 'win32' + ? '-windows' + : process.platform === 'darwin' + ? 
'-mac' + : '-linux'; + installPackages.push( + this.installAcceleratedEngine(options?.version ?? 'latest', engine, [ + platformMatcher, + // CPU Instructions - CPU | GPU Non-Vulkan + !isVulkan && engine === Engines.llamaCPP && platformMatcher !== '-mac' + ? `-noavx` + : '', + // Cuda + options?.runMode === 'GPU' && + options?.gpuType === 'Nvidia' && + !isVulkan + ? `cuda-${options.cudaVersion ?? '12'}` + : '', + // Vulkan + isVulkan ? '-vulkan' : '', + + // Arch + engine !== Engines.tensorrtLLM + ? process.arch === 'arm64' + ? '-arm64' + : '-amd64' + : '', + ]), + ); + } + + if ( + engine === Engines.tensorrtLLM || + (engine === Engines.llamaCPP && + options?.runMode === 'GPU' && + options?.gpuType === 'Nvidia' && + !options?.vulkan) + ) + installPackages.push( + this.installCudaToolkitDependency( + engine === Engines.tensorrtLLM + ? MIN_CUDA_VERSION + : options?.cudaVersion, + ), + ); + await Promise.all(installPackages); + // Update states + await this.extensionRepository.findOne(engine).then((e) => { + if (e) e.status = EngineStatus.READY; + }); + }; + + /** + * Update engine's configurations + * @param config Configuration Key + * @param value Configuration Value + * @param engine Configuration Group where the key belongs + */ + async updateConfigs( + config: string, + value: string, + engine: string, + ): Promise { + if (!engine || !(await this.extensionRepository.findOne(engine))) + throw new ForbiddenException('Engine not found'); + + return this.configsUsecases + .saveConfig(config, value, engine) + .then((res) => { + this.extensionRepository.findOne(engine).then((e) => { + if (e && value) e.status = EngineStatus.READY; + }); + return res; + }); + } + + /** + * Get the configurations of an engine. 
+ * @param engine + * @returns + */ + async getEngineConfigs(engine: string) { + if (!engine || !(await this.extensionRepository.findOne(engine))) + throw new ForbiddenException('Engine not found'); + return this.configsUsecases.getGroupConfigs(engine); + } + + /** + * Install CUDA Toolkit dependency (dll/so files) + * @param options + */ + private installCudaToolkitDependency = async (cudaVersion?: string) => { + const platform = process.platform === 'win32' ? 'windows' : 'linux'; + + const dataFolderPath = await fileManagerService.getDataFolderPath(); + const url = CUDA_DOWNLOAD_URL.replace( + '', + cudaVersion === '11' ? '11.7' : MIN_CUDA_VERSION, + ).replace('', platform); + const destination = join(dataFolderPath, 'cuda-toolkit.tar.gz'); + + console.log('Downloading CUDA Toolkit dependency...'); + + await this.downloadManagerService.submitDownloadRequest( + url, + 'Cuda Toolkit Dependencies', + DownloadType.Engine, + { [url]: { destination } }, + async () => { + try { + await decompress( + destination, + await fileManagerService.getCortexCppEnginePath(), + ); + } catch (e) { + console.log(e); + exit(1); + } + await rm(destination, { force: true }); + }, + ); + }; + + /** + * Download and install accelerated engine + * @param version + * @param engineFileName + */ + private async installAcceleratedEngine( + version: string = 'latest', + engine: string = Engines.llamaCPP, + matchers: string[] = [], + ) { + const res = await firstValueFrom( + this.httpService.get( + CORTEX_ENGINE_RELEASES_URL(engine) + + `${version === 'latest' ? 
'/latest' : ''}`, + { + headers: { + 'X-GitHub-Api-Version': '2022-11-28', + Accept: 'application/vnd.github+json', + }, + }, + ), + ); + + if (!res?.data) { + console.log('Failed to fetch releases'); + exit(1); + } + + let release = res?.data; + if (Array.isArray(res?.data)) { + release = Array(res?.data)[0].find( + (e) => e.name === version.replace('v', ''), + ); + } + // Find the asset for the current platform + const toDownloadAsset = release.assets + .sort((a: any, b: any) => a.name.length - b.name.length) + .find((asset: any) => + matchers.every((matcher) => asset.name.includes(matcher)), + ); + + console.log('Downloading ', toDownloadAsset.name); + + if (!toDownloadAsset) { + console.log( + `Could not find engine file for platform ${process.platform}`, + ); + } + + const engineDir = await fileManagerService.getCortexCppEnginePath(); + + if (!existsSync(engineDir)) mkdirSync(engineDir, { recursive: true }); + + const destination = join(engineDir, toDownloadAsset.name); + + await this.downloadManagerService.submitDownloadRequest( + toDownloadAsset.browser_download_url, + engine, + DownloadType.Engine, + { [toDownloadAsset.browser_download_url]: { destination } }, + // On completed - post processing + async () => { + try { + await decompress(destination, engineDir); + } catch (e) { + console.error('Error decompressing file', e); + exit(1); + } + await rm(destination, { force: true }); + + // Copy the additional files to the cortex-cpp directory + for (const file of readdirSync(join(engineDir, engine))) { + if (!file.includes('engine')) { + cpSync(join(engineDir, engine, file), join(engineDir, file)); + } + } + + writeFileSync( + join(engineDir, engine, 'version.txt'), + release.tag_name.replace(/^v/, ''), + 'utf-8', + ); + }, + ); + } +} diff --git a/platform/src/usecases/messages/messages.module.ts b/platform/src/usecases/messages/messages.module.ts new file mode 100644 index 000000000..ab759dc81 --- /dev/null +++ 
b/platform/src/usecases/messages/messages.module.ts @@ -0,0 +1,11 @@ +import { Module } from '@nestjs/common'; +import { MessagesUsecases } from './messages.usecases'; +import { DatabaseModule } from '@/infrastructure/database/database.module'; + +@Module({ + imports: [DatabaseModule], + controllers: [], + providers: [MessagesUsecases], + exports: [MessagesUsecases], +}) +export class MessagesModule {} diff --git a/platform/src/usecases/messages/messages.usecases.spec.ts b/platform/src/usecases/messages/messages.usecases.spec.ts new file mode 100644 index 000000000..ad671d3f3 --- /dev/null +++ b/platform/src/usecases/messages/messages.usecases.spec.ts @@ -0,0 +1,21 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { MessagesUsecases } from './messages.usecases'; +import { DatabaseModule } from '@/infrastructure/database/database.module'; + +describe('MessagesService', () => { + let service: MessagesUsecases; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + imports: [DatabaseModule], + providers: [MessagesUsecases], + exports: [MessagesUsecases], + }).compile(); + + service = module.get(MessagesUsecases); + }); + + it('should be defined', () => { + expect(service).toBeDefined(); + }); +}); diff --git a/platform/src/usecases/messages/messages.usecases.ts b/platform/src/usecases/messages/messages.usecases.ts new file mode 100644 index 000000000..2e4fab8d8 --- /dev/null +++ b/platform/src/usecases/messages/messages.usecases.ts @@ -0,0 +1,70 @@ +import { Inject, Injectable } from '@nestjs/common'; +import { CreateMessageDto } from '@/infrastructure/dtos/messages/create-message.dto'; +import { UpdateMessageDto } from '@/infrastructure/dtos/messages/update-message.dto'; +import { ulid } from 'ulid'; +import { MessageEntity } from '@/infrastructure/entities/message.entity'; +import { Message } from '@/domain/models/message.interface'; +import { Repository } from 'sequelize-typescript'; + +@Injectable() +export 
class MessagesUsecases { + constructor( + @Inject('MESSAGE_REPOSITORY') + private messageRepository: Repository, + ) {} + + async create(createMessageDto: CreateMessageDto) { + const { assistant_id } = createMessageDto; + const message: Partial = { + ...createMessageDto, + id: ulid(), + created_at: Date.now(), + object: 'thread.message', + run_id: null, + completed_at: null, + incomplete_details: null, + attachments: [], + incomplete_at: null, + metadata: undefined, + assistant_id: assistant_id ?? null, + }; + return this.messageRepository.create(message); + } + + async findAll() { + return this.messageRepository.findAll(); + } + + async findOne(id: string) { + return this.messageRepository.findOne({ + where: { + id, + }, + }); + } + + async update(id: string, updateMessageDto: UpdateMessageDto) { + const [numberOfAffectedRows, [updatedMessage]] = + await this.messageRepository.update(updateMessageDto, { + where: { id }, + returning: true, + }); + return { numberOfAffectedRows, updatedMessage }; + } + + async remove(id: string) { + return this.messageRepository.destroy({ + where: { id }, + }); + } + + async getLastMessagesByThread(threadId: string, limit: number) { + return this.messageRepository.findAll({ + where: { + thread_id: threadId, + }, + order: [['created_at', 'DESC']], + limit: limit, + }); + } +} diff --git a/platform/src/usecases/models/models.module.ts b/platform/src/usecases/models/models.module.ts new file mode 100644 index 000000000..e5972941b --- /dev/null +++ b/platform/src/usecases/models/models.module.ts @@ -0,0 +1,29 @@ +import { Module } from '@nestjs/common'; +import { ModelsUsecases } from './models.usecases'; +import { DatabaseModule } from '@/infrastructure/database/database.module'; +import { CortexModule } from '@/usecases/cortex/cortex.module'; +import { ExtensionModule } from '@/infrastructure/repositories/extensions/extension.module'; +import { HttpModule } from '@nestjs/axios'; +import { TelemetryModule } from 
'../telemetry/telemetry.module'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; +import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; +import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; +import { ContextModule } from '@/infrastructure/services/context/context.module'; + +@Module({ + imports: [ + DatabaseModule, + CortexModule, + ExtensionModule, + HttpModule, + FileManagerModule, + TelemetryModule, + ContextModule, + ModelRepositoryModule, + DownloadManagerModule, + ], + controllers: [], + providers: [ModelsUsecases], + exports: [ModelsUsecases], +}) +export class ModelsModule {} diff --git a/platform/src/usecases/models/models.usecases.spec.ts b/platform/src/usecases/models/models.usecases.spec.ts new file mode 100644 index 000000000..ab6a13d09 --- /dev/null +++ b/platform/src/usecases/models/models.usecases.spec.ts @@ -0,0 +1,46 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { ModelsUsecases } from './models.usecases'; +import { DatabaseModule } from '@/infrastructure/database/database.module'; +import { ModelsModule } from './models.module'; +import { ExtensionModule } from '@/infrastructure/repositories/extensions/extension.module'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; +import { HttpModule } from '@nestjs/axios'; +import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; +import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; +import { EventEmitterModule } from '@nestjs/event-emitter'; +import { TelemetryModule } from '../telemetry/telemetry.module'; +import { ContextModule } from '@/infrastructure/services/context/context.module'; +import { CortexModule } from '../cortex/cortex.module'; + +describe('ModelsService', () => { + let service: ModelsUsecases; + 
+ beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + imports: [ + EventEmitterModule.forRoot(), + DatabaseModule, + ModelsModule, + ExtensionModule, + FileManagerModule, + DownloadManagerModule, + HttpModule, + ModelRepositoryModule, + DownloadManagerModule, + EventEmitterModule.forRoot(), + TelemetryModule, + TelemetryModule, + ContextModule, + CortexModule, + ], + providers: [ModelsUsecases], + exports: [ModelsUsecases], + }).compile(); + + service = module.get(ModelsUsecases); + }); + + it('should be defined', () => { + expect(service).toBeDefined(); + }); +}); diff --git a/platform/src/usecases/models/models.usecases.ts b/platform/src/usecases/models/models.usecases.ts new file mode 100644 index 000000000..e5a9a4b37 --- /dev/null +++ b/platform/src/usecases/models/models.usecases.ts @@ -0,0 +1,574 @@ +import ora from 'ora'; +import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto'; +import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; +import { BadRequestException, Injectable } from '@nestjs/common'; +import { Model, ModelSettingParams } from '@/domain/models/model.interface'; +import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; +import { basename, join, parse } from 'path'; +import { promises, existsSync, mkdirSync, readFileSync, rmSync } from 'fs'; +import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; +import { ExtensionRepository } from '@/domain/repositories/extension.interface'; +import { EngineExtension } from '@/domain/abstracts/engine.abstract'; +import { isLocalModel, normalizeModelId } from '@/utils/normalize-model-id'; +import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; +import { AxiosError } from 'axios'; +import { TelemetryUsecases } from '../telemetry/telemetry.usecases'; +import { TelemetrySource } from 
'@/domain/telemetry/telemetry.interface'; +import { ModelRepository } from '@/domain/repositories/model.interface'; +import { ModelParameterParser } from '@/utils/model-parameter.parser'; +import { HuggingFaceRepoSibling } from '@/domain/models/huggingface.interface'; +import { fetchJanRepoData, getHFModelMetadata } from '@/utils/huggingface'; +import { + DownloadStatus, + DownloadType, +} from '@/domain/models/download.interface'; +import { EventEmitter2 } from '@nestjs/event-emitter'; +import { ModelEvent, ModelId, ModelStatus } from '@/domain/models/model.event'; +import { DownloadManagerService } from '@/infrastructure/services/download-manager/download-manager.service'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { Engines } from '@/infrastructure/commanders/types/engine.interface'; +import { load } from 'js-yaml'; +import { llamaModelFile } from '@/utils/app-path'; +import { CortexUsecases } from '../cortex/cortex.usecases'; +import { isLocalFile } from '@/utils/urls'; + +@Injectable() +export class ModelsUsecases { + constructor( + private readonly modelRepository: ModelRepository, + private readonly cortexUsecases: CortexUsecases, + private readonly extensionRepository: ExtensionRepository, + private readonly downloadManagerService: DownloadManagerService, + private readonly telemetryUseCases: TelemetryUsecases, + private readonly contextService: ContextService, + private readonly eventEmitter: EventEmitter2, + ) {} + + private activeModelStatuses: Record = {}; + + /** + * Create a new model + * @param createModelDto Model data + */ + async create(createModelDto: CreateModelDto) { + const { model: modelId, owned_by } = createModelDto; + const model: Model = { + ...createModelDto, + id: modelId, + created: Date.now(), + object: 'model', + owned_by: owned_by ?? 
'', + }; + + await this.modelRepository.create(model); + } + + /** + * Find all models + * @returns Models + */ + async findAll(): Promise { + return this.modelRepository.findAll(); + } + + /** + * Find a model by ID + * @param model Model ID + * @returns Model + */ + async findOne(model: string) { + this.contextService.set('modelId', model); + return this.modelRepository.findOne(model); + } + + /** + * Get a model by ID or throw an exception + * @param id Model ID + * @returns Model + */ + async getModelOrThrow(id: string): Promise { + const model = await this.findOne(id); + if (!model) { + throw new ModelNotFoundException(id); + } + return model; + } + + /** + * Update a model by ID + * @param id Model ID + * @param updateModelDto Model data to update + * @returns Model update status + */ + update(id: string, updateModelDto: UpdateModelDto) { + return this.modelRepository.update(id, updateModelDto); + } + + /** + * Remove a model by ID + * @param id Model ID + * @returns Model removal status + */ + async remove(id: string) { + const modelsContainerDir = await fileManagerService.getModelsPath(); + if (!existsSync(modelsContainerDir)) { + return; + } + + const modelFolder = join(modelsContainerDir, normalizeModelId(id)); + const model = await this.getModelOrThrow(id); + const engine = (await this.extensionRepository.findOne( + model!.engine ?? 
Engines.llamaCPP, + )) as EngineExtension | undefined; + + if (engine) { + await engine + .unloadModel(id, model.engine || Engines.llamaCPP) + .catch(() => {}); // Silent fail + } + return this.modelRepository + .remove(id) + .then( + () => + existsSync(modelFolder) && rmSync(modelFolder, { recursive: true }), + ) + .then(() => { + const modelEvent: ModelEvent = { + model: id, + event: 'model-deleted', + metadata: {}, + }; + this.eventEmitter.emit('model.event', modelEvent); + }) + .then(() => { + return { + message: 'Model removed successfully', + modelId: id, + }; + }); + } + + /** + * Start a model by ID + * @param modelId Model ID + * @param settings Model settings + * @returns Model start status + */ + async startModel( + modelId: string, + settings?: ModelSettingParams, + filePath?: string, + metadataPath?: string, + ): Promise { + let model: Model | null; + if (filePath) { + model = await this.modelRepository.loadModelByFile( + modelId, + metadataPath!, + filePath, + ); + if (!existsSync(filePath) || !model) { + throw new ModelNotFoundException(model?.id ?? filePath); + } + } else { + model = await this.getModelOrThrow(modelId); + } + const engine = (await this.extensionRepository.findOne( + model!.engine ?? 
Engines.llamaCPP, + )) as EngineExtension | undefined; + + if (!engine) { + return { + message: 'No extension handler found for model', + modelId, + }; + } + + // Attempt to start cortex + await this.cortexUsecases.startCortex(); + + const loadingModelSpinner = ora('Loading model...').start(); + // update states and emitting event + this.activeModelStatuses[modelId] = { + model: modelId, + status: 'starting', + metadata: {}, + }; + const modelEvent: ModelEvent = { + model: modelId, + event: 'starting', + metadata: {}, + }; + this.eventEmitter.emit('model.event', modelEvent); + + const parser = new ModelParameterParser(); + const loadModelSettings: ModelSettingParams = { + // Default settings + ctx_len: 4096, + ngl: 100, + //TODO: Utils for model file retrieval + ...(model?.files && + Array.isArray(model.files) && + !('llama_model_path' in model) && { + llama_model_path: (model.files as string[])[0], + model_path: (model.files as string[])[0], + }), + engine: model.engine ?? Engines.llamaCPP, + // User / Model settings + ...parser.parseModelEngineSettings(model), + ...parser.parseModelEngineSettings(settings ?? {}), + }; + return engine + .loadModel(model, loadModelSettings) + .catch((e) => { + // Skip model already loaded error + if (e.code === AxiosError.ERR_BAD_REQUEST) return; + else throw e; + }) + .then(() => { + this.activeModelStatuses[modelId] = { + model: modelId, + status: 'started', + metadata: {}, + }; + const modelEvent: ModelEvent = { + model: modelId, + event: 'started', + metadata: {}, + }; + this.eventEmitter.emit('model.event', modelEvent); + }) + .then(() => { + loadingModelSpinner.succeed('Model loaded'); + return { + message: 'Model loaded successfully', + modelId, + }; + }) + .catch(async (e) => { + // remove the model from this.activeModelStatus. 
+ delete this.activeModelStatuses[modelId]; + const modelEvent: ModelEvent = { + model: modelId, + event: 'starting-failed', + metadata: {}, + }; + this.eventEmitter.emit('model.event', modelEvent); + loadingModelSpinner.fail('Model loading failed'); + await this.telemetryUseCases.createCrashReport( + e, + TelemetrySource.CORTEX_CPP, + ); + throw { + message: e.message, + modelId, + }; + }); + } + + /** + * Stop a running model + * @param modelId Model Identifier + * @returns Model stop status + */ + async stopModel(modelId: string): Promise { + const model = await this.getModelOrThrow(modelId); + const engine = (await this.extensionRepository.findOne( + model!.engine ?? Engines.llamaCPP, + )) as EngineExtension | undefined; + + if (!engine) { + return { + message: 'No extension handler found for model', + modelId, + }; + } + + this.activeModelStatuses[modelId] = { + model: modelId, + status: 'stopping', + metadata: {}, + }; + const modelEvent: ModelEvent = { + model: modelId, + event: 'stopping', + metadata: {}, + }; + this.eventEmitter.emit('model.event', modelEvent); + + return engine + .unloadModel(modelId, model.engine || Engines.llamaCPP) + .catch((e) => { + // Skip model already unloaded error + if (e.code === AxiosError.ERR_BAD_REQUEST) return; + else throw e; + }) + .then(() => { + delete this.activeModelStatuses[modelId]; + const modelEvent: ModelEvent = { + model: modelId, + event: 'stopped', + metadata: {}, + }; + this.eventEmitter.emit('model.event', modelEvent); + }) + .then(() => ({ + message: 'Model is stopped', + modelId, + })) + .catch(async (e) => { + const modelEvent: ModelEvent = { + model: modelId, + event: 'stopping-failed', + metadata: {}, + }; + this.eventEmitter.emit('model.event', modelEvent); + await this.telemetryUseCases.createCrashReport( + e, + TelemetrySource.CORTEX_CPP, + ); + return { + message: e.message, + modelId, + }; + }); + } + + /** + * Abort a download + * @param downloadId Download ID + */ + async 
abortDownloadModel(downloadId: string) { + this.downloadManagerService.abortDownload(downloadId); + } + + /** + * Populate model metadata from a Model repository (HF, Jan...) and download it + * @param modelId + */ + async pullModel( + originModelId: string, + inSequence: boolean = true, + selection?: ( + siblings: HuggingFaceRepoSibling[], + ) => Promise, + persistedModelId?: string, + ) { + const modelId = persistedModelId ?? originModelId; + const existingModel = await this.findOne(modelId); + + if (isLocalModel(existingModel?.files)) { + throw new BadRequestException('Model already exists'); + } + + // Pull a local model file + if (isLocalFile(originModelId)) { + await this.populateHuggingFaceModel(originModelId, persistedModelId); + this.eventEmitter.emit('download.event', [ + { + id: modelId, + type: DownloadType.Model, + status: DownloadStatus.Downloaded, + progress: 100, + children: [], + }, + ]); + return; + } + + const modelsContainerDir = await fileManagerService.getModelsPath(); + + if (!existsSync(modelsContainerDir)) { + mkdirSync(modelsContainerDir, { recursive: true }); + } + + const modelFolder = join(modelsContainerDir, normalizeModelId(modelId)); + await promises.mkdir(modelFolder, { recursive: true }).catch(() => {}); + + let files = (await fetchJanRepoData(originModelId)).siblings; + + // HuggingFace GGUF Repo - Only one file is downloaded + if (originModelId.includes('/') && selection && files.length) { + try { + files = [await selection(files)]; + } catch (e) { + const modelEvent: ModelEvent = { + model: modelId, + event: 'model-downloaded-failed', + metadata: { + error: e.message || e, + }, + }; + this.eventEmitter.emit('model.event', modelEvent); + throw e; + } + } + + // Start downloading the model + const toDownloads: Record< + string, + { + destination: string; + checksum?: string; + } + > = files + .filter((e) => this.validFileDownload(e)) + .reduce( + ( + acc: Record< + string, + { + destination: string; + checksum?: string; + } + >, + 
file, + ) => { + if (file.downloadUrl) + acc[file.downloadUrl] = { + destination: join(modelFolder, file.rfilename), + checksum: file?.lfs?.oid, + }; + return acc; + }, + {}, + ); + return this.downloadManagerService.submitDownloadRequest( + modelId, + modelId, + DownloadType.Model, + toDownloads, + // Post processing + async () => { + const uploadModelMetadataSpiner = ora( + 'Updating model metadata...', + ).start(); + // Post processing after download + if (existsSync(join(modelFolder, 'model.yml'))) { + const model: CreateModelDto = load( + readFileSync(join(modelFolder, 'model.yml'), 'utf-8'), + ) as CreateModelDto; + if (model.engine === Engines.llamaCPP && model.files) { + const fileUrl = join( + await fileManagerService.getModelsPath(), + normalizeModelId(modelId), + llamaModelFile(model.files), + ); + model.files = [fileUrl]; + model.name = modelId.replace(':main', ''); + } else if (model.engine === Engines.llamaCPP) { + model.files = [ + join( + await fileManagerService.getModelsPath(), + normalizeModelId(modelId), + basename( + files.find((e) => e.rfilename.endsWith('.gguf'))?.rfilename ?? + files[0].rfilename, + ), + ), + ]; + } else { + model.files = [modelFolder]; + } + model.model = modelId; + if (!(await this.findOne(modelId))) await this.create(model); + } else { + const fileUrl = join( + await fileManagerService.getModelsPath(), + normalizeModelId(modelId), + basename( + files.find((e) => e.rfilename.endsWith('.gguf'))?.rfilename ?? + files[0].rfilename, + ), + ); + await this.populateHuggingFaceModel( + fileUrl, + modelId.replace(':main', ''), + ); + } + uploadModelMetadataSpiner.succeed('Model metadata updated'); + const modelEvent: ModelEvent = { + model: modelId, + event: 'model-downloaded', + metadata: { + ...(selection ? 
{ file: [files] } : {}), + }, + }; + this.eventEmitter.emit('model.event', modelEvent); + }, + inSequence, + ); + } + + /** + * It's to pull model from HuggingFace repository + * It could be a model from Jan's repo or other authors + * @param modelId HuggingFace model id. e.g. "janhq/llama-3 or llama3:7b" + */ + async populateHuggingFaceModel(ggufUrl: string, overridenId?: string) { + const metadata = await getHFModelMetadata(ggufUrl); + + const stopWords: string[] = metadata?.stopWord ? [metadata.stopWord] : []; + + const modelId = + overridenId ?? (isLocalFile(ggufUrl) ? parse(ggufUrl).name : ggufUrl); + const model: CreateModelDto = { + files: [ggufUrl], + model: modelId, + name: metadata?.name ?? modelId, + prompt_template: metadata?.promptTemplate, + stop: stopWords, + + // Default Inference Params + stream: true, + max_tokens: 4096, + frequency_penalty: 0.7, + presence_penalty: 0.7, + temperature: 0.7, + top_p: 0.7, + + // Default Model Settings + ctx_len: metadata?.contextLength ?? 4096, + ngl: metadata?.ngl ?? 100, + engine: Engines.llamaCPP, + }; + if (!(await this.findOne(modelId))) await this.create(model); + else throw 'Model already exists.'; + } + + /** + * Get the current status of the models + * @returns Model statuses + */ + getModelStatuses(): Record { + return this.activeModelStatuses; + } + + /** + * Check whether the model is running in the Cortex C++ server + */ + async isModelRunning(modelId: string): Promise { + const model = await this.getModelOrThrow(modelId).catch(() => undefined); + + if (!model) return false; + + const engine = (await this.extensionRepository.findOne( + model.engine ?? 
Engines.llamaCPP, + )) as EngineExtension | undefined; + + if (!engine) return false; + + return engine.isModelRunning(modelId); + } + + /** + * Check whether the download file is valid or not + * @param file + * @returns + */ + private validFileDownload( + file: HuggingFaceRepoSibling, + ): file is Required { + return !!file.downloadUrl; + } +} diff --git a/platform/src/usecases/telemetry/telemetry.module.ts b/platform/src/usecases/telemetry/telemetry.module.ts new file mode 100644 index 000000000..bb0db2ecb --- /dev/null +++ b/platform/src/usecases/telemetry/telemetry.module.ts @@ -0,0 +1,20 @@ +import { Module } from '@nestjs/common'; +import { TelemetryUsecases } from './telemetry.usecases'; +import { HttpModule } from '@nestjs/axios'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; +import { ContextModule } from '@/infrastructure/services/context/context.module'; +import { TelemetryRepositoryImpl } from '@/infrastructure/repositories/telemetry/telemetry.repository'; +import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; + +export const telemetryProvider = { + provide: 'TELEMETRY_REPOSITORY', + useFactory: () => new TelemetryRepositoryImpl(), + inject: [FileManagerService], +}; + +@Module({ + imports: [HttpModule, FileManagerModule, ContextModule], + providers: [telemetryProvider, TelemetryUsecases], + exports: [telemetryProvider, TelemetryUsecases], +}) +export class TelemetryModule {} diff --git a/platform/src/usecases/telemetry/telemetry.usecases.ts b/platform/src/usecases/telemetry/telemetry.usecases.ts new file mode 100644 index 000000000..3db376cc8 --- /dev/null +++ b/platform/src/usecases/telemetry/telemetry.usecases.ts @@ -0,0 +1,235 @@ +import { TelemetryRepository } from '@/domain/repositories/telemetry.interface'; +import { + BenchmarkHardware, + CrashReportAttributes, + EventAttributes, + EventName, + Telemetry, + TelemetryAnonymized, + TelemetrySource, +} 
from '@/domain/telemetry/telemetry.interface'; +import { ContextService } from '@/infrastructure/services/context/context.service'; +import { HttpException, Inject, Injectable, Scope } from '@nestjs/common'; +import { Cortex } from '@cortexso/cortex.js'; +import { v4 } from 'uuid'; + +@Injectable({ scope: Scope.TRANSIENT }) +export class TelemetryUsecases { + private readonly crashReports: string[] = []; + private readonly maxSize = 10; + private metricQueue: EventAttributes[] = []; + private readonly maxQueueSize = 10; + private readonly flushInterval = 1000 * 5; + private interval: NodeJS.Timeout; + private lastActiveAt?: string | null; + + constructor( + @Inject('TELEMETRY_REPOSITORY') + private readonly telemetryRepository: TelemetryRepository, + private readonly contextService: ContextService, + ) { + this.catchException(); + } + + async createCrashReport( + error: HttpException | Error, + source: TelemetrySource, + ): Promise { + try { + if (!this.isCrashReportEnabled()) return; + + const crashReport: CrashReportAttributes = this.buildCrashReport(error); + if (this.crashReports.includes(JSON.stringify(crashReport))) return; + if (this.crashReports.length >= this.maxSize) { + this.crashReports.shift(); + } + this.crashReports.push(JSON.stringify(crashReport)); + await this.telemetryRepository.createCrashReport(crashReport, source); + } catch (e) {} + return; + } + + async sendCrashReport(): Promise { + if (!this.isCrashReportEnabled()) { + return; + } + const crashReport = await this.telemetryRepository.getLastCrashReport(); + if (crashReport && !crashReport.metadata.sentAt) { + const promises = [ + this.telemetryRepository.sendTelemetryToServer(crashReport.event), + ]; + const collectorEndpoint = process.env.CORTEX_EXPORTER_OLTP_ENDPOINT; + if (collectorEndpoint) { + promises.push( + this.telemetryRepository.sendTelemetryToOTelCollector( + collectorEndpoint, + crashReport, + ), + ); + } + await Promise.all(promises); + await 
this.telemetryRepository.markLastCrashReportAsSent(); + } + return; + } + + async readCrashReports( + callback: (Telemetry: Telemetry) => void, + ): Promise { + return this.telemetryRepository.readCrashReports(callback); + } + + private buildCrashReport( + error: HttpException | Error, + ): CrashReportAttributes { + return { + message: error.message, + stack: error.stack, + payload: { + modelId: this.contextService.get('modelId') || '', + endpoint: this.contextService.get('endpoint') || '', + command: this.contextService.get('command') || '', + }, + sessionId: this.contextService.get('sessionId') || '', + }; + } + + private isCrashReportEnabled(): boolean { + return process.env.CORTEX_CRASH_REPORT !== '0'; + } + + private isMetricsEnabled(): boolean { + return process.env.CORTEX_METRICS !== '0'; + } + + private isBenchmarkEnabled(): boolean { + return process.env.CORTEX_BENCHMARK !== '0'; + } + + private async catchException(): Promise { + process.on('uncaughtException', async (error: Error) => { + await this.createCrashReport( + error, + this.contextService.get('source') as TelemetrySource, + ); + }); + + process.on('unhandledRejection', async (error: Error) => { + await this.createCrashReport( + error, + this.contextService.get('source') as TelemetrySource, + ); + }); + } + + async initInterval(): Promise { + this.interval = this.flushMetricQueueInterval(); + } + + async sendEvent( + events: EventAttributes[], + source: TelemetrySource, + ): Promise { + try { + if (!this.isMetricsEnabled()) return; + const sessionId = (this.contextService.get('sessionId') as string) || ''; + const sessionEvents = events.map((event) => ({ + ...event, + sessionId, + })); + await this.telemetryRepository.sendEvent(sessionEvents, source); + } catch (e) {} + } + + async sendActivationEvent(source: TelemetrySource): Promise { + try { + if (!this.isMetricsEnabled()) return; + if (!this.lastActiveAt) { + const currentData = await this.telemetryRepository.getAnonymizedData(); + 
this.lastActiveAt = currentData?.lastActiveAt; + } + const isActivatedToday = + this.lastActiveAt && + new Date(this.lastActiveAt).getDate() === new Date().getDate(); + if (isActivatedToday) return; + const isNewActivation = !this.lastActiveAt; + await this.sendEvent( + [ + { + name: isNewActivation ? EventName.NEW_ACTIVATE : EventName.ACTIVATE, + }, + ], + source, + ); + this.lastActiveAt = new Date().toISOString(); + } catch (e) {} + await this.updateAnonymousData(this.lastActiveAt); + } + + async addEventToQueue(event: EventAttributes): Promise { + if (!this.isMetricsEnabled()) return; + this.metricQueue.push({ + ...event, + sessionId: this.contextService.get('sessionId') || '', + }); + if (this.metricQueue.length >= this.maxQueueSize) { + await this.flushMetricQueue(); + } + } + + private async flushMetricQueue(): Promise { + if (this.metricQueue.length > 0) { + clearInterval(this.interval); + await this.sendEvent(this.metricQueue, TelemetrySource.CORTEX_SERVER); + this.interval = this.flushMetricQueueInterval(); + this.metricQueue = []; + } + } + + private flushMetricQueueInterval(): NodeJS.Timeout { + return setInterval(() => { + this.flushMetricQueue(); + }, this.flushInterval); + } + + async updateAnonymousData( + lastActiveAt?: string | null, + ): Promise { + try { + const currentData = await this.telemetryRepository.getAnonymizedData(); + const updatedData = { + ...currentData, + sessionId: currentData?.sessionId || v4(), + ...(lastActiveAt && { lastActiveAt }), + }; + await this.telemetryRepository.updateAnonymousData(updatedData); + return updatedData; + } catch (e) { + return null; + } + } + + async sendBenchmarkEvent({ + hardware, + results, + metrics, + model, + }: { + hardware: BenchmarkHardware; + results: any; + metrics: any; + model: Cortex.Models.Model; + }): Promise { + try { + if (!this.isBenchmarkEnabled()) return; + const sessionId: string = this.contextService.get('sessionId') || ''; + await this.telemetryRepository.sendBenchmarkToServer({ 
+ hardware, + results, + metrics, + model, + sessionId, + }); + } catch (e) {} + } +} diff --git a/platform/src/usecases/threads/threads.module.ts b/platform/src/usecases/threads/threads.module.ts new file mode 100644 index 000000000..b11756f27 --- /dev/null +++ b/platform/src/usecases/threads/threads.module.ts @@ -0,0 +1,11 @@ +import { Module } from '@nestjs/common'; +import { ThreadsUsecases } from './threads.usecases'; +import { DatabaseModule } from '@/infrastructure/database/database.module'; + +@Module({ + imports: [DatabaseModule], + controllers: [], + providers: [ThreadsUsecases], + exports: [ThreadsUsecases], +}) +export class ThreadsModule {} diff --git a/platform/src/usecases/threads/threads.usecases.spec.ts b/platform/src/usecases/threads/threads.usecases.spec.ts new file mode 100644 index 000000000..39a9d11dd --- /dev/null +++ b/platform/src/usecases/threads/threads.usecases.spec.ts @@ -0,0 +1,20 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { ThreadsUsecases } from './threads.usecases'; +import { DatabaseModule } from '@/infrastructure/database/database.module'; + +describe('ThreadsService', () => { + let service: ThreadsUsecases; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + imports: [DatabaseModule], + providers: [ThreadsUsecases], + }).compile(); + + service = module.get(ThreadsUsecases); + }); + + it('should be defined', () => { + expect(service).toBeDefined(); + }); +}); diff --git a/platform/src/usecases/threads/threads.usecases.ts b/platform/src/usecases/threads/threads.usecases.ts new file mode 100644 index 000000000..46c38bea2 --- /dev/null +++ b/platform/src/usecases/threads/threads.usecases.ts @@ -0,0 +1,269 @@ +import { Inject, Injectable, NotFoundException } from '@nestjs/common'; +import { CreateThreadDto } from '@/infrastructure/dtos/threads/create-thread.dto'; +import { UpdateThreadDto } from '@/infrastructure/dtos/threads/update-thread.dto'; +import { v4 as uuidv4 
} from 'uuid'; +import { PageDto } from '@/infrastructure/dtos/page.dto'; +import { CreateMessageDto } from '@/infrastructure/dtos/threads/create-message.dto'; +import { ulid } from 'ulid'; +import { Message, MessageContent } from '@/domain/models/message.interface'; +import { UpdateMessageDto } from '@/infrastructure/dtos/threads/update-message.dto'; +import { Thread } from '@/domain/models/thread.interface'; +import DeleteMessageDto from '@/infrastructure/dtos/threads/delete-message.dto'; +import { Assistant } from '@/domain/models/assistant.interface'; +import { Repository } from 'sequelize-typescript'; +import { ThreadEntity } from '@/infrastructure/entities/thread.entity'; +import { MessageEntity } from '@/infrastructure/entities/message.entity'; +import { Op } from 'sequelize'; + +@Injectable() +export class ThreadsUsecases { + constructor( + @Inject('THREAD_REPOSITORY') + private threadRepository: Repository, + @Inject('MESSAGE_REPOSITORY') + private messageRepository: Repository, + ) {} + + async create(createThreadDto: CreateThreadDto): Promise { + const id = uuidv4(); + const { assistants } = createThreadDto; + const assistantEntity: Assistant[] = assistants.map((assistant) => { + const entity: Assistant = { + ...assistant, + response_format: null, + tool_resources: null, + top_p: assistant.top_p ?? null, + temperature: assistant.temperature ?? null, + }; + return entity; + }); + + const thread: Partial = { + id, + assistants: assistantEntity, + object: 'thread', + created_at: Date.now(), + title: 'New Thread', + tool_resources: null, + metadata: null, + }; + + return this.threadRepository.create(thread); + } + + async findAll( + limit: number, + order: 'asc' | 'desc', + after?: string, + before?: string, + ): Promise> { + const normalizedOrder = order === 'asc' ? 'ASC' : 'DESC'; + let afterQuery = {}; + let beforeQuery = {}; + if (after) { + const [afterDate, afterId] = after.split('_'); + const operator = order === 'asc' ? 
Op.gt : Op.lt; + afterQuery = { + [Op.or]: [ + { + created_at: { [operator]: Number(afterDate) }, + }, + { + created_at: Number(afterDate), + id: { [operator]: afterId }, + }, + ], + }; + } + if (before) { + const [beforeDate, beforeId] = before.split('_'); + const operator = order === 'asc' ? Op.lt : Op.gt; + beforeQuery = { + [Op.or]: [ + { + created_at: { [operator]: Number(beforeDate) }, + }, + { + created_at: Number(beforeDate), + id: { [operator]: beforeId }, + }, + ], + }; + } + const threads = await this.threadRepository.findAll({ + order: [ + ['created_at', normalizedOrder], + ['id', normalizedOrder], + ], + limit: limit + 1, + where: { + [Op.and]: [afterQuery, beforeQuery], + }, + }); + let hasMore = false; + if (threads.length > limit) { + hasMore = true; + threads.pop(); + } + const firstItem = threads[0]; + const lastItem = threads[threads.length - 1]; + const firstId = firstItem + ? `${firstItem.created_at}_${firstItem.id}` + : undefined; + const lastId = lastItem + ? `${lastItem?.created_at}_${lastItem?.id}` + : undefined; + return new PageDto(threads, hasMore, firstId, lastId); + } + + async getMessagesOfThread( + threadId: string, + limit: number, + order: 'asc' | 'desc', + after?: string, + before?: string, + runId?: string, + ) { + await this.getThreadOrThrow(threadId); + const normalizedOrder = order === 'asc' ? 'ASC' : 'DESC'; + + const messages = await this.messageRepository.findAll({ + where: { thread_id: threadId }, + order: [['created_at', normalizedOrder]], + limit: limit + 1, + ...(after && { where: { id: { [Op.gt]: after } } }), + ...(before && { where: { id: { [Op.lt]: before } } }), + }); + + let hasMore = false; + if (messages.length > limit) { + hasMore = true; + messages.pop(); + } + + const firstId = messages[0]?.id ?? undefined; + const lastId = messages[messages.length - 1]?.id ?? 
undefined; + + return new PageDto(messages, hasMore, firstId, lastId); + } + + async createMessageInThread( + threadId: string, + createMessageDto: CreateMessageDto, + ) { + const thread = await this.getThreadOrThrow(threadId); + const assistantId: string = thread.assistants[0].id; + + const messageContent: MessageContent = { + type: 'text', + text: { + annotations: [], + value: createMessageDto.content, + }, + }; + + const message: Partial = { + id: ulid(), + object: 'thread.message', + thread_id: threadId, + assistant_id: assistantId, + created_at: Date.now(), + status: 'completed', + role: createMessageDto.role, + content: [messageContent], + metadata: null, + run_id: null, + completed_at: null, + incomplete_details: null, + attachments: [], + incomplete_at: null, + }; + + await this.messageRepository.create(message); + return message; + } + + async updateMessage( + threadId: string, + messageId: string, + updateMessageDto: UpdateMessageDto, + ) { + await this.getThreadOrThrow(threadId); + await this.messageRepository.update(updateMessageDto, { + where: { id: messageId }, + }); + return this.messageRepository.findOne({ where: { id: messageId } }); + } + + private async getThreadOrThrow(threadId: string): Promise { + const thread = await this.findOne(threadId); + if (!thread) { + throw new NotFoundException(`Thread with id ${threadId} not found`); + } + return thread; + } + + private async getMessageOrThrow(messageId: string): Promise { + const message = await this.messageRepository.findOne({ + where: { id: messageId }, + }); + if (!message) { + throw new NotFoundException(`Message with id ${messageId} not found`); + } + return message; + } + + findOne(id: string) { + return this.threadRepository.findOne({ where: { id } }); + } + + async update(id: string, updateThreadDto: UpdateThreadDto) { + const assistantEntities: Assistant[] = + updateThreadDto.assistants?.map((assistant) => { + const entity: Assistant = { + ...assistant, + name: assistant.name, + 
response_format: null, + tool_resources: null, + top_p: assistant.top_p ?? null, + temperature: assistant.temperature ?? null, + }; + return entity; + }) ?? []; + + const entity: Partial = { + ...updateThreadDto, + assistants: assistantEntities, + }; + + return this.threadRepository.update(entity, { where: { id } }); + } + + async remove(id: string) { + await this.threadRepository.destroy({ where: { id } }); + } + + async deleteMessage( + _threadId: string, + messageId: string, + ): Promise { + await this.getMessageOrThrow(messageId); + await this.messageRepository.destroy({ where: { id: messageId } }); + + return { + id: messageId, + object: 'thread.message.deleted', + deleted: true, + }; + } + + async retrieveMessage(_threadId: string, messageId: string) { + // we still allow user to delete message even if the thread is not there + return this.getMessageOrThrow(messageId); + } + + async clean(threadId: string) { + await this.getThreadOrThrow(threadId); + await this.messageRepository.destroy({ where: { thread_id: threadId } }); + } +} diff --git a/platform/src/utils/app-path.ts b/platform/src/utils/app-path.ts new file mode 100644 index 000000000..d4fb0b1e0 --- /dev/null +++ b/platform/src/utils/app-path.ts @@ -0,0 +1,43 @@ +import { ModelArtifact } from '@/domain/models/model.interface'; +import { existsSync } from 'fs'; +import { basename, join } from 'path'; + +/** + * Path to the root of the application. + */ +export const appPath = join(__dirname, '../../'); + +/** + * Check if a file exists in any of the given paths. + * @param file + * @param paths + * @returns + */ +export const checkFileExistenceInPaths = ( + file: string, + paths: string[], +): boolean => { + return paths.some((p) => existsSync(join(p, file))); +}; + +/** + * Get the model file name from the given files. 
+ * @param files + * @returns + */ +export const llamaModelFile = ( + files: string[] | ModelArtifact | ModelArtifact[], +) => { + let artifact: any = files; + // Legacy model.yml + if (Array.isArray(files)) { + artifact = files[0]; + } + const path = + 'llama_model_path' in artifact + ? ((artifact as ModelArtifact).llama_model_path ?? '') + : 'model_path' in files + ? ((artifact as ModelArtifact).model_path ?? '') + : (artifact as string[])[0]; + return basename(path); +}; diff --git a/platform/src/utils/cortex-cpp.ts b/platform/src/utils/cortex-cpp.ts new file mode 100644 index 000000000..efa28adbd --- /dev/null +++ b/platform/src/utils/cortex-cpp.ts @@ -0,0 +1,7 @@ +import * as cortexCPP from 'cortex-cpp'; + +process.title = 'Cortex Engine Process (cortex.cpp)'; +const port = process.env.CORTEX_CPP_PORT + ? parseInt(process.env.CORTEX_CPP_PORT) + : 3929; +cortexCPP.start(port); diff --git a/platform/src/utils/cuda.ts b/platform/src/utils/cuda.ts new file mode 100644 index 000000000..ecddbaf4f --- /dev/null +++ b/platform/src/utils/cuda.ts @@ -0,0 +1,148 @@ +import { exec } from 'child_process'; +import { existsSync } from 'fs'; +import { delimiter } from 'path'; +import { checkFileExistenceInPaths } from './app-path'; + +export type GpuSettingInfo = { + id: string; + vram: string; + name: string; + arch?: string; +}; + +/** + * Return the CUDA version installed on the system + * @returns CUDA Version 11 | 12 + */ +export const cudaVersion = async () => { + let filesCuda12: string[]; + let filesCuda11: string[]; + let paths: string[]; + + if (process.platform === 'win32') { + filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']; + filesCuda11 = ['cublas64_11.dll', 'cudart64_110.dll', 'cublasLt64_11.dll']; + paths = process.env.PATH ? 
process.env.PATH.split(delimiter) : []; + } else { + filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']; + filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']; + paths = process.env.LD_LIBRARY_PATH + ? process.env.LD_LIBRARY_PATH.split(delimiter) + : []; + paths.push('/usr/lib/x86_64-linux-gnu/'); + } + + if ( + filesCuda12.every( + (file) => existsSync(file) || checkFileExistenceInPaths(file, paths), + ) + ) + return '12'; + + if ( + filesCuda11.every( + (file) => existsSync(file) || checkFileExistenceInPaths(file, paths), + ) + ) + return '11'; + + return undefined; // No CUDA Toolkit found +}; + +/** + * Check if an NVIDIA GPU is present + * @returns GPU driver exist or not + * TODO: This should be enhanced better + */ +export const checkNvidiaGPUExist = (): Promise => { + return new Promise((resolve) => { + // Execute the nvidia-smi command + exec('nvidia-smi', (error) => { + if (error) { + // If there's an error, it means nvidia-smi is not installed or there's no NVIDIA GPU + console.log('NVIDIA GPU not detected or nvidia-smi not installed.'); + resolve(false); + } else { + // If the command executes successfully, NVIDIA GPU is present + console.log('NVIDIA GPU detected.'); + resolve(true); + } + }); + }); +}; + +export const getCudaVersion = (): Promise => { + return new Promise((resolve, reject) => { + // Execute the nvidia-smi command + exec('nvidia-smi', (error, stdout) => { + if (!error) { + const cudaVersionLine = stdout + .split('\n') + .find((line) => line.includes('CUDA Version')); + + if (cudaVersionLine) { + // Extract the CUDA version number + const cudaVersionMatch = cudaVersionLine.match( + /CUDA Version:\s+(\d+\.\d+)/, + ); + if (cudaVersionMatch) { + const cudaVersion = cudaVersionMatch[1]; + resolve(cudaVersion); + } else { + reject('CUDA Version not found.'); + } + } else { + reject('CUDA Version not found.'); + } + } else { + reject(error); + } + }); + }); +}; + +/** + * Get GPU information from 
the system + * @returns GPU information + */ +export const getGpuInfo = async (): Promise => + new Promise((resolve) => { + exec( + 'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits', + { + windowsHide: true, + }, + async (error, stdout) => { + if (!error) { + // Get GPU info and gpu has higher memory first + let highestVram = 0; + let highestVramId = '0'; + const gpus: GpuSettingInfo[] = stdout + .trim() + .split('\n') + .map((line) => { + let [id, vram, name] = line.split(', '); + const arch = getGpuArch(name); + vram = vram.replace(/\r/g, ''); + if (parseFloat(vram) > highestVram) { + highestVram = parseFloat(vram); + highestVramId = id; + } + return { id, vram, name, arch }; + }); + + resolve(gpus); + } else { + resolve([]); + } + }, + ); + }); + +const getGpuArch = (gpuName: string): string => { + if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'; + + if (gpuName.includes('30')) return 'ampere'; + else if (gpuName.includes('40')) return 'ada'; + else return 'unknown'; +}; diff --git a/platform/src/utils/download-progress.ts b/platform/src/utils/download-progress.ts new file mode 100644 index 000000000..68eafcd09 --- /dev/null +++ b/platform/src/utils/download-progress.ts @@ -0,0 +1,69 @@ +import { Presets, SingleBar } from 'cli-progress'; +import { Cortex } from '@cortexso/cortex.js'; +import { exit, stdin, stdout } from 'node:process'; +import { + DownloadState, + DownloadStatus, + DownloadType, +} from '@/domain/models/download.interface'; +import { isLocalFile } from './urls'; + +export const downloadProgress = async ( + cortex: Cortex, + downloadId?: string, + downloadType?: DownloadType, +) => { + // Do not update on local file symlink + if (downloadId && isLocalFile(downloadId)) return; + + const response = await cortex.events.downloadEvent(); + + const rl = require('readline').createInterface({ + input: stdin, + output: stdout, + }); + + rl.on('SIGINT', () => { + console.log('\nStopping download...'); + 
process.emit('SIGINT'); + }); + process.on('SIGINT', async () => { + if (downloadId) { + await cortex.models.abortDownload(downloadId); + } + exit(1); + }); + + const progressBar = new SingleBar({}, Presets.shades_classic); + progressBar.start(100, 0); + + for await (const stream of response) { + if (stream.length) { + const data = stream.find( + (data: any) => data.id === downloadId || !downloadId, + ) as DownloadState | undefined; + if (!data) continue; + if (downloadType && data.type !== downloadType) continue; + + if (data.status === DownloadStatus.Downloaded) break; + if (data.status === DownloadStatus.Error) { + rl.close(); + progressBar.stop(); + console.log('\n Download failed: ', data.error); + exit(1); + } + + let totalBytes = 0; + let totalTransferred = 0; + data.children.forEach((child: any) => { + totalBytes += child.size.total; + totalTransferred += child.size.transferred; + }); + progressBar.update( + Math.floor((totalTransferred / (totalBytes || 1)) * 100), + ); + } + } + progressBar.stop(); + rl.close(); +}; diff --git a/platform/src/utils/health.ts b/platform/src/utils/health.ts new file mode 100644 index 000000000..de164d123 --- /dev/null +++ b/platform/src/utils/health.ts @@ -0,0 +1,36 @@ +import { CORTEX_JS_HEALTH_URL_WITH_API_PATH } from '@/infrastructure/constants/cortex'; + +/** + * Check whether the Cortex CPP is healthy + * @param host + * @param port + * @returns + */ +export function healthCheck(apiPath: string): Promise { + return fetch(CORTEX_JS_HEALTH_URL_WITH_API_PATH(apiPath)) + .then((res) => { + if (res.ok) { + return true; + } + return false; + }) + .catch(() => false); +} + +/** + * Wait until the Cortex Server is healthy + * @param host + * @param port + * @returns + */ +export function waitUntilHealthy(apiPath: string): Promise { + return new Promise((resolve) => { + const interval = setInterval(async () => { + const isHealthy = await healthCheck(apiPath); + if (isHealthy) { + clearInterval(interval); + resolve(true); + } + }, 
1000); + }); +} diff --git a/platform/src/utils/huggingface.ts b/platform/src/utils/huggingface.ts new file mode 100644 index 000000000..a69572992 --- /dev/null +++ b/platform/src/utils/huggingface.ts @@ -0,0 +1,200 @@ +//// PRIVATE METHODS //// + +import { + HuggingFaceRepoData, + HuggingFaceRepoSibling, +} from '@/domain/models/huggingface.interface'; +import { ModelMetadata } from '@/infrastructure/commanders/types/model-tokenizer.interface'; +import { + AllQuantizations, + HUGGING_FACE_DOWNLOAD_FILE_MAIN_URL, + HUGGING_FACE_REPO_MODEL_API_URL, + HUGGING_FACE_REPO_URL, + HUGGING_FACE_TREE_REF_URL, +} from '@/infrastructure/constants/huggingface'; +import { + LLAMA_2, + LLAMA_3, + LLAMA_3_JINJA, + OPEN_CHAT_3_5, + OPEN_CHAT_3_5_JINJA, + ZEPHYR, + ZEPHYR_JINJA, +} from '@/infrastructure/constants/prompt-constants'; +import axios from 'axios'; +import { parseModelHubEngineBranch } from './normalize-model-id'; +import { closeSync, openSync, readSync } from 'fs'; + +// TODO: move this to somewhere else, should be reused by API as well. Maybe in a separate service / provider? +export function guessPromptTemplateFromHuggingFace(jinjaCode?: string): string { + if (!jinjaCode) { + console.log('No jinja code provided. Returning default LLAMA_2'); + return LLAMA_2; + } + + if (typeof jinjaCode !== 'string') { + console.log( + `Invalid jinja code provided (type is ${typeof jinjaCode}). Returning default LLAMA_2`, + ); + return LLAMA_2; + } + + //TODO: Missing supported templates? + switch (jinjaCode) { + case ZEPHYR_JINJA: + return ZEPHYR; + + case OPEN_CHAT_3_5_JINJA: + return OPEN_CHAT_3_5; + + case LLAMA_3_JINJA: + return LLAMA_3; + + default: + // console.log( + // 'Unknown jinja code:', + // jinjaCode, + // 'Returning default LLAMA_2', + // ); + return LLAMA_2; + } +} + +/** + * Fetch the model data from Jan's repo + * @param modelId HuggingFace model id. e.g. 
"llama-3:7b" + * @returns + */ +export async function fetchJanRepoData( + modelId: string, +): Promise { + const repo = modelId.split(':')[0]; + const tree = await parseModelHubEngineBranch( + modelId.split(':')[1] ?? (!modelId.includes('/') ? 'main' : ''), + ); + const url = getRepoModelsUrl( + `${!modelId.includes('/') ? 'cortexso/' : ''}${repo}`, + tree, + ); + + const res = await fetch(url); + const jsonData = await res.json(); + if ('siblings' in jsonData) { + AllQuantizations.forEach((quantization) => { + jsonData.siblings.forEach((sibling: HuggingFaceRepoSibling) => { + if (!sibling.quantization && sibling.rfilename.includes(quantization)) { + sibling.quantization = quantization; + sibling.downloadUrl = HUGGING_FACE_DOWNLOAD_FILE_MAIN_URL( + repo, + sibling.rfilename, + ); + } + }); + }); + return jsonData as HuggingFaceRepoData; + } + + const response: + | { + path: string; + size: number; + oid?: string; + }[] + | { error: string } = jsonData; + + if ('error' in response && response.error != null) { + throw new Error(response.error); + } + + // TODO: Support multiple engines + const data: HuggingFaceRepoData = { + siblings: Array.isArray(response) + ? response.map((e) => { + return { + rfilename: e.path, + downloadUrl: HUGGING_FACE_TREE_REF_URL(repo, tree, e.path), + fileSize: e.size ?? 0, + lfs: { + oid: e.oid, + }, + }; + }) + : [], + tags: ['gguf'], + id: modelId, + modelId: modelId, + author: 'cortexso', + sha: '', + downloads: 0, + lastModified: '', + private: false, + disabled: false, + gated: false, + pipeline_tag: 'text-generation', + cardData: {}, + createdAt: '', + }; + + AllQuantizations.forEach((quantization) => { + data.siblings.forEach((sibling: any) => { + if (!sibling.quantization && sibling.rfilename.includes(quantization)) { + sibling.quantization = quantization; + } + }); + }); + + data.modelUrl = url; + return data; +} + +/** + * Get the URL to fetch the model data from HuggingFace API + * @param repoId HuggingFace model id. e.g. 
"janhq/llama3" + * @param tree The tree ref. e.g. "8b" + * @returns The URL to fetch the model data from HuggingFace API + */ +export function getRepoModelsUrl(repoId: string, tree?: string): string { + return `${HUGGING_FACE_REPO_MODEL_API_URL(repoId)}${tree ? `/tree/${tree}` : ''}`; +} + +/** + * Get the model metadata from HuggingFace + * @param ggufUrl The URL to the GGUF file + * @returns The model tokenizer + */ +export async function getHFModelMetadata( + ggufUrl: string, +): Promise { + try { + const { ggufMetadata } = await import('hyllama'); + // Read first 10mb of gguf file + const fd = openSync(ggufUrl, 'r'); + const buffer = new Uint8Array(10_000_000); + readSync(fd, buffer, 0, 10_000_000, 0); + closeSync(fd); + + // Parse metadata and tensor info + const { metadata } = ggufMetadata(buffer.buffer); + + const index = metadata['tokenizer.ggml.eos_token_id']; + const hfChatTemplate = metadata['tokenizer.chat_template']; + const promptTemplate = guessPromptTemplateFromHuggingFace(hfChatTemplate); + const stopWord: string = metadata['tokenizer.ggml.tokens'][index] ?? ''; + const name = metadata['general.name']; + const contextLength = metadata['llama.context_length'] ?? 4096; + const ngl = (metadata['llama.block_count'] ?? 
32) + 1; + const version: number = metadata['version']; + + return { + contextLength, + ngl, + stopWord, + promptTemplate, + version, + name, + }; + } catch (err) { + console.log('Failed to get model metadata:', err.message); + return undefined; + } +} diff --git a/platform/src/utils/init.ts b/platform/src/utils/init.ts new file mode 100644 index 000000000..71db89cdf --- /dev/null +++ b/platform/src/utils/init.ts @@ -0,0 +1,19 @@ +import { InitOptions } from '@/infrastructure/commanders/types/init-options.interface'; +import { checkNvidiaGPUExist } from './cuda'; + +/** + * Default installation options base on the system + * @returns + */ +export const defaultInstallationOptions = async (): Promise => { + const options: InitOptions = {}; + + // Skip check if darwin + if (process.platform === 'darwin') { + return options; + } + // If Nvidia Driver is installed -> GPU + options.runMode = (await checkNvidiaGPUExist()) ? 'GPU' : 'CPU'; + options.gpuType = 'Nvidia'; + return options; +}; \ No newline at end of file diff --git a/platform/src/utils/logs.ts b/platform/src/utils/logs.ts new file mode 100644 index 000000000..1802d6d2b --- /dev/null +++ b/platform/src/utils/logs.ts @@ -0,0 +1,80 @@ +import { createReadStream, existsSync, stat, writeFileSync } from 'fs'; +import { createInterface } from 'readline'; +import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; + +const logCleaningInterval: number = 120000; +let timeout: NodeJS.Timeout | undefined; + +/** + * Print the last N lines of a file that contain the word 'ERROR' + * @param filename + * @param numLines + */ +export async function printLastErrorLines( + logPath: string, + numLines: number = 10, +): Promise { + const errorLines: string[] = []; + + const fileStream = createReadStream(logPath); + const rl = createInterface({ + input: fileStream, + crlfDelay: Infinity, + }); + + for await (const line of rl) { + errorLines.push(line); + if (errorLines.length > numLines) { + 
errorLines.shift(); + } + } + + console.log(`Last errors:`); + errorLines.forEach((line) => console.log(line)); + console.log('...'); +} + +export async function cleanLogs( + maxFileSizeBytes?: number | undefined, + daysToKeep?: number | undefined, +): Promise { + // clear existing timeout + // in case we rerun it with different values + if (timeout) clearTimeout(timeout); + timeout = undefined; + + console.log('Validating app logs. Next attempt in ', logCleaningInterval); + + const size = maxFileSizeBytes ?? 1 * 1024 * 1024; // 1 MB + const days = daysToKeep ?? 7; // 7 days + const filePath = await fileManagerService.getLogPath(); + // Perform log cleaning + const currentDate = new Date(); + if (existsSync(filePath)) + stat(filePath, (err, stats) => { + if (err) { + console.error('Error getting file stats:', err); + return; + } + + // Check size + if (stats.size > size) { + writeFileSync(filePath, '', 'utf8'); + } else { + // Check age + const creationDate = new Date(stats.ctime); + const daysDifference = Math.floor( + (currentDate.getTime() - creationDate.getTime()) / (1000 * 3600 * 24), + ); + if (daysDifference > days) { + writeFileSync(filePath, '', 'utf8'); + } + } + }); + + // Schedule the next execution with doubled delays + timeout = setTimeout( + () => cleanLogs(maxFileSizeBytes, daysToKeep), + logCleaningInterval, + ); +} diff --git a/platform/src/utils/model-check.ts b/platform/src/utils/model-check.ts new file mode 100644 index 000000000..05cd8827f --- /dev/null +++ b/platform/src/utils/model-check.ts @@ -0,0 +1,63 @@ +import { MIN_CUDA_VERSION } from '@/infrastructure/constants/cortex'; +import { getCudaVersion } from './cuda'; +import ora from 'ora'; + +export const checkModelCompatibility = async ( + modelId: string, + spinner?: ora.Ora, +) => { + function log(message: string) { + if (spinner) { + spinner.fail(message); + } else { + console.error(message); + } + } + if (modelId.includes('onnx') && process.platform !== 'win32') { + log('The ONNX 
engine does not support this OS yet.'); + process.exit(1); + } + + if (modelId.includes('tensorrt-llm')) { + if (process.platform === 'darwin') { + log('Tensorrt-LLM models are not supported on this OS'); + process.exit(1); + } + + try { + const version = await getCudaVersion(); + const [currentMajor, currentMinor] = version.split('.').map(Number); + const [requiredMajor, requiredMinor] = + MIN_CUDA_VERSION.split('.').map(Number); + const isMatchRequired = + currentMajor > requiredMajor || + (currentMajor === requiredMajor && currentMinor >= requiredMinor); + if (!isMatchRequired) { + log( + `CUDA version ${version} is not compatible with TensorRT-LLM models. Required version: ${MIN_CUDA_VERSION}`, + ); + process.exit(1); + } + } catch (e) { + console.error(e.message ?? e); + log(e.message ?? e); + process.exit(1); + } + } +}; + +export const parseVersion = (version: string) => { + return version.split('.').map(Number); +}; + +export const checkRequiredVersion = (version: string, minVersion: string) => { + const [currentMajor, currentMinor, currentPatch] = parseVersion(version); + const [requiredMajor, requiredMinor, requiredPatch] = + parseVersion(minVersion); + return ( + currentMajor > requiredMajor || + (currentMajor === requiredMajor && + (currentMinor > requiredMinor || + (currentMinor === requiredMinor && currentPatch >= requiredPatch))) + ); +}; diff --git a/platform/src/utils/model-parameter.parser.ts b/platform/src/utils/model-parameter.parser.ts new file mode 100644 index 000000000..5d91d5531 --- /dev/null +++ b/platform/src/utils/model-parameter.parser.ts @@ -0,0 +1,74 @@ +import { + Model, + ModelRuntimeParams, + ModelSettingParams, +} from '@/domain/models/model.interface'; + +// Make this class injectable +export class ModelParameterParser { + private modelSettingParamTypes: { [key: string]: string } = { + prompt_template: 'string', + ctx_len: 'number', + ngl: 'number', + n_parallel: 'number', + cpu_threads: 'number', + llama_model_path: 'string', + 
mmproj: 'string', + cont_batching: 'boolean', + pre_prompt: 'string', + model_type: 'string', + embedding: 'boolean', + }; + + private modelRuntimeParamTypes: { [key: string]: string } = { + temperature: 'number', + top_k: 'number', + top_p: 'number', + stream: 'boolean', + max_tokens: 'number', + stop: 'string[]', + frequency_penalty: 'number', + presence_penalty: 'number', + }; + + /** + * Parse the model inference parameters from origin Model + * @param model + * @returns Partial + */ + parseModelInferenceParams(model: Partial): Partial { + const inferenceParams: Partial & ModelRuntimeParams = + structuredClone(model); + return Object.keys(inferenceParams).reduce((acc, key) => { + if (!this.isModelRuntimeParam(key)) { + delete acc[key as keyof typeof acc]; + } + + return acc; + }, inferenceParams); + } + /** + * Parse the model engine settings from origin Model + * @param model + * @returns Partial + */ + parseModelEngineSettings(model: Partial): Partial { + const engineSettings: Partial & ModelSettingParams = + structuredClone(model); + return Object.keys(engineSettings).reduce((acc, key) => { + if (!this.isModelSettingParam(key)) { + delete acc[key as keyof typeof acc]; + } + + return acc; + }, engineSettings); + } + + private isModelSettingParam(key: string): boolean { + return key in this.modelSettingParamTypes; + } + + private isModelRuntimeParam(key: string): boolean { + return key in this.modelRuntimeParamTypes; + } +} diff --git a/platform/src/utils/normalize-model-id.ts b/platform/src/utils/normalize-model-id.ts new file mode 100644 index 000000000..fcc3f721f --- /dev/null +++ b/platform/src/utils/normalize-model-id.ts @@ -0,0 +1,48 @@ +import { ModelArtifact } from '@/domain/models/model.interface'; +import { getGpuInfo } from './cuda'; +import { + Engines, + RemoteEngines, +} from '@/infrastructure/commanders/types/engine.interface'; + +export const normalizeModelId = (modelId: string): string => { + return modelId.replace(':main', 
'').replace(/[:/]/g, '-'); +}; + +export const isLocalModel = ( + modelFiles?: string[] | ModelArtifact, +): boolean => { + return ( + !!modelFiles && + Array.isArray(modelFiles) && + !/^(http|https):\/\/[^/]+\/.*/.test(modelFiles[0]) + ); +}; + +export const isRemoteEngine = (engine: string): boolean => { + return RemoteEngines.includes(engine as Engines); +}; + +/** + * Parse the model hub engine branch + * @param branch + * @returns + */ +export const parseModelHubEngineBranch = async ( + branch: string, +): Promise => { + if (branch.includes('tensorrt')) { + let engineBranch = branch; + const platform = process.platform == 'win32' ? 'windows' : 'linux'; + if (!engineBranch.includes(platform)) { + engineBranch += `-${platform}`; + } + + const gpus = await getGpuInfo(); + if (gpus[0]?.arch && !engineBranch.includes(gpus[0].arch)) { + engineBranch += `-${gpus[0].arch}`; + } + return engineBranch; + } + return branch; +}; diff --git a/platform/src/utils/request.ts b/platform/src/utils/request.ts new file mode 100644 index 000000000..e81c1b0de --- /dev/null +++ b/platform/src/utils/request.ts @@ -0,0 +1,22 @@ +export function extractCommonHeaders(headers: any) { + const commonHeaders = [ + 'Content-Type', + 'User-Agent', + 'Accept', + 'Authorization', + 'Origin', + 'Referer', + 'Connection', + ]; + + const extractedHeaders: Record = {}; + + for (const header of commonHeaders) { + const value = headers[header]; + if (value) { + extractedHeaders[header] = value; + } + } + + return extractedHeaders; +} diff --git a/platform/src/utils/system-resource.ts b/platform/src/utils/system-resource.ts new file mode 100644 index 000000000..527884a9c --- /dev/null +++ b/platform/src/utils/system-resource.ts @@ -0,0 +1,16 @@ +import osUtils from 'node-os-utils' + +export type MemoryInformation = { + total: osUtils.MemUsedInfo['totalMemMb']; + used: osUtils.MemUsedInfo['usedMemMb']; + free: osUtils.MemFreeInfo['freeMemMb'] +} + +export const getMemoryInformation = async (): Promise 
=> { + const [usedMemoryInfo, freeMemoryInfo] = await Promise.all([osUtils.mem.used(), osUtils.mem.free()]) + return { + total: usedMemoryInfo.totalMemMb, + used: usedMemoryInfo.usedMemMb, + free: freeMemoryInfo.freeMemMb + } +} \ No newline at end of file diff --git a/platform/src/utils/urls.ts b/platform/src/utils/urls.ts new file mode 100644 index 000000000..6559ef804 --- /dev/null +++ b/platform/src/utils/urls.ts @@ -0,0 +1,25 @@ +import { isAbsolute } from 'path'; + +/** + * Check if a string is a valid URL. + * @param input - The string to check. + * @returns True if the string is a valid URL, false otherwise. + */ +export function isValidUrl(input: string | undefined): boolean { + if (!input) return false; + try { + new URL(input); + return true; + } catch (e) { + return false; + } +} + +/** + * Check if the URL is a lcoal file path + * @param modelFiles + * @returns + */ +export const isLocalFile = (path: string): boolean => { + return !/^(http|https):\/\/[^/]+\/.*/.test(path) && isAbsolute(path); +}; diff --git a/platform/test/jest-e2e.json b/platform/test/jest-e2e.json new file mode 100644 index 000000000..c74edd836 --- /dev/null +++ b/platform/test/jest-e2e.json @@ -0,0 +1,13 @@ +{ + "moduleFileExtensions": ["js", "json", "ts"], + "rootDir": ".", + "testEnvironment": "node", + "testRegex": ".e2e-spec.ts$", + "transform": { + "^.+\\.(t|j)s$": "ts-jest" + }, + "moduleNameMapper": { + "@/(.*)$": "/../src/$1", + "@commanders/(.*)$": "/../src/infrastructure/commanders/$1" + } +} diff --git a/platform/test/models.e2e-spec.ts b/platform/test/models.e2e-spec.ts new file mode 100644 index 000000000..f6bf5a2e5 --- /dev/null +++ b/platform/test/models.e2e-spec.ts @@ -0,0 +1,24 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { INestApplication } from '@nestjs/common'; +import request from 'supertest'; +import { AppModule } from '@/app.module'; + +describe('ModelsController (e2e)', () => { + let app: INestApplication; + + beforeEach(async () => { + 
const moduleFixture: TestingModule = await Test.createTestingModule({ + imports: [AppModule], + }).compile(); + + app = moduleFixture.createNestApplication(); + await app.init(); + }); + + it('/ (GET)', () => { + return request(app.getHttpServer()) + .get('/models') + .expect(200) + .expect((e) => Array.isArray(e)); + }); +}); diff --git a/platform/tsconfig.build.json b/platform/tsconfig.build.json new file mode 100644 index 000000000..64f86c6bd --- /dev/null +++ b/platform/tsconfig.build.json @@ -0,0 +1,4 @@ +{ + "extends": "./tsconfig.json", + "exclude": ["node_modules", "test", "dist", "**/*spec.ts"] +} diff --git a/platform/tsconfig.json b/platform/tsconfig.json new file mode 100644 index 000000000..44f33f788 --- /dev/null +++ b/platform/tsconfig.json @@ -0,0 +1,28 @@ +{ + "compilerOptions": { + "module": "node16", + "moduleResolution": "node16", + "declaration": true, + "removeComments": true, + "emitDecoratorMetadata": true, + "experimentalDecorators": true, + "allowSyntheticDefaultImports": true, + "target": "es2018", + "sourceMap": true, + "outDir": "./dist", + "baseUrl": "./", + "incremental": true, + "skipLibCheck": true, + "strictNullChecks": true, + "noImplicitAny": true, + "strictBindCallApply": true, + "forceConsistentCasingInFileNames": true, + "noFallthroughCasesInSwitch": true, + "esModuleInterop": true, + "resolveJsonModule": true, + "paths": { + "@/*": ["src/*"], + "@commanders/*": ["src/infrastructure/commanders/*"] + } + } +} diff --git a/platform/uninstall.js b/platform/uninstall.js new file mode 100644 index 000000000..2c5b04807 --- /dev/null +++ b/platform/uninstall.js @@ -0,0 +1,40 @@ +const { promises } = require('node:fs'); +const os = require('os'); +const fs = require('fs'); +const path = require('path'); +const yaml = require('js-yaml'); +const constants = require('./dist/src/infrastructure/constants/cortex.js') + +const uninstall = async () => { + const cortexConfigPath = path.join(os.homedir(), '.cortexrc'); + if 
(fs.existsSync(cortexConfigPath)) { + const content = await promises.readFile(cortexConfigPath, 'utf8'); + const config = yaml.load(content); + if(!config) { + return; + } + const { dataFolderPath, cortexCppHost, cortexCppPort, apiServerHost, apiServerPort } = config; + + await fetch(constants.CORTEX_CPP_PROCESS_DESTROY_URL(cortexCppHost, cortexCppPort), + { method: 'DELETE' }) + .catch(() => {}) + await fetch(constants.CORTEX_JS_SYSTEM_URL(apiServerHost, apiServerPort), + { method: 'DELETE' }) + .catch(() => {}) + // remove all data in data folder path except models + const modelsFolderPath = path.join(dataFolderPath, 'models'); + const files = fs.readdirSync(dataFolderPath); + for (const file of files) { + const fileStat = fs.statSync(path.join(dataFolderPath, file)); + if (file !== 'models') { + if (fileStat.isDirectory()) { + fs.rmSync(path.join(dataFolderPath, file), { recursive: true }); + } else { + fs.unlinkSync(path.join(dataFolderPath, file)); + } + } + } + } +}; + +uninstall(); From 75eb883b87074863ed9f556ab37a0782888c4990 Mon Sep 17 00:00:00 2001 From: marknguyen1302 Date: Tue, 27 Aug 2024 14:16:40 +0700 Subject: [PATCH 2/4] chore: rename js folder --- .github/workflows/cortex-build.yml | 38 +- .../workflows/cortex-js-openai-coverage.yml | 6 +- .github/workflows/cortex-js-quality-gate.yml | 10 +- .github/workflows/cortex-js.yml | 14 +- .gitignore | 12 +- README.md | 4 +- cortex-js/.env.development | 0 cortex-js/.env.example | 2 - cortex-js/.eslintrc.cjs | 46 -- cortex-js/.gitignore | 60 -- cortex-js/.prettierrc | 4 - cortex-js/CONTRIBUTING.md | 42 -- cortex-js/Makefile | 68 --- cortex-js/README.md | 142 ----- cortex-js/control.template | 9 - cortex-js/cortex.ico | Bin 15086 -> 0 bytes cortex-js/entitlements.plist | 33 - cortex-js/installer.iss | 86 --- cortex-js/nest-cli.json | 17 - cortex-js/package.json | 158 ----- cortex-js/patches/sqlite3+5.1.7.patch | 15 - cortex-js/presets/alpaca.yml | 16 - cortex-js/presets/chatml.yml | 18 - 
cortex-js/presets/llama3.yml | 19 - cortex-js/presets/vicuna.yml | 20 - cortex-js/src/app.module.ts | 77 --- cortex-js/src/app.ts | 75 --- cortex-js/src/command.module.ts | 83 --- cortex-js/src/command.ts | 50 -- .../src/domain/abstracts/engine.abstract.ts | 65 -- .../domain/abstracts/extension.abstract.ts | 38 -- .../src/domain/abstracts/oai.abstract.ts | 70 --- .../src/domain/config/config.interface.ts | 8 - .../src/domain/models/assistant.interface.ts | 12 - .../src/domain/models/download.interface.ts | 80 --- .../domain/models/huggingface.interface.ts | 74 --- .../models/inference-setting.interface.ts | 20 - .../src/domain/models/message.interface.ts | 13 - cortex-js/src/domain/models/model.event.ts | 38 -- .../src/domain/models/model.interface.ts | 186 ------ .../models/prompt-template.interface.ts | 6 - .../src/domain/models/resource.interface.ts | 21 - .../src/domain/models/thread.interface.ts | 13 - .../repositories/extension.interface.ts | 4 - .../domain/repositories/model.interface.ts | 10 - .../repositories/repository.interface.ts | 11 - .../repositories/telemetry.interface.ts | 50 -- .../domain/telemetry/telemetry.interface.ts | 115 ---- cortex-js/src/extensions/anthropic.engine.ts | 113 ---- cortex-js/src/extensions/cohere.engine.ts | 122 ---- cortex-js/src/extensions/extensions.module.ts | 40 -- cortex-js/src/extensions/groq.engine.ts | 49 -- cortex-js/src/extensions/martian.engine.ts | 49 -- cortex-js/src/extensions/mistral.engine.ts | 48 -- cortex-js/src/extensions/nvidia.engine.ts | 49 -- cortex-js/src/extensions/openai.engine.ts | 48 -- cortex-js/src/extensions/openrouter.engine.ts | 55 -- cortex-js/src/index.ts | 103 ---- .../infrastructure/commanders/base.command.ts | 39 -- .../commanders/benchmark.command.ts | 338 ----------- .../infrastructure/commanders/chat.command.ts | 178 ------ .../commanders/cortex-command.commander.ts | 240 -------- .../commanders/decorators/CommandContext.ts | 34 -- .../commanders/embeddings.command.ts | 107 ---- 
.../commanders/engines.command.ts | 87 --- .../commanders/engines/engines-get.command.ts | 36 -- .../engines/engines-init.command.ts | 79 --- .../engines/engines-list.command.ts | 30 - .../commanders/engines/engines-set.command.ts | 28 - .../commanders/models.command.ts | 87 --- .../commanders/models/model-get.command.ts | 29 - .../commanders/models/model-list.command.ts | 44 -- .../commanders/models/model-pull.command.ts | 93 --- .../commanders/models/model-remove.command.ts | 27 - .../commanders/models/model-start.command.ts | 115 ---- .../commanders/models/model-stop.command.ts | 34 -- .../commanders/models/model-update.command.ts | 74 --- .../commanders/presets.command.ts | 31 - .../infrastructure/commanders/ps.command.ts | 146 ----- .../commanders/questions/init.questions.ts | 40 -- .../infrastructure/commanders/run.command.ts | 170 ------ .../commanders/serve-stop.command.ts | 18 - .../commanders/services/chat-client.ts | 193 ------ .../services/cortex.client.module.ts | 9 - .../commanders/services/cortex.client.ts | 19 - .../commanders/telemetry.command.ts | 44 -- .../commanders/test/helpers.command.spec.ts | 146 ----- .../commanders/test/log.service.ts | 8 - .../commanders/test/models.command.spec.ts | 131 ---- .../types/benchmark-config.interface.ts | 32 - .../commanders/types/engine.interface.ts | 34 -- .../types/init-options.interface.ts | 9 - .../commanders/types/model-stat.interface.ts | 8 - .../types/model-tokenizer.interface.ts | 8 - .../types/telemetry-options.interface.ts | 3 - .../src/infrastructure/constants/benchmark.ts | 36 -- .../src/infrastructure/constants/cortex.ts | 64 -- .../infrastructure/constants/huggingface.ts | 39 -- .../constants/prompt-constants.ts | 45 -- .../controllers/assistants.controller.spec.ts | 31 - .../controllers/assistants.controller.ts | 128 ---- .../controllers/chat.controller.spec.ts | 41 -- .../controllers/chat.controller.ts | 103 ---- .../controllers/embeddings.controller.spec.ts | 44 -- 
.../controllers/embeddings.controller.ts | 26 - .../controllers/engines.controller.spec.ts | 43 -- .../controllers/engines.controller.ts | 122 ---- .../controllers/models.controller.spec.ts | 44 -- .../controllers/models.controller.ts | 303 --------- .../controllers/system.controller.ts | 138 ----- .../controllers/threads.controller.spec.ts | 22 - .../controllers/threads.controller.ts | 342 ----------- .../database/database.module.ts | 18 - .../database/providers/assistant.providers.ts | 12 - .../database/providers/message.providers.ts | 12 - .../database/providers/thread.providers.ts | 12 - .../database/sqlite-database.providers.ts | 28 - .../dtos/assistants/create-assistant.dto.ts | 84 --- .../dtos/assistants/delete-assistant.dto.ts | 22 - .../dtos/assistants/model-setting.dto.ts | 208 ------- .../dtos/chat/chat-completion-message.dto.ts | 14 - .../dtos/chat/chat-completion-response.dto.ts | 52 -- .../infrastructure/dtos/chat/choice.dto.ts | 25 - .../dtos/chat/create-chat-completion.dto.ts | 90 --- .../dtos/chat/embeddings-response.dto.ts | 30 - .../infrastructure/dtos/chat/message.dto.ts | 17 - .../src/infrastructure/dtos/chat/usage.dto.ts | 24 - .../dtos/common/common-response.dto.ts | 10 - .../dtos/configs/config-update.dto.ts | 22 - .../cortex-operation-successfully.dto.ts | 13 - .../dtos/cortex/start-cortex.dto.ts | 27 - .../dtos/embeddings/embeddings-request.dto.ts | 37 -- .../dtos/engines/engines.dto.ts | 106 ---- .../dtos/messages/create-message.dto.ts | 58 -- .../dtos/messages/delete-message.dto.ts | 22 - .../dtos/messages/get-message.dto.ts | 91 --- .../dtos/messages/list-message.dto.ts | 95 --- .../dtos/messages/update-message.dto.ts | 4 - .../dtos/models/create-model.dto.ts | 158 ----- .../dtos/models/delete-model.dto.ts | 22 - .../dtos/models/download-model.dto.ts | 9 - .../dtos/models/list-model-response.dto.ts | 10 - .../dtos/models/model-artifact.dto.ts | 17 - .../dtos/models/model-settings.dto.ts | 142 ----- 
.../infrastructure/dtos/models/model.dto.ts | 204 ------- .../dtos/models/start-model-success.dto.ts | 15 - .../dtos/models/update-model.dto.ts | 4 - cortex-js/src/infrastructure/dtos/page.dto.ts | 28 - .../dtos/threads/create-message.dto.ts | 17 - .../threads/create-thread-assistant.dto.ts | 134 ---- .../dtos/threads/create-thread.dto.ts | 10 - .../dtos/threads/delete-message.dto.ts | 22 - .../dtos/threads/delete-thread.dto.ts | 22 - .../dtos/threads/get-thread.dto.ts | 39 -- .../dtos/threads/update-message.dto.ts | 4 - .../dtos/threads/update-thread.dto.ts | 4 - .../entities/assistant.entity.ts | 96 --- .../infrastructure/entities/message.entity.ts | 94 --- .../infrastructure/entities/thread.entity.ts | 54 -- .../duplicate-assistant.exception.ts | 10 - .../exception/global.exception.ts | 39 -- .../exception/model-not-found.exception.ts | 7 - .../model-setting-not-found.exception.ts | 7 - .../interceptors/transform.interceptor.ts | 33 - .../middlewares/app.logger.middleware.ts | 40 -- .../providers/cortex/cortex.module.ts | 30 - .../providers/cortex/cortex.provider.ts | 183 ------ .../providers/cortex/llamacpp.provider.ts | 11 - .../providers/cortex/onnx.provider.ts | 11 - .../providers/cortex/tensorrtllm.provider.ts | 11 - .../extensions/extension.module.ts | 24 - .../extensions/extension.repository.ts | 159 ----- .../repositories/models/model.module.ts | 24 - .../repositories/models/model.repository.ts | 194 ------ .../telemetry/telemetry.repository.ts | 276 --------- .../services/context/context.module.ts | 9 - .../services/context/context.service.ts | 21 - .../download-manager.module.ts | 10 - .../download-manager.service.spec.ts | 21 - .../download-manager.service.ts | 348 ----------- .../file-manager/file-manager.module.ts | 8 - .../file-manager/file-manager.service.spec.ts | 18 - .../file-manager/file-manager.service.ts | 406 ------------- .../resources-manager.module.ts | 8 - .../resources-manager.service.ts | 35 -- cortex-js/src/main.ts | 23 - 
.../usecases/assistants/assistants.module.ts | 12 - .../assistants/assistants.usecases.spec.ts | 29 - .../assistants/assistants.usecases.ts | 96 --- cortex-js/src/usecases/chat/chat.module.ts | 25 - .../src/usecases/chat/chat.usecases.spec.ts | 40 -- cortex-js/src/usecases/chat/chat.usecases.ts | 111 ---- .../exception/invalid-api-url.exception.ts | 10 - .../src/usecases/configs/configs.module.ts | 11 - .../src/usecases/configs/configs.usecase.ts | 84 --- .../src/usecases/cortex/cortex.module.ts | 11 - .../usecases/cortex/cortex.usecases.spec.ts | 23 - .../src/usecases/cortex/cortex.usecases.ts | 254 -------- .../src/usecases/engines/engines.module.ts | 22 - .../src/usecases/engines/engines.usecase.ts | 305 ---------- .../src/usecases/messages/messages.module.ts | 11 - .../messages/messages.usecases.spec.ts | 21 - .../usecases/messages/messages.usecases.ts | 70 --- .../src/usecases/models/models.module.ts | 29 - .../usecases/models/models.usecases.spec.ts | 46 -- .../src/usecases/models/models.usecases.ts | 574 ------------------ .../usecases/telemetry/telemetry.module.ts | 20 - .../usecases/telemetry/telemetry.usecases.ts | 235 ------- .../src/usecases/threads/threads.module.ts | 11 - .../usecases/threads/threads.usecases.spec.ts | 20 - .../src/usecases/threads/threads.usecases.ts | 269 -------- cortex-js/src/utils/app-path.ts | 43 -- cortex-js/src/utils/cortex-cpp.ts | 7 - cortex-js/src/utils/cuda.ts | 148 ----- cortex-js/src/utils/download-progress.ts | 69 --- cortex-js/src/utils/health.ts | 36 -- cortex-js/src/utils/huggingface.ts | 200 ------ cortex-js/src/utils/init.ts | 19 - cortex-js/src/utils/logs.ts | 80 --- cortex-js/src/utils/model-check.ts | 63 -- cortex-js/src/utils/model-parameter.parser.ts | 74 --- cortex-js/src/utils/normalize-model-id.ts | 48 -- cortex-js/src/utils/request.ts | 22 - cortex-js/src/utils/system-resource.ts | 16 - cortex-js/src/utils/urls.ts | 25 - cortex-js/test/jest-e2e.json | 13 - cortex-js/test/models.e2e-spec.ts | 24 - 
cortex-js/tsconfig.build.json | 4 - cortex-js/tsconfig.json | 28 - cortex-js/uninstall.js | 40 -- 230 files changed, 42 insertions(+), 14413 deletions(-) delete mode 100644 cortex-js/.env.development delete mode 100644 cortex-js/.env.example delete mode 100644 cortex-js/.eslintrc.cjs delete mode 100644 cortex-js/.gitignore delete mode 100644 cortex-js/.prettierrc delete mode 100644 cortex-js/CONTRIBUTING.md delete mode 100644 cortex-js/Makefile delete mode 100644 cortex-js/README.md delete mode 100644 cortex-js/control.template delete mode 100644 cortex-js/cortex.ico delete mode 100644 cortex-js/entitlements.plist delete mode 100644 cortex-js/installer.iss delete mode 100644 cortex-js/nest-cli.json delete mode 100644 cortex-js/package.json delete mode 100644 cortex-js/patches/sqlite3+5.1.7.patch delete mode 100644 cortex-js/presets/alpaca.yml delete mode 100644 cortex-js/presets/chatml.yml delete mode 100644 cortex-js/presets/llama3.yml delete mode 100644 cortex-js/presets/vicuna.yml delete mode 100644 cortex-js/src/app.module.ts delete mode 100644 cortex-js/src/app.ts delete mode 100644 cortex-js/src/command.module.ts delete mode 100644 cortex-js/src/command.ts delete mode 100644 cortex-js/src/domain/abstracts/engine.abstract.ts delete mode 100644 cortex-js/src/domain/abstracts/extension.abstract.ts delete mode 100644 cortex-js/src/domain/abstracts/oai.abstract.ts delete mode 100644 cortex-js/src/domain/config/config.interface.ts delete mode 100644 cortex-js/src/domain/models/assistant.interface.ts delete mode 100644 cortex-js/src/domain/models/download.interface.ts delete mode 100644 cortex-js/src/domain/models/huggingface.interface.ts delete mode 100644 cortex-js/src/domain/models/inference-setting.interface.ts delete mode 100644 cortex-js/src/domain/models/message.interface.ts delete mode 100644 cortex-js/src/domain/models/model.event.ts delete mode 100644 cortex-js/src/domain/models/model.interface.ts delete mode 100644 
cortex-js/src/domain/models/prompt-template.interface.ts delete mode 100644 cortex-js/src/domain/models/resource.interface.ts delete mode 100644 cortex-js/src/domain/models/thread.interface.ts delete mode 100644 cortex-js/src/domain/repositories/extension.interface.ts delete mode 100644 cortex-js/src/domain/repositories/model.interface.ts delete mode 100644 cortex-js/src/domain/repositories/repository.interface.ts delete mode 100644 cortex-js/src/domain/repositories/telemetry.interface.ts delete mode 100644 cortex-js/src/domain/telemetry/telemetry.interface.ts delete mode 100644 cortex-js/src/extensions/anthropic.engine.ts delete mode 100644 cortex-js/src/extensions/cohere.engine.ts delete mode 100644 cortex-js/src/extensions/extensions.module.ts delete mode 100644 cortex-js/src/extensions/groq.engine.ts delete mode 100644 cortex-js/src/extensions/martian.engine.ts delete mode 100644 cortex-js/src/extensions/mistral.engine.ts delete mode 100644 cortex-js/src/extensions/nvidia.engine.ts delete mode 100644 cortex-js/src/extensions/openai.engine.ts delete mode 100644 cortex-js/src/extensions/openrouter.engine.ts delete mode 100644 cortex-js/src/index.ts delete mode 100644 cortex-js/src/infrastructure/commanders/base.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/benchmark.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/chat.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/cortex-command.commander.ts delete mode 100644 cortex-js/src/infrastructure/commanders/decorators/CommandContext.ts delete mode 100644 cortex-js/src/infrastructure/commanders/embeddings.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/engines.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/engines/engines-get.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/engines/engines-init.command.ts delete mode 100644 
cortex-js/src/infrastructure/commanders/engines/engines-list.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/engines/engines-set.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/models.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/models/model-get.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/models/model-list.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/models/model-pull.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/models/model-remove.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/models/model-start.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/models/model-stop.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/models/model-update.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/presets.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/ps.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/questions/init.questions.ts delete mode 100644 cortex-js/src/infrastructure/commanders/run.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/serve-stop.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/services/chat-client.ts delete mode 100644 cortex-js/src/infrastructure/commanders/services/cortex.client.module.ts delete mode 100644 cortex-js/src/infrastructure/commanders/services/cortex.client.ts delete mode 100644 cortex-js/src/infrastructure/commanders/telemetry.command.ts delete mode 100644 cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts delete mode 100644 cortex-js/src/infrastructure/commanders/test/log.service.ts delete mode 100644 cortex-js/src/infrastructure/commanders/test/models.command.spec.ts delete mode 100644 cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts delete mode 100644 
cortex-js/src/infrastructure/commanders/types/engine.interface.ts delete mode 100644 cortex-js/src/infrastructure/commanders/types/init-options.interface.ts delete mode 100644 cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts delete mode 100644 cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts delete mode 100644 cortex-js/src/infrastructure/commanders/types/telemetry-options.interface.ts delete mode 100644 cortex-js/src/infrastructure/constants/benchmark.ts delete mode 100644 cortex-js/src/infrastructure/constants/cortex.ts delete mode 100644 cortex-js/src/infrastructure/constants/huggingface.ts delete mode 100644 cortex-js/src/infrastructure/constants/prompt-constants.ts delete mode 100644 cortex-js/src/infrastructure/controllers/assistants.controller.spec.ts delete mode 100644 cortex-js/src/infrastructure/controllers/assistants.controller.ts delete mode 100644 cortex-js/src/infrastructure/controllers/chat.controller.spec.ts delete mode 100644 cortex-js/src/infrastructure/controllers/chat.controller.ts delete mode 100644 cortex-js/src/infrastructure/controllers/embeddings.controller.spec.ts delete mode 100644 cortex-js/src/infrastructure/controllers/embeddings.controller.ts delete mode 100644 cortex-js/src/infrastructure/controllers/engines.controller.spec.ts delete mode 100644 cortex-js/src/infrastructure/controllers/engines.controller.ts delete mode 100644 cortex-js/src/infrastructure/controllers/models.controller.spec.ts delete mode 100644 cortex-js/src/infrastructure/controllers/models.controller.ts delete mode 100644 cortex-js/src/infrastructure/controllers/system.controller.ts delete mode 100644 cortex-js/src/infrastructure/controllers/threads.controller.spec.ts delete mode 100644 cortex-js/src/infrastructure/controllers/threads.controller.ts delete mode 100644 cortex-js/src/infrastructure/database/database.module.ts delete mode 100644 cortex-js/src/infrastructure/database/providers/assistant.providers.ts delete 
mode 100644 cortex-js/src/infrastructure/database/providers/message.providers.ts delete mode 100644 cortex-js/src/infrastructure/database/providers/thread.providers.ts delete mode 100644 cortex-js/src/infrastructure/database/sqlite-database.providers.ts delete mode 100644 cortex-js/src/infrastructure/dtos/assistants/create-assistant.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/assistants/delete-assistant.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/assistants/model-setting.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/chat/chat-completion-response.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/chat/choice.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/chat/create-chat-completion.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/chat/message.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/chat/usage.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/common/common-response.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/configs/config-update.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/cortex/cortex-operation-successfully.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/cortex/start-cortex.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/engines/engines.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/messages/delete-message.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/messages/get-message.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/messages/list-message.dto.ts delete mode 100644 
cortex-js/src/infrastructure/dtos/messages/update-message.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/models/create-model.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/models/delete-model.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/models/download-model.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/models/list-model-response.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/models/model-settings.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/models/model.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/models/start-model-success.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/models/update-model.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/page.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/threads/create-message.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/threads/create-thread.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/threads/delete-message.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/threads/delete-thread.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/threads/get-thread.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/threads/update-message.dto.ts delete mode 100644 cortex-js/src/infrastructure/dtos/threads/update-thread.dto.ts delete mode 100644 cortex-js/src/infrastructure/entities/assistant.entity.ts delete mode 100644 cortex-js/src/infrastructure/entities/message.entity.ts delete mode 100644 cortex-js/src/infrastructure/entities/thread.entity.ts delete mode 100644 cortex-js/src/infrastructure/exception/duplicate-assistant.exception.ts delete mode 100644 cortex-js/src/infrastructure/exception/global.exception.ts delete mode 100644 
cortex-js/src/infrastructure/exception/model-not-found.exception.ts delete mode 100644 cortex-js/src/infrastructure/exception/model-setting-not-found.exception.ts delete mode 100644 cortex-js/src/infrastructure/interceptors/transform.interceptor.ts delete mode 100644 cortex-js/src/infrastructure/middlewares/app.logger.middleware.ts delete mode 100644 cortex-js/src/infrastructure/providers/cortex/cortex.module.ts delete mode 100644 cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts delete mode 100644 cortex-js/src/infrastructure/providers/cortex/llamacpp.provider.ts delete mode 100644 cortex-js/src/infrastructure/providers/cortex/onnx.provider.ts delete mode 100644 cortex-js/src/infrastructure/providers/cortex/tensorrtllm.provider.ts delete mode 100644 cortex-js/src/infrastructure/repositories/extensions/extension.module.ts delete mode 100644 cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts delete mode 100644 cortex-js/src/infrastructure/repositories/models/model.module.ts delete mode 100644 cortex-js/src/infrastructure/repositories/models/model.repository.ts delete mode 100644 cortex-js/src/infrastructure/repositories/telemetry/telemetry.repository.ts delete mode 100644 cortex-js/src/infrastructure/services/context/context.module.ts delete mode 100644 cortex-js/src/infrastructure/services/context/context.service.ts delete mode 100644 cortex-js/src/infrastructure/services/download-manager/download-manager.module.ts delete mode 100644 cortex-js/src/infrastructure/services/download-manager/download-manager.service.spec.ts delete mode 100644 cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts delete mode 100644 cortex-js/src/infrastructure/services/file-manager/file-manager.module.ts delete mode 100644 cortex-js/src/infrastructure/services/file-manager/file-manager.service.spec.ts delete mode 100644 cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts delete mode 100644 
cortex-js/src/infrastructure/services/resources-manager/resources-manager.module.ts delete mode 100644 cortex-js/src/infrastructure/services/resources-manager/resources-manager.service.ts delete mode 100644 cortex-js/src/main.ts delete mode 100644 cortex-js/src/usecases/assistants/assistants.module.ts delete mode 100644 cortex-js/src/usecases/assistants/assistants.usecases.spec.ts delete mode 100644 cortex-js/src/usecases/assistants/assistants.usecases.ts delete mode 100644 cortex-js/src/usecases/chat/chat.module.ts delete mode 100644 cortex-js/src/usecases/chat/chat.usecases.spec.ts delete mode 100644 cortex-js/src/usecases/chat/chat.usecases.ts delete mode 100644 cortex-js/src/usecases/chat/exception/invalid-api-url.exception.ts delete mode 100644 cortex-js/src/usecases/configs/configs.module.ts delete mode 100644 cortex-js/src/usecases/configs/configs.usecase.ts delete mode 100644 cortex-js/src/usecases/cortex/cortex.module.ts delete mode 100644 cortex-js/src/usecases/cortex/cortex.usecases.spec.ts delete mode 100644 cortex-js/src/usecases/cortex/cortex.usecases.ts delete mode 100644 cortex-js/src/usecases/engines/engines.module.ts delete mode 100644 cortex-js/src/usecases/engines/engines.usecase.ts delete mode 100644 cortex-js/src/usecases/messages/messages.module.ts delete mode 100644 cortex-js/src/usecases/messages/messages.usecases.spec.ts delete mode 100644 cortex-js/src/usecases/messages/messages.usecases.ts delete mode 100644 cortex-js/src/usecases/models/models.module.ts delete mode 100644 cortex-js/src/usecases/models/models.usecases.spec.ts delete mode 100644 cortex-js/src/usecases/models/models.usecases.ts delete mode 100644 cortex-js/src/usecases/telemetry/telemetry.module.ts delete mode 100644 cortex-js/src/usecases/telemetry/telemetry.usecases.ts delete mode 100644 cortex-js/src/usecases/threads/threads.module.ts delete mode 100644 cortex-js/src/usecases/threads/threads.usecases.spec.ts delete mode 100644 
cortex-js/src/usecases/threads/threads.usecases.ts delete mode 100644 cortex-js/src/utils/app-path.ts delete mode 100644 cortex-js/src/utils/cortex-cpp.ts delete mode 100644 cortex-js/src/utils/cuda.ts delete mode 100644 cortex-js/src/utils/download-progress.ts delete mode 100644 cortex-js/src/utils/health.ts delete mode 100644 cortex-js/src/utils/huggingface.ts delete mode 100644 cortex-js/src/utils/init.ts delete mode 100644 cortex-js/src/utils/logs.ts delete mode 100644 cortex-js/src/utils/model-check.ts delete mode 100644 cortex-js/src/utils/model-parameter.parser.ts delete mode 100644 cortex-js/src/utils/normalize-model-id.ts delete mode 100644 cortex-js/src/utils/request.ts delete mode 100644 cortex-js/src/utils/system-resource.ts delete mode 100644 cortex-js/src/utils/urls.ts delete mode 100644 cortex-js/test/jest-e2e.json delete mode 100644 cortex-js/test/models.e2e-spec.ts delete mode 100644 cortex-js/tsconfig.build.json delete mode 100644 cortex-js/tsconfig.json delete mode 100644 cortex-js/uninstall.js diff --git a/.github/workflows/cortex-build.yml b/.github/workflows/cortex-build.yml index 182e445d9..162d30976 100644 --- a/.github/workflows/cortex-build.yml +++ b/.github/workflows/cortex-build.yml @@ -2,8 +2,8 @@ name: CI Cortex Release on: push: - tags: ["v[0-9]+.[0-9]+.[0-9]+", "v[0-9]+.[0-9]+.[0-9]+-*", "!v[0-9]+.[0-9]+.[0-9]+-cortex-js", "!v[0-9]+.[0-9]+.[0-9]+-[0-9]+-cortex-js"] - paths: ["cortex-cpp/**", "cortex-js/**"] + tags: ["v[0-9]+.[0-9]+.[0-9]+", "v[0-9]+.[0-9]+.[0-9]+-*", "!v[0-9]+.[0-9]+.[0-9]+-platform", "!v[0-9]+.[0-9]+.[0-9]+-[0-9]+-platform"] + paths: ["cortex-cpp/**", "platform/**"] workflow_dispatch: jobs: @@ -267,7 +267,7 @@ jobs: registry-url: "https://registry.npmjs.org" - name: "Update version by tag" - working-directory: cortex-js + working-directory: platform shell: bash run: | echo "Version: ${{ needs.create-draft-release.outputs.version }}" @@ -293,7 +293,7 @@ jobs: if: runner.os == 'macOS' - run: yarn install && yarn 
build:binary - working-directory: ./cortex-js + working-directory: ./platform - name: Get Cer for code signing if: runner.os == 'macOS' @@ -311,13 +311,13 @@ jobs: - name: update app info run: | - cd cortex-js + cd platform make update-app-info - name: copy bin file macos if: runner.os == 'macOS' run: | - cd cortex-js + cd platform mkdir -p installer which cp which mv @@ -328,7 +328,7 @@ jobs: - name: Code Signing macOS if: runner.os == 'macOS' run: | - cd cortex-js + cd platform ./dist/cortexso --help echo "--------" ./cortex --help @@ -347,7 +347,7 @@ jobs: - name: Create MacOS PKG Installer if: runner.os == 'macOS' run: | - cd cortex-js + cd platform echo "--------" npx cpx ./cortex ./installer ./installer/cortex --help @@ -360,7 +360,7 @@ jobs: - name: Create Linux DEB Installer if: runner.os == 'Linux' run: | - cd cortex-js + cd platform mkdir -p cortexso/DEBIAN mkdir -p cortexso/usr/local/bin sed "s/Version:/Version: ${{ needs.create-draft-release.outputs.version }}/g" control.template > cortexso/DEBIAN/control @@ -368,7 +368,7 @@ jobs: dpkg-deb --build cortexso - run: | - cd cortex-js + cd platform set PATH=%PATH%;%USERPROFILE%\.dotnet\tools make codesign-binary CODE_SIGN=true CORTEX_VERSION="0.${{ needs.create-draft-release.outputs.version }}" AZURE_KEY_VAULT_URI="${{ secrets.AZURE_KEY_VAULT_URI }}" AZURE_CLIENT_ID="${{ secrets.AZURE_CLIENT_ID }}" AZURE_TENANT_ID="${{ secrets.AZURE_TENANT_ID }}" AZURE_CLIENT_SECRET="${{ secrets.AZURE_CLIENT_SECRET }}" AZURE_CERT_NAME="${{ secrets.AZURE_CERT_NAME }}" name: Code Signing Windows @@ -379,7 +379,7 @@ jobs: if: runner.os == 'Windows' shell: bash run: | - cd cortex-js + cd platform sed -i "s/AppVersion=1.0/AppVersion=${{ needs.create-draft-release.outputs.version }}/g" installer.iss cat installer.iss @@ -387,11 +387,11 @@ jobs: uses: Minionguyjpro/Inno-Setup-Action@v1.2.2 if: runner.os == 'Windows' with: - path: cortex-js/installer.iss + path: platform/installer.iss options: /O+ - run: | - cd cortex-js + cd 
platform set PATH=%PATH%;%USERPROFILE%\.dotnet\tools make codesign-installer CODE_SIGN=true CORTEX_VERSION="0.${{ needs.create-draft-release.outputs.version }}" AZURE_KEY_VAULT_URI="${{ secrets.AZURE_KEY_VAULT_URI }}" AZURE_CLIENT_ID="${{ secrets.AZURE_CLIENT_ID }}" AZURE_TENANT_ID="${{ secrets.AZURE_TENANT_ID }}" AZURE_CLIENT_SECRET="${{ secrets.AZURE_CLIENT_SECRET }}" AZURE_CERT_NAME="${{ secrets.AZURE_CERT_NAME }}" name: Code Signing Windows @@ -400,7 +400,7 @@ jobs: - name: Post-Bundle run: | - cd cortex-js + cd platform make postbundle - name: Upload Cortex Installer @@ -410,7 +410,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ needs.create-draft-release.outputs.upload_url }} - asset_path: ./cortex-js/cortex-installer.tar.gz + asset_path: ./platform/cortex-installer.tar.gz asset_name: cortex-installer-${{ needs.create-draft-release.outputs.version }}-${{ matrix.name }}-${{ matrix.os }}.tar.gz asset_content_type: application/gzip @@ -421,7 +421,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ needs.create-draft-release.outputs.upload_url }} - asset_path: ./cortex-js/setup.exe + asset_path: ./platform/setup.exe asset_name: cortex-installer-${{ needs.create-draft-release.outputs.version }}-${{ matrix.name }}-${{ matrix.os }}.exe asset_content_type: application/octet-stream @@ -430,7 +430,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ needs.create-draft-release.outputs.upload_url }} - asset_path: ./cortex-js/cortex.tar.gz + asset_path: ./platform/cortex.tar.gz asset_name: cortex-${{ needs.create-draft-release.outputs.version }}-${{ matrix.name }}-${{ matrix.os }}.tar.gz asset_content_type: application/gzip @@ -440,7 +440,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ needs.create-draft-release.outputs.upload_url }} - asset_path: ./cortex-js/cortexso.deb + asset_path: ./platform/cortexso.deb asset_name: cortex-installer-${{ 
needs.create-draft-release.outputs.version }}-${{ matrix.name }}-${{ matrix.os }}.deb asset_content_type: application/gzip @@ -450,7 +450,7 @@ jobs: uses: actions/upload-artifact@v2 with: name: cortex-linux - path: ./cortex-js/cortex + path: ./platform/cortex update_release_draft: needs: [build-and-test, build-cortex-single-binary] diff --git a/.github/workflows/cortex-js-openai-coverage.yml b/.github/workflows/cortex-js-openai-coverage.yml index 16f7e24fe..010113f53 100644 --- a/.github/workflows/cortex-js-openai-coverage.yml +++ b/.github/workflows/cortex-js-openai-coverage.yml @@ -36,7 +36,7 @@ jobs: node-version: 20 - run: npm install -g yarn && yarn install && yarn build - working-directory: ./cortex-js + working-directory: ./platform - name: install python venv run: | @@ -46,7 +46,7 @@ jobs: - name: Run test run: | - node cortex-js/dist/src/command.js --address 127.0.0.1 --port 4010 > cortex.log + node platform/dist/src/command.js --address 127.0.0.1 --port 4010 > cortex.log sleep 3 wget --no-verbose -O api.json http://127.0.0.1:4010/api-json cat api.json @@ -108,7 +108,7 @@ jobs: - name: Clean up if: always() run: | - node cortex-js/dist/src/command.js stop + node platform/dist/src/command.js stop rm -rf /tmp/jan rm -rf openai-python rm -rf report.html diff --git a/.github/workflows/cortex-js-quality-gate.yml b/.github/workflows/cortex-js-quality-gate.yml index bac43fe3f..223ad862c 100644 --- a/.github/workflows/cortex-js-quality-gate.yml +++ b/.github/workflows/cortex-js-quality-gate.yml @@ -1,9 +1,9 @@ -name: CI test for cortex-js +name: CI test for platform on: pull_request: paths: - - "cortex-js/**" - - .github/workflows/cortex-js-quality-gate.yml + - "platform/**" + - .github/workflows/platform-quality-gate.yml workflow_dispatch: jobs: build-and-publish-plugins: @@ -19,7 +19,7 @@ jobs: node-version: "20.x" registry-url: "https://registry.npmjs.org" - run: yarn install && yarn build - working-directory: ./cortex-js + working-directory: ./platform - run: 
yarn test name: run tests - working-directory: ./cortex-js + working-directory: ./platform diff --git a/.github/workflows/cortex-js.yml b/.github/workflows/cortex-js.yml index 99f07bb64..b81351a50 100644 --- a/.github/workflows/cortex-js.yml +++ b/.github/workflows/cortex-js.yml @@ -1,10 +1,10 @@ name: Publish cortex js Package to npmjs on: push: - tags: ["v[0-9]+.[0-9]+.[0-9]+-cortex-js", "v[0-9]+.[0-9]+.[0-9]+-[0-9]+-cortex-js"] + tags: ["v[0-9]+.[0-9]+.[0-9]+-platform", "v[0-9]+.[0-9]+.[0-9]+-[0-9]+-platform"] paths: [ - "cortex-js/**", + "platform/**", ] jobs: build-and-publish-plugins: @@ -19,11 +19,11 @@ jobs: - name: "Update version by tag" run: | - cd cortex-js + cd platform # Remove the v prefix tag_version=${GITHUB_REF#refs/tags/v} - # Remove the -cortex-js suffix - new_version=${tag_version%-cortex-js} + # Remove the -platform suffix + new_version=${tag_version%-platform} # Replace the old version with the new version in package.json jq --arg version "$new_version" '.version = $version' ./package.json > /tmp/package.json && mv /tmp/package.json ./package.json @@ -37,8 +37,8 @@ jobs: node-version: "20.x" registry-url: "https://registry.npmjs.org" - run: yarn install && yarn build - working-directory: ./cortex-js + working-directory: ./platform - run: npm publish --access public env: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - working-directory: ./cortex-js + working-directory: ./platform diff --git a/.gitignore b/.gitignore index d3c4ef22b..ac2700de4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ -# cortex-js -cortex-js/cortex.db +# platform +platform/cortex.db dist *.lock node_modules @@ -13,8 +13,8 @@ prism.log api.json openai-python/* build -cortex-js/cortex.exe -cortex-js/package-lock.json +platform/cortex.exe +platform/package-lock.json .vscode -cortex-js/command -cortex-js/src/infrastructure/commanders/test/test_data +platform/command +platform/src/infrastructure/commanders/test/test_data diff --git a/README.md b/README.md index 
660664159..828d87a3f 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ and the Swagger UI at `http://localhost:1337/api`. To install Cortex from the source, follow the steps below: 1. Clone the Cortex repository [here](https://github.com/janhq/cortex/tree/dev). -2. Navigate to the `cortex-js` folder. +2. Navigate to the `platform` folder. 3. Open the terminal and run the following command to build the Cortex project: ```bash @@ -89,7 +89,7 @@ npx nest build 4. Make the `command.js` executable: ```bash -chmod +x '[path-to]/cortex/cortex-js/dist/src/command.js' +chmod +x '[path-to]/cortex/platform/dist/src/command.js' ``` 5. Link the package globally: diff --git a/cortex-js/.env.development b/cortex-js/.env.development deleted file mode 100644 index e69de29bb..000000000 diff --git a/cortex-js/.env.example b/cortex-js/.env.example deleted file mode 100644 index d0666607c..000000000 --- a/cortex-js/.env.example +++ /dev/null @@ -1,2 +0,0 @@ -EXTENSIONS_PATH= -CORTEX_MODELS_DIR= diff --git a/cortex-js/.eslintrc.cjs b/cortex-js/.eslintrc.cjs deleted file mode 100644 index f116c675b..000000000 --- a/cortex-js/.eslintrc.cjs +++ /dev/null @@ -1,46 +0,0 @@ -module.exports = { - parser: '@typescript-eslint/parser', - parserOptions: { - project: 'tsconfig.json', - tsconfigRootDir: __dirname, - sourceType: 'module', - }, - plugins: ['@typescript-eslint/eslint-plugin'], - extends: [ - 'plugin:@typescript-eslint/recommended', - 'plugin:prettier/recommended', - ], - root: true, - env: { - node: true, - jest: true, - }, - ignorePatterns: ['.eslintrc.js'], - rules: { - '@typescript-eslint/interface-name-prefix': 'off', - '@typescript-eslint/explicit-function-return-type': 'off', - '@typescript-eslint/explicit-module-boundary-types': 'off', - '@typescript-eslint/no-explicit-any': 'off', - '@typescript-eslint/no-unused-vars': ['warn'], - '@typescript-eslint/no-floating-promises': 'warn', - '@typescript-eslint/no-var-requires': 'warn', - '@typescript-eslint/ban-types': 'warn', - 
'no-unused-vars': 'off', - 'require-await': 'off', - 'prefer-const': 'warn', - 'no-restricted-syntax': [ - 'warn', - { - selector: - 'CallExpression[callee.object.name=configService][callee.property.name=/^(get|getOrThrow)$/]:not(:has([arguments.1] Property[key.name=infer][value.value=true])), CallExpression[callee.object.property.name=configService][callee.property.name=/^(get|getOrThrow)$/]:not(:has([arguments.1] Property[key.name=infer][value.value=true]))', - message: - 'Add "{ infer: true }" to configService.get() for correct typechecking. Example: configService.get("database.port", { infer: true })', - }, - { - selector: - 'CallExpression[callee.name=it][arguments.0.value!=/^should/]', - message: '"it" should start with "should"', - }, - ], - }, -}; diff --git a/cortex-js/.gitignore b/cortex-js/.gitignore deleted file mode 100644 index 6b6383708..000000000 --- a/cortex-js/.gitignore +++ /dev/null @@ -1,60 +0,0 @@ -# compiled output -/dist -/node_modules -/build - -# Logs -logs -*.log -npm-debug.log* -pnpm-debug.log* -yarn-debug.log* -yarn-error.log* -lerna-debug.log* - -# OS -.DS_Store - -# Tests -/coverage -/.nyc_output - -# IDEs and editors -/.idea -.project -.classpath -.c9/ -*.launch -.settings/ -*.sublime-workspace - -# IDE - VSCode -.vscode/* -!.vscode/settings.json -!.vscode/tasks.json -!.vscode/launch.json -!.vscode/extensions.json - -# dotenv environment variable files -.env -.env.development.local -.env.test.local -.env.production.local -.env.local - -# temp directory -.temp -.tmp - -# Runtime data -pids -*.pid -*.seed -*.pid.lock - -# Diagnostic reports (https://nodejs.org/api/report.html) -report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json - -# cortex-js -cortex.db -dist diff --git a/cortex-js/.prettierrc b/cortex-js/.prettierrc deleted file mode 100644 index dcb72794f..000000000 --- a/cortex-js/.prettierrc +++ /dev/null @@ -1,4 +0,0 @@ -{ - "singleQuote": true, - "trailingComma": "all" -} \ No newline at end of file diff --git a/cortex-js/CONTRIBUTING.md 
b/cortex-js/CONTRIBUTING.md deleted file mode 100644 index 53909c391..000000000 --- a/cortex-js/CONTRIBUTING.md +++ /dev/null @@ -1,42 +0,0 @@ -### Repo Structure -``` -# Entity Definitions -domain/ # This is the core directory where the domains are defined. - abstracts/ # Abstract base classes for common attributes and methods. - models/ # Domain interface definitions, e.g. model, assistant. - repositories/ # Extensions abstract and interface - -# Business Rules -usecases/ # Application logic - assistants/ # CRUD logic (invokes dtos, entities). - chat/ # Logic for chat functionalities. - models/ # Logic for model operations. - -# Adapters & Implementations -infrastructure/ # Implementations for Cortex interactions - commanders/ # CLI handlers - models/ - questions/ # CLI installation UX - shortcuts/ # CLI chained syntax - types/ - usecases/ # Invokes UseCases - - controllers/ # Nest controllers and HTTP routes - assistants/ # Invokes UseCases - chat/ # Invokes UseCases - models/ # Invokes UseCases - - database/ # Database providers (mysql, sqlite) - - # Framework specific object definitions - dtos/ # DTO definitions (data transfer & validation) - entities/ # TypeORM entity definitions (db schema) - - # Providers - providers/cortex # Cortex [server] provider (a core extension) - repositories/extensions # Extension provider (core & external extensions) - -extensions/ # External extensions -command.module.ts # CLI Commands List -main.ts # Entrypoint -``` \ No newline at end of file diff --git a/cortex-js/Makefile b/cortex-js/Makefile deleted file mode 100644 index 2d9d7bcec..000000000 --- a/cortex-js/Makefile +++ /dev/null @@ -1,68 +0,0 @@ -CODE_SIGN ?= false -AZURE_KEY_VAULT_URI ?= xxxx -AZURE_CLIENT_ID ?= xxxx -AZURE_TENANT_ID ?= xxxx -AZURE_CLIENT_SECRET ?= xxxx -AZURE_CERT_NAME ?= xxxx -DEVELOPER_ID ?= xxxx -CORTEX_EXE_IN ?= "dist/cortexso.exe" -CORTEX_EXE_OUT ?= "cortex.exe" -CORTEX_VERSION ?= "0.0.0.1" - -update-app-info: -ifeq ($(OS),Windows_NT) - @powershell 
-Command 'npx resedit --in $(CORTEX_EXE_IN) --out $(CORTEX_EXE_OUT) --icon "1,cortex.ico" --no-grow --company-name "Homebrew Computer Pte Ltd" --file-description "cortex cli" --file-version "$(CORTEX_VERSION)" --internal-name "cortex" --product-name "cortex" --product-version "$(CORTEX_VERSION)"' -else ifeq ($(shell uname -s),Linux) - @cp ./dist/cortexso ./cortex -endif - -codesign-binary: -ifeq ($(CODE_SIGN),false) - @echo "Skipping Code Sign" - @exit 0 -endif - -ifeq ($(OS),Windows_NT) - @powershell -Command "dotnet tool install --global AzureSignTool;" - @powershell -Command 'azuresigntool.exe sign -kvu "$(AZURE_KEY_VAULT_URI)" -kvi "$(AZURE_CLIENT_ID)" -kvt "$(AZURE_TENANT_ID)" -kvs "$(AZURE_CLIENT_SECRET)" -kvc "$(AZURE_CERT_NAME)" -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\cortex.exe";' -else ifeq ($(shell uname -s),Linux) - @echo "Skipping Code Sign for linux" - @exit 0 -else - codesign --force -s "$(DEVELOPER_ID)" --options=runtime --entitlements="./entitlements.plist" ./cortex; -endif - -codesign-installer: -ifeq ($(CODE_SIGN),false) - @echo "Skipping Code Sign" - @exit 0 -endif - -ifeq ($(OS),Windows_NT) - @powershell -Command "dotnet tool install --global AzureSignTool;" - @powershell -Command 'azuresigntool.exe sign -kvu "$(AZURE_KEY_VAULT_URI)" -kvi "$(AZURE_CLIENT_ID)" -kvt "$(AZURE_TENANT_ID)" -kvs "$(AZURE_CLIENT_SECRET)" -kvc "$(AZURE_CERT_NAME)" -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\setup.exe";' -else ifeq ($(shell uname -s),Linux) - @echo "Skipping Code Sign for linux" - @exit 0 -else - productsign --sign "Developer ID Installer: $(DEVELOPER_ID)" cortex-installer.pkg cortex-installer-signed.pkg; - rm cortex-installer.pkg; - mv cortex-installer-signed.pkg cortex-installer.pkg; -endif - -postbundle: -ifeq ($(RUN_TESTS),false) - @echo "Skipping tests" - @exit 0 -endif -ifeq ($(OS),Windows_NT) - @powershell -Command "7z a -ttar temp.tar cortex.exe; 7z a -tgzip cortex.tar.gz temp.tar;" - @powershell -Command "7z a 
-ttar temp2.tar setup.exe; 7z a -tgzip cortex-installer.tar.gz temp2.tar;" -else ifeq ($(shell uname -s),Linux) - @chmod +x cortex; \ - tar -czvf cortex.tar.gz cortex; -else - @chmod +x cortex; \ - tar -czvf cortex.tar.gz cortex; \ - tar -czvf cortex-installer.tar.gz cortex-installer.pkg; -endif \ No newline at end of file diff --git a/cortex-js/README.md b/cortex-js/README.md deleted file mode 100644 index 660664159..000000000 --- a/cortex-js/README.md +++ /dev/null @@ -1,142 +0,0 @@ -# Cortex -

- cortex-cpplogo -

- -

- Documentation - API Reference - - Changelog - Bug reports - Discord -

- -> ⚠️ **Cortex is currently in Development**: Expect breaking changes and bugs! - -## About -Cortex is an OpenAI-compatible AI engine that developers can use to build LLM apps. It is packaged with a Docker-inspired command-line interface and client libraries. It can be used as a standalone server or imported as a library. - -## Cortex Engines -Cortex supports the following engines: -- [`cortex.llamacpp`](https://github.com/janhq/cortex.llamacpp): `cortex.llamacpp` library is a C++ inference tool that can be dynamically loaded by any server at runtime. We use this engine to support GGUF inference with GGUF models. The `llama.cpp` is optimized for performance on both CPU and GPU. -- [`cortex.onnx` Repository](https://github.com/janhq/cortex.onnx): `cortex.onnx` is a C++ inference library for Windows that leverages `onnxruntime-genai` and uses DirectML to provide GPU acceleration across a wide range of hardware and drivers, including AMD, Intel, NVIDIA, and Qualcomm GPUs. -- [`cortex.tensorrt-llm`](https://github.com/janhq/cortex.tensorrt-llm): `cortex.tensorrt-llm` is a C++ inference library designed for NVIDIA GPUs. It incorporates NVIDIA’s TensorRT-LLM for GPU-accelerated inference. - -## Quicklinks - -- [Homepage](https://cortex.so/) -- [Docs](https://cortex.so/docs/) - -## Quickstart -### Prerequisites -- **OS**: - - MacOSX 13.6 or higher. - - Windows 10 or higher. - - Ubuntu 22.04 and later. -- **Dependencies**: - - **Node.js**: Version 18 and above is required to run the installation. - - **NPM**: Needed to manage packages. - - **CPU Instruction Sets**: Available for download from the [Cortex GitHub Releases](https://github.com/janhq/cortex/releases) page. - - **OpenMPI**: Required for Linux. Install by using the following command: - ```bash - sudo apt install openmpi-bin libopenmpi-dev - ``` - -> Visit [Quickstart](https://cortex.so/docs/quickstart) to get started. 
- -### NPM -``` bash -# Install using NPM -npm i -g cortexso -# Run model -cortex run mistral -# To uninstall globally using NPM -npm uninstall -g cortexso -``` - -### Homebrew -``` bash -# Install using Brew -brew install cortexso -# Run model -cortex run mistral -# To uninstall using Brew -brew uninstall cortexso -``` -> You can also install Cortex using the Cortex Installer available on [GitHub Releases](https://github.com/janhq/cortex/releases). - -## Cortex Server -```bash -cortex serve - -# Output -# Started server at http://localhost:1337 -# Swagger UI available at http://localhost:1337/api -``` - -You can now access the Cortex API server at `http://localhost:1337`, -and the Swagger UI at `http://localhost:1337/api`. - -## Build from Source - -To install Cortex from the source, follow the steps below: - -1. Clone the Cortex repository [here](https://github.com/janhq/cortex/tree/dev). -2. Navigate to the `cortex-js` folder. -3. Open the terminal and run the following command to build the Cortex project: - -```bash -npx nest build -``` - -4. Make the `command.js` executable: - -```bash -chmod +x '[path-to]/cortex/cortex-js/dist/src/command.js' -``` - -5. Link the package globally: - -```bash -npm link -``` - -## Cortex CLI Commands - -The following CLI commands are currently available. -See [CLI Reference Docs](https://cortex.so/docs/cli) for more information. - -```bash - - serve Providing API endpoint for Cortex backend. - chat Send a chat request to a model. - init|setup Init settings and download cortex's dependencies. - ps Show running models and their status. - kill Kill running cortex processes. - pull|download Download a model. Working with HuggingFace model id. - run [options] EXPERIMENTAL: Shortcut to start a model and chat. - models Subcommands for managing models. - models list List all available models. - models pull Download a specified model. - models remove Delete a specified model. - models get Retrieve the configuration of a specified model. 
- models start Start a specified model. - models stop Stop a specified model. - models update Update the configuration of a specified model. - benchmark Benchmark and analyze the performance of a specific AI model using your system. - presets Show all the available model presets within Cortex. - telemetry Retrieve telemetry logs for monitoring and analysis. - embeddings Creates an embedding vector representing the input text. - engines Subcommands for managing engines. - engines get Get an engine details. - engines list Get all the available Cortex engines. - engines init Setup and download the required dependencies to run cortex engines. - configs Subcommands for managing configurations. - configs get Get a configuration details. - configs list Get all the available configurations. - configs set Set a configuration. -``` - -## Contact Support -- For support, please file a [GitHub ticket](https://github.com/janhq/cortex/issues/new/choose). -- For questions, join our Discord [here](https://discord.gg/FTk2MvZwJH). -- For long-form inquiries, please email [hello@jan.ai](mailto:hello@jan.ai). - - diff --git a/cortex-js/control.template b/cortex-js/control.template deleted file mode 100644 index 92a6e7a2a..000000000 --- a/cortex-js/control.template +++ /dev/null @@ -1,9 +0,0 @@ -Package: cortexso -Version: -Section: base -Priority: optional -Architecture: amd64 -Depends: openmpi-bin,libopenmpi-dev -Maintainer: Homebrew Pte Ltd -Description: Cortex - Cortex is an OpenAI-compatible AI engine that developers can use to build LLM apps. It is packaged with a Docker-inspired command-line interface and client libraries. It can be used as a standalone server or imported as a library. 
diff --git a/cortex-js/cortex.ico b/cortex-js/cortex.ico deleted file mode 100644 index d9498120a6bf064f4f83889c9e7918163ce32209..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15086 zcmeHOZA@EL7``1Q{xC7~m&V1UKUgI3tELIT43ipEKp~=Id?^g7d=z80*@clgTm{BZ z93X|ZK={~%jR6ibw(bYu7ElIlLHSz2F=aC50#UM!kCwt-&$;zp+CWKfZ(Az*!`I5Cp~_kI~lv=+S1;#Xdv zIpXn#;%Cc$q$L-=Irt%Ga7lfTpYHYZB=${mo5K_iVF0Y2C&>{qx(M9?>$l8@2lha+l)UCi~g`;0a!MB9Am~2 zx3(XpQVDZ&b6$hNU~nt{&3=}#9e;TK?4EnP{_ON)+y2<F~!L!vYS{sRL8>&6dR@-#a?uJE6jm>_MN`zqDk@q7kw|3c#})q3(b15Ulw^m<$VfZ8Z27yw%bj(2{*K2Fck1lO z)}yBXg|g4#`JKL;!53+}8Rf4$kyCJoO$Yh6MQr=fClSz8_!%98xWk{jZQO1@0fGJ^ znj={A&qnO0`}-phAbtr_V}cKM|MZkRY0;~9nR?3K<~kM)Wp6C}rru#? z&hL2z9&H7z?+pauY^lIfPD_X&K@gCi&CG4cJ?H;MZCQYPcF*^y;MDOnIJQ5=)02SY#Pkns|9 zsh&dq7g|dXKwZ8JrY{$J95|Lti4G?H&-Yh7i#4yxP6k86QMmI{5wv`_&tvF1d6@LO ze{UFBk};x<|K9p9?eZgz-g{j=9vd#%GX867YG88k3gDbCTO`L@^G-~zzrDR3mhTLb zda>=7a#;d-Uzz$}nf((iziU||Eh&z%f2~#vCX>l62oDcu?BCTkzEM3U`fp^%)pJ|P ze`;zfR4NPL(&-WsqL*KEdTf$NB#cs%GQR&aGc$JmPx{err~TK~)&lN3 zRKInl|8c?p1M_`Yxr^pT5W?OGVRZdqS)}X6Mp?CE(*Ek}>!H8DpN3d0cG|yGDg~8F zMI#^}fbsozwU2F7kID6KWXIKWT=$RTFT7UOQ@C1pWWD%f9{a1#P4UXw-m&v}bo@7` zD|-gT!JP}eD!^-ay%GNwp!KuVT4xwI^@OEY^O{*#K1+>NUMu@FtvLRt-b46ua}==V z6IS;MYt{wd7monjU-brn^AtEOAw&?6d-MVQ*NkhMsaG?M=Q5bi0}P+bg@vB7$ie39 z2mNOi&{LP?H2NA8(2VAO(EGVef95FQ`b@@tJSTGz&Do(bCf&CFh@lfqeKi~6*Nf%k zoX&dn$47@@X?k=+Xq!(utnchKIN~3@=Y7Cye0+U<*XZHr=Lg}TZ-FE*z@dN3>?HXd zcvG@=ofBS@fY&3BbyPd7KQ(z1=Kh_6_0}mwL_|Qpz8i|a&39Pe)$1QQmjmf>p-v+! 
zJ(9lvw>t_K@c$2(wqKl&Sk$YfFjV>UlCDDJ3a)EQ9%tL1QEm+7K9LoMqY_o`KW`r8 AZU6uP diff --git a/cortex-js/entitlements.plist b/cortex-js/entitlements.plist deleted file mode 100644 index e56476ecf..000000000 --- a/cortex-js/entitlements.plist +++ /dev/null @@ -1,33 +0,0 @@ - - - - - - com.apple.security.cs.allow-jit - - com.apple.security.cs.allow-unsigned-executable-memory - - - - com.apple.security.app-sandbox - - com.apple.security.network.client - - com.apple.security.network.server - - com.apple.security.device.audio-input - - com.apple.security.device.microphone - - com.apple.security.device.camera - - com.apple.security.files.user-selected.read-write - - com.apple.security.cs.disable-library-validation - - com.apple.security.cs.allow-dyld-environment-variables - - com.apple.security.cs.allow-executable-memory - - - \ No newline at end of file diff --git a/cortex-js/installer.iss b/cortex-js/installer.iss deleted file mode 100644 index 874e4d79a..000000000 --- a/cortex-js/installer.iss +++ /dev/null @@ -1,86 +0,0 @@ -; Inno Setup Script -; Define the application name, version, and other details -[Setup] -AppName=Cortexso -AppVersion=1.0 -DefaultDirName={pf}\Cortexso -DefaultGroupName=Cortexso -OutputDir=. 
-OutputBaseFilename=setup -Compression=lzma -SolidCompression=yes -PrivilegesRequired=admin - -; Define the languages section -[Languages] -Name: "english"; MessagesFile: "compiler:Default.isl" - -; Define the files to be installed -[Files] -Source: "cortex.exe"; DestDir: "{app}"; Flags: ignoreversion - -; Define the icons to be created -[Icons] -Name: "{group}\Cortexso"; Filename: "{app}\cortex.exe" - -; Define the run section to execute the application after installation -[Run] -Filename: "cmd"; Parameters: "/c setx PATH ""%PATH%;{app}"""; StatusMsg: "Updating system PATH environment variable..."; Flags: runhidden -Filename: "{app}\cortex.exe"; Description: "{cm:LaunchProgram,Cortexso}"; Flags: nowait postinstall skipifsilent - -; Define the tasks section (optional, for additional tasks like creating desktop icons) -[Tasks] -Name: "desktopicon"; Description: "Create a &desktop icon"; GroupDescription: "Additional icons:"; Flags: unchecked -Name: "quicklaunchicon"; Description: "Create a &Quick Launch icon"; GroupDescription: "Additional icons:"; Flags: unchecked - -; Define icons for the additional tasks -[Icons] -Name: "{commondesktop}\Cortexso"; Filename: "{app}\cortex.exe"; Tasks: desktopicon -Name: "{userappdata}\Microsoft\Internet Explorer\Quick Launch\Cortexso"; Filename: "{app}\cortex.exe"; Tasks: quicklaunchicon - -; Define the uninstall run section to execute commands before uninstallation -[UninstallRun] -Filename: "{app}\cortex.exe"; Parameters: "stop"; StatusMsg: "Stopping Cortexso service..."; Flags: runhidden - -; Use Pascal scripting to delete the directory for all users -[Code] -function GetUsersFolder: String; -var - WinDir: String; -begin - WinDir := ExpandConstant('{win}'); - Result := Copy(WinDir, 1, Pos('\Windows', WinDir) - 1) + '\Users'; -end; - -procedure DeleteUserCortexFolder; -var - UsersFolder: String; - FindRec: TFindRec; -begin - UsersFolder := GetUsersFolder; - if FindFirst(UsersFolder + '\*', FindRec) then - begin - try - repeat - 
if (FindRec.Attributes and FILE_ATTRIBUTE_DIRECTORY <> 0) and - (FindRec.Name <> '.') and (FindRec.Name <> '..') then - begin - if DirExists(UsersFolder + '\' + FindRec.Name + '\cortex') then - begin - DelTree(UsersFolder + '\' + FindRec.Name + '\cortex', True, True, True); - end; - end; - until not FindNext(FindRec); - finally - FindClose(FindRec); - end; - end; -end; - -procedure CurUninstallStepChanged(CurUninstallStep: TUninstallStep); -begin - if CurUninstallStep = usPostUninstall then - begin - DeleteUserCortexFolder; - end; -end; diff --git a/cortex-js/nest-cli.json b/cortex-js/nest-cli.json deleted file mode 100644 index f5e93169b..000000000 --- a/cortex-js/nest-cli.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "$schema": "https://json.schemastore.org/nest-cli", - "collection": "@nestjs/schematics", - "sourceRoot": "src", - "compilerOptions": { - "deleteOutDir": true, - "plugins": [ - { - "name": "@nestjs/swagger", - "options": { - "classValidatorShim": true, - "introspectComments": true - } - } - ] - } -} diff --git a/cortex-js/package.json b/cortex-js/package.json deleted file mode 100644 index 3cabf3381..000000000 --- a/cortex-js/package.json +++ /dev/null @@ -1,158 +0,0 @@ -{ - "name": "cortexso", - "version": "0.0.1", - "description": "Cortex is an openAI-compatible local AI server that developers can use to build LLM apps. It is packaged with a Docker-inspired command-line interface and a Typescript client library. 
It can be used as a standalone server, or imported as a library.", - "author": "Jan ", - "license": "AGPL-3.0", - "homepage": "https://github.com/janhq/cortex", - "bin": { - "cortex": "./dist/src/command.js" - }, - "main": "dist/src/index.js", - "types": "dist/src/index.d.ts", - "scripts": { - "dev": "nest dev", - "compile": "npx ncc build ./dist/src/command.js -o command", - "build": "yarn add sqlite3 --build-from-source && nest build", - "build:binary": "yarn build && yarn compile && npx -q patch-package && run-script-os", - "build:binary:windows": "npx @yao-pkg/pkg . --targets node20-win", - "build:binary:linux": "npx @yao-pkg/pkg . --targets node20-linux", - "build:binary:macos": "npx @yao-pkg/pkg . --targets node20-macos", - "format": "prettier --write \"src/**/*.ts\" \"test/**/*.ts\"", - "build:extensions": "run-script-os", - "build:extensions:windows": "powershell -command \"$jobs = Get-ChildItem -Path './src/extensions' -Directory | ForEach-Object { Start-Job -Name ($_.Name) -ScriptBlock { param($_dir); try { Set-Location $_dir; yarn; yarn build; Write-Output 'Build successful in ' + $_dir } catch { Write-Error 'Error in ' + $_dir; throw } } -ArgumentList $_.FullName }; $jobs | Wait-Job; $jobs | ForEach-Object { Receive-Job -Job $_ -Keep } | ForEach-Object { Write-Host $_ }; $failed = $jobs | Where-Object { $_.State -ne 'Completed' -or $_.ChildJobs[0].JobStateInfo.State -ne 'Completed' }; if ($failed) { Exit 1 }\"", - "build:extensions:linux": "for dir in ./src/extensions/*/; do (cd \"$dir\" && yarn && yarn build); done", - "build:extensions:macos": "for dir in ./src/extensions/*/; do (cd \"$dir\" && yarn && yarn build); done", - "start": "nest start", - "start:dev": "nest start --watch", - "start:debug": "nest start --debug --watch", - "start:prod": "node dist/src/main --trace-deprecation", - "lint": "eslint \"{src,apps,libs,test}/**/*.ts\"", - "test": "jest", - "test:watch": "jest --watch", - "test:cov": "jest --coverage", - "test:debug": "node 
--inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand", - "test:e2e": "jest --config ./test/jest-e2e.json", - "typeorm": "typeorm-ts-node-esm", - "build:dev": "yarn build && run-script-os && npm link", - "build:dev:windows": "echo 'Windows build complete'", - "build:dev:macos": "chmod +x ./dist/src/command.js", - "build:dev:linux": "chmod +x ./dist/src/command.js", - "preuninstall": "node ./uninstall.js" - }, - "dependencies": { - "@cortexso/cortex.js": "^0.1.7", - "@nestjs/axios": "^3.0.2", - "@nestjs/common": "^10.0.0", - "@nestjs/config": "^3.2.2", - "@nestjs/core": "^10.0.0", - "@nestjs/devtools-integration": "^0.1.6", - "@nestjs/event-emitter": "^2.0.4", - "@nestjs/mapped-types": "*", - "@nestjs/platform-express": "^10.0.0", - "@nestjs/sequelize": "^10.0.1", - "@nestjs/swagger": "^7.3.1", - "@terascope/fetch-github-release": "^0.8.8", - "@types/node-os-utils": "^1.3.4", - "axios": "^1.6.8", - "class-transformer": "^0.5.1", - "class-validator": "^0.14.1", - "cli-progress": "^3.12.0", - "cortex-cpp": "0.5.0-46", - "decompress": "^4.2.1", - "hyllama": "^0.2.2", - "js-yaml": "^4.1.0", - "nest-commander": "^3.13.0", - "node-os-utils": "^1.3.7", - "ora": "5.4.1", - "readline": "^1.3.0", - "reflect-metadata": "^0.2.0", - "rxjs": "^7.8.1", - "sequelize": "^6.37.3", - "sequelize-typescript": "^2.1.6", - "sqlite3": "^5.1.7", - "systeminformation": "^5.23.4", - "ulid": "^2.3.0", - "uuid": "^9.0.1", - "whatwg-url": "^14.0.0", - "yaml": "^2.4.2" - }, - "devDependencies": { - "@nestjs/cli": "^10.0.0", - "@nestjs/schematics": "^10.0.0", - "@nestjs/testing": "^10.0.0", - "@types/cli-progress": "^3.11.5", - "@types/decompress": "^4.2.7", - "@types/express": "^4.17.17", - "@types/jest": "^29.5.2", - "@types/js-yaml": "^4.0.9", - "@types/node": "^20.12.9", - "@types/sequelize": "^4.28.20", - "@types/supertest": "^6.0.2", - "@types/update-notifier": "^6.0.8", - "@types/uuid": "^9.0.8", - "@typescript-eslint/eslint-plugin": "7.16.1", - 
"@typescript-eslint/parser": "7.16.1", - "@vercel/ncc": "^0.38.0", - "@yao-pkg/pkg": "^5.12.0", - "cpx": "^1.5.0", - "env-cmd": "10.1.0", - "eslint": "8.57.0", - "eslint-config-prettier": "9.1.0", - "eslint-plugin-import": "2.29.1", - "eslint-plugin-prettier": "5.2.1", - "hanbi": "^1.0.3", - "is-primitive": "^3.0.1", - "jest": "^29.5.0", - "nest-commander-testing": "^3.3.0", - "node-gyp": "^10.1.0", - "patch-package": "^8.0.0", - "prettier": "^3.0.0", - "resedit-cli": "^2.0.0", - "run-script-os": "^1.1.6", - "source-map-support": "^0.5.21", - "supertest": "^6.3.3", - "ts-jest": "^29.1.0", - "ts-loader": "^9.4.3", - "tsconfig-paths": "^4.2.0", - "typescript": "^5.1.3" - }, - "files": [ - "dist" - ], - "jest": { - "moduleFileExtensions": [ - "js", - "json", - "ts" - ], - "rootDir": "src", - "testRegex": ".*\\.spec\\.ts$", - "transform": { - "^.+\\.(t|j)s$": "ts-jest" - }, - "collectCoverageFrom": [ - "**/*.(t|j)s" - ], - "coverageDirectory": "../coverage", - "testEnvironment": "node", - "moduleNameMapper": { - "@/(.*)$": "/$1", - "@commanders/(.*)$": "/../src/infrastructure/commanders/$1" - } - }, - "pkg": { - "scripts": "command/**/*.js", - "assets": [ - "command/*.node", - "**/package.json", - "node_modules/axios/**/*", - "node_modules/swagger-ui-dist/", - "**/main.js", - "node_modules/cortex-cpp/**/*", - "dist/src/utils/cortex-cpp/**/*", - "**/cortex-cpp.js" - ], - "outputPath": "dist" - } -} diff --git a/cortex-js/patches/sqlite3+5.1.7.patch b/cortex-js/patches/sqlite3+5.1.7.patch deleted file mode 100644 index fc72a4d4d..000000000 --- a/cortex-js/patches/sqlite3+5.1.7.patch +++ /dev/null @@ -1,15 +0,0 @@ -diff --git a/node_modules/sqlite3/.DS_Store b/node_modules/sqlite3/.DS_Store -new file mode 100644 -index 0000000..10f4d43 -Binary files /dev/null and b/node_modules/sqlite3/.DS_Store differ -diff --git a/node_modules/sqlite3/lib/sqlite3.js b/node_modules/sqlite3/lib/sqlite3.js -index 430a2b8..9df0857 100644 ---- a/node_modules/sqlite3/lib/sqlite3.js -+++ 
b/node_modules/sqlite3/lib/sqlite3.js -@@ -1,5 +1,5 @@ - const path = require('path'); --const sqlite3 = require('./sqlite3-binding.js'); -+const sqlite3 = require('./../build/Release/node_sqlite3.node'); - const EventEmitter = require('events').EventEmitter; - module.exports = exports = sqlite3; - diff --git a/cortex-js/presets/alpaca.yml b/cortex-js/presets/alpaca.yml deleted file mode 100644 index 4d9a0f9b2..000000000 --- a/cortex-js/presets/alpaca.yml +++ /dev/null @@ -1,16 +0,0 @@ -version: 1.0 -# Model Settings -prompt_template: |+ - {system_message} - ### Instruction: {prompt} - ### Response: -ctx_len: 2048 - -# Results Preferences -stop: - - -temperature: 0.7 -top_p: 0.95 -max_tokens: 2048 -frequency_penalty: 0 -presence_penalty: 0 diff --git a/cortex-js/presets/chatml.yml b/cortex-js/presets/chatml.yml deleted file mode 100644 index 88b525ef0..000000000 --- a/cortex-js/presets/chatml.yml +++ /dev/null @@ -1,18 +0,0 @@ -version: 1.0 -# Model Settings -prompt_template: |+ - <|im_start|>system - {system_message}<|im_end|> - <|im_start|>user - {prompt}<|im_end|> - <|im_start|>assistant -ctx_len: 2048 - -# Results Preferences -stop: - - <|im_end|> -temperature: 0.7 -top_p: 0.95 -max_tokens: 2048 -frequency_penalty: 0 -presence_penalty: 0 diff --git a/cortex-js/presets/llama3.yml b/cortex-js/presets/llama3.yml deleted file mode 100644 index 92f2cb677..000000000 --- a/cortex-js/presets/llama3.yml +++ /dev/null @@ -1,19 +0,0 @@ -version: 1.0 -# Model Settings -prompt_template: |+ - <|begin_of_text|><|start_header_id|>system<|end_header_id|> - - {system_message}<|eot_id|><|start_header_id|>user<|end_header_id|> - - {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|> -ctx_len: 2048 - -# Results Preferences -stop: - - <|eot_id|> - - <|<|end_header_id|>|> -temperature: 0.7 -top_p: 0.95 -max_tokens: 2048 -frequency_penalty: 0 -presence_penalty: 0 diff --git a/cortex-js/presets/vicuna.yml b/cortex-js/presets/vicuna.yml deleted file mode 100644 index 
b08495c6d..000000000 --- a/cortex-js/presets/vicuna.yml +++ /dev/null @@ -1,20 +0,0 @@ -version: 1.0 -# Model Settings -prompt_template: |- - <|begin_of_sentence|>{system_prompt} - - User: {prompt} - - Assistant: - -ctx_len: 2048 - -# Results Preferences -stop: - - - - end_of_sentence -temperature: 0.7 -top_p: 0.95 -max_tokens: 2048 -frequency_penalty: 0 -presence_penalty: 0 diff --git a/cortex-js/src/app.module.ts b/cortex-js/src/app.module.ts deleted file mode 100644 index e7f8d6698..000000000 --- a/cortex-js/src/app.module.ts +++ /dev/null @@ -1,77 +0,0 @@ -import { MiddlewareConsumer, Module, NestModule } from '@nestjs/common'; -import { MessagesModule } from './usecases/messages/messages.module'; -import { ThreadsModule } from './usecases/threads/threads.module'; -import { ModelsModule } from './usecases/models/models.module'; -import { DatabaseModule } from './infrastructure/database/database.module'; -import { ChatModule } from './usecases/chat/chat.module'; -import { AssistantsModule } from './usecases/assistants/assistants.module'; -import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module'; -import { CortexModule } from './usecases/cortex/cortex.module'; -import { ConfigModule } from '@nestjs/config'; -import { env } from 'node:process'; -import { FileManagerModule } from './infrastructure/services/file-manager/file-manager.module'; -import { AppLoggerMiddleware } from './infrastructure/middlewares/app.logger.middleware'; -import { TelemetryModule } from './usecases/telemetry/telemetry.module'; -import { APP_FILTER } from '@nestjs/core'; -import { GlobalExceptionFilter } from './infrastructure/exception/global.exception'; -import { EventEmitterModule } from '@nestjs/event-emitter'; -import { AssistantsController } from './infrastructure/controllers/assistants.controller'; -import { ChatController } from './infrastructure/controllers/chat.controller'; -import { EmbeddingsController } from 
'./infrastructure/controllers/embeddings.controller'; -import { ModelsController } from './infrastructure/controllers/models.controller'; -import { ThreadsController } from './infrastructure/controllers/threads.controller'; -import { SystemController } from './infrastructure/controllers/system.controller'; -import { DownloadManagerModule } from './infrastructure/services/download-manager/download-manager.module'; -import { ContextModule } from './infrastructure/services/context/context.module'; -import { ExtensionsModule } from './extensions/extensions.module'; -import { ConfigsModule } from './usecases/configs/configs.module'; -import { EnginesModule } from './usecases/engines/engines.module'; -import { EnginesController } from './infrastructure/controllers/engines.controller'; -import { ResourceManagerModule } from './infrastructure/services/resources-manager/resources-manager.module'; - -@Module({ - imports: [ - ConfigModule.forRoot({ - isGlobal: true, - envFilePath: env.NODE_ENV !== 'production' ? 
'.env.development' : '.env', - }), - EventEmitterModule.forRoot(), - DownloadManagerModule, - DatabaseModule, - MessagesModule, - ThreadsModule, - ModelsModule, - ChatModule, - AssistantsModule, - CortexModule, - ExtensionModule, - FileManagerModule, - TelemetryModule, - ContextModule, - DownloadManagerModule, - ExtensionsModule, - ConfigsModule, - EnginesModule, - ResourceManagerModule, - ], - controllers: [ - AssistantsController, - ChatController, - EmbeddingsController, - ModelsController, - ThreadsController, - SystemController, - EnginesController, - ], - providers: [ - { - provide: APP_FILTER, - useClass: GlobalExceptionFilter, - }, - ], -}) -export class AppModule implements NestModule { - configure(consumer: MiddlewareConsumer): void { - consumer.apply(AppLoggerMiddleware).forRoutes('*'); - } -} diff --git a/cortex-js/src/app.ts b/cortex-js/src/app.ts deleted file mode 100644 index 53104b886..000000000 --- a/cortex-js/src/app.ts +++ /dev/null @@ -1,75 +0,0 @@ -import { NestFactory } from '@nestjs/core'; -import { DocumentBuilder, SwaggerModule } from '@nestjs/swagger'; -import { AppModule } from './app.module'; -import { fileManagerService } from './infrastructure/services/file-manager/file-manager.service'; -import { ValidationPipe } from '@nestjs/common'; -import { TelemetryUsecases } from './usecases/telemetry/telemetry.usecases'; -import { cleanLogs } from './utils/logs'; -export const getApp = async (host?: string, port?: number) => { - const app = await NestFactory.create(AppModule, { - snapshot: true, - cors: true, - logger: console, - }); - - // Set the global prefix for the API /v1/ - app.setGlobalPrefix('v1'); - - await fileManagerService.getConfig(); - - const telemetryService = await app.resolve(TelemetryUsecases); - await telemetryService.initInterval(); - - app.useGlobalPipes( - new ValidationPipe({ - transform: true, - enableDebugMessages: true, - }), - ); - - cleanLogs(); - const config = new DocumentBuilder() - .setTitle('Cortex API') - 
.setDescription( - 'Cortex API provides a command-line interface (CLI) for seamless interaction with Large Language Models (LLMs). It is fully compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference) and enables straightforward command execution and management of LLM interactions.', - ) - .setVersion('1.0') - .addTag( - 'Inference', - 'This endpoint initiates interaction with a Large Language Models (LLM).', - ) - .addTag( - 'Assistants', - 'These endpoints manage the lifecycle of an Assistant within a conversation thread.', - ) - .addTag( - 'Models', - 'These endpoints provide a list and descriptions of all available models within the Cortex framework.', - ) - .addTag( - 'Messages', - 'These endpoints manage the retrieval and storage of conversation content, including responses from LLMs and other metadata related to chat interactions.', - ) - .addTag( - 'Threads', - 'These endpoints handle the creation, retrieval, updating, and deletion of conversation threads.', - ) - .addTag( - 'Embeddings', - 'Endpoint for creating and retrieving embedding vectors from text inputs using specified models.', - ) - .addTag( - 'Engines', - 'Endpoints for managing the available engines within Cortex.', - ) - .addTag( - 'System', - 'Endpoints for stopping the Cortex API server, checking its status, and fetching system events.', - ) - .addServer(`http://${host || '127.0.0.1'}:${port || 1337}`) - .build(); - const document = SwaggerModule.createDocument(app, config); - - SwaggerModule.setup('api', app, document); - return app; -}; diff --git a/cortex-js/src/command.module.ts b/cortex-js/src/command.module.ts deleted file mode 100644 index 242136d07..000000000 --- a/cortex-js/src/command.module.ts +++ /dev/null @@ -1,83 +0,0 @@ -import { Module } from '@nestjs/common'; -import { ConfigModule } from '@nestjs/config'; -import { CortexModule } from './usecases/cortex/cortex.module'; -import { ModelsCommand } from './infrastructure/commanders/models.command'; 
-import { HttpModule } from '@nestjs/axios'; -import { InitRunModeQuestions } from './infrastructure/commanders/questions/init.questions'; -import { ModelListCommand } from './infrastructure/commanders/models/model-list.command'; -import { ModelPullCommand } from './infrastructure/commanders/models/model-pull.command'; -import { CortexCommand } from './infrastructure/commanders/cortex-command.commander'; -import { ChatCommand } from './infrastructure/commanders/chat.command'; -import { ModelStartCommand } from './infrastructure/commanders/models/model-start.command'; -import { ModelStopCommand } from './infrastructure/commanders/models/model-stop.command'; -import { ModelGetCommand } from './infrastructure/commanders/models/model-get.command'; -import { ModelRemoveCommand } from './infrastructure/commanders/models/model-remove.command'; -import { RunCommand } from './infrastructure/commanders/run.command'; -import { ModelUpdateCommand } from './infrastructure/commanders/models/model-update.command'; -import { FileManagerModule } from './infrastructure/services/file-manager/file-manager.module'; -import { PSCommand } from './infrastructure/commanders/ps.command'; -import { PresetCommand } from './infrastructure/commanders/presets.command'; -import { TelemetryModule } from './usecases/telemetry/telemetry.module'; -import { TelemetryCommand } from './infrastructure/commanders/telemetry.command'; -import { EmbeddingCommand } from './infrastructure/commanders/embeddings.command'; -import { BenchmarkCommand } from './infrastructure/commanders/benchmark.command'; -import { ServeStopCommand } from './infrastructure/commanders/serve-stop.command'; -import { ContextModule } from './infrastructure/services/context/context.module'; -import { EnginesCommand } from './infrastructure/commanders/engines.command'; -import { EnginesListCommand } from './infrastructure/commanders/engines/engines-list.command'; -import { EnginesGetCommand } from 
'./infrastructure/commanders/engines/engines-get.command'; -import { EnginesInitCommand } from './infrastructure/commanders/engines/engines-init.command'; -import { EnginesSetCommand } from './infrastructure/commanders/engines/engines-set.command'; - -@Module({ - imports: [ - ConfigModule.forRoot({ - isGlobal: true, - envFilePath: - process.env.NODE_ENV !== 'production' ? '.env.development' : '.env', - }), - CortexModule, - HttpModule, - FileManagerModule, - TelemetryModule, - ContextModule, - ], - providers: [ - CortexCommand, - ModelsCommand, - ChatCommand, - PSCommand, - PresetCommand, - EmbeddingCommand, - BenchmarkCommand, - EnginesCommand, - - // Questions - InitRunModeQuestions, - - // Model commands - ModelStartCommand, - ModelStopCommand, - ModelListCommand, - ModelGetCommand, - ModelRemoveCommand, - ModelPullCommand, - ModelUpdateCommand, - - // Shortcuts - RunCommand, - - // Telemetry - TelemetryCommand, - - // Serve - ServeStopCommand, - - // Engines - EnginesListCommand, - EnginesGetCommand, - EnginesInitCommand, - EnginesSetCommand, - ], -}) -export class CommandModule {} diff --git a/cortex-js/src/command.ts b/cortex-js/src/command.ts deleted file mode 100644 index 37aa2a77a..000000000 --- a/cortex-js/src/command.ts +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env node -import ora from 'ora'; -const dependenciesSpinner = ora('Loading dependencies...').start(); -const time = Date.now(); -import { CommandFactory } from 'nest-commander'; -import { CommandModule } from './command.module'; -import { TelemetryUsecases } from './usecases/telemetry/telemetry.usecases'; -import { TelemetrySource } from './domain/telemetry/telemetry.interface'; -import { ContextService } from '@/infrastructure/services/context/context.service'; - -dependenciesSpinner.succeed( - 'Dependencies loaded in ' + (Date.now() - time) + 'ms', -); - -process.removeAllListeners('warning'); -process.title = 'Cortex CLI Command Process'; - -async function bootstrap() { - let telemetryUseCase: 
TelemetryUsecases | null = null; - let contextService: ContextService | null = null; - const app = await CommandFactory.createWithoutRunning(CommandModule, { - logger: ['warn', 'error'], - enablePositionalOptions: true, - errorHandler: async (error) => { - await telemetryUseCase!.createCrashReport(error, TelemetrySource.CLI); - console.error(error); - process.exit(1); - }, - serviceErrorHandler: async (error) => { - await telemetryUseCase!.createCrashReport(error, TelemetrySource.CLI); - console.error(error); - process.exit(1); - }, - }); - - telemetryUseCase = await app.resolve(TelemetryUsecases); - contextService = await app.resolve(ContextService); - - const anonymousData = await telemetryUseCase!.updateAnonymousData(); - - await contextService!.init(async () => { - contextService!.set('source', TelemetrySource.CLI); - contextService!.set('sessionId', anonymousData?.sessionId); - telemetryUseCase!.sendActivationEvent(TelemetrySource.CLI); - telemetryUseCase!.sendCrashReport(); - await CommandFactory.runApplication(app); - }); -} - -bootstrap(); diff --git a/cortex-js/src/domain/abstracts/engine.abstract.ts b/cortex-js/src/domain/abstracts/engine.abstract.ts deleted file mode 100644 index fbd457e0d..000000000 --- a/cortex-js/src/domain/abstracts/engine.abstract.ts +++ /dev/null @@ -1,65 +0,0 @@ -/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */ -import stream from 'stream'; -import { Model, ModelSettingParams } from '../../domain/models/model.interface'; -import { Extension } from './extension.abstract'; - -export enum EngineStatus { - READY = 'READY', - MISSING_CONFIGURATION = 'MISSING_CONFIGURATION', - NOT_INITIALIZED = 'NOT_INITIALIZED', - NOT_SUPPORTED = 'NOT_SUPPORTED', - ERROR = 'ERROR', -} - -/** - * This class should be extended by any class that represents an engine extension. - * It provides methods for loading and unloading models, and for making inference requests. 
- */ -export abstract class EngineExtension extends Extension { - abstract onLoad(): void; - - transformPayload?: Function; - - transformResponse?: Function; - - status: EngineStatus = EngineStatus.READY; - - /** - * Makes an inference request to the engine. - * @param dto - * @param headers - */ - abstract inference( - dto: any, - headers: Record, - ): Promise; - - /** - * Checks if a model is running by the engine - * This method should check run-time status of the model - * Since the model can be corrupted during the run-time - * This method should return false if the model is not running - * @param modelId - */ - async isModelRunning(modelId: string): Promise { - return true; - } - - /** - * Loads a model into the engine. - * There are model settings such as `ngl` and `ctx_len` that can be passed to the engine. - * Applicable for local engines only - * @param model - * @param settingParams - */ - async loadModel( - model: Model, - settingParams?: ModelSettingParams, - ): Promise {} - - /** - * Unloads a model from the engine. - * @param modelId - */ - async unloadModel(modelId: string, engine?: string): Promise {} -} diff --git a/cortex-js/src/domain/abstracts/extension.abstract.ts b/cortex-js/src/domain/abstracts/extension.abstract.ts deleted file mode 100644 index ce62df74c..000000000 --- a/cortex-js/src/domain/abstracts/extension.abstract.ts +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Represents a base extension. - * This class should be extended by any class that represents an extension. - */ -export abstract class Extension { - /** @type {string} Name of the extension. */ - name: string; - - /** @type {string} Product Name of the extension. */ - productName?: string; - - /** @type {string} The URL of the extension to load. */ - url?: string; - - /** @type {boolean} Whether the extension is activated or not. */ - active?: boolean; - - /** @type {string} Extension's description. */ - description?: string; - - /** @type {string} Extension's version. 
*/ - version?: string; - - /** @type {string} The status of the extension. */ - status: string; - - /** - * Called when the extension is loaded. - * Any initialization logic for the extension should be put here. - */ - onLoad(): void {} - - /** - * Called when the extension is unloaded. - * Any cleanup logic for the extension should be put here. - */ - onUnload(): void {} -} diff --git a/cortex-js/src/domain/abstracts/oai.abstract.ts b/cortex-js/src/domain/abstracts/oai.abstract.ts deleted file mode 100644 index 09b2fbbf1..000000000 --- a/cortex-js/src/domain/abstracts/oai.abstract.ts +++ /dev/null @@ -1,70 +0,0 @@ -import { HttpService } from '@nestjs/axios'; -import { EngineExtension } from './engine.abstract'; -import stream, { Transform } from 'stream'; -import { firstValueFrom } from 'rxjs'; -import { omit } from 'lodash'; - -export abstract class OAIEngineExtension extends EngineExtension { - abstract apiUrl: string; - abstract apiKey?: string; - - constructor(protected readonly httpService: HttpService) { - super(); - } - - override onLoad(): void {} - - override async inference( - createChatDto: any, - headers: Record, - ): Promise { - const payload = this.transformPayload - ? this.transformPayload(createChatDto) - : createChatDto; - const { stream: isStream } = payload; - const additionalHeaders = omit(headers, [ - 'content-type', - 'authorization', - 'content-length', - ]); - const response = await firstValueFrom( - this.httpService.post(this.apiUrl, payload, { - headers: { - 'Content-Type': headers['content-type'] ?? 'application/json', - Authorization: this.apiKey - ? `Bearer ${this.apiKey}` - : headers['authorization'], - ...additionalHeaders, - }, - responseType: isStream ? 
'stream' : 'json', - }), - ); - - if (!response) { - throw new Error('No response'); - } - if (!this.transformResponse) { - return response.data; - } - if (isStream) { - const transformResponse = this.transformResponse.bind(this); - const lineStream = new Transform({ - transform(chunk, encoding, callback) { - const lines = chunk.toString().split('\n'); - const transformedLines = []; - for (const line of lines) { - if (line.trim().length > 0) { - const transformedLine = transformResponse(line, true); - if (transformedLine) { - transformedLines.push(`data: ${transformedLine}\n\n`); - } - } - } - callback(null, transformedLines.join('')); - }, - }); - return response.data.pipe(lineStream); - } - return this.transformResponse(response.data, false); - } -} diff --git a/cortex-js/src/domain/config/config.interface.ts b/cortex-js/src/domain/config/config.interface.ts deleted file mode 100644 index b99d76600..000000000 --- a/cortex-js/src/domain/config/config.interface.ts +++ /dev/null @@ -1,8 +0,0 @@ -export interface Config { - dataFolderPath: string; - cortexCppHost: string; - cortexCppPort: number; - // todo: will remove optional when all command request api server - apiServerPort?: number; - apiServerHost?: string; -} diff --git a/cortex-js/src/domain/models/assistant.interface.ts b/cortex-js/src/domain/models/assistant.interface.ts deleted file mode 100644 index d49eb1362..000000000 --- a/cortex-js/src/domain/models/assistant.interface.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { Cortex } from '@cortexso/cortex.js'; -// TODO: Why we need to alias these? 
- -export interface Assistant extends Cortex.Beta.Assistant { - avatar?: string; -} - -export type AssistantResponseFormatOption = - Cortex.Beta.Threads.AssistantResponseFormatOption; - -export interface AssistantToolResources - extends Cortex.Beta.Assistant.ToolResources {} diff --git a/cortex-js/src/domain/models/download.interface.ts b/cortex-js/src/domain/models/download.interface.ts deleted file mode 100644 index 8075f25f5..000000000 --- a/cortex-js/src/domain/models/download.interface.ts +++ /dev/null @@ -1,80 +0,0 @@ -export class DownloadState { - /** - * The id of a particular download. Being used to prevent duplication of downloads. - */ - id: string; - - /** - * For displaying purposes. - */ - title: string; - - /** - * The type of download. - */ - type: DownloadType; - - /** - * The status of the download. - */ - status: DownloadStatus; - - /** - * Explanation of the error if the download failed. - */ - error?: string; - - /** - * The progress of the download. It is a number between 0 and 100. - */ - progress: number; - - /** - * The actual downloads. [DownloadState] is just a group to supporting for download multiple files. - */ - children: DownloadItem[]; -} - -export enum DownloadStatus { - Pending = 'pending', - Downloading = 'downloading', - Error = 'error', - Downloaded = 'downloaded', -} - -export class DownloadItem { - /** - * Filename of the download. 
- */ - id: string; - - time: { - elapsed: number; - remaining: number; - }; - - size: { - total: number; - transferred: number; - }; - - progress: number; - - checksum?: string; - - status: DownloadStatus; - - error?: string; - - metadata?: Record; -} - -export interface DownloadStateEvent { - data: DownloadState[]; -} - -export enum DownloadType { - Model = 'model', - Miscelanous = 'miscelanous', - Engine = 'engine', -} diff --git a/cortex-js/src/domain/models/huggingface.interface.ts b/cortex-js/src/domain/models/huggingface.interface.ts deleted file mode 100644 index 2c2274b3e..000000000 --- a/cortex-js/src/domain/models/huggingface.interface.ts +++ /dev/null @@ -1,74 +0,0 @@ -export interface HuggingFaceModelVersion { - rfilename: string; - downloadUrl?: string; - fileSize?: number; - quantization?: Quantization; -} - -export interface HuggingFaceRepoSibling { - rfilename: string; - downloadUrl?: string; - fileSize?: number; - quantization?: Quantization; - lfs?: { oid?: string }; -} -export interface HuggingFaceRepoData { - id: string; - modelId: string; - modelUrl?: string; - author: string; - sha: string; - downloads: number; - lastModified: string; - private: boolean; - disabled: boolean; - gated: boolean; - pipeline_tag: 'text-generation'; - tags: Array<'transformers' | 'pytorch' | 'safetensors' | string>; - cardData: Record; - siblings: HuggingFaceRepoSibling[]; - createdAt: string; -} - -const CardDataKeys = [ - 'base_model', - 'datasets', - 'inference', - 'language', - 'library_name', - 'license', - 'model_creator', - 'model_name', - 'model_type', - 'pipeline_tag', - 'prompt_template', - 'quantized_by', - 'tags', -] as const; -export type CardDataKeysTuple = typeof CardDataKeys; -export type CardDataKeys = CardDataKeysTuple[number]; - -export const AllQuantizations = [ - 'Q3_K_S', - 'Q3_K_M', - 'Q3_K_L', - 'Q4_K_S', - 'Q4_K_M', - 'Q5_K_S', - 'Q5_K_M', - 'Q4_0', - 'Q4_1', - 'Q5_0', - 'Q5_1', - 'IQ2_XXS', - 'IQ2_XS', - 'Q2_K', - 'Q2_K_S', - 'Q6_K', - 
'Q8_0', - 'F16', - 'F32', - 'COPY', -]; -export type QuantizationsTuple = typeof AllQuantizations; -export type Quantization = QuantizationsTuple[number]; diff --git a/cortex-js/src/domain/models/inference-setting.interface.ts b/cortex-js/src/domain/models/inference-setting.interface.ts deleted file mode 100644 index 4a7a30c44..000000000 --- a/cortex-js/src/domain/models/inference-setting.interface.ts +++ /dev/null @@ -1,20 +0,0 @@ -export interface InferenceSetting { - inferenceId: string; - - settings: InferenceSettingDocument[]; -} - -export interface InferenceSettingDocument { - key: string; - extensionName: string; - title: string; - description: string; - controllerType: string; - controllerProps: ControllerProps; -} - -export interface ControllerProps { - placeholder: string; - value: string; - type?: string; -} diff --git a/cortex-js/src/domain/models/message.interface.ts b/cortex-js/src/domain/models/message.interface.ts deleted file mode 100644 index 6cd2176a2..000000000 --- a/cortex-js/src/domain/models/message.interface.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { Cortex } from '@cortexso/cortex.js'; - -export interface Message extends Cortex.Beta.Threads.Message {} - -export type MessageContent = Cortex.Beta.Threads.MessageContent; - -export type TextContentBlock = Cortex.Beta.Threads.TextContentBlock; - -export interface MessageIncompleteDetails - extends Cortex.Beta.Threads.Message.IncompleteDetails {} - -export interface MessageAttachment - extends Cortex.Beta.Threads.Message.Attachment {} diff --git a/cortex-js/src/domain/models/model.event.ts b/cortex-js/src/domain/models/model.event.ts deleted file mode 100644 index a28a9afa4..000000000 --- a/cortex-js/src/domain/models/model.event.ts +++ /dev/null @@ -1,38 +0,0 @@ -export type ModelId = string; - -const ModelLoadingEvents = [ - 'starting', - 'stopping', - 'started', - 'stopped', - 'starting-failed', - 'stopping-failed', - 'model-downloaded', - 'model-downloaded-failed', - 'model-deleted', -] as 
const; -export type ModelLoadingEvent = (typeof ModelLoadingEvents)[number]; - -const AllModelStates = ['starting', 'stopping', 'started'] as const; -export type ModelState = (typeof AllModelStates)[number]; - -export interface ModelStatus { - model: ModelId; - status: ModelState; - metadata: Record; -} - -export interface ModelEvent { - model: ModelId; - event: ModelLoadingEvent; - metadata: Record; -} - -export const EmptyModelEvent = {}; - -export interface ModelStatusAndEvent { - data: { - status: Record; - event: ModelEvent | typeof EmptyModelEvent; - }; -} diff --git a/cortex-js/src/domain/models/model.interface.ts b/cortex-js/src/domain/models/model.interface.ts deleted file mode 100644 index 51861b768..000000000 --- a/cortex-js/src/domain/models/model.interface.ts +++ /dev/null @@ -1,186 +0,0 @@ -import { Cortex } from '@cortexso/cortex.js'; - -export interface Model - extends Cortex.Model, - ModelSettingParams, - ModelRuntimeParams { - /** - * Model identifier. - */ - model: string; - - /** - * GGUF metadata: general.name - */ - name?: string; - - /** - * GGUF metadata: version - */ - version?: string; - - /** - * The model download source. It can be an external url or a local filepath. - */ - files: string[] | ModelArtifact; - - metadata?: Record; -} - -/** - * The available model settings. - */ -export interface ModelSettingParams { - /** - * The context length for model operations varies; the maximum depends on the specific model used. - */ - ctx_len?: number; - - /** - * The number of layers to load onto the GPU for acceleration. - */ - ngl?: number; - - /** - * Support embedding or not (legacy) - */ - embedding?: boolean; - - /** - * Number of parallel sequences to decode - */ - n_parallel?: number; - - /** - * Determines CPU inference threads, limited by hardware and OS. 
(Maximum determined by system) - */ - cpu_threads?: number; - - /** - * GGUF metadata: tokenizer.chat_template - */ - prompt_template?: string; - system_prompt?: string; - ai_prompt?: string; - user_prompt?: string; - llama_model_path?: string; - mmproj?: string; - cont_batching?: boolean; - - /** - * The model engine. - */ - engine?: string; - - /** - * The prompt to use for internal configuration - */ - pre_prompt?: string; - - /** - * The batch size for prompt eval step - */ - n_batch?: number; - - /** - * To enable prompt caching or not - */ - caching_enabled?: boolean; - - /** - * Group attention factor in self-extend - */ - grp_attn_n?: number; - - /** - * Group attention width in self-extend - */ - grp_attn_w?: number; - - /** - * Prevent system swapping of the model to disk in macOS - */ - mlock?: boolean; - - /** - * You can constrain the sampling using GBNF grammars by providing path to a grammar file - */ - grammar_file?: string; - - /** - * To enable Flash Attention, default is true - */ - flash_attn?: boolean; - - /** - * KV cache type: f16, q8_0, q4_0, default is f16 - */ - cache_type?: string; - - /** - * To enable mmap, default is true - */ - use_mmap?: boolean; - - /** - * Model type we want to use: llm or embedding, default value is llm (latest llama.cpp update) - */ - model_type?: string; - - /** - * The model path. - */ - model_path?: string; -} - -/** - * The available model runtime parameters. - */ -export interface ModelRuntimeParams { - /** - * Controls the randomness of the model’s output. - */ - temperature?: number; - token_limit?: number; - top_k?: number; - - /** - * Set probability threshold for more relevant outputs. - */ - top_p?: number; - - /** - * Enable real-time data processing for faster predictions. - */ - stream?: boolean; - - /* - * The maximum number of tokens the model will generate in a single response. 
- */ - max_tokens?: number; - - /** - * Defines specific tokens or phrases at which the model will stop generating further output. - */ - stop?: string[]; - - /** - * Adjusts the likelihood of the model repeating words or phrases in its output. - */ - frequency_penalty?: number; - - /** - * Influences the generation of new and varied concepts in the model’s output. - */ - presence_penalty?: number; -} - -/** - * The model artifact object. - * In-case the model files is not a raw file list - */ -export interface ModelArtifact { - mmproj?: string; - llama_model_path?: string; - model_path?: string; -} diff --git a/cortex-js/src/domain/models/prompt-template.interface.ts b/cortex-js/src/domain/models/prompt-template.interface.ts deleted file mode 100644 index a066c5ff6..000000000 --- a/cortex-js/src/domain/models/prompt-template.interface.ts +++ /dev/null @@ -1,6 +0,0 @@ -export type PromptTemplate = { - system_prompt?: string; - ai_prompt?: string; - user_prompt?: string; - error?: string; -}; diff --git a/cortex-js/src/domain/models/resource.interface.ts b/cortex-js/src/domain/models/resource.interface.ts deleted file mode 100644 index a048c38fd..000000000 --- a/cortex-js/src/domain/models/resource.interface.ts +++ /dev/null @@ -1,21 +0,0 @@ -export interface ResourceEvent { - data: ResourceStatus; -} - -export interface ResourceStatus { - mem: UsedMemInfo; - cpu: { - usage: number; - }; - gpus: GpuInfo[]; -} - -export interface UsedMemInfo { - total: number; - used: number; -} - -export interface GpuInfo { - name: string | undefined; - vram: UsedMemInfo; -} diff --git a/cortex-js/src/domain/models/thread.interface.ts b/cortex-js/src/domain/models/thread.interface.ts deleted file mode 100644 index 7e98a06b0..000000000 --- a/cortex-js/src/domain/models/thread.interface.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { Assistant } from './assistant.interface'; -import { Cortex } from '@cortexso/cortex.js'; - -export interface ThreadToolResources - extends 
Cortex.Beta.Threads.Thread.ToolResources {} - -export interface Thread extends Cortex.Beta.Threads.Thread { - title: string; - - assistants: Assistant[]; - - tool_resources: ThreadToolResources | null; -} diff --git a/cortex-js/src/domain/repositories/extension.interface.ts b/cortex-js/src/domain/repositories/extension.interface.ts deleted file mode 100644 index 5c453af86..000000000 --- a/cortex-js/src/domain/repositories/extension.interface.ts +++ /dev/null @@ -1,4 +0,0 @@ -import { Extension } from '@/domain/abstracts/extension.abstract'; -import { Repository } from './repository.interface'; - -export abstract class ExtensionRepository extends Repository {} diff --git a/cortex-js/src/domain/repositories/model.interface.ts b/cortex-js/src/domain/repositories/model.interface.ts deleted file mode 100644 index 193af9a52..000000000 --- a/cortex-js/src/domain/repositories/model.interface.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { Model } from '@/domain/models/model.interface'; -import { Repository } from './repository.interface'; - -export abstract class ModelRepository extends Repository { - abstract loadModelByFile( - modelId: string, - filePath: string, - modelFile: string, - ): Promise; -} diff --git a/cortex-js/src/domain/repositories/repository.interface.ts b/cortex-js/src/domain/repositories/repository.interface.ts deleted file mode 100644 index 33eb18581..000000000 --- a/cortex-js/src/domain/repositories/repository.interface.ts +++ /dev/null @@ -1,11 +0,0 @@ -export abstract class Repository { - abstract create(object: Partial): Promise; - - abstract findAll(): Promise; - - abstract findOne(id: string): Promise; - - abstract update(id: string, object: Partial): Promise; - - abstract remove(id: string): Promise; -} diff --git a/cortex-js/src/domain/repositories/telemetry.interface.ts b/cortex-js/src/domain/repositories/telemetry.interface.ts deleted file mode 100644 index b5c82c89e..000000000 --- a/cortex-js/src/domain/repositories/telemetry.interface.ts +++ 
/dev/null @@ -1,50 +0,0 @@ -import { ModelStat } from '@/infrastructure/commanders/types/model-stat.interface'; -import { - BenchmarkHardware, - CrashReportAttributes, - EventAttributes, - Telemetry, - TelemetryAnonymized, - TelemetrySource, -} from '../telemetry/telemetry.interface'; - -export abstract class TelemetryRepository { - abstract readCrashReports( - callback: (Telemetry: Telemetry) => void, - ): Promise; - - abstract createCrashReport( - crashReport: CrashReportAttributes, - source?: TelemetrySource, - ): Promise; - - abstract getLastCrashReport(): Promise; - - abstract markLastCrashReportAsSent(): Promise; - - abstract sendTelemetryToOTelCollector( - endpoint: string, - telemetry: Telemetry, - ): Promise; - - abstract sendTelemetryToServer( - telemetryEvent: Telemetry['event'], - ): Promise; - - abstract sendEvent( - events: EventAttributes[], - source: TelemetrySource, - ): Promise; - - abstract getAnonymizedData(): Promise; - - abstract updateAnonymousData(data: TelemetryAnonymized): Promise; - - abstract sendBenchmarkToServer(data: { - hardware: BenchmarkHardware; - results: any; - metrics: any; - model: any; - sessionId: string; - }): Promise; -} diff --git a/cortex-js/src/domain/telemetry/telemetry.interface.ts b/cortex-js/src/domain/telemetry/telemetry.interface.ts deleted file mode 100644 index 3c400b1e6..000000000 --- a/cortex-js/src/domain/telemetry/telemetry.interface.ts +++ /dev/null @@ -1,115 +0,0 @@ -// TODO. 
move openTelemetry interfaces to dto -export interface TelemetryResource { - osName: string; - osVersion: string; - architecture: string; - appVersion: string; - 'service.name'?: string; - source?: TelemetrySource; - cpu?: string; - gpus?: string; - sessionId?: string; -} - -export interface Resource { - attributes: Attribute[]; -} - -export enum TelemetrySource { - CLI = 'cli', - CORTEX_SERVER = 'cortex-server', - CORTEX_CPP = 'cortex-cpp', -} - -export interface TelemetryEvent { - resource: Resource; - scopeLogs: ScopeLog[]; -} - -interface StringValue { - stringValue: string; -} - -interface ObjectValue { - kvlist_value: { - values: { - key: string; - value: StringValue; - }[]; - }; -} - -export interface Attribute { - key: string; - value: StringValue | ObjectValue; -} - -interface ScopeLog { - scope: object; - logRecords: TelemetryLog[]; -} - -export interface TelemetryLog { - traceId: string; - timeUnixNano?: string; - endTimeUnixNano?: string; - severityText: string; - body: { - stringValue: string; - }; - attributes: Attribute[]; -} - -export interface CrashReportPayload { - modelId?: string; - endpoint?: string; - command?: string; -} - -export interface CrashReportAttributes { - stack?: string; - message: string; - payload: CrashReportPayload; - sessionId?: string; -} - -export interface TelemetryEventMetadata { - createdAt: string; - sentAt: string | null; -} - -export interface Telemetry { - metadata: TelemetryEventMetadata; - event: { - resourceLogs: TelemetryEvent[]; - }; -} - -export enum EventName { - INIT = 'init', - DOWNLOAD_MODEL = 'download-model', - CHAT = 'chat', - ACTIVATE = 'activate', - NEW_ACTIVATE = 'new_activate', -} - -export interface EventAttributes { - name: string; - modelId?: string; - sessionId?: string; -} - -export interface TelemetryAnonymized { - sessionId: string | null; - lastActiveAt?: string | null; -} - -export interface BenchmarkHardware { - gpu: any[]; - cpu: any; - board: any; - disk: any[]; - chassis: any; - 
memLayout: any[]; - os: any; -} diff --git a/cortex-js/src/extensions/anthropic.engine.ts b/cortex-js/src/extensions/anthropic.engine.ts deleted file mode 100644 index 6875d72b5..000000000 --- a/cortex-js/src/extensions/anthropic.engine.ts +++ /dev/null @@ -1,113 +0,0 @@ -import stream from 'stream'; -import { HttpService } from '@nestjs/axios'; -import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; -import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; -import { EventEmitter2 } from '@nestjs/event-emitter'; -import { pick } from 'lodash'; -import { EngineStatus } from '@/domain/abstracts/engine.abstract'; - -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - */ -export default class AnthropicEngineExtension extends OAIEngineExtension { - apiUrl = 'https://api.anthropic.com/v1/messages'; - name = 'anthropic'; - productName = 'Anthropic Inference Engine'; - description = 'This extension enables Anthropic chat completion API calls'; - version = '0.0.1'; - apiKey?: string; - - constructor( - protected readonly httpService: HttpService, - protected readonly configsUsecases: ConfigsUsecases, - protected readonly eventEmmitter: EventEmitter2, - ) { - super(httpService); - - eventEmmitter.on('config.updated', async (data) => { - if (data.engine === this.name) { - this.apiKey = data.value; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } - }); - } - - async onLoad() { - const configs = (await this.configsUsecases.getGroupConfigs( - this.name, - )) as unknown as { apiKey: string }; - this.apiKey = configs?.apiKey; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? 
EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } - - override async inference( - dto: any, - headers: Record, - ): Promise { - headers['x-api-key'] = this.apiKey as string; - headers['Content-Type'] = 'application/json'; - headers['anthropic-version'] = '2023-06-01'; - return super.inference(dto, headers); - } - - transformPayload = (data: any): any => { - const system = data.messages.find((m: any) => m.role === 'system'); - const messages = data.messages.filter((m: any) => m.role !== 'system'); - return { - system: system?.content ?? '', - messages, - ...pick(data, ['model', 'stream', 'max_tokens']), - }; - }; - - transformResponse = (data: any) => { - // handling stream response - if (typeof data === 'string' && data.trim().length === 0) { - return ''; - } - if (typeof data === 'string' && data.startsWith('event: ')) { - return ''; - } - if (typeof data === 'string' && data.startsWith('data: ')) { - data = data.replace('data: ', ''); - const parsedData = JSON.parse(data); - if (parsedData.type !== 'content_block_delta') { - return ''; - } - const text = parsedData.delta?.text; - //convert to have this format data.choices[0]?.delta?.content - return JSON.stringify({ - choices: [ - { - delta: { - content: text, - }, - }, - ], - }); - } - // non-stream response - if (data.content && data.content.length > 0 && data.content[0].text) { - return { - choices: [ - { - message: { - content: data.content[0].text, - }, - }, - ], - }; - } - - console.error('Invalid response format:', data); - return ''; - }; -} diff --git a/cortex-js/src/extensions/cohere.engine.ts b/cortex-js/src/extensions/cohere.engine.ts deleted file mode 100644 index 9c719c05c..000000000 --- a/cortex-js/src/extensions/cohere.engine.ts +++ /dev/null @@ -1,122 +0,0 @@ -import { HttpService } from '@nestjs/axios'; -import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; -import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; -import { EventEmitter2 } from 
'@nestjs/event-emitter'; -import { EngineStatus } from '@/domain/abstracts/engine.abstract'; -import { ChatCompletionMessage } from '@/infrastructure/dtos/chat/chat-completion-message.dto'; - -enum RoleType { - user = 'USER', - chatbot = 'CHATBOT', - system = 'SYSTEM', -} - -type CoherePayloadType = { - chat_history?: Array<{ role: RoleType; message: string }>; - message?: string; - preamble?: string; -}; - -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - */ -export default class CoHereEngineExtension extends OAIEngineExtension { - apiUrl = 'https://api.cohere.ai/v1/chat'; - name = 'cohere'; - productName = 'Cohere Inference Engine'; - description = 'This extension enables Cohere chat completion API calls'; - version = '0.0.1'; - apiKey?: string; - - constructor( - protected readonly httpService: HttpService, - protected readonly configsUsecases: ConfigsUsecases, - protected readonly eventEmmitter: EventEmitter2, - ) { - super(httpService); - - eventEmmitter.on('config.updated', async (data) => { - if (data.engine === this.name) { - this.apiKey = data.value; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } - }); - } - - async onLoad() { - const configs = (await this.configsUsecases.getGroupConfigs( - this.name, - )) as unknown as { apiKey: string }; - this.apiKey = configs?.apiKey; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? 
EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } - - transformPayload = (payload: any): CoherePayloadType => { - console.log('payload', payload); - if (payload.messages.length === 0) { - return {}; - } - - const { messages, ...params } = payload; - const convertedData: CoherePayloadType = { - ...params, - chat_history: [], - message: '', - }; - (messages as ChatCompletionMessage[]).forEach( - (item: ChatCompletionMessage, index: number) => { - // Assign the message of the last item to the `message` property - if (index === messages.length - 1) { - convertedData.message = item.content as string; - return; - } - if (item.role === 'user') { - convertedData.chat_history!.push({ - role: 'USER' as RoleType, - message: item.content as string, - }); - } else if (item.role === 'assistant') { - convertedData.chat_history!.push({ - role: 'CHATBOT' as RoleType, - message: item.content as string, - }); - } else if (item.role === 'system') { - convertedData.preamble = item.content as string; - } - }, - ); - return convertedData; - }; - - transformResponse = (data: any, stream: boolean) => { - const text = - typeof data === 'object' ? data.text : (JSON.parse(data).text ?? ''); - return stream - ? 
JSON.stringify({ - choices: [ - { - delta: { - content: text, - }, - }, - ], - }) - : { - choices: [ - { - message: { - content: text, - }, - }, - ], - }; - }; -} diff --git a/cortex-js/src/extensions/extensions.module.ts b/cortex-js/src/extensions/extensions.module.ts deleted file mode 100644 index 690a739f5..000000000 --- a/cortex-js/src/extensions/extensions.module.ts +++ /dev/null @@ -1,40 +0,0 @@ -import { Module } from '@nestjs/common'; -import GroqEngineExtension from './groq.engine'; -import MistralEngineExtension from './mistral.engine'; -import OpenAIEngineExtension from './openai.engine'; -import { HttpModule, HttpService } from '@nestjs/axios'; -import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; -import { ConfigsModule } from '@/usecases/configs/configs.module'; -import { EventEmitter2 } from '@nestjs/event-emitter'; -import AnthropicEngineExtension from './anthropic.engine'; -import OpenRouterEngineExtension from './openrouter.engine'; -import CoHereEngineExtension from './cohere.engine'; -import MartianEngineExtension from './martian.engine'; -import NvidiaEngineExtension from './nvidia.engine'; - -const provider = { - provide: 'EXTENSIONS_PROVIDER', - inject: [HttpService, ConfigsUsecases, EventEmitter2], - useFactory: ( - httpService: HttpService, - configUsecases: ConfigsUsecases, - eventEmitter: EventEmitter2, - ) => [ - new OpenAIEngineExtension(httpService, configUsecases, eventEmitter), - new GroqEngineExtension(httpService, configUsecases, eventEmitter), - new MistralEngineExtension(httpService, configUsecases, eventEmitter), - new AnthropicEngineExtension(httpService, configUsecases, eventEmitter), - new OpenRouterEngineExtension(httpService, configUsecases, eventEmitter), - new CoHereEngineExtension(httpService, configUsecases, eventEmitter), - new MartianEngineExtension(httpService, configUsecases, eventEmitter), - new NvidiaEngineExtension(httpService, configUsecases, eventEmitter), - ], -}; - -@Module({ - imports: 
[HttpModule, ConfigsModule], - controllers: [], - providers: [provider], - exports: [provider], -}) -export class ExtensionsModule {} diff --git a/cortex-js/src/extensions/groq.engine.ts b/cortex-js/src/extensions/groq.engine.ts deleted file mode 100644 index c9835f8f8..000000000 --- a/cortex-js/src/extensions/groq.engine.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { HttpService } from '@nestjs/axios'; -import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; -import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; -import { EventEmitter2 } from '@nestjs/event-emitter'; -import { EngineStatus } from '@/domain/abstracts/engine.abstract'; - -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - */ -export default class GroqEngineExtension extends OAIEngineExtension { - apiUrl = 'https://api.groq.com/openai/v1/chat/completions'; - name = 'groq'; - productName = 'Groq Inference Engine'; - description = 'This extension enables fast Groq chat completion API calls'; - version = '0.0.1'; - apiKey?: string; - - constructor( - protected readonly httpService: HttpService, - protected readonly configsUsecases: ConfigsUsecases, - protected readonly eventEmmitter: EventEmitter2, - ) { - super(httpService); - - eventEmmitter.on('config.updated', async (data) => { - if (data.engine === this.name) { - this.apiKey = data.value; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } - }); - } - - async onLoad() { - const configs = (await this.configsUsecases.getGroupConfigs( - this.name, - )) as unknown as { apiKey: string }; - - this.apiKey = configs?.apiKey; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? 
EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } -} diff --git a/cortex-js/src/extensions/martian.engine.ts b/cortex-js/src/extensions/martian.engine.ts deleted file mode 100644 index 18ac19d8f..000000000 --- a/cortex-js/src/extensions/martian.engine.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { HttpService } from '@nestjs/axios'; -import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; -import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; -import { EventEmitter2 } from '@nestjs/event-emitter'; -import { EngineStatus } from '@/domain/abstracts/engine.abstract'; - -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - */ -export default class MartianEngineExtension extends OAIEngineExtension { - apiUrl = 'https://withmartian.com/api/openai/v1/chat/completions'; - name = 'martian'; - productName = 'Martian Inference Engine'; - description = 'This extension enables Martian chat completion API calls'; - version = '0.0.1'; - apiKey?: string; - - constructor( - protected readonly httpService: HttpService, - protected readonly configsUsecases: ConfigsUsecases, - protected readonly eventEmmitter: EventEmitter2, - ) { - super(httpService); - - eventEmmitter.on('config.updated', async (data) => { - if (data.engine === this.name) { - this.apiKey = data.value; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } - }); - } - - async onLoad() { - const configs = (await this.configsUsecases.getGroupConfigs( - this.name, - )) as unknown as { apiKey: string }; - - this.apiKey = configs?.apiKey; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? 
EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } -} diff --git a/cortex-js/src/extensions/mistral.engine.ts b/cortex-js/src/extensions/mistral.engine.ts deleted file mode 100644 index 10d1725f0..000000000 --- a/cortex-js/src/extensions/mistral.engine.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { HttpService } from '@nestjs/axios'; -import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; -import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; -import { EventEmitter2 } from '@nestjs/event-emitter'; -import { EngineStatus } from '@/domain/abstracts/engine.abstract'; - -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - */ -export default class MistralEngineExtension extends OAIEngineExtension { - apiUrl = 'https://api.mistral.ai/v1/chat/completions'; - name = 'mistral'; - productName = 'Mistral Inference Engine'; - description = 'This extension enables Mistral chat completion API calls'; - version = '0.0.1'; - apiKey?: string; - - constructor( - protected readonly httpService: HttpService, - protected readonly configsUsecases: ConfigsUsecases, - protected readonly eventEmmitter: EventEmitter2, - ) { - super(httpService); - - eventEmmitter.on('config.updated', async (data) => { - if (data.engine === this.name) { - this.apiKey = data.value; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } - }); - } - - async onLoad() { - const configs = (await this.configsUsecases.getGroupConfigs( - this.name, - )) as unknown as { apiKey: string }; - this.apiKey = configs?.apiKey; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? 
EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } -} diff --git a/cortex-js/src/extensions/nvidia.engine.ts b/cortex-js/src/extensions/nvidia.engine.ts deleted file mode 100644 index caec1a09c..000000000 --- a/cortex-js/src/extensions/nvidia.engine.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { HttpService } from '@nestjs/axios'; -import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; -import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; -import { EventEmitter2 } from '@nestjs/event-emitter'; -import { EngineStatus } from '@/domain/abstracts/engine.abstract'; - -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - */ -export default class NvidiaEngineExtension extends OAIEngineExtension { - apiUrl = 'https://integrate.api.nvidia.com/v1/chat/completions'; - name = 'nvidia'; - productName = 'Nvidia Inference Engine'; - description = 'This extension enables Nvidia chat completion API calls'; - version = '0.0.1'; - apiKey?: string; - - constructor( - protected readonly httpService: HttpService, - protected readonly configsUsecases: ConfigsUsecases, - protected readonly eventEmmitter: EventEmitter2, - ) { - super(httpService); - - eventEmmitter.on('config.updated', async (data) => { - if (data.engine === this.name) { - this.apiKey = data.value; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } - }); - } - - async onLoad() { - const configs = (await this.configsUsecases.getGroupConfigs( - this.name, - )) as unknown as { apiKey: string }; - - this.apiKey = configs?.apiKey; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? 
EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } -} diff --git a/cortex-js/src/extensions/openai.engine.ts b/cortex-js/src/extensions/openai.engine.ts deleted file mode 100644 index e1ab01455..000000000 --- a/cortex-js/src/extensions/openai.engine.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { HttpService } from '@nestjs/axios'; -import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; -import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; -import { EventEmitter2 } from '@nestjs/event-emitter'; -import { EngineStatus } from '@/domain/abstracts/engine.abstract'; - -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - */ -export default class OpenAIEngineExtension extends OAIEngineExtension { - apiUrl = 'https://api.openai.com/v1/chat/completions'; - name = 'openai'; - productName = 'OpenAI Inference Engine'; - description = 'This extension enables OpenAI chat completion API calls'; - version = '0.0.1'; - apiKey?: string; - - constructor( - protected readonly httpService: HttpService, - protected readonly configsUsecases: ConfigsUsecases, - protected readonly eventEmmitter: EventEmitter2, - ) { - super(httpService); - - eventEmmitter.on('config.updated', async (data) => { - if (data.engine === this.name) { - this.apiKey = data.value; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } - }); - } - - async onLoad() { - const configs = (await this.configsUsecases.getGroupConfigs( - this.name, - )) as unknown as { apiKey: string }; - this.apiKey = configs?.apiKey; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? 
EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } -} diff --git a/cortex-js/src/extensions/openrouter.engine.ts b/cortex-js/src/extensions/openrouter.engine.ts deleted file mode 100644 index 95bd1b8b6..000000000 --- a/cortex-js/src/extensions/openrouter.engine.ts +++ /dev/null @@ -1,55 +0,0 @@ -import { HttpService } from '@nestjs/axios'; -import { OAIEngineExtension } from '../domain/abstracts/oai.abstract'; -import { ConfigsUsecases } from '@/usecases/configs/configs.usecase'; -import { EventEmitter2 } from '@nestjs/event-emitter'; -import { EngineStatus } from '@/domain/abstracts/engine.abstract'; - -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - */ -export default class OpenRouterEngineExtension extends OAIEngineExtension { - apiUrl = 'https://openrouter.ai/api/v1/chat/completions'; - name = 'openrouter'; - productName = 'OpenRouter Inference Engine'; - description = 'This extension enables OpenRouter chat completion API calls'; - version = '0.0.1'; - apiKey?: string; - - constructor( - protected readonly httpService: HttpService, - protected readonly configsUsecases: ConfigsUsecases, - protected readonly eventEmmitter: EventEmitter2, - ) { - super(httpService); - - eventEmmitter.on('config.updated', async (data) => { - if (data.engine === this.name) { - this.apiKey = data.value; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } - }); - } - - async onLoad() { - const configs = (await this.configsUsecases.getGroupConfigs( - this.name, - )) as unknown as { apiKey: string }; - this.apiKey = configs?.apiKey; - this.status = - (this.apiKey?.length ?? 0) > 0 - ? 
EngineStatus.READY - : EngineStatus.MISSING_CONFIGURATION; - } - - transformPayload = (data: any): any => { - return { - ...data, - model: 'openrouter/auto', - }; - }; -} diff --git a/cortex-js/src/index.ts b/cortex-js/src/index.ts deleted file mode 100644 index 092876409..000000000 --- a/cortex-js/src/index.ts +++ /dev/null @@ -1,103 +0,0 @@ -import { - CORTEX_JS_SYSTEM_URL, - defaultCortexCppPort, - defaultCortexJsHost, - defaultCortexJsPort, -} from '@/infrastructure/constants/cortex'; -import { getApp } from './app'; -import { fileManagerService } from './infrastructure/services/file-manager/file-manager.service'; -import { CortexUsecases } from './usecases/cortex/cortex.usecases'; - -let host: string; -let port: number; -let enginePort: number; - -/** - * Start the API server - */ -export async function start( - name?: string, - address?: string, - portNumber?: number, - enginePortNumber?: number, - dataFolder?: string, -) { - if (name) { - fileManagerService.setConfigProfile(name); - const isProfileConfigExists = fileManagerService.profileConfigExists(name); - if (!isProfileConfigExists) { - await fileManagerService.writeConfigFile({ - ...fileManagerService.defaultConfig(), - apiServerHost: address || defaultCortexJsHost, - apiServerPort: port || defaultCortexJsPort, - cortexCppPort: Number(enginePort) || defaultCortexCppPort, - }); - } - } - const { - apiServerHost: configApiServerHost, - apiServerPort: configApiServerPort, - cortexCppPort: configCortexCppPort, - } = await fileManagerService.getConfig(); - - host = address || configApiServerHost || defaultCortexJsHost; - port = portNumber || configApiServerPort || defaultCortexJsPort; - if (host === 'localhost') { - host = '127.0.0.1'; - } - enginePort = - Number(enginePortNumber) || configCortexCppPort || defaultCortexCppPort; - const dataFolderPath = dataFolder; - - return startServer(dataFolderPath); -} - -async function startServer(dataFolderPath?: string) { - const config = await 
fileManagerService.getConfig(); - try { - if (dataFolderPath) { - await fileManagerService.writeConfigFile({ - ...config, - dataFolderPath, - }); - // load config again to create the data folder - await fileManagerService.getConfig(dataFolderPath); - } - const app = await getApp(host, port); - const cortexUsecases = await app.resolve(CortexUsecases); - await cortexUsecases.startCortex().catch((e) => { - throw e; - }); - const isServerOnline = await cortexUsecases.isAPIServerOnline(); - if (isServerOnline) { - console.log( - `Server is already running at http://${host}:${port}. Please use 'cortex stop' to stop the server.`, - ); - } - await app.listen(port, host); - await fileManagerService.writeConfigFile({ - ...config, - apiServerHost: host, - apiServerPort: port, - dataFolderPath: dataFolderPath || config.dataFolderPath, - cortexCppPort: enginePort, - }); - return app; - } catch (e) { - console.error(e); - // revert the data folder path if it was set - await fileManagerService.writeConfigFile({ - ...config, - }); - console.error(`Failed to start server. 
Is port ${port} in use?`); - } -} -/** - * Stop the API server - * @returns - */ -export async function stop(host?: string, port?: number) { - return fetch(CORTEX_JS_SYSTEM_URL(host, port), { - method: 'DELETE', - }); -} diff --git a/cortex-js/src/infrastructure/commanders/base.command.ts b/cortex-js/src/infrastructure/commanders/base.command.ts deleted file mode 100644 index e6b23463f..000000000 --- a/cortex-js/src/infrastructure/commanders/base.command.ts +++ /dev/null @@ -1,39 +0,0 @@ -import { CommandRunner } from 'nest-commander'; -import { Injectable } from '@nestjs/common'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import ora from 'ora'; -import { CortexClient } from './services/cortex.client'; - -@Injectable() -export abstract class BaseCommand extends CommandRunner { - cortex: CortexClient; - constructor(readonly cortexUseCases: CortexUsecases) { - super(); - } - protected abstract runCommand( - passedParam: string[], - options?: Record, - ): Promise; - - async run( - passedParam: string[], - options?: Record, - ): Promise { - const checkingSpinner = ora('Checking API server online...').start(); - const result = await this.cortexUseCases.isAPIServerOnline(); - if (!result) { - checkingSpinner.fail( - 'API server is offline. 
Please run "cortex" before running this command', - ); - process.exit(1); - } - checkingSpinner.succeed('API server is online'); - if (!this.cortex) { - this.cortex = new CortexClient(); - } - await this.runCommand(passedParam, options); - } - protected setCortex(cortex: CortexClient) { - this.cortex = cortex; - } -} diff --git a/cortex-js/src/infrastructure/commanders/benchmark.command.ts b/cortex-js/src/infrastructure/commanders/benchmark.command.ts deleted file mode 100644 index 2820ded7e..000000000 --- a/cortex-js/src/infrastructure/commanders/benchmark.command.ts +++ /dev/null @@ -1,338 +0,0 @@ -import { SubCommand, Option } from 'nest-commander'; -import { - BenchmarkConfig, - ParametersConfig, -} from './types/benchmark-config.interface'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { BaseCommand } from './base.command'; -import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs'; -import { join } from 'path'; -import yaml from 'js-yaml'; -import si from 'systeminformation'; -import { Presets, SingleBar } from 'cli-progress'; -import { TelemetryUsecases } from '@/usecases/telemetry/telemetry.usecases'; -import { BenchmarkHardware } from '@/domain/telemetry/telemetry.interface'; -import { defaultBenchmarkConfiguration } from '../constants/benchmark'; -import { inspect } from 'util'; -import { Cortex } from '@cortexso/cortex.js'; -import { fileManagerService } from '../services/file-manager/file-manager.service'; - -@SubCommand({ - name: 'benchmark', - arguments: '[model_id]', - argsDescription: { - model_id: 'Model to benchmark with', - }, - description: - 'Benchmark and analyze the performance of a specific AI model using a variety of system resources', -}) -export class BenchmarkCommand extends BaseCommand { - constructor( - readonly cortexUsecases: CortexUsecases, - private readonly telemetryUsecases: TelemetryUsecases, - ) { - super(cortexUsecases); - } - - async runCommand( - passedParams: string[], - 
options?: Partial, - ): Promise { - return this.benchmark( - options ?? {}, - passedParams[0] - ? ({ model: passedParams[0] } as ParametersConfig) - : undefined, - ); - } - - @Option({ - flags: '-n, --num_rounds ', - description: 'Number of rounds to run the benchmark', - }) - parseRounds(value: number) { - return value; - } - - @Option({ - flags: '-c, --concurrency ', - description: 'Number of concurrent requests to run the benchmark', - }) - parseConcurrency(value: number) { - return value; - } - - @Option({ - flags: '-o, --output ', - description: 'Output format for the benchmark results. json or table', - }) - parseOutput(value: string) { - return value; - } - - config: BenchmarkConfig; - /** - * Benchmark and analyze the performance of a specific AI model using a variety of system resources - */ - async benchmark( - options: Partial, - params?: ParametersConfig, - ) { - return this.getBenchmarkConfig() - .then(async (config) => { - this.config = { - ...config, - ...options, - }; - - const modelId = params?.model ?? this.config.api.parameters.model; - - // TODO: Do we really need to submit these data? or just model information is enough? - const model = await this.cortex.models.retrieve(modelId); - - if (model) { - await this.cortex.models.start(modelId); - await this.runBenchmarks(model); - } - - process.exit(0); - }) - .catch((error) => console.log(error.message ?? 
error)); - } - - /** - * Get the benchmark configuration - * @returns the benchmark configuration - */ - private async getBenchmarkConfig() { - const benchmarkFolder = await fileManagerService.getBenchmarkPath(); - const configurationPath = join(benchmarkFolder, 'config.yaml'); - if (existsSync(configurationPath)) { - return yaml.load( - readFileSync(configurationPath, 'utf8'), - ) as BenchmarkConfig; - } else { - const config = yaml.dump(defaultBenchmarkConfiguration); - if (!existsSync(benchmarkFolder)) { - mkdirSync(benchmarkFolder, { - recursive: true, - }); - } - await writeFileSync(configurationPath, config, 'utf8'); - return defaultBenchmarkConfiguration; - } - } - - /** - * Get the system resources for benchmarking - * using the systeminformation library - * @returns the system resources - */ - private async getSystemResources(): Promise< - BenchmarkHardware & { - cpuLoad: any; - mem: any; - } - > { - return { - cpuLoad: await si.currentLoad(), - mem: await si.mem(), - gpu: (await si.graphics()).controllers, - cpu: await si.cpu(), - board: await si.baseboard(), - disk: await si.diskLayout(), - chassis: await si.chassis(), - memLayout: await si.memLayout(), - os: await si.osInfo(), - }; - } - - /** - * Get the resource change between two data points - * @param startData the start data point - * @param endData the end data point - * @returns the resource change - */ - private async getResourceChange(startData: any, endData: any) { - return { - cpuLoad: - startData.cpuLoad && endData.cpuLoad - ? ((endData.cpuLoad.currentLoad - startData.cpuLoad.currentLoad) / - startData.cpuLoad.currentLoad) * - 100 - : null, - mem: - startData.mem && endData.mem - ? 
((endData.mem.used - startData.mem.used) / startData.mem.total) * - 100 - : null, - }; - } - - /** - * Benchmark a user using the OpenAI API - * @returns - */ - private async benchmarkUser(model: Cortex.Models.Model) { - const startResources = await this.getSystemResources(); - const start = Date.now(); - let tokenCount = 0; - let firstTokenTime = null; - - try { - const stream = await this.cortex.chat.completions.create({ - model: model.id, - messages: this.config.api.parameters.messages, - max_tokens: this.config.api.parameters.max_tokens, - stream: true, - }); - - for await (const chunk of stream) { - if (!firstTokenTime && chunk.choices[0]?.delta?.content) { - firstTokenTime = Date.now(); - } - tokenCount += (chunk.choices[0]?.delta?.content || '').split( - /\s+/, - ).length; - } - } catch (error) { - console.error('Error during API call:', error); - return null; - } - - const latency = Date.now() - start; - const ttft = firstTokenTime ? firstTokenTime - start : null; - const endResources = await this.getSystemResources(); - const resourceChange = await this.getResourceChange( - startResources, - endResources, - ); - - return { - tokens: this.config.api.parameters.max_tokens, - token_length: tokenCount, // Dynamically calculated token count - latency, - resourceChange, - tpot: tokenCount ? 
latency / tokenCount : 0, - throughput: tokenCount / (latency / 1000), - ttft, - }; - } - - /** - * Calculate the percentiles of the data - * @param data the data to calculate percentiles for - * @param percentile the percentile to calculate - * @returns the percentile value - */ - private calculatePercentiles(data: number[], percentile: number) { - if (data.length === 0) return null; - const sorted = data - .filter((x: number) => x !== null) - .sort((a: number, b: number) => a - b); - const pos = (percentile / 100) * sorted.length; - if (pos < 1) return sorted[0]; - if (pos >= sorted.length) return sorted[sorted.length - 1]; - const lower = sorted[Math.floor(pos) - 1]; - const upper = sorted[Math.ceil(pos) - 1]; - return lower + (upper - lower) * (pos - Math.floor(pos)); - } - - /** - * Run the benchmarks - */ - private async runBenchmarks(model: Cortex.Models.Model) { - const allResults: any[] = []; - const rounds = this.config.num_rounds || 1; - - const bar = new SingleBar({}, Presets.shades_classic); - bar.start(rounds, 0); - - for (let i = 0; i < rounds; i++) { - const roundResults = []; - const hardwareBefore = await this.getSystemResources(); - - for (let j = 0; j < this.config.concurrency; j++) { - const result = await this.benchmarkUser(model); - if (result) { - roundResults.push(result); - } - } - - const hardwareAfter = await this.getSystemResources(); - const hardwareChanges = await this.getResourceChange( - hardwareBefore, - hardwareAfter, - ); - - allResults.push({ - round: i + 1, - results: roundResults, - hardwareChanges, - }); - - bar.update(i + 1); - } - - const metrics: any = { - p50: {}, - p75: {}, - p95: {}, - }; - const keys = ['latency', 'tpot', 'throughput', 'ttft']; - keys.forEach((key) => { - const data = allResults.flatMap((r) => - r.results.map((res: object) => res[key as keyof typeof res]), - ); - metrics.p50[key] = this.calculatePercentiles(data, 50); - metrics.p75[key] = this.calculatePercentiles(data, 75); - metrics.p95[key] = 
this.calculatePercentiles(data, 95); - }); - - const output = { - hardware: await this.getSystemResources(), - results: allResults, - metrics, - model, - }; - bar.stop(); - - const outputFilePath = join( - await fileManagerService.getBenchmarkPath(), - 'output.json', - ); - await this.telemetryUsecases.sendBenchmarkEvent({ - hardware: { - cpu: output.hardware.cpu, - gpu: output.hardware.gpu, - memLayout: output.hardware.memLayout, - board: output.hardware.board, - disk: output.hardware.disk, - chassis: output.hardware.chassis, - os: output.hardware.os, - }, - results: output.results, - metrics: output.metrics, - model, - }); - writeFileSync(outputFilePath, JSON.stringify(output, null, 2)); - console.log(`Benchmark results and metrics saved to ${outputFilePath}`); - - if (this.config.output === 'table') { - console.log('Results:'); - output.results.forEach((round) => { - console.log('Round ' + round.round + ':'); - console.table(round.results); - }); - console.log('Metrics:'); - console.table(output.metrics); - } else - console.log( - inspect(output, { - showHidden: false, - depth: null, - colors: true, - }), - ); - } -} diff --git a/cortex-js/src/infrastructure/commanders/chat.command.ts b/cortex-js/src/infrastructure/commanders/chat.command.ts deleted file mode 100644 index 35b2ef5ba..000000000 --- a/cortex-js/src/infrastructure/commanders/chat.command.ts +++ /dev/null @@ -1,178 +0,0 @@ -import { existsSync } from 'fs'; -import { SubCommand, Option, InquirerService } from 'nest-commander'; -import { exit } from 'node:process'; -import { SetCommandContext } from './decorators/CommandContext'; -import { TelemetryUsecases } from '@/usecases/telemetry/telemetry.usecases'; -import { - EventName, - TelemetrySource, -} from '@/domain/telemetry/telemetry.interface'; -import { ContextService } from '../services/context/context.service'; -import { BaseCommand } from './base.command'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { Engines } 
from './types/engine.interface'; -import { join } from 'path'; -import { fileManagerService } from '../services/file-manager/file-manager.service'; -import { isRemoteEngine } from '@/utils/normalize-model-id'; -import { Cortex } from '@cortexso/cortex.js'; -import { ChatClient } from './services/chat-client'; -import { downloadProgress } from '@/utils/download-progress'; -import { DownloadType } from '@/domain/models/download.interface'; -import { checkRequiredVersion } from '@/utils/model-check'; - -type ChatOptions = { - threadId?: string; - message?: string; - attach: boolean; - preset?: string; -}; - -@SubCommand({ - name: 'chat', - description: 'Send a chat request to a model', - arguments: '[model_id] [message]', - argsDescription: { - model_id: - 'Model ID to chat with. If there is no model_id provided, it will prompt to select from running models.', - message: 'Message to send to the model', - }, -}) -@SetCommandContext() -export class ChatCommand extends BaseCommand { - chatClient: ChatClient; - - constructor( - private readonly inquirerService: InquirerService, - private readonly telemetryUsecases: TelemetryUsecases, - protected readonly cortexUsecases: CortexUsecases, - protected readonly contextService: ContextService, - ) { - super(cortexUsecases); - } - - async runCommand( - passedParams: string[], - options: ChatOptions, - ): Promise { - this.chatClient = new ChatClient(this.cortex); - let modelId = passedParams[0]; - // First attempt to get message from input or options - // Extract input from 1 to end of array - let message = options.message ?? passedParams.slice(1).join(' '); - - // Check for model existing - if (!modelId || !(await this.cortex.models.retrieve(modelId))) { - // Model ID is not provided - // first input might be message input - message = passedParams.length - ? passedParams.join(' ') - : (options.message ?? 
''); - // If model ID is not provided, prompt user to select from running models - const { data: models } = await this.cortex.models.list(); - if (models.length === 1) { - modelId = models[0].id; - } else if (models.length > 0) { - modelId = await this.modelInquiry(models); - } else { - exit(1); - } - } - - const existingModel = await this.cortex.models.retrieve(modelId); - if (!existingModel) { - process.exit(1); - } - - const engine = existingModel.engine || Engines.llamaCPP; - // Pull engine if not exist - if ( - !isRemoteEngine(engine) && - !existsSync( - join(await fileManagerService.getCortexCppEnginePath(), engine), - ) - ) { - console.log('Downloading engine...'); - await this.cortex.engines.init(engine); - await downloadProgress(this.cortex, undefined, DownloadType.Engine); - } - - const { version: engineVersion } = - await this.cortex.engines.retrieve(engine); - if ( - existingModel.engine_version && - !checkRequiredVersion(existingModel.engine_version, engineVersion) - ) { - console.log( - `Model engine version ${existingModel.engine_version} is not compatible with engine version ${engineVersion}`, - ); - process.exit(1); - } - - if (!message) options.attach = true; - void this.telemetryUsecases.sendEvent( - [ - { - name: EventName.CHAT, - modelId, - }, - ], - TelemetrySource.CLI, - ); - - const preset = await fileManagerService.getPreset(options.preset); - - return this.chatClient.chat( - modelId, - options.threadId, - message, // Accept both message from inputs or arguments - preset ? preset : {}, - ); - } - - modelInquiry = async (models: Cortex.Model[]) => { - const { model } = await this.inquirerService.inquirer.prompt({ - type: 'list', - name: 'model', - message: 'Select a model to chat with:', - choices: models.map((e) => ({ - name: e.id, - value: e.id, - })), - }); - return model; - }; - - @Option({ - flags: '-t, --thread ', - description: 'Thread Id. 
If not provided, will create new thread', - }) - parseThreadId(value: string) { - return value; - } - - @Option({ - flags: '-m, --message ', - description: 'Message to send to the model', - }) - parseModelId(value: string) { - return value; - } - - @Option({ - flags: '-a, --attach', - description: 'Attach to interactive chat session', - defaultValue: false, - name: 'attach', - }) - parseAttach() { - return true; - } - - @Option({ - flags: '-p, --preset ', - description: 'Apply a chat preset to the chat session', - }) - parsePreset(value: string) { - return value; - } -} diff --git a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts deleted file mode 100644 index f03d9e242..000000000 --- a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts +++ /dev/null @@ -1,240 +0,0 @@ -import pkg from '@/../package.json'; -import { RootCommand, CommandRunner, Option } from 'nest-commander'; -import { ChatCommand } from './chat.command'; -import { ModelsCommand } from './models.command'; -import { RunCommand } from './run.command'; -import { ModelPullCommand } from './models/model-pull.command'; -import { PSCommand } from './ps.command'; -import { PresetCommand } from './presets.command'; -import { TelemetryCommand } from './telemetry.command'; -import { SetCommandContext } from './decorators/CommandContext'; -import { EmbeddingCommand } from './embeddings.command'; -import { BenchmarkCommand } from './benchmark.command'; -import chalk from 'chalk'; -import { ContextService } from '../services/context/context.service'; -import { EnginesCommand } from './engines.command'; -import { - defaultCortexCppPort, - defaultCortexJsHost, - defaultCortexJsPort, -} from '../constants/cortex'; -import { getApp } from '@/app'; -import { fileManagerService } from '../services/file-manager/file-manager.service'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { 
ServeStopCommand } from './serve-stop.command'; -import ora from 'ora'; -import { EnginesSetCommand } from './engines/engines-set.command'; - -type ServeOptions = { - address?: string; - port?: number; - logs?: boolean; - dataFolder?: string; - version?: boolean; - name?: string; - configPath?: string; - enginePort?: string; -}; - -@RootCommand({ - subCommands: [ - ModelsCommand, - ChatCommand, - RunCommand, - ModelPullCommand, - PSCommand, - PresetCommand, - TelemetryCommand, - EmbeddingCommand, - BenchmarkCommand, - EnginesCommand, - ServeStopCommand, - EnginesSetCommand, - ], - description: 'Cortex CLI', -}) -@SetCommandContext() -export class CortexCommand extends CommandRunner { - host: string; - port: number; - configHost: string; - configPort: number; - enginePort: number; - constructor( - readonly contextService: ContextService, - readonly cortexUseCases: CortexUsecases, - ) { - super(); - } - - async run(passedParams: string[], options?: ServeOptions): Promise { - if (options?.configPath) { - fileManagerService.setConfigPath(options.configPath); - } - if (options?.name) { - fileManagerService.setConfigProfile(options.name); - } - if (options?.name) { - const isProfileConfigExists = fileManagerService.profileConfigExists( - options.name, - ); - if (!isProfileConfigExists) { - await fileManagerService.writeConfigFile({ - ...fileManagerService.defaultConfig(), - apiServerHost: options?.address || defaultCortexJsHost, - apiServerPort: options?.port || defaultCortexJsPort, - cortexCppPort: Number(options?.enginePort) || defaultCortexCppPort, - }); - } - } - const { - apiServerHost: configApiServerHost, - apiServerPort: configApiServerPort, - cortexCppPort: configCortexCppPort, - } = await fileManagerService.getConfig(); - - this.configHost = configApiServerHost || defaultCortexJsHost; - this.configPort = configApiServerPort || defaultCortexJsPort; - - this.host = options?.address || configApiServerHost || defaultCortexJsHost; - this.port = options?.port || 
configApiServerPort || defaultCortexJsPort; - if (this.host === 'localhost') { - this.host = '127.0.0.1'; - } - this.enginePort = - Number(options?.enginePort) || - configCortexCppPort || - defaultCortexCppPort; - const showLogs = options?.logs || false; - const showVersion = options?.version || false; - const dataFolderPath = options?.dataFolder; - if (showVersion) { - console.log('\n'); - console.log(`Cortex CLI - v${pkg.version}`); - console.log(chalk.blue(`Github: ${pkg.homepage}`)); - return; - } - return this.startServer(showLogs, dataFolderPath); - } - - private async startServer(attach: boolean, dataFolderPath?: string) { - const config = await fileManagerService.getConfig(); - try { - const startEngineSpinner = ora('Starting Cortex engine...'); - await this.cortexUseCases.startCortex().catch((e) => { - startEngineSpinner.fail('Failed to start Cortex engine'); - throw e; - }); - startEngineSpinner.succeed('Cortex started successfully'); - const isServerOnline = await this.cortexUseCases.isAPIServerOnline(); - if (isServerOnline) { - console.log( - `Server is already running at http://${this.configHost}:${this.configPort}. 
Please use 'cortex stop' to stop the server.`, - ); - process.exit(0); - } - if (dataFolderPath) { - await fileManagerService.writeConfigFile({ - ...config, - dataFolderPath, - }); - // load config again to create the data folder - await fileManagerService.getConfig(dataFolderPath); - } - if (attach) { - const app = await getApp(this.host, this.port); - await app.listen(this.port, this.host); - } else { - await this.cortexUseCases.startServerDetached(this.host, this.port); - } - console.log( - chalk.blue(`Started server at http://${this.host}:${this.port}`), - ); - console.log( - chalk.blue( - `API Playground available at http://${this.host}:${this.port}/api`, - ), - ); - await fileManagerService.writeConfigFile({ - ...config, - apiServerHost: this.host, - apiServerPort: this.port, - dataFolderPath: dataFolderPath || config.dataFolderPath, - cortexCppPort: this.enginePort, - }); - if (!attach) process.exit(0); - } catch (e) { - console.error(e); - // revert the data folder path if it was set - await fileManagerService.writeConfigFile({ - ...config, - }); - console.error(`Failed to start server. Is port ${this.port} in use?`); - process.exit(1); - } - } - - @Option({ - flags: '-a, --address
', - description: 'Address to use', - }) - parseHost(value: string) { - return value; - } - - @Option({ - flags: '-p, --port ', - description: 'Port to serve the application', - }) - parsePort(value: string) { - return parseInt(value, 10); - } - - @Option({ - flags: '-l, --logs', - description: 'Show logs', - }) - parseLogs() { - return true; - } - - @Option({ - flags: '-df, --dataFolder ', - description: 'Set the data folder directory', - }) - parseDataFolder(value: string) { - return value; - } - - @Option({ - flags: '-cp, --configPath ', - description: 'Set the config folder directory', - }) - parseConfigFolder(value: string) { - return value; - } - - @Option({ - flags: '-v, --version', - description: 'Show version', - }) - parseVersion() { - return true; - } - - @Option({ - flags: '-n, --name ', - description: 'Name of the process', - }) - parseName(value: string) { - return value; - } - - @Option({ - flags: '-ep, --engine-port ', - description: 'Port to serve the engine', - }) - parseEnginePort(value: string) { - return value; - } -} diff --git a/cortex-js/src/infrastructure/commanders/decorators/CommandContext.ts b/cortex-js/src/infrastructure/commanders/decorators/CommandContext.ts deleted file mode 100644 index 04033c7cd..000000000 --- a/cortex-js/src/infrastructure/commanders/decorators/CommandContext.ts +++ /dev/null @@ -1,34 +0,0 @@ -type Constructor = new (...args: any[]) => any; - -export const SetCommandContext = () => { - return (constructor: Constructor) => { - const classMethods = Object.getOwnPropertyNames(constructor.prototype); - - classMethods.forEach((methodName) => { - if (methodName !== 'run') return; - const originalMethod = constructor.prototype[methodName]; - if (typeof originalMethod === 'function') { - constructor.prototype[methodName] = async function (...args: any[]) { - if (this.contextService) { - const parentCommandName = this.command.parent - ? 
this.command.parent.name() - : ''; - const comamndName = this.command.name(); - this.contextService.set( - 'command', - `${parentCommandName} ${comamndName}`, - ); - if (parentCommandName === 'models') { - const modelId = args[0]?.[0]; - if (modelId) { - this.contextService.set('model', modelId); - } - } - } - const result = await originalMethod.apply(this, args); - return result; - }; - } - }); - }; -}; diff --git a/cortex-js/src/infrastructure/commanders/embeddings.command.ts b/cortex-js/src/infrastructure/commanders/embeddings.command.ts deleted file mode 100644 index bf2dc8df3..000000000 --- a/cortex-js/src/infrastructure/commanders/embeddings.command.ts +++ /dev/null @@ -1,107 +0,0 @@ -import { InquirerService, Option, SubCommand } from 'nest-commander'; -import { inspect } from 'util'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { BaseCommand } from './base.command'; -import { Cortex } from '@cortexso/cortex.js'; -import ora from 'ora'; - -interface EmbeddingCommandOptions { - encoding_format?: string; - input?: string; - dimensions?: number; -} - -@SubCommand({ - name: 'embeddings', - arguments: '[model_id]', - description: 'Creates an embedding vector representing the input text.', - argsDescription: { - model_id: - 'Model to use for embedding. If not provided, it will prompt to select from running models.', - }, -}) -export class EmbeddingCommand extends BaseCommand { - constructor( - private readonly inquirerService: InquirerService, - readonly cortexUsecases: CortexUsecases, - ) { - super(cortexUsecases); - } - async runCommand( - passedParams: string[], - options: EmbeddingCommandOptions, - ): Promise { - let model = passedParams[0]; - const checkingSpinner = ora('Checking model...').start(); - // First attempt to get message from input or options - let input: string | string[] = options.input ?? 
passedParams.splice(1); - - // Check for model existing - if (!model || !(await this.cortex.models.retrieve(model))) { - // Model ID is not provided - // first input might be message input - input = passedParams ?? options.input; - // If model ID is not provided, prompt user to select from running models - const { data: models } = await this.cortex.models.list(); - if (models.length === 1) { - model = models[0].id; - } else if (models.length > 0) { - model = await this.modelInquiry(models); - } else { - checkingSpinner.fail('Model ID is required'); - process.exit(1); - } - } - checkingSpinner.succeed(`Model found`); - return this.cortex.embeddings - .create({ - input, - model, - }) - .then((res) => - inspect(res, { showHidden: false, depth: null, colors: true }), - ) - .then(console.log) - .catch((e) => console.error(e.message ?? e)); - } - - modelInquiry = async (models: Cortex.Model[]) => { - const { model } = await this.inquirerService.inquirer.prompt({ - type: 'list', - name: 'model', - message: 'Select model to chat with:', - choices: models.map((e) => ({ - name: e.id, - value: e.id, - })), - }); - return model; - }; - - @Option({ - flags: '-i, --input ', - description: - 'Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays.', - }) - parseInput(value: string) { - return value; - } - - @Option({ - flags: '-e, --encoding_format ', - description: - 'Encoding format for the embeddings. Supported formats are float and int.', - }) - parseEncodingFormat(value: string) { - return value; - } - - @Option({ - flags: '-d, --dimensions ', - description: - 'The number of dimensions the resulting output embeddings should have. 
Only supported in some models.', - }) - parseDimensionsFormat(value: string) { - return value; - } -} diff --git a/cortex-js/src/infrastructure/commanders/engines.command.ts b/cortex-js/src/infrastructure/commanders/engines.command.ts deleted file mode 100644 index c4861f325..000000000 --- a/cortex-js/src/infrastructure/commanders/engines.command.ts +++ /dev/null @@ -1,87 +0,0 @@ -import { invert } from 'lodash'; -import { Option, SubCommand } from 'nest-commander'; -import { SetCommandContext } from './decorators/CommandContext'; -import { ContextService } from '@/infrastructure/services/context/context.service'; -import { EnginesListCommand } from './engines/engines-list.command'; -import { EnginesGetCommand } from './engines/engines-get.command'; -import { EnginesInitCommand } from './engines/engines-init.command'; -import { ModuleRef } from '@nestjs/core'; -import { EngineNamesMap } from './types/engine.interface'; -import { BaseCommand } from './base.command'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { EnginesSetCommand } from './engines/engines-set.command'; - -@SubCommand({ - name: 'engines', - subCommands: [EnginesListCommand, EnginesGetCommand, EnginesInitCommand], - description: 'Get cortex engines', - arguments: ' [subcommand]', -}) -@SetCommandContext() -export class EnginesCommand extends BaseCommand { - commandMap: { [key: string]: any } = { - list: EnginesListCommand, - get: EnginesGetCommand, - init: EnginesInitCommand, - set: EnginesSetCommand, - }; - - constructor( - readonly contextService: ContextService, - readonly moduleRef: ModuleRef, - readonly cortexUsecases: CortexUsecases, - ) { - super(cortexUsecases); - } - async runCommand(passedParams: string[], options: { vulkan: boolean }) { - const [parameter, command] = passedParams; - if (command !== 'list' && !parameter) { - console.error('Engine name is required.'); - return; - } - - // Handle the commands accordingly - const commandClass = 
this.commandMap[command as string]; - if (!commandClass) { - this.command?.help(); - return; - } - const engine = invert(EngineNamesMap)[parameter] || parameter; - await this.runEngineCommand( - commandClass, - [engine, ...passedParams.slice(2)], - options, - ); - } - - private async runEngineCommand( - commandClass: any, - params: string[] = [], - options?: { vulkan: boolean }, - ) { - const commandInstance = this.moduleRef.get(commandClass, { strict: false }); - if (commandInstance) { - commandInstance.setCortex(this.cortex); - await commandInstance.runCommand(params, options); - } else { - console.error('Command not found.'); - } - } - - @Option({ - flags: '-vk, --vulkan', - description: 'Install Vulkan engine', - defaultValue: false, - }) - parseVulkan() { - return true; - } - - @Option({ - flags: '-v, --version ', - description: 'Select version to install', - }) - parseVersion(value: string) { - return value; - } -} diff --git a/cortex-js/src/infrastructure/commanders/engines/engines-get.command.ts b/cortex-js/src/infrastructure/commanders/engines/engines-get.command.ts deleted file mode 100644 index 912087a6d..000000000 --- a/cortex-js/src/infrastructure/commanders/engines/engines-get.command.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { SubCommand } from 'nest-commander'; -import { SetCommandContext } from '../decorators/CommandContext'; -import { ContextService } from '@/infrastructure/services/context/context.service'; -import { EngineNamesMap, Engines } from '../types/engine.interface'; -import { BaseCommand } from '../base.command'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; - -@SubCommand({ - name: ' get', - description: 'Get an engine', - argsDescription: { - name: 'Engine name to get', - }, -}) -@SetCommandContext() -export class EnginesGetCommand extends BaseCommand { - constructor( - readonly contextService: ContextService, - readonly cortexUsecases: CortexUsecases, - ) { - super(cortexUsecases); - } - - async 
runCommand(passedParams: string[]): Promise { - return this.cortex.engines.retrieve(passedParams[0]).then((engine) => { - if (!engine) { - console.error('Engine not found.'); - } else { - console.table({ - ...engine, - name: EngineNamesMap[engine.name as Engines] || engine.name, - }); - } - }); - } -} diff --git a/cortex-js/src/infrastructure/commanders/engines/engines-init.command.ts b/cortex-js/src/infrastructure/commanders/engines/engines-init.command.ts deleted file mode 100644 index 28d15a7f1..000000000 --- a/cortex-js/src/infrastructure/commanders/engines/engines-init.command.ts +++ /dev/null @@ -1,79 +0,0 @@ -import { SubCommand } from 'nest-commander'; -import { SetCommandContext } from '../decorators/CommandContext'; -import { ContextService } from '@/infrastructure/services/context/context.service'; -import { Engines } from '../types/engine.interface'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { BaseCommand } from '../base.command'; -import { defaultInstallationOptions } from '@/utils/init'; -import { Presets, SingleBar } from 'cli-progress'; -import ora from 'ora'; -import { InitEngineDto } from '@/infrastructure/dtos/engines/engines.dto'; -import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; - -@SubCommand({ - name: ' init', - description: 'Setup engine', - argsDescription: { - name: 'Engine name to setup', - }, -}) -@SetCommandContext() -export class EnginesInitCommand extends BaseCommand { - constructor( - private readonly cortexUsecases: CortexUsecases, - readonly contextService: ContextService, - ) { - super(cortexUsecases); - } - - async runCommand( - passedParams: string[], - options: InitEngineDto, - ): Promise { - const engine = passedParams[0]; - const params = passedParams.includes(Engines.llamaCPP) - ? 
{ - ...(await defaultInstallationOptions()), - ...options, - } - : {}; - - const configs = await fileManagerService.getConfig(); - const host = configs.cortexCppHost; - const port = configs.cortexCppPort; - // Should unload engine before installing engine - const unloadCortexEngineSpinner = ora('Unloading cortex...').start(); - if (await this.cortexUsecases.healthCheck(host, port)) { - await this.cortexUsecases.unloadCortexEngine(engine as Engines); - } - - unloadCortexEngineSpinner.succeed('Cortex unloaded'); - console.log(`Installing engine ${engine}...`); - - await this.cortex.engines.init(engine, params); - const response = await this.cortex.events.downloadEvent(); - - const progressBar = new SingleBar({}, Presets.shades_classic); - progressBar.start(100, 0); - - for await (const stream of response) { - if (stream.length) { - const data = stream[0] as any; - if (data.status === 'downloaded') break; - let totalBytes = 0; - let totalTransferred = 0; - data.children.forEach((child: any) => { - totalBytes += child.size.total; - totalTransferred += child.size.transferred; - }); - progressBar.update(Math.floor((totalTransferred / totalBytes) * 100)); - } - } - progressBar.stop(); - const startCortexSpinner = ora('Starting cortex...').start(); - await this.cortexUsecases.startCortex(); - startCortexSpinner.succeed('Cortex started'); - console.log('Engine installed successfully'); - process.exit(0); - } -} diff --git a/cortex-js/src/infrastructure/commanders/engines/engines-list.command.ts b/cortex-js/src/infrastructure/commanders/engines/engines-list.command.ts deleted file mode 100644 index 25f85949f..000000000 --- a/cortex-js/src/infrastructure/commanders/engines/engines-list.command.ts +++ /dev/null @@ -1,30 +0,0 @@ -import { SubCommand } from 'nest-commander'; -import { SetCommandContext } from '../decorators/CommandContext'; -import { ContextService } from '@/infrastructure/services/context/context.service'; -import { EngineNamesMap } from 
'../types/engine.interface'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { BaseCommand } from '../base.command'; - -@SubCommand({ - name: 'list', - description: 'Get all cortex engines', -}) -@SetCommandContext() -export class EnginesListCommand extends BaseCommand { - constructor( - readonly contextService: ContextService, - readonly cortexUseCases: CortexUsecases, - ) { - super(cortexUseCases); - } - - async runCommand(): Promise { - return this.cortex.engines.list().then(({ data: engines }) => { - const enginesTable = engines.map((engine) => ({ - ...engine, - name: EngineNamesMap[engine.name as string] || engine.name, - })); - console.table(enginesTable); - }); - } -} diff --git a/cortex-js/src/infrastructure/commanders/engines/engines-set.command.ts b/cortex-js/src/infrastructure/commanders/engines/engines-set.command.ts deleted file mode 100644 index 655c7d614..000000000 --- a/cortex-js/src/infrastructure/commanders/engines/engines-set.command.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { SubCommand } from 'nest-commander'; -import { SetCommandContext } from '../decorators/CommandContext'; -import { BaseCommand } from '../base.command'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; - -@SubCommand({ - name: ' set ', - description: 'Update an engine configurations', - argsDescription: { - name: 'Engine name to update', - }, -}) -@SetCommandContext() -export class EnginesSetCommand extends BaseCommand { - constructor(readonly cortexUsecases: CortexUsecases) { - super(cortexUsecases); - } - - async runCommand(passedParams: string[]): Promise { - const engineName = passedParams[0]; - const config = passedParams[1]; - const value = passedParams[2]; - return this.cortex.engines - .update(engineName, { config, value }) - .then(() => console.log('Update engine successfully')) - .catch((error) => console.error(error.message ?? 
error)); - } -} diff --git a/cortex-js/src/infrastructure/commanders/models.command.ts b/cortex-js/src/infrastructure/commanders/models.command.ts deleted file mode 100644 index d66dd0989..000000000 --- a/cortex-js/src/infrastructure/commanders/models.command.ts +++ /dev/null @@ -1,87 +0,0 @@ -import { SubCommand } from 'nest-commander'; -import { ModelStartCommand } from './models/model-start.command'; -import { ModelGetCommand } from './models/model-get.command'; -import { ModelListCommand } from './models/model-list.command'; -import { ModelStopCommand } from './models/model-stop.command'; -import { ModelRemoveCommand } from './models/model-remove.command'; -import { ModelUpdateCommand } from './models/model-update.command'; -import { BaseCommand } from './base.command'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { ModuleRef } from '@nestjs/core'; -import { ModelPullCommand } from './models/model-pull.command'; - -@SubCommand({ - name: 'models', - subCommands: [ - ModelStartCommand, - ModelStopCommand, - ModelListCommand, - ModelGetCommand, - ModelRemoveCommand, - ModelUpdateCommand, - ], - description: 'Subcommands for managing models', -}) -export class ModelsCommand extends BaseCommand { - constructor( - private readonly moduleRef: ModuleRef, - readonly cortexUsecases: CortexUsecases, - ) { - super(cortexUsecases); - } - - commandMap: { [key: string]: any } = { - pull: ModelPullCommand, - start: ModelStartCommand, - stop: ModelStopCommand, - list: ModelListCommand, - get: ModelGetCommand, - remove: ModelRemoveCommand, - update: ModelUpdateCommand, - }; - async runCommand(passedParam: string[], options: any) { - const [parameter, command, ...restParams] = passedParam; - if (command !== 'list' && !parameter) { - console.error('Model id is required.'); - return; - } - - // Handle the commands accordingly - const commandClass = this.commandMap[command as string]; - if (!commandClass) { - this.command?.help(); - return; - } - 
const modelId = parameter; - await this.runModelCommand( - commandClass, - [modelId, ...(restParams || [])], - options, - ); - } - - private async runModelCommand( - commandClass: any, - params: string[] = [], - options?: any, - ) { - const commandInstance = this.moduleRef.get(commandClass, { strict: false }); - if (commandInstance) { - commandInstance.setCortex(this.cortex); - await commandInstance.runCommand(params, options); - } else { - console.error('Command not found.'); - } - } - - help() { - console.log('Usage: cortex models [subcommand]'); - console.log('Commands:'); - console.log(' start - Start a model by ID'); - console.log(' stop - Stop a model by ID'); - console.log(' list - List all available models'); - console.log(' get - Get model details by ID'); - console.log(' remove - Remove a model by ID'); - console.log(' update - Update a model by ID'); - } -} diff --git a/cortex-js/src/infrastructure/commanders/models/model-get.command.ts b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts deleted file mode 100644 index 3bc4e8fa5..000000000 --- a/cortex-js/src/infrastructure/commanders/models/model-get.command.ts +++ /dev/null @@ -1,29 +0,0 @@ -import { SubCommand } from 'nest-commander'; -import { SetCommandContext } from '../decorators/CommandContext'; -import { ContextService } from '@/infrastructure/services/context/context.service'; -import { BaseCommand } from '../base.command'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; - -@SubCommand({ - name: 'get', - description: 'Get a model by ID.', - arguments: '', - argsDescription: { - model_id: 'Model ID to get information about.', - }, -}) -@SetCommandContext() -export class ModelGetCommand extends BaseCommand { - constructor( - readonly contextService: ContextService, - readonly cortexUseCases: CortexUsecases, - ) { - super(cortexUseCases); - } - - async runCommand(passedParams: string[]): Promise { - const model = await 
this.cortex.models.retrieve(passedParams[0]); - if (!model) console.error('Model not found'); - else console.log(model); - } -} diff --git a/cortex-js/src/infrastructure/commanders/models/model-list.command.ts b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts deleted file mode 100644 index 9e169733a..000000000 --- a/cortex-js/src/infrastructure/commanders/models/model-list.command.ts +++ /dev/null @@ -1,44 +0,0 @@ -import { SubCommand, Option } from 'nest-commander'; -import { SetCommandContext } from '../decorators/CommandContext'; -import { ContextService } from '@/infrastructure/services/context/context.service'; -import { BaseCommand } from '../base.command'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; - -interface ModelListOptions { - format: 'table' | 'json'; -} -@SubCommand({ name: 'list', description: 'List all models locally.' }) -@SetCommandContext() -export class ModelListCommand extends BaseCommand { - constructor( - readonly contextService: ContextService, - readonly cortexUseCases: CortexUsecases, - ) { - super(cortexUseCases); - } - - async runCommand( - passedParams: string[], - option: ModelListOptions, - ): Promise { - const { data: models } = await this.cortex.models.list(); - option.format === 'table' - ? 
console.table( - models.map((e) => ({ - id: e.id, - engine: e.engine, - version: e.version, - })), - ) - : console.log(models); - } - - @Option({ - flags: '-f, --format ', - defaultValue: 'table', - description: 'Print models list in table or json format', - }) - parseModelId(value: string) { - return value; - } -} diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts deleted file mode 100644 index 179484845..000000000 --- a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts +++ /dev/null @@ -1,93 +0,0 @@ -import { exit } from 'node:process'; -import { SubCommand } from 'nest-commander'; -import { SetCommandContext } from '../decorators/CommandContext'; -import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; -import { TelemetryUsecases } from '@/usecases/telemetry/telemetry.usecases'; -import { - EventName, - TelemetrySource, -} from '@/domain/telemetry/telemetry.interface'; -import { ContextService } from '@/infrastructure/services/context/context.service'; -import { existsSync } from 'fs'; -import { join } from 'node:path'; -import { checkModelCompatibility } from '@/utils/model-check'; -import { Engines } from '../types/engine.interface'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { BaseCommand } from '../base.command'; -import { downloadProgress } from '@/utils/download-progress'; -import { DownloadType } from '@/domain/models/download.interface'; -import ora from 'ora'; -import { isRemoteEngine } from '@/utils/normalize-model-id'; -import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; - -@SubCommand({ - name: 'pull', - aliases: ['download'], - arguments: '', - argsDescription: { model_id: 'Model repo to pull' }, - description: - 'Download a model from a registry. Working with HuggingFace repositories. 
For available models, please visit https://huggingface.co/cortexso', -}) -@SetCommandContext() -export class ModelPullCommand extends BaseCommand { - constructor( - private readonly telemetryUsecases: TelemetryUsecases, - readonly contextService: ContextService, - readonly cortexUsecases: CortexUsecases, - ) { - super(cortexUsecases); - } - - async runCommand(passedParams: string[]) { - if (passedParams.length < 1) { - console.error('Model Id is required'); - exit(1); - } - const modelId = passedParams[0]; - - await checkModelCompatibility(modelId); - if (await this.cortex.models.retrieve(modelId)) { - console.error('Model already exists.'); - exit(1); - } - - console.log('Downloading model...'); - await this.cortex.models.download(modelId).catch((e: Error) => { - if (e instanceof ModelNotFoundException) - console.error('Model does not exist.'); - else console.error(e.message ?? e); - exit(1); - }); - - await downloadProgress(this.cortex, modelId); - ora().succeed('Model downloaded'); - - const existingModel = await this.cortex.models.retrieve(modelId); - const engine = existingModel?.engine || Engines.llamaCPP; - - // Pull engine if not exist - if ( - !isRemoteEngine(engine) && - !existsSync( - join(await fileManagerService.getCortexCppEnginePath(), engine), - ) - ) { - console.log('\n'); - console.log('Downloading engine...'); - await this.cortex.engines.init(engine); - await downloadProgress(this.cortex, undefined, DownloadType.Engine); - } - this.telemetryUsecases.sendEvent( - [ - { - name: EventName.DOWNLOAD_MODEL, - modelId: passedParams[0], - }, - ], - TelemetrySource.CLI, - ); - console.log('\nDownload complete!'); - - exit(0); - } -} diff --git a/cortex-js/src/infrastructure/commanders/models/model-remove.command.ts b/cortex-js/src/infrastructure/commanders/models/model-remove.command.ts deleted file mode 100644 index 52e8aefdc..000000000 --- a/cortex-js/src/infrastructure/commanders/models/model-remove.command.ts +++ /dev/null @@ -1,27 +0,0 @@ -import { 
SubCommand } from 'nest-commander'; -import { SetCommandContext } from '../decorators/CommandContext'; -import { ContextService } from '@/infrastructure/services/context/context.service'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { BaseCommand } from '../base.command'; - -@SubCommand({ - name: 'remove', - description: 'Remove a model by ID locally.', - arguments: '', - argsDescription: { - model_id: 'Model to remove', - }, -}) -@SetCommandContext() -export class ModelRemoveCommand extends BaseCommand { - constructor( - readonly contextService: ContextService, - readonly cortexUseCases: CortexUsecases, - ) { - super(cortexUseCases); - } - - async runCommand(passedParams: string[]): Promise { - await this.cortex.models.del(passedParams[0]).then(console.log); - } -} diff --git a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts deleted file mode 100644 index f345287c6..000000000 --- a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts +++ /dev/null @@ -1,115 +0,0 @@ -import { SubCommand, Option, InquirerService } from 'nest-commander'; -import ora from 'ora'; -import { exit } from 'node:process'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { SetCommandContext } from '../decorators/CommandContext'; -import { ContextService } from '@/infrastructure/services/context/context.service'; -import { existsSync } from 'node:fs'; -import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; -import { join } from 'node:path'; -import { Engines } from '../types/engine.interface'; -import { checkModelCompatibility } from '@/utils/model-check'; -import { BaseCommand } from '../base.command'; -import { isRemoteEngine } from '@/utils/normalize-model-id'; -import { downloadProgress } from '@/utils/download-progress'; -import { DownloadType } from 
'@/domain/models/download.interface'; -import { printLastErrorLines } from '@/utils/logs'; - -type ModelStartOptions = { - preset?: string; -}; - -@SubCommand({ - name: 'start', - description: 'Start a model by ID.', - arguments: '[model_id]', - argsDescription: { - model_id: - 'Model ID to start. If there is no model ID, it will prompt you to select from the available models.', - }, -}) -@SetCommandContext() -export class ModelStartCommand extends BaseCommand { - constructor( - private readonly inquirerService: InquirerService, - readonly cortexUsecases: CortexUsecases, - readonly contextService: ContextService, - ) { - super(cortexUsecases); - } - - async runCommand( - passedParams: string[], - options: ModelStartOptions, - ): Promise { - let modelId = passedParams[0]; - const checkingSpinner = ora('Checking model...').start(); - if (!modelId) { - try { - modelId = await this.modelInquiry(); - } catch { - checkingSpinner.fail('Model ID is required'); - exit(1); - } - } - - const existingModel = await this.cortex.models.retrieve(modelId); - if (!existingModel) { - checkingSpinner.fail( - `Model ${modelId} not found on filesystem.\nPlease try 'cortex pull ${modelId}' first.`, - ); - process.exit(1); - } - - await checkModelCompatibility(modelId); - checkingSpinner.succeed('Model found'); - - const engine = existingModel.engine || Engines.llamaCPP; - // Pull engine if not exist - if ( - !isRemoteEngine(engine) && - !existsSync( - join(await fileManagerService.getCortexCppEnginePath(), engine), - ) - ) { - console.log('Downloading engine...'); - await this.cortex.engines.init(engine); - await downloadProgress(this.cortex, undefined, DownloadType.Engine); - } - - const parsedPreset = await fileManagerService.getPreset(options.preset); - - const startingSpinner = ora('Loading model...').start(); - - await this.cortex.models - .start(modelId, parsedPreset) - .then(() => startingSpinner.succeed('Model loaded')) - .catch(async (error) => { - 
startingSpinner.fail(error.message ?? error); - printLastErrorLines(await fileManagerService.getLogPath()); - }); - } - - modelInquiry = async () => { - const { data: models } = await this.cortex.models.list(); - if (!models.length) throw 'No models found'; - const { model } = await this.inquirerService.inquirer.prompt({ - type: 'list', - name: 'model', - message: 'Select a model to start:', - choices: models.map((e) => ({ - name: e.id, - value: e.id, - })), - }); - return model; - }; - - @Option({ - flags: '-p, --preset ', - description: 'Apply a chat preset to the chat session', - }) - parseTemplate(value: string) { - return value; - } -} diff --git a/cortex-js/src/infrastructure/commanders/models/model-stop.command.ts b/cortex-js/src/infrastructure/commanders/models/model-stop.command.ts deleted file mode 100644 index 2eb65666a..000000000 --- a/cortex-js/src/infrastructure/commanders/models/model-stop.command.ts +++ /dev/null @@ -1,34 +0,0 @@ -import { SubCommand } from 'nest-commander'; -import { SetCommandContext } from '../decorators/CommandContext'; -import { ContextService } from '@/infrastructure/services/context/context.service'; -import { BaseCommand } from '../base.command'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import ora from 'ora'; - -@SubCommand({ - name: 'stop', - description: 'Stop a model by ID.', - arguments: '', - argsDescription: { - model_id: 'Model ID to stop.', - }, -}) -@SetCommandContext() -export class ModelStopCommand extends BaseCommand { - constructor( - readonly contextService: ContextService, - readonly cortexUseCases: CortexUsecases, - ) { - super(cortexUseCases); - } - - async runCommand(passedParams: string[]): Promise { - const loadingSpinner = ora('Unloading model...').start(); - await this.cortex.models - .stop(passedParams[0]) - .then(() => loadingSpinner.succeed('Model unloaded')) - .catch((e) => - loadingSpinner.fail(`Failed to unload model: ${e.message ?? 
e}`), - ); - } -} diff --git a/cortex-js/src/infrastructure/commanders/models/model-update.command.ts b/cortex-js/src/infrastructure/commanders/models/model-update.command.ts deleted file mode 100644 index a9d5f84ed..000000000 --- a/cortex-js/src/infrastructure/commanders/models/model-update.command.ts +++ /dev/null @@ -1,74 +0,0 @@ -import { SubCommand, Option } from 'nest-commander'; -import { exit } from 'node:process'; -import { SetCommandContext } from '../decorators/CommandContext'; -import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; -import { ContextService } from '@/infrastructure/services/context/context.service'; -import { BaseCommand } from '../base.command'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; - -type UpdateOptions = { - model?: string; - options?: string[]; -}; - -@SubCommand({ - name: 'update', - description: 'Update configuration of a model.', - arguments: '', - argsDescription: { - model_id: 'Model ID to update configuration.', - }, -}) -@SetCommandContext() -export class ModelUpdateCommand extends BaseCommand { - constructor( - readonly contextService: ContextService, - readonly cortexUseCases: CortexUsecases, - ) { - super(cortexUseCases); - } - - async runCommand( - passedParams: string[], - option: UpdateOptions, - ): Promise { - const modelId = option.model; - if (!modelId) { - console.error('Model Id is required'); - exit(1); - } - - const options = option.options; - if (!options || options.length === 0) { - console.log('Nothing to update'); - exit(0); - } - - const toUpdate: UpdateModelDto = {}; - - options.forEach((option) => { - const [key, stringValue] = option.split('='); - Object.assign(toUpdate, { key, stringValue }); - }); - this.cortex.models.update(modelId, toUpdate as any); - } - - @Option({ - flags: '-m, --model ', - required: true, - description: 'Model Id to update', - }) - parseModelId(value: string) { - return value; - } - - @Option({ - flags: '-c, --options ', - 
description: - 'Specify the options to update the model. Syntax: -c option1=value1 option2=value2. For example: cortex models update -c max_tokens=100 temperature=0.5', - }) - parseOptions(option: string, optionsAccumulator: string[] = []): string[] { - optionsAccumulator.push(option); - return optionsAccumulator; - } -} diff --git a/cortex-js/src/infrastructure/commanders/presets.command.ts b/cortex-js/src/infrastructure/commanders/presets.command.ts deleted file mode 100644 index ba1ea5057..000000000 --- a/cortex-js/src/infrastructure/commanders/presets.command.ts +++ /dev/null @@ -1,31 +0,0 @@ -import { readdirSync } from 'fs'; -import { SubCommand } from 'nest-commander'; -import { join } from 'path'; -import { SetCommandContext } from './decorators/CommandContext'; -import { ContextService } from '../services/context/context.service'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { BaseCommand } from './base.command'; -import { fileManagerService } from '../services/file-manager/file-manager.service'; - -@SubCommand({ - name: 'presets', - description: 'Show all available presets', -}) -@SetCommandContext() -export class PresetCommand extends BaseCommand { - constructor( - readonly contextService: ContextService, - readonly cortexUsecases: CortexUsecases, - ) { - super(cortexUsecases); - } - async runCommand(): Promise { - return console.table( - readdirSync( - join(await fileManagerService.getDataFolderPath(), `presets`), - ).map((e) => ({ - preset: e.replace('.yaml', ''), - })), - ); - } -} diff --git a/cortex-js/src/infrastructure/commanders/ps.command.ts b/cortex-js/src/infrastructure/commanders/ps.command.ts deleted file mode 100644 index f2268cdd6..000000000 --- a/cortex-js/src/infrastructure/commanders/ps.command.ts +++ /dev/null @@ -1,146 +0,0 @@ -import ora from 'ora'; -import systeminformation from 'systeminformation'; -import { SubCommand } from 'nest-commander'; -import { SetCommandContext } from 
'./decorators/CommandContext'; -import { ModelStat } from './types/model-stat.interface'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { BaseCommand } from './base.command'; -import { HttpStatus } from '@nestjs/common'; -import { Engines } from './types/engine.interface'; -import { ModelStatResponse } from '../providers/cortex/cortex.provider'; -import { firstValueFrom } from 'rxjs'; -import { HttpService } from '@nestjs/axios'; -import { CORTEX_CPP_MODELS_URL } from '../constants/cortex'; -import { fileManagerService } from '../services/file-manager/file-manager.service'; -import { getMemoryInformation } from '@/utils/system-resource'; - -@SubCommand({ - name: 'ps', - description: 'Show running models and their status', -}) -@SetCommandContext() -export class PSCommand extends BaseCommand { - constructor( - readonly cortexUsecases: CortexUsecases, - private readonly httpService: HttpService, - ) { - super(cortexUsecases); - } - async runCommand(): Promise { - const runningSpinner = ora('Running PS command...').start(); - return this.getModels() - .then((models: ModelStat[]) => { - runningSpinner.succeed(); - if (models.length) console.table(models); - else console.log('No models running'); - }) - .then(async () => { - const cpuUsage = ( - await systeminformation.currentLoad() - ).currentLoad.toFixed(2); - const gpusLoad = []; - const gpus = await systeminformation.graphics(); - for (const gpu of gpus.controllers) { - const totalVram = gpu.vram || 0; - gpusLoad.push({ - totalVram, - }); - } - const { total, used } = await getMemoryInformation() - const memoryUsage = ( - (used / total) * - 100 - ).toFixed(2); - const consumedTable = { - 'CPU Usage': `${cpuUsage}%`, - 'Memory Usage': `${memoryUsage}%`, - } as { - 'CPU Usage': string; - 'Memory Usage': string; - VRAM?: string; - }; - - if ( - gpusLoad.length > 0 && - gpusLoad.filter((gpu) => gpu.totalVram > 0).length > 0 - ) { - consumedTable['VRAM'] = gpusLoad - .map((gpu) => 
`${gpu.totalVram} MB`) - .join(', '); - } - console.table([consumedTable]); - }); - } - - /** - * Get models running in the Cortex C++ server - */ - async getModels(): Promise { - const configs = await fileManagerService.getConfig(); - const runningSpinner = ora('Getting models...').start(); - return new Promise((resolve, reject) => - firstValueFrom( - this.httpService.get( - CORTEX_CPP_MODELS_URL(configs.cortexCppHost, configs.cortexCppPort), - ), - ) - .then((res) => { - const data = res.data as ModelStatResponse; - if ( - res.status === HttpStatus.OK && - data && - Array.isArray(data.data) && - data.data.length > 0 - ) { - runningSpinner.succeed(); - resolve( - data.data.map((e) => { - const startTime = e.start_time ?? new Date(); - const currentTime = new Date(); - const duration = - currentTime.getTime() - new Date(startTime).getTime(); - return { - modelId: e.id, - engine: e.engine ?? Engines.llamaCPP, - status: 'running', - duration: this.formatDuration(duration), - ram: e.ram ?? '-', - vram: e.vram ?? 
'-', - }; - }), - ); - } else reject(); - }) - .catch(reject), - ).catch(() => { - runningSpinner.succeed(''); - return []; - }); - } - - private formatDuration(milliseconds: number): string { - const days = Math.floor(milliseconds / (1000 * 60 * 60 * 24)); - const hours = Math.floor( - (milliseconds % (1000 * 60 * 60 * 24)) / (1000 * 60 * 60), - ); - const minutes = Math.floor((milliseconds % (1000 * 60 * 60)) / (1000 * 60)); - const seconds = Math.floor((milliseconds % (1000 * 60)) / 1000); - - let formattedDuration = ''; - - if (days > 0) { - formattedDuration += `${days}d `; - } - if (hours > 0) { - formattedDuration += `${hours}h `; - } - if (minutes > 0) { - formattedDuration += `${minutes}m `; - } - if (seconds > 0) { - formattedDuration += `${seconds}s `; - } - - return formattedDuration.trim(); - } -} diff --git a/cortex-js/src/infrastructure/commanders/questions/init.questions.ts b/cortex-js/src/infrastructure/commanders/questions/init.questions.ts deleted file mode 100644 index ee4675320..000000000 --- a/cortex-js/src/infrastructure/commanders/questions/init.questions.ts +++ /dev/null @@ -1,40 +0,0 @@ -import { Question, QuestionSet } from 'nest-commander'; -import { platform } from 'node:process'; - -@QuestionSet({ name: 'init-run-mode-questions' }) -export class InitRunModeQuestions { - @Question({ - type: 'list', - message: 'Select run mode', - name: 'runMode', - default: 'CPU', - choices: ['CPU', 'GPU'], - when: () => platform !== 'darwin', - }) - parseRunMode(val: string) { - return val; - } - - @Question({ - type: 'list', - message: 'Select GPU type', - name: 'gpuType', - default: 'Nvidia', - choices: ['Nvidia', 'Others (Vulkan)'], - when: (answers: any) => answers.runMode === 'GPU', - }) - parseGPUType(val: string) { - return val; - } - - @Question({ - type: 'list', - message: 'Select CPU instructions set', - name: 'instructions', - choices: ['AVX2', 'AVX', 'AVX512'], - when: () => platform !== 'darwin', - }) - parseContent(val: string) { - return 
val; - } -} diff --git a/cortex-js/src/infrastructure/commanders/run.command.ts b/cortex-js/src/infrastructure/commanders/run.command.ts deleted file mode 100644 index 3d30b6a16..000000000 --- a/cortex-js/src/infrastructure/commanders/run.command.ts +++ /dev/null @@ -1,170 +0,0 @@ -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { SubCommand, Option, InquirerService } from 'nest-commander'; -import { exit } from 'node:process'; -import ora from 'ora'; -import { existsSync } from 'fs'; -import { join } from 'path'; -import { Engines } from './types/engine.interface'; -import { - checkModelCompatibility, - checkRequiredVersion, -} from '@/utils/model-check'; -import { BaseCommand } from './base.command'; -import { isRemoteEngine } from '@/utils/normalize-model-id'; -import { ChatClient } from './services/chat-client'; -import { downloadProgress } from '@/utils/download-progress'; -import { DownloadType } from '@/domain/models/download.interface'; -import { isLocalFile } from '@/utils/urls'; -import { parse } from 'node:path'; -import { printLastErrorLines } from '@/utils/logs'; -import { fileManagerService } from '../services/file-manager/file-manager.service'; - -type RunOptions = { - threadId?: string; - preset?: string; - chat?: boolean; -}; - -@SubCommand({ - name: 'run', - arguments: '[model_id]', - argsDescription: { - model_id: - 'Model to run. 
If the model is not available, it will attempt to pull.', - }, - description: 'Shortcut to start a model and chat', -}) -export class RunCommand extends BaseCommand { - chatClient: ChatClient; - constructor( - protected readonly cortexUsecases: CortexUsecases, - private readonly inquirerService: InquirerService, - ) { - super(cortexUsecases); - } - - async runCommand(passedParams: string[], options: RunOptions): Promise { - this.chatClient = new ChatClient(this.cortex); - let modelId = passedParams[0]; - const checkingSpinner = ora('Checking model...').start(); - if (!modelId) { - try { - modelId = await this.modelInquiry(); - } catch { - checkingSpinner.fail('Model ID is required'); - exit(1); - } - } - - // Check model compatibility on this machine - await checkModelCompatibility(modelId, checkingSpinner); - - let existingModel = await this.cortex.models.retrieve(modelId); - // If not exist - // Try Pull - if (!existingModel) { - checkingSpinner.succeed(); - - console.log('Downloading model...'); - await this.cortex.models.download(modelId).catch((e: Error) => { - checkingSpinner.fail(e.message ?? 
e); - exit(1); - }); - await downloadProgress(this.cortex, modelId); - checkingSpinner.succeed('Model downloaded'); - - // Update to persisted modelId - // TODO: Should be retrieved from the request - if (isLocalFile(modelId)) { - modelId = parse(modelId).name; - } - - // Second check if model is available - existingModel = await this.cortex.models.retrieve(modelId); - if (!existingModel) { - checkingSpinner.fail(`Model is not available`); - process.exit(1); - } - } else { - checkingSpinner.succeed('Model found'); - } - - const engine = existingModel.engine || Engines.llamaCPP; - // Pull engine if not exist - if ( - !isRemoteEngine(engine) && - !existsSync( - join(await fileManagerService.getCortexCppEnginePath(), engine), - ) - ) { - console.log('Downloading engine...'); - await this.cortex.engines.init(engine); - await downloadProgress(this.cortex, undefined, DownloadType.Engine); - } - const { version: engineVersion } = - await this.cortex.engines.retrieve(engine); - if ( - existingModel.engine_version && - !checkRequiredVersion(existingModel.engine_version, engineVersion) - ) { - console.log( - `Model engine version ${existingModel.engine_version} is not compatible with engine version ${engineVersion}`, - ); - process.exit(1); - } - - const startingSpinner = ora('Loading model...').start(); - - return this.cortex.models - .start(modelId, await fileManagerService.getPreset(options.preset)) - .then(() => { - startingSpinner.succeed('Model loaded'); - if (options.chat) this.chatClient.chat(modelId, options.threadId); - else console.log("To start a chat session, use the '--chat' flag"); - }) - .catch(async (e) => { - startingSpinner.fail(e.message ?? e); - - printLastErrorLines(await fileManagerService.getLogPath()); - }); - } - - @Option({ - flags: '-t, --thread ', - description: 'Thread Id. 
If not provided, will create new thread', - }) - parseThreadId(value: string) { - return value; - } - - @Option({ - flags: '-p, --preset ', - description: 'Apply a chat preset to the chat session', - }) - parseTemplate(value: string) { - return value; - } - - @Option({ - flags: '-c, --chat', - description: 'Start a chat session after starting the model', - }) - parseChat() { - return true; - } - - modelInquiry = async () => { - const { data: models } = await this.cortex.models.list(); - if (!models.length) throw 'No models found'; - const { model } = await this.inquirerService.inquirer.prompt({ - type: 'list', - name: 'model', - message: 'Select a model to start:', - choices: models.map((e) => ({ - name: e.id, - value: e.id, - })), - }); - return model; - }; -} diff --git a/cortex-js/src/infrastructure/commanders/serve-stop.command.ts b/cortex-js/src/infrastructure/commanders/serve-stop.command.ts deleted file mode 100644 index ea9039a4d..000000000 --- a/cortex-js/src/infrastructure/commanders/serve-stop.command.ts +++ /dev/null @@ -1,18 +0,0 @@ -import { SubCommand } from 'nest-commander'; -import { BaseCommand } from './base.command'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; - -@SubCommand({ - name: 'stop', - description: 'Stop the API server', -}) -export class ServeStopCommand extends BaseCommand { - constructor(private readonly cortexUsecases: CortexUsecases) { - super(cortexUsecases); - } - async runCommand(): Promise { - return this.cortexUsecases - .stopApiServer() - .then(() => console.log('API server stopped')); - } -} diff --git a/cortex-js/src/infrastructure/commanders/services/chat-client.ts b/cortex-js/src/infrastructure/commanders/services/chat-client.ts deleted file mode 100644 index 1f9105a5f..000000000 --- a/cortex-js/src/infrastructure/commanders/services/chat-client.ts +++ /dev/null @@ -1,193 +0,0 @@ -import { exit, stdin, stdout } from 'node:process'; -import * as readline from 'node:readline/promises'; -import { 
ChatCompletionMessage } from '@/infrastructure/dtos/chat/chat-completion-message.dto'; -import { CreateThreadDto } from '@/infrastructure/dtos/threads/create-thread.dto'; -import { CreateThreadAssistantDto } from '@/infrastructure/dtos/threads/create-thread-assistant.dto'; -import { ModelParameterParser } from '@/utils/model-parameter.parser'; -import { TextContentBlock } from '@/domain/models/message.interface'; -import { Cortex } from '@cortexso/cortex.js'; - -export class ChatClient { - private exitClause = 'exit()'; - private userIndicator = '>> '; - private exitMessage = 'Bye!'; - private cortex: Cortex; - constructor(readonly cortexInstance: Cortex) { - this.cortex = cortexInstance; - } - - async chat( - modelId: string, - threadId?: string, - message?: string, - settings?: Partial, - ): Promise { - console.log(`In order to exit, type '${this.exitClause}'.`); - const thread = await this.getOrCreateNewThread(modelId, threadId); - const messages: ChatCompletionMessage[] = ( - await this.cortex.beta.threads.messages.list(thread.id, { - limit: 10, - order: 'desc', - }) - ).data.map((message) => ({ - content: (message.content[0] as TextContentBlock).text.value, - role: message.role, - })); - - const rl = readline.createInterface({ - input: stdin, - output: stdout, - prompt: this.userIndicator, - }); - - if (message) - this.sendCompletionMessage( - message, - messages, - modelId, - thread.id, - rl, - settings, - ); - rl.prompt(); - - rl.on('close', async () => { - console.log(this.exitMessage); - exit(0); - }); - - rl.on('line', (input) => - this.sendCompletionMessage( - input, - messages, - modelId, - thread.id, - rl, - settings, - ), - ); - } - - async sendCompletionMessage( - userInput: string, - messages: Cortex.ChatCompletionMessageParam[], - modelId: string, - threadId: string, - rl: readline.Interface, - settings: Partial = {}, - ) { - if (!userInput || userInput.trim() === '') return; - - if (userInput.trim() === this.exitClause) { - rl.close(); - return; - 
} - - if ( - 'pre_prompt' in settings && - !messages.some((m) => m.role === 'system') - ) { - messages = [ - { - content: settings.pre_prompt as string, - role: 'system', - }, - ...messages, - ]; - } - - const model = await this.cortex.models.retrieve(modelId); - - messages.push({ - content: userInput, - role: 'user', - }); - - const createMessageDto: Cortex.Beta.Threads.Messages.MessageCreateParams = { - role: 'user', - content: userInput, - }; - this.cortex.beta.threads.messages.create(threadId, createMessageDto); - - const parser = new ModelParameterParser(); - const chatDto: Cortex.ChatCompletionCreateParamsStreaming = { - // Default results params - messages, - model: modelId, - max_tokens: 4098, - temperature: 0.7, - ...settings, - // Override with model inference params - ...parser.parseModelInferenceParams(model), - stream: true, - }; - - try { - const stream = await this.cortex.chat.completions.create({ - ...chatDto, - }); - - let assistantResponse = ''; - for await (const part of stream) { - const output = part.choices[0]?.delta?.content || ''; - assistantResponse += output; - stdout.write(output); - } - - messages.push({ - content: assistantResponse, - role: 'assistant', - }); - - this.cortex.beta.threads.messages - .create(threadId, { - role: 'assistant', - content: assistantResponse, - }) - .then(() => { - assistantResponse = ''; - console.log('\n'); - rl.prompt(); - }); - } catch (e) { - stdout.write( - `Something went wrong! 
Please check model status.\n${e.message}\n`, - ); - rl.prompt(); - } - } - - // Get or create a new thread - private async getOrCreateNewThread( - modelId: string, - threadId?: string, - ): Promise { - if (threadId) { - const thread = await this.cortex.beta.threads.retrieve(threadId); - if (!thread) throw new Error(`Cannot find thread with id: ${threadId}`); - return thread; - } - - const model = await this.cortex.models.retrieve(modelId); - if (!model) throw new Error(`Cannot find model with id: ${modelId}`); - - const assistantDto: CreateThreadAssistantDto = { - avatar: '', - id: 'jan', - object: 'assistant', - created_at: Date.now(), - name: 'Jan', - description: 'A default assistant that can use all downloaded models', - model: modelId, - instructions: '', - tools: [], - metadata: {}, - }; - - const createThreadDto: CreateThreadDto = { - assistants: [assistantDto], - }; - - return this.cortex.beta.threads.create(createThreadDto as any); - } -} diff --git a/cortex-js/src/infrastructure/commanders/services/cortex.client.module.ts b/cortex-js/src/infrastructure/commanders/services/cortex.client.module.ts deleted file mode 100644 index 8a363ee27..000000000 --- a/cortex-js/src/infrastructure/commanders/services/cortex.client.module.ts +++ /dev/null @@ -1,9 +0,0 @@ -import { Module } from '@nestjs/common'; -import { CortexClient } from './cortex.client'; - -@Module({ - imports: [], - providers: [CortexClient], - exports: [CortexClient], -}) -export class CortexClientModule {} diff --git a/cortex-js/src/infrastructure/commanders/services/cortex.client.ts b/cortex-js/src/infrastructure/commanders/services/cortex.client.ts deleted file mode 100644 index ce921a1f0..000000000 --- a/cortex-js/src/infrastructure/commanders/services/cortex.client.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { - cortexNamespace, - cortexServerAPI, -} from '@/infrastructure/constants/cortex'; -import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; 
-import Cortex from '@cortexso/cortex.js'; - -export class CortexClient extends Cortex { - serverConfigs: { host: string; port: number }; - - constructor() { - const configs = fileManagerService.getServerConfig(); - super({ - baseURL: cortexServerAPI(configs.host, configs.port), - apiKey: cortexNamespace, - }); - this.serverConfigs = configs; - } -} diff --git a/cortex-js/src/infrastructure/commanders/telemetry.command.ts b/cortex-js/src/infrastructure/commanders/telemetry.command.ts deleted file mode 100644 index b4f3b3ec5..000000000 --- a/cortex-js/src/infrastructure/commanders/telemetry.command.ts +++ /dev/null @@ -1,44 +0,0 @@ -import { SubCommand, Option } from 'nest-commander'; -import { TelemetryUsecases } from '@/usecases/telemetry/telemetry.usecases'; -import { TelemetryOptions } from './types/telemetry-options.interface'; -import { SetCommandContext } from './decorators/CommandContext'; -import { BaseCommand } from './base.command'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; - -@SubCommand({ - name: 'telemetry', - description: 'Get telemetry logs', -}) -@SetCommandContext() -export class TelemetryCommand extends BaseCommand { - constructor( - private readonly telemetryUseCase: TelemetryUsecases, - readonly cortexUseCases: CortexUsecases, - ) { - super(cortexUseCases); - } - - async runCommand( - _input: string[], - options?: TelemetryOptions, - ): Promise { - if (options?.type === 'crash') { - try { - await this.telemetryUseCase.readCrashReports((telemetryEvent) => { - console.log(JSON.stringify(telemetryEvent, null, 2)); - }); - } catch (e) { - console.error('Error reading crash reports', e); - } - } - } - @Option({ - flags: '-t, --type', - description: 'Type of telemetry', - defaultValue: 'crash', - choices: ['crash'], - }) - parseType(value: string) { - return value; - } -} diff --git a/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts b/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts 
deleted file mode 100644 index 83259eb8b..000000000 --- a/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts +++ /dev/null @@ -1,146 +0,0 @@ -// import { TestingModule } from '@nestjs/testing'; -// import { spy, Stub, stubMethod } from 'hanbi'; -// import { CommandTestFactory } from 'nest-commander-testing'; -// import { CommandModule } from '@/command.module'; -// import { LogService } from '@/infrastructure/commanders/test/log.service'; -// import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; - -// import { join } from 'path'; -// import { rmSync } from 'fs'; -// import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; - -// let commandInstance: TestingModule, -// exitSpy: Stub, -// stdoutSpy: Stub, -// stderrSpy: Stub; -export const timeout = 500000; - -// beforeAll( -// () => -// new Promise(async (res) => { -// stubMethod(process.stderr, 'write'); -// exitSpy = stubMethod(process, 'exit'); -// stdoutSpy = stubMethod(process.stdout, 'write'); -// stderrSpy = stubMethod(process.stderr, 'write'); -// commandInstance = await CommandTestFactory.createTestingCommand({ -// imports: [CommandModule], -// }) -// .overrideProvider(LogService) -// .useValue({ log: spy().handler }) -// .compile(); - -// const fileService = -// await commandInstance.resolve(FileManagerService); - -// // Attempt to create test folder -// await fileService.writeConfigFile({ -// dataFolderPath: join(__dirname, 'test_data'), -// cortexCppHost: 'localhost', -// cortexCppPort: 3929, -// }); -// const cortexUseCases = -// await commandInstance.resolve(CortexUsecases); -// jest -// .spyOn(cortexUseCases, 'isAPIServerOnline') -// .mockImplementation(() => Promise.resolve(true)); -// res(); -// }), -// ); - -// afterEach(() => { -// stdoutSpy.reset(); -// stderrSpy.reset(); -// exitSpy.reset(); -// }); - -// afterAll( -// () => -// new Promise(async (res) => { -// // Attempt to clean test folder -// rmSync(join(__dirname, 
'test_data'), { -// recursive: true, -// force: true, -// }); -// res(); -// }), -// ); - -describe('Helper commands', () => { - test( - 'Init with hardware auto detection', - async () => { - // // await CommandTestFactory.run(commandInstance, ['init', '-s']); - // // - // // // Wait for a brief period to allow the command to execute - // // await new Promise((resolve) => setTimeout(resolve, 1000)); - // // - // // expect(stdoutSpy.firstCall?.args.length).toBeGreaterThan(0); - }, - timeout, - ); - - // // test('Chat with option -m', async () => { - // // const logMock = stubMethod(console, 'log'); - // // - // // await CommandTestFactory.run(commandInstance, [ - // // 'chat', - // // // '-m', - // // // 'hello', - // // // '>output.txt', - // // ]); - // // expect(logMock.firstCall?.args[0]).toBe("Inorder to exit, type 'exit()'."); - // // // expect(exitSpy.callCount).toBe(1); - // // // expect(exitSpy.firstCall?.args[0]).toBe(1); -}); - -// test( -// 'Show stop running models', -// async () => { -// // const tableMock = stubMethod(console, 'table'); -// // const logMock = stubMethod(console, 'log'); -// // await CommandTestFactory.run(commandInstance, ['stop']); -// // await CommandTestFactory.run(commandInstance, ['ps']); -// // expect(logMock.firstCall?.args[0]).toEqual('API server stopped'); -// // expect(tableMock.firstCall?.args[0]).toBeInstanceOf(Array); -// // expect(tableMock.firstCall?.args[0].length).toEqual(0); -// }, -// timeout, -// ); - -// test('Help command return guideline to users', async () => { -// // await CommandTestFactory.run(commandInstance, ['-h']); -// // expect(stdoutSpy.firstCall?.args).toBeInstanceOf(Array); -// // expect(stdoutSpy.firstCall?.args.length).toBe(1); -// // expect(stdoutSpy.firstCall?.args[0]).toContain('display help for command'); -// // expect(exitSpy.callCount).toBeGreaterThan(1); -// // expect(exitSpy.firstCall?.args[0]).toBe(0); -// }); - -// test('Should handle missing command', async () => { -// // await 
CommandTestFactory.run(commandInstance, ['--unknown']); -// // expect(stderrSpy.firstCall?.args[0]).toContain('error: unknown option'); -// // expect(stderrSpy.firstCall?.args[0]).toContain('--unknown'); -// // expect(exitSpy.callCount).toBeGreaterThan(0); -// // expect(exitSpy.firstCall?.args[0]).toBe(1); -// }); - -// // test('Local API server via default host/port localhost:1337/api', async () => { -// // await CommandTestFactory.run(commandInstance, ['serve', '--detach']); -// // -// // await new Promise((resolve) => setTimeout(resolve, 2000)); -// // -// // expect(stdoutSpy.firstCall?.args[0]).toContain( -// // 'Started server at http://localhost:1337', -// // ); -// // // Add a delay -// // // Temporally disable for further investigation -// // return new Promise(async (resolve) => { -// // setTimeout(async () => { -// // // Send a request to the API server to check if it's running -// // const response = await axios.get('http://localhost:1337/api'); -// // expect(response.status).toBe(200); -// // resolve(); -// // }, 5000); -// // }); -// // }, 15000); -// }); diff --git a/cortex-js/src/infrastructure/commanders/test/log.service.ts b/cortex-js/src/infrastructure/commanders/test/log.service.ts deleted file mode 100644 index 1151f5fb5..000000000 --- a/cortex-js/src/infrastructure/commanders/test/log.service.ts +++ /dev/null @@ -1,8 +0,0 @@ -import { Injectable } from '@nestjs/common'; - -@Injectable() -export class LogService { - log(...args: any[]): void { - console.log(...args); - } -} diff --git a/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts b/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts deleted file mode 100644 index ca3932ecd..000000000 --- a/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts +++ /dev/null @@ -1,131 +0,0 @@ -import { TestingModule } from '@nestjs/testing'; -import { CommandTestFactory } from 'nest-commander-testing'; -import { CommandModule } from '@/command.module'; -import 
{ join } from 'path'; -import { rmSync } from 'fs'; -import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; - -let commandInstance: TestingModule; - -beforeAll( - () => - new Promise(async (res) => { - commandInstance = await CommandTestFactory.createTestingCommand({ - imports: [CommandModule], - }) - // .overrideProvider(LogService) - // .useValue({}) - .compile(); - const fileService = - await commandInstance.resolve(FileManagerService); - - // Attempt to create test folder - await fileService.writeConfigFile({ - dataFolderPath: join(__dirname, 'test_data'), - cortexCppHost: 'localhost', - cortexCppPort: 3929, - }); - const cortexUseCases = - await commandInstance.resolve(CortexUsecases); - jest - .spyOn(cortexUseCases, 'isAPIServerOnline') - .mockImplementation(() => Promise.resolve(true)); - res(); - }), -); - -afterAll( - () => - new Promise(async (res) => { - // Attempt to clean test folder - rmSync(join(__dirname, 'test_data'), { - recursive: true, - force: true, - }); - res(); - }), -); - -export const modelName = 'tinyllama'; -describe('Action with models', () => { - // test('Init with CPU', async () => { - // const logMock = stubMethod(console, 'log'); - // - // logMock.passThrough(); - // CommandTestFactory.setAnswers(['CPU', '', 'AVX2']); - // - // await CommandTestFactory.run(commandInstance, ['setup']); - // expect(logMock.firstCall?.args[0]).toContain('engine file'); - // }, 50000); - // - - test('Empty model list', async () => { - // const logMock = stubMethod(console, 'table'); - // await CommandTestFactory.run(commandInstance, ['models', 'list']); - // expect(logMock.firstCall?.args[0]).toBeInstanceOf(Array); - // expect(logMock.firstCall?.args[0].length).toBe(0); - }, 20000); - - // - // test( - // 'Pull model and check with cortex ps', - // async () => { - // const logMock = stubMethod(console, 'log'); - // - // await 
CommandTestFactory.run(commandInstance, ['pull', modelName]); - // expect(logMock.lastCall?.args[0]).toContain('Download complete!'); - // - // const tableMock = stubMethod(console, 'table'); - // await CommandTestFactory.run(commandInstance, ['ps']); - // expect(tableMock.firstCall?.args[0].length).toBeGreaterThan(0); - // }, - // timeout, - // ); - // - // test( - // 'Run model and check with cortex ps', - // async () => { - // const logMock = stubMethod(console, 'log'); - // - // await CommandTestFactory.run(commandInstance, ['run', modelName]); - // expect([ - // "Inorder to exit, type 'exit()'.", - // `Model ${modelName} not found. Try pulling model...`, - // ]).toContain(logMock.lastCall?.args[0]); - // - // const tableMock = stubMethod(console, 'table'); - // await CommandTestFactory.run(commandInstance, ['ps']); - // expect(tableMock.firstCall?.args[0].length).toBeGreaterThan(0); - // }, - // timeout, - // ); - // - // test('Get model', async () => { - // const logMock = stubMethod(console, 'log'); - // - // await CommandTestFactory.run(commandInstance, ['models', 'get', modelName]); - // expect(logMock.firstCall?.args[0]).toBeInstanceOf(Object); - // expect(logMock.firstCall?.args[0].files.length).toBe(1); - // }); - // - // test('Many models in the list', async () => { - // const logMock = stubMethod(console, 'table'); - // await CommandTestFactory.run(commandInstance, ['models', 'list']); - // expect(logMock.firstCall?.args[0]).toBeInstanceOf(Array); - // expect(logMock.firstCall?.args[0].length).toBe(1); - // expect(logMock.firstCall?.args[0][0].id).toBe(modelName); - // }); - // - // test( - // 'Model already exists', - // async () => { - // const stdoutSpy = stubMethod(process.stdout, 'write'); - // const exitSpy = stubMethod(process, 'exit'); - // await CommandTestFactory.run(commandInstance, ['pull', modelName]); - // expect(stdoutSpy.firstCall?.args[0]).toContain('Model already exists'); - // expect(exitSpy.firstCall?.args[0]).toBe(1); - // }, - // 
timeout, - // ); -}); diff --git a/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts b/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts deleted file mode 100644 index 7a9bf980c..000000000 --- a/cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts +++ /dev/null @@ -1,32 +0,0 @@ -import { Cortex } from '@cortexso/cortex.js'; - -export interface ApiConfig { - base_url: string; - api_key: string; - parameters: ParametersConfig; -} - -export interface ParametersConfig { - messages: Cortex.ChatCompletionMessageParam[]; - model: string; - stream?: boolean; - max_tokens?: number; - stop?: string[]; - frequency_penalty?: number; - presence_penalty?: number; - temperature?: number; - top_p?: number; -} - -export interface BenchmarkConfig { - api: ApiConfig; - prompts?: { - min: number; - max: number; - samples: number; - }; - output: string; - concurrency: number; - num_rounds: number; - hardware: string[]; -} diff --git a/cortex-js/src/infrastructure/commanders/types/engine.interface.ts b/cortex-js/src/infrastructure/commanders/types/engine.interface.ts deleted file mode 100644 index c3562ecdb..000000000 --- a/cortex-js/src/infrastructure/commanders/types/engine.interface.ts +++ /dev/null @@ -1,34 +0,0 @@ -export enum Engines { - llamaCPP = 'cortex.llamacpp', - onnx = 'cortex.onnx', - tensorrtLLM = 'cortex.tensorrt-llm', - - // Remote engines - groq = 'groq', - mistral = 'mistral', - openai = 'openai', - anthropic = 'anthropic', - openrouter = 'openrouter', - cohere = 'cohere', - martian = 'martian', - nvidia = 'nvidia', -} - -export const EngineNamesMap: { - [key in string]: string; -} = { - [Engines.llamaCPP]: 'llamacpp', - [Engines.onnx]: 'onnx', - [Engines.tensorrtLLM]: 'tensorrt-llm', -}; - -export const RemoteEngines: Engines[] = [ - Engines.groq, - Engines.mistral, - Engines.openai, - Engines.anthropic, - Engines.openrouter, - Engines.cohere, - Engines.martian, - Engines.nvidia, -]; diff 
--git a/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts b/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts deleted file mode 100644 index c8586a78c..000000000 --- a/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts +++ /dev/null @@ -1,9 +0,0 @@ -export interface InitOptions { - runMode?: 'CPU' | 'GPU'; - gpuType?: 'Nvidia' | 'Others (Vulkan)'; - instructions?: 'AVX' | 'AVX2' | 'AVX512' | undefined; - cudaVersion?: '11' | '12'; - silent?: boolean; - vulkan?: boolean; - version?: string; -} diff --git a/cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts b/cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts deleted file mode 100644 index 336917b4f..000000000 --- a/cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts +++ /dev/null @@ -1,8 +0,0 @@ -export interface ModelStat { - modelId: string; - engine?: string; - duration?: string; - status: string; - vram?: string; - ram?: string; -} diff --git a/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts b/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts deleted file mode 100644 index 60a5b3290..000000000 --- a/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts +++ /dev/null @@ -1,8 +0,0 @@ -export interface ModelMetadata { - contextLength: number; - ngl: number; - stopWord?: string; - promptTemplate: string; - version: number; - name?: string; -} diff --git a/cortex-js/src/infrastructure/commanders/types/telemetry-options.interface.ts b/cortex-js/src/infrastructure/commanders/types/telemetry-options.interface.ts deleted file mode 100644 index de0db5a1e..000000000 --- a/cortex-js/src/infrastructure/commanders/types/telemetry-options.interface.ts +++ /dev/null @@ -1,3 +0,0 @@ -export interface TelemetryOptions { - type: 'crash'; -} diff --git a/cortex-js/src/infrastructure/constants/benchmark.ts 
b/cortex-js/src/infrastructure/constants/benchmark.ts deleted file mode 100644 index 477c4eba3..000000000 --- a/cortex-js/src/infrastructure/constants/benchmark.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { BenchmarkConfig } from '@commanders/types/benchmark-config.interface'; - -export const defaultBenchmarkConfiguration: BenchmarkConfig = { - api: { - base_url: 'http://localhost:1337/v1', - api_key: '', - parameters: { - messages: [ - { - content: 'You are a helpful assistant.', - role: 'system', - }, - { - content: 'Hello!', - role: 'user', - }, - ], - model: 'tinyllama', - stream: true, - max_tokens: 2048, - frequency_penalty: 0, - presence_penalty: 0, - temperature: 0.7, - top_p: 0.95, - }, - }, - prompts: { - min: 1024, - max: 2048, - samples: 10, - }, - output: 'table', - hardware: ['cpu', 'gpu', 'psu', 'chassis', 'ram'], - concurrency: 1, - num_rounds: 10, -}; diff --git a/cortex-js/src/infrastructure/constants/cortex.ts b/cortex-js/src/infrastructure/constants/cortex.ts deleted file mode 100644 index 9e07868f8..000000000 --- a/cortex-js/src/infrastructure/constants/cortex.ts +++ /dev/null @@ -1,64 +0,0 @@ -export const cortexNamespace = 'cortex'; - -export const databaseName = 'cortex'; - -export const databaseFile = `${databaseName}.db`; - -export const defaultCortexJsHost = '127.0.0.1'; -export const defaultCortexJsPort = 1337; - -export const defaultCortexCppHost = '127.0.0.1'; -export const defaultCortexCppPort = 3929; - -export const defaultEmbeddingModel = 'nomic-embed-text-v1'; - -export const cortexServerAPI = (host: string, port: number) => - `http://${host}:${port}/v1/`; - -// CORTEX CPP -export const CORTEX_CPP_EMBEDDINGS_URL = ( - host: string = defaultCortexCppHost, - port: number = defaultCortexCppPort, -) => `http://${host}:${port}/inferences/server/embedding`; - -export const CORTEX_CPP_PROCESS_DESTROY_URL = ( - host: string = defaultCortexCppHost, - port: number = defaultCortexCppPort, -) => `http://${host}:${port}/processmanager/destroy`; - 
-export const CORTEX_CPP_UNLOAD_ENGINE_URL = ( - host: string = defaultCortexCppHost, - port: number = defaultCortexCppPort, -) => `http://${host}:${port}/inferences/server/unloadengine`; - -export const CORTEX_CPP_HEALTH_Z_URL = ( - host: string = defaultCortexCppHost, - port: number = defaultCortexCppPort, -) => `http://${host}:${port}/healthz`; - -export const CORTEX_CPP_MODELS_URL = ( - host: string = defaultCortexCppHost, - port: number = defaultCortexCppPort, -) => `http://${host}:${port}/inferences/server/models`; - -export const CORTEX_JS_SYSTEM_URL = ( - host: string = defaultCortexJsHost, - port: number = defaultCortexJsPort, -) => `http://${host}:${port}/v1/system`; - -export const CORTEX_JS_HEALTH_URL_WITH_API_PATH = (apiUrl: string) => - `${apiUrl}/v1/system`; - -// INITIALIZATION -export const CORTEX_RELEASES_URL = - 'https://api.github.com/repos/janhq/cortex/releases'; - -export const CORTEX_ENGINE_RELEASES_URL = (engine: string) => - `https://api.github.com/repos/janhq/${engine}/releases`; - -export const CUDA_DOWNLOAD_URL = - 'https://catalog.jan.ai/dist/cuda-dependencies///cuda.tar.gz'; - -export const telemetryServerUrl = 'https://telemetry.jan.ai'; - -export const MIN_CUDA_VERSION = '12.4'; diff --git a/cortex-js/src/infrastructure/constants/huggingface.ts b/cortex-js/src/infrastructure/constants/huggingface.ts deleted file mode 100644 index 6f4a2bbe1..000000000 --- a/cortex-js/src/infrastructure/constants/huggingface.ts +++ /dev/null @@ -1,39 +0,0 @@ -export const HUGGING_FACE_TREE_REF_URL = ( - repo: string, - tree: string, - path: string, -) => `https://huggingface.co/cortexso/${repo}/resolve/${tree}/${path}`; - -export const HUGGING_FACE_DOWNLOAD_FILE_MAIN_URL = ( - modelId: string, - fileName: string, -) => `https://huggingface.co/${modelId}/resolve/main/${fileName}`; - -export const HUGGING_FACE_REPO_URL = (author: string, repo: string) => - `https://huggingface.co/${author}/${repo}`; - -export const HUGGING_FACE_REPO_MODEL_API_URL = 
(repo: string) => - `https://huggingface.co/api/models/${repo}`; - -export const AllQuantizations = [ - 'Q3_K_S', - 'Q3_K_M', - 'Q3_K_L', - 'Q4_K_S', - 'Q4_K_M', - 'Q5_K_S', - 'Q5_K_M', - 'Q4_0', - 'Q4_1', - 'Q5_0', - 'Q5_1', - 'IQ2_XXS', - 'IQ2_XS', - 'Q2_K', - 'Q2_K_S', - 'Q6_K', - 'Q8_0', - 'F16', - 'F32', - 'COPY', -]; diff --git a/cortex-js/src/infrastructure/constants/prompt-constants.ts b/cortex-js/src/infrastructure/constants/prompt-constants.ts deleted file mode 100644 index 7d5dc522a..000000000 --- a/cortex-js/src/infrastructure/constants/prompt-constants.ts +++ /dev/null @@ -1,45 +0,0 @@ -//// HF Chat template -export const OPEN_CHAT_3_5_JINJA = `{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}`; - -export const ZEPHYR_JINJA = `{% for message in messages %} -{% if message['role'] == 'user' %} -{{ '<|user|> -' + message['content'] + eos_token }} -{% elif message['role'] == 'system' %} -{{ '<|system|> -' + message['content'] + eos_token }} -{% elif message['role'] == 'assistant' %} -{{ '<|assistant|> -' + message['content'] + eos_token }} -{% endif %} -{% if loop.last and add_generation_prompt %} -{{ '<|assistant|>' }} -{% endif %} -{% endfor %}`; - -export const LLAMA_3_JINJA = `{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> - -'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> - -' }}{% endif %}`; - -//// Corresponding prompt template -export const OPEN_CHAT_3_5 = `GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:`; - -export const ZEPHYR = `<|system|> -{system_message} -<|user|> -{prompt} 
-<|assistant|> -`; - -export const COMMAND_R = `<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{response} -`; - -// getting from https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF -export const LLAMA_2 = `[INST] <> -{system_message} -<> -{prompt}[/INST]`; -export const LLAMA_3 = - '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'; diff --git a/cortex-js/src/infrastructure/controllers/assistants.controller.spec.ts b/cortex-js/src/infrastructure/controllers/assistants.controller.spec.ts deleted file mode 100644 index 9d642fcea..000000000 --- a/cortex-js/src/infrastructure/controllers/assistants.controller.spec.ts +++ /dev/null @@ -1,31 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { AssistantsController } from './assistants.controller'; -import { AssistantsUsecases } from '@/usecases/assistants/assistants.usecases'; -import { DatabaseModule } from '@/infrastructure/database/database.module'; -import { ModelRepositoryModule } from '../repositories/models/model.module'; -import { DownloadManagerModule } from '../services/download-manager/download-manager.module'; -import { EventEmitterModule } from '@nestjs/event-emitter'; - -describe('AssistantsController', () => { - let controller: AssistantsController; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - imports: [ - EventEmitterModule.forRoot(), - DatabaseModule, - ModelRepositoryModule, - DownloadManagerModule, - ], - controllers: [AssistantsController], - providers: [AssistantsUsecases], - exports: [AssistantsUsecases], - }).compile(); - - controller = module.get(AssistantsController); - }); - - it('should be defined', () => { - expect(controller).toBeDefined(); - }); 
-}); diff --git a/cortex-js/src/infrastructure/controllers/assistants.controller.ts b/cortex-js/src/infrastructure/controllers/assistants.controller.ts deleted file mode 100644 index 1856a1d77..000000000 --- a/cortex-js/src/infrastructure/controllers/assistants.controller.ts +++ /dev/null @@ -1,128 +0,0 @@ -import { - Body, - Controller, - DefaultValuePipe, - Delete, - Get, - Param, - Post, - Query, - UseInterceptors, -} from '@nestjs/common'; -import { AssistantsUsecases } from '@/usecases/assistants/assistants.usecases'; -import { CreateAssistantDto } from '@/infrastructure/dtos/assistants/create-assistant.dto'; -import { DeleteAssistantResponseDto } from '@/infrastructure/dtos/assistants/delete-assistant.dto'; -import { - ApiCreatedResponse, - ApiOkResponse, - ApiOperation, - ApiParam, - ApiTags, - ApiResponse, - ApiQuery, -} from '@nestjs/swagger'; -import { AssistantEntity } from '../entities/assistant.entity'; -import { TransformInterceptor } from '../interceptors/transform.interceptor'; - -@ApiTags('Assistants') -@Controller('assistants') -@UseInterceptors(TransformInterceptor) -export class AssistantsController { - constructor(private readonly assistantsService: AssistantsUsecases) {} - - @ApiOperation({ - summary: 'Create assistant', - description: 'Creates a new assistant.', - }) - @ApiCreatedResponse({ - description: 'The assistant has been successfully created.', - }) - @Post() - create(@Body() createAssistantDto: CreateAssistantDto) { - return this.assistantsService.create(createAssistantDto); - } - - @ApiOperation({ - summary: 'List assistants', - description: 'Returns a list of assistants.', - }) - @ApiOkResponse({ - description: 'Ok', - type: [AssistantEntity], - }) - @ApiQuery({ - name: 'limit', - type: Number, - required: false, - description: - 'A limit on the number of objects to be returned. 
Limit can range between 1 and 100, and the default is 20.', - }) - @ApiQuery({ - name: 'order', - type: String, - required: false, - description: - 'Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.', - }) - @ApiQuery({ - name: 'after', - type: String, - required: false, - description: - 'A cursor for use in pagination. after is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.', - }) - @ApiQuery({ - name: 'before', - type: String, - required: false, - description: - 'A cursor for use in pagination. before is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.', - }) - @Get() - findAll( - @Query('limit', new DefaultValuePipe(20)) limit: number, - @Query('order', new DefaultValuePipe('desc')) order: 'asc' | 'desc', - @Query('after') after?: string, - @Query('before') before?: string, - ) { - return this.assistantsService.listAssistants(limit, order, after, before); - } - - @ApiOperation({ - summary: 'Get assistant', - description: - "Retrieves a specific assistant defined by an assistant's `id`.", - }) - @ApiOkResponse({ - description: 'Ok', - type: AssistantEntity, - }) - @ApiParam({ - name: 'id', - required: true, - description: 'The unique identifier of the assistant.', - }) - @Get(':id') - findOne(@Param('id') id: string) { - return this.assistantsService.findOne(id); - } - - @ApiOperation({ - summary: 'Delete assistant', - description: "Deletes a specific assistant defined by an assistant's `id`.", - }) - @ApiResponse({ - status: 200, - description: 'The assistant has been successfully deleted.', - type: DeleteAssistantResponseDto, - }) - 
@ApiParam({ - name: 'id', - required: true, - description: 'The unique identifier of the assistant.', - }) - @Delete(':id') - remove(@Param('id') id: string) { - return this.assistantsService.remove(id); - } -} diff --git a/cortex-js/src/infrastructure/controllers/chat.controller.spec.ts b/cortex-js/src/infrastructure/controllers/chat.controller.spec.ts deleted file mode 100644 index a372f760f..000000000 --- a/cortex-js/src/infrastructure/controllers/chat.controller.spec.ts +++ /dev/null @@ -1,41 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { ChatController } from './chat.controller'; -import { ChatUsecases } from '@/usecases/chat/chat.usecases'; -import { DatabaseModule } from '../database/database.module'; -import { ExtensionModule } from '../repositories/extensions/extension.module'; -import { ModelRepositoryModule } from '../repositories/models/model.module'; -import { HttpModule } from '@nestjs/axios'; -import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; -import { EventEmitterModule } from '@nestjs/event-emitter'; -import { TelemetryModule } from '@/usecases/telemetry/telemetry.module'; -import { FileManagerModule } from '../services/file-manager/file-manager.module'; -import { ModelsModule } from '@/usecases/models/models.module'; - -describe('ChatController', () => { - let controller: ChatController; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - imports: [ - EventEmitterModule.forRoot(), - DatabaseModule, - ExtensionModule, - ModelRepositoryModule, - HttpModule, - DownloadManagerModule, - EventEmitterModule.forRoot(), - TelemetryModule, - FileManagerModule, - ModelsModule, - ], - controllers: [ChatController], - providers: [ChatUsecases], - }).compile(); - - controller = module.get(ChatController); - }); - - it('should be defined', () => { - expect(controller).toBeDefined(); - }); -}); diff --git 
a/cortex-js/src/infrastructure/controllers/chat.controller.ts b/cortex-js/src/infrastructure/controllers/chat.controller.ts deleted file mode 100644 index ad04ab015..000000000 --- a/cortex-js/src/infrastructure/controllers/chat.controller.ts +++ /dev/null @@ -1,103 +0,0 @@ -import { Body, Controller, Post, Headers, Res, HttpCode } from '@nestjs/common'; -import { CreateChatCompletionDto } from '@/infrastructure/dtos/chat/create-chat-completion.dto'; -import { ChatUsecases } from '@/usecases/chat/chat.usecases'; -import { Response } from 'express'; -import { ApiOperation, ApiTags, ApiResponse } from '@nestjs/swagger'; -import { ChatCompletionResponseDto } from '../dtos/chat/chat-completion-response.dto'; -import { TelemetryUsecases } from '@/usecases/telemetry/telemetry.usecases'; -import { EventName } from '@/domain/telemetry/telemetry.interface'; -import { extractCommonHeaders } from '@/utils/request'; - -@ApiTags('Inference') -@Controller('chat') -export class ChatController { - constructor( - private readonly chatService: ChatUsecases, - private readonly telemetryUsecases: TelemetryUsecases, - ) {} - - @ApiOperation({ - summary: 'Create chat completion', - description: - 'Creates a model response for the given conversation. 
The following parameters are not working for the `TensorRT-LLM` engine:\n- `frequency_penalty`\n- `presence_penalty`\n- `top_p`', - }) - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'Ok', - type: ChatCompletionResponseDto, - }) - @Post('completions') - async create( - @Headers() headers: Record, - @Body() createChatDto: CreateChatCompletionDto, - @Res() res: Response, - ) { - const { stream } = createChatDto; - this.chatService - .inference(createChatDto, extractCommonHeaders(headers)) - .then((response) => { - if (stream) { - res.header('Content-Type', 'text/event-stream'); - response.pipe(res); - } else { - res.header('Content-Type', 'application/json'); - res.json(response); - } - }) - .catch((error) => { - const statusCode = error.response?.status ?? 400; - let errorMessage; - if (!stream) { - const data = error.response?.data; - if (!data) { - return res.status(500).send(error.message || 'An error occurred'); - } - return res - .status(statusCode) - .send( - data.error?.message || - data.message || - error.message || - 'An error occurred', - ); - } - const streamResponse = error.response?.data; - if (!streamResponse) { - return res.status(500).send(error.message || 'An error occurred'); - } - let data = ''; - streamResponse.on('data', (chunk: any) => { - data += chunk; - }); - - streamResponse.on('end', () => { - try { - const jsonResponse = JSON.parse(data); - errorMessage = - jsonResponse.error?.message || - jsonResponse.message || - error.message || - 'An error occurred'; - } catch (err) { - errorMessage = 'An error occurred while processing the response'; - } - return res - .status(error.statusCode ?? 400) - .send(errorMessage || error.message || 'An error occurred'); - }); - - streamResponse.on('error', (streamError: any) => { - errorMessage = streamError.message ?? 'An error occurred'; - return res - .status(error.statusCode ?? 
400) - .send(errorMessage || error.message || 'An error occurred'); - }); - }); - - this.telemetryUsecases.addEventToQueue({ - name: EventName.CHAT, - modelId: createChatDto.model, - }); - } -} diff --git a/cortex-js/src/infrastructure/controllers/embeddings.controller.spec.ts b/cortex-js/src/infrastructure/controllers/embeddings.controller.spec.ts deleted file mode 100644 index f60087678..000000000 --- a/cortex-js/src/infrastructure/controllers/embeddings.controller.spec.ts +++ /dev/null @@ -1,44 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { EmbeddingsController } from './embeddings.controller'; -import { ChatUsecases } from '@/usecases/chat/chat.usecases'; -import { DatabaseModule } from '../database/database.module'; -import { ModelRepositoryModule } from '../repositories/models/model.module'; -import { ExtensionModule } from '../repositories/extensions/extension.module'; -import { HttpModule } from '@nestjs/axios'; -import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; -import { EventEmitterModule } from '@nestjs/event-emitter'; -import { TelemetryModule } from '@/usecases/telemetry/telemetry.module'; -import { FileManagerModule } from '../services/file-manager/file-manager.module'; -import { ModelsModule } from '@/usecases/models/models.module'; -import { CortexModule } from '@/usecases/cortex/cortex.module'; - -describe('EmbeddingsController', () => { - let controller: EmbeddingsController; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - imports: [ - EventEmitterModule.forRoot(), - DatabaseModule, - ModelRepositoryModule, - ExtensionModule, - HttpModule, - DownloadManagerModule, - EventEmitterModule.forRoot(), - TelemetryModule, - FileManagerModule, - ModelsModule, - CortexModule, - ], - controllers: [EmbeddingsController], - providers: [ChatUsecases], - exports: [ChatUsecases], - }).compile(); - - controller = 
module.get(EmbeddingsController); - }); - - it('should be defined', () => { - expect(controller).toBeDefined(); - }); -}); diff --git a/cortex-js/src/infrastructure/controllers/embeddings.controller.ts b/cortex-js/src/infrastructure/controllers/embeddings.controller.ts deleted file mode 100644 index 005c7327f..000000000 --- a/cortex-js/src/infrastructure/controllers/embeddings.controller.ts +++ /dev/null @@ -1,26 +0,0 @@ -import { Body, Controller, Post, HttpCode, Res } from '@nestjs/common'; -import { ChatUsecases } from '@/usecases/chat/chat.usecases'; -import { ApiOperation, ApiTags, ApiResponse } from '@nestjs/swagger'; -import { CreateEmbeddingsDto } from '../dtos/embeddings/embeddings-request.dto'; -import { EmbeddingsResponseDto } from '../dtos/chat/embeddings-response.dto'; - -@ApiTags('Embeddings') -@Controller('embeddings') -export class EmbeddingsController { - constructor(private readonly chatService: ChatUsecases) {} - - @ApiOperation({ - summary: 'Create embedding vector', - description: 'Creates an embedding vector representing the input text.', - }) - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'Ok', - type: EmbeddingsResponseDto, - }) - @Post() - async create(@Body() createEmbeddingsDto: CreateEmbeddingsDto) { - return this.chatService.embeddings(createEmbeddingsDto); - } -} diff --git a/cortex-js/src/infrastructure/controllers/engines.controller.spec.ts b/cortex-js/src/infrastructure/controllers/engines.controller.spec.ts deleted file mode 100644 index 6eb91ff36..000000000 --- a/cortex-js/src/infrastructure/controllers/engines.controller.spec.ts +++ /dev/null @@ -1,43 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { DatabaseModule } from '../database/database.module'; -import { ExtensionModule } from '../repositories/extensions/extension.module'; -import { ModelRepositoryModule } from '../repositories/models/model.module'; -import { HttpModule } from '@nestjs/axios'; -import { DownloadManagerModule } from 
'@/infrastructure/services/download-manager/download-manager.module'; -import { EventEmitterModule } from '@nestjs/event-emitter'; -import { TelemetryModule } from '@/usecases/telemetry/telemetry.module'; -import { FileManagerModule } from '../services/file-manager/file-manager.module'; -import { EnginesController } from './engines.controller'; -import { EnginesUsecases } from '@/usecases/engines/engines.usecase'; -import { EnginesModule } from '@/usecases/engines/engines.module'; -import { ConfigsModule } from '@/usecases/configs/configs.module'; - -describe('ConfigsController', () => { - let controller: EnginesController; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - imports: [ - EventEmitterModule.forRoot(), - DatabaseModule, - ExtensionModule, - ModelRepositoryModule, - HttpModule, - DownloadManagerModule, - EventEmitterModule.forRoot(), - TelemetryModule, - FileManagerModule, - EnginesModule, - ConfigsModule, - ], - controllers: [EnginesController], - providers: [EnginesUsecases], - }).compile(); - - controller = module.get(EnginesController); - }); - - it('should be defined', () => { - expect(controller).toBeDefined(); - }); -}); diff --git a/cortex-js/src/infrastructure/controllers/engines.controller.ts b/cortex-js/src/infrastructure/controllers/engines.controller.ts deleted file mode 100644 index 9eb7fdcb5..000000000 --- a/cortex-js/src/infrastructure/controllers/engines.controller.ts +++ /dev/null @@ -1,122 +0,0 @@ -import { - Controller, - Get, - Param, - HttpCode, - UseInterceptors, - Post, - Body, - Patch, - Res, -} from '@nestjs/common'; -import { ApiOperation, ApiParam, ApiTags, ApiResponse } from '@nestjs/swagger'; -import { Response } from 'express'; -import { TransformInterceptor } from '../interceptors/transform.interceptor'; -import { EnginesUsecases } from '@/usecases/engines/engines.usecase'; -import { EngineDto, InitEngineDto } from '../dtos/engines/engines.dto'; -import { CommonResponseDto } 
from '../dtos/common/common-response.dto'; -import { ConfigUpdateDto } from '../dtos/configs/config-update.dto'; - -@ApiTags('Engines') -@Controller('engines') -@UseInterceptors(TransformInterceptor) -export class EnginesController { - constructor( - private readonly enginesUsecases: EnginesUsecases, - private readonly initUsescases: EnginesUsecases, - ) {} - - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'Ok', - type: [EngineDto], - }) - @ApiOperation({ - summary: 'List available engines', - description: - 'Lists the currently available engines, including local and remote engines.', - }) - @Get() - findAll() { - return this.enginesUsecases.getEngines(); - } - - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'Ok', - type: EngineDto, - }) - @ApiOperation({ - summary: 'Get an engine', - description: - 'Retrieves an engine instance, providing basic information about the engine.', - }) - @ApiParam({ - name: 'name', - required: true, - description: 'The unique identifier of the engine.', - }) - @Get(':name(*)') - findOne(@Param('name') name: string) { - return this.enginesUsecases.getEngine(name); - } - - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'Ok', - type: CommonResponseDto, - }) - @ApiOperation({ - summary: 'Initialize an engine', - description: - 'Initializes an engine instance with the given name. 
It will download the engine if it is not available locally.', - }) - @ApiParam({ - name: 'name', - required: true, - description: 'The unique identifier of the engine.', - }) - @Post(':name(*)/init') - initialize( - @Param('name') name: string, - @Body() body: InitEngineDto | undefined, - @Res() res: Response, - ) { - try { - this.initUsescases.installEngine(body, name, true); - res.json({ - message: 'Engine initialization started successfully.', - }); - } catch (error) { - res.status(400).send(error.message); - } - } - - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'Ok', - type: CommonResponseDto, - }) - @ApiOperation({ - summary: 'Update an engine', - description: 'Updates the engine with configurations.', - }) - @ApiParam({ - name: 'name', - required: true, - description: 'The unique identifier of the engine.', - }) - @Patch(':name(*)') - update(@Param('name') name: string, @Body() configs?: any | undefined) { - console.log('configs', configs); - return this.enginesUsecases.updateConfigs( - configs.config, - configs.value, - name, - ); - } -} diff --git a/cortex-js/src/infrastructure/controllers/models.controller.spec.ts b/cortex-js/src/infrastructure/controllers/models.controller.spec.ts deleted file mode 100644 index 5f9cc2741..000000000 --- a/cortex-js/src/infrastructure/controllers/models.controller.spec.ts +++ /dev/null @@ -1,44 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { ModelsController } from './models.controller'; -import { ModelsUsecases } from '@/usecases/models/models.usecases'; -import { DatabaseModule } from '../database/database.module'; -import { ExtensionModule } from '../repositories/extensions/extension.module'; -import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; -import { HttpModule } from '@nestjs/axios'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { ModelRepositoryModule } from '../repositories/models/model.module'; 
-import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; -import { EventEmitterModule } from '@nestjs/event-emitter'; -import { TelemetryModule } from '@/usecases/telemetry/telemetry.module'; -import { ContextModule } from '../services/context/context.module'; - -describe('ModelsController', () => { - let controller: ModelsController; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - imports: [ - EventEmitterModule.forRoot(), - DatabaseModule, - ExtensionModule, - FileManagerModule, - HttpModule, - DownloadManagerModule, - ModelRepositoryModule, - DownloadManagerModule, - EventEmitterModule.forRoot(), - TelemetryModule, - ContextModule, - ], - controllers: [ModelsController], - providers: [ModelsUsecases, CortexUsecases], - exports: [ModelsUsecases], - }).compile(); - - controller = module.get(ModelsController); - }); - - it('should be defined', () => { - expect(controller).toBeDefined(); - }); -}); diff --git a/cortex-js/src/infrastructure/controllers/models.controller.ts b/cortex-js/src/infrastructure/controllers/models.controller.ts deleted file mode 100644 index a2f2ac80c..000000000 --- a/cortex-js/src/infrastructure/controllers/models.controller.ts +++ /dev/null @@ -1,303 +0,0 @@ -import { - Controller, - Get, - Post, - Body, - Param, - Delete, - HttpCode, - UseInterceptors, - BadRequestException, - Patch, - Res, - HttpStatus, -} from '@nestjs/common'; -import { Response } from 'express'; -import { ModelsUsecases } from '@/usecases/models/models.usecases'; -import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto'; -import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; -import { ModelDto } from '@/infrastructure/dtos/models/model.dto'; -import { ListModelsResponseDto } from '@/infrastructure/dtos/models/list-model-response.dto'; -import { DeleteModelResponseDto } from '@/infrastructure/dtos/models/delete-model.dto'; 
-import { ApiOperation, ApiParam, ApiTags, ApiResponse } from '@nestjs/swagger'; -import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; -import { TransformInterceptor } from '../interceptors/transform.interceptor'; -import { ModelSettingsDto } from '../dtos/models/model-settings.dto'; -import { EventName } from '@/domain/telemetry/telemetry.interface'; -import { TelemetryUsecases } from '@/usecases/telemetry/telemetry.usecases'; -import { CommonResponseDto } from '../dtos/common/common-response.dto'; -import { HuggingFaceRepoSibling } from '@/domain/models/huggingface.interface'; - -@ApiTags('Models') -@Controller('models') -@UseInterceptors(TransformInterceptor) -export class ModelsController { - constructor( - private readonly modelsUsecases: ModelsUsecases, - private readonly telemetryUsecases: TelemetryUsecases, - ) {} - - @HttpCode(201) - @ApiResponse({ - status: 201, - description: 'The model has been successfully created.', - type: StartModelSuccessDto, - }) - @ApiOperation({ - summary: 'Create model', - description: 'Creates a model `.json` instance file manually.', - }) - @Post() - create(@Body() createModelDto: CreateModelDto) { - return this.modelsUsecases.create(createModelDto); - } - - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'The model has been successfully started.', - type: StartModelSuccessDto, - }) - @ApiOperation({ - summary: 'Start model', - description: 'Starts a model operation defined by a model `id`.', - }) - @ApiParam({ - name: 'modelId', - required: true, - description: 'The unique identifier of the model.', - }) - @Post(':modelId(*)/start') - startModel( - @Param('modelId') modelId: string, - @Body() params: ModelSettingsDto, - ) { - return this.modelsUsecases.startModel(modelId, params); - } - - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'The model has been successfully started.', - type: StartModelSuccessDto, - }) - @ApiOperation({ - summary: 'Start model by 
file path', - description: - 'Starts a model operation defined by a model `id` with a file path.', - }) - @ApiParam({ - name: 'modelId', - required: true, - description: 'The unique identifier of the model.', - }) - @Post(':modelId(*)/start-by-file') - startModelByFilePath( - @Param('modelId') modelId: string, - @Body() - params: ModelSettingsDto & { filePath: string; metadataPath: string }, - ) { - const { filePath, metadataPath, ...settings } = params; - return this.modelsUsecases.startModel( - modelId, - settings, - filePath, - metadataPath, - ); - } - - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'The model has been successfully stopped.', - type: StartModelSuccessDto, - }) - @ApiOperation({ - summary: 'Stop model', - description: 'Stops a model operation defined by a model `id`.', - }) - @ApiParam({ - name: 'modelId', - required: true, - description: 'The unique identifier of the model.', - }) - @Post(':modelId(*)/stop') - stopModel(@Param('modelId') modelId: string) { - return this.modelsUsecases.stopModel(modelId); - } - - @ApiOperation({ - summary: 'Abort model pull', - description: 'Abort the model pull operation.', - parameters: [ - { - in: 'path', - name: 'pull_id', - required: true, - description: 'The unique identifier of the pull.', - }, - ], - }) - @Delete(':pull_id(*)/pull') - abortPullModel(@Param('pull_id') pullId: string) { - return this.modelsUsecases.abortDownloadModel(pullId); - } - - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'Ok', - type: CommonResponseDto, - }) - @ApiOperation({ - summary: 'Pull a model', - description: - 'Pulls a model from cortex hub or huggingface and downloads it.', - }) - @ApiParam({ - name: 'modelId', - required: true, - description: 'The unique identifier of the model.', - }) - @ApiParam({ - name: 'fileName', - required: false, - description: 'The file name of the model to download.', - }) - @ApiParam({ - name: 'persistedModelId', - required: false, - description: 'The unique 
identifier of the model in your local storage.', - }) - @Post(':modelId(*)/pull') - pullModel( - @Res() res: Response, - @Param('modelId') modelId: string, - @Body() - body?: { - fileName?: string; - persistedModelId?: string; - }, - ) { - const { fileName, persistedModelId } = body || {}; - return this.modelsUsecases - .pullModel( - modelId, - false, - (files) => { - return new Promise( - async (resolve, reject) => { - const file = files.find( - (e) => - e.quantization && (!fileName || e.rfilename === fileName), - ); - if (!file) { - return reject(new BadRequestException('File not found')); - } - return resolve(file); - }, - ); - }, - persistedModelId, - ) - .then(() => - this.telemetryUsecases.addEventToQueue({ - name: EventName.DOWNLOAD_MODEL, - modelId, - }), - ) - .then(() => - res.status(HttpStatus.OK).json({ - message: 'Download model started successfully.', - }), - ) - .catch((e) => - res - .status(HttpStatus.CONFLICT) - .json({ error: { message: e.message ?? e }, code: 409 }), - ); - } - - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'Ok', - type: ListModelsResponseDto, - }) - @ApiOperation({ - summary: 'List models', - description: - "Lists the currently available models, and provides basic information about each one such as the owner and availability. [Equivalent to OpenAI's list model](https://platform.openai.com/docs/api-reference/models/list).", - }) - @Get() - findAll() { - return this.modelsUsecases.findAll(); - } - - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'Ok', - type: ModelDto, - }) - @ApiOperation({ - summary: 'Get model', - description: - "Retrieves a model instance, providing basic information about the model such as the owner and permissions. 
[Equivalent to OpenAI's list model](https://platform.openai.com/docs/api-reference/models/retrieve).", - }) - @ApiParam({ - name: 'id', - required: true, - description: 'The unique identifier of the model.', - }) - @Get(':id(*)') - findOne(@Param('id') id: string) { - return this.modelsUsecases.findOne(id); - } - - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'The model has been successfully updated.', - type: UpdateModelDto, - }) - @ApiOperation({ - summary: 'Update model', - description: "Updates a model instance defined by a model's `id`.", - parameters: [ - { - in: 'path', - name: 'model', - required: true, - description: 'The unique identifier of the model.', - }, - ], - }) - @Patch(':model(*)') - async update( - @Param('model') model: string, - @Body() updateModelDto: UpdateModelDto, - ) { - return this.modelsUsecases.update(model, updateModelDto); - } - - @ApiResponse({ - status: 200, - description: 'The model has been successfully deleted.', - type: DeleteModelResponseDto, - }) - @ApiOperation({ - summary: 'Delete model', - description: - "Deletes a model. 
[Equivalent to OpenAI's delete model](https://platform.openai.com/docs/api-reference/models/delete).", - }) - @ApiParam({ - name: 'id', - required: true, - description: 'The unique identifier of the model.', - }) - @Delete(':id(*)') - remove(@Param('id') id: string) { - return this.modelsUsecases.remove(id); - } -} diff --git a/cortex-js/src/infrastructure/controllers/system.controller.ts b/cortex-js/src/infrastructure/controllers/system.controller.ts deleted file mode 100644 index b6a52ff20..000000000 --- a/cortex-js/src/infrastructure/controllers/system.controller.ts +++ /dev/null @@ -1,138 +0,0 @@ -import { - DownloadState, - DownloadStateEvent, -} from '@/domain/models/download.interface'; -import { - EmptyModelEvent, - ModelEvent, - ModelId, - ModelStatus, - ModelStatusAndEvent, -} from '@/domain/models/model.event'; -import { DownloadManagerService } from '@/infrastructure/services/download-manager/download-manager.service'; -import { ModelsUsecases } from '@/usecases/models/models.usecases'; -import { Controller, Delete, Get, HttpCode, Sse } from '@nestjs/common'; -import { EventEmitter2 } from '@nestjs/event-emitter'; -import { ApiOperation, ApiResponse, ApiTags } from '@nestjs/swagger'; -import { - Observable, - catchError, - combineLatest, - distinctUntilChanged, - from, - fromEvent, - interval, - map, - merge, - of, - startWith, - switchMap, -} from 'rxjs'; -import { ResourcesManagerService } from '../services/resources-manager/resources-manager.service'; -import { ResourceEvent } from '@/domain/models/resource.interface'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; - -@ApiTags('System') -@Controller('system') -export class SystemController { - constructor( - private readonly downloadManagerService: DownloadManagerService, - private readonly modelsUsecases: ModelsUsecases, - private readonly cortexUsecases: CortexUsecases, - private readonly eventEmitter: EventEmitter2, - private readonly resourcesManagerService: 
ResourcesManagerService, - ) {} - - @ApiOperation({ - summary: 'Stop api server', - description: 'Stops the Cortex API endpoint server for the detached mode.', - }) - @Delete() - async delete() { - await this.cortexUsecases.stopCortex().catch(() => {}); - process.exit(0); - } - - @ApiOperation({ - summary: 'Get health status', - description: "Retrieves the health status of your Cortex's system.", - }) - @HttpCode(200) - @ApiResponse({ - status: 200, - description: 'Ok', - }) - @Get() - async get() { - return 'OK'; - } - - @ApiOperation({ - summary: 'Get download status', - description: "Retrieves the model's download status.", - }) - @Sse('events/download') - downloadEvent(): Observable { - const latestDownloadState$: Observable = of({ - data: this.downloadManagerService.getDownloadStates(), - }); - const downloadEvent$ = fromEvent( - this.eventEmitter, - 'download.event', - ).pipe( - map((downloadState) => ({ data: downloadState })), - distinctUntilChanged(), - ); - - return merge(latestDownloadState$, downloadEvent$).pipe(); - } - - @ApiOperation({ - summary: 'Get model status', - description: 'Retrieves all the available model statuses within Cortex.', - }) - @Sse('events/model') - modelEvent(): Observable { - const latestModelStatus$: Observable> = of( - this.modelsUsecases.getModelStatuses(), - ); - - const modelEvent$ = fromEvent( - this.eventEmitter, - 'model.event', - ).pipe(startWith(EmptyModelEvent)); - - return combineLatest([latestModelStatus$, modelEvent$]).pipe( - map(([status, event]) => ({ data: { status, event } })), - ); - } - - @ApiOperation({ - summary: 'Get resources status', - description: 'Retrieves the resources status of the system.', - }) - @Sse('events/resources') - resourcesEvent(): Observable { - const initialData$ = from( - this.resourcesManagerService.getResourceStatuses(), - ).pipe( - map((data) => ({ data: data })), - catchError((error) => { - console.error('Error fetching initial resource statuses', error); - return of(); // Ensure 
the stream is kept alive even if initial fetch fails - }), - ); - - const getResourceStatuses$ = interval(2000).pipe( - switchMap(() => this.resourcesManagerService.getResourceStatuses()), - map((data) => ({ data: data })), - catchError((error) => { - console.error('Error fetching resource statuses', error); - return of(); // Keep the stream alive on error - }), - ); - - // Merge the initial data with the interval updates - return merge(initialData$, getResourceStatuses$); - } -} diff --git a/cortex-js/src/infrastructure/controllers/threads.controller.spec.ts b/cortex-js/src/infrastructure/controllers/threads.controller.spec.ts deleted file mode 100644 index 1cf065219..000000000 --- a/cortex-js/src/infrastructure/controllers/threads.controller.spec.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { ThreadsController } from './threads.controller'; -import { ThreadsUsecases } from '@/usecases/threads/threads.usecases'; -import { DatabaseModule } from '../database/database.module'; - -describe('ThreadsController', () => { - let controller: ThreadsController; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - imports: [DatabaseModule], - controllers: [ThreadsController], - providers: [ThreadsUsecases], - }).compile(); - - controller = module.get(ThreadsController); - }); - - it('should be defined', () => { - expect(controller).toBeDefined(); - }); -}); diff --git a/cortex-js/src/infrastructure/controllers/threads.controller.ts b/cortex-js/src/infrastructure/controllers/threads.controller.ts deleted file mode 100644 index dfa47a37d..000000000 --- a/cortex-js/src/infrastructure/controllers/threads.controller.ts +++ /dev/null @@ -1,342 +0,0 @@ -import { - Controller, - Get, - Post, - Body, - Param, - Delete, - UseInterceptors, - Query, - DefaultValuePipe, -} from '@nestjs/common'; -import { ThreadsUsecases } from '@/usecases/threads/threads.usecases'; -import { CreateThreadDto } 
from '@/infrastructure/dtos/threads/create-thread.dto'; -import { UpdateThreadDto } from '@/infrastructure/dtos/threads/update-thread.dto'; -import { DeleteThreadResponseDto } from '@/infrastructure/dtos/threads/delete-thread.dto'; -import { GetThreadResponseDto } from '@/infrastructure/dtos/threads/get-thread.dto'; -import { ApiOkResponse, ApiOperation, ApiResponse } from '@nestjs/swagger'; -import { TransformInterceptor } from '../interceptors/transform.interceptor'; -import { ListMessagesResponseDto } from '../dtos/messages/list-message.dto'; -import { CreateMessageDto } from '../dtos/threads/create-message.dto'; -import { UpdateMessageDto } from '../dtos/threads/update-message.dto'; -import DeleteMessageDto from '../dtos/threads/delete-message.dto'; -import { GetMessageResponseDto } from '../dtos/messages/get-message.dto'; - -@Controller('threads') -@UseInterceptors(TransformInterceptor) -export class ThreadsController { - constructor(private readonly threadsUsecases: ThreadsUsecases) {} - - @ApiOperation({ - tags: ['Threads'], - summary: 'Create thread', - description: 'Creates a new thread.', - }) - @Post() - create(@Body() createThreadDto: CreateThreadDto) { - return this.threadsUsecases.create(createThreadDto); - } - - @ApiOperation({ - tags: ['Threads'], - summary: 'List threads', - description: - 'Lists all the available threads along with its configurations.', - }) - @Get() - findAll( - @Query('limit', new DefaultValuePipe(20)) limit: number, - @Query('order', new DefaultValuePipe('desc')) order: 'asc' | 'desc', - @Query('after') after?: string, - @Query('before') before?: string, - ) { - return this.threadsUsecases.findAll(limit, order, after, before); - } - - @ApiOperation({ - tags: ['Messages'], - summary: 'Retrieve message', - description: 'Retrieves a message.', - parameters: [ - { - in: 'path', - name: 'thread_id', - required: true, - description: 'The ID of the thread to which this message belongs.', - }, - { - in: 'path', - name: 'message_id', - 
required: true, - description: 'The ID of the message to retrieve.', - }, - ], - }) - @ApiOkResponse({ - description: 'The message object matching the specified ID.', - type: GetMessageResponseDto, - }) - @Get(':thread_id/messages/:message_id') - async retrieveMessage( - @Param('thread_id') threadId: string, - @Param('message_id') messageId: string, - ) { - return this.threadsUsecases.retrieveMessage(threadId, messageId); - } - - @ApiOperation({ - tags: ['Messages'], - summary: 'List messages', - description: 'Returns a list of messages for a given thread.', - parameters: [ - { - in: 'path', - name: 'thread_id', - required: true, - description: 'The ID of the thread the messages belong to.', - }, - { - in: 'query', - name: 'limit', - required: false, - description: - 'A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.', - }, - { - in: 'query', - name: 'order', - required: false, - description: - 'Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.', - }, - { - in: 'query', - name: 'after', - required: false, - description: - 'A cursor for use in pagination. after is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.', - }, - { - in: 'query', - name: 'before', - required: false, - description: - 'A cursor for use in pagination. before is an object ID that defines your place in the list. 
For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.', - }, - { - in: 'query', - name: 'run_id', - required: false, - description: 'Filter messages by the run ID that generated them.', - }, - ], - }) - @ApiOkResponse({ - description: 'A list of message objects.', - type: ListMessagesResponseDto, - }) - @Get(':thread_id/messages') - getMessagesOfThread( - @Param('thread_id') threadId: string, - @Query('limit', new DefaultValuePipe(20)) limit: number, - @Query('order', new DefaultValuePipe('desc')) order: 'asc' | 'desc', - @Query('after') after?: string, - @Query('before') before?: string, - @Query('run_id') runId?: string, - ) { - return this.threadsUsecases.getMessagesOfThread( - threadId, - limit, - order, - after, - before, - runId, - ); - } - - @ApiOperation({ - tags: ['Messages'], - summary: 'Create message', - description: 'Create a message.', - responses: { - '201': { - description: 'A message object.', - }, - }, - parameters: [ - { - in: 'path', - name: 'thread_id', - required: true, - description: 'The ID of the thread to create a message for.', - }, - ], - }) - @Post(':thread_id/messages') - createMessageInThread( - @Param('thread_id') threadId: string, - @Body() createMessageDto: CreateMessageDto, - ) { - return this.threadsUsecases.createMessageInThread( - threadId, - createMessageDto, - ); - } - - @ApiOperation({ - tags: ['Messages'], - summary: 'Modify message', - description: 'Modifies a message.', - responses: { - '200': { - description: 'The modified message object.', - }, - }, - parameters: [ - { - in: 'path', - name: 'thread_id', - required: true, - description: 'The ID of the thread to which this message belongs.', - }, - { - in: 'path', - name: 'message_id', - required: true, - description: 'The ID of the message to modify.', - }, - ], - }) - @Post(':thread_id/messages/:message_id') - updateMessage( - 
@Param('thread_id') threadId: string, - @Param('message_id') messageId: string, - @Body() updateMessageDto: UpdateMessageDto, - ) { - return this.threadsUsecases.updateMessage( - threadId, - messageId, - updateMessageDto, - ); - } - - @ApiOperation({ - summary: 'Clean thread', - description: 'Deletes all messages in a thread.', - tags: ['Threads'], - parameters: [ - { - in: 'path', - name: 'thread_id', - required: true, - description: 'The ID of the thread to clean.', - }, - ], - }) - @Post(':thread_id/clean') - async cleanThread(@Param('thread_id') threadId: string) { - return this.threadsUsecases.clean(threadId); - } - - @ApiOperation({ - summary: 'Delete message', - description: 'Deletes a message.', - tags: ['Messages'], - parameters: [ - { - in: 'path', - name: 'thread_id', - required: true, - description: 'The ID of the thread to which this message belongs.', - }, - { - in: 'path', - name: 'message_id', - required: true, - description: 'The ID of the message to delete.', - }, - ], - }) - @ApiOkResponse({ - description: 'Deletion status.', - type: DeleteMessageDto, - }) - @Delete(':thread_id/messages/:message_id') - deleteMessage( - @Param('thread_id') threadId: string, - @Param('message_id') messageId: string, - ) { - return this.threadsUsecases.deleteMessage(threadId, messageId); - } - - @ApiOperation({ - tags: ['Threads'], - summary: 'Retrieve thread', - description: 'Retrieves a thread.', - parameters: [ - { - in: 'path', - name: 'thread_id', - required: true, - description: 'The unique identifier of the thread.', - }, - ], - }) - @ApiOkResponse({ - description: 'Retrieves a thread.', - type: GetThreadResponseDto, - }) - @Get(':thread_id') - retrieveThread(@Param('thread_id') threadId: string) { - return this.threadsUsecases.findOne(threadId); - } - - @ApiResponse({ - status: 200, - description: 'The thread has been successfully updated.', - type: UpdateThreadDto, - }) - @ApiOperation({ - tags: ['Threads'], - summary: 'Modify thread', - description: 
'Modifies a thread.', - parameters: [ - { - in: 'path', - name: 'thread_id', - required: true, - description: 'The unique identifier of the thread.', - }, - ], - }) - @Post(':thread_id') - modifyThread( - @Param('thread_id') threadId: string, - @Body() updateThreadDto: UpdateThreadDto, - ) { - return this.threadsUsecases.update(threadId, updateThreadDto); - } - - @ApiResponse({ - status: 200, - description: 'The thread has been successfully deleted.', - type: DeleteThreadResponseDto, - }) - @ApiOperation({ - tags: ['Threads'], - summary: 'Delete thread', - description: 'Deletes a specific thread defined by a thread `id` .', - parameters: [ - { - in: 'path', - name: 'id', - required: true, - description: 'The unique identifier of the thread.', - }, - ], - }) - @Delete(':thread_id') - remove(@Param('thread_id') threadId: string) { - return this.threadsUsecases.remove(threadId); - } -} diff --git a/cortex-js/src/infrastructure/database/database.module.ts b/cortex-js/src/infrastructure/database/database.module.ts deleted file mode 100644 index e3a05c85b..000000000 --- a/cortex-js/src/infrastructure/database/database.module.ts +++ /dev/null @@ -1,18 +0,0 @@ -import { Module } from '@nestjs/common'; -import { threadProviders } from './providers/thread.providers'; -import { sqliteDatabaseProviders } from './sqlite-database.providers'; -import { assistantProviders } from './providers/assistant.providers'; -import { messageProviders } from './providers/message.providers'; -import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; - -@Module({ - imports: [FileManagerModule], - providers: [ - ...sqliteDatabaseProviders, - ...threadProviders, - ...assistantProviders, - ...messageProviders, - ], - exports: [...threadProviders, ...assistantProviders, ...messageProviders], -}) -export class DatabaseModule {} diff --git a/cortex-js/src/infrastructure/database/providers/assistant.providers.ts 
b/cortex-js/src/infrastructure/database/providers/assistant.providers.ts deleted file mode 100644 index 7a7dc9462..000000000 --- a/cortex-js/src/infrastructure/database/providers/assistant.providers.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { AssistantEntity } from '@/infrastructure/entities/assistant.entity'; -import { Sequelize } from 'sequelize-typescript'; - -export const assistantProviders = [ - { - provide: 'ASSISTANT_REPOSITORY', - useFactory: async (sequelize: Sequelize) => { - return sequelize.getRepository(AssistantEntity); - }, - inject: ['DATA_SOURCE'], - }, -]; diff --git a/cortex-js/src/infrastructure/database/providers/message.providers.ts b/cortex-js/src/infrastructure/database/providers/message.providers.ts deleted file mode 100644 index 9b1b50227..000000000 --- a/cortex-js/src/infrastructure/database/providers/message.providers.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { MessageEntity } from '@/infrastructure/entities/message.entity'; -import { Sequelize } from 'sequelize-typescript'; - -export const messageProviders = [ - { - provide: 'MESSAGE_REPOSITORY', - useFactory: async (sequelize: Sequelize) => { - return sequelize.getRepository(MessageEntity); - }, - inject: ['DATA_SOURCE'], - }, -]; diff --git a/cortex-js/src/infrastructure/database/providers/thread.providers.ts b/cortex-js/src/infrastructure/database/providers/thread.providers.ts deleted file mode 100644 index 0db54a6dd..000000000 --- a/cortex-js/src/infrastructure/database/providers/thread.providers.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { ThreadEntity } from '@/infrastructure/entities/thread.entity'; -import { Sequelize } from 'sequelize-typescript'; - -export const threadProviders = [ - { - provide: 'THREAD_REPOSITORY', - useFactory: async (sequelize: Sequelize) => { - return sequelize.getRepository(ThreadEntity); - }, - inject: ['DATA_SOURCE'], - }, -]; diff --git a/cortex-js/src/infrastructure/database/sqlite-database.providers.ts 
b/cortex-js/src/infrastructure/database/sqlite-database.providers.ts deleted file mode 100644 index 8956abea0..000000000 --- a/cortex-js/src/infrastructure/database/sqlite-database.providers.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { databaseFile } from '@/infrastructure/constants/cortex'; -import { join } from 'path'; -import { ThreadEntity } from '../entities/thread.entity'; -import { MessageEntity } from '../entities/message.entity'; -import { AssistantEntity } from '../entities/assistant.entity'; -import { Sequelize } from 'sequelize-typescript'; -import { fileManagerService } from '../services/file-manager/file-manager.service'; - -export const sqliteDatabaseProviders = [ - { - provide: 'DATA_SOURCE', - inject: [], - useFactory: async () => { - const dataFolderPath = await fileManagerService.getDataFolderPath(); - const sqlitePath = join(dataFolderPath, databaseFile); - const sequelize = new Sequelize({ - dialect: 'sqlite', - storage: sqlitePath, - logging: false, - }); - sequelize.addModels([ThreadEntity, MessageEntity, AssistantEntity]); - await ThreadEntity.sync(); - await MessageEntity.sync(); - await AssistantEntity.sync(); - return sequelize; - }, - }, -]; diff --git a/cortex-js/src/infrastructure/dtos/assistants/create-assistant.dto.ts b/cortex-js/src/infrastructure/dtos/assistants/create-assistant.dto.ts deleted file mode 100644 index 65e387a26..000000000 --- a/cortex-js/src/infrastructure/dtos/assistants/create-assistant.dto.ts +++ /dev/null @@ -1,84 +0,0 @@ -import { IsArray, IsNumber, IsOptional, IsString } from 'class-validator'; -import { Assistant } from '@/domain/models/assistant.interface'; -import { ApiProperty } from '@nestjs/swagger'; - -export class CreateAssistantDto implements Partial { - @ApiProperty({ - description: 'The unique identifier of the assistant.', - example: 'jan', - default: 'jan', - }) - @IsString() - id: string; - - @ApiProperty({ - description: 'The avatar of the assistant.', - example: '', - default: '', - }) - 
@IsOptional() - @IsString() - avatar?: string; - - @ApiProperty({ - description: 'The name of the assistant.', - example: 'Jan', - default: 'Jan', - }) - @IsString() - name: string; - - @ApiProperty({ - description: 'The description of the assistant.', - example: 'A default assistant that can use all downloaded models', - default: 'A default assistant that can use all downloaded models', - }) - @IsString() - description: string; - - @ApiProperty({ - description: 'The model of the assistant.', - }) - @IsString() - model: string; - - @ApiProperty({ - description: 'The instructions for the assistant.', - example: '', - default: '', - }) - @IsString() - instructions: string; - - @ApiProperty({ - description: 'The tools associated with the assistant.', - example: [], - default: [], - }) - @IsArray() - tools: any[]; - - @ApiProperty({ - description: 'The metadata of the assistant.', - }) - @IsOptional() - metadata: unknown | null; - - @ApiProperty({ - description: 'Top p.', - example: '0.7', - default: '0.7', - }) - @IsOptional() - @IsNumber() - top_p?: number; - - @ApiProperty({ - description: 'Temperature.', - example: '0.7', - default: '0.7', - }) - @IsOptional() - @IsNumber() - temperature?: number; -} diff --git a/cortex-js/src/infrastructure/dtos/assistants/delete-assistant.dto.ts b/cortex-js/src/infrastructure/dtos/assistants/delete-assistant.dto.ts deleted file mode 100644 index a6010342f..000000000 --- a/cortex-js/src/infrastructure/dtos/assistants/delete-assistant.dto.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; - -export class DeleteAssistantResponseDto { - @ApiProperty({ - example: 'assistant_123', - description: 'The identifier of the assistant that was deleted.', - }) - id: string; - - @ApiProperty({ - example: 'assistant', - description: "Type of the object, indicating it's a assistant.", - default: 'assistant', - }) - object: string; - - @ApiProperty({ - example: true, - description: 'Indicates whether the assistant 
was successfully deleted.', - }) - deleted: boolean; -} diff --git a/cortex-js/src/infrastructure/dtos/assistants/model-setting.dto.ts b/cortex-js/src/infrastructure/dtos/assistants/model-setting.dto.ts deleted file mode 100644 index 53c06d497..000000000 --- a/cortex-js/src/infrastructure/dtos/assistants/model-setting.dto.ts +++ /dev/null @@ -1,208 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; -import { IsArray, IsOptional } from 'class-validator'; - -export class ModelSettingDto { - @ApiProperty({ - type: 'number', - minimum: 0, - maximum: 1, - required: false, - default: 1, - description: `What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.`, - }) - temperature: number; - - @ApiProperty({ - type: 'number', - minimum: 0, - maximum: 1, - required: false, - default: 1, - description: `An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. 
So 0.1 means only the tokens comprising the top 10% probability mass are considered.\nWe generally recommend altering this or temperature but not both.`, - }) - top_p: number; - - @ApiProperty({ - required: false, - example: '', - description: 'GGUF metadata: tokenizer.chat_template', - }) - prompt_template?: string; - - @ApiProperty({ - required: false, - example: [], - description: - 'Defines specific tokens or phrases at which the model will stop generating further output.', - default: [], - }) - @IsArray() - @IsOptional() - stop?: string[]; - - @ApiProperty({ - required: false, - type: 'number', - example: 0, - description: - 'Adjusts the likelihood of the model repeating words or phrases in its output.', - }) - frequency_penalty?: number; - - @ApiProperty({ - required: false, - type: 'number', - example: 0, - description: - 'Influences the generation of new and varied concepts in the model’s output.', - }) - presence_penalty?: number; - - @ApiProperty({ - required: false, - type: 'number', - example: 4096, - default: 4096, - description: - 'The context length for model operations varies; the maximum depends on the specific model used.', - }) - ctx_len?: number; - - @ApiProperty({ - required: false, - type: 'boolean', - example: true, - default: true, - description: 'Enable real-time data processing for faster predictions.', - }) - stream?: boolean; - - @ApiProperty({ - required: false, - type: 'number', - example: 2048, - default: 2048, - description: - 'The maximum number of tokens the model will generate in a single response.', - }) - max_tokens?: number; - - @ApiProperty({ - required: false, - type: 'number', - example: 1, - default: 1, - description: 'The number of layers to load onto the GPU for acceleration.', - }) - ngl?: number; - - @ApiProperty({ - required: false, - type: 'number', - example: 1, - default: 1, - description: 'Number of parallel sequences to decode', - }) - n_parallel?: number; - - @ApiProperty({ - required: false, - type: 'number', - 
example: 1, - default: 1, - description: - 'Determines CPU inference threads, limited by hardware and OS. (Maximum determined by system)', - }) - cpu_threads?: number; - - @ApiProperty({ - required: false, - type: 'string', - example: '', - default: '', - description: 'The prompt to use for internal configuration', - }) - pre_prompt?: string; - - @ApiProperty({ - required: false, - type: 'number', - example: 0, - default: 0, - description: 'The batch size for prompt eval step', - }) - n_batch?: number; - - @ApiProperty({ - required: false, - type: 'boolean', - example: true, - default: true, - description: 'To enable prompt caching or not', - }) - caching_enabled?: boolean; - - @ApiProperty({ - required: false, - type: 'number', - example: 0, - default: 0, - description: 'Group attention factor in self-extend', - }) - grp_attn_n?: number; - - @ApiProperty({ - required: false, - type: 'number', - example: 0, - default: 0, - description: 'Group attention width in self-extend', - }) - grp_attn_w?: number; - - @ApiProperty({ - required: false, - type: 'boolean', - example: false, - default: false, - description: 'Prevent system swapping of the model to disk in macOS', - }) - mlock?: boolean; - - @ApiProperty({ - required: false, - type: 'string', - example: '', - default: '', - description: - 'You can constrain the sampling using GBNF grammars by providing path to a grammar file', - }) - grammar_file?: string; - - @ApiProperty({ - required: false, - type: 'boolean', - example: true, - default: true, - description: 'To enable Flash Attention, default is true', - }) - flash_attn?: boolean; - - @ApiProperty({ - required: false, - type: 'string', - example: '', - default: '', - description: 'KV cache type: f16, q8_0, q4_0, default is f16', - }) - cache_type?: string; - - @ApiProperty({ - required: false, - type: 'boolean', - example: true, - default: true, - description: 'To enable mmap, default is true', - }) - use_mmap?: boolean; -} diff --git 
a/cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts b/cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts deleted file mode 100644 index 19fd17999..000000000 --- a/cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { IsString } from 'class-validator'; -import { ApiProperty } from '@nestjs/swagger'; - -export class ChatCompletionMessage { - @ApiProperty({ description: 'The Content of the chat message.' }) - @IsString() - content: string; - - @ApiProperty({ - description: 'The role of the entity in the chat completion.', - example: 'user', - }) - role: 'user' | 'assistant'; -} diff --git a/cortex-js/src/infrastructure/dtos/chat/chat-completion-response.dto.ts b/cortex-js/src/infrastructure/dtos/chat/chat-completion-response.dto.ts deleted file mode 100644 index ed1ba7a42..000000000 --- a/cortex-js/src/infrastructure/dtos/chat/chat-completion-response.dto.ts +++ /dev/null @@ -1,52 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; -import { UsageDto } from './usage.dto'; -import { ChoiceDto } from './choice.dto'; - -export class ChatCompletionResponseDto { - @ApiProperty({ - description: 'A list of choices generated by the chat model.', - type: [ChoiceDto], - }) - choices: ChoiceDto[]; - - @ApiProperty({ - description: - 'The timestamp of when the chat completion was created, expressed as a Unix timestamp.', - type: Number, - }) - created: number; - - @ApiProperty({ - description: 'The unique identifier for the chat completion.', - type: String, - }) - id: string; - - @ApiProperty({ - description: - 'The identifier of the model used to generate the chat completion.', - type: String, - }) - model: string; - - @ApiProperty({ - description: - "The type of object, typically set to 'chat_completion' to denote the nature of the API response.", - type: String, - }) - object: string; - - @ApiProperty({ - description: - 'A unique fingerprint that identifies the system 
configuration used during the chat completion.', - type: String, - }) - system_fingerprint: string; - - @ApiProperty({ - description: - 'An object representing the usage statistics of the model for the current completion.', - type: UsageDto, - }) - usage: UsageDto; -} diff --git a/cortex-js/src/infrastructure/dtos/chat/choice.dto.ts b/cortex-js/src/infrastructure/dtos/chat/choice.dto.ts deleted file mode 100644 index 0594506f7..000000000 --- a/cortex-js/src/infrastructure/dtos/chat/choice.dto.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; -import { MessageDto } from './message.dto'; - -export class ChoiceDto { - @ApiProperty({ - description: - 'The reason the chat completion ended, typically indicating whether the model completed the text naturally or was cut off.', - type: String, - }) - finish_reason: string; - - @ApiProperty({ - description: - 'The index of the completion relative to other generated completions, useful for identifying its order in a batch request.', - type: Number, - }) - index: number; - - @ApiProperty({ - description: - 'An object representing the message details involved in the chat completion, encapsulated within a MessageDto.', - type: MessageDto, - }) - message: MessageDto; -} diff --git a/cortex-js/src/infrastructure/dtos/chat/create-chat-completion.dto.ts b/cortex-js/src/infrastructure/dtos/chat/create-chat-completion.dto.ts deleted file mode 100644 index 07b2f429c..000000000 --- a/cortex-js/src/infrastructure/dtos/chat/create-chat-completion.dto.ts +++ /dev/null @@ -1,90 +0,0 @@ -import { - IsArray, - IsBoolean, - IsNumber, - IsOptional, - IsString, - ValidateNested, -} from 'class-validator'; -import { ChatCompletionMessage } from './chat-completion-message.dto'; -import { Type } from 'class-transformer'; -import { ApiProperty } from '@nestjs/swagger'; - -export class CreateChatCompletionDto { - @ApiProperty({ - description: - 'Array of chat messages to be used for generating the chat completion.', - 
}) - @IsArray() - @ValidateNested({ each: true }) - @Type(() => ChatCompletionMessage) - messages: ChatCompletionMessage[]; - - @ApiProperty({ - description: 'The unique identifier of the model.', - example: 'mistral', - }) - @IsString() - model: string; - - @ApiProperty({ - description: - 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.', - example: true, - }) - @IsOptional() - @IsBoolean() - stream?: boolean; - - @ApiProperty({ - description: - 'Sets the upper limit on the number of tokens the model can generate in a single output.', - example: 4096, - }) - @IsOptional() - @IsNumber() - max_tokens?: number; - - @ApiProperty({ - description: - 'Defines specific tokens or phrases that signal the model to stop producing further output.', - example: ['End'], - }) - @IsOptional() - @IsArray() - stop?: string[]; - - @ApiProperty({ - description: - 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', - example: 0.2, - }) - @IsOptional() - @IsNumber() - frequency_penalty?: number; - - @ApiProperty({ - description: - 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', - example: 0.6, - }) - @IsOptional() - @IsNumber() - presence_penalty?: number; - - @ApiProperty({ - description: "Influences the randomness of the model's output.", - example: 0.8, - }) - @IsOptional() - @IsNumber() - temperature?: number; - - @ApiProperty({ - description: 'Sets probability threshold for more relevant outputs.', - example: 0.95, - }) - @IsOptional() - @IsNumber() - top_p?: number; -} diff --git a/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts b/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts deleted file mode 100644 index dac89dcfc..000000000 --- a/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts +++ 
/dev/null @@ -1,30 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; -import { UsageDto } from './usage.dto'; - -export class EmbeddingsResponseDto { - @ApiProperty({ - description: 'Type of the result object.', - type: String, - }) - object: string; - - @ApiProperty({ - description: 'Identifier of the model utilized for generating embeddings.', - type: String, - }) - model: string; - - @ApiProperty({ - description: - 'The embedding vector represented as an array of floating-point numbers. ', - type: [Number], - }) - embedding: [number]; - - @ApiProperty({ - description: - 'Details of token usage, including prompt_tokens and total_tokens.', - type: UsageDto, - }) - usage: UsageDto; -} diff --git a/cortex-js/src/infrastructure/dtos/chat/message.dto.ts b/cortex-js/src/infrastructure/dtos/chat/message.dto.ts deleted file mode 100644 index d4450165d..000000000 --- a/cortex-js/src/infrastructure/dtos/chat/message.dto.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; - -export class MessageDto { - @ApiProperty({ - description: - 'The textual content of the chat message or completion generated by the model.', - type: String, - }) - content: string; - - @ApiProperty({ - description: - "The role of the participant in the chat, such as 'user' or 'system', indicating who is the sender of the message.", - type: String, - }) - role: string; -} diff --git a/cortex-js/src/infrastructure/dtos/chat/usage.dto.ts b/cortex-js/src/infrastructure/dtos/chat/usage.dto.ts deleted file mode 100644 index 95b71ceea..000000000 --- a/cortex-js/src/infrastructure/dtos/chat/usage.dto.ts +++ /dev/null @@ -1,24 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; - -export class UsageDto { - @ApiProperty({ - description: - 'The number of tokens used in the completion part of the response generated by the model.', - type: Number, - }) - completion_tokens: number; - - @ApiProperty({ - description: - 'The number of tokens used in the prompt part of the chat 
input, which is provided to the model.', - type: Number, - }) - prompt_tokens: number; - - @ApiProperty({ - description: - 'The total number of tokens used in both the prompt and the completion, summarizing the entire token count of the chat operation.', - type: Number, - }) - total_tokens: number; -} diff --git a/cortex-js/src/infrastructure/dtos/common/common-response.dto.ts b/cortex-js/src/infrastructure/dtos/common/common-response.dto.ts deleted file mode 100644 index dd60f4892..000000000 --- a/cortex-js/src/infrastructure/dtos/common/common-response.dto.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; -import { IsString } from 'class-validator'; - -export class CommonResponseDto { - @ApiProperty({ - description: 'The response success or error message.', - }) - @IsString() - message: string; -} diff --git a/cortex-js/src/infrastructure/dtos/configs/config-update.dto.ts b/cortex-js/src/infrastructure/dtos/configs/config-update.dto.ts deleted file mode 100644 index 8e31cbcb1..000000000 --- a/cortex-js/src/infrastructure/dtos/configs/config-update.dto.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; -import { IsOptional, IsString } from 'class-validator'; - -export class ConfigUpdateDto { - @ApiProperty({ - example: 'apiKey', - description: 'The configuration API key.', - }) - @IsString() - @IsOptional() - config: string; - - // Prompt Settings - @ApiProperty({ - type: String, - example: 'sk-xxxxxx', - description: 'The value of the configuration API key.', - }) - @IsString() - @IsOptional() - value: string; -} diff --git a/cortex-js/src/infrastructure/dtos/cortex/cortex-operation-successfully.dto.ts b/cortex-js/src/infrastructure/dtos/cortex/cortex-operation-successfully.dto.ts deleted file mode 100644 index 88d58283d..000000000 --- a/cortex-js/src/infrastructure/dtos/cortex/cortex-operation-successfully.dto.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; - -export 
class CortexOperationSuccessfullyDto { - @ApiProperty({ - description: 'The cortex operation status message.', - }) - message: string; - - @ApiProperty({ - description: 'The cortex operation status code.', - }) - status: string; -} diff --git a/cortex-js/src/infrastructure/dtos/cortex/start-cortex.dto.ts b/cortex-js/src/infrastructure/dtos/cortex/start-cortex.dto.ts deleted file mode 100644 index 49e143521..000000000 --- a/cortex-js/src/infrastructure/dtos/cortex/start-cortex.dto.ts +++ /dev/null @@ -1,27 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; -import { IsIP, IsNumber, IsString, Max, Min } from 'class-validator'; -import { - defaultCortexCppHost, - defaultCortexCppPort, -} from '@/infrastructure/constants/cortex'; - -export class StartCortexDto { - @ApiProperty({ - name: 'host', - description: 'The Cortexcpp host.', - default: defaultCortexCppHost, - }) - @IsString() - @IsIP() - host: string; - - @ApiProperty({ - name: 'port', - description: 'The Cortexcpp port.', - default: defaultCortexCppPort, - }) - @IsNumber() - @Min(0) - @Max(65535) - port: number; -} diff --git a/cortex-js/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts b/cortex-js/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts deleted file mode 100644 index f6517af78..000000000 --- a/cortex-js/src/infrastructure/dtos/embeddings/embeddings-request.dto.ts +++ /dev/null @@ -1,37 +0,0 @@ -import { Optional } from '@nestjs/common'; -import { ApiProperty } from '@nestjs/swagger'; - -export class CreateEmbeddingsDto { - @ApiProperty({ - example: 'mistral', - description: 'The name of the embedding model to be used.', - type: String, - }) - model: string; - - @ApiProperty({ - example: ['Hello World'], - description: - 'The text or token array(s) to be embedded. 
This can be a single string, an array of strings, or an array of token arrays to embed multiple inputs in one request.', - type: [String], - }) - input: string | string[]; - - @ApiProperty({ - example: 'float', - description: - 'Specifies the format for the embeddings. Supported formats include `float` and `int`. This field is optional.', - type: String, - }) - @Optional() - encoding_format?: string; - - @ApiProperty({ - example: 3, - description: - 'Defines the number of dimensions for the output embeddings. This feature is supported by certain models only. This field is optional.', - type: Number, - }) - @Optional() - dimensions?: number; -} diff --git a/cortex-js/src/infrastructure/dtos/engines/engines.dto.ts b/cortex-js/src/infrastructure/dtos/engines/engines.dto.ts deleted file mode 100644 index 5180f7335..000000000 --- a/cortex-js/src/infrastructure/dtos/engines/engines.dto.ts +++ /dev/null @@ -1,106 +0,0 @@ -import { Extension } from '@/domain/abstracts/extension.abstract'; -import { ApiProperty } from '@nestjs/swagger'; -import { IsBoolean, IsOptional, IsString } from 'class-validator'; - -export class EngineDto implements Partial { - @ApiProperty({ - type: String, - example: 'cortex.llamacpp', - description: 'The name of the engine that you want to retrieve.', - }) - @IsString() - name: string; - - // Prompt Settings - @ApiProperty({ - type: String, - example: 'Cortex', - description: 'The display name of the engine.', - }) - @IsString() - @IsOptional() - productName?: string; - - @ApiProperty({ - type: String, - example: 'Cortex engine', - description: 'The description of the engine.', - }) - @IsString() - @IsOptional() - description?: string; - - @ApiProperty({ - type: String, - example: '0.0.1', - description: 'The version of the engine.', - }) - @IsString() - @IsOptional() - version?: string; - - @ApiProperty({ - type: String, - example: true, - description: 'The status of the engine.', - }) - @IsBoolean() - status?: string; -} -export class 
InitEngineDto { - @ApiProperty({ - type: String, - example: 'CPU', - description: 'The mode that you want to run the engine.', - }) - @IsString() - runMode?: 'CPU' | 'GPU'; - - @ApiProperty({ - type: String, - example: 'Nvidia', - description: 'The type of GPU that you want to use.', - }) - @IsString() - gpuType?: 'Nvidia' | 'Others (Vulkan)'; - - @ApiProperty({ - type: String, - example: 'AVX', - description: 'The instructions that you want to use.', - }) - @IsString() - instructions?: 'AVX' | 'AVX2' | 'AVX512' | undefined; - - @ApiProperty({ - type: String, - example: '11', - description: 'The version of CUDA that you want to use.', - }) - @IsString() - cudaVersion?: '11' | '12'; - - @ApiProperty({ - type: Boolean, - example: true, - description: 'Silent mode.', - }) - @IsBoolean() - silent?: boolean; - - @ApiProperty({ - type: Boolean, - example: true, - description: 'Install Vulkan engine.', - }) - @IsBoolean() - vulkan?: boolean; - - @ApiProperty({ - type: String, - example: true, - description: 'Engine version.', - }) - @IsBoolean() - version?: string; -} diff --git a/cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts b/cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts deleted file mode 100644 index a99ad86b6..000000000 --- a/cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts +++ /dev/null @@ -1,58 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; -import { IsArray, IsString } from 'class-validator'; -import { Message, MessageContent } from '@/domain/models/message.interface'; - -export class CreateMessageDto implements Partial { - @ApiProperty({ - example: 'thread_456', - description: 'The ID of the thread to which the message will be posted.', - }) - @IsString() - thread_id: string; - - @ApiProperty({ - example: 'assistant_789', - description: "The assistant's unique identifier.", - }) - @IsString() - assistant_id?: string; - - @ApiProperty({ - example: 'user', - description: 'The sources of the messages.', - 
}) - @IsString() - role: 'user' | 'assistant'; - - @ApiProperty({ - example: [ - { - type: 'text', - data: 'Hello, how can I help you today?', - }, - ], - description: 'The content of the messages.', - }) - @IsArray() - content: MessageContent[]; - - @ApiProperty({ - example: 'in_progress', - description: 'Current status of the message.', - }) - status: 'in_progress' | 'incomplete' | 'completed'; - - @ApiProperty({ - example: { urgency: 'high', tags: ['customer_support'] }, - description: - 'Optional dictionary for additional unstructured message information.', - }) - metadata?: Record; - - @ApiProperty({ - example: 'text', - description: 'Type of the message.', - }) - @IsString() - type?: string; -} diff --git a/cortex-js/src/infrastructure/dtos/messages/delete-message.dto.ts b/cortex-js/src/infrastructure/dtos/messages/delete-message.dto.ts deleted file mode 100644 index 4aa9b90b0..000000000 --- a/cortex-js/src/infrastructure/dtos/messages/delete-message.dto.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; - -export class DeleteMessageResponseDto { - @ApiProperty({ - example: 'message_123', - description: 'The identifier of the message that was deleted.', - }) - id: string; - - @ApiProperty({ - example: 'message', - description: "Type of the object, indicating it's a message.", - default: 'message', - }) - object: string; - - @ApiProperty({ - example: true, - description: 'Indicates whether the message was successfully deleted.', - }) - deleted: boolean; -} diff --git a/cortex-js/src/infrastructure/dtos/messages/get-message.dto.ts b/cortex-js/src/infrastructure/dtos/messages/get-message.dto.ts deleted file mode 100644 index a96193435..000000000 --- a/cortex-js/src/infrastructure/dtos/messages/get-message.dto.ts +++ /dev/null @@ -1,91 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; - -export class ContentDto { - @ApiProperty({ - example: 'text', - description: 'Type of content, e.g., "text".', - }) - type: string; - - 
@ApiProperty({ - type: 'object', - example: { - value: 'How does AI work? Explain it in simple terms.', - annotations: [], - }, - description: 'Text content of the message along with any annotations.', - }) - text: { - value: string; - annotations: string[]; - }; -} - -export class GetMessageResponseDto { - @ApiProperty({ - example: 'msg_abc123', - description: 'The identifier of the message.', - }) - id: string; - - @ApiProperty({ - example: 'thread.message', - description: "Type of the object, indicating it's a thread message.", - default: 'thread.message', - }) - object: string; - - @ApiProperty({ - example: 1699017614, - description: - 'Unix timestamp representing the creation time of the message.', - type: 'integer', - }) - created_at: number; - - @ApiProperty({ - example: 'thread_abc123', - description: 'Identifier of the thread to which this message belongs.', - }) - thread_id: string; - - @ApiProperty({ - example: 'user', - description: "Role of the sender, either 'user' or 'assistant'.", - }) - role: string; - - @ApiProperty({ - type: [ContentDto], - description: 'Array of content objects detailing the message content.', - }) - content: ContentDto[]; - - @ApiProperty({ - example: [], - description: 'Array of file IDs associated with the message, if any.', - type: [String], - }) - file_ids: string[]; - - @ApiProperty({ - nullable: true, - example: null, - description: - 'Identifier of the assistant involved in the message, if applicable.', - }) - assistant_id: string | null; - - @ApiProperty({ - nullable: true, - example: null, - description: 'Run ID associated with the message, if applicable.', - }) - run_id: string | null; - - @ApiProperty({ - example: {}, - description: 'Metadata associated with the message.', - }) - metadata: object; -} diff --git a/cortex-js/src/infrastructure/dtos/messages/list-message.dto.ts b/cortex-js/src/infrastructure/dtos/messages/list-message.dto.ts deleted file mode 100644 index d7d2de66d..000000000 --- 
a/cortex-js/src/infrastructure/dtos/messages/list-message.dto.ts +++ /dev/null @@ -1,95 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; - -export class ListMessageObjectDto { - @ApiProperty({ - example: 'msg_abc123', - description: 'The identifier of the message.', - }) - id: string; - - @ApiProperty({ - example: 'thread.message', - description: "Type of the object, indicating it's a thread message.", - }) - object: string; - - @ApiProperty({ - example: 1699017614, - description: - 'Unix timestamp representing the creation time of the message.', - type: 'integer', - }) - created_at: number; - - @ApiProperty({ - example: 'thread_abc123', - description: 'Identifier of the thread to which this message belongs.', - }) - thread_id: string; - - @ApiProperty({ - example: 'user', - description: "Role of the sender, either 'user' or 'assistant'.", - }) - role: string; - - @ApiProperty({ - description: 'Array of file IDs associated with the message, if any.', - type: [String], - example: [], - }) - file_ids: string[]; - - @ApiProperty({ - nullable: true, - description: - 'Identifier of the assistant involved in the message, if applicable.', - example: null, - }) - assistant_id: string | null; - - @ApiProperty({ - nullable: true, - description: 'Run ID associated with the message, if applicable.', - example: null, - }) - run_id: string | null; - - @ApiProperty({ - example: {}, - description: 'Metadata associated with the message.', - }) - metadata: object; -} - -export class ListMessagesResponseDto { - @ApiProperty({ - example: 'list', - description: "Type of the object, indicating it's a list.", - }) - object: string; - - @ApiProperty({ - type: [ListMessageObjectDto], - description: 'Array of message objects.', - }) - data: ListMessageObjectDto[]; - - @ApiProperty({ - example: 'msg_abc123', - description: 'Identifier of the first message in the list.', - }) - first_id: string; - - @ApiProperty({ - example: 'msg_abc456', - description: 'Identifier of the last message 
in the list.', - }) - last_id: string; - - @ApiProperty({ - example: false, - description: 'Indicates whether there are more messages to retrieve.', - }) - has_more: boolean; -} diff --git a/cortex-js/src/infrastructure/dtos/messages/update-message.dto.ts b/cortex-js/src/infrastructure/dtos/messages/update-message.dto.ts deleted file mode 100644 index a8b82f010..000000000 --- a/cortex-js/src/infrastructure/dtos/messages/update-message.dto.ts +++ /dev/null @@ -1,4 +0,0 @@ -import { PartialType } from '@nestjs/mapped-types'; -import { CreateMessageDto } from './create-message.dto'; - -export class UpdateMessageDto extends PartialType(CreateMessageDto) {} diff --git a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts deleted file mode 100644 index 19cca03c1..000000000 --- a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts +++ /dev/null @@ -1,158 +0,0 @@ -import { - IsArray, - IsBoolean, - IsNumber, - IsOptional, - IsString, - Min, -} from 'class-validator'; -import { Model } from '@/domain/models/model.interface'; -import { ModelArtifactDto } from './model-artifact.dto'; -import { ApiProperty, getSchemaPath } from '@nestjs/swagger'; - -export class CreateModelDto implements Partial { - // Cortex Meta - @ApiProperty({ - description: 'The unique identifier of the model.', - example: 'mistral', - }) - @IsString() - model: string; - - @ApiProperty({ description: 'The name of the model.', example: 'mistral' }) - @IsString() - name?: string; - - @ApiProperty({ - description: 'The URL sources from which the model downloaded or accessed.', - example: ['https://huggingface.co/cortexso/mistral/tree/gguf'], - oneOf: [ - { type: 'array', items: { type: 'string' } }, - { $ref: getSchemaPath(ModelArtifactDto) }, - ], - }) - @IsArray() - files: string[] | ModelArtifactDto; - - // Model Input / Output Syntax - @ApiProperty({ - description: - "A predefined text or framework that guides the AI model's 
response generation.", - example: ` - You are an expert in {subject}. Provide a detailed and thorough explanation on the topic of {topic}.`, - }) - @IsOptional() - @IsString() - prompt_template?: string; - - @ApiProperty({ - description: - 'Defines specific tokens or phrases that signal the model to stop producing further output.', - example: ['End'], - }) - @IsOptional() - @IsArray() - stop?: string[]; - - // Results Preferences - @ApiProperty({ - description: - 'Sets the upper limit on the number of tokens the model can generate in a single output.', - example: 4096, - }) - @IsOptional() - @IsNumber() - max_tokens?: number; - - @ApiProperty({ - description: 'Sets probability threshold for more relevant outputs.', - example: 0.9, - }) - @IsOptional() - @IsNumber() - top_p?: number; - - @ApiProperty({ - description: "Influences the randomness of the model's output.", - example: 0.7, - }) - @IsOptional() - @IsNumber() - temperature?: number; - - @ApiProperty({ - description: - 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', - example: 0.5, - }) - @IsOptional() - @IsNumber() - frequency_penalty?: number; - - @ApiProperty({ - description: - 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', - example: 0.6, - }) - @IsOptional() - @IsNumber() - presence_penalty?: number; - - @ApiProperty({ - description: - 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. 
If set to `false`, the output is delivered in a single JSON file.', - example: true, - }) - @IsOptional() - @IsBoolean() - stream?: boolean; - - // Engine Settings - @ApiProperty({ - description: - 'Sets the maximum input the model can use to generate a response, it varies with the model used.', - example: 4096, - }) - @IsOptional() - @IsNumber() - ctx_len?: number; - - @ApiProperty({ description: 'Determines GPU layer usage.', example: 32 }) - @IsOptional() - @IsNumber() - ngl?: number; - - @ApiProperty({ - description: 'Number of parallel processing units to use.', - example: 1, - }) - @IsOptional() - @IsNumber() - @Min(1) - n_parallel?: number; - - @ApiProperty({ - description: - 'Determines CPU inference threads, limited by hardware and OS. ', - example: 10, - }) - @IsOptional() - @IsNumber() - @Min(1) - cpu_threads?: number; - - @ApiProperty({ - description: 'The engine used to run the model.', - example: 'cortex.llamacpp', - }) - @IsOptional() - @IsString() - engine?: string; - - @ApiProperty({ - description: 'The owner of the model.', - example: '', - default: '', - }) - owned_by?: string; -} diff --git a/cortex-js/src/infrastructure/dtos/models/delete-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/delete-model.dto.ts deleted file mode 100644 index 4043ed411..000000000 --- a/cortex-js/src/infrastructure/dtos/models/delete-model.dto.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; - -export class DeleteModelResponseDto { - @ApiProperty({ - example: 'mistral-ins-7b-q4', - description: 'The identifier of the model that was deleted.', - }) - id: string; - - @ApiProperty({ - example: 'model', - description: "Type of the object, indicating it's a model.", - default: 'model', - }) - object: string; - - @ApiProperty({ - example: true, - description: 'Indicates whether the model was successfully deleted.', - }) - deleted: boolean; -} diff --git a/cortex-js/src/infrastructure/dtos/models/download-model.dto.ts 
b/cortex-js/src/infrastructure/dtos/models/download-model.dto.ts deleted file mode 100644 index e4f1692c2..000000000 --- a/cortex-js/src/infrastructure/dtos/models/download-model.dto.ts +++ /dev/null @@ -1,9 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; - -export class DownloadModelResponseDto { - @ApiProperty({ - example: 'Starting download mistral-ins-7b-q4', - description: 'Message indicates Jan starting download corresponding model.', - }) - message: string; -} diff --git a/cortex-js/src/infrastructure/dtos/models/list-model-response.dto.ts b/cortex-js/src/infrastructure/dtos/models/list-model-response.dto.ts deleted file mode 100644 index ed4c1e98d..000000000 --- a/cortex-js/src/infrastructure/dtos/models/list-model-response.dto.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; -import { ModelDto } from './model.dto'; // Import the ModelDto class - -export class ListModelsResponseDto { - @ApiProperty({ example: 'list', enum: ['list'] }) - object: string; - - @ApiProperty({ type: [ModelDto], description: 'List of models' }) - data: ModelDto[]; -} diff --git a/cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts deleted file mode 100644 index b450966ba..000000000 --- a/cortex-js/src/infrastructure/dtos/models/model-artifact.dto.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { IsString } from 'class-validator'; -import { ModelArtifact } from '@/domain/models/model.interface'; -import { ApiProperty } from '@nestjs/swagger'; - -export class ModelArtifactDto implements ModelArtifact { - @ApiProperty({ description: 'The mmproj bin file url.' }) - @IsString() - mmproj?: string; - - @ApiProperty({ description: 'The llama model bin file url (legacy).' }) - @IsString() - llama_model_path?: string; - - @ApiProperty({ description: 'The gguf model bin file url.' 
}) - @IsString() - model_path?: string; -} diff --git a/cortex-js/src/infrastructure/dtos/models/model-settings.dto.ts b/cortex-js/src/infrastructure/dtos/models/model-settings.dto.ts deleted file mode 100644 index 90431b03b..000000000 --- a/cortex-js/src/infrastructure/dtos/models/model-settings.dto.ts +++ /dev/null @@ -1,142 +0,0 @@ -import { ModelSettingParams } from '@/domain/models/model.interface'; -import { ApiProperty } from '@nestjs/swagger'; -import { - IsArray, - IsBoolean, - IsNumber, - IsOptional, - IsString, - Min, -} from 'class-validator'; - -export class ModelSettingsDto implements ModelSettingParams { - // Prompt Settings - @ApiProperty({ - example: 'system\n{system_message}\nuser\n{prompt}\nassistant', - description: - "A predefined text or framework that guides the AI model's response generation.", - }) - @IsOptional() - prompt_template?: string; - - @ApiProperty({ - type: [String], - example: [], - description: - 'Defines specific tokens or phrases that signal the model to stop producing further output.', - }) - @IsArray() - @IsOptional() - stop?: string[]; - - // Engine Settings - @ApiProperty({ description: 'Determines GPU layer usage.', example: 4096 }) - @IsOptional() - @IsNumber() - ngl?: number; - - @ApiProperty({ - description: - 'The context length for model operations varies; the maximum depends on the specific model used.', - example: 4096, - }) - @IsOptional() - @IsNumber() - ctx_len?: number; - - @ApiProperty({ - description: - 'Determines CPU inference threads, limited by hardware and OS. 
', - example: 10, - }) - @IsOptional() - @IsNumber() - @Min(1) - cpu_threads?: number; - - @ApiProperty({ - description: 'The prompt to use for internal configuration', - }) - @IsOptional() - @IsString() - pre_prompt?: string; - - @ApiProperty({ - description: 'The batch size for prompt eval step', - example: 2048, - }) - @IsOptional() - @IsNumber() - n_batch?: number; - - @ApiProperty({ - description: 'To enable prompt caching or not', - example: true, - }) - @IsOptional() - @IsBoolean() - caching_enabled?: boolean; - - @ApiProperty({ - description: 'Group attention factor in self-extend', - example: 1, - }) - @IsOptional() - @IsNumber() - grp_attn_n?: number; - - @ApiProperty({ - description: 'Group attention width in self-extend', - example: 512, - }) - @IsOptional() - @IsNumber() - grp_attn_w?: number; - - @ApiProperty({ - description: 'Prevent system swapping of the model to disk in macOS', - example: false, - }) - @IsOptional() - @IsBoolean() - mlock?: boolean; - - @ApiProperty({ - description: - 'You can constrain the sampling using GBNF grammars by providing path to a grammar file', - }) - @IsOptional() - @IsString() - grammar_file?: string; - - @ApiProperty({ - description: 'To enable Flash Attention, default is true', - example: true, - }) - @IsOptional() - @IsBoolean() - flash_attn?: boolean; - - @ApiProperty({ - description: 'KV cache type: f16, q8_0, q4_0, default is f16', - example: 'f16', - }) - @IsOptional() - @IsString() - cache_type?: string; - - @ApiProperty({ - description: 'To enable mmap, default is true', - example: true, - }) - @IsOptional() - @IsBoolean() - use_mmap?: boolean; - - @ApiProperty({ - example: 'cortex.llamacpp', - description: 'The engine to use.', - }) - @IsOptional() - engine?: string; -} diff --git a/cortex-js/src/infrastructure/dtos/models/model.dto.ts b/cortex-js/src/infrastructure/dtos/models/model.dto.ts deleted file mode 100644 index 4054a8b14..000000000 --- a/cortex-js/src/infrastructure/dtos/models/model.dto.ts +++ 
/dev/null @@ -1,204 +0,0 @@ -import { Model } from '@/domain/models/model.interface'; -import { ApiProperty } from '@nestjs/swagger'; -import { - IsArray, - IsBoolean, - IsNumber, - IsOptional, - IsString, -} from 'class-validator'; - -export class ModelDto implements Partial { - @ApiProperty({ - example: 'mistral', - description: - 'The model identifier, which can be referenced in the API endpoints.', - }) - @IsOptional() - id: string; - - // Prompt Settings - @ApiProperty({ - example: `You are an expert in {subject}. Provide a detailed and thorough explanation on the topic of {topic}.`, - description: - "A predefined text or framework that guides the AI model's response generation.", - }) - @IsOptional() - prompt_template?: string; - - @ApiProperty({ - type: [String], - example: ['End'], - description: - 'Defines specific tokens or phrases that signal the model to stop producing further output.', - }) - @IsArray() - @IsOptional() - stop?: string[]; - - // Results Preferences - - @ApiProperty({ - example: 4096, - description: - 'Sets the upper limit on the number of tokens the model can generate in a single output.', - }) - @IsOptional() - @IsNumber() - max_tokens?: number; - - @ApiProperty({ - example: 0.7, - description: "Influences the randomness of the model's output.", - }) - @IsOptional() - @IsNumber() - temperature?: number; - - @ApiProperty({ - example: 0.95, - description: 'Sets probability threshold for more relevant outputs', - }) - @IsOptional() - @IsNumber() - top_p?: number; - - @ApiProperty({ - example: true, - description: - 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. 
If set to `false`, the output is delivered in a single JSON file.', - }) - @IsOptional() - @IsBoolean() - stream?: boolean; - - @ApiProperty({ - example: 0, - description: - 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', - }) - @IsOptional() - @IsNumber() - frequency_penalty?: number; - - @ApiProperty({ - example: 0, - description: - 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', - }) - @IsOptional() - @IsNumber() - presence_penalty?: number; - - // Engine Settings - @ApiProperty({ description: 'Determines GPU layer usage.', example: 32 }) - @IsOptional() - @IsNumber() - ngl?: number; - - @ApiProperty({ - description: - 'The context length for model operations varies; the maximum depends on the specific model used.', - example: 4096, - }) - @IsOptional() - @IsNumber() - ctx_len?: number; - - @ApiProperty({ - description: - 'Determines CPU inference threads, limited by hardware and OS.', - example: 10, - }) - @IsOptional() - @IsNumber() - cpu_threads?: number; - - @ApiProperty({ - description: 'The prompt to use for internal configuration', - example: - 'You are an assistant with expert knowledge in {subject}. Please provide a detailed and accurate response to the following query: {query}. 
Ensure that your response is clear, concise, and informative.', - }) - @IsOptional() - @IsString() - pre_prompt?: string; - - @ApiProperty({ - description: 'The batch size for prompt eval step', - example: 512, - }) - @IsOptional() - @IsNumber() - n_batch?: number; - - @ApiProperty({ - description: 'To enable prompt caching or not', - example: true, - }) - @IsOptional() - @IsBoolean() - caching_enabled?: boolean; - - @ApiProperty({ - description: 'Group attention factor in self-extend', - example: 1, - }) - @IsOptional() - @IsNumber() - grp_attn_n?: number; - - @ApiProperty({ - description: 'Group attention width in self-extend', - example: 512, - }) - @IsOptional() - @IsNumber() - grp_attn_w?: number; - - @ApiProperty({ - description: 'Prevent system swapping of the model to disk in macOS', - example: false, - }) - @IsOptional() - @IsBoolean() - mlock?: boolean; - - @ApiProperty({ - description: - 'You can constrain the sampling using GBNF grammars by providing path to a grammar file', - }) - @IsOptional() - @IsString() - grammar_file?: string; - - @ApiProperty({ - description: 'To enable Flash Attention, default is true', - example: true, - }) - @IsOptional() - @IsBoolean() - flash_attn?: boolean; - - @ApiProperty({ - description: 'KV cache type: f16, q8_0, q4_0, default is f16', - example: 'f16', - }) - @IsOptional() - @IsString() - cache_type?: string; - - @ApiProperty({ - description: 'To enable mmap, default is true', - example: true, - }) - @IsOptional() - @IsBoolean() - use_mmap?: boolean; - - @ApiProperty({ - description: 'The engine to use.', - example: 'cortex.llamacpp', - }) - @IsOptional() - engine?: string; -} diff --git a/cortex-js/src/infrastructure/dtos/models/start-model-success.dto.ts b/cortex-js/src/infrastructure/dtos/models/start-model-success.dto.ts deleted file mode 100644 index 203d9e6f5..000000000 --- a/cortex-js/src/infrastructure/dtos/models/start-model-success.dto.ts +++ /dev/null @@ -1,15 +0,0 @@ -import { IsString } from 
'class-validator'; -import { ApiProperty } from '@nestjs/swagger'; - -export class StartModelSuccessDto { - @ApiProperty({ - description: - 'The success or error message displayed when a model is successfully loaded or fails to load.', - }) - @IsString() - message: string; - - @ApiProperty({ description: 'The unique identifier of the model.' }) - @IsString() - modelId: string; -} diff --git a/cortex-js/src/infrastructure/dtos/models/update-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/update-model.dto.ts deleted file mode 100644 index 8db8180b2..000000000 --- a/cortex-js/src/infrastructure/dtos/models/update-model.dto.ts +++ /dev/null @@ -1,4 +0,0 @@ -import { PartialType } from '@nestjs/mapped-types'; -import { CreateModelDto } from './create-model.dto'; - -export class UpdateModelDto extends PartialType(CreateModelDto) {} diff --git a/cortex-js/src/infrastructure/dtos/page.dto.ts b/cortex-js/src/infrastructure/dtos/page.dto.ts deleted file mode 100644 index f81f3878e..000000000 --- a/cortex-js/src/infrastructure/dtos/page.dto.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; -import { IsArray } from 'class-validator'; - -export class PageDto { - @ApiProperty() - readonly object: string; - - @IsArray() - @ApiProperty({ isArray: true }) - readonly data: T[]; - - @ApiProperty() - readonly first_id: string | undefined; - - @ApiProperty() - readonly last_id: string | undefined; - - @ApiProperty() - readonly has_more: boolean; - - constructor(data: T[], hasMore: boolean, firstId?: string, lastId?: string) { - this.object = 'list'; - this.data = data; - this.first_id = firstId; - this.last_id = lastId; - this.has_more = hasMore; - } -} diff --git a/cortex-js/src/infrastructure/dtos/threads/create-message.dto.ts b/cortex-js/src/infrastructure/dtos/threads/create-message.dto.ts deleted file mode 100644 index cd18e4534..000000000 --- a/cortex-js/src/infrastructure/dtos/threads/create-message.dto.ts +++ /dev/null @@ -1,17 +0,0 @@ 
-import { ApiProperty } from '@nestjs/swagger'; - -export class CreateMessageDto { - @ApiProperty({ - example: 'user', - description: `The role of the entity that is creating the message. Allowed values include: - - user: Indicates the message is sent by an actual user and should be used in most cases to represent user-generated messages. - - assistant: Indicates the message is generated by the assistant. Use this value to insert messages from the assistant into the conversation.`, - }) - role: 'user' | 'assistant'; - - @ApiProperty({ - example: 'Tell me a joke', - description: 'The text contents of the message.', - }) - content: string; -} diff --git a/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts b/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts deleted file mode 100644 index fbca1138c..000000000 --- a/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts +++ /dev/null @@ -1,134 +0,0 @@ -import { IsArray, IsNumber, IsOptional, IsString } from 'class-validator'; -import { ApiProperty } from '@nestjs/swagger'; -import type { - Assistant, - AssistantResponseFormatOption, - AssistantToolResources, -} from '@/domain/models/assistant.interface'; - -export class CreateThreadAssistantDto implements Assistant { - @ApiProperty({ - example: 'thread_123', - description: 'The unique identifier of the assistant.', - type: 'string', - }) - @IsString() - id: string; - - @ApiProperty({ - example: 'https://example.com/avatar.png', - description: "URL of the assistant's avatar image.", - type: 'string', - }) - @IsOptional() - @IsString() - avatar?: string; - - @ApiProperty({ - example: 'Virtual Helper', - description: 'The name of the assistant.', - type: 'string', - }) - @IsString() - name: string; - - @ApiProperty({ - example: 'mistral', - description: "The model's unique identifier and settings.", - type: 'string', - }) - @IsString() - model: string; - - @ApiProperty({ - example: - 'Assist with customer 
queries and provide information based on the company database.', - description: "The assistant's specific instructions.", - type: 'string', - }) - @IsString() - instructions: string; - - @ApiProperty({ - example: [ - { - name: 'Knowledge Retrieval', - settings: { - source: 'internal', - endpoint: 'https://api.example.com/knowledge', - }, - }, - ], - description: "The thread's tool(Knowledge Retrieval) configurations.", - type: 'array', - }) - @IsOptional() - @IsArray() - tools: any; - - @ApiProperty({ - example: - 'This assistant helps with customer support by retrieving relevant information.', - description: 'The description of the assistant.', - type: 'string', - }) - @IsString() - @IsOptional() - description: string | null; - - @ApiProperty({ - example: { department: 'support', version: '1.0' }, - description: 'Additional metadata for the assistant.', - type: 'object', - }) - @IsOptional() - metadata: Record | null; - - @ApiProperty({ - example: 'assistant', - description: 'The object type, always "assistant".', - type: 'string', - }) - object: 'assistant'; - - @ApiProperty({ - example: 0.7, - description: 'Sampling temperature for the assistant.', - type: 'number', - }) - @IsNumber() - @IsOptional() - temperature?: number | null; - - @ApiProperty({ - example: 0.9, - description: 'Top-p sampling value for the assistant.', - type: 'number', - }) - @IsNumber() - @IsOptional() - top_p?: number | null; - - @ApiProperty({ - example: 1622470423, - description: 'Timestamp of when the assistant was created.', - type: 'number', - }) - created_at: number; - - @ApiProperty({ - example: { format: 'json' }, - description: 'The response format option for the assistant.', - type: 'object', - }) - @IsOptional() - response_format?: AssistantResponseFormatOption; - - @ApiProperty({ - example: { resources: ['database1', 'database2'] }, - description: 'Tool resources for the assistant.', - type: 'object', - }) - @IsOptional() - tool_resources?: AssistantToolResources; -} diff --git 
a/cortex-js/src/infrastructure/dtos/threads/create-thread.dto.ts b/cortex-js/src/infrastructure/dtos/threads/create-thread.dto.ts deleted file mode 100644 index 7ae7f3694..000000000 --- a/cortex-js/src/infrastructure/dtos/threads/create-thread.dto.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { IsArray } from 'class-validator'; -import { Thread } from '@/domain/models/thread.interface'; -import { CreateThreadAssistantDto } from './create-thread-assistant.dto'; -import { ApiProperty } from '@nestjs/swagger'; - -export class CreateThreadDto implements Partial { - @ApiProperty({ description: "The details of the thread's settings." }) - @IsArray() - assistants: CreateThreadAssistantDto[]; -} diff --git a/cortex-js/src/infrastructure/dtos/threads/delete-message.dto.ts b/cortex-js/src/infrastructure/dtos/threads/delete-message.dto.ts deleted file mode 100644 index 4277143c3..000000000 --- a/cortex-js/src/infrastructure/dtos/threads/delete-message.dto.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; - -export default class DeleteMessageDto { - @ApiProperty({ - example: 'message_123', - description: 'The identifier of the message that was deleted.', - }) - id: string; - - @ApiProperty({ - example: 'message', - description: "Type of the object, indicating it's a message.", - default: 'message', - }) - object: string; - - @ApiProperty({ - example: true, - description: 'Indicates whether the message was successfully deleted.', - }) - deleted: boolean; -} diff --git a/cortex-js/src/infrastructure/dtos/threads/delete-thread.dto.ts b/cortex-js/src/infrastructure/dtos/threads/delete-thread.dto.ts deleted file mode 100644 index 5e045cf64..000000000 --- a/cortex-js/src/infrastructure/dtos/threads/delete-thread.dto.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; - -export class DeleteThreadResponseDto { - @ApiProperty({ - example: 'thread_123', - description: 'The identifier of the thread that was deleted.', - }) - id: 
string; - - @ApiProperty({ - example: 'thread', - description: "Type of the object, indicating it's a thread.", - default: 'thread', - }) - object: string; - - @ApiProperty({ - example: true, - description: 'Indicates whether the thread was successfully deleted.', - }) - deleted: boolean; -} diff --git a/cortex-js/src/infrastructure/dtos/threads/get-thread.dto.ts b/cortex-js/src/infrastructure/dtos/threads/get-thread.dto.ts deleted file mode 100644 index 19016a1ac..000000000 --- a/cortex-js/src/infrastructure/dtos/threads/get-thread.dto.ts +++ /dev/null @@ -1,39 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; - -export class GetThreadResponseDto { - @ApiProperty({ - example: 'thread_abc123', - description: 'The identifier of the thread.', - }) - id: string; - - @ApiProperty({ example: 'thread', description: 'Type of the object' }) - object: string; - - @ApiProperty({ - example: 1699014083, - description: 'Unix timestamp representing the creation time of the thread.', - type: 'integer', - }) - created_at: number; - - @ApiProperty({ - example: ['assistant-001'], - description: 'List of assistants involved in the thread.', - type: [String], - }) - assistants: string[]; - - @ApiProperty({ - example: {}, - description: 'Metadata associated with the thread.', - }) - metadata: object; - - @ApiProperty({ - example: [], - description: 'List of messages within the thread.', - type: [String], - }) - messages: string[]; -} diff --git a/cortex-js/src/infrastructure/dtos/threads/update-message.dto.ts b/cortex-js/src/infrastructure/dtos/threads/update-message.dto.ts deleted file mode 100644 index 1ced3e4cb..000000000 --- a/cortex-js/src/infrastructure/dtos/threads/update-message.dto.ts +++ /dev/null @@ -1,4 +0,0 @@ -import { PartialType } from '@nestjs/mapped-types'; -import { MessageEntity } from '@/infrastructure/entities/message.entity'; - -export class UpdateMessageDto extends PartialType(MessageEntity) {} diff --git 
a/cortex-js/src/infrastructure/dtos/threads/update-thread.dto.ts b/cortex-js/src/infrastructure/dtos/threads/update-thread.dto.ts deleted file mode 100644 index c97704689..000000000 --- a/cortex-js/src/infrastructure/dtos/threads/update-thread.dto.ts +++ /dev/null @@ -1,4 +0,0 @@ -import { PartialType } from '@nestjs/mapped-types'; -import { CreateThreadDto } from './create-thread.dto'; - -export class UpdateThreadDto extends PartialType(CreateThreadDto) {} diff --git a/cortex-js/src/infrastructure/entities/assistant.entity.ts b/cortex-js/src/infrastructure/entities/assistant.entity.ts deleted file mode 100644 index 132a14ad9..000000000 --- a/cortex-js/src/infrastructure/entities/assistant.entity.ts +++ /dev/null @@ -1,96 +0,0 @@ -import { - Table, - Column, - Model, - PrimaryKey, - DataType, -} from 'sequelize-typescript'; -import { Assistant } from '@/domain/models/assistant.interface'; -import type { - AssistantToolResources, - AssistantResponseFormatOption, -} from '@/domain/models/assistant.interface'; - -@Table({ tableName: 'assistants', timestamps: false }) -export class AssistantEntity extends Model implements Assistant { - @PrimaryKey - @Column({ - type: DataType.STRING, - }) - id: string; - - @Column({ - type: DataType.STRING, - allowNull: true, - }) - avatar?: string; - - @Column({ - type: DataType.STRING, - defaultValue: 'assistant', - }) - object: 'assistant'; - - @Column({ - type: DataType.INTEGER, - }) - created_at: number; - - @Column({ - type: DataType.STRING, - allowNull: true, - }) - name: string | null; - - @Column({ - type: DataType.STRING, - allowNull: true, - }) - description: string | null; - - @Column({ - type: DataType.STRING, - }) - model: string; - - @Column({ - type: DataType.STRING, - allowNull: true, - }) - instructions: string | null; - - @Column({ - type: DataType.JSON, - }) - tools: any; - - @Column({ - type: DataType.JSON, - allowNull: true, - }) - metadata: any | null; - - @Column({ - type: DataType.FLOAT, - allowNull: true, - }) 
- top_p: number | null; - - @Column({ - type: DataType.FLOAT, - allowNull: true, - }) - temperature: number | null; - - @Column({ - type: DataType.JSON, - allowNull: true, - }) - response_format: AssistantResponseFormatOption | null; - - @Column({ - type: DataType.JSON, - allowNull: true, - }) - tool_resources: AssistantToolResources | null; -} diff --git a/cortex-js/src/infrastructure/entities/message.entity.ts b/cortex-js/src/infrastructure/entities/message.entity.ts deleted file mode 100644 index c40beaecc..000000000 --- a/cortex-js/src/infrastructure/entities/message.entity.ts +++ /dev/null @@ -1,94 +0,0 @@ -import { - Table, - Column, - Model, - PrimaryKey, - DataType, -} from 'sequelize-typescript'; -import type { - Message, - MessageContent, - MessageIncompleteDetails, - MessageAttachment, -} from '@/domain/models/message.interface'; - -@Table({ tableName: 'messages', timestamps: false }) -export class MessageEntity extends Model implements Message { - @PrimaryKey - @Column({ - type: DataType.STRING, - }) - id: string; - - @Column({ - type: DataType.STRING, - defaultValue: 'thread.message', - }) - object: 'thread.message'; - - @Column({ - type: DataType.STRING, - }) - thread_id: string; - - @Column({ - type: DataType.STRING, - allowNull: true, - }) - assistant_id: string | null; - - @Column({ - type: DataType.STRING, - }) - role: 'user' | 'assistant'; - - @Column({ - type: DataType.STRING, - }) - status: 'in_progress' | 'incomplete' | 'completed'; - - @Column({ - type: DataType.JSON, - allowNull: true, - }) - metadata: any | null; - - @Column({ - type: DataType.STRING, - allowNull: true, - }) - run_id: string | null; - - @Column({ - type: DataType.INTEGER, - allowNull: true, - }) - completed_at: number | null; - - @Column({ - type: DataType.JSON, - }) - content: MessageContent[]; - - @Column({ - type: DataType.JSON, - allowNull: true, - }) - incomplete_details: MessageIncompleteDetails | null; - - @Column({ - type: DataType.INTEGER, - }) - created_at: 
number; - - @Column({ - type: DataType.JSON, - }) - attachments: MessageAttachment[]; - - @Column({ - type: DataType.INTEGER, - allowNull: true, - }) - incomplete_at: number | null; -} diff --git a/cortex-js/src/infrastructure/entities/thread.entity.ts b/cortex-js/src/infrastructure/entities/thread.entity.ts deleted file mode 100644 index 2f02e5ff8..000000000 --- a/cortex-js/src/infrastructure/entities/thread.entity.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { - Table, - Column, - Model, - PrimaryKey, - DataType, -} from 'sequelize-typescript'; -import type { - Thread, - ThreadToolResources, -} from '@/domain/models/thread.interface'; -import { AssistantEntity } from './assistant.entity'; - -@Table({ tableName: 'threads', timestamps: false }) -export class ThreadEntity extends Model implements Thread { - @PrimaryKey - @Column({ - type: DataType.STRING, - }) - id: string; - - @Column({ - type: DataType.STRING, - defaultValue: 'thread', - }) - object: 'thread'; - - @Column({ - type: DataType.STRING, - }) - title: string; - - @Column({ - type: DataType.JSON, - }) - assistants: AssistantEntity[]; - - @Column({ - type: DataType.INTEGER, - }) - created_at: number; - - @Column({ - type: DataType.JSON, - allowNull: true, - }) - tool_resources: ThreadToolResources | null; - - @Column({ - type: DataType.JSON, - allowNull: true, - }) - metadata: any | null; -} diff --git a/cortex-js/src/infrastructure/exception/duplicate-assistant.exception.ts b/cortex-js/src/infrastructure/exception/duplicate-assistant.exception.ts deleted file mode 100644 index 8b328b9c4..000000000 --- a/cortex-js/src/infrastructure/exception/duplicate-assistant.exception.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { HttpException, HttpStatus } from '@nestjs/common'; - -export class DuplicateAssistantException extends HttpException { - constructor(assistantId: string) { - super( - `Assistant with the id ${assistantId} is already exists.`, - HttpStatus.CONFLICT, - ); - } -} diff --git 
a/cortex-js/src/infrastructure/exception/global.exception.ts b/cortex-js/src/infrastructure/exception/global.exception.ts deleted file mode 100644 index 3c254aa03..000000000 --- a/cortex-js/src/infrastructure/exception/global.exception.ts +++ /dev/null @@ -1,39 +0,0 @@ -import { TelemetrySource } from '@/domain/telemetry/telemetry.interface'; -import { TelemetryUsecases } from '@/usecases/telemetry/telemetry.usecases'; -import { - ArgumentsHost, - Catch, - ExceptionFilter, - HttpException, - HttpStatus, -} from '@nestjs/common'; -import { Response } from 'express'; - -@Catch() -export class GlobalExceptionFilter implements ExceptionFilter { - constructor(private readonly telemetryService: TelemetryUsecases) {} - async catch(exception: HttpException | Error, host: ArgumentsHost) { - const isHttpException = exception instanceof HttpException; - const httpStatus = isHttpException - ? exception.getStatus() - : HttpStatus.INTERNAL_SERVER_ERROR; - - const message = isHttpException - ? exception.message - : 'Internal Server Error'; - - const response = host.switchToHttp().getResponse(); - - if (!isHttpException || httpStatus === HttpStatus.INTERNAL_SERVER_ERROR) { - await this.telemetryService.createCrashReport( - exception, - TelemetrySource.CORTEX_SERVER, - ); - await this.telemetryService.sendCrashReport(); - } - response.status(httpStatus).json({ - statusCode: httpStatus, - message, - }); - } -} diff --git a/cortex-js/src/infrastructure/exception/model-not-found.exception.ts b/cortex-js/src/infrastructure/exception/model-not-found.exception.ts deleted file mode 100644 index f71cb8a1c..000000000 --- a/cortex-js/src/infrastructure/exception/model-not-found.exception.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { HttpException, HttpStatus } from '@nestjs/common'; - -export class ModelNotFoundException extends HttpException { - constructor(modelId: string) { - super(`Model ${modelId} not found!`, HttpStatus.NOT_FOUND); - } -} diff --git 
a/cortex-js/src/infrastructure/exception/model-setting-not-found.exception.ts b/cortex-js/src/infrastructure/exception/model-setting-not-found.exception.ts deleted file mode 100644 index 3301cfc80..000000000 --- a/cortex-js/src/infrastructure/exception/model-setting-not-found.exception.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { HttpException, HttpStatus } from '@nestjs/common'; - -export class ModelSettingNotFoundException extends HttpException { - constructor(engine: string) { - super(`Model setting for ${engine} not found!`, HttpStatus.NOT_FOUND); - } -} diff --git a/cortex-js/src/infrastructure/interceptors/transform.interceptor.ts b/cortex-js/src/infrastructure/interceptors/transform.interceptor.ts deleted file mode 100644 index 1cc5a44fe..000000000 --- a/cortex-js/src/infrastructure/interceptors/transform.interceptor.ts +++ /dev/null @@ -1,33 +0,0 @@ -import { - CallHandler, - ExecutionContext, - Injectable, - NestInterceptor, -} from '@nestjs/common'; -import { Observable } from 'rxjs'; -import { map } from 'rxjs/operators'; - -export interface Response { - data: T; -} - -@Injectable() -export class TransformInterceptor - implements NestInterceptor> -{ - intercept( - context: ExecutionContext, - next: CallHandler, - ): Observable> { - return next.handle().pipe( - map((data) => { - return Array.isArray(data) - ? 
{ - object: 'list', - data, - } - : data; - }), - ); - } -} diff --git a/cortex-js/src/infrastructure/middlewares/app.logger.middleware.ts b/cortex-js/src/infrastructure/middlewares/app.logger.middleware.ts deleted file mode 100644 index 1dbbd0a1e..000000000 --- a/cortex-js/src/infrastructure/middlewares/app.logger.middleware.ts +++ /dev/null @@ -1,40 +0,0 @@ -import { TelemetrySource } from '@/domain/telemetry/telemetry.interface'; -import { Injectable, NestMiddleware, Logger } from '@nestjs/common'; - -import { Request, Response, NextFunction } from 'express'; -import { ContextService } from '../services/context/context.service'; - -@Injectable() -export class AppLoggerMiddleware implements NestMiddleware { - constructor(private readonly contextService: ContextService) {} - private logger = new Logger('HTTP'); - - use(req: Request, res: Response, next: NextFunction): void { - const userAgent = req.get('user-agent') ?? ''; - const { ip, method, path: url, originalUrl } = req; - //Setting the x-correlation-id - const correlationHeader = req.header('x-correlation-id') ?? ''; - req.headers['x-correlation-id'] = correlationHeader; - res.set('X-Correlation-Id', correlationHeader); - res.on('close', () => { - const { statusCode } = res; - const contentLength = res.get('content-length'); - this.logger.log( - JSON.stringify({ - method: method, - path: originalUrl ?? url, - statusCode: statusCode, - ip: ip, - content_length: contentLength, - user_agent: userAgent, - x_correlation_id: req.headers['x-correlation-id'], - }), - ); - }); - this.contextService.init(() => { - this.contextService.set('endpoint', originalUrl ?? 
url); - this.contextService.set('source', TelemetrySource.CORTEX_SERVER); - next(); - }); - } -} diff --git a/cortex-js/src/infrastructure/providers/cortex/cortex.module.ts b/cortex-js/src/infrastructure/providers/cortex/cortex.module.ts deleted file mode 100644 index 42d80effd..000000000 --- a/cortex-js/src/infrastructure/providers/cortex/cortex.module.ts +++ /dev/null @@ -1,30 +0,0 @@ -import { Module } from '@nestjs/common'; -import CortexProvider from './cortex.provider'; -import { HttpModule } from '@nestjs/axios'; -import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; -import Onnxprovider from './onnx.provider'; -import LlamaCPPProvider from './llamacpp.provider'; -import TensorrtLLMProvider from './tensorrtllm.provider'; - -@Module({ - imports: [HttpModule, FileManagerModule], - providers: [ - { - provide: 'CORTEX_PROVIDER', - useClass: CortexProvider, - }, - Onnxprovider, - LlamaCPPProvider, - TensorrtLLMProvider, - ], - exports: [ - { - provide: 'CORTEX_PROVIDER', - useClass: CortexProvider, - }, - Onnxprovider, - LlamaCPPProvider, - TensorrtLLMProvider, - ], -}) -export class CortexProviderModule {} diff --git a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts deleted file mode 100644 index 810aa8b92..000000000 --- a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts +++ /dev/null @@ -1,183 +0,0 @@ -import { HttpStatus, Injectable } from '@nestjs/common'; -import { OAIEngineExtension } from '@/domain/abstracts/oai.abstract'; -import { PromptTemplate } from '@/domain/models/prompt-template.interface'; -import { join } from 'path'; -import { Model, ModelSettingParams } from '@/domain/models/model.interface'; -import { HttpService } from '@nestjs/axios'; -import { - CORTEX_CPP_MODELS_URL, - defaultCortexCppHost, - defaultCortexCppPort, -} from '@/infrastructure/constants/cortex'; -import { readdirSync } from 'node:fs'; 
-import { normalizeModelId } from '@/utils/normalize-model-id'; -import { firstValueFrom } from 'rxjs'; -import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; -import { existsSync, readFileSync } from 'fs'; - -export interface ModelStatResponse { - object: string; - data: any; -} - -@Injectable() -export default class CortexProvider extends OAIEngineExtension { - apiUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/chat_completion`; - name = 'cortex'; - productName = 'Cortex Inference Engine'; - description = - 'This extension enables chat completion API calls using the Cortex engine'; - version = '0.0.1'; - apiKey?: string | undefined; - - private loadModelUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/loadmodel`; - private unloadModelUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/unloadmodel`; - - constructor(protected readonly httpService: HttpService) { - super(httpService); - this.persistEngineVersion(); - } - - // Override the inference method to make an inference request to the engine - override async loadModel( - model: Model, - settings?: ModelSettingParams, - ): Promise { - const modelsContainerDir = await fileManagerService.getModelsPath(); - - let llama_model_path = settings?.llama_model_path; - if (!llama_model_path) { - const modelFolderFullPath = join( - modelsContainerDir, - normalizeModelId(model.model), - ); - const ggufFiles = readdirSync(modelFolderFullPath).filter((file) => { - return file.endsWith('.gguf'); - }); - - if (ggufFiles.length === 0) { - throw new Error('Model binary not found'); - } - - const modelBinaryLocalPath = join(modelFolderFullPath, ggufFiles[0]); - llama_model_path = modelBinaryLocalPath; - } - - const cpuThreadCount = 1; // TODO: Math.max(1, nitroResourceProbe.numCpuPhysicalCore); - const modelSettings = { - // This is critical and requires real CPU physical core count (or performance 
core) - cpu_threads: cpuThreadCount, - ...model, - ...settings, - llama_model_path, - ...('mmproj' in model.files && - model.files.mmproj && { - mmproj: settings?.mmproj, - }), - }; - - // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt - if (model.prompt_template) { - const promptTemplate = model.prompt_template; - const prompt = this.promptTemplateConverter(promptTemplate); - if (prompt?.error) { - throw new Error(prompt.error); - } - modelSettings.system_prompt = prompt.system_prompt; - modelSettings.user_prompt = prompt.user_prompt; - modelSettings.ai_prompt = prompt.ai_prompt; - } - return firstValueFrom( - this.httpService.post(this.loadModelUrl, modelSettings), - ).then(); // pipe error or void instead of throwing - } - - override async unloadModel(modelId: string, engine?: string): Promise { - return firstValueFrom( - this.httpService.post(this.unloadModelUrl, { model: modelId, engine }), - ).then(); // pipe error or void instead of throwing - } - - // Override the isModelRunning method to check if the model is running - override async isModelRunning(modelId: string): Promise { - const configs = await fileManagerService.getConfig(); - - return firstValueFrom( - this.httpService.get( - CORTEX_CPP_MODELS_URL(configs.cortexCppHost, configs.cortexCppPort), - ), - ) - .then((res) => { - const data = res.data as ModelStatResponse; - if ( - res.status === HttpStatus.OK && - data && - Array.isArray(data.data) && - data.data.length > 0 - ) { - return data.data.find((e) => e.id === modelId); - } - return false; - }) - .catch(() => false); - } - - private readonly promptTemplateConverter = ( - promptTemplate: string, - ): PromptTemplate => { - // Split the string using the markers - const systemMarker = '{system_message}'; - const promptMarker = '{prompt}'; - - if ( - promptTemplate.includes(systemMarker) && - promptTemplate.includes(promptMarker) - ) { - // Find the indices of the markers - const systemIndex = 
promptTemplate.indexOf(systemMarker); - const promptIndex = promptTemplate.indexOf(promptMarker); - - // Extract the parts of the string - const system_prompt = promptTemplate.substring(0, systemIndex); - const user_prompt = promptTemplate.substring( - systemIndex + systemMarker.length, - promptIndex, - ); - const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length, - ); - - // Return the split parts - return { system_prompt, user_prompt, ai_prompt }; - } else if (promptTemplate.includes(promptMarker)) { - // Extract the parts of the string for the case where only promptMarker is present - const promptIndex = promptTemplate.indexOf(promptMarker); - const user_prompt = promptTemplate.substring(0, promptIndex); - const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length, - ); - - // Return the split parts - return { user_prompt, ai_prompt }; - } - - // Return an error if none of the conditions are met - return { error: 'Cannot split prompt template' }; - }; - - public setUrls(host: string, port: number): void { - this.apiUrl = `http://${host}:${port}/inferences/server/chat_completion`; - this.loadModelUrl = `http://${host}:${port}/inferences/server/loadmodel`; - this.unloadModelUrl = `http://${host}:${port}/inferences/server/unloadmodel`; - } - - private persistEngineVersion = async () => { - const versionFilePath = join( - await fileManagerService.getCortexCppEnginePath(), - this.name, - 'version.txt', - ); - if (existsSync(versionFilePath)) - this.version = readFileSync(versionFilePath, 'utf-8'); - }; -} diff --git a/cortex-js/src/infrastructure/providers/cortex/llamacpp.provider.ts b/cortex-js/src/infrastructure/providers/cortex/llamacpp.provider.ts deleted file mode 100644 index 85a76c55b..000000000 --- a/cortex-js/src/infrastructure/providers/cortex/llamacpp.provider.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { Injectable } from '@nestjs/common'; -import CortexProvider from './cortex.provider'; -import { Engines } from 
'@/infrastructure/commanders/types/engine.interface'; - -@Injectable() -export default class LlamaCPPProvider extends CortexProvider { - name = Engines.llamaCPP; - productName = 'LlamaCPP Inference Engine'; - description = - 'This extension enables chat completion API calls using the LlamaCPP engine'; -} diff --git a/cortex-js/src/infrastructure/providers/cortex/onnx.provider.ts b/cortex-js/src/infrastructure/providers/cortex/onnx.provider.ts deleted file mode 100644 index 4a3ef2fa9..000000000 --- a/cortex-js/src/infrastructure/providers/cortex/onnx.provider.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { Injectable } from '@nestjs/common'; -import CortexProvider from './cortex.provider'; -import { Engines } from '@/infrastructure/commanders/types/engine.interface'; - -@Injectable() -export default class Onnxprovider extends CortexProvider { - name = Engines.onnx; - productName = 'Onnx Inference Engine'; - description = - 'This extension enables chat completion API calls using the Onnx engine'; -} diff --git a/cortex-js/src/infrastructure/providers/cortex/tensorrtllm.provider.ts b/cortex-js/src/infrastructure/providers/cortex/tensorrtllm.provider.ts deleted file mode 100644 index 65c63b489..000000000 --- a/cortex-js/src/infrastructure/providers/cortex/tensorrtllm.provider.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { Injectable } from '@nestjs/common'; -import CortexProvider from './cortex.provider'; -import { Engines } from '@/infrastructure/commanders/types/engine.interface'; - -@Injectable() -export default class TensorrtLLMProvider extends CortexProvider { - name = Engines.tensorrtLLM; - productName = 'TensorrtLLM Inference Engine'; - description = - 'This extension enables chat completion API calls using the TensorrtLLM engine'; -} diff --git a/cortex-js/src/infrastructure/repositories/extensions/extension.module.ts b/cortex-js/src/infrastructure/repositories/extensions/extension.module.ts deleted file mode 100644 index 0c29d756c..000000000 --- 
a/cortex-js/src/infrastructure/repositories/extensions/extension.module.ts +++ /dev/null @@ -1,24 +0,0 @@ -import { Module } from '@nestjs/common'; -import { ExtensionRepositoryImpl } from './extension.repository'; -import { ExtensionRepository } from '@/domain/repositories/extension.interface'; -import { CortexProviderModule } from '@/infrastructure/providers/cortex/cortex.module'; -import { HttpModule } from '@nestjs/axios'; -import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; -import { ExtensionsModule } from '@/extensions/extensions.module'; - -@Module({ - imports: [ - CortexProviderModule, - HttpModule, - FileManagerModule, - ExtensionsModule, - ], - providers: [ - { - provide: ExtensionRepository, - useClass: ExtensionRepositoryImpl, - }, - ], - exports: [ExtensionRepository], -}) -export class ExtensionModule {} diff --git a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts deleted file mode 100644 index 86ddef9d9..000000000 --- a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts +++ /dev/null @@ -1,159 +0,0 @@ -import { Inject, Injectable } from '@nestjs/common'; -import { ExtensionRepository } from '@/domain/repositories/extension.interface'; -import { Extension } from '@/domain/abstracts/extension.abstract'; -import { readdir, lstat } from 'fs/promises'; -import { join } from 'path'; -import { - fileManagerService, - FileManagerService, -} from '@/infrastructure/services/file-manager/file-manager.service'; -import { existsSync, mkdirSync, watch } from 'fs'; -import { Engines } from '@/infrastructure/commanders/types/engine.interface'; -import { OAIEngineExtension } from '@/domain/abstracts/oai.abstract'; -import { HttpService } from '@nestjs/axios'; -import LlamaCPPProvider from '@/infrastructure/providers/cortex/llamacpp.provider'; -import Onnxprovider from 
'@/infrastructure/providers/cortex/onnx.provider'; -import TensorrtLLMProvider from '@/infrastructure/providers/cortex/tensorrtllm.provider'; -import { EngineStatus } from '@/domain/abstracts/engine.abstract'; - -@Injectable() -export class ExtensionRepositoryImpl implements ExtensionRepository { - // Initialize the Extensions Map with the key-value pairs of the core providers. - extensions = new Map(); - - constructor( - @Inject('EXTENSIONS_PROVIDER') - private readonly coreExtensions: OAIEngineExtension[], - private readonly httpService: HttpService, - ) { - this.loadCoreExtensions(); - this.loadExternalExtensions(); - - // Watch engine folder only for now - fileManagerService.getCortexCppEnginePath().then((path) => { - if (!existsSync(path)) mkdirSync(path); - watch(path, () => { - this.extensions.clear(); - this.loadCoreExtensions(); - this.loadExternalExtensions(); - }); - }); - } - /** - * Persist extension to the extensions map - * @param object - * @returns - */ - create(object: Extension): Promise { - this.extensions.set(object.name ?? '', object); - return Promise.resolve(object); - } - - /** - * Find all extensions - * @returns - */ - findAll(): Promise { - return Promise.resolve(Array.from(this.extensions.values())); - } - - /** - * Find one extension by id - * @param id - * @returns - */ - findOne(id: string): Promise { - return Promise.resolve(this.extensions.get(id) ?? 
null); - } - - /** - * Update extension - * It is not applicable - */ - update(): Promise { - throw new Error('Method not implemented.'); - } - - /** - * Remove extension from the extensions map - * @param id - * @returns - */ - remove(id: string): Promise { - this.extensions.delete(id); - return Promise.resolve(); - } - - private async loadCoreExtensions() { - const { cortexCppPort, cortexCppHost } = - await fileManagerService.getConfig(); - const llamaCPPEngine = new LlamaCPPProvider(this.httpService); - llamaCPPEngine.setUrls(cortexCppHost, cortexCppPort); - llamaCPPEngine.status = existsSync( - join(await fileManagerService.getCortexCppEnginePath(), Engines.llamaCPP), - ) - ? EngineStatus.READY - : EngineStatus.NOT_INITIALIZED; - - const onnxEngine = new Onnxprovider(this.httpService); - onnxEngine.setUrls(cortexCppHost, cortexCppPort); - onnxEngine.status = - existsSync( - join(await fileManagerService.getCortexCppEnginePath(), Engines.onnx), - ) && process.platform === 'win32' - ? EngineStatus.READY - : process.platform !== 'win32' - ? EngineStatus.NOT_SUPPORTED - : EngineStatus.NOT_INITIALIZED; - - const tensorrtLLMEngine = new TensorrtLLMProvider(this.httpService); - onnxEngine.setUrls(cortexCppHost, cortexCppPort); - tensorrtLLMEngine.status = - existsSync( - join( - await fileManagerService.getCortexCppEnginePath(), - Engines.tensorrtLLM, - ), - ) && process.platform !== 'darwin' - ? EngineStatus.READY - : process.platform === 'darwin' - ? 
EngineStatus.NOT_SUPPORTED - : EngineStatus.NOT_INITIALIZED; - - await llamaCPPEngine.onLoad(); - await onnxEngine.onLoad(); - await tensorrtLLMEngine.onLoad(); - - this.extensions.set(Engines.llamaCPP, llamaCPPEngine); - this.extensions.set(Engines.onnx, onnxEngine); - this.extensions.set(Engines.tensorrtLLM, tensorrtLLMEngine); - - for (const extension of this.coreExtensions) { - await extension.onLoad(); - this.extensions.set(extension.name, extension); - } - } - - private async loadExternalExtensions() { - const extensionsPath = - process.env.EXTENSIONS_PATH ?? - (await fileManagerService.getExtensionsPath()); - this.loadExtensions(extensionsPath); - } - - private async loadExtensions(extensionsPath: string) { - if (!existsSync(extensionsPath)) return; - - readdir(extensionsPath).then((files) => { - files.forEach(async (extension) => { - const extensionFullPath = join(extensionsPath, extension); - if (!(await lstat(extensionFullPath)).isDirectory()) return; - - import(extensionFullPath).then((extensionClass) => { - const newExtension = new extensionClass.default(); - this.extensions.set(extension, newExtension); - }); - }); - }); - } -} diff --git a/cortex-js/src/infrastructure/repositories/models/model.module.ts b/cortex-js/src/infrastructure/repositories/models/model.module.ts deleted file mode 100644 index c5bcda706..000000000 --- a/cortex-js/src/infrastructure/repositories/models/model.module.ts +++ /dev/null @@ -1,24 +0,0 @@ -import { Module } from '@nestjs/common'; -import { CortexProviderModule } from '@/infrastructure/providers/cortex/cortex.module'; -import { HttpModule } from '@nestjs/axios'; -import { ModelRepository } from '@/domain/repositories/model.interface'; -import { ModelRepositoryImpl } from './model.repository'; -import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; -import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; - -@Module({ - imports: [ 
- CortexProviderModule, - HttpModule, - FileManagerModule, - DownloadManagerModule, - ], - providers: [ - { - provide: ModelRepository, - useClass: ModelRepositoryImpl, - }, - ], - exports: [ModelRepository], -}) -export class ModelRepositoryModule {} diff --git a/cortex-js/src/infrastructure/repositories/models/model.repository.ts b/cortex-js/src/infrastructure/repositories/models/model.repository.ts deleted file mode 100644 index c2f9fdc04..000000000 --- a/cortex-js/src/infrastructure/repositories/models/model.repository.ts +++ /dev/null @@ -1,194 +0,0 @@ -import { Injectable } from '@nestjs/common'; -import { join, extname, basename } from 'path'; -import { ModelRepository } from '@/domain/repositories/model.interface'; -import { Model } from '@/domain/models/model.interface'; -import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; -import { - existsSync, - mkdirSync, - readFileSync, - readdirSync, - rmSync, - writeFileSync, - watch, -} from 'fs'; -import { load, dump } from 'js-yaml'; -import { isLocalModel, normalizeModelId } from '@/utils/normalize-model-id'; - -@Injectable() -export class ModelRepositoryImpl implements ModelRepository { - // Initialize the Extensions Map with the key-value pairs of the core providers. - models = new Map([]); - // Map between files and models. E.g. llama3:7b -> llama3-7b.yaml - fileModel = new Map([]); - // Check whether the models have been loaded or not. 
- loaded = false; - - constructor() { - this.loadModels(); - fileManagerService.getModelsPath().then((path) => { - if (!existsSync(path)) mkdirSync(path); - watch(path, (eventType, filename) => { - this.loadModels(true); - }); - }); - } - - /** - * Create a new model - * This would persist the model yaml file to the models folder - * @param object - * @returns the created model - */ - async create(object: Model): Promise { - const modelsFolderPath = join( - await fileManagerService.getDataFolderPath(), - 'models', - ); - const modelYaml = dump(object); - if (!existsSync(modelsFolderPath)) mkdirSync(modelsFolderPath); - const modelsPath = - process.env.EXTENSIONS_PATH ?? (await fileManagerService.getModelsPath()); - writeFileSync( - join(modelsPath, `${normalizeModelId(object.model)}.yaml`), - modelYaml, - ); - - this.models.set(object.model ?? '', object); - return Promise.resolve(object); - } - - /** - * Find all models - * This would load all the models from the models folder - * @param object - * @returns the created model - */ - findAll(): Promise { - return this.loadModels().then((res) => - res.filter((model) => isLocalModel(model.files)), - ); - } - /** - * Find one model by id - * @param id model id - * @returns the model - */ - findOne(id: string): Promise { - return this.loadModels().then(() => this.models.get(id) ?? null); - } - - /** - * Update a model - * This would update the model yaml file in the models folder - * @param id model id - * @param object model object - */ - async update(id: string, object: Partial): Promise { - const originalModel = await this.findOne(id); - if (!originalModel) throw new Error('Model not found'); - - const updatedModel = { - ...originalModel, - ...object, - } satisfies Model; - - const modelYaml = dump(updatedModel); - const modelsPath = - process.env.EXTENSIONS_PATH ?? (await fileManagerService.getModelsPath()); - - writeFileSync( - join( - modelsPath, - this.fileModel.get(id) ?? 
`${normalizeModelId(id)}.yaml`, - ), - modelYaml, - ); - - this.models.set(id ?? '', updatedModel); - } - - /** - * Remove a model - * This would remove the model yaml file from the models folder - * @param id model id - */ - async remove(id: string): Promise { - this.models.delete(id); - const yamlFilePath = join( - await fileManagerService.getModelsPath(), - this.fileModel.get(id) ?? id, - ); - if (existsSync(yamlFilePath)) rmSync(yamlFilePath); - return Promise.resolve(); - } - - /** - * Load all models - * This would load all the models from the models folder - * @returns the list of models - */ - private async loadModels(forceReload: boolean = false): Promise { - if (this.loaded && !forceReload) return Array.from(this.models.values()); - const modelsPath = - process.env.EXTENSIONS_PATH ?? (await fileManagerService.getModelsPath()); - - this.models.clear(); - this.fileModel.clear(); - - if (!existsSync(modelsPath)) return []; - - const modelFiles = readdirSync(modelsPath) - .filter( - (file) => - extname(file).toLowerCase() === '.yaml' || - extname(file).toLowerCase() === '.yml', - ) - .map((file) => join(modelsPath, file)); - - modelFiles.forEach(async (modelFile) => { - const model = readFileSync(modelFile, 'utf8'); - const yamlObject = load(model) as Model; - const fileName = basename(modelFile); - - if (yamlObject) { - this.fileModel.set(yamlObject.model, fileName); - this.models.set(yamlObject.model, yamlObject); - } - }); - this.loaded = true; - return Array.from(this.models.values()); - } - - /** - * Load a model by file - * This would load a model from a file - * @returns the model - */ - async loadModelByFile( - modelId: string, - modelPath: string, - modelFile: string, - ): Promise { - const checkExists = await this.findOne(modelId); - if (checkExists) return checkExists; - if (!existsSync(modelPath)) return null; - - const model = readFileSync(modelPath, 'utf8'); - const yamlObject = load(model) as Model; - const fileName = basename(modelId); - const 
modelObject = { - ...yamlObject, - model: modelId, - llama_model_path: modelFile, - model_path: modelFile, - files: [modelFile], - }; - if (modelObject) { - this.fileModel.set(modelId, fileName); - this.models.set(modelId, modelObject); - } - this.loaded = true; - return modelObject; - } -} diff --git a/cortex-js/src/infrastructure/repositories/telemetry/telemetry.repository.ts b/cortex-js/src/infrastructure/repositories/telemetry/telemetry.repository.ts deleted file mode 100644 index d276e01a9..000000000 --- a/cortex-js/src/infrastructure/repositories/telemetry/telemetry.repository.ts +++ /dev/null @@ -1,276 +0,0 @@ -import * as cypto from 'crypto'; -import os from 'os'; -import systemInformation from 'systeminformation'; -import { TelemetryRepository } from '@/domain/repositories/telemetry.interface'; -import { - Attribute, - CrashReportAttributes, - TelemetrySource, - TelemetryEvent, - TelemetryResource, - Telemetry, - TelemetryEventMetadata, - EventAttributes, - TelemetryAnonymized, - BenchmarkHardware, -} from '@/domain/telemetry/telemetry.interface'; -import { Injectable } from '@nestjs/common'; -import { join } from 'path'; -import packageJson from '@/../package.json'; -import axios from 'axios'; -import { telemetryServerUrl } from '@/infrastructure/constants/cortex'; -import { ModelStat } from '@/infrastructure/commanders/types/model-stat.interface'; -import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; - -// refactor using convert to dto -@Injectable() -export class TelemetryRepositoryImpl implements TelemetryRepository { - private readonly telemetryResource: TelemetryResource = { - osName: process.platform, - osVersion: process.version, - architecture: process.arch, - appVersion: packageJson.version, - }; - - private readonly crashReportFileName = 'crash-report.jsonl'; - private readonly anonymizedDataFileName = 'session.json'; - constructor() {} - - private async getTelemetryDirectory(): Promise { - const 
dataFolderPath = await fileManagerService.getDataFolderPath(); - await fileManagerService.createFolderIfNotExistInDataFolder('telemetry'); - return join(dataFolderPath, 'telemetry'); - } - - private generateChecksum(data: any): string { - const hash = cypto.createHash('sha256'); - hash.update(JSON.stringify(data)); - return hash.digest('hex'); - } - - async sendTelemetryToServer( - telemetryEvent: Telemetry['event'], - type: 'crash-report' | 'metrics' = 'crash-report', - ): Promise { - try { - await axios.post(`${telemetryServerUrl}/api/v1/${type}`, telemetryEvent, { - headers: { - 'Content-Type': 'application/json', - 'cortex-checksum': this.generateChecksum(telemetryEvent), - timeout: 1000, - }, - }); - } catch (error) {} - } - - async sendBenchmarkToServer(data: { - hardware: BenchmarkHardware; - results: any; - metrics: any; - model: ModelStat; - sessionId: string; - }): Promise { - try { - await axios.post(`${telemetryServerUrl}/api/v1/benchmark`, data, { - headers: { - 'Content-Type': 'application/json', - 'cortex-checksum': this.generateChecksum(data), - }, - }); - } catch (error) {} - } - - async sendTelemetryToOTelCollector(endpoint: string, telemetry: Telemetry) { - try { - console.log('Sending telemetry to OTel collector'); - await axios.post(`${endpoint}/v1/logs`, telemetry.event, { - headers: { - 'Content-Type': 'application/json', - }, - }); - } catch (error) {} - } - - async markLastCrashReportAsSent(): Promise { - try { - const { data, position } = await fileManagerService.getLastLine( - join(await this.getTelemetryDirectory(), this.crashReportFileName), - ); - const Telemetry = JSON.parse(data) as Telemetry; - Telemetry.metadata.sentAt = new Date().toISOString(); - await fileManagerService.modifyLine( - join(await this.getTelemetryDirectory(), this.crashReportFileName), - JSON.stringify(Telemetry), - position, - ); - } catch (error) {} - } - - async readCrashReports(callback: (Telemetry: Telemetry) => void) { - fileManagerService.readLines( - 
join(await this.getTelemetryDirectory(), this.crashReportFileName), - (line: string) => { - const data = JSON.parse(line) as Telemetry; - callback(data); - }, - ); - } - - async getLastCrashReport(): Promise { - try { - await fileManagerService.createFolderIfNotExistInDataFolder('telemetry'); - const { data } = await fileManagerService.getLastLine( - join(await this.getTelemetryDirectory(), this.crashReportFileName), - ); - if (!data) { - return null; - } - return JSON.parse(data) as Telemetry; - } catch (error) { - return null; - } - } - - async createCrashReport( - crashReport: CrashReportAttributes, - source: TelemetrySource, - ): Promise { - const telemetryEvent = await this.convertToTelemetryEvent({ - attributes: crashReport, - source, - type: 'crash-report', - }); - const metadata: TelemetryEventMetadata = { - createdAt: new Date().toISOString(), - sentAt: null, - }; - return fileManagerService.append( - join(await this.getTelemetryDirectory(), this.crashReportFileName), - JSON.stringify({ - metadata, - event: { - resourceLogs: [telemetryEvent], - }, - } as Telemetry), - ); - } - - private async convertToTelemetryEvent({ - attributes, - source, - type, - }: { - attributes: CrashReportAttributes | EventAttributes; - source: TelemetrySource; - type: 'crash-report' | 'metrics'; - }): Promise { - const gpus = (await systemInformation.graphics()).controllers.map( - ({ model, vendor, vram, vramDynamic }) => ({ - model, - vendor, - vram, - vramDynamic, - }), - ); - - const body = - type === 'crash-report' - ? (attributes as CrashReportAttributes).message - : (attributes as EventAttributes).name; - - const severity = type === 'crash-report' ? 
'ERROR' : 'INFO'; - - const resourceAttributes: Attribute[] = Object.entries({ - ...this.telemetryResource, - 'service.name': type, - cpu: os.cpus()[0].model, - gpus: JSON.stringify(gpus), - source, - }).map(([key, value]) => ({ - key, - value: { stringValue: value }, - })); - const telemetryLogAttributes: Attribute[] = Object.entries(attributes).map( - ([key, value]) => { - if (typeof value === 'object') { - return { - key, - value: { - kvlist_value: { - values: Object.entries(value).map(([k, v]) => ({ - key: k, - value: { stringValue: v as string }, - })), - }, - }, - }; - } - return { - key, - value: { stringValue: value }, - }; - }, - ); - - return { - resource: { - attributes: resourceAttributes, - }, - scopeLogs: [ - { - scope: {}, - logRecords: [ - { - traceId: cypto.randomBytes(16).toString('hex'), - timeUnixNano: (BigInt(Date.now()) * BigInt(1000000)).toString(), - body: { stringValue: body }, - severityText: severity, - attributes: telemetryLogAttributes, - }, - ], - }, - ], - }; - } - - async sendEvent( - events: EventAttributes[], - source: TelemetrySource, - ): Promise { - const telemetryEvents = await Promise.all( - events.map(async (event) => - this.convertToTelemetryEvent({ - attributes: event, - source, - type: 'metrics', - }), - ), - ); - await this.sendTelemetryToServer( - { - resourceLogs: telemetryEvents, - }, - 'metrics', - ); - } - - async getAnonymizedData(): Promise { - const content = await fileManagerService.readFile( - join(await this.getTelemetryDirectory(), this.anonymizedDataFileName), - ); - - if (!content) { - return null; - } - - const data = JSON.parse(content) as TelemetryAnonymized; - return data; - } - - async updateAnonymousData(data: TelemetryAnonymized): Promise { - return fileManagerService.writeFile( - join(await this.getTelemetryDirectory(), this.anonymizedDataFileName), - JSON.stringify(data), - ); - } -} diff --git a/cortex-js/src/infrastructure/services/context/context.module.ts 
b/cortex-js/src/infrastructure/services/context/context.module.ts deleted file mode 100644 index a583baa25..000000000 --- a/cortex-js/src/infrastructure/services/context/context.module.ts +++ /dev/null @@ -1,9 +0,0 @@ -import { Module } from '@nestjs/common'; -import { ContextService } from './context.service'; - -@Module({ - //move context service to usecase? - providers: [ContextService], - exports: [ContextService], -}) -export class ContextModule {} diff --git a/cortex-js/src/infrastructure/services/context/context.service.ts b/cortex-js/src/infrastructure/services/context/context.service.ts deleted file mode 100644 index e69ba06fd..000000000 --- a/cortex-js/src/infrastructure/services/context/context.service.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { Injectable } from '@nestjs/common'; -import { AsyncLocalStorage } from 'async_hooks'; - -@Injectable() -export class ContextService { - private readonly localStorage = new AsyncLocalStorage(); - - init(callback: () => T): T { - return this.localStorage.run(new Map(), callback); - } - - get(key: string): T | null { - const context = this.localStorage.getStore() as Map; - return context?.get(key) ?? 
null; - } - - set(key: string, value: T): void { - const context = this.localStorage.getStore() as Map; - context?.set(key, value); - } -} diff --git a/cortex-js/src/infrastructure/services/download-manager/download-manager.module.ts b/cortex-js/src/infrastructure/services/download-manager/download-manager.module.ts deleted file mode 100644 index 5d94cc006..000000000 --- a/cortex-js/src/infrastructure/services/download-manager/download-manager.module.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { Module } from '@nestjs/common'; -import { DownloadManagerService } from './download-manager.service'; -import { HttpModule } from '@nestjs/axios'; - -@Module({ - imports: [HttpModule], - providers: [DownloadManagerService], - exports: [DownloadManagerService], -}) -export class DownloadManagerModule {} diff --git a/cortex-js/src/infrastructure/services/download-manager/download-manager.service.spec.ts b/cortex-js/src/infrastructure/services/download-manager/download-manager.service.spec.ts deleted file mode 100644 index 8263a632d..000000000 --- a/cortex-js/src/infrastructure/services/download-manager/download-manager.service.spec.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { DownloadManagerService } from './download-manager.service'; -import { HttpModule } from '@nestjs/axios'; -import { EventEmitterModule } from '@nestjs/event-emitter'; - -describe('DownloadManagerService', () => { - let service: DownloadManagerService; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - imports: [HttpModule, EventEmitterModule.forRoot()], - providers: [DownloadManagerService], - }).compile(); - - service = module.get(DownloadManagerService); - }); - - it('should be defined', () => { - expect(service).toBeDefined(); - }); -}); diff --git a/cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts 
b/cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts deleted file mode 100644 index 6ae6550fb..000000000 --- a/cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts +++ /dev/null @@ -1,348 +0,0 @@ -import { - DownloadItem, - DownloadState, - DownloadStatus, - DownloadType, -} from '@/domain/models/download.interface'; -import { HttpService } from '@nestjs/axios'; -import { Injectable } from '@nestjs/common'; -import { EventEmitter2 } from '@nestjs/event-emitter'; -import { Presets, SingleBar } from 'cli-progress'; -import { createWriteStream, unlinkSync } from 'node:fs'; -import { basename } from 'node:path'; -import { firstValueFrom } from 'rxjs'; -import crypto from 'crypto'; - -@Injectable() -export class DownloadManagerService { - private allDownloadStates: DownloadState[] = []; - private abortControllers: Record> = - {}; - private timeouts: Record = {}; - - constructor( - private readonly httpService: HttpService, - private readonly eventEmitter: EventEmitter2, - ) {} - - async abortDownload(downloadId: string) { - if (!this.abortControllers[downloadId]) { - return; - } - clearTimeout(this.timeouts[downloadId]); - Object.keys(this.abortControllers[downloadId]).forEach((destination) => { - this.abortControllers[downloadId][destination].abort(); - }); - delete this.abortControllers[downloadId]; - - const currentDownloadState = this.allDownloadStates.find( - (downloadState) => downloadState.id === downloadId, - ); - this.allDownloadStates = this.allDownloadStates.filter( - (downloadState) => downloadState.id !== downloadId, - ); - - if (currentDownloadState) { - this.deleteDownloadStateFiles(currentDownloadState); - } - this.eventEmitter.emit('download.event', this.allDownloadStates); - } - - async submitDownloadRequest( - downloadId: string, - title: string, - downloadType: DownloadType, - urlToDestination: Record< - string, - { - destination: string; - checksum?: string; - } - >, - finishedCallback?: 
() => Promise, - inSequence: boolean = true, - ) { - if ( - this.allDownloadStates.find( - (downloadState) => downloadState.id === downloadId, - ) - ) { - return; - } - - const downloadItems: DownloadItem[] = Object.keys(urlToDestination).map( - (url) => { - const { destination, checksum } = urlToDestination[url]; - const downloadItem: DownloadItem = { - id: destination, - time: { - elapsed: 0, - remaining: 0, - }, - size: { - total: 0, - transferred: 0, - }, - progress: 0, - status: DownloadStatus.Downloading, - checksum, - }; - - return downloadItem; - }, - ); - - const downloadState: DownloadState = { - id: downloadId, - title: title, - type: downloadType, - progress: 0, - status: DownloadStatus.Downloading, - children: downloadItems, - }; - - this.allDownloadStates.push(downloadState); - this.abortControllers[downloadId] = {}; - - const callBack = async () => { - // Await post processing callback - await finishedCallback?.(); - - // Finished - update the current downloading states - delete this.abortControllers[downloadId]; - const currentDownloadState = this.allDownloadStates.find( - (downloadState) => downloadState.id === downloadId, - ); - if (currentDownloadState) { - currentDownloadState.status = DownloadStatus.Downloaded; - this.eventEmitter.emit('download.event', this.allDownloadStates); - - // remove download state if all children is downloaded - this.allDownloadStates = this.allDownloadStates.filter( - (downloadState) => downloadState.id !== downloadId, - ); - } - this.eventEmitter.emit('download.event', this.allDownloadStates); - }; - if (!inSequence) { - Promise.all( - Object.keys(urlToDestination).map((url) => { - const { destination, checksum } = urlToDestination[url]; - return this.downloadFile(downloadId, url, destination, checksum); - }), - ).then(callBack); - } else { - // Download model file in sequence - for (const url of Object.keys(urlToDestination)) { - const { destination, checksum } = urlToDestination[url]; - await 
this.downloadFile(downloadId, url, destination, checksum); - } - return callBack(); - } - } - - private async downloadFile( - downloadId: string, - url: string, - destination: string, - checksum?: string, - ) { - console.log('Downloading', { - downloadId, - url, - destination, - checksum, - }); - const controller = new AbortController(); - // adding to abort controllers - this.abortControllers[downloadId][destination] = controller; - return new Promise(async (resolve) => { - const response = await firstValueFrom( - this.httpService.get(url, { - responseType: 'stream', - signal: controller.signal, - }), - ); - - // check if response is success - if (!response) { - throw new Error('Failed to download model'); - } - - const writer = createWriteStream(destination); - const hash = crypto.createHash('sha256'); - const totalBytes = Number(response.headers['content-length']); - - // update download state - const currentDownloadState = this.allDownloadStates.find( - (downloadState) => downloadState.id === downloadId, - ); - if (!currentDownloadState) { - resolve(); - return; - } - const downloadItem = currentDownloadState?.children.find( - (downloadItem) => downloadItem.id === destination, - ); - if (downloadItem) { - downloadItem.size.total = totalBytes; - } - - console.log('Downloading', basename(destination)); - - const timeout = 20000; // Timeout period for receiving new data - let timeoutId: NodeJS.Timeout; - const resetTimeout = () => { - if (timeoutId) { - clearTimeout(timeoutId); - delete this.timeouts[downloadId]; - } - timeoutId = setTimeout(() => { - try { - this.handleError( - new Error('Download timeout'), - downloadId, - destination, - ); - } finally { - bar.stop(); - resolve(); - } - }, timeout); - this.timeouts[downloadId] = timeoutId; - }; - - let transferredBytes = 0; - const bar = new SingleBar({}, Presets.shades_classic); - bar.start(100, 0); - - writer.on('finish', () => { - try { - if (timeoutId) clearTimeout(timeoutId); - // delete the abort 
controller - delete this.abortControllers[downloadId][destination]; - const currentDownloadState = this.allDownloadStates.find( - (downloadState) => downloadState.id === downloadId, - ); - if (!currentDownloadState) return; - - // update current child status to downloaded, find by destination as id - const downloadItem = currentDownloadState?.children.find( - (downloadItem) => downloadItem.id === destination, - ); - const isFileBroken = checksum && checksum === hash.digest('hex'); - if (downloadItem) { - downloadItem.status = isFileBroken - ? DownloadStatus.Error - : DownloadStatus.Downloaded; - if (isFileBroken) { - downloadItem.error = 'Checksum is not matched'; - this.handleError( - new Error('Checksum is not matched'), - downloadId, - destination, - ); - } - } - if (isFileBroken) { - currentDownloadState.status = DownloadStatus.Error; - currentDownloadState.error = 'Checksum is not matched'; - } - this.eventEmitter.emit('download.event', this.allDownloadStates); - } finally { - bar.stop(); - resolve(); - } - }); - writer.on('error', (error) => { - try { - if (timeoutId) clearTimeout(timeoutId); - this.handleError(error, downloadId, destination); - } finally { - bar.stop(); - resolve(); - } - }); - - response.data.on('data', (chunk: any) => { - hash.update(chunk); - resetTimeout(); - transferredBytes += chunk.length; - - const currentDownloadState = this.allDownloadStates.find( - (downloadState) => downloadState.id === downloadId, - ); - if (!currentDownloadState) return; - - const downloadItem = currentDownloadState?.children.find( - (downloadItem) => downloadItem.id === destination, - ); - if (downloadItem) { - downloadItem.size.transferred = transferredBytes; - downloadItem.progress = Math.round( - (transferredBytes / totalBytes) * 100, - ); - bar.update(downloadItem.progress); - } - const lastProgress = currentDownloadState.progress; - currentDownloadState.progress = Math.round( - (currentDownloadState.children.reduce( - (pre, curr) => pre + 
curr.size.transferred, - 0, - ) / - Math.max( - currentDownloadState.children.reduce( - (pre, curr) => pre + curr.size.total, - 0, - ), - 1, - )) * - 100, - ); - // console.log(currentDownloadState.progress); - if (currentDownloadState.progress !== lastProgress) - this.eventEmitter.emit('download.event', this.allDownloadStates); - }); - - response.data.pipe(writer); - }); - } - - getDownloadStates() { - return this.allDownloadStates; - } - - private handleError(error: Error, downloadId: string, destination: string) { - delete this.abortControllers[downloadId][destination]; - const currentDownloadState = this.allDownloadStates.find( - (downloadState) => downloadState.id === downloadId, - ); - if (!currentDownloadState) return; - - const downloadItem = currentDownloadState?.children.find( - (downloadItem) => downloadItem.id === destination, - ); - if (downloadItem) { - downloadItem.status = DownloadStatus.Error; - downloadItem.error = error.message; - } - - currentDownloadState.status = DownloadStatus.Error; - currentDownloadState.error = error.message; - - // remove download state if all children is downloaded - this.allDownloadStates = this.allDownloadStates.filter( - (downloadState) => downloadState.id !== downloadId, - ); - this.deleteDownloadStateFiles(currentDownloadState); - this.eventEmitter.emit('download.event', [currentDownloadState]); - this.eventEmitter.emit('download.event', this.allDownloadStates); - } - - private deleteDownloadStateFiles(downloadState: DownloadState) { - if (!downloadState.children?.length) return; - downloadState.children.forEach((child) => { - unlinkSync(child.id); - }); - } -} diff --git a/cortex-js/src/infrastructure/services/file-manager/file-manager.module.ts b/cortex-js/src/infrastructure/services/file-manager/file-manager.module.ts deleted file mode 100644 index 9e3757342..000000000 --- a/cortex-js/src/infrastructure/services/file-manager/file-manager.module.ts +++ /dev/null @@ -1,8 +0,0 @@ -import { Module } from 
'@nestjs/common'; -import { FileManagerService } from './file-manager.service'; - -@Module({ - providers: [FileManagerService], - exports: [FileManagerService], -}) -export class FileManagerModule {} diff --git a/cortex-js/src/infrastructure/services/file-manager/file-manager.service.spec.ts b/cortex-js/src/infrastructure/services/file-manager/file-manager.service.spec.ts deleted file mode 100644 index 96cc6dad6..000000000 --- a/cortex-js/src/infrastructure/services/file-manager/file-manager.service.spec.ts +++ /dev/null @@ -1,18 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { FileManagerService } from './file-manager.service'; - -describe('FileManagerService', () => { - let service: FileManagerService; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - providers: [FileManagerService], - }).compile(); - - service = module.get(FileManagerService); - }); - - it('should be defined', () => { - expect(service).toBeDefined(); - }); -}); diff --git a/cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts b/cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts deleted file mode 100644 index c452133eb..000000000 --- a/cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts +++ /dev/null @@ -1,406 +0,0 @@ -import { Config } from '@/domain/config/config.interface'; -import { Injectable } from '@nestjs/common'; -import os from 'os'; -import { join } from 'node:path'; -import { - existsSync, - read, - open, - close, - promises, - createReadStream, -} from 'node:fs'; -import { promisify } from 'util'; -import yaml, { load } from 'js-yaml'; -import { readdirSync, readFileSync, write } from 'fs'; -import { createInterface } from 'readline'; -import { - defaultCortexCppHost, - defaultCortexCppPort, - defaultCortexJsHost, - defaultCortexJsPort, -} from '@/infrastructure/constants/cortex'; - -const readFileAsync = promisify(read); -const openAsync = 
promisify(open); -const closeAsync = promisify(close); -const writeAsync = promisify(write); - -@Injectable() -export class FileManagerService { - private cortexDirectoryName = 'cortex'; - private modelFolderName = 'models'; - private presetFolderName = 'presets'; - private extensionFoldername = 'extensions'; - private benchmarkFoldername = 'benchmark'; - private cortexEnginesFolderName = 'engines'; - private cortexTelemetryFolderName = 'telemetry'; - private configProfile = process.env.CORTEX_PROFILE || 'default'; - private configPath = process.env.CORTEX_CONFIG_PATH || os.homedir(); - - /** - * Get cortex configs - * @returns the config object - */ - async getConfig(dataFolderPath?: string): Promise { - const configPath = join( - this.configPath, - this.getConfigFileName(this.configProfile), - ); - const config = this.defaultConfig(); - const dataFolderPathUsed = dataFolderPath || config.dataFolderPath; - if (!existsSync(configPath) || !existsSync(dataFolderPathUsed)) { - if (!existsSync(dataFolderPathUsed)) { - await this.createFolderIfNotExist(dataFolderPathUsed); - } - if (!existsSync(configPath)) { - await this.writeConfigFile(config); - } - return config; - } - - try { - const content = await promises.readFile(configPath, 'utf8'); - const config = yaml.load(content) as Config & object; - return { - ...this.defaultConfig(), - ...config, - }; - } catch (error) { - console.warn('Error reading config file. 
Using default config.'); - console.warn(error); - await this.createFolderIfNotExist(dataFolderPathUsed); - await this.writeConfigFile(config); - return config; - } - } - - async writeConfigFile(config: Config & object): Promise { - const configPath = join( - this.configPath, - this.getConfigFileName(this.configProfile), - ); - - // write config to file as yaml - if (!existsSync(configPath)) { - await promises.writeFile(configPath, '', 'utf8'); - } - const content = await promises.readFile(configPath, 'utf8'); - const currentConfig = yaml.load(content) as Config & object; - const configString = yaml.dump({ - ...currentConfig, - ...config, - }); - await promises.writeFile(configPath, configString, 'utf8'); - } - - private async createFolderIfNotExist(dataFolderPath: string): Promise { - if (!existsSync(dataFolderPath)) { - await promises.mkdir(dataFolderPath, { recursive: true }); - } - - const modelFolderPath = join(dataFolderPath, this.modelFolderName); - const cortexCppFolderPath = join( - dataFolderPath, - this.cortexEnginesFolderName, - ); - const cortexTelemetryFolderPath = join( - dataFolderPath, - this.cortexTelemetryFolderName, - ); - if (!existsSync(modelFolderPath)) { - await promises.mkdir(modelFolderPath, { recursive: true }); - } - if (!existsSync(cortexCppFolderPath)) { - await promises.mkdir(cortexCppFolderPath, { recursive: true }); - } - if (!existsSync(cortexTelemetryFolderPath)) { - await promises.mkdir(cortexTelemetryFolderPath, { recursive: true }); - } - } - - public defaultConfig(): Config { - // default will store at home directory - const homeDir = os.homedir(); - const dataFolderPath = join(homeDir, this.cortexDirectoryName); - - return { - dataFolderPath, - cortexCppHost: defaultCortexCppHost, - cortexCppPort: defaultCortexCppPort, - apiServerHost: defaultCortexJsHost, - apiServerPort: defaultCortexJsPort, - }; - } - - /** - * Get the app data folder path - * Usually it is located at the home directory > cortex - * @returns the path to the 
data folder - */ - async getDataFolderPath(): Promise { - const config = await this.getConfig(); - return config.dataFolderPath; - } - - async getLastLine( - filePath: string, - ): Promise<{ data: any; position: number }> { - try { - const fileDescriptor = await openAsync(filePath, 'a+'); - const stats = await promises.stat(filePath); - const bufferSize = 1024 * 5; // 5KB - const buffer = Buffer.alloc(bufferSize); - const filePosition = stats.size; - let lastPosition = filePosition; - let data = ''; - let newLineIndex = -1; - - async function readPreviousChunk() { - const readPosition = Math.max(0, lastPosition - bufferSize); - const chunkSize = Math.min(bufferSize, lastPosition); - - const { bytesRead } = await readFileAsync( - fileDescriptor, - buffer, - 0, - chunkSize, - readPosition, - ); - data = buffer.slice(0, bytesRead).toString() + data; - lastPosition -= bufferSize; - newLineIndex = data.lastIndexOf('\n'); - if (newLineIndex === -1 && lastPosition > 0) { - return readPreviousChunk(); - } else { - const lastLine = data.slice(newLineIndex + 1).trim(); - await closeAsync(fileDescriptor); - return { - data: lastLine, - position: readPosition + newLineIndex + 1, - }; - } - } - - return await readPreviousChunk(); - } catch (err) { - //todo: add log level then log error - throw err; - } - } - async modifyLine(filePath: string, modifiedLine: any, position: number) { - try { - const fd = await openAsync(filePath, 'r+'); - const buffer = Buffer.from(modifiedLine, 'utf8'); - await writeAsync(fd, buffer, 0, buffer.length, position); - await closeAsync(fd); - } catch (err) { - //todo: add log level then log error - throw err; - } - } - - async append(filePath: string, data: any) { - try { - const stats = await promises.stat(filePath); - return await promises.appendFile( - filePath, - stats.size === 0 ? 
data : `\n${data}`, - { - encoding: 'utf8', - flag: 'a+', - }, - ); - } catch (err) { - throw err; - } - } - readLines( - filePath: string, - callback: (line: string) => void, - start: number = 0, - ) { - const fileStream = createReadStream(filePath, { - start, - }); - const rl = createInterface({ - input: fileStream, - crlfDelay: Infinity, - }); - rl.on('line', callback); - } - - /** - * Get the models data folder path - * Usually it is located at the home directory > cortex > models - * @returns the path to the models folder - */ - async getModelsPath(): Promise { - const dataFolderPath = await this.getDataFolderPath(); - return join(dataFolderPath, this.modelFolderName); - } - - /** - * Get the presets data folder path - * Usually it is located at the home directory > cortex > presets - * @returns the path to the presets folder - */ - async getPresetsPath(): Promise { - const dataFolderPath = await this.getDataFolderPath(); - return join(dataFolderPath, this.presetFolderName); - } - - /** - * Get the preset data - * Usually it is located at the home directory > cortex > presets > preset.yaml - * @returns the preset data - */ - async getPreset(preset?: string): Promise { - if (!preset) return undefined; - - const dataFolderPath = await this.getDataFolderPath(); - const presetsFolder = join(dataFolderPath, this.presetFolderName); - if (!existsSync(presetsFolder)) return {}; - - const presetFile = readdirSync(presetsFolder).find( - (file) => - file.toLowerCase() === `${preset?.toLowerCase()}.yaml` || - file.toLowerCase() === `${preset?.toLocaleLowerCase()}.yml`, - ); - if (!presetFile) return {}; - const presetPath = join(presetsFolder, presetFile); - - if (!preset || !existsSync(presetPath)) return {}; - return preset - ? 
(load(readFileSync(join(presetPath), 'utf-8')) as object) - : {}; - } - - /** - * Get the extensions data folder path - * Usually it is located at the home directory > cortex > extensions - * @returns the path to the extensions folder - */ - async getExtensionsPath(): Promise { - const dataFolderPath = await this.getDataFolderPath(); - return join(dataFolderPath, this.extensionFoldername); - } - - /** - * Get the benchmark folder path - * Usually it is located at the home directory > cortex > extensions - * @returns the path to the benchmark folder - */ - async getBenchmarkPath(): Promise { - const dataFolderPath = await this.getDataFolderPath(); - return join(dataFolderPath, this.benchmarkFoldername); - } - - /** - * Get Cortex CPP engines folder path - * @returns the path to the cortex engines folder - */ - async getCortexCppEnginePath(): Promise { - return join(await this.getDataFolderPath(), this.cortexEnginesFolderName); - } - - /** - * Get log path - * @returns the path to the cortex engines folder - */ - async getLogPath(): Promise { - return join(await this.getDataFolderPath(), 'cortex.log'); - } - - async createFolderIfNotExistInDataFolder(folderName: string): Promise { - const dataFolderPath = await this.getDataFolderPath(); - const folderPath = join(dataFolderPath, folderName); - if (!existsSync(folderPath)) { - await promises.mkdir(folderPath, { recursive: true }); - } - } - - async readFile(filePath: string): Promise { - try { - const isFileExist = existsSync(filePath); - if (!isFileExist) { - return null; - } - const content = await promises.readFile(filePath, { - encoding: 'utf8', - }); - return content; - } catch (error) { - throw error; - } - } - async writeFile(filePath: string, data: any): Promise { - try { - return promises.writeFile(filePath, data, { - encoding: 'utf8', - flag: 'w+', - }); - } catch (error) { - throw error; - } - } - - /** - * Get the cortex server configurations - * It is supposed to be stored in the home directory > .cortexrc 
- * @returns the server configurations - */ - getServerConfig(): { host: string; port: number } { - const configPath = join( - this.configPath, - this.getConfigFileName(this.configProfile), - ); - let config = this.defaultConfig(); - try { - const content = readFileSync(configPath, 'utf8'); - const currentConfig = (yaml.load(content) as Config & object) ?? {}; - if (currentConfig) { - config = currentConfig; - } - } catch {} - return { - host: config.apiServerHost ?? '127.0.0.1', - port: config.apiServerPort ?? 1337, - }; - } - - public setConfigProfile(profile: string) { - this.configProfile = profile; - } - - public getConfigProfile() { - return this.configProfile; - } - public profileConfigExists(profile: string): boolean { - const configPath = join(this.configPath, this.getConfigFileName(profile)); - try { - const content = readFileSync(configPath, 'utf8'); - const config = yaml.load(content) as Config & object; - return !!config; - } catch { - return false; - } - } - - private getConfigFileName(configProfile: string): string { - if (configProfile === 'default') { - return '.cortexrc'; - } - return `.${configProfile}rc`; - } - - public setConfigPath(configPath: string) { - this.configPath = configPath; - } - - public getConfigPath(): string { - return this.configPath; - } -} - -export const fileManagerService = new FileManagerService(); diff --git a/cortex-js/src/infrastructure/services/resources-manager/resources-manager.module.ts b/cortex-js/src/infrastructure/services/resources-manager/resources-manager.module.ts deleted file mode 100644 index 02c43c8bb..000000000 --- a/cortex-js/src/infrastructure/services/resources-manager/resources-manager.module.ts +++ /dev/null @@ -1,8 +0,0 @@ -import { Module } from '@nestjs/common'; -import { ResourcesManagerService } from './resources-manager.service'; - -@Module({ - providers: [ResourcesManagerService], - exports: [ResourcesManagerService], -}) -export class ResourceManagerModule {} diff --git 
a/cortex-js/src/infrastructure/services/resources-manager/resources-manager.service.ts b/cortex-js/src/infrastructure/services/resources-manager/resources-manager.service.ts deleted file mode 100644 index 190d79779..000000000 --- a/cortex-js/src/infrastructure/services/resources-manager/resources-manager.service.ts +++ /dev/null @@ -1,35 +0,0 @@ -import { - ResourceStatus, - UsedMemInfo, -} from '@/domain/models/resource.interface'; -import { getMemoryInformation, MemoryInformation } from '@/utils/system-resource'; -import { Injectable } from '@nestjs/common'; -import si, { Systeminformation } from 'systeminformation'; - -@Injectable() -export class ResourcesManagerService { - async getResourceStatuses(): Promise { - const promises = [si.currentLoad(), getMemoryInformation()]; - const results = await Promise.all(promises); - - const cpuUsage = results[0] as Systeminformation.CurrentLoadData; - const memory = results[1] as MemoryInformation - const memInfo: UsedMemInfo = { - total: memory.total, - used: memory.used, - }; - return { - mem: memInfo, - cpu: { - usage: Number(cpuUsage.currentLoad.toFixed(2)), - }, - gpus: (await si.graphics()).controllers.map((gpu) => ({ - name: gpu.name, - vram: { - total: gpu.vram ?? 0, - used: gpu.memoryUsed ?? 
0, - } - })), - }; - } -} diff --git a/cortex-js/src/main.ts b/cortex-js/src/main.ts deleted file mode 100644 index 19c3e4d74..000000000 --- a/cortex-js/src/main.ts +++ /dev/null @@ -1,23 +0,0 @@ -import { - defaultCortexJsHost, - defaultCortexJsPort, -} from '@/infrastructure/constants/cortex'; -import { getApp } from './app'; - -process.title = 'Cortex API Server'; - -async function bootstrap() { - // getting port from env - const host = process.env.CORTEX_JS_HOST || defaultCortexJsHost; - const port = process.env.CORTEX_JS_PORT || defaultCortexJsPort; - const app = await getApp(host, Number(port)); - try { - await app.listen(port, host); - console.log(`Started server at http://${host}:${port}`); - console.log(`API Playground available at http://${host}:${port}/api`); - } catch (error) { - console.error(`Failed to start server. Is port ${port} in use? ${error}`); - } -} - -bootstrap(); diff --git a/cortex-js/src/usecases/assistants/assistants.module.ts b/cortex-js/src/usecases/assistants/assistants.module.ts deleted file mode 100644 index 95849a3e7..000000000 --- a/cortex-js/src/usecases/assistants/assistants.module.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { Module } from '@nestjs/common'; -import { AssistantsUsecases } from './assistants.usecases'; -import { DatabaseModule } from '@/infrastructure/database/database.module'; -import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; - -@Module({ - imports: [DatabaseModule, ModelRepositoryModule], - controllers: [], - providers: [AssistantsUsecases], - exports: [AssistantsUsecases], -}) -export class AssistantsModule {} diff --git a/cortex-js/src/usecases/assistants/assistants.usecases.spec.ts b/cortex-js/src/usecases/assistants/assistants.usecases.spec.ts deleted file mode 100644 index 2dc8bb89f..000000000 --- a/cortex-js/src/usecases/assistants/assistants.usecases.spec.ts +++ /dev/null @@ -1,29 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { 
AssistantsUsecases } from './assistants.usecases'; -import { DatabaseModule } from '@/infrastructure/database/database.module'; -import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; -import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; -import { EventEmitterModule } from '@nestjs/event-emitter'; - -describe('AssistantsService', () => { - let service: AssistantsUsecases; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - imports: [ - EventEmitterModule.forRoot(), - DatabaseModule, - ModelRepositoryModule, - DownloadManagerModule, - ], - exports: [AssistantsUsecases], - providers: [AssistantsUsecases], - }).compile(); - - service = module.get(AssistantsUsecases); - }); - - it('should be defined', () => { - expect(service).toBeDefined(); - }); -}); diff --git a/cortex-js/src/usecases/assistants/assistants.usecases.ts b/cortex-js/src/usecases/assistants/assistants.usecases.ts deleted file mode 100644 index b3b425468..000000000 --- a/cortex-js/src/usecases/assistants/assistants.usecases.ts +++ /dev/null @@ -1,96 +0,0 @@ -import { Inject, Injectable } from '@nestjs/common'; -import { CreateAssistantDto } from '@/infrastructure/dtos/assistants/create-assistant.dto'; -import { Assistant } from '@/domain/models/assistant.interface'; -import { PageDto } from '@/infrastructure/dtos/page.dto'; -import { ModelRepository } from '@/domain/repositories/model.interface'; -import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; -import { Op } from 'sequelize'; -import { AssistantEntity } from '@/infrastructure/entities/assistant.entity'; -import { Repository } from 'sequelize-typescript'; - -@Injectable() -export class AssistantsUsecases { - constructor( - @Inject('ASSISTANT_REPOSITORY') - private readonly assistantRepository: Repository, - private readonly modelRepository: ModelRepository, - ) {} - - async 
create(createAssistantDto: CreateAssistantDto) { - const { top_p, temperature, model, id } = createAssistantDto; - if (model !== '*') { - const modelEntity = await this.modelRepository.findOne(model); - if (!modelEntity) { - throw new ModelNotFoundException(model); - } - } - - const assistant: Partial = { - ...createAssistantDto, - object: 'assistant', - created_at: Date.now(), - response_format: null, - tool_resources: null, - top_p: top_p ?? null, - temperature: temperature ?? null, - }; - - try { - await this.assistantRepository.create(assistant); - } catch (err) { - throw err; - } - - return this.findOne(id); - } - - async listAssistants( - limit: number, - order: 'asc' | 'desc', - after?: string, - before?: string, - ) { - const normalizedOrder = order === 'asc' ? 'ASC' : 'DESC'; - - const where: any = {}; - if (after) { - where.id = { [Op.gt]: after }; - } - if (before) { - where.id = { [Op.lt]: before }; - } - - const assistants = await this.assistantRepository.findAll({ - where, - order: [['created_at', normalizedOrder]], - limit: limit + 1, - }); - - let hasMore = false; - if (assistants.length > limit) { - hasMore = true; - assistants.pop(); - } - - const firstId = assistants[0]?.id ?? undefined; - const lastId = assistants[assistants.length - 1]?.id ?? 
undefined; - - return new PageDto(assistants, hasMore, firstId, lastId); - } - - async findAll(): Promise { - return this.assistantRepository.findAll(); - } - - async findOne(id: string) { - return this.assistantRepository.findOne({ - where: { id }, - }); - } - - async remove(id: string) { - return this.assistantRepository.destroy({ - where: { id }, - }); - } -} diff --git a/cortex-js/src/usecases/chat/chat.module.ts b/cortex-js/src/usecases/chat/chat.module.ts deleted file mode 100644 index a09d1fabf..000000000 --- a/cortex-js/src/usecases/chat/chat.module.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { Module } from '@nestjs/common'; -import { ChatUsecases } from './chat.usecases'; -import { DatabaseModule } from '@/infrastructure/database/database.module'; -import { ExtensionModule } from '@/infrastructure/repositories/extensions/extension.module'; -import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; -import { HttpModule } from '@nestjs/axios'; -import { TelemetryModule } from '../telemetry/telemetry.module'; -import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; -import { ModelsModule } from '../models/models.module'; - -@Module({ - imports: [ - DatabaseModule, - ExtensionModule, - ModelRepositoryModule, - HttpModule, - TelemetryModule, - FileManagerModule, - ModelsModule, - ], - controllers: [], - providers: [ChatUsecases], - exports: [ChatUsecases], -}) -export class ChatModule {} diff --git a/cortex-js/src/usecases/chat/chat.usecases.spec.ts b/cortex-js/src/usecases/chat/chat.usecases.spec.ts deleted file mode 100644 index 896c5d275..000000000 --- a/cortex-js/src/usecases/chat/chat.usecases.spec.ts +++ /dev/null @@ -1,40 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { ChatUsecases } from './chat.usecases'; -import { DatabaseModule } from '@/infrastructure/database/database.module'; -import { ExtensionModule } from 
'@/infrastructure/repositories/extensions/extension.module'; -import { TelemetryModule } from '../telemetry/telemetry.module'; -import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; -import { HttpModule } from '@nestjs/axios'; -import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; -import { EventEmitterModule } from '@nestjs/event-emitter'; -import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; -import { ModelsModule } from '../models/models.module'; - -describe('ChatService', () => { - let service: ChatUsecases; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - imports: [ - EventEmitterModule.forRoot(), - DatabaseModule, - ExtensionModule, - ModelRepositoryModule, - HttpModule, - TelemetryModule, - DownloadManagerModule, - EventEmitterModule.forRoot(), - FileManagerModule, - ModelsModule, - ], - providers: [ChatUsecases], - exports: [ChatUsecases], - }).compile(); - - service = module.get(ChatUsecases); - }); - - it('should be defined', () => { - expect(service).toBeDefined(); - }); -}); diff --git a/cortex-js/src/usecases/chat/chat.usecases.ts b/cortex-js/src/usecases/chat/chat.usecases.ts deleted file mode 100644 index bf38e9cfb..000000000 --- a/cortex-js/src/usecases/chat/chat.usecases.ts +++ /dev/null @@ -1,111 +0,0 @@ -import { Injectable } from '@nestjs/common'; -import { CreateChatCompletionDto } from '@/infrastructure/dtos/chat/create-chat-completion.dto'; -import { EngineExtension } from '@/domain/abstracts/engine.abstract'; -import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; -import { TelemetryUsecases } from '../telemetry/telemetry.usecases'; -import { TelemetrySource } from '@/domain/telemetry/telemetry.interface'; -import { ExtensionRepository } from '@/domain/repositories/extension.interface'; -import { firstValueFrom } from 
'rxjs'; -import { HttpService } from '@nestjs/axios'; -import { - CORTEX_CPP_EMBEDDINGS_URL, - defaultEmbeddingModel, -} from '@/infrastructure/constants/cortex'; -import { CreateEmbeddingsDto } from '@/infrastructure/dtos/embeddings/embeddings-request.dto'; -import { Engines } from '@/infrastructure/commanders/types/engine.interface'; -import { ModelsUsecases } from '../models/models.usecases'; -import { isRemoteEngine } from '@/utils/normalize-model-id'; -import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; - -@Injectable() -export class ChatUsecases { - constructor( - private readonly extensionRepository: ExtensionRepository, - private readonly telemetryUseCases: TelemetryUsecases, - private readonly modelsUsescases: ModelsUsecases, - private readonly httpService: HttpService, - ) {} - - async inference( - createChatDto: CreateChatCompletionDto, - headers: Record, - ): Promise { - const { model: modelId } = createChatDto; - const model = await this.modelsUsescases.findOne(modelId); - if (!model) { - throw new ModelNotFoundException(modelId); - } - - const isModelRunning = await this.modelsUsescases.isModelRunning(modelId); - // If model is not running - // Start the model - if (!isModelRunning) { - await this.modelsUsescases.startModel(modelId); - } - - const engine = (await this.extensionRepository.findOne( - model!.engine ?? Engines.llamaCPP, - )) as EngineExtension | undefined; - if (engine == null) { - throw new Error(`No engine found with name: ${model.engine}`); - } - const payload = { - ...createChatDto, - ...(model.engine && - !isRemoteEngine(model.engine) && { engine: model.engine }), - }; - try { - return await engine.inference(payload, headers); - } catch (error) { - await this.telemetryUseCases.createCrashReport( - error, - TelemetrySource.CORTEX_CPP, - ); - throw error; - } - } - - /** - * Creates an embedding vector representing the input text. - * @param model Embedding model ID. 
- * @param input Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. - * @param encoding_format Encoding format for the embeddings. Supported formats are 'float' and 'int'. - * @param host Cortex CPP host. - * @param port Cortex CPP port. - * @returns Embedding vector. - */ - async embeddings(dto: CreateEmbeddingsDto) { - const modelId = dto.model ?? defaultEmbeddingModel; - - if (modelId !== dto.model) dto = { ...dto, model: modelId }; - - if (!(await this.modelsUsescases.findOne(modelId))) { - await this.modelsUsescases.pullModel(modelId); - } - - const isModelRunning = await this.modelsUsescases.isModelRunning(modelId); - // If model is not running - // Start the model - if (!isModelRunning) { - await this.modelsUsescases.startModel(modelId, { - embedding: true, - model_type: 'embedding', - }); - } - - const configs = await fileManagerService.getConfig(); - - return firstValueFrom( - this.httpService.post( - CORTEX_CPP_EMBEDDINGS_URL(configs.cortexCppHost, configs.cortexCppPort), - dto, - { - headers: { - 'Content-Type': 'application/json', - 'Accept-Encoding': 'gzip', - }, - }, - ), - ).then((res) => res.data); - } -} diff --git a/cortex-js/src/usecases/chat/exception/invalid-api-url.exception.ts b/cortex-js/src/usecases/chat/exception/invalid-api-url.exception.ts deleted file mode 100644 index 958d5d87c..000000000 --- a/cortex-js/src/usecases/chat/exception/invalid-api-url.exception.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { HttpException, HttpStatus } from '@nestjs/common'; - -export class InvalidApiUrlException extends HttpException { - constructor(modelId: string, apiUrl: string | undefined) { - super( - `Remote model ${modelId} has configured api url ${apiUrl} is not valid!`, - HttpStatus.INTERNAL_SERVER_ERROR, - ); - } -} diff --git a/cortex-js/src/usecases/configs/configs.module.ts b/cortex-js/src/usecases/configs/configs.module.ts deleted file mode 
100644 index afa4bca2f..000000000 --- a/cortex-js/src/usecases/configs/configs.module.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { Module } from '@nestjs/common'; -import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; -import { ConfigsUsecases } from './configs.usecase'; - -@Module({ - imports: [FileManagerModule], - controllers: [], - providers: [ConfigsUsecases], - exports: [ConfigsUsecases], -}) -export class ConfigsModule {} diff --git a/cortex-js/src/usecases/configs/configs.usecase.ts b/cortex-js/src/usecases/configs/configs.usecase.ts deleted file mode 100644 index 0332fc1ce..000000000 --- a/cortex-js/src/usecases/configs/configs.usecase.ts +++ /dev/null @@ -1,84 +0,0 @@ -import { CommonResponseDto } from '@/infrastructure/dtos/common/common-response.dto'; -import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; -import { Injectable } from '@nestjs/common'; -import { EventEmitter2 } from '@nestjs/event-emitter'; - -@Injectable() -export class ConfigsUsecases { - constructor(private readonly eventEmitter: EventEmitter2) {} - - /** - * Save a configuration to the .cortexrc file. - * @param key Configuration Key - * @param engine The engine where the configs belongs - */ - async saveConfig( - key: string, - value: string, - engine?: string, - ): Promise { - const configs = await fileManagerService.getConfig(); - - const groupConfigs = configs[ - engine as keyof typeof configs - ] as unknown as object; - const newConfigs = { - ...configs, - ...(engine - ? { - [engine]: { - ...groupConfigs, - [key]: value, - }, - } - : {}), - }; - - return fileManagerService - .writeConfigFile(newConfigs) - .then(async () => { - if (engine) { - this.eventEmitter.emit('config.updated', { - engine, - key, - value, - }); - } - }) - .then(() => { - return { - message: 'The config has been successfully updated.', - }; - }); - } - - /** - * Get the configurations of a group. 
- * @param group - * @returns - */ - async getGroupConfigs(group: string) { - const configs = await fileManagerService.getConfig(); - return configs[group as keyof typeof configs] as unknown as object; - } - - /** - * Get all available configurations. - * @param group - * @returns - */ - async getConfigs() { - return fileManagerService.getConfig(); - } - - /** - * Get the configuration with given key - * @param group - * @param key - * @returns - */ - async getKeyConfig(key: string) { - const configs = await fileManagerService.getConfig(); - return configs[key as keyof typeof configs]; - } -} diff --git a/cortex-js/src/usecases/cortex/cortex.module.ts b/cortex-js/src/usecases/cortex/cortex.module.ts deleted file mode 100644 index 3b1107ae2..000000000 --- a/cortex-js/src/usecases/cortex/cortex.module.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { Module } from '@nestjs/common'; -import { CortexUsecases } from './cortex.usecases'; -import { HttpModule } from '@nestjs/axios'; -import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; - -@Module({ - imports: [HttpModule, FileManagerModule], - providers: [CortexUsecases], - exports: [CortexUsecases], -}) -export class CortexModule {} diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.spec.ts b/cortex-js/src/usecases/cortex/cortex.usecases.spec.ts deleted file mode 100644 index 08f584dac..000000000 --- a/cortex-js/src/usecases/cortex/cortex.usecases.spec.ts +++ /dev/null @@ -1,23 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { CortexUsecases } from './cortex.usecases'; -import { DatabaseModule } from '@/infrastructure/database/database.module'; -import { HttpModule } from '@nestjs/axios'; -import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; - -describe('CortexUsecases', () => { - let cortexUsecases: CortexUsecases; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - 
imports: [DatabaseModule, HttpModule, FileManagerModule], - providers: [CortexUsecases], - exports: [], - }).compile(); - - cortexUsecases = module.get(CortexUsecases); - }); - - it('should be defined', () => { - expect(cortexUsecases).toBeDefined(); - }); -}); diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts deleted file mode 100644 index ac904da53..000000000 --- a/cortex-js/src/usecases/cortex/cortex.usecases.ts +++ /dev/null @@ -1,254 +0,0 @@ -import { - BeforeApplicationShutdown, - HttpStatus, - Injectable, -} from '@nestjs/common'; -import { ChildProcess, fork } from 'child_process'; -import { delimiter, join } from 'path'; -import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto'; -import { HttpService } from '@nestjs/axios'; - -import { firstValueFrom } from 'rxjs'; -import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; -import { - CORTEX_CPP_HEALTH_Z_URL, - CORTEX_CPP_PROCESS_DESTROY_URL, - CORTEX_CPP_UNLOAD_ENGINE_URL, - CORTEX_JS_SYSTEM_URL, - defaultCortexJsHost, - defaultCortexJsPort, -} from '@/infrastructure/constants/cortex'; -import { openSync } from 'fs'; -import { Engines } from '@/infrastructure/commanders/types/engine.interface'; - -@Injectable() -export class CortexUsecases implements BeforeApplicationShutdown { - private cortexProcess: ChildProcess | undefined; - - constructor(private readonly httpService: HttpService) {} - - /** - * Start the Cortex CPP process - * @param attach - * @returns - */ - async startCortex(): Promise { - const configs = await fileManagerService.getConfig(); - const host = configs.cortexCppHost; - const port = configs.cortexCppPort; - if (this.cortexProcess || (await this.healthCheck(host, port))) { - return { - message: 'Cortex is already running', - status: 'success', - }; - } - - const engineDir = await fileManagerService.getCortexCppEnginePath(); - 
const dataFolderPath = await fileManagerService.getDataFolderPath(); - - const writer = openSync(await fileManagerService.getLogPath(), 'a+'); - - // Attempt to stop the process if it's already running - await this.stopCortex(); - // go up one level to get the binary folder, have to also work on windows - this.cortexProcess = fork(join(__dirname, './../../utils/cortex-cpp'), [], { - detached: true, - cwd: dataFolderPath, - stdio: [0, writer, writer, 'ipc'], - env: { - ...process.env, - CUDA_VISIBLE_DEVICES: '0', - ENGINE_PATH: dataFolderPath, - PATH: (process.env.PATH || '').concat(delimiter, engineDir), - LD_LIBRARY_PATH: (process.env.LD_LIBRARY_PATH || '').concat( - delimiter, - engineDir, - ), - CORTEX_CPP_PORT: port.toString(), - // // Vulkan - Support 1 device at a time for now - // ...(executableOptions.vkVisibleDevices?.length > 0 && { - // GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0], - // }), - }, - }); - this.cortexProcess.unref(); - - // Handle process exit - this.cortexProcess.on('close', (code) => { - this.cortexProcess = undefined; - console.log(`child process exited with code ${code}`); - }); - - // Await for the /healthz status ok - return new Promise((resolve, reject) => { - const interval = setInterval(() => { - this.healthCheck(host, port) - .then(() => { - clearInterval(interval); - resolve({ - message: 'Cortex started successfully', - status: 'success', - }); - }) - .catch(reject); - }, 1000); - }).then((res) => { - fileManagerService.writeConfigFile({ - ...configs, - cortexCppHost: host, - cortexCppPort: port, - }); - return res; - }); - } - - /** - * Stop the Cortex CPP process - */ - async stopCortex(): Promise { - const configs = await fileManagerService.getConfig(); - try { - await firstValueFrom( - this.httpService.delete( - CORTEX_CPP_PROCESS_DESTROY_URL( - configs.cortexCppHost, - configs.cortexCppPort, - ), - ), - ); - } finally { - this.cortexProcess?.kill(); - return { - message: 'Cortex stopped successfully', - status: 
'success', - }; - } - } - - /** - * Unload the engine - */ - async unloadCortexEngine(engine: Engines): Promise { - const configs = await fileManagerService.getConfig(); - try { - await firstValueFrom( - this.httpService.post( - CORTEX_CPP_UNLOAD_ENGINE_URL( - configs.cortexCppHost, - configs.cortexCppPort, - ), - ), - ); - } finally { - return { - message: `${engine} unloaded successfully`, - status: 'success', - }; - } - } - - - /** - * Check whether the Cortex CPP is healthy - * @param host - * @param port - * @returns - */ - async healthCheck(host: string, port: number): Promise { - return fetch(CORTEX_CPP_HEALTH_Z_URL(host, port)) - .then((res) => { - if (res.ok) { - return true; - } - return false; - }) - .catch(() => false); - } - - /** - * start the API server in detached mode - */ - async startServerDetached(host: string, port: number) { - const writer = openSync(await fileManagerService.getLogPath(), 'a+'); - const server = fork(join(__dirname, './../../main.js'), [], { - detached: true, - stdio: ['ignore', writer, writer, 'ipc'], - env: { - CORTEX_JS_HOST: host, - CORTEX_JS_PORT: port.toString(), - CORTEX_PROFILE: fileManagerService.getConfigProfile(), - CORTEX_CONFIG_PATH: fileManagerService.getConfigPath(), - ...process.env, - }, - }); - server.disconnect(); // closes the IPC channel - server.unref(); - // Await for the /healthz status ok - return new Promise((resolve, reject) => { - const TIMEOUT = 10 * 1000; - const timeout = setTimeout(() => { - clearInterval(interval); - clearTimeout(timeout); - reject(); - }, TIMEOUT); - const interval = setInterval(() => { - this.isAPIServerOnline(host, port) - .then((result) => { - if (result) { - clearInterval(interval); - clearTimeout(timeout); - resolve(true); - } - }) - .catch(reject); - }, 1000); - }); - } - /** - * Check if the Cortex API server is online - * @returns - */ - - async isAPIServerOnline(host?: string, port?: number): Promise { - const { - apiServerHost: configApiServerHost, - apiServerPort: 
configApiServerPort, - } = await fileManagerService.getConfig(); - // for backward compatibility, we didn't have the apiServerHost and apiServerPort in the config file in the past - const apiServerHost = host || configApiServerHost || defaultCortexJsHost; - const apiServerPort = port || configApiServerPort || defaultCortexJsPort; - return firstValueFrom( - this.httpService.get(CORTEX_JS_SYSTEM_URL(apiServerHost, apiServerPort)), - ) - .then((res) => res.status === HttpStatus.OK) - .catch(() => false); - } - - async stopApiServer() { - const { - apiServerHost: configApiServerHost, - apiServerPort: configApiServerPort, - } = await fileManagerService.getConfig(); - - // for backward compatibility, we didn't have the apiServerHost and apiServerPort in the config file in the past - const apiServerHost = configApiServerHost || defaultCortexJsHost; - const apiServerPort = configApiServerPort || defaultCortexJsPort; - return fetch(CORTEX_JS_SYSTEM_URL(apiServerHost, apiServerPort), { - method: 'DELETE', - }).catch(() => {}); - } - - async updateApiServerConfig(host: string, port: number) { - const config = await fileManagerService.getConfig(); - await fileManagerService.writeConfigFile({ - ...config, - cortexCppHost: host, - cortexCppPort: port, - }); - } - - async beforeApplicationShutdown(signal: string) { - console.log(`Received ${signal}, performing pre-shutdown tasks.`); - await this.stopCortex(); - } -} diff --git a/cortex-js/src/usecases/engines/engines.module.ts b/cortex-js/src/usecases/engines/engines.module.ts deleted file mode 100644 index 5ee607a8c..000000000 --- a/cortex-js/src/usecases/engines/engines.module.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { Module } from '@nestjs/common'; -import { ConfigsModule } from '../configs/configs.module'; -import { EnginesUsecases } from './engines.usecase'; -import { ExtensionModule } from '@/infrastructure/repositories/extensions/extension.module'; -import { HttpModule } from '@nestjs/axios'; -import { FileManagerModule 
} from '@/infrastructure/services/file-manager/file-manager.module'; -import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; - -@Module({ - imports: [ - ConfigsModule, - ExtensionModule, - HttpModule, - FileManagerModule, - DownloadManagerModule, - ConfigsModule, - ], - controllers: [], - providers: [EnginesUsecases], - exports: [EnginesUsecases], -}) -export class EnginesModule {} diff --git a/cortex-js/src/usecases/engines/engines.usecase.ts b/cortex-js/src/usecases/engines/engines.usecase.ts deleted file mode 100644 index ac0b22383..000000000 --- a/cortex-js/src/usecases/engines/engines.usecase.ts +++ /dev/null @@ -1,305 +0,0 @@ -import { isEmpty } from 'lodash'; -import { ForbiddenException, Injectable } from '@nestjs/common'; -import { ExtensionRepository } from '@/domain/repositories/extension.interface'; -import { cpSync, existsSync, mkdirSync, readdirSync, writeFileSync } from 'fs'; -import { join } from 'path'; -import { HttpService } from '@nestjs/axios'; -import decompress from 'decompress'; -import { exit } from 'node:process'; -import { InitOptions } from '@commanders/types/init-options.interface'; -import { firstValueFrom } from 'rxjs'; -import { rm } from 'fs/promises'; -import { - CORTEX_ENGINE_RELEASES_URL, - CUDA_DOWNLOAD_URL, - MIN_CUDA_VERSION, -} from '@/infrastructure/constants/cortex'; - -import { DownloadManagerService } from '@/infrastructure/services/download-manager/download-manager.service'; -import { DownloadType } from '@/domain/models/download.interface'; -import { Engines } from '@/infrastructure/commanders/types/engine.interface'; -import { CommonResponseDto } from '@/infrastructure/dtos/common/common-response.dto'; -import { EngineStatus } from '@/domain/abstracts/engine.abstract'; -import { ConfigsUsecases } from '../configs/configs.usecase'; -import { defaultInstallationOptions } from '@/utils/init'; -import { fileManagerService } from 
'@/infrastructure/services/file-manager/file-manager.service'; - -@Injectable() -export class EnginesUsecases { - constructor( - private readonly httpService: HttpService, - private readonly downloadManagerService: DownloadManagerService, - private readonly extensionRepository: ExtensionRepository, - private readonly configsUsecases: ConfigsUsecases, - ) {} - - /** - * Get the engines - * @returns Cortex supported Engines - */ - async getEngines() { - return (await this.extensionRepository.findAll()).map((engine) => ({ - name: engine.name, - description: engine.description, - version: engine.version, - productName: engine.productName, - status: engine.status?.toLowerCase(), - })); - } - - /** - * Get the engine with the given name - * @param name Engine name - * @returns Engine - */ - async getEngine(name: string) { - return this.extensionRepository.findOne(name).then((engine) => - engine - ? { - name: engine.name, - description: engine.description, - version: engine.version, - productName: engine.productName, - status: engine.status?.toLowerCase(), - } - : undefined, - ); - } - - /** - * Install Engine and Dependencies with given options - * @param engineFileName - * @param version - */ - installEngine = async ( - options?: InitOptions, - engine: string = 'default', - force: boolean = false, - ): Promise => { - // Use default option if not defined - - if ((!options || isEmpty(options)) && engine === Engines.llamaCPP) { - options = await defaultInstallationOptions(); - } - const installPackages = []; - // Ship Llama.cpp engine by default - if ( - !existsSync( - join(await fileManagerService.getCortexCppEnginePath(), engine), - ) || - force - ) { - const isVulkan = - engine === Engines.llamaCPP && - (options?.vulkan || - (options?.runMode === 'GPU' && options?.gpuType !== 'Nvidia')); - const platformMatcher = - process.platform === 'win32' - ? '-windows' - : process.platform === 'darwin' - ? 
'-mac' - : '-linux'; - installPackages.push( - this.installAcceleratedEngine(options?.version ?? 'latest', engine, [ - platformMatcher, - // CPU Instructions - CPU | GPU Non-Vulkan - !isVulkan && engine === Engines.llamaCPP && platformMatcher !== '-mac' - ? `-noavx` - : '', - // Cuda - options?.runMode === 'GPU' && - options?.gpuType === 'Nvidia' && - !isVulkan - ? `cuda-${options.cudaVersion ?? '12'}` - : '', - // Vulkan - isVulkan ? '-vulkan' : '', - - // Arch - engine !== Engines.tensorrtLLM - ? process.arch === 'arm64' - ? '-arm64' - : '-amd64' - : '', - ]), - ); - } - - if ( - engine === Engines.tensorrtLLM || - (engine === Engines.llamaCPP && - options?.runMode === 'GPU' && - options?.gpuType === 'Nvidia' && - !options?.vulkan) - ) - installPackages.push( - this.installCudaToolkitDependency( - engine === Engines.tensorrtLLM - ? MIN_CUDA_VERSION - : options?.cudaVersion, - ), - ); - await Promise.all(installPackages); - // Update states - await this.extensionRepository.findOne(engine).then((e) => { - if (e) e.status = EngineStatus.READY; - }); - }; - - /** - * Update engine's configurations - * @param config Configuration Key - * @param value Configuration Value - * @param engine Configuration Group where the key belongs - */ - async updateConfigs( - config: string, - value: string, - engine: string, - ): Promise { - if (!engine || !(await this.extensionRepository.findOne(engine))) - throw new ForbiddenException('Engine not found'); - - return this.configsUsecases - .saveConfig(config, value, engine) - .then((res) => { - this.extensionRepository.findOne(engine).then((e) => { - if (e && value) e.status = EngineStatus.READY; - }); - return res; - }); - } - - /** - * Get the configurations of an engine. 
- * @param engine - * @returns - */ - async getEngineConfigs(engine: string) { - if (!engine || !(await this.extensionRepository.findOne(engine))) - throw new ForbiddenException('Engine not found'); - return this.configsUsecases.getGroupConfigs(engine); - } - - /** - * Install CUDA Toolkit dependency (dll/so files) - * @param options - */ - private installCudaToolkitDependency = async (cudaVersion?: string) => { - const platform = process.platform === 'win32' ? 'windows' : 'linux'; - - const dataFolderPath = await fileManagerService.getDataFolderPath(); - const url = CUDA_DOWNLOAD_URL.replace( - '', - cudaVersion === '11' ? '11.7' : MIN_CUDA_VERSION, - ).replace('', platform); - const destination = join(dataFolderPath, 'cuda-toolkit.tar.gz'); - - console.log('Downloading CUDA Toolkit dependency...'); - - await this.downloadManagerService.submitDownloadRequest( - url, - 'Cuda Toolkit Dependencies', - DownloadType.Engine, - { [url]: { destination } }, - async () => { - try { - await decompress( - destination, - await fileManagerService.getCortexCppEnginePath(), - ); - } catch (e) { - console.log(e); - exit(1); - } - await rm(destination, { force: true }); - }, - ); - }; - - /** - * Download and install accelerated engine - * @param version - * @param engineFileName - */ - private async installAcceleratedEngine( - version: string = 'latest', - engine: string = Engines.llamaCPP, - matchers: string[] = [], - ) { - const res = await firstValueFrom( - this.httpService.get( - CORTEX_ENGINE_RELEASES_URL(engine) + - `${version === 'latest' ? 
'/latest' : ''}`, - { - headers: { - 'X-GitHub-Api-Version': '2022-11-28', - Accept: 'application/vnd.github+json', - }, - }, - ), - ); - - if (!res?.data) { - console.log('Failed to fetch releases'); - exit(1); - } - - let release = res?.data; - if (Array.isArray(res?.data)) { - release = Array(res?.data)[0].find( - (e) => e.name === version.replace('v', ''), - ); - } - // Find the asset for the current platform - const toDownloadAsset = release.assets - .sort((a: any, b: any) => a.name.length - b.name.length) - .find((asset: any) => - matchers.every((matcher) => asset.name.includes(matcher)), - ); - - console.log('Downloading ', toDownloadAsset.name); - - if (!toDownloadAsset) { - console.log( - `Could not find engine file for platform ${process.platform}`, - ); - } - - const engineDir = await fileManagerService.getCortexCppEnginePath(); - - if (!existsSync(engineDir)) mkdirSync(engineDir, { recursive: true }); - - const destination = join(engineDir, toDownloadAsset.name); - - await this.downloadManagerService.submitDownloadRequest( - toDownloadAsset.browser_download_url, - engine, - DownloadType.Engine, - { [toDownloadAsset.browser_download_url]: { destination } }, - // On completed - post processing - async () => { - try { - await decompress(destination, engineDir); - } catch (e) { - console.error('Error decompressing file', e); - exit(1); - } - await rm(destination, { force: true }); - - // Copy the additional files to the cortex-cpp directory - for (const file of readdirSync(join(engineDir, engine))) { - if (!file.includes('engine')) { - cpSync(join(engineDir, engine, file), join(engineDir, file)); - } - } - - writeFileSync( - join(engineDir, engine, 'version.txt'), - release.tag_name.replace(/^v/, ''), - 'utf-8', - ); - }, - ); - } -} diff --git a/cortex-js/src/usecases/messages/messages.module.ts b/cortex-js/src/usecases/messages/messages.module.ts deleted file mode 100644 index ab759dc81..000000000 --- a/cortex-js/src/usecases/messages/messages.module.ts 
+++ /dev/null @@ -1,11 +0,0 @@ -import { Module } from '@nestjs/common'; -import { MessagesUsecases } from './messages.usecases'; -import { DatabaseModule } from '@/infrastructure/database/database.module'; - -@Module({ - imports: [DatabaseModule], - controllers: [], - providers: [MessagesUsecases], - exports: [MessagesUsecases], -}) -export class MessagesModule {} diff --git a/cortex-js/src/usecases/messages/messages.usecases.spec.ts b/cortex-js/src/usecases/messages/messages.usecases.spec.ts deleted file mode 100644 index ad671d3f3..000000000 --- a/cortex-js/src/usecases/messages/messages.usecases.spec.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { MessagesUsecases } from './messages.usecases'; -import { DatabaseModule } from '@/infrastructure/database/database.module'; - -describe('MessagesService', () => { - let service: MessagesUsecases; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - imports: [DatabaseModule], - providers: [MessagesUsecases], - exports: [MessagesUsecases], - }).compile(); - - service = module.get(MessagesUsecases); - }); - - it('should be defined', () => { - expect(service).toBeDefined(); - }); -}); diff --git a/cortex-js/src/usecases/messages/messages.usecases.ts b/cortex-js/src/usecases/messages/messages.usecases.ts deleted file mode 100644 index 2e4fab8d8..000000000 --- a/cortex-js/src/usecases/messages/messages.usecases.ts +++ /dev/null @@ -1,70 +0,0 @@ -import { Inject, Injectable } from '@nestjs/common'; -import { CreateMessageDto } from '@/infrastructure/dtos/messages/create-message.dto'; -import { UpdateMessageDto } from '@/infrastructure/dtos/messages/update-message.dto'; -import { ulid } from 'ulid'; -import { MessageEntity } from '@/infrastructure/entities/message.entity'; -import { Message } from '@/domain/models/message.interface'; -import { Repository } from 'sequelize-typescript'; - -@Injectable() -export class MessagesUsecases 
{ - constructor( - @Inject('MESSAGE_REPOSITORY') - private messageRepository: Repository, - ) {} - - async create(createMessageDto: CreateMessageDto) { - const { assistant_id } = createMessageDto; - const message: Partial = { - ...createMessageDto, - id: ulid(), - created_at: Date.now(), - object: 'thread.message', - run_id: null, - completed_at: null, - incomplete_details: null, - attachments: [], - incomplete_at: null, - metadata: undefined, - assistant_id: assistant_id ?? null, - }; - return this.messageRepository.create(message); - } - - async findAll() { - return this.messageRepository.findAll(); - } - - async findOne(id: string) { - return this.messageRepository.findOne({ - where: { - id, - }, - }); - } - - async update(id: string, updateMessageDto: UpdateMessageDto) { - const [numberOfAffectedRows, [updatedMessage]] = - await this.messageRepository.update(updateMessageDto, { - where: { id }, - returning: true, - }); - return { numberOfAffectedRows, updatedMessage }; - } - - async remove(id: string) { - return this.messageRepository.destroy({ - where: { id }, - }); - } - - async getLastMessagesByThread(threadId: string, limit: number) { - return this.messageRepository.findAll({ - where: { - thread_id: threadId, - }, - order: [['created_at', 'DESC']], - limit: limit, - }); - } -} diff --git a/cortex-js/src/usecases/models/models.module.ts b/cortex-js/src/usecases/models/models.module.ts deleted file mode 100644 index e5972941b..000000000 --- a/cortex-js/src/usecases/models/models.module.ts +++ /dev/null @@ -1,29 +0,0 @@ -import { Module } from '@nestjs/common'; -import { ModelsUsecases } from './models.usecases'; -import { DatabaseModule } from '@/infrastructure/database/database.module'; -import { CortexModule } from '@/usecases/cortex/cortex.module'; -import { ExtensionModule } from '@/infrastructure/repositories/extensions/extension.module'; -import { HttpModule } from '@nestjs/axios'; -import { TelemetryModule } from '../telemetry/telemetry.module'; 
-import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; -import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; -import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; -import { ContextModule } from '@/infrastructure/services/context/context.module'; - -@Module({ - imports: [ - DatabaseModule, - CortexModule, - ExtensionModule, - HttpModule, - FileManagerModule, - TelemetryModule, - ContextModule, - ModelRepositoryModule, - DownloadManagerModule, - ], - controllers: [], - providers: [ModelsUsecases], - exports: [ModelsUsecases], -}) -export class ModelsModule {} diff --git a/cortex-js/src/usecases/models/models.usecases.spec.ts b/cortex-js/src/usecases/models/models.usecases.spec.ts deleted file mode 100644 index ab6a13d09..000000000 --- a/cortex-js/src/usecases/models/models.usecases.spec.ts +++ /dev/null @@ -1,46 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { ModelsUsecases } from './models.usecases'; -import { DatabaseModule } from '@/infrastructure/database/database.module'; -import { ModelsModule } from './models.module'; -import { ExtensionModule } from '@/infrastructure/repositories/extensions/extension.module'; -import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; -import { HttpModule } from '@nestjs/axios'; -import { ModelRepositoryModule } from '@/infrastructure/repositories/models/model.module'; -import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; -import { EventEmitterModule } from '@nestjs/event-emitter'; -import { TelemetryModule } from '../telemetry/telemetry.module'; -import { ContextModule } from '@/infrastructure/services/context/context.module'; -import { CortexModule } from '../cortex/cortex.module'; - -describe('ModelsService', () => { - let service: ModelsUsecases; - - beforeEach(async () => 
{ - const module: TestingModule = await Test.createTestingModule({ - imports: [ - EventEmitterModule.forRoot(), - DatabaseModule, - ModelsModule, - ExtensionModule, - FileManagerModule, - DownloadManagerModule, - HttpModule, - ModelRepositoryModule, - DownloadManagerModule, - EventEmitterModule.forRoot(), - TelemetryModule, - TelemetryModule, - ContextModule, - CortexModule, - ], - providers: [ModelsUsecases], - exports: [ModelsUsecases], - }).compile(); - - service = module.get(ModelsUsecases); - }); - - it('should be defined', () => { - expect(service).toBeDefined(); - }); -}); diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts deleted file mode 100644 index e5a9a4b37..000000000 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ /dev/null @@ -1,574 +0,0 @@ -import ora from 'ora'; -import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto'; -import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; -import { BadRequestException, Injectable } from '@nestjs/common'; -import { Model, ModelSettingParams } from '@/domain/models/model.interface'; -import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; -import { basename, join, parse } from 'path'; -import { promises, existsSync, mkdirSync, readFileSync, rmSync } from 'fs'; -import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; -import { ExtensionRepository } from '@/domain/repositories/extension.interface'; -import { EngineExtension } from '@/domain/abstracts/engine.abstract'; -import { isLocalModel, normalizeModelId } from '@/utils/normalize-model-id'; -import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; -import { AxiosError } from 'axios'; -import { TelemetryUsecases } from '../telemetry/telemetry.usecases'; -import { TelemetrySource } from '@/domain/telemetry/telemetry.interface'; 
-import { ModelRepository } from '@/domain/repositories/model.interface'; -import { ModelParameterParser } from '@/utils/model-parameter.parser'; -import { HuggingFaceRepoSibling } from '@/domain/models/huggingface.interface'; -import { fetchJanRepoData, getHFModelMetadata } from '@/utils/huggingface'; -import { - DownloadStatus, - DownloadType, -} from '@/domain/models/download.interface'; -import { EventEmitter2 } from '@nestjs/event-emitter'; -import { ModelEvent, ModelId, ModelStatus } from '@/domain/models/model.event'; -import { DownloadManagerService } from '@/infrastructure/services/download-manager/download-manager.service'; -import { ContextService } from '@/infrastructure/services/context/context.service'; -import { Engines } from '@/infrastructure/commanders/types/engine.interface'; -import { load } from 'js-yaml'; -import { llamaModelFile } from '@/utils/app-path'; -import { CortexUsecases } from '../cortex/cortex.usecases'; -import { isLocalFile } from '@/utils/urls'; - -@Injectable() -export class ModelsUsecases { - constructor( - private readonly modelRepository: ModelRepository, - private readonly cortexUsecases: CortexUsecases, - private readonly extensionRepository: ExtensionRepository, - private readonly downloadManagerService: DownloadManagerService, - private readonly telemetryUseCases: TelemetryUsecases, - private readonly contextService: ContextService, - private readonly eventEmitter: EventEmitter2, - ) {} - - private activeModelStatuses: Record = {}; - - /** - * Create a new model - * @param createModelDto Model data - */ - async create(createModelDto: CreateModelDto) { - const { model: modelId, owned_by } = createModelDto; - const model: Model = { - ...createModelDto, - id: modelId, - created: Date.now(), - object: 'model', - owned_by: owned_by ?? 
'', - }; - - await this.modelRepository.create(model); - } - - /** - * Find all models - * @returns Models - */ - async findAll(): Promise { - return this.modelRepository.findAll(); - } - - /** - * Find a model by ID - * @param model Model ID - * @returns Model - */ - async findOne(model: string) { - this.contextService.set('modelId', model); - return this.modelRepository.findOne(model); - } - - /** - * Get a model by ID or throw an exception - * @param id Model ID - * @returns Model - */ - async getModelOrThrow(id: string): Promise { - const model = await this.findOne(id); - if (!model) { - throw new ModelNotFoundException(id); - } - return model; - } - - /** - * Update a model by ID - * @param id Model ID - * @param updateModelDto Model data to update - * @returns Model update status - */ - update(id: string, updateModelDto: UpdateModelDto) { - return this.modelRepository.update(id, updateModelDto); - } - - /** - * Remove a model by ID - * @param id Model ID - * @returns Model removal status - */ - async remove(id: string) { - const modelsContainerDir = await fileManagerService.getModelsPath(); - if (!existsSync(modelsContainerDir)) { - return; - } - - const modelFolder = join(modelsContainerDir, normalizeModelId(id)); - const model = await this.getModelOrThrow(id); - const engine = (await this.extensionRepository.findOne( - model!.engine ?? 
Engines.llamaCPP, - )) as EngineExtension | undefined; - - if (engine) { - await engine - .unloadModel(id, model.engine || Engines.llamaCPP) - .catch(() => {}); // Silent fail - } - return this.modelRepository - .remove(id) - .then( - () => - existsSync(modelFolder) && rmSync(modelFolder, { recursive: true }), - ) - .then(() => { - const modelEvent: ModelEvent = { - model: id, - event: 'model-deleted', - metadata: {}, - }; - this.eventEmitter.emit('model.event', modelEvent); - }) - .then(() => { - return { - message: 'Model removed successfully', - modelId: id, - }; - }); - } - - /** - * Start a model by ID - * @param modelId Model ID - * @param settings Model settings - * @returns Model start status - */ - async startModel( - modelId: string, - settings?: ModelSettingParams, - filePath?: string, - metadataPath?: string, - ): Promise { - let model: Model | null; - if (filePath) { - model = await this.modelRepository.loadModelByFile( - modelId, - metadataPath!, - filePath, - ); - if (!existsSync(filePath) || !model) { - throw new ModelNotFoundException(model?.id ?? filePath); - } - } else { - model = await this.getModelOrThrow(modelId); - } - const engine = (await this.extensionRepository.findOne( - model!.engine ?? 
Engines.llamaCPP, - )) as EngineExtension | undefined; - - if (!engine) { - return { - message: 'No extension handler found for model', - modelId, - }; - } - - // Attempt to start cortex - await this.cortexUsecases.startCortex(); - - const loadingModelSpinner = ora('Loading model...').start(); - // update states and emitting event - this.activeModelStatuses[modelId] = { - model: modelId, - status: 'starting', - metadata: {}, - }; - const modelEvent: ModelEvent = { - model: modelId, - event: 'starting', - metadata: {}, - }; - this.eventEmitter.emit('model.event', modelEvent); - - const parser = new ModelParameterParser(); - const loadModelSettings: ModelSettingParams = { - // Default settings - ctx_len: 4096, - ngl: 100, - //TODO: Utils for model file retrieval - ...(model?.files && - Array.isArray(model.files) && - !('llama_model_path' in model) && { - llama_model_path: (model.files as string[])[0], - model_path: (model.files as string[])[0], - }), - engine: model.engine ?? Engines.llamaCPP, - // User / Model settings - ...parser.parseModelEngineSettings(model), - ...parser.parseModelEngineSettings(settings ?? {}), - }; - return engine - .loadModel(model, loadModelSettings) - .catch((e) => { - // Skip model already loaded error - if (e.code === AxiosError.ERR_BAD_REQUEST) return; - else throw e; - }) - .then(() => { - this.activeModelStatuses[modelId] = { - model: modelId, - status: 'started', - metadata: {}, - }; - const modelEvent: ModelEvent = { - model: modelId, - event: 'started', - metadata: {}, - }; - this.eventEmitter.emit('model.event', modelEvent); - }) - .then(() => { - loadingModelSpinner.succeed('Model loaded'); - return { - message: 'Model loaded successfully', - modelId, - }; - }) - .catch(async (e) => { - // remove the model from this.activeModelStatus. 
- delete this.activeModelStatuses[modelId]; - const modelEvent: ModelEvent = { - model: modelId, - event: 'starting-failed', - metadata: {}, - }; - this.eventEmitter.emit('model.event', modelEvent); - loadingModelSpinner.fail('Model loading failed'); - await this.telemetryUseCases.createCrashReport( - e, - TelemetrySource.CORTEX_CPP, - ); - throw { - message: e.message, - modelId, - }; - }); - } - - /** - * Stop a running model - * @param modelId Model Identifier - * @returns Model stop status - */ - async stopModel(modelId: string): Promise { - const model = await this.getModelOrThrow(modelId); - const engine = (await this.extensionRepository.findOne( - model!.engine ?? Engines.llamaCPP, - )) as EngineExtension | undefined; - - if (!engine) { - return { - message: 'No extension handler found for model', - modelId, - }; - } - - this.activeModelStatuses[modelId] = { - model: modelId, - status: 'stopping', - metadata: {}, - }; - const modelEvent: ModelEvent = { - model: modelId, - event: 'stopping', - metadata: {}, - }; - this.eventEmitter.emit('model.event', modelEvent); - - return engine - .unloadModel(modelId, model.engine || Engines.llamaCPP) - .catch((e) => { - // Skip model already unloaded error - if (e.code === AxiosError.ERR_BAD_REQUEST) return; - else throw e; - }) - .then(() => { - delete this.activeModelStatuses[modelId]; - const modelEvent: ModelEvent = { - model: modelId, - event: 'stopped', - metadata: {}, - }; - this.eventEmitter.emit('model.event', modelEvent); - }) - .then(() => ({ - message: 'Model is stopped', - modelId, - })) - .catch(async (e) => { - const modelEvent: ModelEvent = { - model: modelId, - event: 'stopping-failed', - metadata: {}, - }; - this.eventEmitter.emit('model.event', modelEvent); - await this.telemetryUseCases.createCrashReport( - e, - TelemetrySource.CORTEX_CPP, - ); - return { - message: e.message, - modelId, - }; - }); - } - - /** - * Abort a download - * @param downloadId Download ID - */ - async 
abortDownloadModel(downloadId: string) { - this.downloadManagerService.abortDownload(downloadId); - } - - /** - * Populate model metadata from a Model repository (HF, Jan...) and download it - * @param modelId - */ - async pullModel( - originModelId: string, - inSequence: boolean = true, - selection?: ( - siblings: HuggingFaceRepoSibling[], - ) => Promise, - persistedModelId?: string, - ) { - const modelId = persistedModelId ?? originModelId; - const existingModel = await this.findOne(modelId); - - if (isLocalModel(existingModel?.files)) { - throw new BadRequestException('Model already exists'); - } - - // Pull a local model file - if (isLocalFile(originModelId)) { - await this.populateHuggingFaceModel(originModelId, persistedModelId); - this.eventEmitter.emit('download.event', [ - { - id: modelId, - type: DownloadType.Model, - status: DownloadStatus.Downloaded, - progress: 100, - children: [], - }, - ]); - return; - } - - const modelsContainerDir = await fileManagerService.getModelsPath(); - - if (!existsSync(modelsContainerDir)) { - mkdirSync(modelsContainerDir, { recursive: true }); - } - - const modelFolder = join(modelsContainerDir, normalizeModelId(modelId)); - await promises.mkdir(modelFolder, { recursive: true }).catch(() => {}); - - let files = (await fetchJanRepoData(originModelId)).siblings; - - // HuggingFace GGUF Repo - Only one file is downloaded - if (originModelId.includes('/') && selection && files.length) { - try { - files = [await selection(files)]; - } catch (e) { - const modelEvent: ModelEvent = { - model: modelId, - event: 'model-downloaded-failed', - metadata: { - error: e.message || e, - }, - }; - this.eventEmitter.emit('model.event', modelEvent); - throw e; - } - } - - // Start downloading the model - const toDownloads: Record< - string, - { - destination: string; - checksum?: string; - } - > = files - .filter((e) => this.validFileDownload(e)) - .reduce( - ( - acc: Record< - string, - { - destination: string; - checksum?: string; - } - >, - 
file, - ) => { - if (file.downloadUrl) - acc[file.downloadUrl] = { - destination: join(modelFolder, file.rfilename), - checksum: file?.lfs?.oid, - }; - return acc; - }, - {}, - ); - return this.downloadManagerService.submitDownloadRequest( - modelId, - modelId, - DownloadType.Model, - toDownloads, - // Post processing - async () => { - const uploadModelMetadataSpiner = ora( - 'Updating model metadata...', - ).start(); - // Post processing after download - if (existsSync(join(modelFolder, 'model.yml'))) { - const model: CreateModelDto = load( - readFileSync(join(modelFolder, 'model.yml'), 'utf-8'), - ) as CreateModelDto; - if (model.engine === Engines.llamaCPP && model.files) { - const fileUrl = join( - await fileManagerService.getModelsPath(), - normalizeModelId(modelId), - llamaModelFile(model.files), - ); - model.files = [fileUrl]; - model.name = modelId.replace(':main', ''); - } else if (model.engine === Engines.llamaCPP) { - model.files = [ - join( - await fileManagerService.getModelsPath(), - normalizeModelId(modelId), - basename( - files.find((e) => e.rfilename.endsWith('.gguf'))?.rfilename ?? - files[0].rfilename, - ), - ), - ]; - } else { - model.files = [modelFolder]; - } - model.model = modelId; - if (!(await this.findOne(modelId))) await this.create(model); - } else { - const fileUrl = join( - await fileManagerService.getModelsPath(), - normalizeModelId(modelId), - basename( - files.find((e) => e.rfilename.endsWith('.gguf'))?.rfilename ?? - files[0].rfilename, - ), - ); - await this.populateHuggingFaceModel( - fileUrl, - modelId.replace(':main', ''), - ); - } - uploadModelMetadataSpiner.succeed('Model metadata updated'); - const modelEvent: ModelEvent = { - model: modelId, - event: 'model-downloaded', - metadata: { - ...(selection ? 
{ file: [files] } : {}), - }, - }; - this.eventEmitter.emit('model.event', modelEvent); - }, - inSequence, - ); - } - - /** - * It's to pull model from HuggingFace repository - * It could be a model from Jan's repo or other authors - * @param modelId HuggingFace model id. e.g. "janhq/llama-3 or llama3:7b" - */ - async populateHuggingFaceModel(ggufUrl: string, overridenId?: string) { - const metadata = await getHFModelMetadata(ggufUrl); - - const stopWords: string[] = metadata?.stopWord ? [metadata.stopWord] : []; - - const modelId = - overridenId ?? (isLocalFile(ggufUrl) ? parse(ggufUrl).name : ggufUrl); - const model: CreateModelDto = { - files: [ggufUrl], - model: modelId, - name: metadata?.name ?? modelId, - prompt_template: metadata?.promptTemplate, - stop: stopWords, - - // Default Inference Params - stream: true, - max_tokens: 4096, - frequency_penalty: 0.7, - presence_penalty: 0.7, - temperature: 0.7, - top_p: 0.7, - - // Default Model Settings - ctx_len: metadata?.contextLength ?? 4096, - ngl: metadata?.ngl ?? 100, - engine: Engines.llamaCPP, - }; - if (!(await this.findOne(modelId))) await this.create(model); - else throw 'Model already exists.'; - } - - /** - * Get the current status of the models - * @returns Model statuses - */ - getModelStatuses(): Record { - return this.activeModelStatuses; - } - - /** - * Check whether the model is running in the Cortex C++ server - */ - async isModelRunning(modelId: string): Promise { - const model = await this.getModelOrThrow(modelId).catch(() => undefined); - - if (!model) return false; - - const engine = (await this.extensionRepository.findOne( - model.engine ?? 
Engines.llamaCPP, - )) as EngineExtension | undefined; - - if (!engine) return false; - - return engine.isModelRunning(modelId); - } - - /** - * Check whether the download file is valid or not - * @param file - * @returns - */ - private validFileDownload( - file: HuggingFaceRepoSibling, - ): file is Required { - return !!file.downloadUrl; - } -} diff --git a/cortex-js/src/usecases/telemetry/telemetry.module.ts b/cortex-js/src/usecases/telemetry/telemetry.module.ts deleted file mode 100644 index bb0db2ecb..000000000 --- a/cortex-js/src/usecases/telemetry/telemetry.module.ts +++ /dev/null @@ -1,20 +0,0 @@ -import { Module } from '@nestjs/common'; -import { TelemetryUsecases } from './telemetry.usecases'; -import { HttpModule } from '@nestjs/axios'; -import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; -import { ContextModule } from '@/infrastructure/services/context/context.module'; -import { TelemetryRepositoryImpl } from '@/infrastructure/repositories/telemetry/telemetry.repository'; -import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; - -export const telemetryProvider = { - provide: 'TELEMETRY_REPOSITORY', - useFactory: () => new TelemetryRepositoryImpl(), - inject: [FileManagerService], -}; - -@Module({ - imports: [HttpModule, FileManagerModule, ContextModule], - providers: [telemetryProvider, TelemetryUsecases], - exports: [telemetryProvider, TelemetryUsecases], -}) -export class TelemetryModule {} diff --git a/cortex-js/src/usecases/telemetry/telemetry.usecases.ts b/cortex-js/src/usecases/telemetry/telemetry.usecases.ts deleted file mode 100644 index 3db376cc8..000000000 --- a/cortex-js/src/usecases/telemetry/telemetry.usecases.ts +++ /dev/null @@ -1,235 +0,0 @@ -import { TelemetryRepository } from '@/domain/repositories/telemetry.interface'; -import { - BenchmarkHardware, - CrashReportAttributes, - EventAttributes, - EventName, - Telemetry, - TelemetryAnonymized, - 
TelemetrySource, -} from '@/domain/telemetry/telemetry.interface'; -import { ContextService } from '@/infrastructure/services/context/context.service'; -import { HttpException, Inject, Injectable, Scope } from '@nestjs/common'; -import { Cortex } from '@cortexso/cortex.js'; -import { v4 } from 'uuid'; - -@Injectable({ scope: Scope.TRANSIENT }) -export class TelemetryUsecases { - private readonly crashReports: string[] = []; - private readonly maxSize = 10; - private metricQueue: EventAttributes[] = []; - private readonly maxQueueSize = 10; - private readonly flushInterval = 1000 * 5; - private interval: NodeJS.Timeout; - private lastActiveAt?: string | null; - - constructor( - @Inject('TELEMETRY_REPOSITORY') - private readonly telemetryRepository: TelemetryRepository, - private readonly contextService: ContextService, - ) { - this.catchException(); - } - - async createCrashReport( - error: HttpException | Error, - source: TelemetrySource, - ): Promise { - try { - if (!this.isCrashReportEnabled()) return; - - const crashReport: CrashReportAttributes = this.buildCrashReport(error); - if (this.crashReports.includes(JSON.stringify(crashReport))) return; - if (this.crashReports.length >= this.maxSize) { - this.crashReports.shift(); - } - this.crashReports.push(JSON.stringify(crashReport)); - await this.telemetryRepository.createCrashReport(crashReport, source); - } catch (e) {} - return; - } - - async sendCrashReport(): Promise { - if (!this.isCrashReportEnabled()) { - return; - } - const crashReport = await this.telemetryRepository.getLastCrashReport(); - if (crashReport && !crashReport.metadata.sentAt) { - const promises = [ - this.telemetryRepository.sendTelemetryToServer(crashReport.event), - ]; - const collectorEndpoint = process.env.CORTEX_EXPORTER_OLTP_ENDPOINT; - if (collectorEndpoint) { - promises.push( - this.telemetryRepository.sendTelemetryToOTelCollector( - collectorEndpoint, - crashReport, - ), - ); - } - await Promise.all(promises); - await 
this.telemetryRepository.markLastCrashReportAsSent(); - } - return; - } - - async readCrashReports( - callback: (Telemetry: Telemetry) => void, - ): Promise { - return this.telemetryRepository.readCrashReports(callback); - } - - private buildCrashReport( - error: HttpException | Error, - ): CrashReportAttributes { - return { - message: error.message, - stack: error.stack, - payload: { - modelId: this.contextService.get('modelId') || '', - endpoint: this.contextService.get('endpoint') || '', - command: this.contextService.get('command') || '', - }, - sessionId: this.contextService.get('sessionId') || '', - }; - } - - private isCrashReportEnabled(): boolean { - return process.env.CORTEX_CRASH_REPORT !== '0'; - } - - private isMetricsEnabled(): boolean { - return process.env.CORTEX_METRICS !== '0'; - } - - private isBenchmarkEnabled(): boolean { - return process.env.CORTEX_BENCHMARK !== '0'; - } - - private async catchException(): Promise { - process.on('uncaughtException', async (error: Error) => { - await this.createCrashReport( - error, - this.contextService.get('source') as TelemetrySource, - ); - }); - - process.on('unhandledRejection', async (error: Error) => { - await this.createCrashReport( - error, - this.contextService.get('source') as TelemetrySource, - ); - }); - } - - async initInterval(): Promise { - this.interval = this.flushMetricQueueInterval(); - } - - async sendEvent( - events: EventAttributes[], - source: TelemetrySource, - ): Promise { - try { - if (!this.isMetricsEnabled()) return; - const sessionId = (this.contextService.get('sessionId') as string) || ''; - const sessionEvents = events.map((event) => ({ - ...event, - sessionId, - })); - await this.telemetryRepository.sendEvent(sessionEvents, source); - } catch (e) {} - } - - async sendActivationEvent(source: TelemetrySource): Promise { - try { - if (!this.isMetricsEnabled()) return; - if (!this.lastActiveAt) { - const currentData = await this.telemetryRepository.getAnonymizedData(); - 
this.lastActiveAt = currentData?.lastActiveAt; - } - const isActivatedToday = - this.lastActiveAt && - new Date(this.lastActiveAt).getDate() === new Date().getDate(); - if (isActivatedToday) return; - const isNewActivation = !this.lastActiveAt; - await this.sendEvent( - [ - { - name: isNewActivation ? EventName.NEW_ACTIVATE : EventName.ACTIVATE, - }, - ], - source, - ); - this.lastActiveAt = new Date().toISOString(); - } catch (e) {} - await this.updateAnonymousData(this.lastActiveAt); - } - - async addEventToQueue(event: EventAttributes): Promise { - if (!this.isMetricsEnabled()) return; - this.metricQueue.push({ - ...event, - sessionId: this.contextService.get('sessionId') || '', - }); - if (this.metricQueue.length >= this.maxQueueSize) { - await this.flushMetricQueue(); - } - } - - private async flushMetricQueue(): Promise { - if (this.metricQueue.length > 0) { - clearInterval(this.interval); - await this.sendEvent(this.metricQueue, TelemetrySource.CORTEX_SERVER); - this.interval = this.flushMetricQueueInterval(); - this.metricQueue = []; - } - } - - private flushMetricQueueInterval(): NodeJS.Timeout { - return setInterval(() => { - this.flushMetricQueue(); - }, this.flushInterval); - } - - async updateAnonymousData( - lastActiveAt?: string | null, - ): Promise { - try { - const currentData = await this.telemetryRepository.getAnonymizedData(); - const updatedData = { - ...currentData, - sessionId: currentData?.sessionId || v4(), - ...(lastActiveAt && { lastActiveAt }), - }; - await this.telemetryRepository.updateAnonymousData(updatedData); - return updatedData; - } catch (e) { - return null; - } - } - - async sendBenchmarkEvent({ - hardware, - results, - metrics, - model, - }: { - hardware: BenchmarkHardware; - results: any; - metrics: any; - model: Cortex.Models.Model; - }): Promise { - try { - if (!this.isBenchmarkEnabled()) return; - const sessionId: string = this.contextService.get('sessionId') || ''; - await this.telemetryRepository.sendBenchmarkToServer({ 
- hardware, - results, - metrics, - model, - sessionId, - }); - } catch (e) {} - } -} diff --git a/cortex-js/src/usecases/threads/threads.module.ts b/cortex-js/src/usecases/threads/threads.module.ts deleted file mode 100644 index b11756f27..000000000 --- a/cortex-js/src/usecases/threads/threads.module.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { Module } from '@nestjs/common'; -import { ThreadsUsecases } from './threads.usecases'; -import { DatabaseModule } from '@/infrastructure/database/database.module'; - -@Module({ - imports: [DatabaseModule], - controllers: [], - providers: [ThreadsUsecases], - exports: [ThreadsUsecases], -}) -export class ThreadsModule {} diff --git a/cortex-js/src/usecases/threads/threads.usecases.spec.ts b/cortex-js/src/usecases/threads/threads.usecases.spec.ts deleted file mode 100644 index 39a9d11dd..000000000 --- a/cortex-js/src/usecases/threads/threads.usecases.spec.ts +++ /dev/null @@ -1,20 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { ThreadsUsecases } from './threads.usecases'; -import { DatabaseModule } from '@/infrastructure/database/database.module'; - -describe('ThreadsService', () => { - let service: ThreadsUsecases; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - imports: [DatabaseModule], - providers: [ThreadsUsecases], - }).compile(); - - service = module.get(ThreadsUsecases); - }); - - it('should be defined', () => { - expect(service).toBeDefined(); - }); -}); diff --git a/cortex-js/src/usecases/threads/threads.usecases.ts b/cortex-js/src/usecases/threads/threads.usecases.ts deleted file mode 100644 index 46c38bea2..000000000 --- a/cortex-js/src/usecases/threads/threads.usecases.ts +++ /dev/null @@ -1,269 +0,0 @@ -import { Inject, Injectable, NotFoundException } from '@nestjs/common'; -import { CreateThreadDto } from '@/infrastructure/dtos/threads/create-thread.dto'; -import { UpdateThreadDto } from '@/infrastructure/dtos/threads/update-thread.dto'; 
-import { v4 as uuidv4 } from 'uuid'; -import { PageDto } from '@/infrastructure/dtos/page.dto'; -import { CreateMessageDto } from '@/infrastructure/dtos/threads/create-message.dto'; -import { ulid } from 'ulid'; -import { Message, MessageContent } from '@/domain/models/message.interface'; -import { UpdateMessageDto } from '@/infrastructure/dtos/threads/update-message.dto'; -import { Thread } from '@/domain/models/thread.interface'; -import DeleteMessageDto from '@/infrastructure/dtos/threads/delete-message.dto'; -import { Assistant } from '@/domain/models/assistant.interface'; -import { Repository } from 'sequelize-typescript'; -import { ThreadEntity } from '@/infrastructure/entities/thread.entity'; -import { MessageEntity } from '@/infrastructure/entities/message.entity'; -import { Op } from 'sequelize'; - -@Injectable() -export class ThreadsUsecases { - constructor( - @Inject('THREAD_REPOSITORY') - private threadRepository: Repository, - @Inject('MESSAGE_REPOSITORY') - private messageRepository: Repository, - ) {} - - async create(createThreadDto: CreateThreadDto): Promise { - const id = uuidv4(); - const { assistants } = createThreadDto; - const assistantEntity: Assistant[] = assistants.map((assistant) => { - const entity: Assistant = { - ...assistant, - response_format: null, - tool_resources: null, - top_p: assistant.top_p ?? null, - temperature: assistant.temperature ?? null, - }; - return entity; - }); - - const thread: Partial = { - id, - assistants: assistantEntity, - object: 'thread', - created_at: Date.now(), - title: 'New Thread', - tool_resources: null, - metadata: null, - }; - - return this.threadRepository.create(thread); - } - - async findAll( - limit: number, - order: 'asc' | 'desc', - after?: string, - before?: string, - ): Promise> { - const normalizedOrder = order === 'asc' ? 'ASC' : 'DESC'; - let afterQuery = {}; - let beforeQuery = {}; - if (after) { - const [afterDate, afterId] = after.split('_'); - const operator = order === 'asc' ? 
Op.gt : Op.lt; - afterQuery = { - [Op.or]: [ - { - created_at: { [operator]: Number(afterDate) }, - }, - { - created_at: Number(afterDate), - id: { [operator]: afterId }, - }, - ], - }; - } - if (before) { - const [beforeDate, beforeId] = before.split('_'); - const operator = order === 'asc' ? Op.lt : Op.gt; - beforeQuery = { - [Op.or]: [ - { - created_at: { [operator]: Number(beforeDate) }, - }, - { - created_at: Number(beforeDate), - id: { [operator]: beforeId }, - }, - ], - }; - } - const threads = await this.threadRepository.findAll({ - order: [ - ['created_at', normalizedOrder], - ['id', normalizedOrder], - ], - limit: limit + 1, - where: { - [Op.and]: [afterQuery, beforeQuery], - }, - }); - let hasMore = false; - if (threads.length > limit) { - hasMore = true; - threads.pop(); - } - const firstItem = threads[0]; - const lastItem = threads[threads.length - 1]; - const firstId = firstItem - ? `${firstItem.created_at}_${firstItem.id}` - : undefined; - const lastId = lastItem - ? `${lastItem?.created_at}_${lastItem?.id}` - : undefined; - return new PageDto(threads, hasMore, firstId, lastId); - } - - async getMessagesOfThread( - threadId: string, - limit: number, - order: 'asc' | 'desc', - after?: string, - before?: string, - runId?: string, - ) { - await this.getThreadOrThrow(threadId); - const normalizedOrder = order === 'asc' ? 'ASC' : 'DESC'; - - const messages = await this.messageRepository.findAll({ - where: { thread_id: threadId }, - order: [['created_at', normalizedOrder]], - limit: limit + 1, - ...(after && { where: { id: { [Op.gt]: after } } }), - ...(before && { where: { id: { [Op.lt]: before } } }), - }); - - let hasMore = false; - if (messages.length > limit) { - hasMore = true; - messages.pop(); - } - - const firstId = messages[0]?.id ?? undefined; - const lastId = messages[messages.length - 1]?.id ?? 
undefined; - - return new PageDto(messages, hasMore, firstId, lastId); - } - - async createMessageInThread( - threadId: string, - createMessageDto: CreateMessageDto, - ) { - const thread = await this.getThreadOrThrow(threadId); - const assistantId: string = thread.assistants[0].id; - - const messageContent: MessageContent = { - type: 'text', - text: { - annotations: [], - value: createMessageDto.content, - }, - }; - - const message: Partial = { - id: ulid(), - object: 'thread.message', - thread_id: threadId, - assistant_id: assistantId, - created_at: Date.now(), - status: 'completed', - role: createMessageDto.role, - content: [messageContent], - metadata: null, - run_id: null, - completed_at: null, - incomplete_details: null, - attachments: [], - incomplete_at: null, - }; - - await this.messageRepository.create(message); - return message; - } - - async updateMessage( - threadId: string, - messageId: string, - updateMessageDto: UpdateMessageDto, - ) { - await this.getThreadOrThrow(threadId); - await this.messageRepository.update(updateMessageDto, { - where: { id: messageId }, - }); - return this.messageRepository.findOne({ where: { id: messageId } }); - } - - private async getThreadOrThrow(threadId: string): Promise { - const thread = await this.findOne(threadId); - if (!thread) { - throw new NotFoundException(`Thread with id ${threadId} not found`); - } - return thread; - } - - private async getMessageOrThrow(messageId: string): Promise { - const message = await this.messageRepository.findOne({ - where: { id: messageId }, - }); - if (!message) { - throw new NotFoundException(`Message with id ${messageId} not found`); - } - return message; - } - - findOne(id: string) { - return this.threadRepository.findOne({ where: { id } }); - } - - async update(id: string, updateThreadDto: UpdateThreadDto) { - const assistantEntities: Assistant[] = - updateThreadDto.assistants?.map((assistant) => { - const entity: Assistant = { - ...assistant, - name: assistant.name, - 
response_format: null, - tool_resources: null, - top_p: assistant.top_p ?? null, - temperature: assistant.temperature ?? null, - }; - return entity; - }) ?? []; - - const entity: Partial = { - ...updateThreadDto, - assistants: assistantEntities, - }; - - return this.threadRepository.update(entity, { where: { id } }); - } - - async remove(id: string) { - await this.threadRepository.destroy({ where: { id } }); - } - - async deleteMessage( - _threadId: string, - messageId: string, - ): Promise { - await this.getMessageOrThrow(messageId); - await this.messageRepository.destroy({ where: { id: messageId } }); - - return { - id: messageId, - object: 'thread.message.deleted', - deleted: true, - }; - } - - async retrieveMessage(_threadId: string, messageId: string) { - // we still allow user to delete message even if the thread is not there - return this.getMessageOrThrow(messageId); - } - - async clean(threadId: string) { - await this.getThreadOrThrow(threadId); - await this.messageRepository.destroy({ where: { thread_id: threadId } }); - } -} diff --git a/cortex-js/src/utils/app-path.ts b/cortex-js/src/utils/app-path.ts deleted file mode 100644 index d4fb0b1e0..000000000 --- a/cortex-js/src/utils/app-path.ts +++ /dev/null @@ -1,43 +0,0 @@ -import { ModelArtifact } from '@/domain/models/model.interface'; -import { existsSync } from 'fs'; -import { basename, join } from 'path'; - -/** - * Path to the root of the application. - */ -export const appPath = join(__dirname, '../../'); - -/** - * Check if a file exists in any of the given paths. - * @param file - * @param paths - * @returns - */ -export const checkFileExistenceInPaths = ( - file: string, - paths: string[], -): boolean => { - return paths.some((p) => existsSync(join(p, file))); -}; - -/** - * Get the model file name from the given files. 
- * @param files - * @returns - */ -export const llamaModelFile = ( - files: string[] | ModelArtifact | ModelArtifact[], -) => { - let artifact: any = files; - // Legacy model.yml - if (Array.isArray(files)) { - artifact = files[0]; - } - const path = - 'llama_model_path' in artifact - ? ((artifact as ModelArtifact).llama_model_path ?? '') - : 'model_path' in files - ? ((artifact as ModelArtifact).model_path ?? '') - : (artifact as string[])[0]; - return basename(path); -}; diff --git a/cortex-js/src/utils/cortex-cpp.ts b/cortex-js/src/utils/cortex-cpp.ts deleted file mode 100644 index efa28adbd..000000000 --- a/cortex-js/src/utils/cortex-cpp.ts +++ /dev/null @@ -1,7 +0,0 @@ -import * as cortexCPP from 'cortex-cpp'; - -process.title = 'Cortex Engine Process (cortex.cpp)'; -const port = process.env.CORTEX_CPP_PORT - ? parseInt(process.env.CORTEX_CPP_PORT) - : 3929; -cortexCPP.start(port); diff --git a/cortex-js/src/utils/cuda.ts b/cortex-js/src/utils/cuda.ts deleted file mode 100644 index ecddbaf4f..000000000 --- a/cortex-js/src/utils/cuda.ts +++ /dev/null @@ -1,148 +0,0 @@ -import { exec } from 'child_process'; -import { existsSync } from 'fs'; -import { delimiter } from 'path'; -import { checkFileExistenceInPaths } from './app-path'; - -export type GpuSettingInfo = { - id: string; - vram: string; - name: string; - arch?: string; -}; - -/** - * Return the CUDA version installed on the system - * @returns CUDA Version 11 | 12 - */ -export const cudaVersion = async () => { - let filesCuda12: string[]; - let filesCuda11: string[]; - let paths: string[]; - - if (process.platform === 'win32') { - filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']; - filesCuda11 = ['cublas64_11.dll', 'cudart64_110.dll', 'cublasLt64_11.dll']; - paths = process.env.PATH ? 
process.env.PATH.split(delimiter) : []; - } else { - filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']; - filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']; - paths = process.env.LD_LIBRARY_PATH - ? process.env.LD_LIBRARY_PATH.split(delimiter) - : []; - paths.push('/usr/lib/x86_64-linux-gnu/'); - } - - if ( - filesCuda12.every( - (file) => existsSync(file) || checkFileExistenceInPaths(file, paths), - ) - ) - return '12'; - - if ( - filesCuda11.every( - (file) => existsSync(file) || checkFileExistenceInPaths(file, paths), - ) - ) - return '11'; - - return undefined; // No CUDA Toolkit found -}; - -/** - * Check if an NVIDIA GPU is present - * @returns GPU driver exist or not - * TODO: This should be enhanced better - */ -export const checkNvidiaGPUExist = (): Promise => { - return new Promise((resolve) => { - // Execute the nvidia-smi command - exec('nvidia-smi', (error) => { - if (error) { - // If there's an error, it means nvidia-smi is not installed or there's no NVIDIA GPU - console.log('NVIDIA GPU not detected or nvidia-smi not installed.'); - resolve(false); - } else { - // If the command executes successfully, NVIDIA GPU is present - console.log('NVIDIA GPU detected.'); - resolve(true); - } - }); - }); -}; - -export const getCudaVersion = (): Promise => { - return new Promise((resolve, reject) => { - // Execute the nvidia-smi command - exec('nvidia-smi', (error, stdout) => { - if (!error) { - const cudaVersionLine = stdout - .split('\n') - .find((line) => line.includes('CUDA Version')); - - if (cudaVersionLine) { - // Extract the CUDA version number - const cudaVersionMatch = cudaVersionLine.match( - /CUDA Version:\s+(\d+\.\d+)/, - ); - if (cudaVersionMatch) { - const cudaVersion = cudaVersionMatch[1]; - resolve(cudaVersion); - } else { - reject('CUDA Version not found.'); - } - } else { - reject('CUDA Version not found.'); - } - } else { - reject(error); - } - }); - }); -}; - -/** - * Get GPU information from 
the system - * @returns GPU information - */ -export const getGpuInfo = async (): Promise => - new Promise((resolve) => { - exec( - 'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits', - { - windowsHide: true, - }, - async (error, stdout) => { - if (!error) { - // Get GPU info and gpu has higher memory first - let highestVram = 0; - let highestVramId = '0'; - const gpus: GpuSettingInfo[] = stdout - .trim() - .split('\n') - .map((line) => { - let [id, vram, name] = line.split(', '); - const arch = getGpuArch(name); - vram = vram.replace(/\r/g, ''); - if (parseFloat(vram) > highestVram) { - highestVram = parseFloat(vram); - highestVramId = id; - } - return { id, vram, name, arch }; - }); - - resolve(gpus); - } else { - resolve([]); - } - }, - ); - }); - -const getGpuArch = (gpuName: string): string => { - if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'; - - if (gpuName.includes('30')) return 'ampere'; - else if (gpuName.includes('40')) return 'ada'; - else return 'unknown'; -}; diff --git a/cortex-js/src/utils/download-progress.ts b/cortex-js/src/utils/download-progress.ts deleted file mode 100644 index 68eafcd09..000000000 --- a/cortex-js/src/utils/download-progress.ts +++ /dev/null @@ -1,69 +0,0 @@ -import { Presets, SingleBar } from 'cli-progress'; -import { Cortex } from '@cortexso/cortex.js'; -import { exit, stdin, stdout } from 'node:process'; -import { - DownloadState, - DownloadStatus, - DownloadType, -} from '@/domain/models/download.interface'; -import { isLocalFile } from './urls'; - -export const downloadProgress = async ( - cortex: Cortex, - downloadId?: string, - downloadType?: DownloadType, -) => { - // Do not update on local file symlink - if (downloadId && isLocalFile(downloadId)) return; - - const response = await cortex.events.downloadEvent(); - - const rl = require('readline').createInterface({ - input: stdin, - output: stdout, - }); - - rl.on('SIGINT', () => { - console.log('\nStopping download...'); - 
process.emit('SIGINT'); - }); - process.on('SIGINT', async () => { - if (downloadId) { - await cortex.models.abortDownload(downloadId); - } - exit(1); - }); - - const progressBar = new SingleBar({}, Presets.shades_classic); - progressBar.start(100, 0); - - for await (const stream of response) { - if (stream.length) { - const data = stream.find( - (data: any) => data.id === downloadId || !downloadId, - ) as DownloadState | undefined; - if (!data) continue; - if (downloadType && data.type !== downloadType) continue; - - if (data.status === DownloadStatus.Downloaded) break; - if (data.status === DownloadStatus.Error) { - rl.close(); - progressBar.stop(); - console.log('\n Download failed: ', data.error); - exit(1); - } - - let totalBytes = 0; - let totalTransferred = 0; - data.children.forEach((child: any) => { - totalBytes += child.size.total; - totalTransferred += child.size.transferred; - }); - progressBar.update( - Math.floor((totalTransferred / (totalBytes || 1)) * 100), - ); - } - } - progressBar.stop(); - rl.close(); -}; diff --git a/cortex-js/src/utils/health.ts b/cortex-js/src/utils/health.ts deleted file mode 100644 index de164d123..000000000 --- a/cortex-js/src/utils/health.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { CORTEX_JS_HEALTH_URL_WITH_API_PATH } from '@/infrastructure/constants/cortex'; - -/** - * Check whether the Cortex CPP is healthy - * @param host - * @param port - * @returns - */ -export function healthCheck(apiPath: string): Promise { - return fetch(CORTEX_JS_HEALTH_URL_WITH_API_PATH(apiPath)) - .then((res) => { - if (res.ok) { - return true; - } - return false; - }) - .catch(() => false); -} - -/** - * Wait until the Cortex Server is healthy - * @param host - * @param port - * @returns - */ -export function waitUntilHealthy(apiPath: string): Promise { - return new Promise((resolve) => { - const interval = setInterval(async () => { - const isHealthy = await healthCheck(apiPath); - if (isHealthy) { - clearInterval(interval); - resolve(true); - 
} - }, 1000); - }); -} diff --git a/cortex-js/src/utils/huggingface.ts b/cortex-js/src/utils/huggingface.ts deleted file mode 100644 index a69572992..000000000 --- a/cortex-js/src/utils/huggingface.ts +++ /dev/null @@ -1,200 +0,0 @@ -//// PRIVATE METHODS //// - -import { - HuggingFaceRepoData, - HuggingFaceRepoSibling, -} from '@/domain/models/huggingface.interface'; -import { ModelMetadata } from '@/infrastructure/commanders/types/model-tokenizer.interface'; -import { - AllQuantizations, - HUGGING_FACE_DOWNLOAD_FILE_MAIN_URL, - HUGGING_FACE_REPO_MODEL_API_URL, - HUGGING_FACE_REPO_URL, - HUGGING_FACE_TREE_REF_URL, -} from '@/infrastructure/constants/huggingface'; -import { - LLAMA_2, - LLAMA_3, - LLAMA_3_JINJA, - OPEN_CHAT_3_5, - OPEN_CHAT_3_5_JINJA, - ZEPHYR, - ZEPHYR_JINJA, -} from '@/infrastructure/constants/prompt-constants'; -import axios from 'axios'; -import { parseModelHubEngineBranch } from './normalize-model-id'; -import { closeSync, openSync, readSync } from 'fs'; - -// TODO: move this to somewhere else, should be reused by API as well. Maybe in a separate service / provider? -export function guessPromptTemplateFromHuggingFace(jinjaCode?: string): string { - if (!jinjaCode) { - console.log('No jinja code provided. Returning default LLAMA_2'); - return LLAMA_2; - } - - if (typeof jinjaCode !== 'string') { - console.log( - `Invalid jinja code provided (type is ${typeof jinjaCode}). Returning default LLAMA_2`, - ); - return LLAMA_2; - } - - //TODO: Missing supported templates? - switch (jinjaCode) { - case ZEPHYR_JINJA: - return ZEPHYR; - - case OPEN_CHAT_3_5_JINJA: - return OPEN_CHAT_3_5; - - case LLAMA_3_JINJA: - return LLAMA_3; - - default: - // console.log( - // 'Unknown jinja code:', - // jinjaCode, - // 'Returning default LLAMA_2', - // ); - return LLAMA_2; - } -} - -/** - * Fetch the model data from Jan's repo - * @param modelId HuggingFace model id. e.g. 
"llama-3:7b" - * @returns - */ -export async function fetchJanRepoData( - modelId: string, -): Promise { - const repo = modelId.split(':')[0]; - const tree = await parseModelHubEngineBranch( - modelId.split(':')[1] ?? (!modelId.includes('/') ? 'main' : ''), - ); - const url = getRepoModelsUrl( - `${!modelId.includes('/') ? 'cortexso/' : ''}${repo}`, - tree, - ); - - const res = await fetch(url); - const jsonData = await res.json(); - if ('siblings' in jsonData) { - AllQuantizations.forEach((quantization) => { - jsonData.siblings.forEach((sibling: HuggingFaceRepoSibling) => { - if (!sibling.quantization && sibling.rfilename.includes(quantization)) { - sibling.quantization = quantization; - sibling.downloadUrl = HUGGING_FACE_DOWNLOAD_FILE_MAIN_URL( - repo, - sibling.rfilename, - ); - } - }); - }); - return jsonData as HuggingFaceRepoData; - } - - const response: - | { - path: string; - size: number; - oid?: string; - }[] - | { error: string } = jsonData; - - if ('error' in response && response.error != null) { - throw new Error(response.error); - } - - // TODO: Support multiple engines - const data: HuggingFaceRepoData = { - siblings: Array.isArray(response) - ? response.map((e) => { - return { - rfilename: e.path, - downloadUrl: HUGGING_FACE_TREE_REF_URL(repo, tree, e.path), - fileSize: e.size ?? 0, - lfs: { - oid: e.oid, - }, - }; - }) - : [], - tags: ['gguf'], - id: modelId, - modelId: modelId, - author: 'cortexso', - sha: '', - downloads: 0, - lastModified: '', - private: false, - disabled: false, - gated: false, - pipeline_tag: 'text-generation', - cardData: {}, - createdAt: '', - }; - - AllQuantizations.forEach((quantization) => { - data.siblings.forEach((sibling: any) => { - if (!sibling.quantization && sibling.rfilename.includes(quantization)) { - sibling.quantization = quantization; - } - }); - }); - - data.modelUrl = url; - return data; -} - -/** - * Get the URL to fetch the model data from HuggingFace API - * @param repoId HuggingFace model id. e.g. 
"janhq/llama3" - * @param tree The tree ref. e.g. "8b" - * @returns The URL to fetch the model data from HuggingFace API - */ -export function getRepoModelsUrl(repoId: string, tree?: string): string { - return `${HUGGING_FACE_REPO_MODEL_API_URL(repoId)}${tree ? `/tree/${tree}` : ''}`; -} - -/** - * Get the model metadata from HuggingFace - * @param ggufUrl The URL to the GGUF file - * @returns The model tokenizer - */ -export async function getHFModelMetadata( - ggufUrl: string, -): Promise { - try { - const { ggufMetadata } = await import('hyllama'); - // Read first 10mb of gguf file - const fd = openSync(ggufUrl, 'r'); - const buffer = new Uint8Array(10_000_000); - readSync(fd, buffer, 0, 10_000_000, 0); - closeSync(fd); - - // Parse metadata and tensor info - const { metadata } = ggufMetadata(buffer.buffer); - - const index = metadata['tokenizer.ggml.eos_token_id']; - const hfChatTemplate = metadata['tokenizer.chat_template']; - const promptTemplate = guessPromptTemplateFromHuggingFace(hfChatTemplate); - const stopWord: string = metadata['tokenizer.ggml.tokens'][index] ?? ''; - const name = metadata['general.name']; - const contextLength = metadata['llama.context_length'] ?? 4096; - const ngl = (metadata['llama.block_count'] ?? 
32) + 1; - const version: number = metadata['version']; - - return { - contextLength, - ngl, - stopWord, - promptTemplate, - version, - name, - }; - } catch (err) { - console.log('Failed to get model metadata:', err.message); - return undefined; - } -} diff --git a/cortex-js/src/utils/init.ts b/cortex-js/src/utils/init.ts deleted file mode 100644 index 71db89cdf..000000000 --- a/cortex-js/src/utils/init.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { InitOptions } from '@/infrastructure/commanders/types/init-options.interface'; -import { checkNvidiaGPUExist } from './cuda'; - -/** - * Default installation options base on the system - * @returns - */ -export const defaultInstallationOptions = async (): Promise => { - const options: InitOptions = {}; - - // Skip check if darwin - if (process.platform === 'darwin') { - return options; - } - // If Nvidia Driver is installed -> GPU - options.runMode = (await checkNvidiaGPUExist()) ? 'GPU' : 'CPU'; - options.gpuType = 'Nvidia'; - return options; -}; \ No newline at end of file diff --git a/cortex-js/src/utils/logs.ts b/cortex-js/src/utils/logs.ts deleted file mode 100644 index 1802d6d2b..000000000 --- a/cortex-js/src/utils/logs.ts +++ /dev/null @@ -1,80 +0,0 @@ -import { createReadStream, existsSync, stat, writeFileSync } from 'fs'; -import { createInterface } from 'readline'; -import { fileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; - -const logCleaningInterval: number = 120000; -let timeout: NodeJS.Timeout | undefined; - -/** - * Print the last N lines of a file that contain the word 'ERROR' - * @param filename - * @param numLines - */ -export async function printLastErrorLines( - logPath: string, - numLines: number = 10, -): Promise { - const errorLines: string[] = []; - - const fileStream = createReadStream(logPath); - const rl = createInterface({ - input: fileStream, - crlfDelay: Infinity, - }); - - for await (const line of rl) { - errorLines.push(line); - if (errorLines.length > 
numLines) { - errorLines.shift(); - } - } - - console.log(`Last errors:`); - errorLines.forEach((line) => console.log(line)); - console.log('...'); -} - -export async function cleanLogs( - maxFileSizeBytes?: number | undefined, - daysToKeep?: number | undefined, -): Promise { - // clear existing timeout - // in case we rerun it with different values - if (timeout) clearTimeout(timeout); - timeout = undefined; - - console.log('Validating app logs. Next attempt in ', logCleaningInterval); - - const size = maxFileSizeBytes ?? 1 * 1024 * 1024; // 1 MB - const days = daysToKeep ?? 7; // 7 days - const filePath = await fileManagerService.getLogPath(); - // Perform log cleaning - const currentDate = new Date(); - if (existsSync(filePath)) - stat(filePath, (err, stats) => { - if (err) { - console.error('Error getting file stats:', err); - return; - } - - // Check size - if (stats.size > size) { - writeFileSync(filePath, '', 'utf8'); - } else { - // Check age - const creationDate = new Date(stats.ctime); - const daysDifference = Math.floor( - (currentDate.getTime() - creationDate.getTime()) / (1000 * 3600 * 24), - ); - if (daysDifference > days) { - writeFileSync(filePath, '', 'utf8'); - } - } - }); - - // Schedule the next execution with doubled delays - timeout = setTimeout( - () => cleanLogs(maxFileSizeBytes, daysToKeep), - logCleaningInterval, - ); -} diff --git a/cortex-js/src/utils/model-check.ts b/cortex-js/src/utils/model-check.ts deleted file mode 100644 index 05cd8827f..000000000 --- a/cortex-js/src/utils/model-check.ts +++ /dev/null @@ -1,63 +0,0 @@ -import { MIN_CUDA_VERSION } from '@/infrastructure/constants/cortex'; -import { getCudaVersion } from './cuda'; -import ora from 'ora'; - -export const checkModelCompatibility = async ( - modelId: string, - spinner?: ora.Ora, -) => { - function log(message: string) { - if (spinner) { - spinner.fail(message); - } else { - console.error(message); - } - } - if (modelId.includes('onnx') && process.platform !== 'win32') { 
- log('The ONNX engine does not support this OS yet.'); - process.exit(1); - } - - if (modelId.includes('tensorrt-llm')) { - if (process.platform === 'darwin') { - log('Tensorrt-LLM models are not supported on this OS'); - process.exit(1); - } - - try { - const version = await getCudaVersion(); - const [currentMajor, currentMinor] = version.split('.').map(Number); - const [requiredMajor, requiredMinor] = - MIN_CUDA_VERSION.split('.').map(Number); - const isMatchRequired = - currentMajor > requiredMajor || - (currentMajor === requiredMajor && currentMinor >= requiredMinor); - if (!isMatchRequired) { - log( - `CUDA version ${version} is not compatible with TensorRT-LLM models. Required version: ${MIN_CUDA_VERSION}`, - ); - process.exit(1); - } - } catch (e) { - console.error(e.message ?? e); - log(e.message ?? e); - process.exit(1); - } - } -}; - -export const parseVersion = (version: string) => { - return version.split('.').map(Number); -}; - -export const checkRequiredVersion = (version: string, minVersion: string) => { - const [currentMajor, currentMinor, currentPatch] = parseVersion(version); - const [requiredMajor, requiredMinor, requiredPatch] = - parseVersion(minVersion); - return ( - currentMajor > requiredMajor || - (currentMajor === requiredMajor && - (currentMinor > requiredMinor || - (currentMinor === requiredMinor && currentPatch >= requiredPatch))) - ); -}; diff --git a/cortex-js/src/utils/model-parameter.parser.ts b/cortex-js/src/utils/model-parameter.parser.ts deleted file mode 100644 index 5d91d5531..000000000 --- a/cortex-js/src/utils/model-parameter.parser.ts +++ /dev/null @@ -1,74 +0,0 @@ -import { - Model, - ModelRuntimeParams, - ModelSettingParams, -} from '@/domain/models/model.interface'; - -// Make this class injectable -export class ModelParameterParser { - private modelSettingParamTypes: { [key: string]: string } = { - prompt_template: 'string', - ctx_len: 'number', - ngl: 'number', - n_parallel: 'number', - cpu_threads: 'number', - 
llama_model_path: 'string', - mmproj: 'string', - cont_batching: 'boolean', - pre_prompt: 'string', - model_type: 'string', - embedding: 'boolean', - }; - - private modelRuntimeParamTypes: { [key: string]: string } = { - temperature: 'number', - top_k: 'number', - top_p: 'number', - stream: 'boolean', - max_tokens: 'number', - stop: 'string[]', - frequency_penalty: 'number', - presence_penalty: 'number', - }; - - /** - * Parse the model inference parameters from origin Model - * @param model - * @returns Partial - */ - parseModelInferenceParams(model: Partial): Partial { - const inferenceParams: Partial & ModelRuntimeParams = - structuredClone(model); - return Object.keys(inferenceParams).reduce((acc, key) => { - if (!this.isModelRuntimeParam(key)) { - delete acc[key as keyof typeof acc]; - } - - return acc; - }, inferenceParams); - } - /** - * Parse the model engine settings from origin Model - * @param model - * @returns Partial - */ - parseModelEngineSettings(model: Partial): Partial { - const engineSettings: Partial & ModelSettingParams = - structuredClone(model); - return Object.keys(engineSettings).reduce((acc, key) => { - if (!this.isModelSettingParam(key)) { - delete acc[key as keyof typeof acc]; - } - - return acc; - }, engineSettings); - } - - private isModelSettingParam(key: string): boolean { - return key in this.modelSettingParamTypes; - } - - private isModelRuntimeParam(key: string): boolean { - return key in this.modelRuntimeParamTypes; - } -} diff --git a/cortex-js/src/utils/normalize-model-id.ts b/cortex-js/src/utils/normalize-model-id.ts deleted file mode 100644 index fcc3f721f..000000000 --- a/cortex-js/src/utils/normalize-model-id.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { ModelArtifact } from '@/domain/models/model.interface'; -import { getGpuInfo } from './cuda'; -import { - Engines, - RemoteEngines, -} from '@/infrastructure/commanders/types/engine.interface'; - -export const normalizeModelId = (modelId: string): string => { - return 
modelId.replace(':main', '').replace(/[:/]/g, '-'); -}; - -export const isLocalModel = ( - modelFiles?: string[] | ModelArtifact, -): boolean => { - return ( - !!modelFiles && - Array.isArray(modelFiles) && - !/^(http|https):\/\/[^/]+\/.*/.test(modelFiles[0]) - ); -}; - -export const isRemoteEngine = (engine: string): boolean => { - return RemoteEngines.includes(engine as Engines); -}; - -/** - * Parse the model hub engine branch - * @param branch - * @returns - */ -export const parseModelHubEngineBranch = async ( - branch: string, -): Promise => { - if (branch.includes('tensorrt')) { - let engineBranch = branch; - const platform = process.platform == 'win32' ? 'windows' : 'linux'; - if (!engineBranch.includes(platform)) { - engineBranch += `-${platform}`; - } - - const gpus = await getGpuInfo(); - if (gpus[0]?.arch && !engineBranch.includes(gpus[0].arch)) { - engineBranch += `-${gpus[0].arch}`; - } - return engineBranch; - } - return branch; -}; diff --git a/cortex-js/src/utils/request.ts b/cortex-js/src/utils/request.ts deleted file mode 100644 index e81c1b0de..000000000 --- a/cortex-js/src/utils/request.ts +++ /dev/null @@ -1,22 +0,0 @@ -export function extractCommonHeaders(headers: any) { - const commonHeaders = [ - 'Content-Type', - 'User-Agent', - 'Accept', - 'Authorization', - 'Origin', - 'Referer', - 'Connection', - ]; - - const extractedHeaders: Record = {}; - - for (const header of commonHeaders) { - const value = headers[header]; - if (value) { - extractedHeaders[header] = value; - } - } - - return extractedHeaders; -} diff --git a/cortex-js/src/utils/system-resource.ts b/cortex-js/src/utils/system-resource.ts deleted file mode 100644 index 527884a9c..000000000 --- a/cortex-js/src/utils/system-resource.ts +++ /dev/null @@ -1,16 +0,0 @@ -import osUtils from 'node-os-utils' - -export type MemoryInformation = { - total: osUtils.MemUsedInfo['totalMemMb']; - used: osUtils.MemUsedInfo['usedMemMb']; - free: osUtils.MemFreeInfo['freeMemMb'] -} - -export const 
getMemoryInformation = async (): Promise => { - const [usedMemoryInfo, freeMemoryInfo] = await Promise.all([osUtils.mem.used(), osUtils.mem.free()]) - return { - total: usedMemoryInfo.totalMemMb, - used: usedMemoryInfo.usedMemMb, - free: freeMemoryInfo.freeMemMb - } -} \ No newline at end of file diff --git a/cortex-js/src/utils/urls.ts b/cortex-js/src/utils/urls.ts deleted file mode 100644 index 6559ef804..000000000 --- a/cortex-js/src/utils/urls.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { isAbsolute } from 'path'; - -/** - * Check if a string is a valid URL. - * @param input - The string to check. - * @returns True if the string is a valid URL, false otherwise. - */ -export function isValidUrl(input: string | undefined): boolean { - if (!input) return false; - try { - new URL(input); - return true; - } catch (e) { - return false; - } -} - -/** - * Check if the URL is a lcoal file path - * @param modelFiles - * @returns - */ -export const isLocalFile = (path: string): boolean => { - return !/^(http|https):\/\/[^/]+\/.*/.test(path) && isAbsolute(path); -}; diff --git a/cortex-js/test/jest-e2e.json b/cortex-js/test/jest-e2e.json deleted file mode 100644 index c74edd836..000000000 --- a/cortex-js/test/jest-e2e.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "moduleFileExtensions": ["js", "json", "ts"], - "rootDir": ".", - "testEnvironment": "node", - "testRegex": ".e2e-spec.ts$", - "transform": { - "^.+\\.(t|j)s$": "ts-jest" - }, - "moduleNameMapper": { - "@/(.*)$": "/../src/$1", - "@commanders/(.*)$": "/../src/infrastructure/commanders/$1" - } -} diff --git a/cortex-js/test/models.e2e-spec.ts b/cortex-js/test/models.e2e-spec.ts deleted file mode 100644 index f6bf5a2e5..000000000 --- a/cortex-js/test/models.e2e-spec.ts +++ /dev/null @@ -1,24 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { INestApplication } from '@nestjs/common'; -import request from 'supertest'; -import { AppModule } from '@/app.module'; - -describe('ModelsController (e2e)', () => 
{ - let app: INestApplication; - - beforeEach(async () => { - const moduleFixture: TestingModule = await Test.createTestingModule({ - imports: [AppModule], - }).compile(); - - app = moduleFixture.createNestApplication(); - await app.init(); - }); - - it('/ (GET)', () => { - return request(app.getHttpServer()) - .get('/models') - .expect(200) - .expect((e) => Array.isArray(e)); - }); -}); diff --git a/cortex-js/tsconfig.build.json b/cortex-js/tsconfig.build.json deleted file mode 100644 index 64f86c6bd..000000000 --- a/cortex-js/tsconfig.build.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "extends": "./tsconfig.json", - "exclude": ["node_modules", "test", "dist", "**/*spec.ts"] -} diff --git a/cortex-js/tsconfig.json b/cortex-js/tsconfig.json deleted file mode 100644 index 44f33f788..000000000 --- a/cortex-js/tsconfig.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "compilerOptions": { - "module": "node16", - "moduleResolution": "node16", - "declaration": true, - "removeComments": true, - "emitDecoratorMetadata": true, - "experimentalDecorators": true, - "allowSyntheticDefaultImports": true, - "target": "es2018", - "sourceMap": true, - "outDir": "./dist", - "baseUrl": "./", - "incremental": true, - "skipLibCheck": true, - "strictNullChecks": true, - "noImplicitAny": true, - "strictBindCallApply": true, - "forceConsistentCasingInFileNames": true, - "noFallthroughCasesInSwitch": true, - "esModuleInterop": true, - "resolveJsonModule": true, - "paths": { - "@/*": ["src/*"], - "@commanders/*": ["src/infrastructure/commanders/*"] - } - } -} diff --git a/cortex-js/uninstall.js b/cortex-js/uninstall.js deleted file mode 100644 index 2c5b04807..000000000 --- a/cortex-js/uninstall.js +++ /dev/null @@ -1,40 +0,0 @@ -const { promises } = require('node:fs'); -const os = require('os'); -const fs = require('fs'); -const path = require('path'); -const yaml = require('js-yaml'); -const constants = require('./dist/src/infrastructure/constants/cortex.js') - -const uninstall = async () => { - const 
cortexConfigPath = path.join(os.homedir(), '.cortexrc'); - if (fs.existsSync(cortexConfigPath)) { - const content = await promises.readFile(cortexConfigPath, 'utf8'); - const config = yaml.load(content); - if(!config) { - return; - } - const { dataFolderPath, cortexCppHost, cortexCppPort, apiServerHost, apiServerPort } = config; - - await fetch(constants.CORTEX_CPP_PROCESS_DESTROY_URL(cortexCppHost, cortexCppPort), - { method: 'DELETE' }) - .catch(() => {}) - await fetch(constants.CORTEX_JS_SYSTEM_URL(apiServerHost, apiServerPort), - { method: 'DELETE' }) - .catch(() => {}) - // remove all data in data folder path except models - const modelsFolderPath = path.join(dataFolderPath, 'models'); - const files = fs.readdirSync(dataFolderPath); - for (const file of files) { - const fileStat = fs.statSync(path.join(dataFolderPath, file)); - if (file !== 'models') { - if (fileStat.isDirectory()) { - fs.rmSync(path.join(dataFolderPath, file), { recursive: true }); - } else { - fs.unlinkSync(path.join(dataFolderPath, file)); - } - } - } - } -}; - -uninstall(); From 806977b5f06b5aa30abc781e5e8611bf39c7aae9 Mon Sep 17 00:00:00 2001 From: marknguyen1302 Date: Tue, 27 Aug 2024 14:28:00 +0700 Subject: [PATCH 3/4] rename workflows file --- ...cortex-js-openai-coverage.yml => platform-openai-coverage.yml} | 0 .../{cortex-js-quality-gate.yml => platform-quality-gate.yml} | 0 .github/workflows/{cortex-js.yml => platform.yml} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{cortex-js-openai-coverage.yml => platform-openai-coverage.yml} (100%) rename .github/workflows/{cortex-js-quality-gate.yml => platform-quality-gate.yml} (100%) rename .github/workflows/{cortex-js.yml => platform.yml} (100%) diff --git a/.github/workflows/cortex-js-openai-coverage.yml b/.github/workflows/platform-openai-coverage.yml similarity index 100% rename from .github/workflows/cortex-js-openai-coverage.yml rename to .github/workflows/platform-openai-coverage.yml diff --git 
a/.github/workflows/cortex-js-quality-gate.yml b/.github/workflows/platform-quality-gate.yml similarity index 100% rename from .github/workflows/cortex-js-quality-gate.yml rename to .github/workflows/platform-quality-gate.yml diff --git a/.github/workflows/cortex-js.yml b/.github/workflows/platform.yml similarity index 100% rename from .github/workflows/cortex-js.yml rename to .github/workflows/platform.yml From bc1884b2baa8c7f39f81b1368b20c262fd6d34cf Mon Sep 17 00:00:00 2001 From: marknguyen1302 Date: Fri, 30 Aug 2024 10:26:56 +0700 Subject: [PATCH 4/4] remove build single binary in cortex platform --- .github/workflows/cortex-build.yml | 288 +---------------------------- 1 file changed, 2 insertions(+), 286 deletions(-) diff --git a/.github/workflows/cortex-build.yml b/.github/workflows/cortex-build.yml index 162d30976..307a53dfe 100644 --- a/.github/workflows/cortex-build.yml +++ b/.github/workflows/cortex-build.yml @@ -223,237 +223,8 @@ jobs: ## cortex-cpp node binding - build-cortex-single-binary: - runs-on: ${{ matrix.runs-on }} - needs: [create-draft-release, build-and-test] - timeout-minutes: 40 - strategy: - fail-fast: false - matrix: - include: - - os: "linux" - name: "amd64" - runs-on: "ubuntu-20-04" - - - os: "windows" - name: "amd64" - runs-on: "windows-2019" - - - os: "mac" - name: "amd64" - runs-on: "macos-13" - - - os: "mac" - name: "arm64" - runs-on: "macos-latest" - steps: - - name: Clone - id: checkout - uses: actions/checkout@v3 - with: - submodules: recursive - - - uses: actions/setup-dotnet@v3 - if: runner.os == 'Windows' - with: - dotnet-version: "8.0.x" - - - name: Install jq - uses: dcarbone/install-jq-action@v2.0.1 - - - uses: actions/setup-node@v3 - with: - node-version: "20.x" - registry-url: "https://registry.npmjs.org" - - - name: "Update version by tag" - working-directory: platform - shell: bash - run: | - echo "Version: ${{ needs.create-draft-release.outputs.version }}" - # Update the version in package.json - jq --arg version "${{ 
needs.create-draft-release.outputs.version }}" '.version = $version' package.json > /tmp/package.json - mv /tmp/package.json package.json - - - name: Install choco on Windows - if: runner.os == 'Windows' - run: | - choco install make -y - - - uses: actions/setup-node@v3 - with: - node-version: "20.x" - registry-url: "https://registry.npmjs.org" - - - uses: actions/setup-python@v3 - with: - python-version: "3.10" - - - run: pip3 install --upgrade setuptools - if: runner.os == 'macOS' - - - run: yarn install && yarn build:binary - working-directory: ./platform - - - name: Get Cer for code signing - if: runner.os == 'macOS' - run: base64 -d <<< "$NOTARIZE_P8_BASE64" > /tmp/notary-key.p8 - shell: bash - env: - NOTARIZE_P8_BASE64: ${{ secrets.NOTARIZE_P8_BASE64 }} - - - uses: apple-actions/import-codesign-certs@v2 - continue-on-error: true - if: runner.os == 'macOS' - with: - p12-file-base64: ${{ secrets.CODE_SIGN_P12_BASE64 }} - p12-password: ${{ secrets.CODE_SIGN_P12_PASSWORD }} - - - name: update app info - run: | - cd platform - make update-app-info - - - name: copy bin file macos - if: runner.os == 'macOS' - run: | - cd platform - mkdir -p installer - which cp - which mv - npm install -g cpx - npx cpx ./dist/cortexso ./ - mv cortexso cortex - - - name: Code Signing macOS - if: runner.os == 'macOS' - run: | - cd platform - ./dist/cortexso --help - echo "--------" - ./cortex --help - make codesign-binary CODE_SIGN=true DEVELOPER_ID="${{ secrets.DEVELOPER_ID }}" - - # install quill - curl -sSfL https://raw.githubusercontent.com/anchore/quill/main/install.sh | sh -s -- -b /usr/local/bin - - # Notarize the binary - quill notarize ./cortex - env: - QUILL_NOTARY_KEY_ID: ${{ secrets.NOTARY_KEY_ID }} - QUILL_NOTARY_ISSUER: ${{ secrets.NOTARY_ISSUER }} - QUILL_NOTARY_KEY: "/tmp/notary-key.p8" - - - name: Create MacOS PKG Installer - if: runner.os == 'macOS' - run: | - cd platform - echo "--------" - npx cpx ./cortex ./installer - ./installer/cortex --help - pkgbuild 
--identifier ai.cortex.pkg --install-location ./usr/local/bin/ --root ./installer cortex-installer.pkg - make codesign-installer CODE_SIGN=true DEVELOPER_ID="${{ secrets.DEVELOPER_ID }}" - - # Notary the installer - xcrun notarytool submit cortex-installer.pkg --apple-id ${{ secrets.APPLE_ID }} --password ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }} --team-id ${{ secrets.APPLE_TEAM_ID }} --wait - - - name: Create Linux DEB Installer - if: runner.os == 'Linux' - run: | - cd platform - mkdir -p cortexso/DEBIAN - mkdir -p cortexso/usr/local/bin - sed "s/Version:/Version: ${{ needs.create-draft-release.outputs.version }}/g" control.template > cortexso/DEBIAN/control - cp cortex cortexso/usr/local/bin/cortex - dpkg-deb --build cortexso - - - run: | - cd platform - set PATH=%PATH%;%USERPROFILE%\.dotnet\tools - make codesign-binary CODE_SIGN=true CORTEX_VERSION="0.${{ needs.create-draft-release.outputs.version }}" AZURE_KEY_VAULT_URI="${{ secrets.AZURE_KEY_VAULT_URI }}" AZURE_CLIENT_ID="${{ secrets.AZURE_CLIENT_ID }}" AZURE_TENANT_ID="${{ secrets.AZURE_TENANT_ID }}" AZURE_CLIENT_SECRET="${{ secrets.AZURE_CLIENT_SECRET }}" AZURE_CERT_NAME="${{ secrets.AZURE_CERT_NAME }}" - name: Code Signing Windows - shell: cmd - if: runner.os == 'Windows' - - - name: Update version in installer.iss using sed - if: runner.os == 'Windows' - shell: bash - run: | - cd platform - sed -i "s/AppVersion=1.0/AppVersion=${{ needs.create-draft-release.outputs.version }}/g" installer.iss - cat installer.iss - - - name: Compile .ISS to .EXE Installer - uses: Minionguyjpro/Inno-Setup-Action@v1.2.2 - if: runner.os == 'Windows' - with: - path: platform/installer.iss - options: /O+ - - - run: | - cd platform - set PATH=%PATH%;%USERPROFILE%\.dotnet\tools - make codesign-installer CODE_SIGN=true CORTEX_VERSION="0.${{ needs.create-draft-release.outputs.version }}" AZURE_KEY_VAULT_URI="${{ secrets.AZURE_KEY_VAULT_URI }}" AZURE_CLIENT_ID="${{ secrets.AZURE_CLIENT_ID }}" AZURE_TENANT_ID="${{ 
secrets.AZURE_TENANT_ID }}" AZURE_CLIENT_SECRET="${{ secrets.AZURE_CLIENT_SECRET }}" AZURE_CERT_NAME="${{ secrets.AZURE_CERT_NAME }}" - name: Code Signing Windows - shell: cmd - if: runner.os == 'Windows' - - - name: Post-Bundle - run: | - cd platform - make postbundle - - - name: Upload Cortex Installer - uses: actions/upload-release-asset@v1.0.1 - if: runner.os != 'Linux' - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ needs.create-draft-release.outputs.upload_url }} - asset_path: ./platform/cortex-installer.tar.gz - asset_name: cortex-installer-${{ needs.create-draft-release.outputs.version }}-${{ matrix.name }}-${{ matrix.os }}.tar.gz - asset_content_type: application/gzip - - - name: Upload Cortex Installer - uses: actions/upload-release-asset@v1.0.1 - if: runner.os == 'Windows' - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ needs.create-draft-release.outputs.upload_url }} - asset_path: ./platform/setup.exe - asset_name: cortex-installer-${{ needs.create-draft-release.outputs.version }}-${{ matrix.name }}-${{ matrix.os }}.exe - asset_content_type: application/octet-stream - - - uses: actions/upload-release-asset@v1.0.1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ needs.create-draft-release.outputs.upload_url }} - asset_path: ./platform/cortex.tar.gz - asset_name: cortex-${{ needs.create-draft-release.outputs.version }}-${{ matrix.name }}-${{ matrix.os }}.tar.gz - asset_content_type: application/gzip - - - uses: actions/upload-release-asset@v1.0.1 - if: runner.os == 'Linux' - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ needs.create-draft-release.outputs.upload_url }} - asset_path: ./platform/cortexso.deb - asset_name: cortex-installer-${{ needs.create-draft-release.outputs.version }}-${{ matrix.name }}-${{ matrix.os }}.deb - asset_content_type: application/gzip - - # Upload artifact for package manager - - name: Upload Artifact - if: 
runner.os == 'Linux' - uses: actions/upload-artifact@v2 - with: - name: cortex-linux - path: ./platform/cortex - update_release_draft: - needs: [build-and-test, build-cortex-single-binary] + needs: [build-and-test] permissions: # write permission is required to create a github release contents: write @@ -476,59 +247,4 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - # Add jobs with approval - public_package_manager: - needs: [create-draft-release, update_release_draft] - runs-on: ubuntu-latest - environment: package-manager - permissions: - contents: read - steps: - - name: checkout - uses: actions/checkout@v4 - - - name: Download artifact - uses: actions/download-artifact@v2 - with: - name: cortex-linux - path: ./cortex-linux - - - name: install dependencies - run: | - ls -al ./cortex-linux - sudo apt-get install dput devscripts build-essential debhelper-compat pbuilder debootstrap devscripts -y - - - name: import gpg key with subkey - uses: crazy-max/ghaction-import-gpg@v4 - with: - gpg_private_key: ${{ secrets.HOMEBREW_GPG_PRIVATE_KEY }} - passphrase: ${{ secrets.HOMEBREW_GPG_PASSPHRASE }} - fingerprint: ${{ secrets.HOMEBREW_GPG_FINGERPRINT }} - - - run: | - mkdir -p package-managers-template/launchpad/cortexso-${{ needs.create-draft-release.outputs.version }}/debian - cp package-managers-template/launchpad/cortexso/debian/* package-managers-template/launchpad/cortexso-${{ needs.create-draft-release.outputs.version }}/debian/ - cp ./cortex-linux/cortex package-managers-template/launchpad/cortexso-${{ needs.create-draft-release.outputs.version }}/ - - - name: Get release notes - id: release_notes - run: | - RELEASE_NOTES="cortex update to version ${{ needs.create-draft-release.outputs.version }}" - echo "RELEASE_NOTES=${RELEASE_NOTES}" >> $GITHUB_ENV - - - name: Update change log - run: | - sed -i "s/VERSION/${{ needs.create-draft-release.outputs.version }}/g" package-managers-template/launchpad/cortexso-${{ needs.create-draft-release.outputs.version 
}}/debian/changelog - sed -i "s/CHANGELOG_HERE/${{ env.RELEASE_NOTES }}/g" package-managers-template/launchpad/cortexso-${{ needs.create-draft-release.outputs.version }}/debian/changelog - timestamp=$(date -u +"%a, %d %b %Y %H:%M:%S +0000") - sed -i "s/TIME_HERE/${timestamp}/g" package-managers-template/launchpad/cortexso-${{ needs.create-draft-release.outputs.version }}/debian/changelog - - - name: Build package - run: | - cd package-managers-template/launchpad/cortexso-${{ needs.create-draft-release.outputs.version }} - echo "y" | debuild -S -sa -k${{ secrets.HOMEBREW_GPG_FINGERPRINT }} - sudo pbuilder create --distribution jammy --debootstrapopts --variant=buildd - sudo pbuilder build ../cortexso_${{ needs.create-draft-release.outputs.version }}.dsc - dput ppa:homebrew-computer/main ../cortexso_${{ needs.create-draft-release.outputs.version }}_source.changes - + \ No newline at end of file