14 changes: 8 additions & 6 deletions cortex-js/src/domain/models/huggingface.interface.ts
@@ -4,6 +4,13 @@ export interface HuggingFaceModelVersion {
fileSize?: number;
quantization?: Quantization;
}

export interface HuggingFaceRepoSibling {
rfilename: string;
downloadUrl?: string;
fileSize?: number;
quantization?: Quantization;
}
export interface HuggingFaceRepoData {
id: string;
modelId: string;
@@ -18,12 +25,7 @@ export interface HuggingFaceRepoData {
pipeline_tag: 'text-generation';
tags: Array<'transformers' | 'pytorch' | 'safetensors' | string>;
cardData: Record<CardDataKeys | string, unknown>;
siblings: {
rfilename: string;
downloadUrl?: string;
fileSize?: number;
quantization?: Quantization;
}[];
siblings: HuggingFaceRepoSibling[];
createdAt: string;
}
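
The inline sibling shape is promoted to a named HuggingFaceRepoSibling interface so other modules (for example the new file-selection callback in models.cli.usecases.ts below) can reference it directly. A minimal sketch of a consumer, assuming an illustrative pickDefaultSibling helper that is not part of this PR:

import {
  HuggingFaceRepoData,
  HuggingFaceRepoSibling,
} from '@/domain/models/huggingface.interface';

// Illustrative helper: prefer a GGUF sibling, otherwise fall back to the first file.
function pickDefaultSibling(
  repo: HuggingFaceRepoData,
): HuggingFaceRepoSibling | undefined {
  return (
    repo.siblings.find((s) => s.rfilename.endsWith('.gguf')) ??
    repo.siblings[0]
  );
}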

2 changes: 2 additions & 0 deletions cortex-js/src/domain/models/model.event.ts
@@ -7,6 +7,8 @@ const ModelLoadingEvents = [
'stopped',
'starting-failed',
'stopping-failed',
'model-downloaded',
'model-deleted',
] as const;
export type ModelLoadingEvent = (typeof ModelLoadingEvents)[number];
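
Adding 'model-downloaded' and 'model-deleted' to the const array widens the ModelLoadingEvent union, so emitters and handlers of the new lifecycle states get compile-time checking. A small sketch of what that buys, using only the names visible in this diff:

import { ModelLoadingEvent } from '@/domain/models/model.event';

// The new members type-check like any other lifecycle state.
const downloaded: ModelLoadingEvent = 'model-downloaded';
const deleted: ModelLoadingEvent = 'model-deleted';
// const invalid: ModelLoadingEvent = 'model-removed'; // would not compile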

1 change: 1 addition & 0 deletions cortex-js/src/domain/models/model.interface.ts
@@ -168,4 +168,5 @@ export interface ModelRuntimeParams {
export interface ModelArtifact {
mmproj?: string;
llama_model_path?: string;
model_path?: string;
}
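
The new optional model_path joins mmproj and llama_model_path, presumably so engines other than llama.cpp can record where their model file lives. A hedged example of conforming objects; the concrete paths are made up:

import { ModelArtifact } from '@/domain/models/model.interface';

// Hypothetical artifact entries; real values depend on the engine in use.
const ggufArtifact: ModelArtifact = {
  llama_model_path: '/models/llama3-8b/model.gguf',
};
const onnxArtifact: ModelArtifact = {
  model_path: '/models/phi-3-mini/model.onnx',
};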
@@ -8,6 +8,7 @@ import { existsSync } from 'fs';
import { join } from 'node:path';
import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service';
import { InitCliUsecases } from '../usecases/init.cli.usecases';
import { checkModelCompatibility } from '@/utils/model-check';

@SubCommand({
name: 'pull',
@@ -35,6 +36,8 @@ export class ModelPullCommand extends CommandRunner {
}
const modelId = passedParams[0];

checkModelCompatibility(modelId);

await this.modelsCliUsecases.pullModel(modelId).catch((e: Error) => {
if (e instanceof ModelNotFoundException)
console.error('Model does not exist.');
Expand Up @@ -14,6 +14,7 @@ import { existsSync } from 'node:fs';
import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service';
import { join } from 'node:path';
import { Engines } from '../types/engine.interface';
import { checkModelCompatibility } from '@/utils/model-check';

type ModelStartOptions = {
attach: boolean;
@@ -58,9 +59,14 @@ export class ModelStartCommand extends CommandRunner {
!Array.isArray(existingModel.files) ||
/^(http|https):\/\/[^/]+\/.*/.test(existingModel.files[0])
) {
console.error('Model is not available. Please pull the model first.');
console.error(
`${modelId} not found on filesystem. Please try 'cortex pull ${modelId}' first.`,
);
process.exit(1);
}

checkModelCompatibility(modelId);

const engine = existingModel.engine || 'cortex.llamacpp';
// Pull engine if not exist
if (
@@ -72,10 +78,7 @@
engine,
);
}
if (engine === Engines.onnx && process.platform !== 'win32') {
console.error('The ONNX engine does not support this OS yet.');
process.exit(1);
}

await this.cortexUsecases
.startCortex(options.attach)
.then(() => this.modelsCliUsecases.startModel(modelId, options.preset))
4 changes: 2 additions & 2 deletions cortex-js/src/infrastructure/commanders/serve.command.ts
@@ -42,8 +42,8 @@ export class ServeCommand extends CommandRunner {
console.log(
chalk.blue(`API Playground available at http://${host}:${port}/api`),
);
} catch (err) {
console.error(err.message ?? err);
} catch {
console.error(`Failed to start server. Is port ${port} in use?`);
}
}

13 changes: 7 additions & 6 deletions cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts
@@ -14,6 +14,7 @@ import { join } from 'path';
import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service';
import { InitCliUsecases } from '../usecases/init.cli.usecases';
import { Engines } from '../types/engine.interface';
import { checkModelCompatibility } from '@/utils/model-check';

type RunOptions = {
threadId?: string;
@@ -55,7 +56,9 @@ export class RunCommand extends CommandRunner {
// If not exist
// Try Pull
if (!(await this.modelsCliUsecases.getModel(modelId))) {
console.log(`Model ${modelId} not found. Try pulling model...`);
console.log(
`${modelId} not found on filesystem. Downloading from remote: https://huggingface.co/cortexhub if possible.`,
);
await this.modelsCliUsecases.pullModel(modelId).catch((e: Error) => {
if (e instanceof ModelNotFoundException)
console.error('Model does not exist.');
@@ -71,10 +74,12 @@
!Array.isArray(existingModel.files) ||
/^(http|https):\/\/[^/]+\/.*/.test(existingModel.files[0])
) {
console.error('Model is not available. Please pull the model first.');
console.error('Model is not available.');
process.exit(1);
}

checkModelCompatibility(modelId);

const engine = existingModel.engine || 'cortex.llamacpp';
// Pull engine if not exist
if (
@@ -86,10 +91,6 @@
engine,
);
}
if (engine === Engines.onnx && process.platform !== 'win32') {
console.error('The ONNX engine does not support this OS yet.');
process.exit(1);
}

return this.cortexUsecases
.startCortex(false)
@@ -18,6 +18,7 @@ import {
} from '@/infrastructure/constants/cortex';
import { checkNvidiaGPUExist, cudaVersion } from '@/utils/cuda';
import { Engines } from '../types/engine.interface';
import { checkModelCompatibility } from '@/utils/model-check';

@Injectable()
export class InitCliUsecases {
@@ -71,11 +72,6 @@ export class InitCliUsecases {
)
await this.installLlamaCppEngine(options, version);

if (engine === Engines.onnx && process.platform !== 'win32') {
console.error('The ONNX engine does not support this OS yet.');
process.exit(1);
}

if (engine !== 'cortex.llamacpp')
await this.installAcceleratedEngine('latest', engine);
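
The per-command ONNX guard deleted here (and in the start and run commands above) appears to be centralized in the checkModelCompatibility helper imported from '@/utils/model-check'. Its implementation is not part of this diff; a plausible sketch, assuming the helper infers the engine from the model id and reproduces the old win32-only ONNX rule:

// Hypothetical sketch of @/utils/model-check; not the code shipped in this PR.
export const checkModelCompatibility = (modelId: string) => {
  if (modelId.includes('onnx') && process.platform !== 'win32') {
    console.error('The ONNX engine does not support this OS yet.');
    process.exit(1);
  }
  // Further engine/OS rules (e.g. for tensorrt-llm) could be added here.
};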

202 changes: 30 additions & 172 deletions cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts
@@ -1,25 +1,19 @@
import { exit } from 'node:process';
import { ModelsUsecases } from '@/usecases/models/models.usecases';
import { Model } from '@/domain/models/model.interface';
import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto';
import { HuggingFaceRepoData } from '@/domain/models/huggingface.interface';
import { InquirerService } from 'nest-commander';
import { Inject, Injectable } from '@nestjs/common';
import { Presets, SingleBar } from 'cli-progress';
import { LLAMA_2 } from '@/infrastructure/constants/prompt-constants';

import { HttpService } from '@nestjs/axios';
import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto';
import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto';
import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service';
import { join, basename } from 'path';
import { join } from 'path';
import { load } from 'js-yaml';
import { existsSync, readdirSync, readFileSync } from 'fs';
import { isLocalModel, normalizeModelId } from '@/utils/normalize-model-id';
import { fetchJanRepoData, getHFModelMetadata } from '@/utils/huggingface';
import { createWriteStream, mkdirSync, promises } from 'node:fs';
import { firstValueFrom } from 'rxjs';
import { Engines } from '../types/engine.interface';
import { isLocalModel } from '@/utils/normalize-model-id';
import { HuggingFaceRepoSibling } from '@/domain/models/huggingface.interface';

@Injectable()
export class ModelsCliUsecases {
@@ -120,170 +114,34 @@ export class ModelsCliUsecases {
console.error('Model already exists');
process.exit(1);
}

if (modelId.includes('onnx') || modelId.includes('tensorrt')) {
await this.pullEngineModelFiles(modelId);
} else {
await this.pullGGUFModel(modelId);
const bar = new SingleBar({}, Presets.shades_classic);
bar.start(100, 0);
const callback = (progress: number) => {
bar.update(progress);
};

try {
await this.modelsUsecases.downloadModel(modelId, callback);

const model = await this.modelsUsecases.findOne(modelId);
const fileUrl = join(
await this.fileService.getModelsPath(),
normalizeModelId(modelId),
basename((model?.files as string[])[0]),
);
await this.modelsUsecases.update(modelId, {
files: [fileUrl],
name: modelId.replace(':default', ''),
});
} catch (err) {
bar.stop();
throw err;
}
}
}

/**
* It's to pull engine model files from HuggingFace repository
* @param modelId
*/
private async pullEngineModelFiles(modelId: string) {
const modelsContainerDir = await this.fileService.getModelsPath();

if (!existsSync(modelsContainerDir)) {
mkdirSync(modelsContainerDir, { recursive: true });
}

const modelFolder = join(modelsContainerDir, normalizeModelId(modelId));
await promises.mkdir(modelFolder, { recursive: true }).catch(() => {});

const files = (await fetchJanRepoData(modelId)).siblings;
for (const file of files) {
console.log(`Downloading ${file.rfilename}`);
const bar = new SingleBar({}, Presets.shades_classic);
bar.start(100, 0);
const response = await firstValueFrom(
this.httpService.get(file.downloadUrl ?? '', {
responseType: 'stream',
}),
);
if (!response) {
throw new Error('Failed to download model');
}

await new Promise((resolve, reject) => {
const writer = createWriteStream(join(modelFolder, file.rfilename));
let receivedBytes = 0;
const totalBytes = response.headers['content-length'];

writer.on('finish', () => {
resolve(true);
});

writer.on('error', (error) => {
reject(error);
});

response.data.on('data', (chunk: any) => {
receivedBytes += chunk.length;
bar.update(Math.floor((receivedBytes / totalBytes) * 100));
});

response.data.pipe(writer);
await this.modelsUsecases.pullModel(modelId, true, (files) => {
return new Promise<HuggingFaceRepoSibling>(async (resolve) => {
const listChoices = files
.filter((e) => e.quantization != null)
.map((e) => {
return {
name: e.quantization,
value: e.quantization,
};
});

if (listChoices.length > 1) {
const { quantization } = await this.inquirerService.inquirer.prompt({
type: 'list',
name: 'quantization',
message: 'Select quantization',
choices: listChoices,
});
resolve(
files
.filter((e) => !!e.quantization)
.find((e: any) => e.quantization === quantization) ?? files[0],
);
} else {
resolve(files.find((e) => e.rfilename.includes('.gguf')) ?? files[0]);
}
});
bar.stop();
}

const model: CreateModelDto = load(
readFileSync(join(modelFolder, 'model.yml'), 'utf-8'),
) as CreateModelDto;
model.files = [join(modelFolder)];
model.model = modelId;

if (!(await this.modelsUsecases.findOne(modelId)))
await this.modelsUsecases.create(model);

if (model.engine === Engines.tensorrtLLM) {
if (process.platform === 'win32')
console.log(
'Please ensure that you install MPI and its SDK to use the TensorRT engine, as it also requires the Cuda Toolkit 12.3 to work. Refs:\n- https://github.com/microsoft/Microsoft-MPI/releases/download/v10.1.1/msmpisetup.exe\n- https://github.com/microsoft/Microsoft-MPI/releases/download/v10.1.1/msmpisdk.msi',
);
else if (process.platform === 'linux')
console.log(
'Please ensure that you install OpenMPI and its SDK to use the TensorRT engine, as it also requires the Cuda Toolkit 12.3 to work.\nYou can install OpenMPI by running "sudo apt update && sudo apt install openmpi-bin libopenmpi-dev"',
);
}
}
/**
* It's to pull model from HuggingFace repository
* It could be a model from Jan's repo or other authors
* @param modelId HuggingFace model id. e.g. "janhq/llama-3 or llama3:7b"
*/
private async pullGGUFModel(modelId: string) {
const data: HuggingFaceRepoData =
await this.modelsUsecases.fetchModelMetadata(modelId);

let modelVersion;

const listChoices = data.siblings
.filter((e) => e.quantization != null)
.map((e) => {
return {
name: e.quantization,
value: e.quantization,
};
});

if (listChoices.length > 1) {
const { quantization } = await this.inquirerService.inquirer.prompt({
type: 'list',
name: 'quantization',
message: 'Select quantization',
choices: listChoices,
});
modelVersion = data.siblings
.filter((e) => !!e.quantization)
.find((e: any) => e.quantization === quantization);
} else {
modelVersion = data.siblings.find((e) => e.rfilename.includes('.gguf'));
}

if (!modelVersion) throw 'No expected quantization found';
const metadata = await getHFModelMetadata(modelVersion.downloadUrl!);

const promptTemplate = metadata?.promptTemplate ?? LLAMA_2;
const stopWords: string[] = [metadata?.stopWord ?? ''];

const model: CreateModelDto = {
files: [modelVersion.downloadUrl ?? ''],
model: modelId,
name: modelId,
prompt_template: promptTemplate,
stop: stopWords,

// Default Inference Params
stream: true,
max_tokens: 4098,
frequency_penalty: 0.7,
presence_penalty: 0.7,
temperature: 0.7,
top_p: 0.7,

// Default Model Settings
ctx_len: 4096,
ngl: 100,
engine: Engines.llamaCPP,
};
if (!(await this.modelsUsecases.findOne(modelId)))
await this.modelsUsecases.create(model);
});
}
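
After the refactor, the CLI's pull flow delegates downloading and registration to ModelsUsecases.pullModel and only supplies a selector callback that receives the repo's siblings and resolves to the file to fetch, prompting for a quantization when several are offered. A minimal non-interactive selector honoring the same contract, for instance for scripted use; the preference order below is an assumption, not part of this PR:

import { HuggingFaceRepoSibling } from '@/domain/models/huggingface.interface';

// Illustrative selector: prefer a Q4_K_M build, then any GGUF file, then the first sibling.
const selectSibling = (
  files: HuggingFaceRepoSibling[],
): Promise<HuggingFaceRepoSibling> =>
  Promise.resolve(
    files.find((f) => f.rfilename.toLowerCase().includes('q4_k_m')) ??
      files.find((f) => f.rfilename.includes('.gguf')) ??
      files[0],
  );

// e.g. await modelsUsecases.pullModel(modelId, true, selectSibling);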
