
Commit d4b8a75: feat: pull model yaml from hf
Parent: 9a115ba

8 files changed: +215, -49 lines

cortex-js/package.json

Lines changed: 3 additions & 1 deletion

@@ -26,6 +26,7 @@
   },
   "dependencies": {
     "@huggingface/gguf": "^0.1.5",
+    "@huggingface/hub": "^0.15.1",
     "@nestjs/axios": "^3.0.2",
     "@nestjs/common": "^10.0.0",
     "@nestjs/config": "^3.2.2",
@@ -47,7 +48,8 @@
     "sqlite": "^5.1.1",
     "sqlite3": "^5.1.7",
     "typeorm": "^0.3.20",
-    "ulid": "^2.3.0"
+    "ulid": "^2.3.0",
+    "yaml": "^2.4.2"
   },
   "devDependencies": {
     "@nestjs/cli": "^10.0.0",
Lines changed: 103 additions & 4 deletions

@@ -1,25 +1,124 @@
-import { CommandRunner, SubCommand } from 'nest-commander';
+import { CommandRunner, InquirerService, SubCommand } from 'nest-commander';
 import { exit } from 'node:process';
 import { ModelsCliUsecases } from '../usecases/models.cli.usecases';
+import { RepoDesignation, listFiles } from '@huggingface/hub';
+import { basename } from 'node:path';
 
 @SubCommand({
   name: 'pull',
   aliases: ['download'],
   description: 'Download a model. Working with HuggingFace model id.',
 })
 export class ModelPullCommand extends CommandRunner {
-  constructor(private readonly modelsCliUsecases: ModelsCliUsecases) {
+  private janHqModelPrefix = 'janhq';
+
+  constructor(
+    private readonly inquirerService: InquirerService,
+    private readonly modelsCliUsecases: ModelsCliUsecases,
+  ) {
     super();
   }
 
   async run(input: string[]) {
     if (input.length < 1) {
-      console.error('Model ID is required');
+      console.error('Model Id is required');
       exit(1);
     }
 
-    await this.modelsCliUsecases.pullModel(input[0]);
+    const branches = await this.tryToGetBranches(input[0]);
+
+    if (!branches) {
+      await this.modelsCliUsecases.pullModel(input[0]);
+    } else {
+      // if there's a metadata.yaml file, we assume it's a JanHQ model
+      await this.handleJanHqModel(input[0], branches);
+    }
+
     console.log('\nDownload complete!');
     exit(0);
   }
+
+  private async tryToGetBranches(input: string): Promise<any> {
+    try {
+      // prepend janhq/ if the input is not already prefixed
+      const sanitizedInput = input.trim().startsWith(this.janHqModelPrefix)
+        ? input
+        : `${this.janHqModelPrefix}/${input}`;
+
+      const repo: RepoDesignation = {
+        type: 'model',
+        name: sanitizedInput,
+      };
+
+      for await (const _fileInfo of listFiles({ repo })) {
+        break;
+      }
+
+      const response = await fetch(
+        `https://huggingface.co/api/models/${sanitizedInput}/refs`,
+      );
+      const data = await response.json();
+      const branches: string[] = data.branches.map((branch: any) => {
+        return branch.name;
+      });
+
+      return branches;
+    } catch (err) {
+      return undefined;
+    }
+  }
+
+  private async versionInquiry(tags: string[]): Promise<string> {
+    const { tag } = await this.inquirerService.inquirer.prompt({
+      type: 'list',
+      name: 'tag',
+      message: 'Select version',
+      choices: tags,
+    });
+
+    return tag;
+  }
+
+  private async handleJanHqModel(repoName: string, branches: string[]) {
+    const sanitizedRepoName = repoName.trim().startsWith(this.janHqModelPrefix)
+      ? repoName
+      : `${this.janHqModelPrefix}/${repoName}`;
+
+    let selectedTag = branches[0];
+
+    if (branches.length > 1) {
+      selectedTag = await this.versionInquiry(branches);
+    }
+
+    const revision = selectedTag;
+    if (!revision) {
+      console.error("Can't find model revision.");
+      exit(1);
+    }
+
+    const repo: RepoDesignation = { type: 'model', name: sanitizedRepoName };
+    let ggufUrl: string | undefined = undefined;
+    let fileSize = 0;
+    for await (const fileInfo of listFiles({
+      repo: repo,
+      revision: revision,
+    })) {
+      if (fileInfo.path.endsWith('.gguf')) {
+        ggufUrl = `https://huggingface.co/${sanitizedRepoName}/resolve/${revision}/${fileInfo.path}`;
+        fileSize = fileInfo.size;
+        break;
+      }
+    }
+
+    if (!ggufUrl) {
+      console.error("Can't find model file.");
+      exit(1);
+    }
+    console.log('Downloading', basename(ggufUrl));
+    await this.modelsCliUsecases.pullModelWithExactUrl(
+      `${sanitizedRepoName}/${revision}`,
+      ggufUrl,
+      fileSize,
+    );
+  }
 }
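The empty for await loop in tryToGetBranches is an existence probe: listFiles() throws for a missing repo, so any failure lands in the catch block and the command falls back to the plain pullModel path. The same pattern in isolation, where repoExists is a hypothetical helper rather than part of this commit:

import { RepoDesignation, listFiles } from '@huggingface/hub';

// listFiles() lazily pages through a repo's files and throws if the
// repo does not exist, so pulling at most one entry is a cheap probe.
async function repoExists(name: string): Promise<boolean> {
  const repo: RepoDesignation = { type: 'model', name };
  try {
    for await (const _ of listFiles({ repo })) break;
    return true;
  } catch {
    return false;
  }
}
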
cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts

Lines changed: 4 additions & 0 deletions

@@ -0,0 +1,4 @@
+export interface ModelTokenizer {
+  stopWord?: string;
+  promptTemplate: string;
+}

cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts

Lines changed: 13 additions & 12 deletions

@@ -6,6 +6,7 @@ import decompress from 'decompress';
 import { exit } from 'node:process';
 import { InitOptions } from '../types/init-options.interface';
 import { Injectable } from '@nestjs/common';
+import { firstValueFrom } from 'rxjs';
 
 @Injectable()
 export class InitCliUsecases {
@@ -19,17 +20,17 @@ export class InitCliUsecases {
     engineFileName: string,
     version: string = 'latest',
   ): Promise<any> => {
-    const res = await this.httpService
-      .get(
+    const res = await firstValueFrom(
+      this.httpService.get(
         this.CORTEX_RELEASES_URL + `${version === 'latest' ? '/latest' : ''}`,
         {
           headers: {
             'X-GitHub-Api-Version': '2022-11-28',
             Accept: 'application/vnd.github+json',
           },
         },
-      )
-      .toPromise();
+      ),
+    );
 
     if (!res?.data) {
       console.log('Failed to fetch releases');
@@ -55,11 +56,11 @@ export class InitCliUsecases {
     const engineDir = resolve(this.rootDir(), 'cortex-cpp');
     if (existsSync(engineDir)) rmSync(engineDir, { recursive: true });
 
-    const download = await this.httpService
-      .get(toDownloadAsset.browser_download_url, {
+    const download = await firstValueFrom(
+      this.httpService.get(toDownloadAsset.browser_download_url, {
         responseType: 'stream',
-      })
-      .toPromise();
+      }),
+    );
     if (!download) {
       console.log('Failed to download model');
       process.exit(1);
@@ -183,11 +184,11 @@
     ).replace('<platform>', platform);
     const destination = resolve(this.rootDir(), 'cuda-toolkit.tar.gz');
 
-    const download = await this.httpService
-      .get(url, {
+    const download = await firstValueFrom(
+      this.httpService.get(url, {
         responseType: 'stream',
-      })
-      .toPromise();
+      }),
+    );
 
     if (!download) {
       console.log('Failed to download dependency');
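All three hunks in this file are the same mechanical migration: Observable.toPromise() has been deprecated since RxJS 7 and is removed in RxJS 8, with firstValueFrom as the replacement for take-the-first-value usage. A minimal sketch of the pattern, using a hypothetical getJson helper:

import { HttpService } from '@nestjs/axios';
import { firstValueFrom } from 'rxjs';

// firstValueFrom resolves with the first emission and rejects if the
// source errors or completes empty; toPromise() instead resolved to
// undefined on empty completion, silently.
async function getJson(http: HttpService, url: string): Promise<unknown> {
  const res = await firstValueFrom(http.get(url));
  return res.data;
}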

cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts

Lines changed: 74 additions & 21 deletions

@@ -21,6 +21,9 @@ import {
   ZEPHYR,
   ZEPHYR_JINJA,
 } from '../prompt-constants';
+import { ModelTokenizer } from '../types/model-tokenizer.interface';
+import { HttpService } from '@nestjs/axios';
+import { firstValueFrom } from 'rxjs';
 
 const AllQuantizations = [
   'Q3_K_S',
@@ -51,6 +54,7 @@ export class ModelsCliUsecases {
     private readonly modelsUsecases: ModelsUsecases,
     @Inject(InquirerService)
     private readonly inquirerService: InquirerService,
+    private readonly httpService: HttpService,
   ) {}
 
   /**
@@ -139,6 +143,47 @@ export class ModelsCliUsecases {
     return this.modelsUsecases.remove(modelId);
   }
 
+  async pullModelWithExactUrl(modelId: string, url: string, fileSize: number) {
+    const tokenizer = await this.getHFModelTokenizer(url);
+    const promptTemplate = tokenizer?.promptTemplate ?? LLAMA_2;
+    const stopWords: string[] = [tokenizer?.stopWord ?? ''];
+
+    const model: CreateModelDto = {
+      sources: [
+        {
+          url: url,
+        },
+      ],
+      id: modelId,
+      name: modelId,
+      version: '',
+      format: ModelFormat.GGUF,
+      description: '',
+      settings: {
+        prompt_template: promptTemplate,
+      },
+      parameters: {
+        stop: stopWords,
+      },
+      metadata: {
+        author: 'janhq',
+        size: fileSize,
+        tags: [],
+      },
+      engine: 'cortex',
+    };
+    if (!(await this.modelsUsecases.findOne(modelId))) {
+      await this.modelsUsecases.create(model);
+    }
+
+    const bar = new SingleBar({}, Presets.shades_classic);
+    bar.start(100, 0);
+    const callback = (progress: number) => {
+      bar.update(progress);
+    };
+    await this.modelsUsecases.downloadModel(modelId, callback);
+  }
+
   /**
    * Pull model from Model repository (HF, Jan...)
    * @param modelId
@@ -155,6 +200,30 @@
     await this.modelsUsecases.downloadModel(modelId, callback);
   }
 
+  private async getHFModelTokenizer(
+    ggufUrl: string,
+  ): Promise<ModelTokenizer | undefined> {
+    try {
+      const { metadata } = await gguf(ggufUrl);
+      // @ts-expect-error "tokenizer.ggml.eos_token_id"
+      const index = metadata['tokenizer.ggml.eos_token_id'];
+      // @ts-expect-error "tokenizer.chat_template"
+      const hfChatTemplate = metadata['tokenizer.chat_template'];
+      const promptTemplate =
+        this.guessPromptTemplateFromHuggingFace(hfChatTemplate);
+      // @ts-expect-error "tokenizer.ggml.tokens"
+      const stopWord: string = metadata['tokenizer.ggml.tokens'][index] ?? '';
+
+      return {
+        stopWord,
+        promptTemplate,
+      };
+    } catch (err) {
+      console.log('Failed to get model metadata:', err);
+      return undefined;
+    }
+  }
+
   //// PRIVATE METHODS ////
 
   /**
@@ -193,26 +262,10 @@
       sibling = data.siblings.find((e) => e.rfilename.includes('.gguf'));
     }
     if (!sibling) throw 'No expected quantization found';
+    const tokenizer = await this.getHFModelTokenizer(sibling.downloadUrl!);
 
-    let stopWord = '';
-    let promptTemplate = LLAMA_2;
-
-    try {
-      const { metadata } = await gguf(sibling.downloadUrl!);
-      // @ts-expect-error "tokenizer.ggml.eos_token_id"
-      const index = metadata['tokenizer.ggml.eos_token_id'];
-      // @ts-expect-error "tokenizer.ggml.eos_token_id"
-      const hfChatTemplate = metadata['tokenizer.chat_template'];
-      promptTemplate = this.guessPromptTemplateFromHuggingFace(hfChatTemplate);
-
-      // @ts-expect-error "tokenizer.ggml.tokens"
-      stopWord = metadata['tokenizer.ggml.tokens'][index] ?? '';
-    } catch (err) {}
-
-    const stopWords: string[] = [];
-    if (stopWord.length > 0) {
-      stopWords.push(stopWord);
-    }
+    const promptTemplate = tokenizer?.promptTemplate ?? LLAMA_2;
+    const stopWords: string[] = [tokenizer?.stopWord ?? ''];
 
     const model: CreateModelDto = {
       sources: [
@@ -343,8 +396,8 @@
   private async fetchHuggingFaceRepoData(repoId: string) {
     const sanitizedUrl = this.getRepoModelsUrl(repoId);
 
-    const res = await fetch(sanitizedUrl);
-    const response = await res.json();
+    const res = await firstValueFrom(this.httpService.get(sanitizedUrl));
+    const response = res.data;
     if (response['error'] != null) {
       throw new Error(response['error']);
     }
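getHFModelTokenizer extracts the GGUF-metadata probing that pullModel previously inlined, so the new pullModelWithExactUrl can reuse it. The @huggingface/gguf parser reads a remote file's header via HTTP range requests, so only metadata is fetched, never the weights. One behavioral nuance of the refactor: the old code pushed the stop word only when non-empty, while the new code always builds [tokenizer?.stopWord ?? ''], so an empty string can now appear in the stop list. A condensed sketch of the lookup, with readStopWord as a hypothetical helper:

import { gguf } from '@huggingface/gguf';

// Reads only the GGUF header; the keys are the standard GGUF tokenizer
// metadata fields also used in the diff above.
async function readStopWord(ggufUrl: string): Promise<string | undefined> {
  const { metadata } = await gguf(ggufUrl);
  const meta = metadata as Record<string, any>;
  const eosId = meta['tokenizer.ggml.eos_token_id'];
  return meta['tokenizer.ggml.tokens']?.[eosId];
}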

cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts

Lines changed: 7 additions & 4 deletions

@@ -7,6 +7,7 @@ import { HttpService } from '@nestjs/axios';
 import { defaultCortexCppHost, defaultCortexCppPort } from 'constant';
 import { readdirSync } from 'node:fs';
 import { normalizeModelId } from '@/infrastructure/commanders/utils/normalize-model-id';
+import { firstValueFrom } from 'rxjs';
 
 /**
  * A class that implements the InferenceExtension interface from the @janhq/core package.
@@ -72,13 +73,15 @@ export default class CortexProvider extends OAIEngineExtension {
       modelSettings.ai_prompt = prompt.ai_prompt;
     }
 
-    await this.httpService.post(this.loadModelUrl, modelSettings).toPromise();
+    await firstValueFrom(
+      this.httpService.post(this.loadModelUrl, modelSettings),
+    );
   }
 
   override async unloadModel(modelId: string): Promise<void> {
-    await this.httpService
-      .post(this.unloadModelUrl, { model: modelId })
-      .toPromise();
+    await firstValueFrom(
+      this.httpService.post(this.unloadModelUrl, { model: modelId }),
+    );
   }
 
   private readonly promptTemplateConverter = (
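These two hunks repeat the toPromise() to firstValueFrom migration shown in init.cli.usecases.ts; the sketch after that file's diff applies here unchanged.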
