diff --git a/cortex-js/.eslintrc.cjs b/cortex-js/.eslintrc.cjs new file mode 100644 index 000000000..f116c675b --- /dev/null +++ b/cortex-js/.eslintrc.cjs @@ -0,0 +1,46 @@ +module.exports = { + parser: '@typescript-eslint/parser', + parserOptions: { + project: 'tsconfig.json', + tsconfigRootDir: __dirname, + sourceType: 'module', + }, + plugins: ['@typescript-eslint/eslint-plugin'], + extends: [ + 'plugin:@typescript-eslint/recommended', + 'plugin:prettier/recommended', + ], + root: true, + env: { + node: true, + jest: true, + }, + ignorePatterns: ['.eslintrc.js'], + rules: { + '@typescript-eslint/interface-name-prefix': 'off', + '@typescript-eslint/explicit-function-return-type': 'off', + '@typescript-eslint/explicit-module-boundary-types': 'off', + '@typescript-eslint/no-explicit-any': 'off', + '@typescript-eslint/no-unused-vars': ['warn'], + '@typescript-eslint/no-floating-promises': 'warn', + '@typescript-eslint/no-var-requires': 'warn', + '@typescript-eslint/ban-types': 'warn', + 'no-unused-vars': 'off', + 'require-await': 'off', + 'prefer-const': 'warn', + 'no-restricted-syntax': [ + 'warn', + { + selector: + 'CallExpression[callee.object.name=configService][callee.property.name=/^(get|getOrThrow)$/]:not(:has([arguments.1] Property[key.name=infer][value.value=true])), CallExpression[callee.object.property.name=configService][callee.property.name=/^(get|getOrThrow)$/]:not(:has([arguments.1] Property[key.name=infer][value.value=true]))', + message: + 'Add "{ infer: true }" to configService.get() for correct typechecking. 
Example: configService.get("database.port", { infer: true })', + }, + { + selector: + 'CallExpression[callee.name=it][arguments.0.value!=/^should/]', + message: '"it" should start with "should"', + }, + ], + }, +}; diff --git a/cortex-js/.eslintrc.js b/cortex-js/.eslintrc.js deleted file mode 100644 index 448a2d910..000000000 --- a/cortex-js/.eslintrc.js +++ /dev/null @@ -1,31 +0,0 @@ -module.exports = { - parser: '@typescript-eslint/parser', - parserOptions: { - project: 'tsconfig.json', - tsconfigRootDir: __dirname, - sourceType: 'module', - }, - plugins: ['@typescript-eslint/eslint-plugin'], - extends: [ - 'plugin:@typescript-eslint/recommended', - 'plugin:prettier/recommended', - ], - root: true, - env: { - node: true, - jest: true, - }, - ignorePatterns: ['.eslintrc.js'], - rules: { - '@typescript-eslint/interface-name-prefix': 'off', - '@typescript-eslint/explicit-function-return-type': 'off', - '@typescript-eslint/explicit-module-boundary-types': 'off', - '@typescript-eslint/no-explicit-any': 'off', - "prettier/prettier": [ - "error", - { - "endOfLine": "auto" - }, - ], - }, -}; diff --git a/cortex-js/package.json b/cortex-js/package.json index 52214d2e0..2ecd7fdf9 100644 --- a/cortex-js/package.json +++ b/cortex-js/package.json @@ -24,7 +24,7 @@ "start:dev": "nest start --watch", "start:debug": "nest start --debug --watch", "start:prod": "node dist/src/main --trace-deprecation", - "lint": "eslint \"{src,apps,libs,test}/**/*.ts\" --fix", + "lint": "eslint \"{src,apps,libs,test}/**/*.ts\"", "test": "jest", "test:watch": "jest --watch", "test:cov": "jest --coverage", @@ -39,8 +39,6 @@ }, "dependencies": { "@cortexso/cortex.js": "^0.1.5", - "@huggingface/gguf": "^0.1.5", - "@huggingface/hub": "^0.15.1", "@nestjs/axios": "^3.0.2", "@nestjs/common": "^10.0.0", "@nestjs/config": "^3.2.2", @@ -59,6 +57,7 @@ "cortex-cpp": "0.4.34", "cpu-instructions": "^0.0.11", "decompress": "^4.2.1", + "hyllama": "^0.2.2", "js-yaml": "^4.1.0", "nest-commander": "^3.13.0", "ora": 
"5.4.1", @@ -88,14 +87,16 @@ "@types/supertest": "^6.0.2", "@types/update-notifier": "^6.0.8", "@types/uuid": "^9.0.8", - "@typescript-eslint/eslint-plugin": "^6.0.0", - "@typescript-eslint/parser": "^6.0.0", + "@typescript-eslint/eslint-plugin": "7.16.1", + "@typescript-eslint/parser": "7.16.1", "@vercel/ncc": "^0.38.0", "@yao-pkg/pkg": "^5.12.0", "cpx": "^1.5.0", - "eslint": "^8.42.0", - "eslint-config-prettier": "^9.0.0", - "eslint-plugin-prettier": "^5.0.0", + "env-cmd": "10.1.0", + "eslint": "8.57.0", + "eslint-config-prettier": "9.1.0", + "eslint-plugin-import": "2.29.1", + "eslint-plugin-prettier": "5.2.1", "hanbi": "^1.0.3", "is-primitive": "^3.0.1", "jest": "^29.5.0", @@ -112,10 +113,6 @@ "tsconfig-paths": "^4.2.0", "typescript": "^5.1.3" }, - "resolutions": { - "ajv": "8.15.0", - "whatwg-url": "14.0.0" - }, "files": [ "dist" ], diff --git a/cortex-js/src/infrastructure/commanders/chat.command.ts b/cortex-js/src/infrastructure/commanders/chat.command.ts index 2b5adda53..260418d89 100644 --- a/cortex-js/src/infrastructure/commanders/chat.command.ts +++ b/cortex-js/src/infrastructure/commanders/chat.command.ts @@ -93,7 +93,7 @@ export class ChatCommand extends BaseCommand { ) { console.log('Downloading engine...'); await this.cortex.engines.init(engine); - await downloadProgress(this.cortex, undefined, DownloadType.Engine) + await downloadProgress(this.cortex, undefined, DownloadType.Engine); } if (!message) options.attach = true; @@ -108,13 +108,13 @@ export class ChatCommand extends BaseCommand { ); const preset = await this.fileService.getPreset(options.preset); - + return this.chatClient.chat( - modelId, - options.threadId, - message, // Accept both message from inputs or arguments - preset ? preset : {}, - ) + modelId, + options.threadId, + message, // Accept both message from inputs or arguments + preset ? 
preset : {}, + ); } modelInquiry = async (models: Cortex.Model[]) => { diff --git a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts index 563f08214..5afe01ca7 100644 --- a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts +++ b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts @@ -84,10 +84,7 @@ export class CortexCommand extends CommandRunner { return this.startServer(showLogs, dataFolderPath); } - private async startServer( - attach: boolean, - dataFolderPath?: string, - ) { + private async startServer(attach: boolean, dataFolderPath?: string) { const config = await this.fileManagerService.getConfig(); try { const startEngineSpinner = ora('Starting Cortex engine...'); @@ -119,9 +116,13 @@ export class CortexCommand extends CommandRunner { } else { await this.cortexUseCases.startServerDetached(this.host, this.port); } - console.log(chalk.blue(`Started server at http://${this.host}:${this.port}`)); console.log( - chalk.blue(`API Playground available at http://${this.host}:${this.port}/api`), + chalk.blue(`Started server at http://${this.host}:${this.port}`), + ); + console.log( + chalk.blue( + `API Playground available at http://${this.host}:${this.port}/api`, + ), ); await this.fileManagerService.writeConfigFile({ ...config, diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts index 14940e1c7..5e1a4a812 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts @@ -19,6 +19,7 @@ import { downloadProgress } from '@/utils/download-progress'; import { CortexClient } from '../services/cortex.client'; import { DownloadType } from '@/domain/models/download.interface'; import ora from 'ora'; +import { isLocalFile } from '@/utils/urls'; @SubCommand({ 
name: 'pull', @@ -61,9 +62,8 @@ export class ModelPullCommand extends BaseCommand { exit(1); }); - ora().succeed('Model downloaded'); - await downloadProgress(this.cortex, modelId); + ora().succeed('Model downloaded'); const existingModel = await this.cortex.models.retrieve(modelId); const engine = existingModel?.engine || Engines.llamaCPP; diff --git a/cortex-js/src/infrastructure/commanders/run.command.ts b/cortex-js/src/infrastructure/commanders/run.command.ts index 45416847f..086e6f701 100644 --- a/cortex-js/src/infrastructure/commanders/run.command.ts +++ b/cortex-js/src/infrastructure/commanders/run.command.ts @@ -13,6 +13,8 @@ import { ChatClient } from './services/chat-client'; import { downloadProgress } from '@/utils/download-progress'; import { CortexClient } from './services/cortex.client'; import { DownloadType } from '@/domain/models/download.interface'; +import { isLocalFile } from '@/utils/urls'; +import { parse } from 'node:path'; type RunOptions = { threadId?: string; @@ -71,6 +73,12 @@ export class RunCommand extends BaseCommand { await downloadProgress(this.cortex, modelId); checkingSpinner.succeed('Model downloaded'); + // Update to persisted modelId + // TODO: Should be retrieved from the request + if (isLocalFile(modelId)) { + modelId = parse(modelId).name; + } + // Second check if model is available existingModel = await this.cortex.models.retrieve(modelId); if (!existingModel) { @@ -93,6 +101,7 @@ export class RunCommand extends BaseCommand { } const startingSpinner = ora('Loading model...').start(); + return this.cortex.models .start(modelId, await this.fileService.getPreset(options.preset)) .then(() => { diff --git a/cortex-js/src/infrastructure/commanders/services/chat-client.ts b/cortex-js/src/infrastructure/commanders/services/chat-client.ts index 7da01e3e7..f617f83bc 100644 --- a/cortex-js/src/infrastructure/commanders/services/chat-client.ts +++ b/cortex-js/src/infrastructure/commanders/services/chat-client.ts @@ -55,7 +55,14 @@ 
export class ChatClient { }); rl.on('line', (input) => - this.sendCompletionMessage(input, messages, modelId, thread.id, rl, settings), + this.sendCompletionMessage( + input, + messages, + modelId, + thread.id, + rl, + settings, + ), ); } diff --git a/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts b/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts index d6829e0c4..83259eb8b 100644 --- a/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts +++ b/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts @@ -1,149 +1,146 @@ -import { TestingModule } from '@nestjs/testing'; -import { spy, Stub, stubMethod } from 'hanbi'; -import { CommandTestFactory } from 'nest-commander-testing'; -import { CommandModule } from '@/command.module'; -import { LogService } from '@/infrastructure/commanders/test/log.service'; -import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; - -import { join } from 'path'; -import { rmSync } from 'fs'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; - -let commandInstance: TestingModule, - exitSpy: Stub, - stdoutSpy: Stub, - stderrSpy: Stub; +// import { TestingModule } from '@nestjs/testing'; +// import { spy, Stub, stubMethod } from 'hanbi'; +// import { CommandTestFactory } from 'nest-commander-testing'; +// import { CommandModule } from '@/command.module'; +// import { LogService } from '@/infrastructure/commanders/test/log.service'; +// import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; + +// import { join } from 'path'; +// import { rmSync } from 'fs'; +// import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; + +// let commandInstance: TestingModule, +// exitSpy: Stub, +// stdoutSpy: Stub, +// stderrSpy: Stub; export const timeout = 500000; -beforeAll( - () => - new Promise(async (res) => { - stubMethod(process.stderr, 'write'); - exitSpy = 
stubMethod(process, 'exit'); - stdoutSpy = stubMethod(process.stdout, 'write'); - stderrSpy = stubMethod(process.stderr, 'write'); - commandInstance = await CommandTestFactory.createTestingCommand({ - imports: [CommandModule], - }) - .overrideProvider(LogService) - .useValue({ log: spy().handler }) - .compile(); - - const fileService = - await commandInstance.resolve(FileManagerService); - - // Attempt to create test folder - await fileService.writeConfigFile({ - dataFolderPath: join(__dirname, 'test_data'), - cortexCppHost: 'localhost', - cortexCppPort: 3929, - }); - const cortexUseCases = - await commandInstance.resolve(CortexUsecases); - jest - .spyOn(cortexUseCases, 'isAPIServerOnline') - .mockImplementation(() => Promise.resolve(true)); - res(); - }), -); - -afterEach(() => { - stdoutSpy.reset(); - stderrSpy.reset(); - exitSpy.reset(); -}); - -afterAll( - () => - new Promise(async (res) => { - // Attempt to clean test folder - rmSync(join(__dirname, 'test_data'), { - recursive: true, - force: true, - }); - res(); - }), -); +// beforeAll( +// () => +// new Promise(async (res) => { +// stubMethod(process.stderr, 'write'); +// exitSpy = stubMethod(process, 'exit'); +// stdoutSpy = stubMethod(process.stdout, 'write'); +// stderrSpy = stubMethod(process.stderr, 'write'); +// commandInstance = await CommandTestFactory.createTestingCommand({ +// imports: [CommandModule], +// }) +// .overrideProvider(LogService) +// .useValue({ log: spy().handler }) +// .compile(); + +// const fileService = +// await commandInstance.resolve(FileManagerService); + +// // Attempt to create test folder +// await fileService.writeConfigFile({ +// dataFolderPath: join(__dirname, 'test_data'), +// cortexCppHost: 'localhost', +// cortexCppPort: 3929, +// }); +// const cortexUseCases = +// await commandInstance.resolve(CortexUsecases); +// jest +// .spyOn(cortexUseCases, 'isAPIServerOnline') +// .mockImplementation(() => Promise.resolve(true)); +// res(); +// }), +// ); + +// afterEach(() => 
{ +// stdoutSpy.reset(); +// stderrSpy.reset(); +// exitSpy.reset(); +// }); + +// afterAll( +// () => +// new Promise(async (res) => { +// // Attempt to clean test folder +// rmSync(join(__dirname, 'test_data'), { +// recursive: true, +// force: true, +// }); +// res(); +// }), +// ); describe('Helper commands', () => { - // test( - // 'Init with hardware auto detection', - // async () => { - // await CommandTestFactory.run(commandInstance, ['init', '-s']); - // - // // Wait for a brief period to allow the command to execute - // await new Promise((resolve) => setTimeout(resolve, 1000)); - // - // expect(stdoutSpy.firstCall?.args.length).toBeGreaterThan(0); - // }, - // timeout, - // ); - - // test('Chat with option -m', async () => { - // const logMock = stubMethod(console, 'log'); - // - // await CommandTestFactory.run(commandInstance, [ - // 'chat', - // // '-m', - // // 'hello', - // // '>output.txt', - // ]); - // expect(logMock.firstCall?.args[0]).toBe("Inorder to exit, type 'exit()'."); - // // expect(exitSpy.callCount).toBe(1); - // // expect(exitSpy.firstCall?.args[0]).toBe(1); - // }); - test( - 'Show stop running models', + 'Init with hardware auto detection', async () => { - // const tableMock = stubMethod(console, 'table'); - - // const logMock = stubMethod(console, 'log'); - // await CommandTestFactory.run(commandInstance, ['stop']); - // await CommandTestFactory.run(commandInstance, ['ps']); - - // expect(logMock.firstCall?.args[0]).toEqual('API server stopped'); - // expect(tableMock.firstCall?.args[0]).toBeInstanceOf(Array); - // expect(tableMock.firstCall?.args[0].length).toEqual(0); + // // await CommandTestFactory.run(commandInstance, ['init', '-s']); + // // + // // // Wait for a brief period to allow the command to execute + // // await new Promise((resolve) => setTimeout(resolve, 1000)); + // // + // // expect(stdoutSpy.firstCall?.args.length).toBeGreaterThan(0); }, timeout, ); - test('Help command return guideline to users', async () => { - 
// await CommandTestFactory.run(commandInstance, ['-h']); - // expect(stdoutSpy.firstCall?.args).toBeInstanceOf(Array); - // expect(stdoutSpy.firstCall?.args.length).toBe(1); - // expect(stdoutSpy.firstCall?.args[0]).toContain('display help for command'); - - // expect(exitSpy.callCount).toBeGreaterThan(1); - // expect(exitSpy.firstCall?.args[0]).toBe(0); - }); - - test('Should handle missing command', async () => { - // await CommandTestFactory.run(commandInstance, ['--unknown']); - // expect(stderrSpy.firstCall?.args[0]).toContain('error: unknown option'); - // expect(stderrSpy.firstCall?.args[0]).toContain('--unknown'); - // expect(exitSpy.callCount).toBeGreaterThan(0); - // expect(exitSpy.firstCall?.args[0]).toBe(1); - }); - - // test('Local API server via default host/port localhost:1337/api', async () => { - // await CommandTestFactory.run(commandInstance, ['serve', '--detach']); - // - // await new Promise((resolve) => setTimeout(resolve, 2000)); - // - // expect(stdoutSpy.firstCall?.args[0]).toContain( - // 'Started server at http://localhost:1337', - // ); - // // Add a delay - // // Temporally disable for further investigation - // return new Promise(async (resolve) => { - // setTimeout(async () => { - // // Send a request to the API server to check if it's running - // const response = await axios.get('http://localhost:1337/api'); - // expect(response.status).toBe(200); - // resolve(); - // }, 5000); - // }); - // }, 15000); + // // test('Chat with option -m', async () => { + // // const logMock = stubMethod(console, 'log'); + // // + // // await CommandTestFactory.run(commandInstance, [ + // // 'chat', + // // // '-m', + // // // 'hello', + // // // '>output.txt', + // // ]); + // // expect(logMock.firstCall?.args[0]).toBe("Inorder to exit, type 'exit()'."); + // // // expect(exitSpy.callCount).toBe(1); + // // // expect(exitSpy.firstCall?.args[0]).toBe(1); }); + +// test( +// 'Show stop running models', +// async () => { +// // const tableMock = 
stubMethod(console, 'table'); +// // const logMock = stubMethod(console, 'log'); +// // await CommandTestFactory.run(commandInstance, ['stop']); +// // await CommandTestFactory.run(commandInstance, ['ps']); +// // expect(logMock.firstCall?.args[0]).toEqual('API server stopped'); +// // expect(tableMock.firstCall?.args[0]).toBeInstanceOf(Array); +// // expect(tableMock.firstCall?.args[0].length).toEqual(0); +// }, +// timeout, +// ); + +// test('Help command return guideline to users', async () => { +// // await CommandTestFactory.run(commandInstance, ['-h']); +// // expect(stdoutSpy.firstCall?.args).toBeInstanceOf(Array); +// // expect(stdoutSpy.firstCall?.args.length).toBe(1); +// // expect(stdoutSpy.firstCall?.args[0]).toContain('display help for command'); +// // expect(exitSpy.callCount).toBeGreaterThan(1); +// // expect(exitSpy.firstCall?.args[0]).toBe(0); +// }); + +// test('Should handle missing command', async () => { +// // await CommandTestFactory.run(commandInstance, ['--unknown']); +// // expect(stderrSpy.firstCall?.args[0]).toContain('error: unknown option'); +// // expect(stderrSpy.firstCall?.args[0]).toContain('--unknown'); +// // expect(exitSpy.callCount).toBeGreaterThan(0); +// // expect(exitSpy.firstCall?.args[0]).toBe(1); +// }); + +// // test('Local API server via default host/port localhost:1337/api', async () => { +// // await CommandTestFactory.run(commandInstance, ['serve', '--detach']); +// // +// // await new Promise((resolve) => setTimeout(resolve, 2000)); +// // +// // expect(stdoutSpy.firstCall?.args[0]).toContain( +// // 'Started server at http://localhost:1337', +// // ); +// // // Add a delay +// // // Temporally disable for further investigation +// // return new Promise(async (resolve) => { +// // setTimeout(async () => { +// // // Send a request to the API server to check if it's running +// // const response = await axios.get('http://localhost:1337/api'); +// // expect(response.status).toBe(200); +// // resolve(); +// // }, 
5000); +// // }); +// // }, 15000); +// }); diff --git a/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts b/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts index a1c6fa0e2..27b655cd6 100644 --- a/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts +++ b/cortex-js/src/infrastructure/commanders/types/model-tokenizer.interface.ts @@ -2,4 +2,5 @@ export interface ModelMetadata { stopWord?: string; promptTemplate: string; version: number; + name?: string; } diff --git a/cortex-js/src/infrastructure/constants/benchmark.ts b/cortex-js/src/infrastructure/constants/benchmark.ts index 4c253d46f..477c4eba3 100644 --- a/cortex-js/src/infrastructure/constants/benchmark.ts +++ b/cortex-js/src/infrastructure/constants/benchmark.ts @@ -28,7 +28,7 @@ export const defaultBenchmarkConfiguration: BenchmarkConfig = { min: 1024, max: 2048, samples: 10, - }, + }, output: 'table', hardware: ['cpu', 'gpu', 'psu', 'chassis', 'ram'], concurrency: 1, diff --git a/cortex-js/src/infrastructure/controllers/engines.controller.ts b/cortex-js/src/infrastructure/controllers/engines.controller.ts index 2aa770aab..0a25f74e0 100644 --- a/cortex-js/src/infrastructure/controllers/engines.controller.ts +++ b/cortex-js/src/infrastructure/controllers/engines.controller.ts @@ -80,15 +80,19 @@ export class EnginesController { description: 'The unique identifier of the engine.', }) @Post(':name(*)/init') - initialize(@Param('name') name: string, @Body() body: InitEngineDto | undefined, @Res() res: Response) { - try{ - this.initUsescases.installEngine(body, name, true); - res.json({ + initialize( + @Param('name') name: string, + @Body() body: InitEngineDto | undefined, + @Res() res: Response, + ) { + try { + this.initUsescases.installEngine(body, name, true); + res.json({ message: 'Engine initialization started successfully.', - }) - } catch (error) { - res.status(400).send(error.message); - } + }); + } catch (error) { + 
res.status(400).send(error.message); + } } @HttpCode(200) diff --git a/cortex-js/src/infrastructure/controllers/models.controller.ts b/cortex-js/src/infrastructure/controllers/models.controller.ts index 71933d005..dc0043166 100644 --- a/cortex-js/src/infrastructure/controllers/models.controller.ts +++ b/cortex-js/src/infrastructure/controllers/models.controller.ts @@ -138,10 +138,11 @@ export class ModelsController { @Post(':modelId(*)/pull') pullModel( @Param('modelId') modelId: string, - @Body() body?: { + @Body() + body?: { fileName?: string; persistedModelId?: string; - } + }, ) { const { fileName, persistedModelId } = body || {}; this.modelsUsecases diff --git a/cortex-js/src/infrastructure/controllers/system.controller.ts b/cortex-js/src/infrastructure/controllers/system.controller.ts index f4d73eed3..da16ae38c 100644 --- a/cortex-js/src/infrastructure/controllers/system.controller.ts +++ b/cortex-js/src/infrastructure/controllers/system.controller.ts @@ -51,7 +51,7 @@ export class SystemController { } @ApiOperation({ - summary: "Get health status", + summary: 'Get health status', description: "Retrieves the health status of your Cortex's system.", }) @HttpCode(200) diff --git a/cortex-js/src/infrastructure/database/providers/assistant.providers.ts b/cortex-js/src/infrastructure/database/providers/assistant.providers.ts index 02ed0c90d..7a7dc9462 100644 --- a/cortex-js/src/infrastructure/database/providers/assistant.providers.ts +++ b/cortex-js/src/infrastructure/database/providers/assistant.providers.ts @@ -4,7 +4,7 @@ import { Sequelize } from 'sequelize-typescript'; export const assistantProviders = [ { provide: 'ASSISTANT_REPOSITORY', - useFactory: async(sequelize: Sequelize) =>{ + useFactory: async (sequelize: Sequelize) => { return sequelize.getRepository(AssistantEntity); }, inject: ['DATA_SOURCE'], diff --git a/cortex-js/src/infrastructure/database/providers/message.providers.ts b/cortex-js/src/infrastructure/database/providers/message.providers.ts 
index aa6cc9261..9b1b50227 100644 --- a/cortex-js/src/infrastructure/database/providers/message.providers.ts +++ b/cortex-js/src/infrastructure/database/providers/message.providers.ts @@ -1,10 +1,10 @@ -import { MessageEntity } from "@/infrastructure/entities/message.entity"; -import { Sequelize } from "sequelize-typescript"; +import { MessageEntity } from '@/infrastructure/entities/message.entity'; +import { Sequelize } from 'sequelize-typescript'; export const messageProviders = [ { provide: 'MESSAGE_REPOSITORY', - useFactory: async(sequelize: Sequelize) =>{ + useFactory: async (sequelize: Sequelize) => { return sequelize.getRepository(MessageEntity); }, inject: ['DATA_SOURCE'], diff --git a/cortex-js/src/infrastructure/database/providers/thread.providers.ts b/cortex-js/src/infrastructure/database/providers/thread.providers.ts index 8f91cb9a7..0db54a6dd 100644 --- a/cortex-js/src/infrastructure/database/providers/thread.providers.ts +++ b/cortex-js/src/infrastructure/database/providers/thread.providers.ts @@ -4,10 +4,9 @@ import { Sequelize } from 'sequelize-typescript'; export const threadProviders = [ { provide: 'THREAD_REPOSITORY', - useFactory: async(sequelize: Sequelize) =>{ + useFactory: async (sequelize: Sequelize) => { return sequelize.getRepository(ThreadEntity); }, inject: ['DATA_SOURCE'], }, ]; - diff --git a/cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts b/cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts index 2e3ed859d..19fd17999 100644 --- a/cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts +++ b/cortex-js/src/infrastructure/dtos/chat/chat-completion-message.dto.ts @@ -2,13 +2,13 @@ import { IsString } from 'class-validator'; import { ApiProperty } from '@nestjs/swagger'; export class ChatCompletionMessage { - @ApiProperty({ description: 'The Content of the chat message.', }) + @ApiProperty({ description: 'The Content of the chat message.' 
}) @IsString() content: string; @ApiProperty({ description: 'The role of the entity in the chat completion.', - example: 'user' + example: 'user', }) role: 'user' | 'assistant'; } diff --git a/cortex-js/src/infrastructure/dtos/chat/create-chat-completion.dto.ts b/cortex-js/src/infrastructure/dtos/chat/create-chat-completion.dto.ts index d7d421e3b..07b2f429c 100644 --- a/cortex-js/src/infrastructure/dtos/chat/create-chat-completion.dto.ts +++ b/cortex-js/src/infrastructure/dtos/chat/create-chat-completion.dto.ts @@ -30,7 +30,7 @@ export class CreateChatCompletionDto { @ApiProperty({ description: 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.', - example: true, + example: true, }) @IsOptional() @IsBoolean() @@ -39,7 +39,7 @@ export class CreateChatCompletionDto { @ApiProperty({ description: 'Sets the upper limit on the number of tokens the model can generate in a single output.', - example: 4096, + example: 4096, }) @IsOptional() @IsNumber() @@ -48,7 +48,7 @@ export class CreateChatCompletionDto { @ApiProperty({ description: 'Defines specific tokens or phrases that signal the model to stop producing further output.', - example: ["End"], + example: ['End'], }) @IsOptional() @IsArray() @@ -57,7 +57,7 @@ export class CreateChatCompletionDto { @ApiProperty({ description: 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', - example: 0.2, + example: 0.2, }) @IsOptional() @IsNumber() @@ -66,7 +66,7 @@ export class CreateChatCompletionDto { @ApiProperty({ description: 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', - example: 0.6, + example: 0.6, }) @IsOptional() @IsNumber() diff --git a/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts b/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts index 
3d7cf65b3..dac89dcfc 100644 --- a/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts +++ b/cortex-js/src/infrastructure/dtos/chat/embeddings-response.dto.ts @@ -15,13 +15,15 @@ export class EmbeddingsResponseDto { model: string; @ApiProperty({ - description: 'The embedding vector represented as an array of floating-point numbers. ', + description: + 'The embedding vector represented as an array of floating-point numbers. ', type: [Number], }) embedding: [number]; @ApiProperty({ - description: 'Details of token usage, including prompt_tokens and total_tokens.', + description: + 'Details of token usage, including prompt_tokens and total_tokens.', type: UsageDto, }) usage: UsageDto; diff --git a/cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts b/cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts index 6c81f10c9..a99ad86b6 100644 --- a/cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts +++ b/cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts @@ -28,8 +28,8 @@ export class CreateMessageDto implements Partial { example: [ { type: 'text', - data: 'Hello, how can I help you today?' 
- } + data: 'Hello, how can I help you today?', + }, ], description: 'The content of the messages.', }) @@ -44,7 +44,8 @@ export class CreateMessageDto implements Partial { @ApiProperty({ example: { urgency: 'high', tags: ['customer_support'] }, - description: 'Optional dictionary for additional unstructured message information.', + description: + 'Optional dictionary for additional unstructured message information.', }) metadata?: Record; @@ -55,4 +56,3 @@ export class CreateMessageDto implements Partial { @IsString() type?: string; } - diff --git a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts index e3998d0fd..19cca03c1 100644 --- a/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts +++ b/cortex-js/src/infrastructure/dtos/models/create-model.dto.ts @@ -12,7 +12,10 @@ import { ApiProperty, getSchemaPath } from '@nestjs/swagger'; export class CreateModelDto implements Partial { // Cortex Meta - @ApiProperty({ description: 'The unique identifier of the model.', example: 'mistral' }) + @ApiProperty({ + description: 'The unique identifier of the model.', + example: 'mistral', + }) @IsString() model: string; @@ -21,7 +24,8 @@ export class CreateModelDto implements Partial { name?: string; @ApiProperty({ - description: 'The URL sources from which the model downloaded or accessed.', example: ['https://huggingface.co/cortexso/mistral/tree/gguf'], + description: 'The URL sources from which the model downloaded or accessed.', + example: ['https://huggingface.co/cortexso/mistral/tree/gguf'], oneOf: [ { type: 'array', items: { type: 'string' } }, { $ref: getSchemaPath(ModelArtifactDto) }, @@ -33,8 +37,9 @@ export class CreateModelDto implements Partial { // Model Input / Output Syntax @ApiProperty({ description: - "A predefined text or framework that guides the AI model's response generation.", example: ` - You are an expert in {subject}. 
Provide a detailed and thorough explanation on the topic of {topic}.` + "A predefined text or framework that guides the AI model's response generation.", + example: ` + You are an expert in {subject}. Provide a detailed and thorough explanation on the topic of {topic}.`, }) @IsOptional() @IsString() @@ -42,7 +47,8 @@ export class CreateModelDto implements Partial { @ApiProperty({ description: - 'Defines specific tokens or phrases that signal the model to stop producing further output.', example: ['End'] + 'Defines specific tokens or phrases that signal the model to stop producing further output.', + example: ['End'], }) @IsOptional() @IsArray() @@ -59,14 +65,16 @@ export class CreateModelDto implements Partial { max_tokens?: number; @ApiProperty({ - description: 'Sets probability threshold for more relevant outputs.', example: 0.9 + description: 'Sets probability threshold for more relevant outputs.', + example: 0.9, }) @IsOptional() @IsNumber() top_p?: number; @ApiProperty({ - description: "Influences the randomness of the model's output.", example: 0.7 + description: "Influences the randomness of the model's output.", + example: 0.7, }) @IsOptional() @IsNumber() @@ -74,7 +82,8 @@ export class CreateModelDto implements Partial { @ApiProperty({ description: - 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', example: 0.5 + 'Modifies the likelihood of the model repeating the same words or phrases within a single output.', + example: 0.5, }) @IsOptional() @IsNumber() @@ -82,7 +91,8 @@ export class CreateModelDto implements Partial { @ApiProperty({ description: - 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', example: 0.6 + 'Reduces the likelihood of repeating tokens, promoting novelty in the output.', + example: 0.6, }) @IsOptional() @IsNumber() @@ -90,7 +100,8 @@ export class CreateModelDto implements Partial { @ApiProperty({ description: - 'Determines the format for output 
generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.', example: true + 'Determines the format for output generation. If set to `true`, the output is generated continuously, allowing for real-time streaming of responses. If set to `false`, the output is delivered in a single JSON file.', + example: true, }) @IsOptional() @IsBoolean() diff --git a/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts b/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts index 8bf2d4204..fbca1138c 100644 --- a/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts +++ b/cortex-js/src/infrastructure/dtos/threads/create-thread-assistant.dto.ts @@ -17,7 +17,7 @@ export class CreateThreadAssistantDto implements Assistant { @ApiProperty({ example: 'https://example.com/avatar.png', - description: 'URL of the assistant\'s avatar image.', + description: "URL of the assistant's avatar image.", type: 'string', }) @IsOptional() @@ -34,23 +34,32 @@ export class CreateThreadAssistantDto implements Assistant { @ApiProperty({ example: 'mistral', - description: 'The model\'s unique identifier and settings.', + description: "The model's unique identifier and settings.", type: 'string', }) @IsString() model: string; @ApiProperty({ - example: 'Assist with customer queries and provide information based on the company database.', - description: 'The assistant\'s specific instructions.', + example: + 'Assist with customer queries and provide information based on the company database.', + description: "The assistant's specific instructions.", type: 'string', }) @IsString() instructions: string; @ApiProperty({ - example: [{ name: 'Knowledge Retrieval', settings: { source: 'internal', endpoint: 'https://api.example.com/knowledge' } }], - description: 'The thread\'s tool(Knowledge Retrieval) configurations.', + example: [ + { + 
name: 'Knowledge Retrieval', + settings: { + source: 'internal', + endpoint: 'https://api.example.com/knowledge', + }, + }, + ], + description: "The thread's tool(Knowledge Retrieval) configurations.", type: 'array', }) @IsOptional() @@ -58,7 +67,8 @@ export class CreateThreadAssistantDto implements Assistant { tools: any; @ApiProperty({ - example: 'This assistant helps with customer support by retrieving relevant information.', + example: + 'This assistant helps with customer support by retrieving relevant information.', description: 'The description of the assistant.', type: 'string', }) @@ -121,4 +131,4 @@ export class CreateThreadAssistantDto implements Assistant { }) @IsOptional() tool_resources?: AssistantToolResources; -} \ No newline at end of file +} diff --git a/cortex-js/src/infrastructure/entities/assistant.entity.ts b/cortex-js/src/infrastructure/entities/assistant.entity.ts index f9ef78435..132a14ad9 100644 --- a/cortex-js/src/infrastructure/entities/assistant.entity.ts +++ b/cortex-js/src/infrastructure/entities/assistant.entity.ts @@ -1,11 +1,17 @@ -import { Table, Column, Model, PrimaryKey, DataType } from 'sequelize-typescript'; +import { + Table, + Column, + Model, + PrimaryKey, + DataType, +} from 'sequelize-typescript'; import { Assistant } from '@/domain/models/assistant.interface'; import type { AssistantToolResources, AssistantResponseFormatOption, } from '@/domain/models/assistant.interface'; -@Table({ tableName: 'assistants', timestamps: false}) +@Table({ tableName: 'assistants', timestamps: false }) export class AssistantEntity extends Model implements Assistant { @PrimaryKey @Column({ diff --git a/cortex-js/src/infrastructure/entities/message.entity.ts b/cortex-js/src/infrastructure/entities/message.entity.ts index 1fe6192a5..c40beaecc 100644 --- a/cortex-js/src/infrastructure/entities/message.entity.ts +++ b/cortex-js/src/infrastructure/entities/message.entity.ts @@ -1,4 +1,10 @@ -import { Table, Column, Model, PrimaryKey, DataType } 
from 'sequelize-typescript'; +import { + Table, + Column, + Model, + PrimaryKey, + DataType, +} from 'sequelize-typescript'; import type { Message, MessageContent, @@ -6,7 +12,7 @@ import type { MessageAttachment, } from '@/domain/models/message.interface'; -@Table({ tableName: 'messages', timestamps: false}) +@Table({ tableName: 'messages', timestamps: false }) export class MessageEntity extends Model implements Message { @PrimaryKey @Column({ diff --git a/cortex-js/src/infrastructure/entities/thread.entity.ts b/cortex-js/src/infrastructure/entities/thread.entity.ts index fabd11338..2f02e5ff8 100644 --- a/cortex-js/src/infrastructure/entities/thread.entity.ts +++ b/cortex-js/src/infrastructure/entities/thread.entity.ts @@ -1,8 +1,17 @@ -import { Table, Column, Model, PrimaryKey, DataType } from 'sequelize-typescript'; -import type { Thread, ThreadToolResources } from '@/domain/models/thread.interface'; +import { + Table, + Column, + Model, + PrimaryKey, + DataType, +} from 'sequelize-typescript'; +import type { + Thread, + ThreadToolResources, +} from '@/domain/models/thread.interface'; import { AssistantEntity } from './assistant.entity'; -@Table({ tableName: 'threads', timestamps: false}) +@Table({ tableName: 'threads', timestamps: false }) export class ThreadEntity extends Model implements Thread { @PrimaryKey @Column({ diff --git a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts index 8f9a9ce67..0744876f3 100644 --- a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts +++ b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts @@ -30,8 +30,8 @@ export class ExtensionRepositoryImpl implements ExtensionRepository { } /** * Persist extension to the extensions map - * @param object - * @returns + * @param object + * @returns */ create(object: Extension): Promise { this.extensions.set(object.name ?? 
'', object); @@ -40,7 +40,7 @@ export class ExtensionRepositoryImpl implements ExtensionRepository { /** * Find all extensions - * @returns + * @returns */ findAll(): Promise { return Promise.resolve(Array.from(this.extensions.values())); @@ -48,8 +48,8 @@ export class ExtensionRepositoryImpl implements ExtensionRepository { /** * Find one extension by id - * @param id - * @returns + * @param id + * @returns */ findOne(id: string): Promise { return Promise.resolve(this.extensions.get(id) ?? null); @@ -65,8 +65,8 @@ export class ExtensionRepositoryImpl implements ExtensionRepository { /** * Remove extension from the extensions map - * @param id - * @returns + * @param id + * @returns */ remove(id: string): Promise { this.extensions.delete(id); diff --git a/cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts b/cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts index cd4a3ebd0..9ed2887af 100644 --- a/cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts +++ b/cortex-js/src/infrastructure/services/download-manager/download-manager.service.ts @@ -136,7 +136,7 @@ export class DownloadManagerService { this.httpService.get(url, { responseType: 'stream', signal: controller.signal, - }) + }), ); // check if response is success @@ -169,16 +169,16 @@ export class DownloadManagerService { const resetTimeout = () => { if (timeoutId) clearTimeout(timeoutId); timeoutId = setTimeout(() => { - try{ - this.handleError( - new Error('Download timeout'), - downloadId, - destination, - ) - } finally { - bar.stop(); - resolve(); - } + try { + this.handleError( + new Error('Download timeout'), + downloadId, + destination, + ); + } finally { + bar.stop(); + resolve(); + } }, timeout); }; @@ -265,29 +265,29 @@ export class DownloadManagerService { } private handleError(error: Error, downloadId: string, destination: string) { - console.log(this.allDownloadStates, downloadId, destination) + 
console.log(this.allDownloadStates, downloadId, destination); delete this.abortControllers[downloadId][destination]; - const currentDownloadState = this.allDownloadStates.find( - (downloadState) => downloadState.id === downloadId, - ); - if (!currentDownloadState) return; + const currentDownloadState = this.allDownloadStates.find( + (downloadState) => downloadState.id === downloadId, + ); + if (!currentDownloadState) return; - const downloadItem = currentDownloadState?.children.find( - (downloadItem) => downloadItem.id === destination, - ); - if (downloadItem) { - downloadItem.status = DownloadStatus.Error; - downloadItem.error = error.message; - } + const downloadItem = currentDownloadState?.children.find( + (downloadItem) => downloadItem.id === destination, + ); + if (downloadItem) { + downloadItem.status = DownloadStatus.Error; + downloadItem.error = error.message; + } - currentDownloadState.status = DownloadStatus.Error; - currentDownloadState.error = error.message; + currentDownloadState.status = DownloadStatus.Error; + currentDownloadState.error = error.message; - // remove download state if all children is downloaded - this.allDownloadStates = this.allDownloadStates.filter( - (downloadState) => downloadState.id !== downloadId, - ); - this.eventEmitter.emit('download.event', [currentDownloadState]); - this.eventEmitter.emit('download.event', this.allDownloadStates); + // remove download state if all children is downloaded + this.allDownloadStates = this.allDownloadStates.filter( + (downloadState) => downloadState.id !== downloadId, + ); + this.eventEmitter.emit('download.event', [currentDownloadState]); + this.eventEmitter.emit('download.event', this.allDownloadStates); } } diff --git a/cortex-js/src/usecases/chat/chat.usecases.ts b/cortex-js/src/usecases/chat/chat.usecases.ts index 12855ad3b..1a6784f49 100644 --- a/cortex-js/src/usecases/chat/chat.usecases.ts +++ b/cortex-js/src/usecases/chat/chat.usecases.ts @@ -52,13 +52,11 @@ export class ChatUsecases { } 
const payload = { ...createChatDto, - ...(model.engine && !isRemoteEngine(model.engine) && { engine: model.engine }), + ...(model.engine && + !isRemoteEngine(model.engine) && { engine: model.engine }), }; try { - return await engine.inference( - payload, - headers, - ); + return await engine.inference(payload, headers); } catch (error) { await this.telemetryUseCases.createCrashReport( error, diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts index da05e2918..07a4c5aa9 100644 --- a/cortex-js/src/usecases/cortex/cortex.usecases.ts +++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts @@ -75,9 +75,9 @@ export class CortexUsecases implements BeforeApplicationShutdown { // Handle process exit this.cortexProcess.on('close', (code) => { - this.cortexProcess = undefined + this.cortexProcess = undefined; console.log(`child process exited with code ${code}`); - }); + }); // Await for the /healthz status ok return new Promise((resolve, reject) => { diff --git a/cortex-js/src/usecases/messages/messages.usecases.ts b/cortex-js/src/usecases/messages/messages.usecases.ts index 13eaac7c4..2e4fab8d8 100644 --- a/cortex-js/src/usecases/messages/messages.usecases.ts +++ b/cortex-js/src/usecases/messages/messages.usecases.ts @@ -10,7 +10,7 @@ import { Repository } from 'sequelize-typescript'; export class MessagesUsecases { constructor( @Inject('MESSAGE_REPOSITORY') - private messageRepository: Repository, + private messageRepository: Repository, ) {} async create(createMessageDto: CreateMessageDto) { @@ -44,10 +44,11 @@ export class MessagesUsecases { } async update(id: string, updateMessageDto: UpdateMessageDto) { - const [numberOfAffectedRows, [updatedMessage]] = await this.messageRepository.update(updateMessageDto, { - where: { id }, - returning: true, - }); + const [numberOfAffectedRows, [updatedMessage]] = + await this.messageRepository.update(updateMessageDto, { + where: { id }, + returning: true, + }); return { 
numberOfAffectedRows, updatedMessage }; } diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index 1f67f35f6..ed8064e0c 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -4,7 +4,7 @@ import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; import { BadRequestException, Injectable } from '@nestjs/common'; import { Model, ModelSettingParams } from '@/domain/models/model.interface'; import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; -import { basename, join } from 'path'; +import { basename, join, parse } from 'path'; import { promises, existsSync, mkdirSync, readFileSync, rmSync } from 'fs'; import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; import { ExtensionRepository } from '@/domain/repositories/extension.interface'; @@ -17,7 +17,6 @@ import { TelemetrySource } from '@/domain/telemetry/telemetry.interface'; import { ModelRepository } from '@/domain/repositories/model.interface'; import { ModelParameterParser } from '@/utils/model-parameter.parser'; import { - HuggingFaceModelVersion, HuggingFaceRepoData, HuggingFaceRepoSibling, } from '@/domain/models/huggingface.interface'; @@ -26,7 +25,10 @@ import { fetchJanRepoData, getHFModelMetadata, } from '@/utils/huggingface'; -import { DownloadType } from '@/domain/models/download.interface'; +import { + DownloadStatus, + DownloadType, +} from '@/domain/models/download.interface'; import { EventEmitter2 } from '@nestjs/event-emitter'; import { ModelEvent, ModelId, ModelStatus } from '@/domain/models/model.event'; import { DownloadManagerService } from '@/infrastructure/services/download-manager/download-manager.service'; @@ -35,6 +37,7 @@ import { Engines } from '@/infrastructure/commanders/types/engine.interface'; import { load } from 'js-yaml'; import { llamaModelFile } from 
'@/utils/app-path'; import { CortexUsecases } from '../cortex/cortex.usecases'; +import { isLocalFile } from '@/utils/urls'; @Injectable() export class ModelsUsecases { @@ -127,7 +130,9 @@ export class ModelsUsecases { )) as EngineExtension | undefined; if (engine) { - await engine.unloadModel(id, model.engine || Engines.llamaCPP).catch(() => {}); // Silent fail + await engine + .unloadModel(id, model.engine || Engines.llamaCPP) + .catch(() => {}); // Silent fail } return this.modelRepository .remove(id) @@ -174,7 +179,7 @@ export class ModelsUsecases { } // Attempt to start cortex - await this.cortexUsecases.startCortex() + await this.cortexUsecases.startCortex(); const loadingModelSpinner = ora('Loading model...').start(); // update states and emitting event @@ -341,10 +346,26 @@ export class ModelsUsecases { ) { const modelId = persistedModelId ?? originModelId; const existingModel = await this.findOne(modelId); + if (isLocalModel(existingModel?.files)) { throw new BadRequestException('Model already exists'); } + // Pull a local model file + if (isLocalFile(originModelId)) { + await this.populateHuggingFaceModel(originModelId, persistedModelId); + this.eventEmitter.emit('download.event', [ + { + id: modelId, + type: DownloadType.Model, + status: DownloadStatus.Downloaded, + progress: 100, + children: [], + }, + ]); + return; + } + const modelsContainerDir = await this.fileManagerService.getModelsPath(); if (!existsSync(modelsContainerDir)) { @@ -422,22 +443,18 @@ export class ModelsUsecases { model.model = modelId; if (!(await this.findOne(modelId))) await this.create(model); } else { - await this.populateHuggingFaceModel(modelId, files[0]); - const model = await this.findOne(modelId); - if (model) { - const fileUrl = join( - await this.fileManagerService.getModelsPath(), - normalizeModelId(modelId), - basename( - files.find((e) => e.rfilename.endsWith('.gguf'))?.rfilename ?? 
- files[0].rfilename, - ), - ); - await this.update(modelId, { - files: [fileUrl], - name: modelId.replace(':main', ''), - }); - } + const fileUrl = join( + await this.fileManagerService.getModelsPath(), + normalizeModelId(modelId), + basename( + files.find((e) => e.rfilename.endsWith('.gguf'))?.rfilename ?? + files[0].rfilename, + ), + ); + await this.populateHuggingFaceModel( + fileUrl, + modelId.replace(':main', ''), + ); } uploadModelMetadataSpiner.succeed('Model metadata updated'); const modelEvent: ModelEvent = { @@ -458,21 +475,18 @@ export class ModelsUsecases { * It could be a model from Jan's repo or other authors * @param modelId HuggingFace model id. e.g. "janhq/llama-3 or llama3:7b" */ - async populateHuggingFaceModel( - modelId: string, - modelVersion: HuggingFaceModelVersion, - ) { - if (!modelVersion) throw 'No expected quantization found'; - - const tokenizer = await getHFModelMetadata(modelVersion.downloadUrl!); + async populateHuggingFaceModel(ggufUrl: string, overridenId?: string) { + const metadata = await getHFModelMetadata(ggufUrl); - const stopWords: string[] = tokenizer?.stopWord ? [tokenizer.stopWord] : []; + const stopWords: string[] = metadata?.stopWord ? [metadata.stopWord] : []; + const modelId = + overridenId ?? (isLocalFile(ggufUrl) ? parse(ggufUrl).name : ggufUrl); const model: CreateModelDto = { - files: [modelVersion.downloadUrl ?? ''], + files: [ggufUrl], model: modelId, - name: modelId, - prompt_template: tokenizer?.promptTemplate, + name: metadata?.name ?? 
modelId, + prompt_template: metadata?.promptTemplate, stop: stopWords, // Default Inference Params diff --git a/cortex-js/src/usecases/threads/threads.usecases.ts b/cortex-js/src/usecases/threads/threads.usecases.ts index 0ffcfa522..7791b955e 100644 --- a/cortex-js/src/usecases/threads/threads.usecases.ts +++ b/cortex-js/src/usecases/threads/threads.usecases.ts @@ -21,7 +21,7 @@ export class ThreadsUsecases { @Inject('THREAD_REPOSITORY') private threadRepository: Repository, @Inject('MESSAGE_REPOSITORY') - private messageRepository: Repository, + private messageRepository: Repository, ) {} async create(createThreadDto: CreateThreadDto): Promise { @@ -131,7 +131,9 @@ export class ThreadsUsecases { updateMessageDto: UpdateMessageDto, ) { await this.getThreadOrThrow(threadId); - await this.messageRepository.update(updateMessageDto, { where: { id: messageId } }); + await this.messageRepository.update(updateMessageDto, { + where: { id: messageId }, + }); return this.messageRepository.findOne({ where: { id: messageId } }); } @@ -183,7 +185,10 @@ export class ThreadsUsecases { await this.threadRepository.destroy({ where: { id } }); } - async deleteMessage(_threadId: string, messageId: string): Promise { + async deleteMessage( + _threadId: string, + messageId: string, + ): Promise { await this.getMessageOrThrow(messageId); await this.messageRepository.destroy({ where: { id: messageId } }); @@ -195,7 +200,7 @@ export class ThreadsUsecases { } async retrieveMessage(_threadId: string, messageId: string) { - // we still allow user to delete message even if the thread is not there + // we still allow user to delete message even if the thread is not there return this.getMessageOrThrow(messageId); } diff --git a/cortex-js/src/utils/app-path.ts b/cortex-js/src/utils/app-path.ts index 08124fa46..d4fb0b1e0 100644 --- a/cortex-js/src/utils/app-path.ts +++ b/cortex-js/src/utils/app-path.ts @@ -35,9 +35,9 @@ export const llamaModelFile = ( } const path = 'llama_model_path' in artifact 
- ? (artifact as ModelArtifact).llama_model_path ?? '' + ? ((artifact as ModelArtifact).llama_model_path ?? '') : 'model_path' in files - ? (artifact as ModelArtifact).model_path ?? '' + ? ((artifact as ModelArtifact).model_path ?? '') : (artifact as string[])[0]; return basename(path); }; diff --git a/cortex-js/src/utils/cuda.ts b/cortex-js/src/utils/cuda.ts index c533414f9..679f61842 100644 --- a/cortex-js/src/utils/cuda.ts +++ b/cortex-js/src/utils/cuda.ts @@ -15,7 +15,6 @@ export type GpuSettingInfo = { * @returns CUDA Version 11 | 12 */ export const cudaVersion = async () => { - let filesCuda12: string[]; let filesCuda11: string[]; let paths: string[]; @@ -77,24 +76,27 @@ export const getCudaVersion = (): Promise => { // Execute the nvidia-smi command exec('nvidia-smi', (error, stdout) => { if (!error) { - const cudaVersionLine = stdout.split('\n').find(line => line.includes('CUDA Version')); - + const cudaVersionLine = stdout + .split('\n') + .find((line) => line.includes('CUDA Version')); + if (cudaVersionLine) { - // Extract the CUDA version number - const cudaVersionMatch = cudaVersionLine.match(/CUDA Version:\s+(\d+\.\d+)/); - if (cudaVersionMatch) { - const cudaVersion = cudaVersionMatch[1]; - resolve(cudaVersion); - } else { - reject('CUDA Version not found.'); - } - } else { + // Extract the CUDA version number + const cudaVersionMatch = cudaVersionLine.match( + /CUDA Version:\s+(\d+\.\d+)/, + ); + if (cudaVersionMatch) { + const cudaVersion = cudaVersionMatch[1]; + resolve(cudaVersion); + } else { reject('CUDA Version not found.'); + } + } else { + reject('CUDA Version not found.'); } } else { reject(error); } - }); }); }; diff --git a/cortex-js/src/utils/download-progress.ts b/cortex-js/src/utils/download-progress.ts index dd803f84f..68eafcd09 100644 --- a/cortex-js/src/utils/download-progress.ts +++ b/cortex-js/src/utils/download-progress.ts @@ -1,53 +1,69 @@ -import { Presets, SingleBar } from "cli-progress"; -import { Cortex } from 
"@cortexso/cortex.js"; +import { Presets, SingleBar } from 'cli-progress'; +import { Cortex } from '@cortexso/cortex.js'; import { exit, stdin, stdout } from 'node:process'; -import { DownloadState, DownloadStatus, DownloadType } from "@/domain/models/download.interface"; - -export const downloadProgress = async (cortex: Cortex, downloadId?: string, downloadType?: DownloadType) => { - const response = await cortex.events.downloadEvent(); - - const rl = require('readline').createInterface({ - input: stdin, - output: stdout, - }); - - rl.on('SIGINT', () => { - console.log('\nStopping download...'); - process.emit('SIGINT'); - }); - process.on('SIGINT', async () => { - if (downloadId){ - await cortex.models.abortDownload(downloadId); - } - exit(1); - }); - - const progressBar = new SingleBar({}, Presets.shades_classic); - progressBar.start(100, 0); - - for await (const stream of response) { - if (stream.length) { - const data = (stream.find((data: any) => data.id === downloadId || !downloadId) as DownloadState | undefined); - if (!data) continue; - if (downloadType && data.type !== downloadType) continue; - - if (data.status === DownloadStatus.Downloaded) break; - if(data.status === DownloadStatus.Error) { - rl.close(); - progressBar.stop(); - console.log('\n Download failed: ', data.error); - exit(1); - } - - let totalBytes = 0; - let totalTransferred = 0; - data.children.forEach((child: any) => { - totalBytes += child.size.total; - totalTransferred += child.size.transferred; - }); - progressBar.update(Math.floor((totalTransferred / (totalBytes || 1)) * 100)); +import { + DownloadState, + DownloadStatus, + DownloadType, +} from '@/domain/models/download.interface'; +import { isLocalFile } from './urls'; + +export const downloadProgress = async ( + cortex: Cortex, + downloadId?: string, + downloadType?: DownloadType, +) => { + // Do not update on local file symlink + if (downloadId && isLocalFile(downloadId)) return; + + const response = await 
cortex.events.downloadEvent(); + + const rl = require('readline').createInterface({ + input: stdin, + output: stdout, + }); + + rl.on('SIGINT', () => { + console.log('\nStopping download...'); + process.emit('SIGINT'); + }); + process.on('SIGINT', async () => { + if (downloadId) { + await cortex.models.abortDownload(downloadId); + } + exit(1); + }); + + const progressBar = new SingleBar({}, Presets.shades_classic); + progressBar.start(100, 0); + + for await (const stream of response) { + if (stream.length) { + const data = stream.find( + (data: any) => data.id === downloadId || !downloadId, + ) as DownloadState | undefined; + if (!data) continue; + if (downloadType && data.type !== downloadType) continue; + + if (data.status === DownloadStatus.Downloaded) break; + if (data.status === DownloadStatus.Error) { + rl.close(); + progressBar.stop(); + console.log('\n Download failed: ', data.error); + exit(1); } + + let totalBytes = 0; + let totalTransferred = 0; + data.children.forEach((child: any) => { + totalBytes += child.size.total; + totalTransferred += child.size.transferred; + }); + progressBar.update( + Math.floor((totalTransferred / (totalBytes || 1)) * 100), + ); } - progressBar.stop(); - rl.close(); -}; \ No newline at end of file + } + progressBar.stop(); + rl.close(); +}; diff --git a/cortex-js/src/utils/huggingface.ts b/cortex-js/src/utils/huggingface.ts index 85d1afe94..90383436b 100644 --- a/cortex-js/src/utils/huggingface.ts +++ b/cortex-js/src/utils/huggingface.ts @@ -21,9 +21,9 @@ import { ZEPHYR, ZEPHYR_JINJA, } from '@/infrastructure/constants/prompt-constants'; -import { gguf } from '@huggingface/gguf'; import axios from 'axios'; import { parseModelHubEngineBranch } from './normalize-model-id'; +import { closeSync, openSync, readSync } from 'fs'; // TODO: move this to somewhere else, should be reused by API as well. Maybe in a separate service / provider? 
export function guessPromptTemplateFromHuggingFace(jinjaCode?: string): string { @@ -209,20 +209,29 @@ export async function getHFModelMetadata( ggufUrl: string, ): Promise { try { - const { metadata } = await gguf(ggufUrl); - // @ts-expect-error "tokenizer.ggml.eos_token_id" + let metadata: any; + const { ggufMetadata } = await import('hyllama'); + // Read first 10mb of gguf file + const fd = openSync(ggufUrl, 'r'); + const buffer = new Uint8Array(10_000_000); + readSync(fd, buffer, 0, 10_000_000, 0); + closeSync(fd); + + // Parse metadata and tensor info + ({ metadata } = ggufMetadata(buffer.buffer)); + const index = metadata['tokenizer.ggml.eos_token_id']; - // @ts-expect-error "tokenizer.ggml.eos_token_id" const hfChatTemplate = metadata['tokenizer.chat_template']; const promptTemplate = guessPromptTemplateFromHuggingFace(hfChatTemplate); - // @ts-expect-error "tokenizer.ggml.tokens" const stopWord: string = metadata['tokenizer.ggml.tokens'][index] ?? ''; + const name = metadata['general.name']; const version: number = metadata['version']; return { stopWord, promptTemplate, version, + name, }; } catch (err) { console.log('Failed to get model metadata:', err.message); diff --git a/cortex-js/src/utils/init.ts b/cortex-js/src/utils/init.ts index 9eb27346e..022c3063e 100644 --- a/cortex-js/src/utils/init.ts +++ b/cortex-js/src/utils/init.ts @@ -7,7 +7,7 @@ import { checkNvidiaGPUExist } from './cuda'; * @returns */ export const defaultInstallationOptions = async (): Promise => { - let options: InitOptions = {}; + const options: InitOptions = {}; // Skip check if darwin if (process.platform === 'darwin') { diff --git a/cortex-js/src/utils/model-check.ts b/cortex-js/src/utils/model-check.ts index 53ef84d47..78ca84b60 100644 --- a/cortex-js/src/utils/model-check.ts +++ b/cortex-js/src/utils/model-check.ts @@ -1,8 +1,11 @@ -import { MIN_CUDA_VERSION } from "@/infrastructure/constants/cortex"; -import { getCudaVersion } from "./cuda"; -import ora from "ora"; +import { 
MIN_CUDA_VERSION } from '@/infrastructure/constants/cortex'; +import { getCudaVersion } from './cuda'; +import ora from 'ora'; -export const checkModelCompatibility = async (modelId: string, spinner?: ora.Ora) => { +export const checkModelCompatibility = async ( + modelId: string, + spinner?: ora.Ora, +) => { function log(message: string) { if (spinner) { spinner.fail(message); @@ -15,8 +18,8 @@ export const checkModelCompatibility = async (modelId: string, spinner?: ora.Ora process.exit(1); } - if (modelId.includes('tensorrt-llm') ) { - if(process.platform === 'darwin'){ + if (modelId.includes('tensorrt-llm')) { + if (process.platform === 'darwin') { log('Tensorrt-LLM models are not supported on this OS'); process.exit(1); } @@ -24,17 +27,21 @@ export const checkModelCompatibility = async (modelId: string, spinner?: ora.Ora try { const version = await getCudaVersion(); const [currentMajor, currentMinor] = version.split('.').map(Number); - const [requiredMajor, requiredMinor] = MIN_CUDA_VERSION.split('.').map(Number); - const isMatchRequired = currentMajor > requiredMajor || (currentMajor === requiredMajor && currentMinor >= requiredMinor); + const [requiredMajor, requiredMinor] = + MIN_CUDA_VERSION.split('.').map(Number); + const isMatchRequired = + currentMajor > requiredMajor || + (currentMajor === requiredMajor && currentMinor >= requiredMinor); if (!isMatchRequired) { - log(`CUDA version ${version} is not compatible with TensorRT-LLM models. Required version: ${MIN_CUDA_VERSION}`) + log( + `CUDA version ${version} is not compatible with TensorRT-LLM models. Required version: ${MIN_CUDA_VERSION}`, + ); process.exit(1); } - } catch (e) { - console.error(e.message ?? e); - log(e.message ?? e); - process.exit(1); - } - + } catch (e) { + console.error(e.message ?? e); + log(e.message ?? 
e); + process.exit(1); + } } }; diff --git a/cortex-js/src/utils/urls.ts b/cortex-js/src/utils/urls.ts index edadabf79..6559ef804 100644 --- a/cortex-js/src/utils/urls.ts +++ b/cortex-js/src/utils/urls.ts @@ -1,3 +1,5 @@ +import { isAbsolute } from 'path'; + /** * Check if a string is a valid URL. * @param input - The string to check. @@ -12,3 +14,12 @@ export function isValidUrl(input: string | undefined): boolean { return false; } } + +/** + * Check if the URL is a local file path + * @param path + * @returns + */ +export const isLocalFile = (path: string): boolean => { + return !/^(http|https):\/\/[^/]+\/.*/.test(path) && isAbsolute(path); +}; diff --git a/cortex-js/tsconfig.json b/cortex-js/tsconfig.json index d6395629e..44f33f788 100644 --- a/cortex-js/tsconfig.json +++ b/cortex-js/tsconfig.json @@ -1,7 +1,7 @@ { "compilerOptions": { - "module": "commonjs", - "moduleResolution": "node", + "module": "node16", + "moduleResolution": "node16", "declaration": true, "removeComments": true, "emitDecoratorMetadata": true,