feat: User Selectable GPUs and GPU-based Model Recommendations (#1730)

janhq · Feb 6, 2024 · 4471b2c · 4471b2c
1 parent 29a7fb8
commit 4471b2c
Show file tree

Hide file tree

Showing 10 changed files with 196 additions and 58 deletions.
diff --git a/extensions/inference-nitro-extension/src/node/execute.ts b/extensions/inference-nitro-extension/src/node/execute.ts
@@ -25,12 +25,12 @@ export const executableNitroFile = (): NitroExecutableOptions => {
     if (nvidiaInfo["run_mode"] === "cpu") {
       binaryFolder = path.join(binaryFolder, "win-cpu");
     } else {
-      if (nvidiaInfo["cuda"].version === "12") {
-        binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
-      } else {
+      if (nvidiaInfo["cuda"].version === "11") {
         binaryFolder = path.join(binaryFolder, "win-cuda-11-7");
+      } else {
+        binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
       }
-      cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
+      cudaVisibleDevices = nvidiaInfo["gpus_in_use"].join(",");
     }
     binaryName = "nitro.exe";
   } else if (process.platform === "darwin") {
@@ -50,12 +50,12 @@ export const executableNitroFile = (): NitroExecutableOptions => {
     if (nvidiaInfo["run_mode"] === "cpu") {
       binaryFolder = path.join(binaryFolder, "linux-cpu");
     } else {
-      if (nvidiaInfo["cuda"].version === "12") {
-        binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
-      } else {
+      if (nvidiaInfo["cuda"].version === "11") {
         binaryFolder = path.join(binaryFolder, "linux-cuda-11-7");
+      } else {
+        binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
       }
-      cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
+      cudaVisibleDevices = nvidiaInfo["gpus_in_use"].join(",");
     }
   }
   return {

diff --git a/extensions/inference-nitro-extension/src/node/nvidia.ts b/extensions/inference-nitro-extension/src/node/nvidia.ts
@@ -19,6 +19,8 @@ const DEFALT_SETTINGS = {
   },
   gpus: [],
   gpu_highest_vram: "",
+  gpus_in_use: [],
+  is_initial: true,
 };
 
 /**
@@ -48,11 +50,15 @@ export interface NitroProcessInfo {
  */
 export async function updateNvidiaInfo() {
+  // Nothing is probed on macOS; the NVIDIA settings are only refreshed on other platforms.
   if (process.platform !== "darwin") {
-    await Promise.all([
-      updateNvidiaDriverInfo(),
-      updateCudaExistence(),
-      updateGpuInfo(),
-    ]);
+    // Make sure the settings file exists and parses before the updaters run:
+    // their exec callbacks parse NVIDIA_INFO_FILE without a fallback of their own.
+    let data;
+    try {
+      data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
+    } catch (error) {
+      // Missing or unparsable file: seed it with the default settings.
+      data = DEFALT_SETTINGS;
+      writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
+    }
+    // Neither call is awaited, so this function may resolve before the
+    // nvidia-smi callbacks have written their results to the settings file.
+    // CUDA detection is not started here; updateGpuInfo calls updateCudaExistence itself.
+    updateNvidiaDriverInfo();
+    updateGpuInfo();
   }
 }
 
@@ -73,12 +79,7 @@ export async function updateNvidiaDriverInfo(): Promise<void> {
   exec(
     "nvidia-smi --query-gpu=driver_version --format=csv,noheader",
     (error, stdout) => {
-      let data;
-      try {
-        data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-      } catch (error) {
-        data = DEFALT_SETTINGS;
-      }
+      let data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
 
       if (!error) {
         const firstLine = stdout.split("\n")[0].trim();
@@ -107,7 +108,7 @@ export function checkFileExistenceInPaths(
 /**
  * Validate cuda for linux and windows
  */
-export function updateCudaExistence() {
+export function updateCudaExistence(data: Record<string, any> = DEFALT_SETTINGS): Record<string, any> {
   let filesCuda12: string[];
   let filesCuda11: string[];
   let paths: string[];
@@ -141,34 +142,24 @@ export function updateCudaExistence() {
     cudaVersion = "12";
   }
 
-  let data;
-  try {
-    data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-  } catch (error) {
-    data = DEFALT_SETTINGS;
-  }
-
   data["cuda"].exist = cudaExists;
   data["cuda"].version = cudaVersion;
-  if (cudaExists) {
+  console.log(data["is_initial"], data["gpus_in_use"]);
+  if (cudaExists && data["is_initial"] && data["gpus_in_use"].length > 0) {
     data.run_mode = "gpu";
   }
-  writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
+  data.is_initial = false;
+  return data;
 }
 
 /**
  * Get GPU information
  */
 export async function updateGpuInfo(): Promise<void> {
   exec(
-    "nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits",
+    "nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits",
     (error, stdout) => {
-      let data;
-      try {
-        data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-      } catch (error) {
-        data = DEFALT_SETTINGS;
-      }
+      let data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
 
       if (!error) {
         // Get GPU info and gpu has higher memory first
@@ -178,21 +169,27 @@ export async function updateGpuInfo(): Promise<void> {
           .trim()
           .split("\n")
           .map((line) => {
-            let [id, vram] = line.split(", ");
+            let [id, vram, name] = line.split(", ");
             vram = vram.replace(/\r/g, "");
             if (parseFloat(vram) > highestVram) {
               highestVram = parseFloat(vram);
               highestVramId = id;
             }
-            return { id, vram };
+            return { id, vram, name };
           });
 
-        data["gpus"] = gpus;
-        data["gpu_highest_vram"] = highestVramId;
+        data.gpus = gpus;
+        data.gpu_highest_vram = highestVramId;
       } else {
-        data["gpus"] = [];
+        data.gpus = [];
+        data.gpu_highest_vram = "";
+      }
+
+      if (!data["gpus_in_use"] || data["gpus_in_use"].length === 0) {
+        data.gpus_in_use = [data["gpu_highest_vram"]];
       }
 
+      data = updateCudaExistence(data);
       writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
       Promise.resolve();
     }

diff --git a/extensions/monitoring-extension/package.json b/extensions/monitoring-extension/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@janhq/monitoring-extension",
-  "version": "1.0.9",
+  "version": "1.0.10",
   "description": "This extension provides system health and OS level data",
   "main": "dist/index.js",
   "module": "dist/module.js",
@@ -26,6 +26,7 @@
     "README.md"
   ],
   "bundleDependencies": [
-    "node-os-utils"
+    "node-os-utils",
+    "@janhq/core"
   ]
 }
diff --git a/extensions/monitoring-extension/src/module.ts b/extensions/monitoring-extension/src/module.ts
@@ -1,4 +1,14 @@
 const nodeOsUtils = require("node-os-utils");
+const getJanDataFolderPath = require("@janhq/core/node").getJanDataFolderPath;
+const path = require("path");
+const { readFileSync } = require("fs");
+const exec = require("child_process").exec;
+
+const NVIDIA_INFO_FILE = path.join(
+  getJanDataFolderPath(),
+  "settings",
+  "settings.json"
+);
 
 const getResourcesInfo = () =>
   new Promise((resolve) => {
@@ -16,18 +26,48 @@ const getResourcesInfo = () =>
   });
 
 const getCurrentLoad = () =>
-  new Promise((resolve) => {
+  new Promise((resolve, reject) => {
     nodeOsUtils.cpu.usage().then((cpuPercentage) => {
-      const response = {
-        cpu: {
-          usage: cpuPercentage,
-        },
+      let data = {
+        run_mode: "cpu",
+        gpus_in_use: [],
       };
-      resolve(response);
+      if (process.platform !== "darwin") {
+        data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
+      }
+      if (data.run_mode === "gpu" && data.gpus_in_use.length > 0) {
+        const gpuIds = data["gpus_in_use"].join(",");
+        if (gpuIds !== "") {
+          exec(
+            `nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
+            (error, stdout, stderr) => {
+              if (error) {
+                console.error(`exec error: ${error}`);
+                reject(error);
+                return;
+              }
+              const gpuInfo = stdout.trim().split("\n").map((line) => {
+                const [id, name, temperature, utilization, memoryTotal, memoryFree, memoryUtilization] = line.split(", ").map(item => item.replace(/\r/g, ""));
+                return { id, name, temperature, utilization, memoryTotal, memoryFree, memoryUtilization };
+              });
+              resolve({
+                cpu: { usage: cpuPercentage },
+                gpu: gpuInfo
+              });
+            }
+          );
+        } else {
+          // Handle the case where gpuIds is empty
+          resolve({ cpu: { usage: cpuPercentage }, gpu: [] });
+        }
+      } else {
+        // Handle the case where run_mode is not 'gpu' or no GPUs are in use
+        resolve({ cpu: { usage: cpuPercentage }, gpu: [] });
+      }
     });
   });
 
 module.exports = {
   getResourcesInfo,
   getCurrentLoad,
-};
+};
diff --git a/web/containers/Layout/BottomBar/index.tsx b/web/containers/Layout/BottomBar/index.tsx
@@ -48,7 +48,7 @@ const menuLinks = [
 
 const BottomBar = () => {
   const { activeModel, stateModel } = useActiveModel()
-  const { ram, cpu } = useGetSystemResources()
+  const { ram, cpu, gpus } = useGetSystemResources()
   const progress = useAtomValue(appDownloadProgress)
   const downloadedModels = useAtomValue(downloadedModelsAtom)
 
@@ -57,6 +57,13 @@ const BottomBar = () => {
   const setShowSelectModelModal = useSetAtom(showSelectModelModalAtom)
   const [serverEnabled] = useAtom(serverEnabledAtom)
 
+  /**
+   * Percentage of a GPU's memory that is currently in use, rounded to an integer.
+   *
+   * @param gpu - GPU entry whose `memoryTotal` and `memoryFree` are parsed as base-10 integers.
+   * @returns The used-memory percentage, or 0 when the total is missing, unparsable
+   *   or not positive, or when the free amount is missing or unparsable.
+   */
+  const calculateGpuMemoryUsage = (gpu: Record<string, never>) => {
+    const total = parseInt(gpu.memoryTotal, 10)
+    const free = parseInt(gpu.memoryFree, 10)
+    // Only bail out on unusable readings. A free value of 0 is a real reading
+    // (memory fully used) and must yield 100, not 0.
+    if (!(total > 0) || Number.isNaN(free)) return 0
+    return Math.round(((total - free) / total) * 100)
+  }
+
   return (
     <div className="fixed bottom-0 left-16 z-20 flex h-12 w-[calc(100%-64px)] items-center justify-between border-t border-border bg-background/80 px-3">
       <div className="flex flex-shrink-0 items-center gap-x-2">
@@ -119,6 +126,17 @@ const BottomBar = () => {
           <SystemItem name="CPU:" value={`${cpu}%`} />
           <SystemItem name="Mem:" value={`${ram}%`} />
         </div>
+        {gpus.length > 0 && (
+          <div className="flex items-center gap-x-2">
+            {gpus.map((gpu, index) => (
+              <SystemItem
+                key={index}
+                name={`GPU ${gpu.id}:`}
+                value={`${gpu.utilization}% Util, ${calculateGpuMemoryUsage(gpu)}% Mem`}
+              />
+            ))}
+          </div>
+        )}
         {/* VERSION is defined by webpack, please see next.config.js */}
         <span className="text-xs text-muted-foreground">
           Jan v{VERSION ?? ''}

diff --git a/web/helpers/atoms/SystemBar.atom.ts b/web/helpers/atoms/SystemBar.atom.ts
@@ -5,3 +5,5 @@ export const usedRamAtom = atom<number>(0)
 export const availableRamAtom = atom<number>(0)
 
 export const cpuUsageAtom = atom<number>(0)
+
+export const nvidiaTotalVramAtom = atom<number>(0)
diff --git a/web/hooks/useGetSystemResources.ts b/web/hooks/useGetSystemResources.ts
@@ -10,15 +10,19 @@ import {
   cpuUsageAtom,
   totalRamAtom,
   usedRamAtom,
+  nvidiaTotalVramAtom,
 } from '@/helpers/atoms/SystemBar.atom'
 
 export default function useGetSystemResources() {
   const [ram, setRam] = useState<number>(0)
   const [cpu, setCPU] = useState<number>(0)
+
+  const [gpus, setGPUs] = useState<Record<string, never>[]>([])
   const setTotalRam = useSetAtom(totalRamAtom)
   const setUsedRam = useSetAtom(usedRamAtom)
   const setAvailableRam = useSetAtom(availableRamAtom)
   const setCpuUsage = useSetAtom(cpuUsageAtom)
+  const setTotalNvidiaVram = useSetAtom(nvidiaTotalVramAtom)
 
   const getSystemResources = async () => {
     if (
@@ -48,12 +52,25 @@ export default function useGetSystemResources() {
       )
     setCPU(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
     setCpuUsage(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
+
+    const gpus = currentLoadInfor?.gpu ?? []
+    setGPUs(gpus)
+
+    let totalNvidiaVram = 0
+    if (gpus.length > 0) {
+      totalNvidiaVram = gpus.reduce(
+        (total: number, gpu: { memoryTotal: string }) =>
+          total + Number(gpu.memoryTotal),
+        0
+      )
+    }
+    setTotalNvidiaVram(totalNvidiaVram)
   }
 
   useEffect(() => {
     getSystemResources()
 
-    // Fetch interval - every 0.5s
+    // Fetch interval - every 2s
     // TODO: Will we really need this?
     // There is a possibility that this will be removed and replaced by the process event hook?
     const intervalId = setInterval(() => {
@@ -69,5 +86,6 @@ export default function useGetSystemResources() {
     totalRamAtom,
     ram,
     cpu,
+    gpus,
   }
 }
diff --git a/web/hooks/useSettings.ts b/web/hooks/useSettings.ts
@@ -47,14 +47,17 @@ export const useSettings = () => {
+  /**
+   * Persists a partial settings update to `settings.json`.
+   *
+   * The current settings are loaded with `readSettings()`, only the fields that
+   * were actually passed (neither `null` nor `undefined`) are overwritten, and
+   * the merged object is written back as JSON.
+   *
+   * @param runMode - New value for `run_mode`, left untouched when omitted.
+   * @param notify - New value for `notify`, left untouched when omitted.
+   * @param gpusInUse - New value for `gpus_in_use`, left untouched when omitted.
+   */
   const saveSettings = async ({
     runMode,
     notify,
+    gpusInUse,
   }: {
     runMode?: string | undefined
     notify?: boolean | undefined
+    gpusInUse?: string[] | undefined
   }) => {
     const settingsFile = await joinPath(['file://settings', 'settings.json'])
     const settings = await readSettings()
+    // `!= null` skips both null and undefined, so falsy values such as `false`
+    // or an empty array are still saved.
     if (runMode != null) settings.run_mode = runMode
     if (notify != null) settings.notify = notify
+    if (gpusInUse != null) settings.gpus_in_use = gpusInUse
     await fs.writeFileSync(settingsFile, JSON.stringify(settings))
   }