Skip to content

Commit

Permalink
feat: User Selectable GPUs and GPU-based Model Recommendations (#1730)
Browse files Browse the repository at this point in the history
  • Loading branch information
hiento09 committed Feb 6, 2024
1 parent 29a7fb8 commit 4471b2c
Show file tree
Hide file tree
Showing 10 changed files with 196 additions and 58 deletions.
16 changes: 8 additions & 8 deletions extensions/inference-nitro-extension/src/node/execute.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ export const executableNitroFile = (): NitroExecutableOptions => {
if (nvidiaInfo["run_mode"] === "cpu") {
binaryFolder = path.join(binaryFolder, "win-cpu");
} else {
if (nvidiaInfo["cuda"].version === "12") {
binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
} else {
if (nvidiaInfo["cuda"].version === "11") {
binaryFolder = path.join(binaryFolder, "win-cuda-11-7");
} else {
binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
}
cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
cudaVisibleDevices = nvidiaInfo["gpus_in_use"].join(",");
}
binaryName = "nitro.exe";
} else if (process.platform === "darwin") {
Expand All @@ -50,12 +50,12 @@ export const executableNitroFile = (): NitroExecutableOptions => {
if (nvidiaInfo["run_mode"] === "cpu") {
binaryFolder = path.join(binaryFolder, "linux-cpu");
} else {
if (nvidiaInfo["cuda"].version === "12") {
binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
} else {
if (nvidiaInfo["cuda"].version === "11") {
binaryFolder = path.join(binaryFolder, "linux-cuda-11-7");
} else {
binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
}
cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
cudaVisibleDevices = nvidiaInfo["gpus_in_use"].join(",");
}
}
return {
Expand Down
63 changes: 30 additions & 33 deletions extensions/inference-nitro-extension/src/node/nvidia.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ const DEFALT_SETTINGS = {
},
gpus: [],
gpu_highest_vram: "",
gpus_in_use: [],
is_initial: true,
};

/**
Expand Down Expand Up @@ -48,11 +50,15 @@ export interface NitroProcessInfo {
*/
export async function updateNvidiaInfo() {
if (process.platform !== "darwin") {
await Promise.all([
updateNvidiaDriverInfo(),
updateCudaExistence(),
updateGpuInfo(),
]);
let data;
try {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
} catch (error) {
data = DEFALT_SETTINGS;
writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
}
updateNvidiaDriverInfo();
updateGpuInfo();
}
}

Expand All @@ -73,12 +79,7 @@ export async function updateNvidiaDriverInfo(): Promise<void> {
exec(
"nvidia-smi --query-gpu=driver_version --format=csv,noheader",
(error, stdout) => {
let data;
try {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
} catch (error) {
data = DEFALT_SETTINGS;
}
let data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));

if (!error) {
const firstLine = stdout.split("\n")[0].trim();
Expand Down Expand Up @@ -107,7 +108,7 @@ export function checkFileExistenceInPaths(
/**
* Validate cuda for linux and windows
*/
export function updateCudaExistence() {
export function updateCudaExistence(data: Record<string, any> = DEFALT_SETTINGS): Record<string, any> {
let filesCuda12: string[];
let filesCuda11: string[];
let paths: string[];
Expand Down Expand Up @@ -141,34 +142,24 @@ export function updateCudaExistence() {
cudaVersion = "12";
}

let data;
try {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
} catch (error) {
data = DEFALT_SETTINGS;
}

data["cuda"].exist = cudaExists;
data["cuda"].version = cudaVersion;
if (cudaExists) {
console.log(data["is_initial"], data["gpus_in_use"]);
if (cudaExists && data["is_initial"] && data["gpus_in_use"].length > 0) {
data.run_mode = "gpu";
}
writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
data.is_initial = false;
return data;
}

/**
* Get GPU information
*/
export async function updateGpuInfo(): Promise<void> {
exec(
"nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits",
"nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits",
(error, stdout) => {
let data;
try {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
} catch (error) {
data = DEFALT_SETTINGS;
}
let data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));

if (!error) {
// Get GPU info and gpu has higher memory first
Expand All @@ -178,21 +169,27 @@ export async function updateGpuInfo(): Promise<void> {
.trim()
.split("\n")
.map((line) => {
let [id, vram] = line.split(", ");
let [id, vram, name] = line.split(", ");
vram = vram.replace(/\r/g, "");
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram);
highestVramId = id;
}
return { id, vram };
return { id, vram, name };
});

data["gpus"] = gpus;
data["gpu_highest_vram"] = highestVramId;
data.gpus = gpus;
data.gpu_highest_vram = highestVramId;
} else {
data["gpus"] = [];
data.gpus = [];
data.gpu_highest_vram = "";
}

if (!data["gpus_in_use"] || data["gpus_in_use"].length === 0) {
data.gpus_in_use = [data["gpu_highest_vram"]];
}

data = updateCudaExistence(data);
writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
Promise.resolve();
}
Expand Down
5 changes: 3 additions & 2 deletions extensions/monitoring-extension/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@janhq/monitoring-extension",
"version": "1.0.9",
"version": "1.0.10",
"description": "This extension provides system health and OS level data",
"main": "dist/index.js",
"module": "dist/module.js",
Expand All @@ -26,6 +26,7 @@
"README.md"
],
"bundleDependencies": [
"node-os-utils"
"node-os-utils",
"@janhq/core"
]
}
54 changes: 47 additions & 7 deletions extensions/monitoring-extension/src/module.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
const nodeOsUtils = require("node-os-utils");
const getJanDataFolderPath = require("@janhq/core/node").getJanDataFolderPath;
const path = require("path");
const { readFileSync } = require("fs");
const exec = require("child_process").exec;

// Location of `settings/settings.json` inside the folder returned by
// getJanDataFolderPath(). Despite the name, this is the shared settings file;
// it is resolved once, when this module is loaded.
const NVIDIA_INFO_FILE = path.join(getJanDataFolderPath(), "settings", "settings.json");

const getResourcesInfo = () =>
new Promise((resolve) => {
Expand All @@ -16,18 +26,48 @@ const getResourcesInfo = () =>
});

const getCurrentLoad = () =>
new Promise((resolve) => {
new Promise((resolve, reject) => {
nodeOsUtils.cpu.usage().then((cpuPercentage) => {
const response = {
cpu: {
usage: cpuPercentage,
},
let data = {
run_mode: "cpu",
gpus_in_use: [],
};
resolve(response);
if (process.platform !== "darwin") {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
}
if (data.run_mode === "gpu" && data.gpus_in_use.length > 0) {
const gpuIds = data["gpus_in_use"].join(",");
if (gpuIds !== "") {
exec(
`nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
(error, stdout, stderr) => {
if (error) {
console.error(`exec error: ${error}`);
reject(error);
return;
}
const gpuInfo = stdout.trim().split("\n").map((line) => {
const [id, name, temperature, utilization, memoryTotal, memoryFree, memoryUtilization] = line.split(", ").map(item => item.replace(/\r/g, ""));
return { id, name, temperature, utilization, memoryTotal, memoryFree, memoryUtilization };
});
resolve({
cpu: { usage: cpuPercentage },
gpu: gpuInfo
});
}
);
} else {
// Handle the case where gpuIds is empty
resolve({ cpu: { usage: cpuPercentage }, gpu: [] });
}
} else {
// Handle the case where run_mode is not 'gpu' or no GPUs are in use
resolve({ cpu: { usage: cpuPercentage }, gpu: [] });
}
});
});

module.exports = {
getResourcesInfo,
getCurrentLoad,
};
};
20 changes: 19 additions & 1 deletion web/containers/Layout/BottomBar/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ const menuLinks = [

const BottomBar = () => {
const { activeModel, stateModel } = useActiveModel()
const { ram, cpu } = useGetSystemResources()
const { ram, cpu, gpus } = useGetSystemResources()
const progress = useAtomValue(appDownloadProgress)
const downloadedModels = useAtomValue(downloadedModelsAtom)

Expand All @@ -57,6 +57,13 @@ const BottomBar = () => {
const setShowSelectModelModal = useSetAtom(showSelectModelModalAtom)
const [serverEnabled] = useAtom(serverEnabledAtom)

/**
 * Compute how much of a GPU's memory is currently in use, as a percentage.
 *
 * @param gpu - GPU record whose `memoryTotal` and `memoryFree` entries are
 *   numeric strings.
 * @returns Used memory as a whole-number percentage of the total, or 0 when
 *   either value is missing/unparseable or the total is not positive.
 */
const calculateGpuMemoryUsage = (gpu: Record<string, string>) => {
  const total = Number.parseInt(gpu.memoryTotal, 10)
  const free = Number.parseInt(gpu.memoryFree, 10)
  // Bail out only on unusable readings. A free value of 0 is legitimate: it
  // means the memory is fully used and must be reported as 100, not 0.
  if (Number.isNaN(total) || Number.isNaN(free) || total <= 0) return 0
  return Math.round(((total - free) / total) * 100)
}

return (
<div className="fixed bottom-0 left-16 z-20 flex h-12 w-[calc(100%-64px)] items-center justify-between border-t border-border bg-background/80 px-3">
<div className="flex flex-shrink-0 items-center gap-x-2">
Expand Down Expand Up @@ -119,6 +126,17 @@ const BottomBar = () => {
<SystemItem name="CPU:" value={`${cpu}%`} />
<SystemItem name="Mem:" value={`${ram}%`} />
</div>
{gpus.length > 0 && (
<div className="flex items-center gap-x-2">
{gpus.map((gpu, index) => (
<SystemItem
key={index}
name={`GPU ${gpu.id}:`}
value={`${gpu.utilization}% Util, ${calculateGpuMemoryUsage(gpu)}% Mem`}
/>
))}
</div>
)}
{/* VERSION is defined by webpack, please see next.config.js */}
<span className="text-xs text-muted-foreground">
Jan v{VERSION ?? ''}
Expand Down
2 changes: 2 additions & 0 deletions web/helpers/atoms/SystemBar.atom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ export const usedRamAtom = atom<number>(0)
// Numeric atom, initialised to 0.
// NOTE(review): presumably holds the currently available RAM — confirm against the code that sets it.
export const availableRamAtom = atom<number>(0)

// Numeric atom, initialised to 0.
// NOTE(review): presumably holds the CPU usage percentage — confirm against the code that sets it.
export const cpuUsageAtom = atom<number>(0)

// Numeric atom, initialised to 0.
// NOTE(review): presumably holds the combined total VRAM of the NVIDIA GPUs — confirm the unit against the code that sets it.
export const nvidiaTotalVramAtom = atom<number>(0)
20 changes: 19 additions & 1 deletion web/hooks/useGetSystemResources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,19 @@ import {
cpuUsageAtom,
totalRamAtom,
usedRamAtom,
nvidiaTotalVramAtom,
} from '@/helpers/atoms/SystemBar.atom'

export default function useGetSystemResources() {
const [ram, setRam] = useState<number>(0)
const [cpu, setCPU] = useState<number>(0)

const [gpus, setGPUs] = useState<Record<string, never>[]>([])
const setTotalRam = useSetAtom(totalRamAtom)
const setUsedRam = useSetAtom(usedRamAtom)
const setAvailableRam = useSetAtom(availableRamAtom)
const setCpuUsage = useSetAtom(cpuUsageAtom)
const setTotalNvidiaVram = useSetAtom(nvidiaTotalVramAtom)

const getSystemResources = async () => {
if (
Expand Down Expand Up @@ -48,12 +52,25 @@ export default function useGetSystemResources() {
)
setCPU(Math.round(currentLoadInfor?.cpu?.usage ?? 0))
setCpuUsage(Math.round(currentLoadInfor?.cpu?.usage ?? 0))

const gpus = currentLoadInfor?.gpu ?? []
setGPUs(gpus)

let totalNvidiaVram = 0
if (gpus.length > 0) {
totalNvidiaVram = gpus.reduce(
(total: number, gpu: { memoryTotal: string }) =>
total + Number(gpu.memoryTotal),
0
)
}
setTotalNvidiaVram(totalNvidiaVram)
}

useEffect(() => {
getSystemResources()

// Fetch interval - every 0.5s
// Fetch interval - every 2s
// TODO: Will we really need this?
// There is a possibility that this will be removed and replaced by the process event hook.
const intervalId = setInterval(() => {
Expand All @@ -69,5 +86,6 @@ export default function useGetSystemResources() {
totalRamAtom,
ram,
cpu,
gpus,
}
}
3 changes: 3 additions & 0 deletions web/hooks/useSettings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,17 @@ export const useSettings = () => {
/**
 * Merge the supplied values into the settings object returned by
 * `readSettings()` and write the result to `settings.json`.
 *
 * A value is applied only when it is neither `null` nor `undefined`; all other
 * keys of the settings object are written back as they were read.
 *
 * @param runMode - New value for the `run_mode` setting.
 * @param notify - New value for the `notify` setting.
 * @param gpusInUse - New value for the `gpus_in_use` setting.
 */
const saveSettings = async ({
  runMode,
  notify,
  gpusInUse,
}: {
  runMode?: string | undefined
  notify?: boolean | undefined
  gpusInUse?: string[] | undefined
}) => {
  const targetPath = await joinPath(['file://settings', 'settings.json'])
  const current = await readSettings()

  if (runMode != null) {
    current.run_mode = runMode
  }
  if (notify != null) {
    current.notify = notify
  }
  if (gpusInUse != null) {
    current.gpus_in_use = gpusInUse
  }

  const serialized = JSON.stringify(current)
  await fs.writeFileSync(targetPath, serialized)
}

Expand Down

0 comments on commit 4471b2c

Please sign in to comment.