diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md index 184cd419554f8..f97e696aaaac8 100644 --- a/docs/backend/SYCL.md +++ b/docs/backend/SYCL.md @@ -237,6 +237,15 @@ cmake -B buildWithCublas -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENAB cmake --build buildWithCublas --config Release ``` +**oneDNN**: The current oneDNN releases *(shipped with the oneAPI base-toolkit)* do not include the NVIDIA backend. Therefore, oneDNN must be compiled from source to enable the NVIDIA target: + +```sh +git clone https://github.com/oneapi-src/oneDNN.git +cd oneDNN +cmake -GNinja -Bbuild-nvidia -DDNNL_CPU_RUNTIME=DPCPP -DDNNL_GPU_RUNTIME=DPCPP -DDNNL_GPU_VENDOR=NVIDIA -DONEDNN_BUILD_GRAPH=OFF -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx +cmake --build build-nvidia --config Release +``` + - **Adding support to AMD GPUs** **oneAPI Plugin**: In order to enable SYCL support on AMD GPUs, please install the [Codeplay oneAPI Plugin for AMD GPUs](https://developer.codeplay.com/products/oneapi/amd/download). As with Nvidia GPUs, the user should also make sure the plugin version matches the installed base toolkit. @@ -327,10 +336,10 @@ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR GGML_SYCL_DEVICE_ARCH=sm_80 # Example architecture # Option 1: Use FP32 (recommended for better performance in most cases) -cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx +cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DDNNL_DIR=/path/to/oneDNN/build-nvidia/install/lib/cmake/dnnl # Option 2: Use FP16 -cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON +cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DDNNL_DIR=/path/to/oneDNN/build-nvidia/install/lib/cmake/dnnl # build all binary cmake --build build --config Release -j -v diff --git a/examples/server/public/index.html.gz b/examples/server/public/index.html.gz index c7a3c426b623c..d0e6da8e4a1e0 100644 Binary files a/examples/server/public/index.html.gz and b/examples/server/public/index.html.gz differ diff --git a/examples/server/webui/src/components/ChatScreen.tsx b/examples/server/webui/src/components/ChatScreen.tsx index 79de305326241..d12b06e125e5a 100644 --- a/examples/server/webui/src/components/ChatScreen.tsx +++ b/examples/server/webui/src/components/ChatScreen.tsx @@ -99,13 +99,9 @@ export default function ChatScreen() { canvasData, replaceMessageAndGenerate, } = useAppContext(); - const [inputMsg, setInputMsg] = useState(prefilledMsg.content()); - const inputRef = useRef(null); + const textarea = useOptimizedTextarea(prefilledMsg.content()); - const { extraContext, clearExtraContext } = useVSCodeContext( - inputRef, - setInputMsg - ); + const { extraContext, clearExtraContext } = useVSCodeContext(textarea); // TODO: improve this when we have "upload file" feature const currExtra: Message['extra'] = extraContext ? [extraContext] : undefined; @@ -135,9 +131,10 @@ export default function ChatScreen() { }; const sendNewMessage = async () => { - if (inputMsg.trim().length === 0 || isGenerating(currConvId ?? '')) return; - const lastInpMsg = inputMsg; - setInputMsg(''); + const lastInpMsg = textarea.value(); + if (lastInpMsg.trim().length === 0 || isGenerating(currConvId ?? '')) + return; + textarea.setValue(''); scrollToBottom(false); setCurrNodeId(-1); // get the last message node @@ -146,13 +143,13 @@ export default function ChatScreen() { !(await sendMessage( currConvId, lastMsgNodeId, - inputMsg, + lastInpMsg, currExtra, onChunk )) ) { // restore the input message if failed - setInputMsg(lastInpMsg); + textarea.setValue(lastInpMsg); } // OK clearExtraContext(); @@ -195,16 +192,13 @@ export default function ChatScreen() { // send the prefilled message if needed sendNewMessage(); } else { - // otherwise, focus on the input and move the cursor to the end - if (inputRef.current) { - inputRef.current.focus(); - inputRef.current.selectionStart = inputRef.current.value.length; - } + // otherwise, focus on the input + textarea.focus(); } prefilledMsg.clear(); // no need to keep track of sendNewMessage // eslint-disable-next-line react-hooks/exhaustive-deps - }, [inputRef]); + }, [textarea.ref]); // due to some timing issues of StorageUtils.appendMsg(), we need to make sure the pendingMsg is not duplicated upon rendering (i.e. appears once in the saved conversation and once in the pendingMsg) const pendingMsgDisplay: MessageDisplay[] = @@ -258,9 +252,7 @@ export default function ChatScreen() {