Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions examples/example-docker/cuda.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Base image: NVIDIA CUDA 12.3.1, "devel" flavour, on Ubuntu 22.04.
# nitro is compiled inside this image with the cuBLAS backend enabled (see the
# build step below), so the development variant of the CUDA image is used.
FROM nvidia/cuda:12.3.1-devel-ubuntu22.04

# All subsequent instructions run relative to /app; the nitro sources are
# cloned into /app/nitro and the binary ends up in /app/nitro/build.
WORKDIR /app

# Build nitro from source in a single layer:
#   1. install the build tools (git, cmake) plus numactl and uuid-dev,
#   2. clone nitro together with its submodules and run its dependency script,
#   3. configure and compile it with the cuBLAS (CUDA) backend enabled,
#   4. purge the build tools and the apt caches in the same layer so they do
#      not add to the image size.
# DEBIAN_FRONTEND is exported inside this RUN only (not via ENV), so no package
# can stall the build on a debconf prompt and the setting does not leak into
# the runtime environment.
# NOTE(review): "--config Release" only takes effect with multi-config CMake
# generators; confirm which build type the project's CMakeLists selects here.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update && \
    apt-get install -y git cmake numactl uuid-dev && \
    git clone --recurse-submodules https://github.com/janhq/nitro nitro && \
    cd nitro && \
    ./install_deps.sh && \
    mkdir build && \
    cd build && \
    cmake .. -DDEBUG=ON -DLLAMA_CUBLAS=ON -DLLAMA_CUDA_F16=ON -DLLAMA_CUDA_DMMV_X=64 -DLLAMA_CUDA_MMV_Y=32 && \
    cmake --build . --config Release -j "$(nproc)" && \
    apt-get remove --purge -y git cmake && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Document the port the server is started on by the default CMD (3928).
# EXPOSE does not publish the port by itself; map it at run time, e.g.
# `docker run -p 3928:3928 ...`.
EXPOSE 3928

# Ensure the compiled nitro binary carries the executable bit.
# NOTE(review): a binary produced by the build step is normally executable
# already, so this is probably redundant and only adds an extra layer —
# confirm and consider dropping it.
RUN chmod +x /app/nitro/build/nitro

# Start the nitro binary directly as the container's main process (exec form,
# so no shell wraps it). numactl is installed in the image but is NOT invoked
# here, so no CPU pinning is applied by default.
# CMD supplies default arguments that can be overridden at `docker run`;
# presumably <thread count> <bind address> <port> — verify against nitro's CLI.
# The port value matches the EXPOSE instruction above.
ENTRYPOINT ["/app/nitro/build/nitro"]
CMD ["1", "0.0.0.0", "3928"]