Skip to content

Commit

Permalink
add workflow and update dockerfile for nightly docker
Browse files Browse the repository at this point in the history
  • Loading branch information
leasunhy committed May 30, 2024
1 parent f66a0c6 commit 3bc5d0e
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 12 deletions.
49 changes: 49 additions & 0 deletions .github/workflows/docker_rdma_nightly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Builds the image from ./docker/rdma/ and pushes it to Docker Hub as
# dptechnology/unicore:nightly-rdma on every push to the docker-nightly branch.
name: Build and Publish Docker

on:
  push:
    branches:
      - docker-nightly

jobs:
  docker:
    runs-on: ubuntu-latest
    steps:
      -
        name: Checkout
        uses: actions/checkout@v3
      -
        name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      -
        name: Login to DockerHub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      -
        name: Free Disk Space (Ubuntu)
        # Pinned to a release tag instead of the mutable `main` branch: this job
        # has Docker Hub credentials in scope, so third-party action code must
        # not be able to change underneath it without a reviewed bump.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: false

          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          # keep the runner's Docker images in place for the build step below
          docker-images: false
          swap-storage: true
      -
        name: Build and push with rdma (nightly)
        uses: docker/build-push-action@v3
        with:
          context: ./docker/rdma/
          push: true
          tags: dptechnology/unicore:nightly-rdma
22 changes: 10 additions & 12 deletions docker/rdma/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM nvcr.io/nvidia/pytorch:23.04-py3
FROM nvcr.io/nvidia/pytorch:24.05-py3

RUN APT_INSTALL="apt-get install -y --no-install-recommends" && \
rm -rf /var/lib/apt/lists/* \
Expand Down Expand Up @@ -50,31 +50,29 @@ RUN APT_INSTALL="apt-get install -y --no-install-recommends" && \
nfs-common \
bc

# RUN ldconfig

# # ==================================================================
# # pytorch
# # ------------------------------------------------------------------
# GPU architectures targeted by the source builds later in this file
# (presumably read by PyTorch's extension build tooling — confirm against setup.py).
# Uses key=value form; the space-separated `ENV key value` form is deprecated.
ENV TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;9.0"

RUN pip3 install --upgrade sentry-sdk requests
RUN pip3 install --no-cache-dir ninja typing packaging

RUN cd /tmp && \
git clone https://github.com/dptech-corp/Uni-Core && \
cd Uni-Core && \
python setup.py install && \
rm -rf /tmp/* && rm -rf ~/.cache/pip
RUN pip3 install --upgrade --no-cache-dir sentry-sdk requests

RUN pip3 uninstall flash_attn -y
RUN pip3 install --no-cache-dir tokenizers wandb lmdb biopython ml-collections timeout-decorator urllib3 tree dm-tree scipy tqdm tensorboardX && rm -rf ~/.cache/pip

RUN cd /tmp && \
git clone https://github.com/Dao-AILab/flash-attention.git && \
cd flash-attention && \
git clone https://github.com/dptech-corp/Uni-Core && \
cd Uni-Core && \
python setup.py install && \
rm -rf /tmp/* && rm -rf ~/.cache/pip
rm -rf /tmp/* && rm -rf ~/.cache/pip

RUN pip3 install --no-cache-dir tokenizers lmdb biopython ml-collections timeout-decorator urllib3 tree dm-tree && rm -rf ~/.cache/pip
RUN pip3 install -U 'flash-attn<2.5.0' --no-build-isolation --no-cache-dir

# Final housekeeping: refresh the dynamic linker cache, then clear the apt
# package cache, remove packages that are no longer required, and delete the
# apt package lists and temporary files.
# `-y` is required on autoremove: the build has no interactive stdin, so
# without it apt-get aborts at its confirmation prompt (failing the build)
# whenever there is at least one package to remove.
RUN ldconfig && \
    apt-get clean && \
    apt-get autoremove -y && \
    rm -rf /var/lib/apt/lists/* /tmp/*

0 comments on commit 3bc5d0e

Please sign in to comment.