2.3.1 (mosaicml#1312)

dakinggg · Jun 27, 2024 · f141ee1
1 parent 472d009
commit f141ee1
Show file tree

Hide file tree

Showing 14 changed files with 16 additions and 41 deletions.
diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
@@ -17,12 +17,6 @@ jobs:
     strategy:
       matrix:
         include:
-        - name: "2.3.0_cu121_flash2"
-          base_image: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04
-          dep_groups: "[gpu-flash2]"
-        - name: "2.3.0_cu121_flash2_aws"
-          base_image: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04-aws
-          dep_groups: "[gpu-flash2]"
         - name: "2.3.1_cu121"
           base_image: mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04
           dep_groups: "[gpu]"

diff --git a/.github/workflows/pr-cpu.yaml b/.github/workflows/pr-cpu.yaml
@@ -19,10 +19,6 @@ jobs:
     strategy:
       matrix:
         include:
-        - name: "cpu-2.3.0"
-          container: mosaicml/pytorch:2.3.0_cpu-python3.11-ubuntu20.04
-          markers: "not gpu"
-          pytest_command: "coverage run -m pytest"
         - name: "cpu-2.3.1"
           container: mosaicml/pytorch:2.3.1_cpu-python3.11-ubuntu20.04
           markers: "not gpu"

diff --git a/.github/workflows/pr-gpu.yaml b/.github/workflows/pr-gpu.yaml
@@ -19,11 +19,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-        - name: "gpu-2.3.0-1"
-          container: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04
-          markers: "gpu"
-          pytest_command: "coverage run -m pytest"
-          pip_deps: "[all]"
         - name: "gpu-2.3.1-1"
           container: mosaicml/llm-foundry:2.3.1_cu121-latest
           markers: "gpu"
@@ -49,11 +44,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-        - name: "gpu-2.3.0-2"
-          container: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04
-          markers: "gpu"
-          pytest_command: "coverage run -m pytest"
-          pip_deps: "[all]"
         - name: "gpu-2.3.1-2"
           container: mosaicml/llm-foundry:2.3.1_cu121-latest
           markers: "gpu"
@@ -79,11 +69,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-        - name: "gpu-2.3.0-4"
-          container: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04
-          markers: "gpu"
-          pytest_command: "coverage run -m pytest"
-          pip_deps: "[all]"
         - name: "gpu-2.3.1-4"
           container: mosaicml/llm-foundry:2.3.1_cu121-latest
           markers: "gpu"

diff --git a/README.md b/README.md
@@ -113,24 +113,24 @@ If you have success/failure using LLM Foundry on other systems, please let us kn
 
 | Device         | Torch Version | Cuda Version | Status                       |
 | -------------- | ------------- | ------------ | ---------------------------- |
-| A100-40GB/80GB | 2.3.0         | 12.1         | :white_check_mark: Supported |
-| H100-80GB      | 2.3.0         | 12.1         | :white_check_mark: Supported |
+| A100-40GB/80GB | 2.3.1         | 12.1         | :white_check_mark: Supported |
+| H100-80GB      | 2.3.1         | 12.1         | :white_check_mark: Supported |
 
 ## MosaicML Docker Images
 We highly recommend using our prebuilt Docker images. You can find them here: https://hub.docker.com/orgs/mosaicml/repositories.
 
 The `mosaicml/pytorch` images are pinned to specific PyTorch and CUDA versions, and are stable and rarely updated.
 
 The `mosaicml/llm-foundry` images are built with new tags upon every commit to the `main` branch.
-You can select a specific commit hash such as `mosaicml/llm-foundry:2.3.0_cu121_flash2-36ab1ba` or take the latest one using `mosaicml/llm-foundry:2.3.0_cu121_flash2-latest`.
+You can select a specific commit hash such as `mosaicml/llm-foundry:2.3.1_cu121-36ab1ba` or take the latest one using `mosaicml/llm-foundry:2.3.1_cu121-latest`.
 
 **Please Note:** The `mosaicml/llm-foundry` images do not come with the `llm-foundry` package preinstalled, just the dependencies. You will still need to `pip install llm-foundry` either from PyPi or from source.
 
 | Docker Image                                           | Torch Version | Cuda Version      | LLM Foundry dependencies installed? |
 | ------------------------------------------------------ | ------------- | ----------------- | ----------------------------------- |
-| `mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04`  | 2.3.0         | 12.1 (Infiniband) | No                                  |
-| `mosaicml/llm-foundry:2.3.0_cu121_flash2-latest`       | 2.3.0         | 12.1 (Infiniband) | Yes                                 |
-| `mosaicml/llm-foundry:2.3.0_cu121_flash2_aws-latest`   | 2.3.0         | 12.1 (EFA)        | Yes                                 |
+| `mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04`  | 2.3.1         | 12.1 (Infiniband) | No                                  |
+| `mosaicml/llm-foundry:2.3.1_cu121-latest`              | 2.3.1         | 12.1 (Infiniband) | Yes                                 |
+| `mosaicml/llm-foundry:2.3.1_cu121_aws-latest`          | 2.3.1         | 12.1 (EFA)        | Yes                                 |
 
 
 # Installation

diff --git a/mcli/mcli-1b-eval.yaml b/mcli/mcli-1b-eval.yaml
@@ -9,7 +9,7 @@ integrations:
 command: |
   cd llm-foundry/scripts/
   composer eval/eval.py /mnt/config/parameters.yaml
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 name: mpt-1b-eval
 
 compute:

diff --git a/mcli/mcli-1b-max-seq-len-8k.yaml b/mcli/mcli-1b-max-seq-len-8k.yaml
@@ -17,7 +17,7 @@ command: |
     --out_root ./my-copy-c4 --splits train_small val_small \
     --concat_tokens 8192 --tokenizer EleutherAI/gpt-neox-20b --eos_text '<|endoftext|>'
   composer train/train.py /mnt/config/parameters.yaml
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 name: mpt-1b-ctx-8k-gpus-8
 
 compute:

diff --git a/mcli/mcli-1b.yaml b/mcli/mcli-1b.yaml
@@ -21,7 +21,7 @@ command: |
     eval_loader.dataset.split=val_small \
     max_duration=100ba \
     eval_interval=0
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 name: mpt-1b-gpus-8
 
 compute:

diff --git a/mcli/mcli-benchmark-mpt.yaml b/mcli/mcli-benchmark-mpt.yaml
@@ -6,7 +6,7 @@ compute:
   # cluster: TODO # Name of the cluster to use for this run
   # gpu_type: a100_80gb # Type of GPU to use. We use a100_80gb in our experiments
 
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 
 integrations:
 - integration_type: git_repo

diff --git a/mcli/mcli-convert-composer-to-hf.yaml b/mcli/mcli-convert-composer-to-hf.yaml
@@ -13,7 +13,7 @@ command: |
     --hf_output_path s3://bucket/folder/hf/ \
     --output_precision bf16 \
 
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 name: convert-composer-hf
 
 compute:

diff --git a/mcli/mcli-hf-eval.yaml b/mcli/mcli-hf-eval.yaml
@@ -16,7 +16,7 @@ gpu_num: 8
 # gpu_type:
 # cluster:  # replace with your cluster here!
 
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 
 # The below is injected as a YAML file: /mnt/config/parameters.yaml
 parameters:

diff --git a/mcli/mcli-hf-generate.yaml b/mcli/mcli-hf-generate.yaml
@@ -35,7 +35,7 @@ command: |
       "Here's a quick recipe for baking chocolate chip cookies: Start by" \
       "The best 5 cities to visit in Europe are"
 
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 name: hf-generate
 
 compute:

diff --git a/mcli/mcli-llama2-finetune.yaml b/mcli/mcli-llama2-finetune.yaml
@@ -9,7 +9,7 @@ integrations:
 command: |
   cd llm-foundry/scripts
   composer train/train.py /mnt/config/parameters.yaml
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 name: llama2-finetune
 
 compute:

diff --git a/mcli/mcli-openai-eval.yaml b/mcli/mcli-openai-eval.yaml
@@ -16,7 +16,7 @@ gpu_num:  #
 gpu_type:  #
 cluster:  # replace with your cluster here!
 
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 
 # The below is injected as a YAML file: /mnt/config/parameters.yaml
 parameters:

diff --git a/mcli/mcli-pretokenize-oci-upload.yaml b/mcli/mcli-pretokenize-oci-upload.yaml
@@ -1,5 +1,5 @@
 name: c4-2k-pre-tokenized
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 compute:
   gpus: 8  # Number of GPUs to use