1 change: 1 addition & 0 deletions .gitignore
@@ -134,6 +134,7 @@ dmypy.json

build
.vscode/
*.iml
.attach_pid*
src/neuronx_distributed.egg-info/
*.whl
25 changes: 25 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,25 @@
default_language_version:
# force all unspecified python hooks to run python3
python: python3
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
- id: detect-aws-credentials
- repo: https://github.com/pocc/pre-commit-hooks
rev: v1.1.1
hooks:
- id: clang-format
args: [--style=file, -i]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.0
hooks:
- id: ruff
name: ruff
entry: ruff
args: [check, --fix, "--line-length=120", "--ignore=F401,E203"]
types: [python]
language: system
exclude: cases_update
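The hooks above run on each `git commit` once pre-commit is installed locally. A minimal setup sketch, assuming this file is committed as `.pre-commit-config.yaml` at the repository root:

```
# Install the pre-commit framework and register the hooks defined in .pre-commit-config.yaml
python3 -m pip install pre-commit
pre-commit install

# Optionally run every hook against the entire tree once, not just staged files
pre-commit run --all-files
```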
6 changes: 3 additions & 3 deletions README.md
@@ -9,10 +9,10 @@ To install the library, please follow the instructions mentioned here: https://a
To build from source, run the following command:

```
python3 setup.py bdist_wheel
bash ./build.sh
```

It should place the wheel at `dist/`
It should place the wheel at `build/`
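As a hedged follow-up to the build instructions above, installing the resulting wheel might look like this; the `neuronx_distributed-*.whl` glob is an assumption, since the exact filename depends on the package version:

```
bash ./build.sh
# The wheel name below is assumed; check build/ for the actual file produced
python3 -m pip install build/neuronx_distributed-*.whl
```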

## API Reference Guide

41 changes: 5 additions & 36 deletions build-tools/bin/custom-build
@@ -8,43 +8,12 @@ LICENSE_TXT_PATH=${BUILD_PATH}/private/LICENSE.txt
BUILD_PATH_NEURONX_DISTRIBUTED=${BUILD_PATH}/public/NeuronxDistributed
mkdir -p ${BUILD_PATH_NEURONX_DISTRIBUTED}

# check against flake8 linter
# Options used:
# --max-line-length=120 is used since a lot of docstrings
# contain lines longer than 120 that wouldn't make sense
# to split (ex. code snippets)
#
# Warnings that are ignored
# F401: unused import
# - Reason to ignore: Side effects might occur on import.
# Also, neuronx-cc check would trip this.
# W503/504: newline before/after binary operator.
# - Reason to Ignore: conditionals are often split into
# multiple lines for readability).
#
# More info in the following links:
# 1) https://flake8.pycqa.org/en/latest/user/error-codes.html
# 2) https://pycodestyle.pycqa.org/en/latest/intro.html#error-codes

FLAKE8_MSG=$(flake8 --max-line-length=120 --ignore=F401,W503,W504,E203 ${SRC_PATH}/src/neuronx_distributed || true)

python3.8 -m pip install flake8==3.7
if [[ ! -z $FLAKE8_MSG ]]
then
echo "FLAKE8 LINTING HAS DETECTED FORMATTING AND POTENTIALLY SOME SYNTAX ERRORS, PLEASE CHECK ABOVE OUTPUT!"
exit 1
fi

if [[ "$1" == "flake8" ]]
then
exit 0
fi

# # Copy Python source files
# Copy Python source files
cp setup.py ${BUILD_PATH_NEURONX_DISTRIBUTED}/
cp -r src ${BUILD_PATH_NEURONX_DISTRIBUTED}/
cp $LICENSE_TXT_PATH ${BUILD_PATH_NEURONX_DISTRIBUTED}/

## Build wheel
DIST_DIR=${BUILD_PATH}/pip/public/neuronx-distributed
python3.8 setup.py bdist_wheel --dist-dir ${DIST_DIR}

export DIST_DIR=${BUILD_PATH}/pip/public/neuronx-distributed

bash build.sh
30 changes: 30 additions & 0 deletions build.sh
@@ -0,0 +1,30 @@
#! /bin/bash
set -e

: ${DIST_DIR:=build}

python3.8 -m pip install ruff
# removing cache fails in ToD
python3.8 -m ruff check --no-cache --line-length=120 --ignore=F401,E203
# exit when asked to run `ruff` only
if [[ "$1" == "ruff" ]]
then
exit 0
fi

# Run static code analysis
python3.8 -m pip install mypy
# Install type bindings
python3.8 -m pip install types-requests boto3-stubs[s3]
# removing cache fails in ToD
python3.8 -m mypy --no-incremental || true
# exit when asked to run `mypy` only
if [[ "$1" == "mypy" ]]
then
exit 0
fi



# Build wheel
python3.8 setup.py bdist_wheel --dist-dir ${DIST_DIR}
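The positional-argument checks and the `: ${DIST_DIR:=build}` default above allow the script to be used in a few ways; a sketch based only on what the script itself does:

```
# Lint only: the script exits right after the ruff step when passed "ruff"
bash build.sh ruff

# Type-check only: exits after the mypy step
bash build.sh mypy

# Full build, writing the wheel to dist/ instead of the default build/
DIST_DIR=dist bash build.sh
```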
79 changes: 79 additions & 0 deletions examples/inference/dbrx/dbrx_runner.py
@@ -0,0 +1,79 @@
import torch
from dbrx.neuron_modeling_dbrx import (
NeuronDbrxConfig,
NeuronDbrxForCausalLM,
NeuronDbrxModel,
)
from runner import InferenceRunner
from transformers import AutoTokenizer

from neuronx_distributed.parallel_layers.checkpointing import _invoke_preshard_hook


class DbrxRunner(InferenceRunner):
def load_hf_model(self):
config = NeuronDbrxConfig.from_pretrained(self.model_path)
return NeuronDbrxForCausalLM.load_hf_model(self.model_path, config)

def load_neuron_model_on_cpu(self, max_prompt_length, sequence_length, batch_size, **kwargs):
# On CPU we can only run tensor parallelism with degree 1
config = self.get_config_for_nxd(
batch_size,
1,
max_prompt_length=max_prompt_length,
sequence_length=sequence_length,
enable_bucketing=False,
**kwargs)
config.torch_dtype = torch.float32

self.init_ditributed_env()
neuron_model = NeuronDbrxModel(config)

state_dict = NeuronDbrxForCausalLM.get_state_dict(self.model_path, config)

_invoke_preshard_hook(neuron_model, state_dict)

neuron_model.load_state_dict(state_dict, strict=False)

if config.torch_dtype == torch.bfloat16:
neuron_model.bfloat16()

model = NeuronDbrxForCausalLM(None, config)
model.context_encoding_model.model = neuron_model
model.token_generation_model.model = neuron_model
return model

def load_neuron_model(self, traced_model_path):
config = NeuronDbrxConfig.from_pretrained(traced_model_path)
model = NeuronDbrxForCausalLM.from_pretrained("", config)

model.load(traced_model_path)
if config.torch_dtype == torch.bfloat16:
model.bfloat16()

return model

def load_tokenizer(self, padding_side=None):
tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_path)
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = padding_side if padding_side else self.get_padding_side()
return tokenizer

def get_config_cls(self):
return NeuronDbrxConfig

def get_model_cls(self):
return NeuronDbrxForCausalLM

def get_padding_side(self):
return "right"

def get_default_hf_generation_config_kwargs(self):
config = super().get_default_hf_generation_config_kwargs()
config['pad_token_id'] = 0

return config


if __name__ == "__main__":
DbrxRunner.cmd_execute()