1 change: 1 addition & 0 deletions .gitignore
@@ -134,6 +134,7 @@ dmypy.json

build
.vscode/
*.iml
.attach_pid*
src/neuronx_distributed.egg-info/
*.whl
25 changes: 25 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,25 @@
default_language_version:
# force all unspecified python hooks to run python3
python: python3
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
- id: detect-aws-credentials
- repo: https://github.com/pocc/pre-commit-hooks
rev: v1.1.1
hooks:
- id: clang-format
args: [--style=file, -i]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.0
hooks:
- id: ruff
name: ruff
entry: ruff
args: [check, --fix, "--line-length=120", "--ignore=F401,E203"]
types: [python]
language: system
exclude: cases_update
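The hooks above run on each `git commit` once pre-commit is installed locally. A minimal setup sketch, assuming this file is committed as `.pre-commit-config.yaml` at the repository root:

```
# Install the pre-commit framework and register the hooks defined in .pre-commit-config.yaml
python3 -m pip install pre-commit
pre-commit install

# Optionally run every hook against the entire tree once, not just staged files
pre-commit run --all-files
```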
6 changes: 3 additions & 3 deletions README.md
@@ -9,10 +9,10 @@ To install the library, please follow the instructions mentioned here: https://a
To build from source, run the following command:

```
python3 setup.py bdist_wheel
bash ./build.sh
```

It should place the wheel at `dist/`
It should place the wheel at `build/`
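As a hedged follow-up to the build instructions above, installing the resulting wheel might look like this; the `neuronx_distributed-*.whl` glob is an assumption, since the exact filename depends on the package version:

```
bash ./build.sh
# The wheel name below is assumed; check build/ for the actual file produced
python3 -m pip install build/neuronx_distributed-*.whl
```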

## API Reference Guide

41 changes: 5 additions & 36 deletions build-tools/bin/custom-build
@@ -8,43 +8,12 @@ LICENSE_TXT_PATH=${BUILD_PATH}/private/LICENSE.txt
BUILD_PATH_NEURONX_DISTRIBUTED=${BUILD_PATH}/public/NeuronxDistributed
mkdir -p ${BUILD_PATH_NEURONX_DISTRIBUTED}

# check against flake8 linter
# Options used:
# --max-line-length=120 is used since a lot of docstrings
# contain lines longer than 120 that wouldn't make sense
# to split (ex. code snippets)
#
# Warnings that are ignored
# F401: unused import
# - Reason to ignore: Side effects might occur on import.
# Also, neuronx-cc check would trip this.
# W503/504: newline before/after binary operator.
# - Reason to Ignore: conditionals are often split into
# multiple lines for readability).
#
# More info in the following links:
# 1) https://flake8.pycqa.org/en/latest/user/error-codes.html
# 2) https://pycodestyle.pycqa.org/en/latest/intro.html#error-codes

FLAKE8_MSG=$(flake8 --max-line-length=120 --ignore=F401,W503,W504,E203 ${SRC_PATH}/src/neuronx_distributed || true)

python3.8 -m pip install flake8==3.7
if [[ ! -z $FLAKE8_MSG ]]
then
echo "FLAKE8 LINTING HAS DETECTED FORMATTING AND POTENTIALLY SOME SYNTAX ERRORS, PLEASE CHECK ABOVE OUTPUT!"
exit 1
fi

if [[ "$1" == "flake8" ]]
then
exit 0
fi

# # Copy Python source files
# Copy Python source files
cp setup.py ${BUILD_PATH_NEURONX_DISTRIBUTED}/
cp -r src ${BUILD_PATH_NEURONX_DISTRIBUTED}/
cp $LICENSE_TXT_PATH ${BUILD_PATH_NEURONX_DISTRIBUTED}/

## Build wheel
DIST_DIR=${BUILD_PATH}/pip/public/neuronx-distributed
python3.8 setup.py bdist_wheel --dist-dir ${DIST_DIR}

export DIST_DIR=${BUILD_PATH}/pip/public/neuronx-distributed

bash build.sh
30 changes: 30 additions & 0 deletions build.sh
@@ -0,0 +1,30 @@
#! /bin/bash
set -e

: ${DIST_DIR:=build}

python3.8 -m pip install ruff
# removing cache fails in ToD
python3.8 -m ruff check --no-cache --line-length=120 --ignore=F401,E203
# exit when asked to run `ruff` only
if [[ "$1" == "ruff" ]]
then
exit 0
fi

# Run static code analysis
python3.8 -m pip install mypy
# Install type bindings
python3.8 -m pip install types-requests boto3-stubs[s3]
# removing cache fails in ToD
python3.8 -m mypy --no-incremental || true
# exit when asked to run `mypy` only
if [[ "$1" == "mypy" ]]
then
exit 0
fi



# Build wheel
python3.8 setup.py bdist_wheel --dist-dir ${DIST_DIR}
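The positional-argument checks and the `: ${DIST_DIR:=build}` default above allow the script to be used in a few ways; a sketch based only on what the script itself does:

```
# Lint only: the script exits right after the ruff step when passed "ruff"
bash build.sh ruff

# Type-check only: exits after the mypy step
bash build.sh mypy

# Full build, writing the wheel to dist/ instead of the default build/
DIST_DIR=dist bash build.sh
```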
79 changes: 79 additions & 0 deletions examples/inference/dbrx/dbrx_runner.py
@@ -0,0 +1,79 @@
import torch
from dbrx.neuron_modeling_dbrx import (
NeuronDbrxConfig,
NeuronDbrxForCausalLM,
NeuronDbrxModel,
)
from runner import InferenceRunner
from transformers import AutoTokenizer

from neuronx_distributed.parallel_layers.checkpointing import _invoke_preshard_hook


class DbrxRunner(InferenceRunner):
def load_hf_model(self):
config = NeuronDbrxConfig.from_pretrained(self.model_path)
return NeuronDbrxForCausalLM.load_hf_model(self.model_path, config)

def load_neuron_model_on_cpu(self, max_prompt_length, sequence_length, batch_size, **kwargs):
# On CPU we can only run tensor parallelism with degree 1
config = self.get_config_for_nxd(
batch_size,
1,
max_prompt_length=max_prompt_length,
sequence_length=sequence_length,
enable_bucketing=False,
**kwargs)
config.torch_dtype = torch.float32

self.init_ditributed_env()
neuron_model = NeuronDbrxModel(config)

state_dict = NeuronDbrxForCausalLM.get_state_dict(self.model_path, config)

_invoke_preshard_hook(neuron_model, state_dict)

neuron_model.load_state_dict(state_dict, strict=False)

if config.torch_dtype == torch.bfloat16:
neuron_model.bfloat16()

model = NeuronDbrxForCausalLM(None, config)
model.context_encoding_model.model = neuron_model
model.token_generation_model.model = neuron_model
return model

def load_neuron_model(self, traced_model_path):
config = NeuronDbrxConfig.from_pretrained(traced_model_path)
model = NeuronDbrxForCausalLM.from_pretrained("", config)

model.load(traced_model_path)
if config.torch_dtype == torch.bfloat16:
model.bfloat16()

return model

def load_tokenizer(self, padding_side=None):
tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_path)
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = padding_side if padding_side else self.get_padding_side()
return tokenizer

def get_config_cls(self):
return NeuronDbrxConfig

def get_model_cls(self):
return NeuronDbrxForCausalLM

def get_padding_side(self):
return "right"

def get_default_hf_generation_config_kwargs(self):
config = super().get_default_hf_generation_config_kwargs()
config['pad_token_id'] = 0

return config


if __name__ == "__main__":
DbrxRunner.cmd_execute()