diff --git a/nlp/gpt_j/popxl/README.md b/nlp/gpt_j/popxl/README.md
index 4a6b230ca..2cec6ed06 100644
--- a/nlp/gpt_j/popxl/README.md
+++ b/nlp/gpt_j/popxl/README.md
@@ -1,10 +1,9 @@
 # GPT-J
 GPT-J for NLP pre-training and text generation, optimised for Graphcore's IPU.

-| Framework | domain | Model | Datasets | Tasks| Training| Inference |
-|-------------|-|------|-------|-------|-------|---|
-| opXL | NLP | GPT-J | MNLI | Next sentence prediction, Question/Answering | ✅ | ✅ |
-
+| Framework | Domain | Model | Datasets | Tasks | Training | Inference |
+|-----------|--------|-------|----------|-------|----------|-----------|
+| popXL | NLP | GPT-J | MNLI | Next sentence prediction, Question/Answering | ✅ Min. 16 IPUs (POD16) required | ✅ Min. 16 IPUs (POD16) required |


 # Instructions summary
@@ -51,7 +50,6 @@ source /bin/activate

 3. Install the Python requirements:
 ```bash
 pip3 install -r requirements.txt
-```nstall
 ```

@@ -72,9 +70,10 @@ The task is to predict the relation between the premise and the hypothesis, whic

 The default model size for fine-tuning is GPT-J 6B on POD64 (named `gptj_6B_1024_pod64`). You can
-change it to other configurations that are available in the configuration file `config/finetuning.yml` using the `- -config` CLI parameter:
+change it to other configurations that are available in the configuration file `config/finetuning.yml` using the `--config` CLI parameter.
+In particular, you can run fine-tuning on a POD16 using
 ```bash
-python3 run_finetuning_mnli.py - -config gptj_6B_1024_pod64
+python3 run_finetuning_mnli.py --config gptj_6B_1024_pod16
 ```

 When running the application, it is possible to save/load executables to/from a cache store. This allows for reusing a saved executable instead of re-compiling the model when re-running identical model configurations. To enable this, use the environment variable `POPXL_CACHE_DIR=` when running the application:

diff --git a/nlp/gpt_j/popxl/config/finetuning_mnli.yml b/nlp/gpt_j/popxl/config/finetuning_mnli.yml
index b2b97f879..3d3afb594 100644
--- a/nlp/gpt_j/popxl/config/finetuning_mnli.yml
+++ b/nlp/gpt_j/popxl/config/finetuning_mnli.yml
@@ -57,6 +57,17 @@ release:
       available_memory_proportion: [ 0.2 ]
       attention_serialisation: 2

+  "gptj_6B_1024_pod16":
+    <<: *gptj_6B_1024
+    execution:
+      micro_batch_size: 1
+      loss_scaling: 4096
+      io_tiles: 128
+      data_parallel: 1
+      tensor_parallel: 16
+      available_memory_proportion: [ 0.2 ]
+      attention_serialisation: 2
+
   tiny:
     <<: *tiny
     execution:
diff --git a/nlp/gpt_j/popxl/inference.py b/nlp/gpt_j/popxl/inference.py
index 4366dd325..86d41f342 100644
--- a/nlp/gpt_j/popxl/inference.py
+++ b/nlp/gpt_j/popxl/inference.py
@@ -70,10 +70,10 @@ def inference(config: GPTJConfig) -> TaskSession:
         # ----- Create Variables -----

         # Create RemoteBuffers for each variable
-        embeddings_buffers = named_variable_buffers(embeddings_facts)
+        embeddings_buffers = named_variable_buffers(embeddings_facts, shard_over_dict=False)
         layer_buffers = named_variable_buffers(
-            layer_facts, entries=config.model.layers)
-        lm_buffers = named_variable_buffers(lm_facts)
+            layer_facts, entries=config.model.layers, shard_over_dict=False)
+        lm_buffers = named_variable_buffers(lm_facts, shard_over_dict=False)

         variables = NamedTensors()
         transformer = NamedTensors()
diff --git a/nlp/gpt_j/popxl/requirements.txt b/nlp/gpt_j/popxl/requirements.txt
index 5b594dd4f..cafd2b135 100644
--- a/nlp/gpt_j/popxl/requirements.txt
+++ b/nlp/gpt_j/popxl/requirements.txt
@@ -15,6 +15,6 @@ sklearn==0.0
 pytest==6.2.5
 pytest-pythonpath==0.7.4

-git+ssh://git@github.com/graphcore/popxl-addons.git@sdk-release-3.1
+git+ssh://git@github.com/graphcore/popxl-addons.git@sdk-release-3.1_a

 protobuf==3.20.*; python_version > '3.6'
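
As a usage note on the executable-cache paragraph in the README hunk above: enabling the cache is a one-line change to the run command. This is a minimal sketch; `./exe_cache` is an arbitrary example directory, not a path mandated by the application.

```bash
# Hypothetical cache directory; any writable path works.
mkdir -p ./exe_cache
# Reuses a previously saved executable (or saves a new one) for this config.
POPXL_CACHE_DIR=./exe_cache python3 run_finetuning_mnli.py --config gptj_6B_1024_pod16
```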
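
The new `gptj_6B_1024_pod16` entry leans on YAML merge keys: `<<: *gptj_6B_1024` copies every field of the `gptj_6B_1024` anchor, and the overriding `execution` block then replaces the base one wholesale (merge keys are shallow, not deep). With `tensor_parallel: 16` and `data_parallel: 1`, the config occupies 16 × 1 = 16 IPUs, matching the POD16 target. The merged result can be sanity-checked with PyYAML, which resolves `<<` on load; this sketch assumes PyYAML is installed, the command runs from the repository root, and the entry sits under the top-level `release:` key as the hunk header suggests.

```bash
python3 - <<'EOF'
import yaml  # PyYAML resolves "<<" merge keys during loading

with open("nlp/gpt_j/popxl/config/finetuning_mnli.yml") as f:
    cfg = yaml.safe_load(f)

execution = cfg["release"]["gptj_6B_1024_pod16"]["execution"]
print(execution["tensor_parallel"])  # expected: 16
print(execution["data_parallel"])    # expected: 1
EOF
```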
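
If a working environment already exists, the `popxl-addons` pin updated in `requirements.txt` can be refreshed on its own rather than reinstalling everything; SSH access to the GitHub repository is assumed, as in the requirements file itself.

```bash
# Install only the re-pinned dependency; the tag matches requirements.txt.
pip3 install "git+ssh://git@github.com/graphcore/popxl-addons.git@sdk-release-3.1_a"
```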