From 04dde303b87258f09678ec3ae94d9f597ab8d591 Mon Sep 17 00:00:00 2001
From: Gabriele Sarti
Date: Tue, 30 Apr 2024 10:09:49 +0200
Subject: [PATCH] Add transformers v4.40 models to config, update changelog

---
 CHANGELOG.md                   | 20 ++++++++++++++++++++
 inseq/models/model_config.yaml | 14 +++++++++++++-
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9d7e27d..b4a36ee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,23 @@
 # Changelog
 
 *This file contains a high-level description of changes that were merged into the Inseq main branch since the last release. Refer to the [releases page](https://github.com/inseq-team/inseq/releases) for an exhaustive overview of changes introduced at each release.*
+
+## 🚀 Features
+
+- Added the new models `DbrxForCausalLM`, `OlmoForCausalLM`, `Phi3ForCausalLM` and `Qwen2MoeForCausalLM` to the model config.
+
+## 🔧 Fixes and Refactoring
+
+- Fix the issue in the attention implementation from [#268](https://github.com/inseq-team/inseq/issues/268) where non-terminal positions in the tensor were set to nan if they were 0s ([#269](https://github.com/inseq-team/inseq/pull/269)).
+
+- Fix the pad token in cases where it is not specified by default in the loaded model (e.g. for Qwen models) ([#269](https://github.com/inseq-team/inseq/pull/269)).
+
+- Fix the bug reported in [#266](https://github.com/inseq-team/inseq/issues/266) that made `value_zeroing` unusable with SDPA attention. The method can now be used on models defaulting to SDPA attention (e.g. `GemmaForCausalLM`) without passing `model_kwargs={'attn_implementation': 'eager'}` ([#267](https://github.com/inseq-team/inseq/pull/267)).
+
+## 📝 Documentation and Tutorials
+
+*No changes*
+
+## 💥 Breaking Changes
+
+*No changes*
\ No newline at end of file
diff --git a/inseq/models/model_config.yaml b/inseq/models/model_config.yaml
index 9618135..1b2433a 100644
--- a/inseq/models/model_config.yaml
+++ b/inseq/models/model_config.yaml
@@ -11,6 +11,9 @@ CodeGenForCausalLM:
 CohereForCausalLM:
   self_attention_module: "self_attn"
   value_vector: "value_states"
+DbrxForCausalLM:
+  self_attention_module: "attn"
+  value_vector: "value_states"
 FalconForCausalLM:
   self_attention_module: "self_attention"
   value_vector: "value_layer"
@@ -44,6 +47,9 @@ MixtralForCausalLM:
 MptForCausalLM:
   self_attention_module: "attn"
   value_vector: "value_states"
+OlmoForCausalLM:
+  self_attention_module: "self_attn"
+  value_vector: "value_states"
 OpenAIGPTLMHeadModel:
   self_attention_module: "attn"
   value_vector: "value"
@@ -53,9 +59,15 @@ OPTForCausalLM:
 PhiForCausalLM:
   self_attention_module: "self_attn"
   value_vector: "value_states"
+Phi3ForCausalLM:
+  self_attention_module: "self_attn"
+  value_vector: "value_states"
 Qwen2ForCausalLM:
   self_attention_module: "self_attn"
   value_vector: "value_states"
+Qwen2MoeForCausalLM:
+  self_attention_module: "self_attn"
+  value_vector: "value_states"
 StableLmForCausalLM:
   self_attention_module: "self_attn"
   value_vector: "value_states"
@@ -114,4 +126,4 @@ T5ForConditionalGeneration:
 UMT5ForConditionalGeneration:
   self_attention_module: "SelfAttention"
   cross_attention_module: "EncDecAttention"
-  value_vector: "value_states"
\ No newline at end of file
+  value_vector: "value_states"
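
For context, a minimal usage sketch of the `value_zeroing` behavior enabled by the fix referenced in the changelog above (the Gemma checkpoint name and prompt are illustrative; before [#267](https://github.com/inseq-team/inseq/pull/267) this call required passing `model_kwargs={"attn_implementation": "eager"}`):

```python
import inseq

# Load a model that defaults to SDPA attention with the value_zeroing method.
# No explicit model_kwargs={"attn_implementation": "eager"} override is needed
# after the fix described above (checkpoint name is illustrative).
model = inseq.load_model("google/gemma-2b", "value_zeroing")

# Attribute a short generation and visualize the resulting scores.
out = model.attribute("The capital of France is")
out.show()
```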