Update llama.py
danielhanchen committed Jan 26, 2024
1 parent bd2ff90 · commit a208ec4
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions unsloth/models/llama.py
@@ -234,7 -234,7 @@ def LlamaAttention_fast_forward(
     bsz, q_len, _ = hidden_states.size()

     # Check for inference
-    if past_key_value is not None and q_len == 1 and bsz == 1:
+    if False:#past_key_value is not None and q_len == 1 and bsz == 1:
         A, past_key_value = LlamaAttention_fast_forward_inference(
             self,
             hidden_states,
@@ -350,7 -350,7 @@ def LlamaDecoderLayer_fast_forward(
         past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
     """
     bsz, q_len, hd = hidden_states.size()
-    if (past_key_value is not None and q_len == 1 and bsz == 1):
+    if False:#(past_key_value is not None and q_len == 1 and bsz == 1):
         # Self Attention
         residual = hidden_states
         hidden_states = fast_rms_layernorm_inference(self.input_layernorm, hidden_states)
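Both hunks make the same change: the guard that previously routed single-token (q_len == 1), single-sequence (bsz == 1) decoding with a KV cache through the specialized inference path is hard-coded to False, so execution always falls through to the general code path. The original condition is preserved in a trailing comment, so the fast path can be restored later by deleting the False:# prefix. Below is a minimal runnable sketch of that pattern; the helpers fast_path and general_path are hypothetical stand-ins, not names from this commit.

import torch

def fast_path(hidden_states):
    # Hypothetical stand-in for a specialized single-token inference kernel.
    return hidden_states

def general_path(hidden_states):
    # Hypothetical stand-in for the general prefill/training code path.
    return hidden_states

def forward(hidden_states, past_key_value=None):
    bsz, q_len, _ = hidden_states.size()
    # The commit's pattern: hard-code the guard to False so the branch is never
    # taken, while keeping the original condition in a trailing comment.
    if False:  # past_key_value is not None and q_len == 1 and bsz == 1:
        return fast_path(hidden_states)
    return general_path(hidden_states)

x = torch.zeros(1, 1, 8)               # bsz == 1, q_len == 1
out = forward(x, past_key_value=())    # hit the fast path before this commit; now the general path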
