"""Tests for ``optimum.nvidia.models.llama.LLamaForCausalLM``."""

from unittest import TestCase

from parameterized import parameterized

from optimum.nvidia.models.llama import LLamaForCausalLM as TrtLlamaForCausalLM
from optimum.nvidia.utils.tests import requires_gpu


class LLamaForCausalLMTestCase(TestCase):
    """Checks that ``LLamaForCausalLM.from_pretrained`` returns a model object."""

    # ``parameterized.expand`` has to be the outermost decorator: it creates
    # one test method per parameter from the function it receives, so any
    # decorator stacked above it is never applied to the generated tests.
    # Keeping ``requires_gpu`` underneath ensures every generated test
    # carries the GPU guard.
    @parameterized.expand(["float16", "bfloat16"])
    @requires_gpu
    def test_build_engine_7b_with_tp(self, dtype: str) -> None:
        """Load the 7B checkpoint with ``dtype`` and assert a model is returned.

        Args:
            dtype: Name of the data type forwarded to ``from_pretrained``;
                the test is expanded once for "float16" and once for
                "bfloat16".
        """
        # NOTE(review): the test name says "with_tp", but no tensor-parallel
        # argument is passed to ``from_pretrained`` here - confirm intent.
        model = TrtLlamaForCausalLM.from_pretrained(
            "huggingface/llama-7b",
            dtype=dtype,
        )
        self.assertIsNotNone(model)