You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
When trying to deploy with the latest TGI (text-generation-inference) release, the container fails with:
{"timestamp":"2024-05-16T11:17:36.832915Z","level":"ERROR","fields":{"message":"Error when initializing model\nTraceback (most recent call last):\n File \"/opt/conda/bin/text-generation-server\", line 8, in <module>\n sys.exit(app())\n File \"/opt/conda/lib/python3.10/site-packages/typer/main.py\", line 311, in __call__\n return get_command(self)(*args, **kwargs)\n File \"/opt/conda/lib/python3.10/site-packages/click/core.py\", line 1157, in __call__\n return self.main(*args, **kwargs)\n File \"/opt/conda/lib/python3.10/site-packages/typer/core.py\", line 778, in main\n return _main(\n File \"/opt/conda/lib/python3.10/site-packages/typer/core.py\", line 216, in _main\n rv = self.invoke(ctx)\n File \"/opt/conda/lib/python3.10/site-packages/click/core.py\", line 1688, in invoke\n return _process_result(sub_ctx.command.invoke(sub_ctx))\n File \"/opt/conda/lib/python3.10/site-packages/click/core.py\", line 1434, in invoke\n return ctx.invoke(self.callback, **ctx.params)\n File \"/opt/conda/lib/python3.10/site-packages/click/core.py\", line 783, in invoke\n return __callback(*args, **kwargs)\n File \"/opt/conda/lib/python3.10/site-packages/typer/main.py\", line 683, in wrapper\n return callback(**use_params) # type: ignore\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/cli.py\", line 90, in serve\n server.serve(\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/server.py\", line 258, in serve\n asyncio.run(\n File \"/opt/conda/lib/python3.10/asyncio/runners.py\", line 44, in run\n return loop.run_until_complete(main)\n File \"/opt/conda/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n self.run_forever()\n File \"/opt/conda/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n self._run_once()\n File \"/opt/conda/lib/python3.10/asyncio/base_events.py\", line 1909, in _run_once\n handle._run()\n File \"/opt/conda/lib/python3.10/asyncio/events.py\", line 80, in _run\n 
self._context.run(self._callback, *self._args)\n> File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/server.py\", line 222, in serve_inner\n model = get_model(\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/__init__.py\", line 420, in get_model\n return FlashLlama(\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/flash_llama.py\", line 84, in __init__\n model = FlashLlamaForCausalLM(prefix, config, weights)\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py\", line 368, in __init__\n self.model = FlashLlamaModel(prefix, config, weights)\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py\", line 292, in __init__\n [\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py\", line 293, in <listcomp>\n FlashLlamaLayer(\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py\", line 232, in __init__\n self.self_attn = FlashLlamaAttention(\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py\", line 108, in __init__\n self.query_key_value = load_attention(config, prefix, weights)\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py\", line 43, in load_attention\n bias = config.attention_bias\n File \"/opt/conda/lib/python3.10/site-packages/transformers/configuration_utils.py\", line 263, in __getattribute__\n return super().__getattribute__(key)\nAttributeError: 'Phi3Config' object has no attribute 'attention_bias'\n"},"target":"text_generation_launcher"}
System Info
When trying to deploy with the latest TGI (text-generation-inference) release, the container fails with:
{"timestamp":"2024-05-16T11:17:36.832915Z","level":"ERROR","fields":{"message":"Error when initializing model\nTraceback (most recent call last):\n File \"/opt/conda/bin/text-generation-server\", line 8, in <module>\n sys.exit(app())\n File \"/opt/conda/lib/python3.10/site-packages/typer/main.py\", line 311, in __call__\n return get_command(self)(*args, **kwargs)\n File \"/opt/conda/lib/python3.10/site-packages/click/core.py\", line 1157, in __call__\n return self.main(*args, **kwargs)\n File \"/opt/conda/lib/python3.10/site-packages/typer/core.py\", line 778, in main\n return _main(\n File \"/opt/conda/lib/python3.10/site-packages/typer/core.py\", line 216, in _main\n rv = self.invoke(ctx)\n File \"/opt/conda/lib/python3.10/site-packages/click/core.py\", line 1688, in invoke\n return _process_result(sub_ctx.command.invoke(sub_ctx))\n File \"/opt/conda/lib/python3.10/site-packages/click/core.py\", line 1434, in invoke\n return ctx.invoke(self.callback, **ctx.params)\n File \"/opt/conda/lib/python3.10/site-packages/click/core.py\", line 783, in invoke\n return __callback(*args, **kwargs)\n File \"/opt/conda/lib/python3.10/site-packages/typer/main.py\", line 683, in wrapper\n return callback(**use_params) # type: ignore\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/cli.py\", line 90, in serve\n server.serve(\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/server.py\", line 258, in serve\n asyncio.run(\n File \"/opt/conda/lib/python3.10/asyncio/runners.py\", line 44, in run\n return loop.run_until_complete(main)\n File \"/opt/conda/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n self.run_forever()\n File \"/opt/conda/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n self._run_once()\n File \"/opt/conda/lib/python3.10/asyncio/base_events.py\", line 1909, in _run_once\n handle._run()\n File \"/opt/conda/lib/python3.10/asyncio/events.py\", line 80, in _run\n 
self._context.run(self._callback, *self._args)\n> File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/server.py\", line 222, in serve_inner\n model = get_model(\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/__init__.py\", line 420, in get_model\n return FlashLlama(\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/flash_llama.py\", line 84, in __init__\n model = FlashLlamaForCausalLM(prefix, config, weights)\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py\", line 368, in __init__\n self.model = FlashLlamaModel(prefix, config, weights)\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py\", line 292, in __init__\n [\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py\", line 293, in <listcomp>\n FlashLlamaLayer(\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py\", line 232, in __init__\n self.self_attn = FlashLlamaAttention(\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py\", line 108, in __init__\n self.query_key_value = load_attention(config, prefix, weights)\n File \"/opt/conda/lib/python3.10/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py\", line 43, in load_attention\n bias = config.attention_bias\n File \"/opt/conda/lib/python3.10/site-packages/transformers/configuration_utils.py\", line 263, in __getattribute__\n return super().__getattribute__(key)\nAttributeError: 'Phi3Config' object has no attribute 'attention_bias'\n"},"target":"text_generation_launcher"}
Information
Tasks
Reproduction
deploy to cluster with:
Expected behavior
The container starts and the model initializes without errors.
The text was updated successfully, but these errors were encountered: