
Error installing and running Falcon Models #83

Closed
pedrognsmartins opened this issue Jun 27, 2023 · 5 comments

Comments

@pedrognsmartins

Dear community,

When trying to install and run the Falcon model, I'm getting the following error:

┌───────────────────── Traceback (most recent call last) ─────────────────────┐
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py:1395 │
│ in download_models │
│ │
│ 1392 │ ).for_model(model_name, model_id=model_id, llm_config=config) │
│ 1393 │ │
│ 1394 │ try: │
│ > 1395 │ │ ref = bentoml.transformers.get(model.tag) │
│ 1396 │ │ if machine: │
│ 1397 │ │ │ # NOTE: When debug is enabled, │
│ 1398 │ │ │ # We will prefix the tag with tag and we can use reg │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\bentoml\_internal\f │
│ rameworks\transformers.py:292 in get │
│ │
│ 289 │ # target model must be from the BentoML model store │
│ 290 │ model = bentoml.transformers.get("my_pipeline:latest") │
│ 291 │ """ │
│ > 292 │ model = bentoml.models.get(tag_like) │
│ 293 │ if model.info.module not in (MODULE_NAME, name): │
│ 294 │ │ raise NotFound( │
│ 295 │ │ │ f"Model {model.tag} was saved with module {model.info.mod │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\simple_di\__init__. │
│ py:139 in _ │
│ │
│ 136 │ │ bind = sig.bind_partial(*filtered_args, **filtered_kwargs) │
│ 137 │ │ bind.apply_defaults() │
│ 138 │ │ │
│ > 139 │ │ return func(*_inject_args(bind.args), **_inject_kwargs(bind.k │
│ 140 │ │
│ 141 │ setattr(_, "_is_injected", True) │
│ 142 │ return cast(WrappedCallable, _) │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\bentoml\models.py:4 │
│ 2 in get │
│ │
│ 39 │ , │
│ 40 │ _model_store: "ModelStore" = Provide[BentoMLContainer.model_store │
│ 41 ) -> "Model": │
│ > 42 │ return _model_store.get(tag) │
│ 43 │
│ 44 │
│ 45 @inject │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\bentoml\_internal\s │
│ tore.py:146 in get │
│ │
│ 143 │ │ matches = self._fs.glob(f"{path}*/") │
│ 144 │ │ counts = matches.count().directories │
│ 145 │ │ if counts == 0: │
│ > 146 │ │ │ raise NotFound( │
│ 147 │ │ │ │ f"{self._item_type.get_typename()} '{tag}' is not fou │
│ 148 │ │ │ ) │
│ 149 │ │ elif counts == 1: │
└─────────────────────────────────────────────────────────────────────────────┘
NotFound: Model 'pt-tiiuae-falcon-7b:2f5c3cd4eace6be6c0f12981f377fb35e5bf6ee5'
is not found in BentoML store <osfs 'C:\Users\pedro\bentoml\models'>

During handling of the above exception, another exception occurred:

┌───────────────────── Traceback (most recent call last) ─────────────────────┐
│ in _run_module_as_main:198 │
│ in _run_code:88 │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\__main__.py │
│ :26 in <module> │
│ │
│ 23 if __name__ == "__main__": │
│ 24 │ from openllm.cli import cli │
│ 25 │ │
│ > 26 │ cli() │
│ 27 │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\core.py:1130 │
│ in __call__ │
│ │
│ 1127 │ │
│ 1128 │ def __call__(self, *args: t.Any, **kwargs: t.Any) -> t.Any: │
│ 1129 │ │ """Alias for :meth:`main`.""" │
│ > 1130 │ │ return self.main(*args, **kwargs) │
│ 1131 │
│ 1132 │
│ 1133 class Command(BaseCommand): │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\core.py:1055 │
│ in main │
│ │
│ 1052 │ │ try: │
│ 1053 │ │ │ try: │
│ 1054 │ │ │ │ with self.make_context(prog_name, args, **extra) as │
│ > 1055 │ │ │ │ │ rv = self.invoke(ctx) │
│ 1056 │ │ │ │ │ if not standalone_mode: │
│ 1057 │ │ │ │ │ │ return rv │
│ 1058 │ │ │ │ │ # it's not safe to ctx.exit(rv) here! │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\core.py:1657 │
│ in invoke │
│ │
│ 1654 │ │ │ │ super().invoke(ctx) │
│ 1655 │ │ │ │ sub_ctx = cmd.make_context(cmd_name, args, parent=ct │
│ 1656 │ │ │ │ with sub_ctx: │
│ > 1657 │ │ │ │ │ return _process_result(sub_ctx.command.invoke(su │
│ 1658 │ │ │
│ 1659 │ │ # In chain mode we create the contexts step by step, but aft │
│ 1660 │ │ # base command has been invoked. Because at that point we d │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\core.py:1404 │
│ in invoke │
│ │
│ 1401 │ │ │ echo(style(message, fg="red"), err=True) │
│ 1402 │ │ │
│ 1403 │ │ if self.callback is not None: │
│ > 1404 │ │ │ return ctx.invoke(self.callback, **ctx.params) │
│ 1405 │ │
│ 1406 │ def shell_complete(self, ctx: Context, incomplete: str) -> t.Lis │
│ 1407 │ │ """Return a list of completions for the incomplete value. Lo │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\core.py:760 │
│ in invoke │
│ │
│ 757 │ │ │
│ 758 │ │ with augment_usage_errors(__self): │
│ 759 │ │ │ with ctx: │
│ > 760 │ │ │ │ return __callback(*args, **kwargs) │
│ 761 │ │
│ 762 │ def forward( │
│ 763 │ │ __self, __cmd: "Command", *args: t.Any, **kwargs: t.Any # n │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py:380 │
│ in wrapper │
│ │
│ 377 │ │ @functools.wraps(func) │
│ 378 │ │ def wrapper(*args: P.args, **attrs: P.kwargs) -> t.Any: │
│ 379 │ │ │ try: │
│ > 380 │ │ │ │ return func(*args, **attrs) │
│ 381 │ │ │ except OpenLLMException as err: │
│ 382 │ │ │ │ raise click.ClickException( │
│ 383 │ │ │ │ │ click.style(f"[{group.name}] '{command_name}' fa │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py:353 │
│ in wrapper │
│ │
│ 350 │ │ │ │ assert group.name is not None, "group.name should no │
│ 351 │ │ │ │ event = analytics.OpenllmCliEvent(cmd_group=group.na │
│ 352 │ │ │ │ try: │
│ > 353 │ │ │ │ │ return_value = func(*args, **attrs) │
│ 354 │ │ │ │ │ duration_in_ms = (time.time_ns() - start_time) / │
│ 355 │ │ │ │ │ event.duration_in_ms = duration_in_ms │
│ 356 │ │ │ │ │ analytics.track(event) │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py:328 │
│ in wrapper │
│ │
│ 325 │ │ │ │
│ 326 │ │ │ configure_logging() │
│ 327 │ │ │ │
│ > 328 │ │ │ return f(*args, **attrs) │
│ 329 │ │ │
│ 330 │ │ return t.cast("ClickFunctionWrapper[..., t.Any]", wrapper) │
│ 331 │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py:1422 │
│ in download_models │
│ │
│ 1419 │ │ │ ) │
│ 1420 │ │ │
│ 1421 │ │ (model_args, model_attrs), tokenizer_attrs = model.llm_param │
│ > 1422 │ │ ref = model.import_model( │
│ 1423 │ │ │ model.model_id, │
│ 1424 │ │ │ model.tag, │
│ 1425 │ │ │ *model_args, │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\models\falc │
│ on\modeling_falcon.py:56 in import_model │
│ │
│ 53 │ │ device_map = attrs.pop("device_map", "auto") │
│ 54 │ │ │
│ 55 │ │ tokenizer = transformers.AutoTokenizer.from_pretrained(model │
│ > 56 │ │ model = transformers.AutoModelForCausalLM.from_pretrained( │
│ 57 │ │ │ model_id, │
│ 58 │ │ │ trust_remote_code=trust_remote_code, │
│ 59 │ │ │ torch_dtype=torch_dtype, │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\transformers\models │
│ \auto\auto_factory.py:479 in from_pretrained │
│ │
│ 476 │ │ │ │ class_ref, pretrained_model_name_or_path, **hub_kwarg │
│ 477 │ │ │ ) │
│ 478 │ │ │ _ = hub_kwargs.pop("code_revision", None) │
│ > 479 │ │ │ return model_class.from_pretrained( │
│ 480 │ │ │ │ pretrained_model_name_or_path, *model_args, config=co │
│ 481 │ │ │ ) │
│ 482 │ │ elif type(config) in cls._model_mapping.keys(): │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\transformers\modeli │
│ ng_utils.py:2881 in from_pretrained │
│ │
│ 2878 │ │ │ │ mismatched_keys, │
│ 2879 │ │ │ │ offload_index, │
│ 2880 │ │ │ │ error_msgs, │
│ > 2881 │ │ │ ) = cls._load_pretrained_model( │
│ 2882 │ │ │ │ model, │
│ 2883 │ │ │ │ state_dict, │
│ 2884 │ │ │ │ loaded_state_dict_keys, # XXX: rename? │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\transformers\modeli │
│ ng_utils.py:2980 in _load_pretrained_model │
│ │
│ 2977 │ │ │ ) │
│ 2978 │ │ │ is_safetensors = archive_file.endswith(".safetensors") │
│ 2979 │ │ │ if offload_folder is None and not is_safetensors: │
│ > 2980 │ │ │ │ raise ValueError( │
│ 2981 │ │ │ │ │ "The current device_map had weights offloaded │
│ 2982 │ │ │ │ │ " for them. Alternatively, make sure you have s │
│ 2983 │ │ │ │ │ " offers the weights in this format." │
└─────────────────────────────────────────────────────────────────────────────┘
ValueError: The current `device_map` had weights offloaded to the disk. Please
provide an `offload_folder` for them. Alternatively, make sure you have
`safetensors` installed if the model you are using offers the weights in this
format.
Traceback (most recent call last):
File "", line 198, in _run_module_as_main
File "", line 88, in run_code
File "C:\Users\pedro\anaconda3\envs\powerai\Scripts\openllm.exe_main
.py", line 7, in
File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\core.py", line 1130, in call
return self.main(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\core.py", line 1055, in main
rv = self.invoke(ctx)
^^^^^^^^^^^^^^^^
File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\core.py", line 1657, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\core.py", line 1657, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\core.py", line 760, in invoke
return __callback(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py", line 380, in wrapper
return func(*args, **attrs)
^^^^^^^^^^^^^^^^^^^^
File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py", line 353, in wrapper
return_value = func(*args, **attrs)
^^^^^^^^^^^^^^^^^^^^
File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py", line 328, in wrapper
return f(*args, **attrs)
^^^^^^^^^^^^^^^^^
File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\decorators.py", line 26, in new_func
return f(get_current_context(), *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py", line 797, in model_start
llm = t.cast(
^^^^^^^
File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\models\auto\factory.py", line 135, in for_model
llm.ensure_model_id_exists()
File "C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm_llm.py", line 900, in ensure_model_id_exists
output = subprocess.check_output(
^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\pedro\anaconda3\envs\powerai\Lib\subprocess.py", line 466, in check_output
return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\pedro\anaconda3\envs\powerai\Lib\subprocess.py", line 571, in run
raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['C:\Users\pedro\anaconda3\envs\powerai\python.exe', '-m', 'openllm', 'download', 'falcon', '--model-id', 'tiiuae/falcon-7b', '--machine', '--implementation', 'pt']' returned non-zero exit status 1.

Can someone help me with this?

Thank you.

@kenleejr

I got the same issue trying to use falcon with openllm==0.1.17

@aarnphm
Member

aarnphm commented Jun 28, 2023

Seems like your machine doesn't have enough resources, hence the weights are being offloaded to disk. I will need more bandwidth to investigate how to run Falcon on smaller machines.
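In the meantime, the underlying ValueError points at two workarounds: pass an `offload_folder` so weights that don't fit in memory can be spilled to disk, or install `safetensors`. Here is a minimal sketch of the first option using the transformers API directly rather than OpenLLM's code path (the "offload" directory name is just an example):

```python
# Minimal sketch, not OpenLLM's code path: load Falcon with accelerate-style
# disk offload. Assumes transformers and accelerate are installed; the
# "offload" directory name is arbitrary.
import torch
import transformers

model_id = "tiiuae/falcon-7b"
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",         # let accelerate place layers across GPU/CPU/disk
    offload_folder="offload",  # directory for weights that spill to disk
)
```

Be aware that generation with disk-offloaded weights will be very slow; this mainly confirms the model can load at all on your machine.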

@cmazzoni87

cmazzoni87 commented Jul 18, 2023

I have the same issue on a server with massive resources:

CalledProcessError Traceback (most recent call last)
in
3 import os
4
----> 5 llm = OpenLLM(model_name='falcon', model_id='tiiuae/falcon-40b-instruct', temperature=0.0)
6
7 llm("What is the difference between a duck and a goose? And why there are so many Goose in Canada?")

~/.local/lib/python3.8/site-packages/langchain/llms/openllm.py in __init__(self, model_name, model_id, server_url, server_type, embedded, **llm_kwargs)
168 # in-process. Wrt to BentoML users, setting embedded=False is the expected
169 # behaviour to invoke the runners remotely
--> 170 runner = openllm.Runner(
171 model_name=model_name,
172 model_id=model_id,

~/.local/lib/python3.8/site-packages/openllm/_llm.py in Runner(model_name, ensure_available, init_local, implementation, **attrs)
1404 behaviour
1405 """
-> 1406 runner = t.cast(
1407 "_BaseAutoLLMClass",
1408 openllm[implementation if implementation is not None else EnvVarMixin(model_name)["framework_value"]], # type: ignore (internal API)

~/.local/lib/python3.8/site-packages/openllm/models/auto/factory.py in create_runner(cls, model_name, model_id, **attrs)
155 A LLM instance.
156 """
--> 157 llm, runner_attrs = cls.for_model(model_name, model_id, return_runner_kwargs=True, **attrs)
158 return llm.to_runner(**runner_attrs)
159

~/.local/lib/python3.8/site-packages/openllm/models/auto/factory.py in for_model(cls, model_name, model_id, return_runner_kwargs, llm_config, ensure_available, **attrs)
133 llm.model_id,
134 )
--> 135 llm.ensure_model_id_exists()
136 if not return_runner_kwargs:
137 return llm

~/.local/lib/python3.8/site-packages/openllm/_llm.py in ensure_model_id_exists(self)
898 Auto LLM initialisation.
899 """
--> 900 output = subprocess.check_output(
901 [
902 sys.executable,

/usr/lib/python3.8/subprocess.py in check_output(timeout, *popenargs, **kwargs)
413 kwargs['input'] = empty
414
--> 415 return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
416 **kwargs).stdout
417

/usr/lib/python3.8/subprocess.py in run(input, capture_output, timeout, check, *popenargs, **kwargs)
514 retcode = process.poll()
515 if check and retcode:
--> 516 raise CalledProcessError(retcode, process.args,
517 output=stdout, stderr=stderr)
518 return CompletedProcess(process.args, retcode, stdout, stderr)

CalledProcessError: Command '['/usr/bin/python3', '-m', 'openllm', 'download', 'falcon', '--model-id', 'tiiuae/falcon-40b-instruct', '--machine', '--implementation', 'pt']' returned non-zero exit status 1.
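For what it's worth, the CalledProcessError hides the underlying failure: OpenLLM runs the download in a subprocess and only reports its exit status. Re-running the wrapped command by hand, i.e. `python -m openllm download falcon --model-id tiiuae/falcon-40b-instruct --implementation pt` (the same command from the error message, minus the internal `--machine` flag), should surface the real traceback.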

@cmazzoni87

Same issue as the one in ticket #121.

@aarnphm aarnphm closed this as completed Sep 5, 2023
@aarnphm
Member

aarnphm commented Sep 5, 2023

Please reopen if you still see this error on 0.3.0
