Skip to content

Commit

Permalink
feat: hubble support env variables (#5023)
Browse files Browse the repository at this point in the history
* feat: add command line arguments --build-env

* fix: add env variable check func

* fix: call the correct method

* refactor: optimize code

* fix: fix regex of env var

* refactor: remove useless reference packages from files

* style: fix overload and cli autocomplete

* refactor: use global import style

* fix: remove useless func and fix the test parameter

* refactor: optimize parameter names of func

* test: improve hubio test

* fix: fix the test of test hubio

* refactor: add docstring of func

* test: optimization test

* test: add test for hubio push api

* test: improve test of hubio

* fix: fix hub pull install requirements need to replace environment variable

* test: improve test of hubio and have remove test token

* style: use single quotes and remove print

* style: improve the imported way of dependency

* feat: support the env variables can consist of number

* refactor: optimization function description and remove unnecessary function calls

* fix: add expand_env_variables in install requirements

* refactor: optimized code and function comments

* docs: add documentation about supporting build env when you use jina hub push

* docs: improve the docs fo hub push

* refactor: optimize command line prompts

* docs: polish documentation for jina hub push

* docs: polish

* docs: add mutli env variables

* docs: polish

Co-authored-by: Jina Dev Bot <dev-bot@jina.ai>
Co-authored-by: Zhaofeng Miao <522856232@qq.com>
  • Loading branch information
3 people committed Aug 9, 2022
1 parent 1240453 commit 6cc4497
Show file tree
Hide file tree
Showing 14 changed files with 441 additions and 20 deletions.
56 changes: 55 additions & 1 deletion docs/fundamentals/executor/hub/push-executor.md
Expand Up @@ -83,4 +83,58 @@ After being pushed for the first time, the protected tags can not be pushed agai

```bash
jina hub push [--public/--private] --force-update <NAME> --secret <SECRET> --protected-tag <PROTECTED_TAG_1> --protected-tag <PROTECTED_TAG_2> <path_to_executor_folder>
```

## Use environment variables

Sometimes you might want to use a private token in `requirements.txt` to install private dependencies. For security reasons, you don't want to expose this token to anyone else. The `--build-env` parameter helps in this situation. For example, suppose `requirements.txt` looks like this:

```txt
# requirements.txt
git+http://${YOUR_TOKEN}@github.com/your_private_repo
```

When doing `jina hub push`, you can pass the `--build-env` parameter:

```bash
jina hub push --build-env YOUR_TOKEN=foo
```

````{admonition} Note
:class: note
There are restrictions in terms of naming environment variables:
- `{` and `}` are required when using environment variables in `requirements.txt`, e.g. `$YOUR_TOKEN` doesn't work as expected.
- Environment variable names may only contain uppercase letters, numbers and `_` (underscore), and must not start with `_`.
````

````{admonition} Limitations
:class: attention
There are limitations if you push Executors via `--build-env` and pull/use them as source code (this doesn't matter if you use the Docker image):
- When you use `jina hub pull jinahub://YOUR_EXECUTOR`, you must set the corresponding environment variable according to the prompt.
```bash
export YOUR_TOKEN=foo
```
- When you use `.add(uses='jinahub://YOUR_EXECUTOR')` in a Flow, you must also set the corresponding environment variable.
For example:
```python
from docarray import Document
from jina import Flow, Executor, requests
import os
os.environ["YOUR_TOKEN"] = 'foo'
f = Flow().add(uses='jinahub://YOUR_EXECUTOR')
with f:
    f.post(on='/', inputs=Document(), on_done=print)
```
````

For multiple environment variables, we can pass them in this way:

```bash
jina hub push --build-env FIRST=foo --build-env SECOND=bar
```
1 change: 1 addition & 0 deletions jina/hubble/__init__.py
Expand Up @@ -14,3 +14,4 @@ class HubExecutor:
image_name: Optional[str] = None
archive_url: Optional[str] = None
md5sum: Optional[str] = None
build_env: Optional[list] = None
61 changes: 54 additions & 7 deletions jina/hubble/helper.py
Expand Up @@ -22,6 +22,12 @@
from jina.helper import get_request_header as _get_request_header_main
from jina.importer import ImportExtensions
from jina.logging.predefined import default_logger
from jina.hubble.requirements import (
get_env_variables,
check_env_variable,
parse_requirement,
expand_env_variables
)


@lru_cache()
Expand Down Expand Up @@ -484,10 +490,50 @@ def is_requirements_installed(
return isinstance(ex, VersionConflict)
return True

def get_requirements_env_variables(requirements_file: 'Path') -> list:
    """Collect the environment variable names referenced in requirements.txt.

    Blank lines and lines starting with ``#`` are ignored. Every remaining
    line is handed to ``get_env_variables`` and the names it reports are
    gathered in first-seen order, each name at most once, so a variable that
    is used on several lines is reported to the caller only once.

    :param requirements_file: the requirements.txt file
    :return: List of env variables in requirements.txt
    """
    # A dict serves as an insertion-ordered set of variable names.
    env_variables = {}
    with requirements_file.open() as requirements:
        for req in requirements:
            req = req.strip()
            if (not req) or req.startswith('#'):
                continue
            env_variables.update(dict.fromkeys(get_env_variables(req)))
    return list(env_variables)

def check_requirements_env_variable(env_variable: str) -> bool:
    """
    Validate the name of an environment variable by delegating
    to ``check_env_variable``.
    :param env_variable: env_variable in the requirements.txt file
    :return: the result of ``check_env_variable`` for the given name
    """
    is_valid = check_env_variable(env_variable)
    return is_valid

def replace_requirements_env_variables(requirements_file: 'Path') -> list:
    """Expand the environment variables of every requirement line.

    Lines that are blank or start with ``#`` (after stripping) are dropped;
    each remaining stripped line is passed through ``expand_env_variables``.

    :param requirements_file: the requirements.txt file
    :return: List of the expanded requirement lines, in file order
    """
    with requirements_file.open('r') as handle:
        stripped_lines = [raw.strip() for raw in handle.readlines()]
    return [
        expand_env_variables(entry)
        for entry in stripped_lines
        if entry and not entry.startswith('#')
    ]


def _get_install_options(requirements_file: 'Path', excludes: Tuple[str] = ('jina',)):
with requirements_file.open() as requirements:
install_options = []
install_reqs = []
Expand All @@ -496,13 +542,14 @@ def _get_install_options(requirements_file: 'Path', excludes: Tuple[str] = ('jin
if (not req) or req.startswith('#'):
continue
elif req.startswith('-'):
install_options.extend(req.split(' '))
for index, item in enumerate(req.split(' ')):
install_options.append(expand_env_variables(item))
else:
req_spec = parse_requirement(req)
expand_req = expand_env_variables(req)
req_spec = parse_requirement(expand_req)

if req_spec.project_name not in excludes or len(req_spec.extras) > 0:
install_reqs.append(req)

install_reqs.append(expand_req)
return install_reqs, install_options


Expand Down Expand Up @@ -628,4 +675,4 @@ def get_hubble_error_message(hubble_structured_error: dict) -> Tuple[str, str]:
https://github.com/jina-ai/executor-normalizer
'''

return (msg, original_msg)
return (msg, original_msg)
73 changes: 72 additions & 1 deletion jina/hubble/hubio.py
Expand Up @@ -23,6 +23,8 @@
get_request_header,
parse_hub_uri,
upload_file,
get_requirements_env_variables,
check_requirements_env_variable
)
from jina.hubble.hubapi import (
dump_secret,
Expand Down Expand Up @@ -345,6 +347,44 @@ def push(self) -> None:
)

dockerfile = dockerfile.relative_to(work_path)

build_env = None
if self.args.build_env:
build_envs = self.args.build_env.strip().split()
build_env_dict = {}
for index, env in enumerate(build_envs):
env_list = env.split('=')
if (len(env_list) != 2):
raise Exception( f'The --build-env parameter: `{env}` is wrong format. you can use: `--build-env {env}=YOUR_VALUE`.')
if check_requirements_env_variable(env_list[0]) is False:
raise Exception( f'The --build-env parameter key:`{env_list[0]}` can only consist of uppercase letter and number and underline.')
build_env_dict[env_list[0]] = env_list[1]
build_env = build_env_dict if len(list(build_env_dict.keys()))>0 else None

requirements_file = work_path / 'requirements.txt'

requirements_env_variables = []
if requirements_file.exists():
requirements_env_variables = get_requirements_env_variables(requirements_file)
for index, env in enumerate(requirements_env_variables):
if check_requirements_env_variable(env) is False:
raise Exception( f'The requirements.txt environment variables:`${env}` can only consist of uppercase letter and number and underline.')

if len(requirements_env_variables) and not build_env:
env_variables_str = ','.join(requirements_env_variables);
error_str= f'The requirements.txt set environment variables as follows:`{env_variables_str}` should use `--build-env';
for item in requirements_env_variables:
error_str+= f' {item}=YOUR_VALUE'
raise Exception(f'{error_str}` to add it.')
elif len(requirements_env_variables) and build_env:
build_env_keys = list(build_env.keys())
diff_env_variables = list(set(requirements_env_variables).difference(set(build_env_keys)))
if len(diff_env_variables):
diff_env_variables_str = ",".join(diff_env_variables)
error_str= f'The requirements.txt set environment variables as follows:`{diff_env_variables_str}` should use `--build-env';
for item in diff_env_variables:
error_str+= f' {item}=YOUR_VALUE'
raise Exception(f'{error_str}` to add it.')

console = get_rich_console()
with console.status(f'Pushing `{self.args.path}` ...') as st:
Expand Down Expand Up @@ -381,6 +421,9 @@ def push(self) -> None:
if dockerfile:
form_data['dockerfile'] = str(dockerfile)

if build_env:
form_data['buildEnv'] = json.dumps(build_env)

uuid8, secret = load_secret(work_path)
if self.args.force_update or uuid8:
form_data['id'] = self.args.force_update or uuid8
Expand Down Expand Up @@ -563,6 +606,27 @@ def _get_prettyprint_usage(self, console, executor_name, usage_kind=None):

console.print(Panel(param_str, title='Usage', expand=False, width=100))


def _prettyprint_build_env_usage(self, console, build_env, usage_kind=None):
    """Print a panel listing the environment variables the user has to set.

    :param console: object whose ``print`` method receives the rendered panel
    :param build_env: iterable of environment variable names, one table row each
    :param usage_kind: not used by this method
    """
    from rich import box
    from rich.panel import Panel
    from rich.table import Table

    param_str = Table(
        box=box.SIMPLE,
    )
    param_str.add_column('Environment variable')
    param_str.add_column('Your value')

    # The second column is a fixed placeholder; only the names are known here.
    for item in build_env:
        param_str.add_row(f'{item}', 'your value')

    console.print(
        Panel(
            param_str,
            title='build_env',
            subtitle='You have to set the above environment variables',
            expand=False,
            width=100,
        )
    )

@staticmethod
@disk_cache_offline(cache_file=str(get_cache_db()))
def fetch_meta(
Expand Down Expand Up @@ -624,7 +688,7 @@ def _send_request_with_retry(url, **kwargs):
f'tag: {tag}, commit: {resp.get("commit", {}).get("id")}, '
f'session_id: {req_header.get("jinameta-session-id")}'
)

buildEnv = resp['commit'].get('commitParams', {}).get('buildEnv', None)
return HubExecutor(
uuid=resp['id'],
name=resp.get('name', None),
Expand All @@ -634,6 +698,7 @@ def _send_request_with_retry(url, **kwargs):
image_name=image_name,
archive_url=resp['package']['download'],
md5sum=resp['package']['md5'],
build_env=buildEnv.keys() if buildEnv else []
)

@staticmethod
Expand Down Expand Up @@ -771,6 +836,7 @@ def pull(self) -> str:
cached_zip_file = None
executor_name = None
usage_kind = None
build_env = None

try:
need_pull = self.args.force_update
Expand All @@ -787,6 +853,8 @@ def pull(self) -> str:
force=need_pull,
)

build_env = executor.build_env

presented_id = executor.name if executor.name else executor.uuid
executor_name = (
f'{presented_id}'
Expand Down Expand Up @@ -818,6 +886,9 @@ def pull(self) -> str:
elif scheme == 'jinahub':
import filelock

if build_env:
self._prettyprint_build_env_usage(console,build_env)

with filelock.FileLock(get_lockfile(), timeout=-1):
try:
pkg_path, pkg_dist_path = get_dist_path_of_executor(
Expand Down
52 changes: 49 additions & 3 deletions jina/hubble/requirements.py
Expand Up @@ -2,8 +2,7 @@

import os
import re
from typing import Dict, Tuple, cast

from typing import Dict, Tuple, cast, List
from pkg_resources import Requirement

# Adopted from requirements-parser:
Expand Down Expand Up @@ -47,6 +46,9 @@
rf'^(?P<scheme>{VCS_SCHEMES_REGEX})://((?P<login>[^/@]+)@)?'
r'(?P<path>[^#@]+)(@(?P<revision>[^#]+))?(#(?P<fragment>\S+))?'
)
# Matches a `${NAME}` placeholder: group `var` is the whole placeholder,
# group `name` is the variable name (uppercase letters, digits, underscore).
ENV_VAR_RE = re.compile(r'(?P<var>\$\{(?P<name>[A-Z0-9_]+)\})')

# Matches a string made up only of uppercase letters, digits and underscores.
ENV_VAR_RE_ONLY_MATCH_UPPERCASE_UNDERLINE = re.compile(r'^[A-Z0-9_]+$')


extras_require_search = re.compile(r'(?P<name>.+)\[(?P<extras>[^\]]+)\]')
Expand Down Expand Up @@ -75,7 +77,6 @@ def parse_requirement(line: str) -> 'Requirement':
:param line: a line of a requirement file
:returns: a Requirement instance for the given line
"""

vcs_match = VCS_REGEX.match(line)
uri_match = URI_REGEX.match(line)

Expand All @@ -99,3 +100,48 @@ def parse_requirement(line: str) -> 'Requirement':
line = f'{egg or name} @ {line}'

return Requirement.parse(line)


def get_env_variables(line: str) -> List:
    """
    Find the names of the `${NAME}` environment variables referenced in a line.
    Only names made of uppercase letters, numbers and the `_` (underscore)
    are recognised, as defined by ``ENV_VAR_RE``.
    :param line: a line of a requirement file
    :return: a List of the distinct variable names, in order of first appearance
    """
    # dict.fromkeys de-duplicates while keeping first-seen order; going through
    # a set would make the resulting order depend on string hashing.
    names = (match.group('name') for match in ENV_VAR_RE.finditer(line))
    return list(dict.fromkeys(names))


def check_env_variable(env_variable: str) -> bool:
    """
    Check that an environment variable name consists only of
    uppercase letters, numbers and the `_` (underscore).
    :param env_variable: env_variable in the requirements.txt file
    :return: True if the whole name satisfies the restriction, False otherwise
    """
    # fullmatch rather than match: with match, the pattern's `$` anchor would
    # also accept a name that is followed by a trailing newline.
    return (
        ENV_VAR_RE_ONLY_MATCH_UPPERCASE_UNDERLINE.fullmatch(env_variable)
        is not None
    )


def expand_env_variables(line: str) -> str:
    """
    Substitute every `${MY_VARIABLE_1}` placeholder in a requirement line with
    the value returned by `os.getenv` for that variable.
    `${NAME}` is the only placeholder form that is recognised, which ensures:
    1. Strings that merely contain a `$` are not (partially) expanded.
    2. Requirement files behave the same across platforms.
    Variable names are limited to uppercase letters, numbers and the `_`
    (underscore), following the `POSIX standard
    <http://pubs.opengroup.org/onlinepubs/9699919799/>`_.
    A variable that is unset, or set to an empty string, is reported as an error.
    :param line: a line of a requirement file
    :return: the line with all placeholders replaced
    """
    expanded = line
    # Placeholders are located in the original line; each one found is then
    # replaced everywhere it occurs in the progressively expanded text.
    for match in ENV_VAR_RE.finditer(line):
        placeholder = match.group('var')
        var_name = match.group('name')
        value = os.getenv(var_name)
        if not value:
            raise Exception(f'The given requirements.txt require environment variables `{var_name}` does not exist!')
        expanded = expanded.replace(placeholder, value)
    return expanded
5 changes: 5 additions & 0 deletions jina/parsers/hubble/push.py
Expand Up @@ -61,6 +61,11 @@ def dir_path(string):
type=str,
help='If set, push will overwrite the Executor on the Hub that shares the same NAME or UUID8 identifier',
)
gp.add_argument(
'--build-env',
type=str,
help='A list of environment variables. It will be used in project build phase.',
)
gp.add_argument(
'--secret',
type=str,
Expand Down
1 change: 1 addition & 0 deletions jina_cli/autocomplete.py
Expand Up @@ -168,6 +168,7 @@
'--protected-tag',
'--force-update',
'--force',
'--build-env',
'--secret',
'--no-cache',
'--public',
Expand Down

0 comments on commit 6cc4497

Please sign in to comment.