From 55aa57e64d9823bc830630584def1918f1606738 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Thu, 2 Dec 2021 21:12:54 +0100 Subject: [PATCH] Enable local Breeze script and `pipx` to be used for breeze bootstrap --- .github/workflows/ci.yml | 2 +- .gitignore | 3 + .pre-commit-config.yaml | 2 +- Breeze2 | 46 +++- .../0003-bootstraping-virtual-environment.md | 207 ++++++++++++++++++ dev/breeze/src/airflow_breeze/breeze.py | 46 +++- .../src/airflow_breeze/visuals/__init__.py | 2 +- .../tests/test_find_airflow_directory.py | 51 +++++ 8 files changed, 342 insertions(+), 17 deletions(-) create mode 100644 dev/breeze/doc/adr/0003-bootstraping-virtual-environment.md create mode 100644 dev/breeze/tests/test_find_airflow_directory.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e7f4c97c16333..fa42cd4469bb0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -254,7 +254,7 @@ jobs: with: python-version: '3.7' cache: 'pip' - - run: pip install . + - run: pip install -e . - run: python3 -m pytest -n auto --color=yes tests-ui: diff --git a/.gitignore b/.gitignore index 5f37105e8a1ba..f6a605af3c5ef 100644 --- a/.gitignore +++ b/.gitignore @@ -220,3 +220,6 @@ pip-wheel-metadata # Generated UI licenses licenses/LICENSES-ui.txt + +# Packaged Breeze2 on Windows +/Breeze2.exe diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ca983ccb8b966..b310fd6644e3e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -408,7 +408,7 @@ repos: language: pygrep name: Only capitalized Breeze used in Breeze2. 
description: Please use capitalized "Breeze" in the new Breeze docs - entry: "breeze" + entry: ([\W\s\n\t\r]|^)breeze([\W\s\n\t\r]|$) pass_filenames: true files: ^dev/breeze/doc - id: base-operator diff --git a/Breeze2 b/Breeze2 index 60cf940468b38..9591f6ba7d748 100755 --- a/Breeze2 +++ b/Breeze2 @@ -4,6 +4,8 @@ import os import sys # Python <3.4 does not have pathlib +from venv import EnvBuilder + if sys.version_info.major != 3 or sys.version_info.minor < 7: print("ERROR! Make sure you use Python 3.7+ !!") sys.exit(1) @@ -12,13 +14,19 @@ import subprocess from os import execv from pathlib import Path -AIRFLOW_SOURCES_DIR = Path(__file__).parent.resolve() +if getattr(sys, 'frozen', False): + # If the application is run as a bundle, the PyInstaller bootloader + # extends the sys module by a flag frozen=True and sets the temporary app + # path into variable _MEIPASS' and sys.executable is Breeze's executable path. + AIRFLOW_SOURCES_DIR = Path(sys.executable).parent.resolve() +else: + AIRFLOW_SOURCES_DIR = Path(__file__).parent.resolve() BUILD_DIR = AIRFLOW_SOURCES_DIR / ".build" BUILD_BREEZE_DIR = BUILD_DIR / "breeze2" BUILD_BREEZE_CFG_SAVED = BUILD_BREEZE_DIR / "setup.cfg.saved" BUILD_BREEZE_VENV_DIR = BUILD_BREEZE_DIR / "venv" -BUILD_BREEZE_VENV_BIN_DIR = BUILD_BREEZE_VENV_DIR / "bin" -BUILD_BREEZE_VENV_PIP = BUILD_BREEZE_VENV_BIN_DIR / "pip" +BUILD_BREEZE_VENV_BIN_DIR = BUILD_BREEZE_VENV_DIR / ("Scripts" if os.name == 'nt' else "bin") +BUILD_BREEZE_VENV_PYTHON = BUILD_BREEZE_VENV_BIN_DIR / "python" BUILD_BREEZE_VENV_BREEZE = BUILD_BREEZE_VENV_BIN_DIR / "Breeze2" BREEZE_SOURCE_PATH = AIRFLOW_SOURCES_DIR / "dev" / "breeze" @@ -41,15 +49,35 @@ def save_config(): if needs_installation(): print(f"(Re)Installing Breeze's virtualenv in {BUILD_BREEZE_VENV_DIR}") - BUILD_BREEZE_VENV_DIR.mkdir(parents=True, exist_ok=True) - subprocess.run([sys.executable, "-m", "venv", f"{BUILD_BREEZE_VENV_DIR}"], check=True) - subprocess.run( - [f"{BUILD_BREEZE_VENV_PIP}", "install", 
"--upgrade", "-e", "."], cwd=BREEZE_SOURCE_PATH, check=True - ) + try: + EnvBuilder(system_site_packages=False, upgrade=True, with_pip=True, prompt="breeze").create( + str(BUILD_BREEZE_VENV_DIR) + ) + except Exception as e: + # in some cases (mis-configured python) the venv creation might not work via API + # (ensurepip missing). This is the case in case of default MacOS Python and Python executable + # Bundled in Windows executable, In this case we fallback to running venv as a tool using default + # Python3 found on path (in case of Windows Bundled exe, you don't even have a current + # interpreted executable available, because Python interpreter is executed through a library. + # and sys.executable points to the Bundled exe file. + BUILD_BREEZE_VENV_DIR.mkdir(parents=True, exist_ok=True) + subprocess.run(["python3", "-m", "venv", f"{BUILD_BREEZE_VENV_DIR}"], check=True) + if os.name == 'nt': + subprocess.run( + [f"{BUILD_BREEZE_VENV_PYTHON}", "-m", "pip", "install", "--upgrade", "-e", "."], + cwd=BREEZE_SOURCE_PATH, + check=True, + ) + else: + subprocess.run( + [f"{BUILD_BREEZE_VENV_PYTHON}", "-m", "pip", "install", "--upgrade", "-e", "."], + cwd=BREEZE_SOURCE_PATH, + check=True, + ) save_config() if os.name == 'nt': # This is the best way of running it on Windows, though it leaves the original process hanging around - subprocess.run([f"{BUILD_BREEZE_VENV_BREEZE}.exe"] + sys.argv[1:], check=True) + subprocess.run([f"{BUILD_BREEZE_VENV_BREEZE}"] + sys.argv[1:], check=True) else: execv(f"{BUILD_BREEZE_VENV_BREEZE}", [f"{BUILD_BREEZE_VENV_BREEZE}"] + sys.argv[1:]) diff --git a/dev/breeze/doc/adr/0003-bootstraping-virtual-environment.md b/dev/breeze/doc/adr/0003-bootstraping-virtual-environment.md new file mode 100644 index 0000000000000..64717a7ce0b6a --- /dev/null +++ b/dev/breeze/doc/adr/0003-bootstraping-virtual-environment.md @@ -0,0 +1,207 @@ + + + + +**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* + +- [3. 
Bootstrapping the virtual environment](#3-bootstrapping-the-virtual-environment) + - [Status](#status) + - [Context](#context) + - [Decision](#decision) + - [Alternatives considered](#alternatives-considered) + - [Consequences](#consequences) + + + +# 3. Bootstrapping the virtual environment + +Date: 2021-12-06 + +## Status + +Draft + +## Context + +Since Breeze is written in Python, it needs to be run in its own virtual environment. +This virtual environment is different from Airflow virtualenv as it contains only a +small set of tools (for example rich) that are not present in the standard Python +library. We want to keep the virtualenv separated, because setting up Airflow +virtualenv is hard (especially if you consider cross-platform use). The virtualenv +is needed mainly to run the script that will actually manage airflow installation +and dependencies, in the form of Docker images which are part of Breeze. + +This virtualenv needs to be easy to set up and it should support the "live" nature +of Breeze. The idea is that the user of Breeze does not have to take any action +to update to the latest version of the virtualenv when new dependencies are +added. Also, when new Breeze functionalities are added, they should be automatically +available for the user after the repository is updated to the latest version. + +The user should not have to think about installing and upgrading Breeze separately from +switching to a different Airflow tag or branch - moreover, the Breeze environment +should automatically adapt to the version and branch the user checked out. By its +nature Airflow Breeze (at least for quite a while) will be evolving together with +Airflow and it will live in the same repository and new features and behaviours +will be added continuously. + +The workflow that needs to be supported should tap into the regular workflow +of the user who is developing Airflow.
+ +* git checkout branch + +./Breeze2 should use the version of Breeze that is available in this version + +* git rebase --onto apache/main + +./Breeze2 should be automatically updated to the latest version available +in main (including dependencies) + +Also if someone develops Breeze itself, the experience should be seamlessly +integrated - modification of Breeze code locally should be automatically +reflected in the Breeze environment of the user who is modifying Breeze. + +The user should not have to re-install/update Breeze to automatically use +the modified Breeze source code when running Breeze commands and testing +them with Airflow. + +Breeze is also used as part of CI - common Python functions and libraries +are used across both Breeze development environment and Continuous +Integration we run. It is an established practice of the CI that the logic +of the CI is stored in the same repository as the source code of the +application it tests and part of the Breeze functions are shared with CI. + +In the future when Breeze2 stabilizes and its update cadence will be +much slower (which is likely as it happened with the Breeze predecessor) +there could be an option that Breeze is installed as separate package and +same released Breeze version could be used to manage multiple Airflow +versions, for that we might want to release Breeze as a separate package +in PyPI. However since there is the CI integration, the source code +version of Breeze will remain as part of the Airflow's source code. + + +## Decision + +The decision is to implement Breeze in a subfolder (`dev/breeze/`) of +Apache Airflow as a Python project following the standard setuptools +enabled project. The project contains setup.py and dependencies described +in setup.cfg and contains both source code and tests for Breeze code.
+ +The sub-project could be used in the future to produce a PyPI package +(we reserved such package in PyPI), however its main purpose is +to install Breeze in a separate virtualenv bootstrapped +automatically in editable mode. + +There are two ways you will be able to install `Breeze2` - locally in +repository using ./Breeze2 bootstrapping script and using `pipx`. + +The bootstrapping Python script (`Breeze2` in the main repository +of Airflow) performs the following tasks: + +* when run for the first time it creates `.build/breeze2/venv` virtual + environment (Python 3.7+ based) - with locally installed `dev/breeze` + project in editable mode (`pip install -e .`) - this makes sure + that the users of Breeze will use the latest version of Breeze + available in their version of the repository +* when run subsequently, it will check if setup files changed for + Breeze (dependencies changed) and if they did it will automatically + reinstall the environment, adding missing dependencies +* after managing the venv, the Breeze2 script will simply execute + the actual Breeze2 script in the `.build/breeze2/venv` passing the + parameters to the script. For the user, the effect will be the same + as activating the virtualenv and executing the ./Breeze2 from + there (but it will happen automatically and invisibly for the + user) +* In Windows environment where you have no easy/popular mechanism + of running scripts with shebang (#!) equivalent in Posix + environments, users will be able to locally build (using + `pyinstaller`) a `Breeze2.exe` frozen Python script that will + essentially do the same; they could also use `python Breeze2` + command or switch to Git Bash to utilize the shebang feature + (Git Bash comes together with Git when installed on Windows) +* The second option is to use `pipx` to install Breeze2.
+ Using `pipx` is almost equivalent to what the bootstrapping does + and many users might actually choose to install Breeze this + way - and we will add it as an option to install Breeze + with pipx: `pipx install -e ./dev/breeze` provides the right + installation instruction. The installation can be updated + by `pipx install --force -e ./dev/breeze`. + The benefit of using `pipx` is that Breeze becomes + available on the path when you install it this way, also + it provides out-of-the-box Windows support. The drawback is + that when new dependencies are added, they will not be + automatically installed and that you need to manually force + re-installation if new dependencies are used - which is not + as seamlessly integrated in the regular development + environment, and it might create some confusion for the + users who would have to learn `pipx` and its commands. + Another drawback of `pipx` is that it installs one global + version of Breeze2 for all projects, where it is quite + possible that someone has two different versions of + Airflow repository checked out and the bootstrapping + script provides this capability. + +The bootstrapping script is a temporary measure, until the +dependencies of Breeze stabilize enough that the need +to recreate the virtual environment by `pipx` will be +very infrequent.
In this case `pipx` provides better +user experience, and we might decide even to remove the +bootstrapping script and switch fully to `pipx`. + +## Alternatives considered + +The alternatives considered were: + +* `nox` - this is a tool to manage virtualenv for testing, while + it has some built in virtualenv capabilities, it is an + additional tool that needs to be installed and it lacks + the automation of checking and recreation of the virtualenv + when needed (you need to manually run nox to update environment). + Also it is targeted for building multiple virtualenvs + for tests - it has nice pytest integration for example, but it + lacks support for managing editable installs for a long time. + +* `pyenv` - this is the de-facto standard for maintenance of + virtualenvs. It has the capability of creation and switching + between virtualenvs easily. Together with some of its plugins + (pyenv-virtualenv and auto-activation) it could serve the + purpose quite well. However the problem is that if you + also use `pyenv` to manage your `airflow` virtualenv this might + be a source of confusion. Should I activate airflow virtualenv + or Breeze2 venv to run tests? Part of Breeze experience is + to activate local Airflow virtualenv for IDE integration and + since this is different than simple Breeze virtualenv, using + pyenv and auto-activation in this case might lead to a lot + of confusion. Keeping the Breeze virtualenv "hidden" and + mostly "used" but not deliberately activated is a better + choice - especially since most users will simply "use" Breeze2 + as an app rather than activate the environment deliberately. + Also choosing `pyenv` and its virtualenv plugin would + add extra, unnecessary steps and prerequisites for Breeze. + + +## Consequences + +Using Breeze for new users will be much simpler, without +having to install any prerequisites.
The virtualenv used by +Breeze2 will be hidden from the user, and used behind the +scenes - and the dependencies used will be automatically +installed when needed. This will make it possible to seamlessly +integrate the Breeze tool in the development experience without +having to worry about extra maintenance needed. diff --git a/dev/breeze/src/airflow_breeze/breeze.py b/dev/breeze/src/airflow_breeze/breeze.py index 9969de7ead351..c854d323924c8 100755 --- a/dev/breeze/src/airflow_breeze/breeze.py +++ b/dev/breeze/src/airflow_breeze/breeze.py @@ -15,6 +15,9 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import os +from pathlib import Path +from typing import Optional import click from click import ClickException @@ -25,15 +28,47 @@ NAME = "Breeze2" VERSION = "0.0.1" +__AIRFLOW_SOURCES_ROOT = Path.cwd() -@click.group() -def main(): - pass - +__AIRFLOW_CFG_FILE = "setup.cfg" console = Console(force_terminal=True, color_system="standard", width=180) +def get_airflow_sources_root(): + return __AIRFLOW_SOURCES_ROOT + + +def search_upwards_for_airflow_sources_root(start_from: Path) -> Optional[Path]: + root = Path(start_from.root) + d = start_from + while d != root: + attempt = d / __AIRFLOW_CFG_FILE + if attempt.exists() and "name = apache-airflow\n" in attempt.read_text(): + return attempt.parent + d = d.parent + return None + + +def find_airflow_sources_root(): + # Try to find airflow sources in current working dir + airflow_sources_root = search_upwards_for_airflow_sources_root(Path.cwd()) + if not airflow_sources_root: + # Or if it fails, find it in parents of the directory where the ./breeze.py is. + airflow_sources_root = search_upwards_for_airflow_sources_root(Path(__file__).resolve().parent) + global __AIRFLOW_SOURCES_ROOT + if airflow_sources_root: + __AIRFLOW_SOURCES_ROOT = airflow_sources_root + else: + console.print(f"\n[yellow]Could not find Airflow sources location.
Assuming {__AIRFLOW_SOURCES_ROOT}") + os.chdir(__AIRFLOW_SOURCES_ROOT) + + +@click.group() +def main(): + find_airflow_sources_root() + + option_verbose = click.option( "--verbose", is_flag=True, @@ -54,6 +89,7 @@ def shell(verbose: bool): """Enters breeze.py environment. this is the default command use when no other is selected.""" if verbose: console.print("\n[green]Welcome to breeze.py[/]\n") + console.print(f"\n[green]Root of Airflow Sources = {__AIRFLOW_SOURCES_ROOT}[/]\n") console.print(ASCIIART, style=ASCIIART_STYLE) raise ClickException("\nPlease implement entering breeze.py\n") @@ -63,7 +99,7 @@ def shell(verbose: bool): def build_ci_image(verbose: bool): """Builds breeze.ci image for breeze.py.""" if verbose: - console.print("\n[blue]Building image[/]\n") + console.print(f"\n[blue]Building image of airflow from {__AIRFLOW_SOURCES_ROOT}[/]\n") raise ClickException("\nPlease implement building the CI image\n") diff --git a/dev/breeze/src/airflow_breeze/visuals/__init__.py b/dev/breeze/src/airflow_breeze/visuals/__init__.py index 2df79115700d9..018b1ad6b0228 100644 --- a/dev/breeze/src/airflow_breeze/visuals/__init__.py +++ b/dev/breeze/src/airflow_breeze/visuals/__init__.py @@ -59,4 +59,4 @@ """ -ASCIIART_STYLE = "blue" +ASCIIART_STYLE = "white" diff --git a/dev/breeze/tests/test_find_airflow_directory.py b/dev/breeze/tests/test_find_airflow_directory.py new file mode 100644 index 0000000000000..a50664495d7e0 --- /dev/null +++ b/dev/breeze/tests/test_find_airflow_directory.py @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os +from pathlib import Path +from unittest import mock + +from airflow_breeze.breeze import find_airflow_sources_root, get_airflow_sources_root + +ACTUAL_AIRFLOW_SOURCES = Path(__file__).parent.parent.parent.parent +ROOT_PATH = Path(Path(__file__).root) + + +def test_find_airflow_root_upwards_from_cwd(capsys): + os.chdir(Path(__file__).parent) + find_airflow_sources_root() + assert ACTUAL_AIRFLOW_SOURCES == get_airflow_sources_root() + output = str(capsys.readouterr().out) + assert output == '' + + +def test_find_airflow_root_upwards_from_file(capsys): + os.chdir(Path(__file__).root) + find_airflow_sources_root() + assert ACTUAL_AIRFLOW_SOURCES == get_airflow_sources_root() + output = str(capsys.readouterr().out) + assert output == '' + + +@mock.patch('airflow_breeze.breeze.__AIRFLOW_SOURCES_ROOT', ROOT_PATH) +@mock.patch('airflow_breeze.breeze.__AIRFLOW_CFG_FILE', "bad_name.cfg") +def test_fallback_find_airflow_root(capsys): + os.chdir(ROOT_PATH) + find_airflow_sources_root() + assert ROOT_PATH == get_airflow_sources_root() + output = str(capsys.readouterr().out) + assert "Could not find Airflow sources" in output