Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ jobs:
with:
python-version: '3.7'
cache: 'pip'
- run: pip install .
- run: pip install -e .
- run: python3 -m pytest -n auto --color=yes

tests-ui:
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -220,3 +220,6 @@ pip-wheel-metadata

# Generated UI licenses
licenses/LICENSES-ui.txt

# Packaged Breeze2 on Windows
/Breeze2.exe
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ repos:
language: pygrep
name: Only capitalized Breeze used in Breeze2.
description: Please use capitalized "Breeze" in the new Breeze docs
entry: "breeze"
entry: ([\W\s\n\t\r]|^)breeze([\W\s\n\t\r]|$)
pass_filenames: true
files: ^dev/breeze/doc
- id: base-operator
Expand Down
46 changes: 37 additions & 9 deletions Breeze2
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import os
import sys

# Python <3.4 does not have pathlib
from venv import EnvBuilder

if sys.version_info.major != 3 or sys.version_info.minor < 7:
print("ERROR! Make sure you use Python 3.7+ !!")
sys.exit(1)
Expand All @@ -12,13 +14,19 @@ import subprocess
from os import execv
from pathlib import Path

AIRFLOW_SOURCES_DIR = Path(__file__).parent.resolve()
if getattr(sys, 'frozen', False):
# If the application is run as a bundle, the PyInstaller bootloader
# extends the sys module by a flag frozen=True and sets the temporary app
# path into variable _MEIPASS' and sys.executable is Breeze's executable path.
AIRFLOW_SOURCES_DIR = Path(sys.executable).parent.resolve()
else:
AIRFLOW_SOURCES_DIR = Path(__file__).parent.resolve()
BUILD_DIR = AIRFLOW_SOURCES_DIR / ".build"
BUILD_BREEZE_DIR = BUILD_DIR / "breeze2"
BUILD_BREEZE_CFG_SAVED = BUILD_BREEZE_DIR / "setup.cfg.saved"
BUILD_BREEZE_VENV_DIR = BUILD_BREEZE_DIR / "venv"
BUILD_BREEZE_VENV_BIN_DIR = BUILD_BREEZE_VENV_DIR / "bin"
BUILD_BREEZE_VENV_PIP = BUILD_BREEZE_VENV_BIN_DIR / "pip"
BUILD_BREEZE_VENV_BIN_DIR = BUILD_BREEZE_VENV_DIR / ("Scripts" if os.name == 'nt' else "bin")
BUILD_BREEZE_VENV_PYTHON = BUILD_BREEZE_VENV_BIN_DIR / "python"
BUILD_BREEZE_VENV_BREEZE = BUILD_BREEZE_VENV_BIN_DIR / "Breeze2"

BREEZE_SOURCE_PATH = AIRFLOW_SOURCES_DIR / "dev" / "breeze"
Expand All @@ -41,15 +49,35 @@ def save_config():

if needs_installation():
print(f"(Re)Installing Breeze's virtualenv in {BUILD_BREEZE_VENV_DIR}")
BUILD_BREEZE_VENV_DIR.mkdir(parents=True, exist_ok=True)
subprocess.run([sys.executable, "-m", "venv", f"{BUILD_BREEZE_VENV_DIR}"], check=True)
subprocess.run(
[f"{BUILD_BREEZE_VENV_PIP}", "install", "--upgrade", "-e", "."], cwd=BREEZE_SOURCE_PATH, check=True
)
try:
EnvBuilder(system_site_packages=False, upgrade=True, with_pip=True, prompt="breeze").create(
str(BUILD_BREEZE_VENV_DIR)
)
except Exception as e:
# In some cases (mis-configured Python) the venv creation might not work via the API
# (ensurepip missing). This is the case for the default MacOS Python and for the Python executable
# bundled in the Windows executable. In this case we fall back to running venv as a tool using the
# default Python3 found on the path (in case of the Windows bundled exe, you don't even have a current
# interpreter executable available, because the Python interpreter is executed through a library
# and sys.executable points to the bundled exe file).
BUILD_BREEZE_VENV_DIR.mkdir(parents=True, exist_ok=True)
subprocess.run(["python3", "-m", "venv", f"{BUILD_BREEZE_VENV_DIR}"], check=True)
if os.name == 'nt':
subprocess.run(
[f"{BUILD_BREEZE_VENV_PYTHON}", "-m", "pip", "install", "--upgrade", "-e", "."],
cwd=BREEZE_SOURCE_PATH,
check=True,
)
else:
subprocess.run(
[f"{BUILD_BREEZE_VENV_PYTHON}", "-m", "pip", "install", "--upgrade", "-e", "."],
cwd=BREEZE_SOURCE_PATH,
check=True,
)
save_config()

if os.name == 'nt':
# This is the best way of running it on Windows, though it leaves the original process hanging around
subprocess.run([f"{BUILD_BREEZE_VENV_BREEZE}.exe"] + sys.argv[1:], check=True)
subprocess.run([f"{BUILD_BREEZE_VENV_BREEZE}"] + sys.argv[1:], check=True)
else:
execv(f"{BUILD_BREEZE_VENV_BREEZE}", [f"{BUILD_BREEZE_VENV_BREEZE}"] + sys.argv[1:])
207 changes: 207 additions & 0 deletions dev/breeze/doc/adr/0003-bootstraping-virtual-environment.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

<!-- START doctoc generated TOC please keep comment here to allow auto update -->
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)*

- [3. Bootstraping the virtual environment](#3-bootstraping-the-virtual-environment)
- [Status](#status)
- [Context](#context)
- [Decision](#decision)
- [Alternatives considered](#alternatives-considered)
- [Consequences](#consequences)

<!-- END doctoc generated TOC please keep comment here to allow auto update -->

# 3. Bootstraping the virtual environment

Date: 2021-12-06

## Status

Draft

## Context

Since Breeze is written in Python, it needs to be run in its own virtual environment.
This virtual environment is different from Airflow virtualenv as it contains only a
small set of tools (for example rich) that are not present in the standard Python
library. We want to keep the virtualenv separated, because setting up Airflow
virtualenv is hard (especially if you consider cross-platform use). The virtualenv
is needed mainly to run the script that will actually manage airflow installation
and dependencies, in the form of Docker images which are part of Breeze.

This virtualenv needs to be easy to setup and it should support the "live" nature
of Breeze. The idea is that the user of Breeze does not have to do any action
to update to the latest version of the virtualenv, when new dependencies are
added, also when new Breeze functionalities are added, they should be automatically
available for the user after the repository is updated to latest version.

User should not have to think about installing and upgrading Breeze separately from
switching to different Airflow tag or branch - moreover, the Breeze environment
should automatically adapt to the version and branch the user checked out. By its
nature Airflow Breeze (at least for quite a while) will be evolving together with
Airflow and it will live in the same repository and new features and behaviours
will be added continuously.

The workflow that needs to be supported should tap into the regular workflow
of the user who is developing Airflow.

* git checkout branch

./Breeze should use the version of Breeze that is available in this version

* git rebase --onto apache/main

./Breeze should be automatically updated to the latest version available
in main (including dependencies)

Also if someone develops Breeze itself, the experience should be seamlessly
integrated - modification of Breeze code locally should be automatically
reflected in the Breeze environment of the user who is modifying Breeze.

The user should not have to re-install/update Breeze to automatically use
the modified Breeze source code when running Breeze commands and testing
them with Airflow.

Breeze is also used as part of CI - common Python functions and libraries
are used across both Breeze development environment and Continuous
Integration we run. It is an established practice that the logic
of the CI is stored in the same repository as the source code of the
application it tests, and part of the Breeze functions are shared with CI.

In the future when Breeze2 stabilizes and its update cadence will be
much slower (which is likely as it happened with the Breeze predecessor)
there could be an option that Breeze is installed as separate package and
same released Breeze version could be used to manage multiple Airflow
versions, for that we might want to release Breeze as a separate package
in PyPI. However since there is the CI integration, the source code
version of Breeze will remain as part of the Airflow's source code.


## Decision

The decision is to implement Breeze in a subfolder (`dev/breeze/`) of
Apache Airflow as a Python project following the standard setuptools
enabled project. The project contains setup.py and dependencies described
in setup.cfg and contains both source code and tests for Breeze code.

The sub-project could be used in the future to produce a PyPI package
(we reserved such package in PyPI), however its main purpose is
to install Breeze in a separate virtualenv bootstrapped
automatically in editable mode.

There are two ways you will be able to install `Breeze2` - locally in
repository using ./Breeze2 bootstrapping script and using `pipx`.

The bootstrapping Python script (`Breeze2` in the main repository
of Airflow) performs the following tasks:

* when run for the first time it creates `.build/breeze2/venv` virtual
environment (Python3.7+ based) - with locally installed `dev`
project in editable mode (`pip install -e .`) - this makes sure
that the users of Breeze will use the latest version of Breeze
available in their version of the repository
* when run subsequently, it will check if setup files changed for
Breeze (dependencies changed) and if they did it will automatically
reinstall the environment, adding missing dependencies
* after managing the venv, the Breeze2 script will simply execute
the actual Breeze2 script in the `.build/breeze2/venv` passing the
parameters to the script. For the user, the effect will be the same
as activating the virtualenv and executing the ./Breeze2 from
there (but it will happen automatically and invisibly for the
user)
* In Windows environment where you have no easy/popular mechanism
of running scripts with shebang (#!) equivalent in Posix
environments, users will be able to locally build (using
`pyinstaller`) a `Breeze2.exe` frozen Python script that will
essentially do the same. They could also use `python Breeze2`
command or switch to Git Bash to utilize the shebang feature
(Git Bash comes together with Git when installed on Windows)
* The second option is to use `pipx` to install Breeze2.
The `pipx` is almost equivalent to what the bootstrapping does
and many users might actually choose to install Breeze this
way - and we will document it as an option to install Breeze:
`pipx install -e <BREEZE FOLDER>` is the right
installation instruction. The installation can be updated
by `pipx install --force -e <BREEZE FOLDER>`.
The benefit of using `pipx` is that Breeze becomes
available on the path when you install it this way, also
it provides out-of-the box Windows support. The drawback is
that when new dependencies are added, they will not be
automatically installed and that you need to manually force
re-installation if new dependencies are used - which is not
as seamlessly integrated in the regular development
environment, and it might create some confusion for the
users who would have to learn `pipx` and its commands.
Another drawback of `pipx` is that it installs one global
version of Breeze2 for all projects, whereas it is quite
possible that someone has two different versions of
Airflow repository checked out and the bootstrapping
script provides this capability.

The bootstrapping script is a temporary measure, until the
dependencies of Breeze stabilize enough that the need
to recreate the virtual environment by `pipx` will be
very infrequent. In this case `pipx` provides better
user experience, and we might decide even to remove the
bootstrapping script and switch fully to `pipx`.

## Alternatives considered

The alternatives considered were:

* `nox` - this is a tool to manage virtualenv for testing, while
it has some built in virtualenv capabilities, it is an
additional tool that needs to be installed and it lacks
the automation of checking and recreation of the virtualenv
when needed (you need to manually run nox to update environment).
Also it is targeted for building multiple virtualenvs
for tests - it has nice pytest integration for example, but it
lacks support for managing editable installs for a long time.

* `pyenv` - this is the de-facto standard for maintenance of
virtualenvs. It has the capability of creation and switching
between virtualenvs easily. Together with some of its plugins
(pyenv-virtualenv and auto-activation) it could serve the
purpose quite well. However the problem is that if you
also use `pyenv` to manage your `airflow` virtualenv this might
be source of confusion. Should I activate airflow virtualenv
or Breeze2 venv to run tests? Part of Breeze experience is
to activate local Airflow virtualenv for IDE integration and
since this is different than simple Breeze virtualenv, using
pytest and autoactivation in this case might lead to a lot
of confusion. Keeping the Breeze virtualenv "hidden" and
mostly "used" but not deliberately activated is a better
choice - especially that most users will simply "use" Breeze2
as an app rather than activate the environment deliberately.
Also choosing `pyenv` and its virtualenv plugin would
add extra, unnecessary steps and prerequisites for Breeze.


## Consequences

Using Breeze for new users will be much simpler, without
having to install any prerequisites. The virtualenv used by
Breeze2 will be hidden from the user, and used behind the
scenes - and the dependencies used will be automatically
installed when needed. This will allow us to seamlessly
integrate the Breeze tool into the development experience without
having to worry about extra maintenance needed.
46 changes: 41 additions & 5 deletions dev/breeze/src/airflow_breeze/breeze.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import os
from pathlib import Path
from typing import Optional

import click
from click import ClickException
Expand All @@ -25,15 +28,47 @@
NAME = "Breeze2"
VERSION = "0.0.1"

__AIRFLOW_SOURCES_ROOT = Path.cwd()

@click.group()
def main():
pass

__AIRFLOW_CFG_FILE = "setup.cfg"

console = Console(force_terminal=True, color_system="standard", width=180)


# Accessor for the module-level __AIRFLOW_SOURCES_ROOT, which starts out as Path.cwd()
# and is reassigned by find_airflow_sources_root() when a sources root is found.
def get_airflow_sources_root():
return __AIRFLOW_SOURCES_ROOT


def search_upwards_for_airflow_sources_root(start_from: Path) -> Optional[Path]:
    """Find the closest directory at or above ``start_from`` that holds Airflow's setup file.

    A directory matches when it contains a regular file named by
    ``__AIRFLOW_CFG_FILE`` whose text includes the line ``name = apache-airflow``.

    :param start_from: directory where the upward search begins
    :return: the first matching directory, or ``None`` when neither ``start_from``
        nor any of its parents matches
    """
    # Walk ``start_from`` followed by each of its ancestors. ``Path.parents`` is a
    # finite sequence on every platform, whereas looping until the directory equals
    # ``Path(start_from.root)`` never terminates on Windows: ``root`` there is "\"
    # while the top-most parent keeps its drive (e.g. "C:\"), so they never compare
    # equal. Iterating the parents also covers the top-most directory itself.
    for directory in (start_from, *start_from.parents):
        candidate = directory / __AIRFLOW_CFG_FILE
        # ``is_file`` (rather than ``exists``) keeps a directory that happens to be
        # named like the config file from reaching ``read_text``.
        if candidate.is_file() and "name = apache-airflow\n" in candidate.read_text():
            return directory
    return None


# Resolves the Airflow sources root and records it in the module-level
# __AIRFLOW_SOURCES_ROOT. Two upward searches are attempted, in order; when both
# return nothing, a warning is printed and the existing value is kept.
def find_airflow_sources_root():
# Try to find airflow sources in current working dir
airflow_sources_root = search_upwards_for_airflow_sources_root(Path.cwd())
if not airflow_sources_root:
# Or if it fails, find it in parents of the directory where the ./breeze.py is.
airflow_sources_root = search_upwards_for_airflow_sources_root(Path(__file__).resolve().parent)
# Declared global because the module-level value is rebound below.
global __AIRFLOW_SOURCES_ROOT
if airflow_sources_root:
__AIRFLOW_SOURCES_ROOT = airflow_sources_root
else:
console.print(f"\n[yellow]Could not find Airflow sources location. Assuming {__AIRFLOW_SOURCES_ROOT}")
# Switch the process working directory to the sources root.
os.chdir(__AIRFLOW_SOURCES_ROOT)


# Click command group for Breeze2; its body resolves the Airflow sources root by
# calling find_airflow_sources_root(). Documented with a comment rather than a
# docstring because click would use a docstring as the group's help text.
@click.group()
def main():
find_airflow_sources_root()


option_verbose = click.option(
"--verbose",
is_flag=True,
Expand All @@ -54,6 +89,7 @@ def shell(verbose: bool):
"""Enters breeze.py environment. this is the default command use when no other is selected."""
if verbose:
console.print("\n[green]Welcome to breeze.py[/]\n")
console.print(f"\n[green]Root of Airflow Sources = {__AIRFLOW_SOURCES_ROOT}[/]\n")
console.print(ASCIIART, style=ASCIIART_STYLE)
raise ClickException("\nPlease implement entering breeze.py\n")

Expand All @@ -63,7 +99,7 @@ def shell(verbose: bool):
def build_ci_image(verbose: bool):
"""Builds breeze.ci image for breeze.py."""
if verbose:
console.print("\n[blue]Building image[/]\n")
console.print(f"\n[blue]Building image of airflow from {__AIRFLOW_SOURCES_ROOT}[/]\n")
raise ClickException("\nPlease implement building the CI image\n")


Expand Down
2 changes: 1 addition & 1 deletion dev/breeze/src/airflow_breeze/visuals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,4 @@

"""

ASCIIART_STYLE = "blue"
ASCIIART_STYLE = "white"
Loading