Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions providers/git/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ dependencies = [
"GitPython>=3.1.44",
]

# The optional dependencies should be modified in place in the generated file
# Any change in the dependencies is preserved when the file is regenerated
[project.optional-dependencies]
github = [
"PyGithub>=2.1.1",
]

[dependency-groups]
dev = [
"apache-airflow",
Expand Down
72 changes: 72 additions & 0 deletions providers/git/src/airflow/providers/git/hooks/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ class GitHook(BaseHook):
* ``ssh_config_file`` — path to a custom SSH config file.
* ``host_proxy_cmd`` — SSH ProxyCommand string (e.g. for bastion/jump hosts).
* ``ssh_port`` — non-default SSH port.
* ``github_app_id`` — GitHub App ID used for GitHub App authentication. Requires the GitHub App
private key to be provided as a PEM-encoded key via either ``private_key`` (inline) or
``key_file`` (path to key file).
* ``github_installation_id`` — GitHub App installation ID used for GitHub App authentication.
"""

conn_name_attr = "git_conn_id"
Expand Down Expand Up @@ -76,6 +80,8 @@ def get_ui_field_behaviour(cls) -> dict[str, Any]:
"ssh_config_file": "",
"host_proxy_cmd": "",
"ssh_port": "",
"github_app_id": "",
"github_installation_id": "",
}
)
},
Expand Down Expand Up @@ -104,10 +110,60 @@ def __init__(
self.host_proxy_cmd = extra.get("host_proxy_cmd")
self.ssh_port: int | None = int(extra["ssh_port"]) if extra.get("ssh_port") else None

# GitHub App Auth Options
raw_github_app_id = extra.get("github_app_id")
if raw_github_app_id is not None:
try:
self.github_app_id: int | None = int(raw_github_app_id)
except (TypeError, ValueError) as exc:
raise ValueError(
f"Invalid 'github_app_id' value {raw_github_app_id!r}. It must be an integer."
) from exc
else:
self.github_app_id = None

raw_github_installation_id = extra.get("github_installation_id")
if raw_github_installation_id is not None:
try:
self.github_installation_id: int | None = int(raw_github_installation_id)
except (TypeError, ValueError) as exc:
raise ValueError(
f"Invalid 'github_installation_id' value {raw_github_installation_id!r}. It must be an integer."
) from exc
else:
self.github_installation_id = None
self.env: dict[str, str] = {}

if self.key_file and self.private_key:
raise AirflowException("Both 'key_file' and 'private_key' cannot be provided at the same time")
if (self.github_app_id and not self.github_installation_id) or (
not self.github_app_id and self.github_installation_id
):
raise ValueError(
"Both 'github_app_id' and 'github_installation_id' must be provided to use GitHub App Authentication"
)
if self.github_app_id and self.github_installation_id:
if not self.key_file and not self.private_key:
raise ValueError("Missing inline private_key or key_file for GitHub App Auth")
if self.key_file and not self.private_key:
try:
with open(self.key_file, encoding="utf-8") as key_file:
self.private_key = key_file.read()
except OSError as exc:
raise OSError(
f"Failed to read GitHub App private key file {self.key_file!r}: {exc}"
) from exc
if not (self.repo_url or "").startswith(("https://", "http://")):
raise ValueError(
f"GitHub App authentication requires an HTTPS repository URL, but got: {self.repo_url!r}"
)
# Store the PEM separately so configure_hook_env() does not treat it as an SSH key.
self.github_app_private_key: str | None = self.private_key
self.private_key = None
self.key_file = None
self.user_name, self.auth_token = self._get_github_app_token()
Comment thread
RaphCodec marked this conversation as resolved.
else:
self.github_app_private_key = None
self._process_git_auth_url()
Comment thread
RaphCodec marked this conversation as resolved.

_VALID_STRICT_HOST_KEY_CHECKING = frozenset({"yes", "no", "accept-new", "off", "ask"})
Expand Down Expand Up @@ -142,6 +198,22 @@ def _build_ssh_command(self, key_path: str | None = None) -> str:

return " ".join(parts)

def _get_github_app_token(self):
try:
from github import Auth as GithubAuth, Github as GithubClient
except ImportError as exc:
raise ImportError(
"The PyGithub library is required for GitHub App authentication. Please install it with 'pip install apache-airflow-providers-git[github]'"
) from exc

github_auth = GithubAuth.AppAuth(
app_id=self.github_app_id, private_key=self.github_app_private_key
).get_installation_auth(installation_id=self.github_installation_id)
Comment thread
RaphCodec marked this conversation as resolved.

# Client is needed to generate the token even though we don't use the client directly
GithubClient(auth=github_auth)
return "x-access-token", github_auth.token

def _process_git_auth_url(self):
if not isinstance(self.repo_url, str):
return
Expand Down
216 changes: 216 additions & 0 deletions providers/git/tests/unit/git/hooks/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ def bundle_temp_dir(tmp_path):
CONN_ONLY_INLINE_KEY = "my_git_conn_only_inline_key"
CONN_BOTH_PATH_INLINE = "my_git_conn_both_path_inline"
CONN_NO_REPO_URL = "my_git_conn_no_repo_url"
CONN_APP_INLINE_KEY = "git_app_inline_key"
CONN_APP_ONLY_APP_ID = "git_app_only_app_id"
CONN_APP_ONLY_INSTALLATION_ID = "git_app_only_installation_id"
CONN_APP_NO_KEY = "git_app_no_key"
CONN_APP_INVALID_APP_ID = "git_app_invalid_app_id"
CONN_APP_INVALID_INSTALLATION_ID = "git_app_invalid_installation_id"


@pytest.fixture
Expand Down Expand Up @@ -120,6 +126,85 @@ def setup_connections(self, create_connection_without_db):
},
)
)
create_connection_without_db(
Connection(
conn_id=CONN_BOTH_PATH_INLINE,
host="path/to/repo",
conn_type="git",
extra={
"key_file": "path/to/key",
"private_key": "inline_key",
},
)
)
create_connection_without_db(
Connection(
conn_id="my_git_conn_strict",
host=AIRFLOW_GIT,
conn_type="git",
extra='{"key_file": "/files/pkey.pem", "strict_host_key_checking": "yes"}',
)
)
create_connection_without_db(
Connection(
conn_id=CONN_APP_INLINE_KEY,
host=AIRFLOW_HTTPS_URL,
conn_type="git",
extra={
"github_app_id": "12345",
"github_installation_id": "67890",
"private_key": "inline_pem_key",
},
)
)
create_connection_without_db(
Connection(
conn_id=CONN_APP_ONLY_APP_ID,
host=AIRFLOW_HTTPS_URL,
conn_type="git",
extra={"github_app_id": "12345"},
)
)
create_connection_without_db(
Connection(
conn_id=CONN_APP_ONLY_INSTALLATION_ID,
host=AIRFLOW_HTTPS_URL,
conn_type="git",
extra={"github_installation_id": "67890"},
)
)
create_connection_without_db(
Connection(
conn_id=CONN_APP_NO_KEY,
host=AIRFLOW_HTTPS_URL,
conn_type="git",
extra={"github_app_id": "12345", "github_installation_id": "67890"},
)
)
create_connection_without_db(
Connection(
conn_id=CONN_APP_INVALID_APP_ID,
host=AIRFLOW_HTTPS_URL,
conn_type="git",
extra={
"github_app_id": "not_an_int",
"github_installation_id": "67890",
"private_key": "inline_pem_key",
},
)
)
create_connection_without_db(
Connection(
conn_id=CONN_APP_INVALID_INSTALLATION_ID,
host=AIRFLOW_HTTPS_URL,
conn_type="git",
extra={
"github_app_id": "12345",
"github_installation_id": "not_an_int",
"private_key": "inline_pem_key",
},
)
)

@pytest.mark.parametrize(
("conn_id", "hook_kwargs", "expected_repo_url"),
Expand Down Expand Up @@ -352,3 +437,134 @@ def test_passphrase_askpass_cleaned_up(self, create_connection_without_db):
assert os.path.exists(askpass_path)
# Both the askpass script and the temp key file should be cleaned up
assert not os.path.exists(askpass_path)

# --- GitHub App auth tests ---

def test_only_app_id_without_installation_id_raises(self):
    """A connection with only ``github_app_id`` must be rejected with ``ValueError``."""
    # The hook raises ValueError (not AirflowException) when one of the two IDs is missing.
    with pytest.raises(
        ValueError, match="Both 'github_app_id' and 'github_installation_id' must be provided"
    ):
        GitHook(git_conn_id=CONN_APP_ONLY_APP_ID)

def test_only_installation_id_without_app_id_raises(self):
    """A connection with only ``github_installation_id`` must be rejected with ``ValueError``."""
    # The hook raises ValueError (not AirflowException) when one of the two IDs is missing.
    with pytest.raises(
        ValueError,
        match="Both 'github_app_id' and 'github_installation_id' must be provided",
    ):
        GitHook(git_conn_id=CONN_APP_ONLY_INSTALLATION_ID)

def test_app_id_and_installation_id_without_key_raises(self):
    """Both IDs without ``private_key`` or ``key_file`` must be rejected with ``ValueError``."""
    # The hook signals the missing key with ValueError, not AirflowException.
    with pytest.raises(
        ValueError,
        match="Missing inline private_key or key_file for GitHub App Auth",
    ):
        GitHook(git_conn_id=CONN_APP_NO_KEY)

def test_invalid_github_app_id_raises(self):
    """A non-integer ``github_app_id`` must be rejected with ``ValueError``."""
    # The hook wraps the failed int() conversion in a ValueError with a descriptive message.
    with pytest.raises(
        ValueError,
        match="Invalid 'github_app_id' value",
    ):
        GitHook(git_conn_id=CONN_APP_INVALID_APP_ID)

def test_invalid_github_installation_id_raises(self):
    """A non-integer ``github_installation_id`` must be rejected with ``ValueError``."""
    # The hook wraps the failed int() conversion in a ValueError with a descriptive message.
    with pytest.raises(
        ValueError,
        match="Invalid 'github_installation_id' value",
    ):
        GitHook(git_conn_id=CONN_APP_INVALID_INSTALLATION_ID)

def test_app_auth_with_key_file_reads_file(self, create_connection_without_db, tmp_path):
    """With GitHub App auth and a ``key_file``, the hook loads the PEM content from that file."""
    key_file = tmp_path / "app_key.pem"
    key_file.write_text("file_pem_key_content")
    create_connection_without_db(
        Connection(
            conn_id="git_app_key_file",
            host=AIRFLOW_HTTPS_URL,
            conn_type="git",
            extra={
                "github_app_id": "12345",
                "github_installation_id": "67890",
                "key_file": str(key_file),
            },
        )
    )
    with pytest.MonkeyPatch().context() as mp:
        # Stub the token exchange so no call to PyGithub / GitHub is made.
        mp.setattr(
            "airflow.providers.git.hooks.git.GitHook._get_github_app_token",
            lambda self: ("x-access-token", "ghs_test_token"),
        )
        hook = GitHook(git_conn_id="git_app_key_file")

    # The hook keeps the PEM in ``github_app_private_key`` and clears ``private_key``
    # so the key is not treated as an SSH key.
    assert hook.github_app_private_key == "file_pem_key_content"
    assert hook.private_key is None

def test_app_auth_with_missing_key_file_raises(self, create_connection_without_db):
    """An unreadable ``key_file`` under GitHub App auth must surface as ``OSError``."""
    create_connection_without_db(
        Connection(
            conn_id="git_app_missing_key_file",
            host=AIRFLOW_HTTPS_URL,
            conn_type="git",
            extra={
                "github_app_id": "12345",
                "github_installation_id": "67890",
                "key_file": "/nonexistent/path/key.pem",
            },
        )
    )
    # The hook re-raises the file error as OSError with an explanatory message.
    with pytest.raises(OSError, match="Failed to read GitHub App private key file"):
        GitHook(git_conn_id="git_app_missing_key_file")

def test_app_auth_success_injects_token_into_https_url(self):
    """Check that a successful GitHub App auth puts the token credentials into the HTTPS repo URL."""
    fake_token = "ghs_test_token"

    def _stub_app_token(_hook):
        # Stand-in for the real token exchange.
        return "x-access-token", fake_token

    with pytest.MonkeyPatch().context() as patcher:
        patcher.setattr(
            "airflow.providers.git.hooks.git.GitHook._get_github_app_token",
            _stub_app_token,
        )
        hook = GitHook(git_conn_id=CONN_APP_INLINE_KEY)

    assert hook.auth_token == fake_token
    assert hook.user_name == "x-access-token"
    repo_url = hook.repo_url
    assert f"x-access-token:{fake_token}@" in repo_url
    assert repo_url.startswith("https://")

def test_app_auth_success_stores_app_id_and_installation_id(self):
    """Check that the hook exposes the app and installation IDs as integers after a successful auth."""
    fake_token = "ghs_test_token"

    def _stub_app_token(_hook):
        # Stand-in for the real token exchange.
        return "x-access-token", fake_token

    with pytest.MonkeyPatch().context() as patcher:
        patcher.setattr(
            "airflow.providers.git.hooks.git.GitHook._get_github_app_token",
            _stub_app_token,
        )
        hook = GitHook(git_conn_id=CONN_APP_INLINE_KEY)

    assert hook.github_app_id == 12345
    assert hook.github_installation_id == 67890

@pytest.mark.parametrize(
    ("app_id", "installation_id"),
    [
        ("12345", "67890"),
        (12345, 67890),
    ],
)
def test_app_id_and_installation_id_parsed_as_int(
    self, app_id, installation_id, create_connection_without_db
):
    """Check that both IDs end up as ``int`` whether the connection extra holds strings or integers."""
    connection_extra = {
        "github_app_id": app_id,
        "github_installation_id": installation_id,
        "private_key": "inline_pem_key",
    }
    connection = Connection(
        conn_id="git_app_int_check",
        host=AIRFLOW_HTTPS_URL,
        conn_type="git",
        extra=connection_extra,
    )
    create_connection_without_db(connection)

    def _stub_app_token(_hook):
        # Stand-in for the real token exchange.
        return "x-access-token", "token"

    with pytest.MonkeyPatch().context() as patcher:
        patcher.setattr(
            "airflow.providers.git.hooks.git.GitHook._get_github_app_token",
            _stub_app_token,
        )
        hook = GitHook(git_conn_id="git_app_int_check")

    for parsed_id in (hook.github_app_id, hook.github_installation_id):
        assert isinstance(parsed_id, int)
Loading
Loading