From 6c47e2159ef8e1c9e266c459f2b48a3c06278296 Mon Sep 17 00:00:00 2001 From: s-ol Date: Thu, 23 Jun 2022 14:21:16 +0200 Subject: [PATCH 1/3] Add contrib/wsgi_autoreload_recursive.py This is similar to #168 but uses `os.scandir()` instead of `os.walk()` so as not to recurse into detected repositories. It has so been extended for bare repositories, although it only checks for the `.git` suffix in that case. --- klaus/contrib/wsgi_autoreload_recursive.py | 104 +++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 klaus/contrib/wsgi_autoreload_recursive.py diff --git a/klaus/contrib/wsgi_autoreload_recursive.py b/klaus/contrib/wsgi_autoreload_recursive.py new file mode 100644 index 00000000..c9cadfc7 --- /dev/null +++ b/klaus/contrib/wsgi_autoreload_recursive.py @@ -0,0 +1,104 @@ +from __future__ import print_function +import os +import os.path +import time +import threading +import warnings + +from klaus import make_app + +# Shared state between poller and application wrapper +class _: + #: the real WSGI app + inner_app = None + should_reload = True + + +def find_git_repos_recursive(dir): + if dir.endswith('.git'): + yield dir + return + + subdirectories = [] + for entry in os.scandir(dir): + if entry.name == '.git': + yield dir + return + + if entry.is_dir(): + subdirectories.append(entry.path) + + for path in subdirectories: + yield from find_git_repos_recursive(path) + + +def namespaceify(root, repos): + map = {} + raw = [] + map[None] = raw + for path in repos: + repo = os.path.relpath(path, root) + try: + [namespace, name] = repo.rsplit('/', 1) + map[namespace] = map.get(namespace, []) + map[namespace].append(path) + except ValueError: + raw.append(path) + return map + + +def poll_for_changes(interval, dir): + """ + Polls `dir` for changes every `interval` seconds and sets `should_reload` + accordingly. 
+ """ + old_contents = list(find_git_repos_recursive(dir)) + while 1: + time.sleep(interval) + if _.should_reload: + # klaus application has not seen our change yet + continue + new_contents = find_git_repos_recursive(dir) + if new_contents != old_contents: + # Directory contents changed => should_reload + old_contents = new_contents + _.should_reload = True + + +def make_autoreloading_app(repos_root, *args, **kwargs): + def app(environ, start_response): + if _.should_reload: + # Refresh inner application with new repo list + print("Reloading repository list...") + _.inner_app = make_app( + namespaceify(repos_root, find_git_repos_recursive(repos_root)), + *args, **kwargs + ) + _.should_reload = False + return _.inner_app(environ, start_response) + + # Background thread that polls the directory for changes + poller_thread = threading.Thread(target=(lambda: poll_for_changes(10, repos_root))) + poller_thread.daemon = True + poller_thread.start() + + return app + + +if 'KLAUS_REPOS' in os.environ: + warnings.warn("use KLAUS_REPOS_ROOT instead of KLAUS_REPOS for the autoreloader apps", DeprecationWarning) + +if 'KLAUS_HTDIGEST_FILE' in os.environ: + with open(os.environ['KLAUS_HTDIGEST_FILE']) as file: + application = make_autoreloading_app( + os.environ.get('KLAUS_REPOS_ROOT') or os.environ['KLAUS_REPOS'], + os.environ['KLAUS_SITE_NAME'], + os.environ.get('KLAUS_USE_SMARTHTTP'), + file, + ) +else: + application = make_autoreloading_app( + os.environ.get('KLAUS_REPOS_ROOT') or os.environ['KLAUS_REPOS'], + os.environ['KLAUS_SITE_NAME'], + os.environ.get('KLAUS_USE_SMARTHTTP'), + ) From b40f7e7d625bc68718eed2c4d9912ca9e9b1c38b Mon Sep 17 00:00:00 2001 From: s-ol Date: Thu, 23 Jun 2022 12:16:55 +0200 Subject: [PATCH 2/3] Support nested namespaces (without ~prefix) This is a major change that changes all existing route fomats. 
The base route of each repo is no longer `/repo` or `/~ns/repo` but `/repo/-/` or `/deeply/nested/repo/-/`, whereas the Dulwich/SMARTHTTP routes are at `/repo.git` or `/deeply/nested/repo.git`. Klaus is expected to be used together with some method of hosting Git repositories, such as plain SSH access, git-shell, or gitolite. All of these allow organizing the repositories freely, creating directory structures (and even mixing with other files) as desired. This routing scheme is inspired by GitLab, where repositories and groups can also be nested in this way. It allows klaus to have that same flexibility and keeps the SMARTHTTP clone URL "symmetric" to what e.g. an SSH clone URL may look like: git@host:path/to/repo.git http://host/path/to/repo.git Some might find the `-` in the URL irritating. The biggest issue is simply that changing the URL scheme is not backwards compatible. It would make sense to redirect `/repo` to `/repo/-/` automatically, which would solve the most common issue (links to repositories not working).
--- klaus/__init__.py | 40 +++++++++++++++++--------------- klaus/repo.py | 12 +++------- klaus/templates/history.inc.html | 2 +- klaus/views.py | 11 +++++---- tests/test_contrib.py | 4 ++-- tests/test_make_app.py | 12 +++++----- tests/test_views.py | 2 +- tests/utils.py | 11 +++++---- 8 files changed, 47 insertions(+), 47 deletions(-) diff --git a/klaus/__init__.py b/klaus/__init__.py index 2ddf2919..e85c9e3d 100644 --- a/klaus/__init__.py +++ b/klaus/__init__.py @@ -59,27 +59,29 @@ def setup_routes(self): for endpoint, rule in [ ('repo_list', '/'), ('robots_txt', '/robots.txt/'), - ('blob', '//blob/'), - ('blob', '//blob//'), - ('blame', '//blame/'), - ('blame', '//blame//'), - ('raw', '//raw//'), - ('raw', '//raw//'), - ('submodule', '//submodule//'), - ('submodule', '//submodule//'), - ('commit', '//commit//'), - ('patch', '//commit/.diff'), - ('patch', '//commit/.patch'), - ('index', '//'), - ('index', '//'), - ('history', '//tree//'), - ('history', '//tree//'), - ('download', '//tarball//'), + ('blob', '//-/blob/'), + ('blob', '//-/blob//'), + ('blame', '//-/blame/'), + ('blame', '//-/blame//'), + ('raw', '//-/raw//'), + ('raw', '//-/raw//'), + ('submodule', '//-/submodule//'), + ('submodule', '//-/submodule//'), + ('commit', '//-/commit//'), + ('patch', '//-/commit/.diff'), + ('patch', '//-/commit/.patch'), + ('index', '//-/'), + ('index', '//-/'), + ('history', '//-/tree//'), + ('history', '//-/tree//'), + ('download', '//-/tarball//'), + ('smarthttp', '/.git'), ]: self.add_url_rule(rule, view_func=getattr(views, endpoint)) if "" in rule: self.add_url_rule( - "/~" + rule, view_func=getattr(views, endpoint) + rule.replace('', '/'), + view_func=getattr(views, endpoint) ) # fmt: on @@ -173,7 +175,7 @@ def make_app( # `path -> Repo` mapping for Dulwich's web support dulwich_backend = dulwich.server.DictBackend( { - "/" + namespaced_name: repo + "/" + namespaced_name + '.git': repo for namespaced_name, repo in app.valid_repos.items() } ) @@ -200,7 +202,7 @@ def 
make_app( # failed for /info/refs, but since it's used to upload stuff to the server # we must secure it anyway for security reasons. PATTERN = ( - r"^/(~[^/]+/)?[^/]+/(info/refs\?service=git-receive-pack|git-receive-pack)$" + r"^/.*\.git/(info/refs\?service=git-receive-pack|git-receive-pack)$" ) if unauthenticated_push: # DANGER ZONE: Don't require authentication for push'ing diff --git a/klaus/repo.py b/klaus/repo.py index 18792071..010b4221 100644 --- a/klaus/repo.py +++ b/klaus/repo.py @@ -1,4 +1,4 @@ -import os +import os.path import io import stat import subprocess @@ -43,10 +43,7 @@ def name(self): @property def namespaced_name(self): - if self.namespace: - return "~{}/{}".format(self.namespace, self.name) - else: - return self.name + return os.path.join(self.namespace or "", self.name) # TODO: factor out stuff into dulwich def get_last_updated_at(self): @@ -365,7 +362,4 @@ def name(self): @property def namespaced_name(self): - if self.namespace: - return "~{}/{}".format(self.namespace, self.name) - else: - return self.name + return os.path.join(self.namespace or "", self.name) diff --git a/klaus/templates/history.inc.html b/klaus/templates/history.inc.html index 3c251009..6b19f5ae 100644 --- a/klaus/templates/history.inc.html +++ b/klaus/templates/history.inc.html @@ -35,7 +35,7 @@

@{{ rev }} {% if USE_SMARTHTTP %} - git clone {{ url_for('index', repo=repo.name, namespace=namespace, _external=True) }} + git clone {{ url_for('smarthttp', repo=repo.name, namespace=namespace, _external=True) }} {% endif %} {% if repo.cloneurl %} git clone {{ repo.cloneurl }} diff --git a/klaus/views.py b/klaus/views.py index 5495caa3..7d5c404c 100644 --- a/klaus/views.py +++ b/klaus/views.py @@ -1,5 +1,5 @@ from io import BytesIO -import os +import os.path import sys from flask import request, render_template, current_app, url_for @@ -92,10 +92,7 @@ def _get_repo_and_rev(repo, namespace=None, rev=None, path=None): if path and rev: rev += "/" + path.rstrip("/") - if namespace: - repo_key = "~{}/{}".format(namespace, repo) - else: - repo_key = repo + repo_key = os.path.join(namespace or "", repo) try: repo = current_app.valid_repos[repo_key] except KeyError: @@ -548,3 +545,7 @@ def get_response(self): raw = RawView.as_view("raw", "raw") download = DownloadView.as_view("download", "download") submodule = SubmoduleView.as_view("submodule", "submodule") + + +def smarthttp(*args, **kwargs): + raise ValueError("this endpoint shouldn't be reachable") diff --git a/tests/test_contrib.py b/tests/test_contrib.py index f1dd9822..c654948d 100644 --- a/tests/test_contrib.py +++ b/tests/test_contrib.py @@ -124,7 +124,7 @@ def can_reach_unauth(): def can_push_auth(): - return _can_push(_GET_auth, AUTH_TEST_REPO_NO_NAMESPACE_URL) + return _can_push(_GET_auth, AUTH_TEST_REPO_NO_NAMESPACE_SMART_URL) def _can_push(http_get, url): @@ -132,7 +132,7 @@ def _can_push(http_get, url): [ _check_http200( http_get, - TEST_REPO_NO_NAMESPACE_BASE_URL + "info/refs?service=git-receive-pack", + url + "/info/refs?service=git-receive-pack", ), _check_http200( http_get, TEST_REPO_NO_NAMESPACE_BASE_URL + "git-receive-pack" diff --git a/tests/test_make_app.py b/tests/test_make_app.py index 9b0f9809..accac11e 100644 --- a/tests/test_make_app.py +++ b/tests/test_make_app.py @@ -132,11 +132,11 @@ def 
can_reach_auth(): # Clone def can_clone_unauth(): - return _can_clone(_GET_unauth, UNAUTH_TEST_REPO_URL) + return _can_clone(_GET_unauth, UNAUTH_TEST_REPO_SMART_URL) def can_clone_auth(): - return _can_clone(_GET_auth, AUTH_TEST_REPO_URL) + return _can_clone(_GET_auth, AUTH_TEST_REPO_SMART_URL) def _can_clone(http_get, url): @@ -146,7 +146,7 @@ def _can_clone(http_get, url): [ "git clone" in http_get(TEST_REPO_BASE_URL).text, _check_http200( - http_get, TEST_REPO_BASE_URL + "info/refs?service=git-upload-pack" + http_get, url + "/info/refs?service=git-upload-pack" ), subprocess.call(["git", "clone", url, tmp]) == 0, ] @@ -157,18 +157,18 @@ def _can_clone(http_get, url): # Push def can_push_unauth(): - return _can_push(_GET_unauth, UNAUTH_TEST_REPO_URL) + return _can_push(_GET_unauth, UNAUTH_TEST_REPO_SMART_URL) def can_push_auth(): - return _can_push(_GET_auth, AUTH_TEST_REPO_URL) + return _can_push(_GET_auth, AUTH_TEST_REPO_SMART_URL) def _can_push(http_get, url): return any( [ _check_http200( - http_get, TEST_REPO_BASE_URL + "info/refs?service=git-receive-pack" + http_get, url + "/info/refs?service=git-receive-pack" ), _check_http200(http_get, TEST_REPO_BASE_URL + "git-receive-pack"), subprocess.call(["git", "push", url, "master"], cwd=TEST_REPO) == 0, diff --git a/tests/test_views.py b/tests/test_views.py index 7fb67157..f87992af 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -62,7 +62,7 @@ def test_render_image(): response = requests.get( UNAUTH_TEST_REPO_DONT_RENDER_URL + "blob/HEAD/image.jpg" ).text - assert ' Date: Sun, 26 Jun 2022 18:54:36 +0200 Subject: [PATCH 3/3] redirect/handle old-style index and SmartHTTP requests --- klaus/__init__.py | 103 +++++++++++++++++++++++++++++++++-------- klaus/utils.py | 49 ++++++++++++++++++++ klaus/views.py | 5 +- tests/test_contrib.py | 2 +- tests/test_make_app.py | 2 +- 5 files changed, 139 insertions(+), 22 deletions(-) diff --git a/klaus/__init__.py b/klaus/__init__.py index e85c9e3d..d42558b5 100644 
--- a/klaus/__init__.py +++ b/klaus/__init__.py @@ -7,6 +7,7 @@ import flask import httpauth import dulwich.web +from werkzeug.exceptions import NotFound from dulwich.errors import NotGitRepository from klaus import views, utils from klaus.repo import FancyRepo, InvalidRepo @@ -15,6 +16,59 @@ KLAUS_VERSION = utils.guess_git_revision() or "1.5.2" +class KlausRedirects(flask.Flask): + def __init__(self, repos): + flask.Flask.__init__(self, __name__) + + for namespaced_name in repos: + self.setup_redirects('/' + namespaced_name) + if namespaced_name.count('/') == 1: + self.setup_redirects('/' + namespaced_name, '/~' + namespaced_name) + + def query_str(self): + query = flask.request.query_string.decode() + if len(query) > 0: + return '?' + query + + return '' + + def setup_redirects(self, route, pattern=None): + if not pattern: + pattern = route + + def redirect_root(): + return flask.redirect(route + '/-/' + self.query_str(), 301) + + def redirect_rest(path): + if path.startswith('-/'): + raise NotFound() + return flask.redirect(route + '/-/' + path + self.query_str(), 301) + + def redirect_git(): + return flask.redirect(route + '.git/info/refs' + self.query_str(), 301) + + self.add_url_rule( + pattern + '/', + endpoint=pattern + '_root', + view_func=redirect_root, + ) + self.add_url_rule( + pattern + '.git', + endpoint=pattern + '_git2root', + view_func=redirect_root, + ) + self.add_url_rule( + pattern + '/', + endpoint=pattern + '_rest', + view_func=redirect_rest, + ) + self.add_url_rule( + pattern + '/info/refs', + endpoint=pattern + '_git', + view_func=redirect_git, + ) + + class Klaus(flask.Flask): jinja_options = { "extensions": [] if jinja2_autoescape_builtin else ["jinja2.ext.autoescape"], @@ -25,6 +79,7 @@ def __init__(self, repo_paths, site_name, use_smarthttp, ctags_policy="none"): """(See `make_app` for parameter descriptions.)""" self.site_name = site_name self.use_smarthttp = use_smarthttp + self.smarthttp = None # dulwich wsgi app self.ctags_policy = 
ctags_policy valid_repos, invalid_repos = self.load_repos(repo_paths) @@ -55,6 +110,8 @@ def create_jinja_environment(self): return env def setup_routes(self): + redirects = {} + # fmt: off for endpoint, rule in [ ('repo_list', '/'), @@ -84,6 +141,17 @@ def setup_routes(self): view_func=getattr(views, endpoint) ) # fmt: on + if self.use_smarthttp: + self.add_url_rule( + '/.git/', + view_func=views.smarthttp, + methods=['GET', 'POST'], + ) + self.add_url_rule( + '//.git/', + view_func=views.smarthttp, + methods=['GET', 'POST'], + ) def should_use_ctags(self, git_repo, git_commit): if self.ctags_policy == "none": @@ -169,23 +237,20 @@ def make_app( use_smarthttp, ctags_policy, ) + app.wsgi_app = utils.ChainedApps( + app, + KlausRedirects(app.valid_repos), + ) app.wsgi_app = utils.ProxyFix(app.wsgi_app) if use_smarthttp: # `path -> Repo` mapping for Dulwich's web support - dulwich_backend = dulwich.server.DictBackend( - { - "/" + namespaced_name + '.git': repo - for namespaced_name, repo in app.valid_repos.items() - } - ) - # Dulwich takes care of all Git related requests/URLs - # and passes through everything else to klaus - dulwich_wrapped_app = dulwich.web.make_wsgi_chain( - backend=dulwich_backend, - fallback_app=app.wsgi_app, - ) - dulwich_wrapped_app = utils.ProxyFix(dulwich_wrapped_app) + dulwich_repos = {} + for namespaced_name, repo in app.valid_repos.items(): + dulwich_repos["/" + namespaced_name + '.git'] = repo + + dulwich_backend = dulwich.server.DictBackend(dulwich_repos) + dulwich_app = dulwich.web.make_wsgi_chain(backend=dulwich_backend) # `receive-pack` is requested by the "client" on a push # (the "server" is asked to *receive* packs), i.e. we need to secure @@ -206,18 +271,18 @@ def make_app( ) if unauthenticated_push: # DANGER ZONE: Don't require authentication for push'ing - app.wsgi_app = dulwich_wrapped_app + app.smarthttp = dulwich_app elif htdigest_file and not disable_push: # .htdigest file given. Use it to read the push-er credentials from. 
if require_browser_auth: # No need to secure push'ing if we already require HTTP auth # for all of the Web interface. - app.wsgi_app = dulwich_wrapped_app + app.smarthttp = dulwich_app else: # Web interface isn't already secured. Require authentication for push'ing. - app.wsgi_app = httpauth.DigestFileHttpAuthMiddleware( + app.smarthttp = httpauth.DigestFileHttpAuthMiddleware( htdigest_file, - wsgi_app=dulwich_wrapped_app, + wsgi_app=dulwich_app, routes=[PATTERN], ) else: @@ -225,8 +290,8 @@ def make_app( # use HTTP 403 here but since that results in freaky error messages # (see above) we keep asking for authentication (401) instead. # Git will print a nice error message after a few tries. - app.wsgi_app = httpauth.AlwaysFailingAuthMiddleware( - wsgi_app=dulwich_wrapped_app, + app.smarthttp = httpauth.AlwaysFailingAuthMiddleware( + wsgi_app=dulwich_app, routes=[PATTERN], ) diff --git a/klaus/utils.py b/klaus/utils.py index 0cb8f255..209ae3e7 100644 --- a/klaus/utils.py +++ b/klaus/utils.py @@ -2,6 +2,7 @@ import binascii import os import re +import sys import time import datetime import mimetypes @@ -103,6 +104,54 @@ def __call__(self, environ, start_response): return self.app(environ, start_response) +class ChainedApps(object): + """WSGI middleware to chain two or more Flask apps. + + The request is passed to the next app if a response has a 404 status.""" + + def __init__(self, *apps): + self.apps = apps + + def __call__(self, environ, start_response): + # this method is almost verbatim flask.Flask.wsgi_app(), + # except for the for/continue statements. 
+ for app in self.apps: + ctx = app.request_context(environ) + error = None + first_response = None + try: + try: + ctx.push() + response = app.full_dispatch_request() + except Exception as e: + error = e + response = app.handle_exception(e) + except: # noqa: B001 + error = sys.exc_info()[1] + raise + + if first_response is None: + first_response = response + + if response.status_code == 404: + # pass through 404 codes + continue + + return response(environ, start_response) + finally: + if "werkzeug.debug.preserve_context" in environ: + environ["werkzeug.debug.preserve_context"](_cv_app.get()) + environ["werkzeug.debug.preserve_context"](_cv_request.get()) + + if error is not None and app.should_ignore_error(error): + error = None + + ctx.pop(error) + + if first_response: + return first_response(environ, start_response) + + def timesince(when, now=time.time): """Return the difference between `when` and `now` in human readable form.""" return naturaltime(now() - when) diff --git a/klaus/views.py b/klaus/views.py index 7d5c404c..a4f9baec 100644 --- a/klaus/views.py +++ b/klaus/views.py @@ -548,4 +548,7 @@ def get_response(self): def smarthttp(*args, **kwargs): - raise ValueError("this endpoint shouldn't be reachable") + if not current_app.use_smarthttp or not current_app.smarthttp: + raise NotFound() + + return current_app.smarthttp diff --git a/tests/test_contrib.py b/tests/test_contrib.py index c654948d..1f8e145a 100644 --- a/tests/test_contrib.py +++ b/tests/test_contrib.py @@ -135,7 +135,7 @@ def _can_push(http_get, url): url + "/info/refs?service=git-receive-pack", ), _check_http200( - http_get, TEST_REPO_NO_NAMESPACE_BASE_URL + "git-receive-pack" + http_get, url + "/git-receive-pack" ), subprocess.call(["git", "push", url, "master"], cwd=TEST_REPO_NO_NAMESPACE) == 0, diff --git a/tests/test_make_app.py b/tests/test_make_app.py index accac11e..998e3579 100644 --- a/tests/test_make_app.py +++ b/tests/test_make_app.py @@ -170,7 +170,7 @@ def _can_push(http_get, 
url): _check_http200( http_get, url + "/info/refs?service=git-receive-pack" ), - _check_http200(http_get, TEST_REPO_BASE_URL + "git-receive-pack"), + _check_http200(http_get, url + "/git-receive-pack"), subprocess.call(["git", "push", url, "master"], cwd=TEST_REPO) == 0, ] )