diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..4a0b3b9 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,18 @@ +# CHANGELOG + +This is a manually generated log to track changes to the repository for each release. +Each section should include general headers such as **Implemented enhancements** +and **Merged pull requests**. Critical items to know are: + + - renamed commands + - deprecated / removed commands + - changed defaults + - backward incompatible changes + - migration guidance + - changed behaviour + +The versions coincide with releases on pip. Only major versions will be released as tags on Github. + +## [0.0.x](https://github.com/converged-computing/flux-metrics-api/tree/main) (0.0.x) + - Support for certificates for uvicorn and change default port to 8443 (0.0.1) + - Skeleton release (0.0.0) diff --git a/Dockerfile b/Dockerfile index 1da27f8..87a85ae 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ FROM fluxrm/flux-sched:focal # docker build -t flux_metrics_api . -# docker run -it -p 8080:8080 flux_metrics_api +# docker run -it -p 8443:8443 flux_metrics_api LABEL maintainer="Vanessasaurus <@vsoch>" diff --git a/README.md b/README.md index 1a61243..15a7a20 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ You'll want to be running in a Flux instance, as we need to connect to the broke $ flux start --test-size=4 ``` -And then start the server. This will use a default port and host (0.0.0.0:8080) that you can customize +And then start the server. This will use a default port and host (0.0.0.0:8443) that you can customize if desired. ```bash @@ -56,6 +56,12 @@ $ flux-metrics-api start $ flux-metrics-api start --port 9000 --host 127.0.0.1 ``` +If you want ssl (port 443) you can provide the path to a certificate and keyfile: + +```bash +$ flux-metrics-api start --ssl-certfile /etc/certs/tls.crt --ssl-keyfile /etc/certs/tls.key +``` + See `--help` to see other options available. 
### Endpoints @@ -67,7 +73,7 @@ See `--help` to see other options available. Here is an example to get the "node_up_count" metric: ```bash - curl -s http://localhost:8080/apis/custom.metrics.k8s.io/v1beta2/namespaces/flux-operator/metrics/node_up_count | jq + curl -s http://localhost:8443/apis/custom.metrics.k8s.io/v1beta2/namespaces/flux-operator/metrics/node_up_count | jq ``` ```console { @@ -101,15 +107,20 @@ be a demo. You can either build it yourself, or use our build. ```bash $ docker build -t flux_metrics_api . -$ docker run -it -p 8080:8080 flux_metrics_api +$ docker run -it -p 8443:8443 flux_metrics_api ``` or ```bash -$ docker run -it -p 8080:8080 ghcr.io/converged-computing/flux-metrics-api +$ docker run -it -p 8443:8443 ghcr.io/converged-computing/flux-metrics-api ``` -You can then open up the browser at [http://localhost:8080/metrics/](http://localhost:8080/metrics) to see +### Development + +Note that this is implemented in Python, but (I found this after) we could [also use Go](https://github.com/kubernetes-sigs/custom-metrics-apiserver). +Specifically, I found this repository useful to see the [spec format](https://github.com/kubernetes-sigs/custom-metrics-apiserver/blob/master/pkg/generated/openapi/custommetrics/zz_generated.openapi.go). + +You can then open up the browser at [http://localhost:8443/metrics/](http://localhost:8443/metrics) to see the metrics! ## 😁️ Contributors 😁️ diff --git a/flux_metrics_api/apis.py b/flux_metrics_api/apis.py new file mode 100644 index 0000000..39c0dd6 --- /dev/null +++ b/flux_metrics_api/apis.py @@ -0,0 +1,54 @@ +# Copyright 2023 Lawrence Livermore National Security, LLC and other +# HPCIC DevTools Developers. See the top-level COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (MIT) + +import json +import os +import subprocess + +import flux_metrics_api.defaults as defaults +import flux_metrics_api.utils as utils + +# Global cache of responses +cache = {} + + +def get_kubernetes_endpoint(endpoint): + """ + Get an endpoint from the cluster. + """ + if defaults.USE_CACHE and endpoint in cache: + return cache[endpoint] + + # Point to the internal API server hostname + api_server = "https://kubernetes.default.svc" + + # Path to ServiceAccount directory + sa_account_dir = "/var/run/secrets/kubernetes.io/serviceaccount" + namespace_file = os.path.join(sa_account_dir, "namespace") + cert_file = os.path.join(sa_account_dir, "ca.crt") + token_file = os.path.join(sa_account_dir, "token") + + # Cut out early if we aren't running in the pod + if not all( + map(os.path.exists, [sa_account_dir, namespace_file, token_file, cert_file]) + ): + return {} + + # Get the token to do the request + token = utils.read_file(token_file) + + # Using subprocess to not add extra dependency - yes requires curl + # res = requests.get(f"{api_server}/apis", headers=headers, verify=cert_file) + # Kids don't do this at home + output = subprocess.check_output( + f'curl --cacert {cert_file} --header "Authorization: Bearer {token}" -X GET {api_server}/{endpoint}', + shell=True, + ) + try: + output = json.loads(output) + cache[endpoint] = output + except Exception: + return {} + return output diff --git a/flux_metrics_api/defaults.py b/flux_metrics_api/defaults.py index 5d539c0..ad374d8 100644 --- a/flux_metrics_api/defaults.py +++ b/flux_metrics_api/defaults.py @@ -3,6 +3,16 @@ # # SPDX-License-Identifier: (MIT) -API_VERSION = "custom.metrics.k8s.io/v1beta2" +API_ENDPOINT = "custom.metrics.k8s.io/v1beta2" API_ROOT = "/apis/custom.metrics.k8s.io/v1beta2" -NAMESPACES = None +NAMESPACE = "flux-operator" +SERVICE_NAME = "custom-metrics-apiserver" +USE_CACHE = True + + +def API_VERSION(): + """ + Derive the api version from the endpoint + """ + 
global API_ENDPOINT + return API_ENDPOINT.rstrip("/").rsplit("/")[-1] diff --git a/flux_metrics_api/routes.py b/flux_metrics_api/routes.py index 1e8b07c..aa24c66 100644 --- a/flux_metrics_api/routes.py +++ b/flux_metrics_api/routes.py @@ -3,21 +3,29 @@ # # SPDX-License-Identifier: (MIT) +from apispec import APISpec from starlette.endpoints import HTTPEndpoint from starlette.responses import JSONResponse from starlette.routing import Route -from starlette.schemas import SchemaGenerator +from starlette_apispec import APISpecSchemaGenerator import flux_metrics_api.defaults as defaults import flux_metrics_api.types as types import flux_metrics_api.version as version from flux_metrics_api.metrics import metrics -schemas = SchemaGenerator( - { - "openapi": "3.0.0", - "info": {"title": "Flux Metrics API", "version": version.__version__}, - } +schemas = APISpecSchemaGenerator( + APISpec( + title="Flux Metrics API", + version=version.__version__, + openapi_version="3.0.0", + info={"description": "Export Flux custom metrics."}, + ) +) + +not_found_response = JSONResponse( + {"detail": "The metric server is not running in a Kubernetes pod."}, + status_code=404, ) @@ -25,11 +33,12 @@ class Root(HTTPEndpoint): """ Root of the API - This needs to return 200 for a health check + This needs to return 200 for a health check. I later discovered it also needs + to return the listing of available metrics! 
""" async def get(self, request): - return JSONResponse({}) + return JSONResponse(types.new_resource_list()) def get_metric(request): @@ -41,18 +50,13 @@ def get_metric(request): """ metric_name = request.path_params["metric_name"] namespace = request.path_params.get("namespace") + print(f"Requested metric {metric_name} in namespace {namespace}") - if ( - namespace is not None - and defaults.NAMESPACES is not None - and namespace not in defaults.NAMESPACES - ): - return JSONResponse( - {"detail": "This namespace is not known to the server."}, status_code=404 - ) - + # TODO we don't do anything with namespace currently, we assume we won't + # be able to hit this if running in the wrong one # Unknown metric if metric_name not in metrics: + print(f"Unknown metric requested {metric_name}") return JSONResponse( {"detail": "This metric is not known to the server."}, status_code=404 ) @@ -63,7 +67,10 @@ def get_metric(request): # Get the value from Flux, assemble into listing value = metrics[metric_name]() metric_value = types.new_metric(metric, value=value) - listing = types.new_metric_list([metric_value]) + + # Give the endpoint for the service as metadata + metadata = {"selfLink": defaults.API_ROOT} + listing = types.new_metric_list([metric_value], metadata=metadata) return JSONResponse(listing) @@ -79,11 +86,34 @@ async def get(self, request): return get_metric(request) +class APIGroupList(HTTPEndpoint): + """ + Service a faux resource list just for our custom metrics endpoint. 
+ """ + + async def get(self, request): + listing = types.new_group_list() + if not listing: + return not_found_response + return JSONResponse(listing) + + +class OpenAPI(HTTPEndpoint): + """ + Forward the cluster openapi endpoint + """ + + async def get(self, request): + version = request.path_params["version"] + openapi = types.get_cluster_schema(version) + if not openapi: + return not_found_response + return JSONResponse(openapi) + + def openapi_schema(request): """ Get the openapi spec from the endpoints - - TODO: debug why paths empty """ return JSONResponse(schemas.get_schema(routes=routes)) @@ -91,12 +121,15 @@ def openapi_schema(request): # STOPPED HERE - make open api spec s we can see endpoints and query routes = [ Route(defaults.API_ROOT, Root), - # Optional for openapi, we could add if needed + # This is a faux route so we can get the preferred resource version + Route("/apis", APIGroupList), + Route("/openapi/{version}", OpenAPI), Route(defaults.API_ROOT + "/namespaces/{namespace}/metrics/{metric_name}", Metric), Route(defaults.API_ROOT + "/{resource}/{name}/{metric_name}", Metric), Route( defaults.API_ROOT + "/namespaces/{namespace}/{resource}/{name}/{metric_name}", Metric, ), + # Route("/openapi/v2", openapi_schema, include_in_schema=False), Route(f"{defaults.API_ROOT}/openapi/v2", openapi_schema, include_in_schema=False), ] diff --git a/flux_metrics_api/server.py b/flux_metrics_api/server.py index b9708bf..02a8581 100644 --- a/flux_metrics_api/server.py +++ b/flux_metrics_api/server.py @@ -65,12 +65,13 @@ def get_parser(): ) start.add_argument( "--port", - help="Port to run application", - default=8080, + help="Port to run application (defaults to 8443)", + default=8443, type=int, ) + start.add_argument("--namespace", help="Namespace the API is running in") start.add_argument( - "--namespace", help="Scope to running in these namespace(s)", action="append" + "--service-name", help="Service name the metrics service is running from" ) 
start.add_argument( "--api-path", @@ -78,7 +79,6 @@ def get_parser(): help="Custom API path (defaults to /apis/custom.metrics.k8s.io/v1beta2)", default=None, ) - start.add_argument( "--host", help="Host address to run application", @@ -90,6 +90,14 @@ def get_parser(): default=False, action="store_true", ) + start.add_argument( + "--no-cache", + help="Do not cache Kubernetes API responses.", + default=False, + action="store_true", + ) + start.add_argument("--ssl-keyfile", help="full path to ssl keyfile") + start.add_argument("--ssl-certfile", help="full path to ssl certfile") return parser @@ -97,8 +105,20 @@ def start(args): """ Start the server with uvicorn """ + # Validate certificates if provided + if args.ssl_keyfile and not args.ssl_certfile: + sys.exit("A --ssl-keyfile was provided without a --ssl-certfile.") + if args.ssl_certfile and not args.ssl_keyfile: + sys.exit("A --ssl-certfile was provided without a --ssl-keyfile.") + app = Starlette(debug=args.debug, routes=routes) - uvicorn.run(app, host=args.host, port=args.port) + uvicorn.run( + app, + host=args.host, + port=args.port, + ssl_keyfile=args.ssl_keyfile, + ssl_certfile=args.ssl_certfile, + ) def main(): @@ -131,14 +151,22 @@ def help(return_code=0): ) # Setup the registry - non verbose is default - print(f"API endpoint is at {defaults.API_ROOT}") if args.api_path is not None: - print(f"Setting API endpoint to {args.api_path}") defaults.API_ROOT = args.api_path + print(f"API endpoint is at {defaults.API_ROOT}") - # Limit to specific namespaces? + # Do not cache responses + if args.no_cache is True: + defaults.USE_CACHE = False + + # Set namespace or service name to be different than defaults if args.namespace: - defaults.NAMESPACES = args.namespace + defaults.NAMESPACE = args.namespace + print(f"Running from namespace {defaults.NAMESPACE}") + + if args.service_name: + defaults.SERVICE_NAME = args.service_name + print(f"Service name {defaults.SERVICE_NAME}") # Does the user want a shell? 
if args.command == "start": diff --git a/flux_metrics_api/types.py b/flux_metrics_api/types.py index e2df024..c8f8393 100644 --- a/flux_metrics_api/types.py +++ b/flux_metrics_api/types.py @@ -3,7 +3,49 @@ # # SPDX-License-Identifier: (MIT) +from datetime import datetime + +import flux_metrics_api.apis as apis import flux_metrics_api.defaults as defaults +from flux_metrics_api.metrics import metrics + + +def new_group_list(): + """ + Return a faux group list to get the version of the custom metrics API + """ + return apis.get_kubernetes_endpoint("apis") + + +def get_cluster_schema(version="v2"): + """ + Get the API group list, assuming we are inside a pod. + """ + return apis.get_kubernetes_endpoint(f"openapi/{version}") + + +def new_resource_list(): + """ + The root of the server returns the api list with available metrics. + """ + listing = { + "kind": "APIResourceList", + "apiVersion": defaults.API_VERSION(), + "groupVersion": defaults.API_ENDPOINT, + "resources": [], + } + + for metric_name in metrics: + listing["resources"].append( + { + "name": metric_name, + "singularName": metric_name, + "namespaced": True, + "kind": "MetricValueList", + "verbs": ["get"], + } + ) + return listing def new_identifier(name: str, selector: dict = None): @@ -11,12 +53,14 @@ def new_identifier(name: str, selector: dict = None): Get a new metric identifier. """ metric = {"name": name} + + # A selector would be a label on a metric (we don't have any currently) if selector is not None: metric["selector"] = selector return metric -def new_metric(metric, value, time="", windowSeconds=0, describedObject=None): +def new_metric(metric, value, timestamp="", windowSeconds=0): """ Get the metric value for an object. @@ -25,21 +69,34 @@ def new_metric(metric, value, time="", windowSeconds=0, describedObject=None): which the metric was calculated (0 for instantaneous, which is what we are making). describedObject is the object the metric was collected from. 
""" + # This probably needs work - I just fudged it for now + timestamp = timestamp or datetime.now().strftime("%Y-%m-%dT%H:%M:%S+00:00") + + # Our custom metrics API always comes from a service + describedObject = { + "kind": "Service", + "namespace": defaults.NAMESPACE, + "name": defaults.SERVICE_NAME, + "apiVersion": defaults.API_VERSION(), + } return { "metric": metric, "value": value, - "time": time, + "timestamp": timestamp, "windowSeconds": windowSeconds, "describedObject": describedObject, } -def new_metric_list(metrics): +def new_metric_list(metrics, metadata=None): """ Put list of metrics into proper list format """ - return { + listing = { "items": metrics, - "apiVersion": defaults.API_VERSION, + "apiVersion": defaults.API_ENDPOINT, "kind": "MetricValueList", } + if metadata is not None: + listing["metadata"] = metadata + return listing diff --git a/flux_metrics_api/utils.py b/flux_metrics_api/utils.py new file mode 100644 index 0000000..199596e --- /dev/null +++ b/flux_metrics_api/utils.py @@ -0,0 +1,13 @@ +# Copyright 2023 Lawrence Livermore National Security, LLC and other +# HPCIC DevTools Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (MIT) + + +def read_file(path): + """ + Read content from a file + """ + with open(path, "r") as fd: + content = fd.read() + return content diff --git a/flux_metrics_api/version.py b/flux_metrics_api/version.py index e2c9043..c0a73f0 100644 --- a/flux_metrics_api/version.py +++ b/flux_metrics_api/version.py @@ -18,6 +18,7 @@ INSTALL_REQUIRES = ( ("uvicorn", {"min_version": None}), ("starlette", {"min_version": None}), + ("starlette-apispec", {"min_version": None}), ) TESTS_REQUIRES = (("pytest", {"min_version": "4.6.2"}),)