diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 348551aac..57359f36a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,12 +16,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - # - uses: chartboost/ruff-action@v1 - # Until this gets updated we need to use this commit hash (or later) - - uses: chartboost/ruff-action@491342200cdd1cf4d5132a30ddc546b3b5bc531b + - uses: chartboost/ruff-action@v1 with: args: 'format --check' - changed-files: 'true' build-image: needs: [ruff, ruff-format] runs-on: ubuntu-latest @@ -52,10 +49,11 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - test: [scenarios_test.py, rpc_test.py, graph_test.py, ln_test.py, dag_connection_test.py] + test: [scenarios_test.py, rpc_test.py, graph_test.py, ln_test.py, dag_connection_test.py, logging_test.py] steps: - uses: actions/checkout@v4 - uses: hynek/setup-cached-uv@v1 + - uses: azure/setup-helm@v4.2.0 - uses: medyagh/setup-minikube@master with: mount-path: ${{ github.workspace }}:/mnt/src diff --git a/docs/graph.md b/docs/graph.md index fa7da6534..9c06e505a 100644 --- a/docs/graph.md +++ b/docs/graph.md @@ -50,6 +50,7 @@ lightning network channel (see [lightning.md](lightning.md)). + @@ -66,20 +67,21 @@ lightning network channel (see [lightning.md](lightning.md)). ``` -| key | for | type | default | explanation | -|----------------|-------|---------|-----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| services | graph | string | | A space-separated list of extra service containers to deploy in the network. See [docs/services.md](services.md) for complete list of available services | -| version | node | string | | Bitcoin Core version with an available Warnet tank image on Dockerhub. 
May also be a GitHub repository with format user/repository:branch to build from source code | -| image | node | string | | Bitcoin Core Warnet tank image on Dockerhub with the format repository/image:tag | -| bitcoin_config | node | string | | A string of Bitcoin Core options in command-line format, e.g. '-debug=net -blocksonly' | -| tc_netem | node | string | | A tc-netem command as a string beginning with 'tc qdisc add dev eth0 root netem' | -| exporter | node | boolean | False | Whether to attach a Prometheus data exporter to the tank | -| collect_logs | node | boolean | False | Whether to collect Bitcoin Core debug logs with Promtail | -| build_args | node | string | | A string of configure options used when building Bitcoin Core from source code, e.g. '--without-gui --disable-tests' | -| ln | node | string | | Attach a lightning network node of this implementation (currently only supports 'lnd') | -| ln_image | node | string | | Specify a lightning network node image from Dockerhub with the format repository/image:tag | -| ln_cb_image | node | string | | Specify a lnd Circuit Breaker image from Dockerhub with the format repository/image:tag | -| ln_config | node | string | | A string of arguments for the lightning network node in command-line format, e.g. 
'--protocol.wumbo-channels --bitcoin.timelockdelta=80' | -| channel_open | edge | string | | Indicate that this edge is a lightning channel with these arguments passed to lnd openchannel | -| source_policy | edge | string | | Update the channel originator policy by passing these arguments passed to lnd updatechanpolicy | -| target_policy | edge | string | | Update the channel partner policy by passing these arguments passed to lnd updatechanpolicy | +| key | for | type | default | explanation | +|----------------|-------|---------|-----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| services | graph | string | | A space-separated list of extra service containers to deploy in the network. See [docs/services.md](services.md) for complete list of available services | +| version | node | string | | Bitcoin Core version with an available Warnet tank image on Dockerhub. May also be a GitHub repository with format user/repository:branch to build from source code | +| image | node | string | | Bitcoin Core Warnet tank image on Dockerhub with the format repository/image:tag | +| bitcoin_config | node | string | | A string of Bitcoin Core options in command-line format, e.g. '-debug=net -blocksonly' | +| tc_netem | node | string | | A tc-netem command as a string beginning with 'tc qdisc add dev eth0 root netem' | +| exporter | node | boolean | False | Whether to attach a Prometheus data exporter to the tank | +| metrics | node | string | Block count, peers in/out, mempool size | A space-separated string of RPC queries to scrape by prometheus | +| collect_logs | node | boolean | False | Whether to collect Bitcoin Core debug logs with Promtail | +| build_args | node | string | | A string of configure options used when building Bitcoin Core from source code, e.g. 
'--without-gui --disable-tests' | +| ln | node | string | | Attach a lightning network node of this implementation (currently only supports 'lnd' or 'cln') | +| ln_image | node | string | | Specify a lightning network node image from Dockerhub with the format repository/image:tag | +| ln_cb_image | node | string | | Specify a lnd Circuit Breaker image from Dockerhub with the format repository/image:tag | +| ln_config | node | string | | A string of arguments for the lightning network node in command-line format, e.g. '--protocol.wumbo-channels --bitcoin.timelockdelta=80' | +| channel_open | edge | string | | Indicate that this edge is a lightning channel with these arguments passed to lnd openchannel | +| source_policy | edge | string | | Update the channel originator policy by passing these arguments passed to lnd updatechanpolicy | +| target_policy | edge | string | | Update the channel partner policy by passing these arguments passed to lnd updatechanpolicy | diff --git a/docs/monitoring.md b/docs/monitoring.md index 122483c3f..8e6fb1cd1 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -1,32 +1,102 @@ # Monitoring -## Monitoring container resource usage +## Prometheus -When run in docker, a few additional containers are started up: +To monitor RPC return values over time, a Prometheus data exporter can be connected +to any Bitcoin Tank and configured to scrape any available RPC results. -* CAdvisor (container Monitoring) -* Prometheus (log scraper) -* Grafana (graphing/dashboard tool) +The `bitcoin-exporter` image is defined in `resources/images/exporter` and +maintained in the BitcoinDevProject dockerhub organization. 
To add the exporter +to the Tank pod alongside Bitcoin Core, add the `"exporter"` key to the node in the graphml file: -## CAdvisor +```xml + + 27.0 + true + +``` -CAdvisor needs no additional setup, and can be accessed from the docker host at -localhost:8080 +The default metrics are defined in the `bitcoin-exporter` image: +- Block count +- Number of inbound peers +- Number of outbound peers +- Mempool size (# of TXs) -## Prometheus +Metrics can be configured by setting a `"metrics"` key on the node in the graphml file. +The metrics value is a space-separated list of labels, RPC commands with arguments, and +JSON keys to resolve the desired data: + +``` +label=method(arguments)[JSON result key][...] +``` + +For example, the default metrics listed above are defined as: + +```xml + + 27.0 + true + blocks=getblockcount() inbounds=getnetworkinfo()["connections_in"] outbounds=getnetworkinfo()["connections_out"] mempool_size=getmempoolinfo()["size"] + +``` -Prometheus should also not need any additional setup, and can be accessed from -the docker host at localhost:9090 +The data can be retrieved from the Prometheus exporter on port `9332`, example: + +``` +# HELP blocks getblockcount() +# TYPE blocks gauge +blocks 704.0 +# HELP inbounds getnetworkinfo()["connections_in"] +# TYPE inbounds gauge +inbounds 0.0 +# HELP outbounds getnetworkinfo()["connections_out"] +# TYPE outbounds gauge +outbounds 0.0 +# HELP mempool_size getmempoolinfo()["size"] +# TYPE mempool_size gauge +mempool_size 0.0 +``` ## Grafana -Grafana is provisioned with a single default dashboard, but alternative -dashboards can be added or created. +Data from Prometheus exporters can be collected and fed into Grafana for a +web-based interface.
+ +### Install logging infrastructure + +First make sure you have `helm` installed, then run the `install_logging` script: + +```bash +resources/scripts/install_logging.sh +``` + +To forward port `3000` and view the Grafana dashboard, run the `connect_logging` script: + +```bash +resources/scripts/connect_logging.sh +``` + +The Grafana dashboard (and API) will be accessible without requiring authentication +at http://localhost:3000 + +## Dashboards + +To view the default metrics in the included default dashboard, upload the dashboard +JSON file to the Grafana server: + +``` +curl localhost:3000/api/dashboards/db \ + -H "Content-Type: application/json" \ + --data "{\"dashboard\": $(cat resources/configs/grafana/default_dashboard.json)}" +``` + +Note the URL in the reply from the server (example): + +``` +{"folderUid":"","id":2,"slug":"default-warnet-dashboard","status":"success","uid":"fdu0pda1z6a68b","url":"/d/fdu0pda1z6a68b/default-warnet-dashboard","version":1} +``` -Grafana can be accessed on the docker host from localhost:3000 using username -`admin` and password `admin` by default. +Open the dashboard in your browser (example): -The default dashboard is called "Docker Container & Host Metrics" and can be -accessed via the "dashboards" tab, or from the bottom right of the home screen. +`http://localhost:3000/d/fdu0pda1z6a68b/default-warnet-dashboard` -Additional dashboards and datasources may be added in the future.
diff --git a/docs/warcli.md b/docs/warcli.md index 0dd3a0d32..4afc34a06 100644 --- a/docs/warcli.md +++ b/docs/warcli.md @@ -26,10 +26,6 @@ options: |----------|--------|------------|-----------| | commands | String | | | -### `warcli setup` -Run the Warnet quick start setup script - - ## Bitcoin ### `warcli bitcoin debug-log` @@ -73,6 +69,18 @@ options: ## Cluster +### `warcli cluster deploy` +Setup Warnet using the current kubectl-configured cluster + + +### `warcli cluster minikube-clean` +Reinit minikube images + + +### `warcli cluster minikube-setup` +Setup minikube for use with Warnet + + ### `warcli cluster port-start` Port forward (runs as a detached process) @@ -81,11 +89,7 @@ Port forward (runs as a detached process) Stop the port forwarding process -### `warcli cluster start` -Setup and start Warnet with minikube - - -### `warcli cluster stop` +### `warcli cluster teardown` Stop the warnet server and tear down the cluster @@ -129,19 +133,19 @@ options: ## Image ### `warcli image build` -Build bitcoind and bitcoin-cli from \/\ as \:\. +Build bitcoind and bitcoin-cli from \ at \ as \:\. Optionally deploy to remote registry using --action=push, otherwise image is loaded to local registry. 
options: | name | type | required | default | |------------|--------|------------|-----------| | repo | String | yes | | -| branch | String | yes | | +| commit_sha | String | yes | | | registry | String | yes | | | tag | String | yes | | | build_args | String | | | | arches | String | | | -| action | String | | | +| action | String | | "load" | ## Ln @@ -207,11 +211,11 @@ options: Start a warnet with topology loaded from a \ into [network] options: -| name | type | required | default | -|------------|--------|------------|-----------------------------------| -| graph_file | Path | | src/warnet/graphs/default.graphml | -| force | Bool | | False | -| network | String | | "warnet" | +| name | type | required | default | +|------------|--------|------------|----------------------------------| +| graph_file | Path | | resources/graphs/default.graphml | +| force | Bool | | False | +| network | String | | "warnet" | ### `warcli network status` Get status of a warnet named [network] diff --git a/justfile b/justfile index fa53f8f5e..d7a49cb80 100644 --- a/justfile +++ b/justfile @@ -61,7 +61,7 @@ stop: set -euxo pipefail kubectl delete namespace warnet - kubectl delete namespace warnet-logging + kubectl delete namespace warnet-logging --ignore-not-found kubectl config set-context --current --namespace=default minikube image rm warnet/dev @@ -84,7 +84,7 @@ startd: stopd: # Delete all resources kubectl delete namespace warnet - kubectl delete namespace warnet-logging + kubectl delete namespace warnet-logging --ignore-not-found kubectl config set-context --current --namespace=default echo Done... 
diff --git a/pyproject.toml b/pyproject.toml index f0431d78a..458dcfa9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ where = ["src", "resources"] [tool.ruff] extend-exclude = [ "src/test_framework/*.py", + "resources/images/exporter/authproxy.py", ] line-length = 100 indent-width = 4 diff --git a/resources/configs/grafana/default_dashboard.json b/resources/configs/grafana/default_dashboard.json new file mode 100644 index 000000000..0ed627e22 --- /dev/null +++ b/resources/configs/grafana/default_dashboard.json @@ -0,0 +1,119 @@ +{ + "title": "Default Warnet Dashboard", + "refresh": "5s", + "panels": + [ + { + "datasource": + { + "type": "prometheus", + "uid": "PBFA97CFB590B2094" + }, + "targets": + [ + { + "datasource": + { + "type": "prometheus", + "uid": "PBFA97CFB590B2094" + }, + "expr": "outbounds", + "legendFormat": "{{pod}}", + "range": true + } + ], + "title": "Outbound connections", + "type": "timeseries", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 0 + } + }, + { + "datasource": + { + "type": "prometheus", + "uid": "PBFA97CFB590B2094" + }, + "targets": + [ + { + "datasource": + { + "type": "prometheus", + "uid": "PBFA97CFB590B2094" + }, + "expr": "inbounds", + "legendFormat": "{{pod}}", + "range": true + } + ], + "title": "Inbound connections", + "type": "timeseries", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 0 + } + }, + { + "datasource": + { + "type": "prometheus", + "uid": "PBFA97CFB590B2094" + }, + "targets": + [ + { + "datasource": + { + "type": "prometheus", + "uid": "PBFA97CFB590B2094" + }, + "expr": "mempool_size", + "legendFormat": "{{pod}}", + "range": true + } + ], + "title": "Mempool size", + "type": "timeseries", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 0 + } + }, + { + "datasource": + { + "type": "prometheus", + "uid": "PBFA97CFB590B2094" + }, + "targets": + [ + { + "datasource": + { + "type": "prometheus", + "uid": "PBFA97CFB590B2094" + }, + "expr": "blocks", + "legendFormat": "{{pod}}", + 
"range": true + } + ], + "title": "Blocks", + "type": "timeseries", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 0 + } + } + ] +} \ No newline at end of file diff --git a/resources/graphs/default.graphml b/resources/graphs/default.graphml index 153bd52b0..ce84579df 100644 --- a/resources/graphs/default.graphml +++ b/resources/graphs/default.graphml @@ -1,11 +1,20 @@ - - - - - - - + + + + + + + + + + + + + + + + 27.0 diff --git a/resources/images/exporter/Dockerfile b/resources/images/exporter/Dockerfile new file mode 100644 index 000000000..ca5e9668e --- /dev/null +++ b/resources/images/exporter/Dockerfile @@ -0,0 +1,14 @@ +# Use an official Python runtime as the base image +FROM python:3.12-slim + +# Python dependencies +RUN pip install --no-cache-dir prometheus_client + +# Prometheus exporter script for bitcoind +COPY bitcoin-exporter.py / + +# Bitcoin RPC client +COPY authproxy.py / + +# -u: force the stdout and stderr streams to be unbuffered +CMD ["python", "-u", "/bitcoin-exporter.py"] diff --git a/resources/images/exporter/authproxy.py b/resources/images/exporter/authproxy.py new file mode 100644 index 000000000..03042877b --- /dev/null +++ b/resources/images/exporter/authproxy.py @@ -0,0 +1,189 @@ +# Copyright (c) 2011 Jeff Garzik +# +# Previous copyright, from python-jsonrpc/jsonrpc/proxy.py: +# +# Copyright (c) 2007 Jan-Klaas Kollhof +# +# This file is part of jsonrpc. +# +# jsonrpc is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. +# +# This software is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this software; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +"""HTTP proxy for opening RPC connection to bitcoind. + +AuthServiceProxy has the following improvements over python-jsonrpc's +ServiceProxy class: + +- HTTP connections persist for the life of the AuthServiceProxy object + (if server supports HTTP/1.1) +- sends protocol 'version', per JSON-RPC 1.1 +- sends proper, incrementing 'id' +- sends Basic HTTP authentication headers +- parses all JSON numbers that look like floats as Decimal +- uses standard Python json lib +""" + +import base64 +import decimal +from http import HTTPStatus +import http.client +import json +import logging +import pathlib +import socket +import time +import urllib.parse + +HTTP_TIMEOUT = 30 +USER_AGENT = "AuthServiceProxy/0.1" + +log = logging.getLogger("BitcoinRPC") + +class JSONRPCException(Exception): + def __init__(self, rpc_error, http_status=None): + try: + errmsg = '%(message)s (%(code)i)' % rpc_error + except (KeyError, TypeError): + errmsg = '' + super().__init__(errmsg) + self.error = rpc_error + self.http_status = http_status + + +def serialization_fallback(o): + if isinstance(o, decimal.Decimal): + return str(o) + if isinstance(o, pathlib.Path): + return str(o) + raise TypeError(repr(o) + " is not JSON serializable") + +class AuthServiceProxy(): + __id_count = 0 + + # ensure_ascii: escape unicode as \uXXXX, passed to json.dumps + def __init__(self, service_url, service_name=None, timeout=HTTP_TIMEOUT, connection=None, ensure_ascii=True): + self.__service_url = service_url + self._service_name = service_name + self.ensure_ascii = ensure_ascii # can be toggled on the fly by tests + self.__url = urllib.parse.urlparse(service_url) + user = None if self.__url.username is None else self.__url.username.encode('utf8') + passwd = None if self.__url.password is None else 
self.__url.password.encode('utf8') + authpair = user + b':' + passwd + self.__auth_header = b'Basic ' + base64.b64encode(authpair) + # clamp the socket timeout, since larger values can cause an + # "Invalid argument" exception in Python's HTTP(S) client + # library on some operating systems (e.g. OpenBSD, FreeBSD) + self.timeout = min(timeout, 2147483) + self._set_conn(connection) + + def __getattr__(self, name): + if name.startswith('__') and name.endswith('__'): + # Python internal stuff + raise AttributeError + if self._service_name is not None: + name = "%s.%s" % (self._service_name, name) + return AuthServiceProxy(self.__service_url, name, connection=self.__conn) + + def _request(self, method, path, postdata): + ''' + Do a HTTP request. + ''' + headers = {'Host': self.__url.hostname, + 'User-Agent': USER_AGENT, + 'Authorization': self.__auth_header, + 'Content-type': 'application/json'} + self.__conn.request(method, path, postdata, headers) + return self._get_response() + + def get_request(self, *args, **argsn): + AuthServiceProxy.__id_count += 1 + + log.debug("-{}-> {} {}".format( + AuthServiceProxy.__id_count, + self._service_name, + json.dumps(args or argsn, default=serialization_fallback, ensure_ascii=self.ensure_ascii), + )) + if args and argsn: + params = dict(args=args, **argsn) + else: + params = args or argsn + return {'version': '1.1', + 'method': self._service_name, + 'params': params, + 'id': AuthServiceProxy.__id_count} + + def __call__(self, *args, **argsn): + postdata = json.dumps(self.get_request(*args, **argsn), default=serialization_fallback, ensure_ascii=self.ensure_ascii) + response, status = self._request('POST', self.__url.path, postdata.encode('utf-8')) + if response['error'] is not None: + raise JSONRPCException(response['error'], status) + elif 'result' not in response: + raise JSONRPCException({ + 'code': -343, 'message': 'missing JSON-RPC result'}, status) + elif status != HTTPStatus.OK: + raise JSONRPCException({ + 'code': -342, 
'message': 'non-200 HTTP status code but no JSON-RPC error'}, status) + else: + return response['result'] + + def batch(self, rpc_call_list): + postdata = json.dumps(list(rpc_call_list), default=serialization_fallback, ensure_ascii=self.ensure_ascii) + log.debug("--> " + postdata) + response, status = self._request('POST', self.__url.path, postdata.encode('utf-8')) + if status != HTTPStatus.OK: + raise JSONRPCException({ + 'code': -342, 'message': 'non-200 HTTP status code but no JSON-RPC error'}, status) + return response + + def _get_response(self): + req_start_time = time.time() + try: + http_response = self.__conn.getresponse() + except socket.timeout: + raise JSONRPCException({ + 'code': -344, + 'message': '%r RPC took longer than %f seconds. Consider ' + 'using larger timeout for calls that take ' + 'longer to return.' % (self._service_name, + self.__conn.timeout)}) + if http_response is None: + raise JSONRPCException({ + 'code': -342, 'message': 'missing HTTP response from server'}) + + content_type = http_response.getheader('Content-Type') + if content_type != 'application/json': + raise JSONRPCException( + {'code': -342, 'message': 'non-JSON HTTP response with \'%i %s\' from server' % (http_response.status, http_response.reason)}, + http_response.status) + + responsedata = http_response.read().decode('utf8') + response = json.loads(responsedata, parse_float=decimal.Decimal) + elapsed = time.time() - req_start_time + if "error" in response and response["error"] is None: + log.debug("<-%s- [%.6f] %s" % (response["id"], elapsed, json.dumps(response["result"], default=serialization_fallback, ensure_ascii=self.ensure_ascii))) + else: + log.debug("<-- [%.6f] %s" % (elapsed, responsedata)) + return response, http_response.status + + def __truediv__(self, relative_uri): + return AuthServiceProxy("{}/{}".format(self.__service_url, relative_uri), self._service_name, connection=self.__conn) + + def _set_conn(self, connection=None): + port = 80 if self.__url.port is 
None else self.__url.port + if connection: + self.__conn = connection + self.timeout = connection.timeout + elif self.__url.scheme == 'https': + self.__conn = http.client.HTTPSConnection(self.__url.hostname, port, timeout=self.timeout) + else: + self.__conn = http.client.HTTPConnection(self.__url.hostname, port, timeout=self.timeout) diff --git a/resources/images/exporter/bitcoin-exporter.py b/resources/images/exporter/bitcoin-exporter.py new file mode 100644 index 000000000..8b2cbec25 --- /dev/null +++ b/resources/images/exporter/bitcoin-exporter.py @@ -0,0 +1,67 @@ +import os + +from authproxy import AuthServiceProxy +from prometheus_client import Gauge, start_http_server + + +# Ensure that all RPC calls are made with brand new http connections +def auth_proxy_request(self, method, path, postdata): + self._set_conn() # creates new http client connection + return self.oldrequest(method, path, postdata) + + +AuthServiceProxy.oldrequest = AuthServiceProxy._request +AuthServiceProxy._request = auth_proxy_request + + +# RPC Credentials for bitcoin node +# By default we assume the container is in the same pod as bitcoind, on regtest +BITCOIN_RPC_HOST = os.environ.get("BITCOIN_RPC_HOST", "localhost") +BITCOIN_RPC_PORT = os.environ.get("BITCOIN_RPC_PORT", "18443") +BITCOIN_RPC_USER = os.environ.get("BITCOIN_RPC_USER", "warnet_user") +BITCOIN_RPC_PASSWORD = os.environ.get("BITCOIN_RPC_PASSWORD", "2themoon") + +# Port where prometheus server will scrape metrics data +METRICS_PORT = int(os.environ.get("METRICS_PORT", "9332")) + +# Bitcoin Core RPC data to scrape. Expressed as labeled RPC queries separated by spaces +# label=method(params)[return object key][...] 
+METRICS = os.environ.get( + "METRICS", + 'blocks=getblockcount() inbounds=getnetworkinfo()["connections_in"] outbounds=getnetworkinfo()["connections_out"] mempool_size=getmempoolinfo()["size"]', +) + +# Set up bitcoind RPC client +rpc = AuthServiceProxy( + service_url=f"http://{BITCOIN_RPC_USER}:{BITCOIN_RPC_PASSWORD}@{BITCOIN_RPC_HOST}:{BITCOIN_RPC_PORT}" +) + + +# Create closure outside the loop +def make_metric_function(cmd): + try:  # NOTE(review): guards only lambda creation; eval/RPC errors surface at scrape time + return lambda: eval(f"rpc.{cmd}")  # NOTE(review): evaluates METRICS text; keep it trusted + except Exception: + return None + + +# Parse RPC queries into metrics (entries may be separated by any whitespace) +commands = METRICS.split() +for labeled_cmd in commands: + if "=" not in labeled_cmd: + continue + label, cmd = labeled_cmd.strip().split("=", 1)  # first "=" only; query may contain "=" + # label, description i.e. ("bitcoin_conn_in", "Number of connections in") + metric = Gauge(label, cmd) + metric.set_function(make_metric_function(cmd)) + print(f"Metric created: {labeled_cmd}") + +# Start the server +server, thread = start_http_server(METRICS_PORT) + +print(f"Server: {server}") +print(f"Thread: {thread}") + +# Keep alive by waiting for endless loop to end +thread.join() +server.shutdown() diff --git a/resources/manifests/grafana_values.yaml b/resources/manifests/grafana_values.yaml index d530b5094..110622911 100644 --- a/resources/manifests/grafana_values.yaml +++ b/resources/manifests/grafana_values.yaml @@ -6,8 +6,17 @@ datasources: apiVersion: 1 datasources: - name: Prometheus + uid: "PBFA97CFB590B2094" type: prometheus url: http://prometheus-kube-prometheus-prometheus.warnet-logging:9090 - name: Loki type: loki url: http://loki-gateway.warnet-logging:80 +grafana.ini: + auth: + disable_login_form: true + disable_signout_menu: true + auth.anonymous: + enabled: true + org_name: Main Org.
+ org_role: Editor \ No newline at end of file diff --git a/resources/manifests/namespace.yaml b/resources/manifests/namespace.yaml index f37e67014..8f630f9f7 100644 --- a/resources/manifests/namespace.yaml +++ b/resources/manifests/namespace.yaml @@ -3,11 +3,4 @@ kind: Namespace metadata: name: warnet labels: - name: warnet ---- -apiVersion: v1 -kind: Namespace -metadata: - name: warnet-logging - labels: - name: warnet-logging \ No newline at end of file + name: warnet \ No newline at end of file diff --git a/resources/scripts/apidocs.py b/resources/scripts/apidocs.py index d4b3c4c9b..3815126a2 100755 --- a/resources/scripts/apidocs.py +++ b/resources/scripts/apidocs.py @@ -8,7 +8,7 @@ from tabulate import tabulate from warnet.cli.main import cli -file_path = Path(os.path.dirname(os.path.abspath(__file__))) / ".." / "docs" / "warcli.md" +file_path = Path(os.path.dirname(os.path.abspath(__file__))) / ".." / ".." / "docs" / "warcli.md" doc = "" diff --git a/resources/scripts/connect_logging.sh b/resources/scripts/connect_logging.sh index 862381a60..94edc35ae 100755 --- a/resources/scripts/connect_logging.sh +++ b/resources/scripts/connect_logging.sh @@ -1,8 +1,16 @@ #!/bin/bash -set -e +# NO `set -e` here so an error does not exit the script POD_NAME=$(kubectl get pods --namespace warnet-logging -l "app.kubernetes.io/name=grafana,app.kubernetes.io/instance=loki-grafana" -o jsonpath="{.items[0].metadata.name}") -echo "Go to http://localhost:3000 and login with the username 'admin' and the password 'password' to see your logs" +echo "Go to http://localhost:3000" +echo "Grafana pod name: ${POD_NAME}" -kubectl --namespace warnet-logging port-forward "${POD_NAME}" 3000 \ No newline at end of file +while true; do + echo "Attempting to start Grafana port forwarding" + kubectl --namespace warnet-logging port-forward "${POD_NAME}" 3000 2>&1 + echo "Grafana port forwarding exited with status: $?" 
+ sleep 5 +done; + +echo "warnet-logging port-forward exited" \ No newline at end of file diff --git a/resources/scripts/graphdocs.py b/resources/scripts/graphdocs.py index bf2d10520..8f3f80d81 100755 --- a/resources/scripts/graphdocs.py +++ b/resources/scripts/graphdocs.py @@ -9,7 +9,7 @@ graph_schema = load_schema() -file_path = Path(os.path.dirname(os.path.abspath(__file__))) / ".." / "docs" / "graph.md" +file_path = Path(os.path.dirname(os.path.abspath(__file__))) / ".." / ".." / "docs" / "graph.md" doc = "" diff --git a/src/warnet/backend/kubernetes_backend.py b/src/warnet/backend/kubernetes_backend.py index 6516ed7e1..4143e4f83 100644 --- a/src/warnet/backend/kubernetes_backend.py +++ b/src/warnet/backend/kubernetes_backend.py @@ -414,15 +414,18 @@ def create_bitcoind_container(self, tank: Tank) -> client.V1Container: return bitcoind_container def create_prometheus_container(self, tank) -> client.V1Container: + env = [ + client.V1EnvVar(name="BITCOIN_RPC_HOST", value="127.0.0.1"), + client.V1EnvVar(name="BITCOIN_RPC_PORT", value=str(tank.rpc_port)), + client.V1EnvVar(name="BITCOIN_RPC_USER", value=tank.rpc_user), + client.V1EnvVar(name="BITCOIN_RPC_PASSWORD", value=tank.rpc_password), + ] + if tank.metrics is not None: + env.append( + client.V1EnvVar(name="METRICS", value=tank.metrics), + ) return client.V1Container( - name="prometheus", - image="jvstein/bitcoin-prometheus-exporter:latest", - env=[ - client.V1EnvVar(name="BITCOIN_RPC_HOST", value="127.0.0.1"), - client.V1EnvVar(name="BITCOIN_RPC_PORT", value=str(tank.rpc_port)), - client.V1EnvVar(name="BITCOIN_RPC_USER", value=tank.rpc_user), - client.V1EnvVar(name="BITCOIN_RPC_PASSWORD", value=tank.rpc_password), - ], + name="prometheus", image="bitcoindevproject/bitcoin-exporter:latest", env=env ) def check_logging_crds_installed(self): @@ -436,11 +439,13 @@ def apply_prometheus_service_monitors(self, tanks): if not tank.exporter: continue + tank_name = self.get_pod_name(tank.index, ServiceType.BITCOIN) + 
service_monitor = { "apiVersion": "monitoring.coreos.com/v1", "kind": "ServiceMonitor", "metadata": { - "name": f"warnet-tank-{tank.index:06d}", + "name": tank_name, "namespace": MAIN_NAMESPACE, "labels": { "app.kubernetes.io/name": "bitcoind-metrics", @@ -449,7 +454,7 @@ def apply_prometheus_service_monitors(self, tanks): }, "spec": { "endpoints": [{"port": "prometheus-metrics"}], - "selector": {"matchLabels": {"app": f"warnet-tank-{tank.index:06d}"}}, + "selector": {"matchLabels": {"app": tank_name}}, }, } # Create the custom resource using the dynamic client diff --git a/src/warnet/graph_schema.json b/src/warnet/graph_schema.json index 1c8ff6cb6..ac1f7aa9f 100644 --- a/src/warnet/graph_schema.json +++ b/src/warnet/graph_schema.json @@ -33,6 +33,9 @@ "type": "boolean", "default": false, "comment": "Whether to attach a Prometheus data exporter to the tank"}, + "metrics": { + "type": "string", + "comment": "A space-separated string of RPC queries to scrape by Prometheus"}, "collect_logs": { "type": "boolean", "default": false, diff --git a/src/warnet/scenarios/tx_flood.py b/src/warnet/scenarios/tx_flood.py index 4ddc7e519..00671e376 100755 --- a/src/warnet/scenarios/tx_flood.py +++ b/src/warnet/scenarios/tx_flood.py @@ -37,7 +37,7 @@ def orders(self, node): if bal < 1: continue amounts = {} - num_out = randrange(1, len(self.nodes) // 2) + num_out = randrange(1, (len(self.nodes) // 2) + 1) for _ in range(num_out): sats = int(float((bal / 20) / num_out) * 1e8) amounts[choice(self.addrs)] = randrange(sats // 4, sats) / 1e8 diff --git a/src/warnet/tank.py b/src/warnet/tank.py index 9b5e37b2f..ac04d3f70 100644 --- a/src/warnet/tank.py +++ b/src/warnet/tank.py @@ -49,6 +49,7 @@ def __init__(self, index: int, warnet): self.bitcoin_config = "" self.netem = None self.exporter = False + self.metrics = None self.collect_logs = False self.build_args = "" self.lnnode: LNNode | None = None @@ -107,6 +108,9 @@ def parse_graph_node(self, node): else: raise Exception(f"Unsupported 
Lightning Network implementation: {options['impl']}") + if "metrics" in node: + self.metrics = node["metrics"] + logger.debug( f"Parsed graph node: {self.index} with attributes: {[f'{key}={value}' for key, value in graph_properties.items()]}" ) diff --git a/test/data/12_node_ring.graphml b/test/data/12_node_ring.graphml index 20e0b25ee..7cdf3a7f7 100644 --- a/test/data/12_node_ring.graphml +++ b/test/data/12_node_ring.graphml @@ -1,8 +1,20 @@ - - - - + + + + + + + + + + + + + + + + 27.0 diff --git a/test/data/build_v24_test.graphml b/test/data/build_v24_test.graphml index 089b3f72d..5dc8c7297 100644 --- a/test/data/build_v24_test.graphml +++ b/test/data/build_v24_test.graphml @@ -1,9 +1,20 @@ - - - - - + + + + + + + + + + + + + + + + 27.0 diff --git a/test/data/ln.graphml b/test/data/ln.graphml index 2eb054039..e0606c93f 100644 --- a/test/data/ln.graphml +++ b/test/data/ln.graphml @@ -1,10 +1,11 @@ - + + diff --git a/test/data/logging.graphml b/test/data/logging.graphml new file mode 100644 index 000000000..54b3b73cb --- /dev/null +++ b/test/data/logging.graphml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + + 27.0 + true + + + 27.0 + true + txrate=getchaintxstats(10)["txrate"] + + + 27.0 + + + + + + diff --git a/test/data/permutations.graphml b/test/data/permutations.graphml index 7bf557ec7..0c4686f61 100644 --- a/test/data/permutations.graphml +++ b/test/data/permutations.graphml @@ -1,13 +1,20 @@ - - - - - - - - - + + + + + + + + + + + + + + + + + 27.0 diff --git a/test/data/services.graphml b/test/data/services.graphml index 7b7ffab7b..c9e0a0d01 100644 --- a/test/data/services.graphml +++ b/test/data/services.graphml @@ -5,6 +5,7 @@ + diff --git a/test/data/ten_semi_unconnected.graphml b/test/data/ten_semi_unconnected.graphml index 065be6127..c2277407c 100644 --- a/test/data/ten_semi_unconnected.graphml +++ b/test/data/ten_semi_unconnected.graphml @@ -1,13 +1,20 @@ - - - - - - - - - + + + + + + + + + + + + + + + + + 26.0 diff --git 
a/test/logging_test.py b/test/logging_test.py new file mode 100755 index 000000000..2b7797970 --- /dev/null +++ b/test/logging_test.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 + +import logging +import os +import threading +from datetime import datetime +from pathlib import Path +from subprocess import PIPE, Popen, run + +import requests +from test_base import TestBase + + +class LoggingTest(TestBase): + def __init__(self): + super().__init__() + self.graph_file_path = Path(os.path.dirname(__file__)) / "data" / "logging.graphml" + self.scripts_dir = Path(os.path.dirname(__file__)) / ".." / "resources" / "scripts" + self.connect_logging_process = None + self.connect_logging_thread = None + self.connect_logging_logger = logging.getLogger("cnct_log") + + def run_test(self): + self.start_server() + try: + self.start_logging() + self.setup_network() + self.test_prometheus_and_grafana() + finally: + if self.connect_logging_process is not None: + self.log.info("Terminating background connect_logging.sh process...") + self.connect_logging_process.terminate() + self.stop_server() + + def start_logging(self): + self.log.info("Running install_logging.sh") + # Block until complete + run([f"{self.scripts_dir / 'install_logging.sh'}"]) + self.log.info("Running connect_logging.sh") + # Stays alive in background + self.connect_logging_process = Popen( + [f"{self.scripts_dir / 'connect_logging.sh'}"], + stdout=PIPE, + stderr=PIPE, + bufsize=1, + universal_newlines=True, + ) + self.log.info("connect_logging.sh started...") + self.connect_logging_thread = threading.Thread( + target=self.output_reader, + args=(self.connect_logging_process.stdout, self.connect_logging_logger.info), + ) + self.connect_logging_thread.daemon = True + self.connect_logging_thread.start() + + self.log.info("Waiting for RPC") + self.wait_for_rpc("scenarios_available") + + def setup_network(self): + self.log.info("Setting up network") + self.log.info(self.warcli(f"network start {self.graph_file_path}")) + 
self.wait_for_all_tanks_status(target="running", timeout=10 * 60) + self.wait_for_all_edges() + + def make_grafana_api_request(self, ds_uid, start, metric): + self.log.info("Making Grafana request...") + data = { + "queries": [{"expr": metric, "datasource": {"type": "prometheus", "uid": ds_uid}}], + "from": f"{start}", + "to": "now", + } + reply = requests.post("http://localhost:3000/api/ds/query", json=data) + assert reply.status_code == 200 + + # Default ref ID is "A", only inspecting one "frame" + return reply.json()["results"]["A"]["frames"][0]["data"]["values"] + + def test_prometheus_and_grafana(self): + self.log.info("Starting network activity scenarios") + self.warcli("scenarios run miner_std --allnodes --interval=5 --mature") + self.warcli("scenarios run tx_flood --interval=1") + + prometheus_ds = requests.get("http://localhost:3000/api/datasources/name/Prometheus") + assert prometheus_ds.status_code == 200 + prometheus_uid = prometheus_ds.json()["uid"] + self.log.info(f"Got Prometheus data source uid from Grafana: {prometheus_uid}") + + start = int(datetime.now().timestamp() * 1000) + + def get_five_values_for_metric(metric): + data = self.make_grafana_api_request(prometheus_uid, start, metric) + if len(data) < 1: + self.log.info(f"No Grafana data yet for {metric}") + return False + timestamps = data[0] + values = data[1] + self.log.info(f"Grafana data: {metric} times: {timestamps}") + self.log.info(f"Grafana data: {metric} values: {values}") + return len(values) > 5 + + self.wait_for_predicate(lambda: get_five_values_for_metric("blocks")) + self.wait_for_predicate(lambda: get_five_values_for_metric("txrate")) + + +if __name__ == "__main__": + test = LoggingTest() + test.run_test()