Skip to content

Commit

Permalink
Merge branch 'scalability/week-43' into 'master'
Browse files Browse the repository at this point in the history
Add a wrapper around Prometheus JSON parsing and fix Farm deployment scripts

 

See merge request dfinity-lab/public/ic!15613
  • Loading branch information
Dfinity-skaestle committed Oct 26, 2023
2 parents cc1c3a8 + f299d74 commit 7465141
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 16 deletions.
4 changes: 1 addition & 3 deletions scalability/common/farm.py
Expand Up @@ -8,7 +8,6 @@
import sys
import time
import uuid
from pathlib import Path
from typing import List

import gflags
Expand Down Expand Up @@ -381,8 +380,7 @@ def prepare_and_register_config_image(self):
from common import ictools

# Generate config image
p = Path(__file__).parents[2]
path = os.path.join(p, self.artifacts_path, "ic-prep")
path = os.path.join(self.artifacts_path, "ic-prep")
FLAGS.ic_prep_bin = path
self.ic_config = ictools.ic_prep(
subnets=self.ic_node_ipv6s,
Expand Down
25 changes: 16 additions & 9 deletions scalability/common/prometheus.py
Expand Up @@ -18,6 +18,13 @@
"prometheus_url", "https://ic-metrics-prometheus-staging.ch1-obsstage1.dfinity.network", "The URL to the prometheus service."
)

def __json_loads_wrapper(text):
    """Parse *text* as JSON, logging the raw payload before re-raising on failure.

    Args:
        text: Raw response body (a JSON string) returned by Prometheus.

    Returns:
        The deserialized JSON value (typically a dict).

    Raises:
        json.JSONDecodeError: If *text* is not valid JSON. The offending
            payload is printed in red first so failed scrapes are easy to
            diagnose from the logs.
    """
    # NOTE: parameter renamed from `str` — it shadowed the builtin and was
    # concatenated into the error message below.
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        print(colored("Failed to parse JSON received from Prometheus: " + text, "red"))
        raise


class Prometheus(metrics.Metric):
"""Abstraction for collecting prometheus metrics."""
Expand Down Expand Up @@ -129,7 +136,7 @@ def get_http_request_rate_for_timestamp(testnet, load_hosts, timestamp):
)

payload = {"time": timestamp, "query": query}
return json.loads(get_prometheus(payload).text)
return __json_loads_wrapper(get_prometheus(payload).text)


def get_http_request_rate(testnet, load_hosts, t_start, t_end, request_type="query"):
Expand All @@ -144,7 +151,7 @@ def get_http_request_rate(testnet, load_hosts, t_start, t_end, request_type="que
payload = {"start": t_start, "end": t_end, "step": "10s", "query": query}

r = get_prometheus_range(payload)
j = json.loads(r.text)
j = __json_loads_wrapper(r.text)

return j

Expand All @@ -163,7 +170,7 @@ def get_execution_query_latency(testnet, load_hosts, t_start, t_end):
print("Prometheus: {}".format(json.dumps(payload, indent=2)))

r = get_prometheus_range(payload)
j = json.loads(r.text)
j = __json_loads_wrapper(r.text)

return j

Expand All @@ -175,7 +182,7 @@ def get_canister_install_rate(testnet, hosts, timestamp):
q = f'rate(execution_subnet_message_duration_seconds_count{{{common},method_name="ic00_install_code"}}[60s])'

payload = {"time": timestamp, "query": q}
return json.loads(get_prometheus(payload).text)
return __json_loads_wrapper(get_prometheus(payload).text)


def get_num_canisters_installed(testnet, hosts, timestamp):
Expand All @@ -185,7 +192,7 @@ def get_num_canisters_installed(testnet, hosts, timestamp):

q = f'replicated_state_registered_canisters{{{common},status="running"}}'
payload = {"time": timestamp, "query": q}
return json.loads(get_prometheus(payload).text)
return __json_loads_wrapper(get_prometheus(payload).text)


def get_xnet_stream_size(testnet, t_start, t_end):
Expand All @@ -194,7 +201,7 @@ def get_xnet_stream_size(testnet, t_start, t_end):
q = f"mr_stream_messages{{{common}}}"
payload = {"start": t_start, "end": t_end, "step": "10s", "query": q}
r = get_prometheus_range(payload)
return json.loads(r.text)
return __json_loads_wrapper(r.text)


def get_http_request_duration(testnet, hosts: List[str], t_start, t_end, request_type="query", step=60):
Expand All @@ -215,7 +222,7 @@ def get_http_request_duration(testnet, hosts: List[str], t_start, t_end, request
}

r = get_prometheus_range(payload)
data = json.loads(r.text)
data = __json_loads_wrapper(r.text)

print(data)
r = parse(data)
Expand All @@ -242,7 +249,7 @@ def get_finalization_rate(testnet, hosts, t_start, t_end):
}
r = get_prometheus(payload)
print(f"Prometheus response is: {r.text}")
return json.loads(r.text)
return __json_loads_wrapper(r.text)


def get_state_sync_duration(testnet, load_hosts, timestamp):
Expand All @@ -257,7 +264,7 @@ def get_state_sync_duration(testnet, load_hosts, timestamp):
payload = {"time": timestamp, "query": query}

r = get_prometheus(payload)
j = json.loads(r.text)
j = __json_loads_wrapper(r.text)

return j

Expand Down
2 changes: 1 addition & 1 deletion scalability/common/workload_experiment.py
Expand Up @@ -296,7 +296,7 @@ def __wait_for_quiet(self, max_num_iterations: int = 60, sleep_per_iteration_s:
if rate_rps <= self.quiet_rate_rps:
recovered = True

except StatisticsError:
except (StatisticsError, json.JSONDecodeError):
logging.error(f"Failed to parse prometheus response {r} - {logging.traceback.format_exc()}")

time.sleep(sleep_per_iteration_s)
Expand Down
6 changes: 3 additions & 3 deletions scalability/helpers/build-and-run.py
Expand Up @@ -29,9 +29,9 @@ def build_icos():
ic_root = get_ic_root()
if FLAGS.clean:
print(colored("Doing clean build", "green"))
subprocess.check_output(shlex.split("gitlab-ci/container/container-run.sh bazel clean"), cwd=ic_root)
subprocess.check_output(shlex.split("bazel clean"), cwd=ic_root)
subprocess.check_output(
shlex.split("gitlab-ci/container/container-run.sh rm -rf $(bazel info repository_cache)"), cwd=ic_root
shlex.split("rm -rf ./$(bazel info repository_cache)"), cwd=ic_root
)
else:
print(
Expand All @@ -48,7 +48,7 @@ def build_icos():

subprocess.check_output(
shlex.split(
"./gitlab-ci/container/container-run.sh bazel run --config=systest //ic-os/guestos/envs/dev:upload_disk-img"
"bazel run --config=systest //ic-os/guestos/envs/dev:upload_disk-img"
),
cwd=ic_root,
)
Expand Down

0 comments on commit 7465141

Please sign in to comment.