From 9b13071fbefef8d1a1d723de93538e5d8818519c Mon Sep 17 00:00:00 2001
From: hvanz
Date: Thu, 21 Mar 2024 11:52:08 +0100
Subject: [PATCH 1/9] Improve prometheus_plotter.py

---
 docs/references/qa/method.md               |   8 +-
 scripts/qa/reporting/README.md             |  27 +-
 scripts/qa/reporting/prometheus_plotter.py | 278 ++++++++++-----------
 3 files changed, 153 insertions(+), 160 deletions(-)

diff --git a/docs/references/qa/method.md b/docs/references/qa/method.md
index 4127473fe5..e55ba5d947 100644
--- a/docs/references/qa/method.md
+++ b/docs/references/qa/method.md
@@ -159,12 +159,14 @@ The CometBFT team should improve it at every iteration to increase the amount of
 [`latency_throughput.py`]: ../../../scripts/qa/reporting/README.md#Latency-vs-Throughput-Plotting
 [`latency_plotter.py`]: ../../../scripts/qa/reporting/README.md#Latency-vs-Throughput-Plotting-version-2
 
-#### Extracting Prometheus Metrics
+#### Extract Prometheus Metrics
 
 1. Stop the prometheus server if it is running as a service (e.g. a `systemd` unit).
-2. Unzip the prometheus database retrieved from the testnet, and move it to replace the
-   local prometheus database.
+2. Unzip the prometheus database retrieved from the testnet.
 3. Start the prometheus server and make sure no error logs appear at start up.
+   ```bash
+   prometheus --storage.tsdb.path=path/to/prometheus/data --config.file=path/to/prometheus.yml
+   ```
 4. Identify the time window you want to plot in your graphs.
 5. Execute the [`prometheus_plotter.py`] script for the time window.
 
diff --git a/scripts/qa/reporting/README.md b/scripts/qa/reporting/README.md
index d8598e2214..1bd0067a6b 100644
--- a/scripts/qa/reporting/README.md
+++ b/scripts/qa/reporting/README.md
@@ -75,23 +75,16 @@ python3 latency_plotter.py /path/to/csv/files/raw.csv
 
 ## Prometheus metrics
 
-1. Ensure that Prometheus is running locally and listening on port 9090.
-2. Tweak the script to your needs
-   1. Adjust the time window
-   2. Select the right fork
-   3. Select the right test case
-   4. Tweak/add/remove metrics
-3. Run the script as follows
-   ```bash
-   # Do the following while ensuring that the virtual environment is activated (see
-   # the Setup steps).
-   #
-   # This will generate a series of plots in the folder `imgs` of the current folder.
-
-   mkdir imgs
-   python3 prometheus_plotter.py
-   ```
-4. Plots are saved in the `imgs` folder.
+The `prometheus_plotter.py` script generates a series of plots in the `imgs` folder under the current directory.
+
+Before running the script, check that a Prometheus server is running and listening on `localhost:9090`. This URL is hardcoded in the script's `PROMETHEUS_URL` constant.
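+A quick way to verify that the server is up (assuming a standard Prometheus installation, which exposes a health endpoint) is:
+```bash
+curl http://localhost:9090/-/healthy
+```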
+ +Run the script from the virtual environment as follows: +```bash +python3 prometheus_plotter.py +``` + +For details and examples of how to run the script, just run `python3 prometheus_plotter.py` [matplotlib]: https://matplotlib.org/ [pandas]: https://pandas.pydata.org diff --git a/scripts/qa/reporting/prometheus_plotter.py b/scripts/qa/reporting/prometheus_plotter.py index fbc62050f8..1c5e6bcc4d 100644 --- a/scripts/qa/reporting/prometheus_plotter.py +++ b/scripts/qa/reporting/prometheus_plotter.py @@ -1,151 +1,149 @@ # pip install numpy pandas matplotlib requests - -import sys import os +import requests +import sys import matplotlib as mpl import matplotlib.pyplot as plt import matplotlib.dates as md - import numpy as np import pandas as pd -import requests from urllib.parse import urljoin - -from prometheus_pandas import query - -#release = 'v0.37.0-alpha.2' -release = 'v0.38.0-alpha.2' -path = os.path.join('imgs') -prometheus = query.Prometheus('http://localhost:9090') - -# Time window -#window_size = dict(seconds=150) #CMT 0.37.x-alpha3 -#window_size = dict(seconds=126) #TM v0.37 (200 nodes) baseline -#window_size = dict(hours=1, minutes=28, seconds=25) #TM v0.37.0-alpha.2 (rotating) -#window_size = dict(seconds=130) #homogeneous -#window_size = dict(seconds=127) #baseline -#window_size = dict(seconds=115) #CMT v0.38.0-alpha.2 (200 nodes) -#window_size = dict(hours=1, minutes=46) #CMT v0.38.0-alpha.2 (rotating) -window_size = dict(seconds=150) #CMT v0.38.0-alpha.2 (ve baseline) - -ext_window_size = dict(seconds=200) - -# Use the time provided by latency_plotter for the selected experiment. -#left_end = '2023-02-08T13:12:20Z' #cmt2 tm1 -#left_end = '2023-02-08T10:31:50Z' #cmt1 tm2 -#left_end = '2023-02-14T15:18:00Z' #cmt1 tm1 -#left_end = '2023-02-07T18:07:00Z' #homogeneous -#left_end = '2022-10-13T19:41:23Z' #baseline -#left_end = '2023-02-22T18:56:29Z' #CMT v0.37.x-alpha3 -#left_end = '2022-10-13T15:57:50Z' #TM v0.37 (200 nodes) baseline -#left_end = '2023-03-20T19:45:35Z' #feature/abci++vef merged with main (7d8c9d426) -#left_end = '2023-05-22T09:39:20Z' #CMT v0.38.0-alpha.2 - 200 nodes -#left_end = '2022-10-10T15:47:15Z' #TM v0.37.0-alpha.2 - rotating -#left_end = '2023-05-23T08:09:50Z' #CMT v0.38.0-alpha.2 - rotating - -#left_end = '2023-05-25T18:18:04Z' #CMT v0.38.0-alpha.2 - ve baseline -#left_end = '2023-05-30T19:05:32Z' #CMT v0.38.0-alpha.2 - ve 2k -left_end = '2023-05-30T20:44:46Z' #CMT v0.38.0-alpha.2 - ve 4k -#left_end = '2023-05-25T19:42:08Z' #CMT v0.38.0-alpha.2 - ve 8k -#left_end = '2023-05-26T00:28:12Z' #CMT v0.38.0-alpha.2 - ve 16k -#left_end = '2023-05-26T02:12:27Z' #CMT v0.38.0-alpha.2 - ve 32k - -useManualrightEnd = False -if useManualrightEnd: - #right_end = '2023-05-25T18:54:04Z' #CMT v0.38.0-alpha.2 - ve baseline - #right_end = '2023-05-30T19:40:41Z' #CMT v0.38.0-alpha.2 - ve 2k - right_end = '2023-05-30T21:15:37Z' #CMT v0.38.0-alpha.2 - ve 4k - #right_end = '2023-05-25T20:16:00Z' #CMT v0.38.0-alpha.2 - ve 8k - #right_end = '2023-05-26T01:01:57Z' #CMT v0.38.0-alpha.2 - ve 16k - #right_end = '2023-05-26T02:46:19Z' #CMT v0.38.0-alpha.2 - ve 32k - time_window = (left_end, right_end) -else: - right_end = pd.to_datetime(left_end) + pd.Timedelta(**window_size) - time_window = (left_end, right_end.strftime('%Y-%m-%dT%H:%M:%SZ')) - -ext_right_end = pd.to_datetime(left_end) + pd.Timedelta(**ext_window_size) -ext_time_window = (left_end, ext_right_end.strftime('%Y-%m-%dT%H:%M:%SZ')) - - -fork='cometbft' -#fork='tendermint' - -# Do prometheus queries, depending on the 
test case -queries200Nodes = [ - (( fork + '_mempool_size', time_window[0], time_window[1], '1s'), 'mempool_size', dict(ylabel='TXs', xlabel='time (s)', title='Mempool Size', legend=False, figsize=(10,6), grid=True, kind='area',stacked=True), False), - (( fork + '_p2p_peers', time_window[0], time_window[1], '1s'), 'peers', dict(ylabel='# Peers', xlabel='time (s)', title='Peers', legend=False, figsize=(10,6), grid=True), True), - (( 'avg(' + fork + '_mempool_size)', time_window[0], time_window[1], '1s'), 'avg_mempool_size', dict(ylabel='TXs', xlabel='time (s)', title='Average Mempool Size', legend=False, figsize=(10,6), grid=True), False), - #(( 'cometbft_consensus_height', time_window[0], time_window[1], '1s'), 'blocks_regular', dict(ylabel='# Blocks', xlabel='time (s)', title='Blocks in time', legend=False, figsize=(10,6), grid=True), False), - (( fork + '_consensus_rounds', time_window[0], time_window[1], '1s'), 'rounds', dict(ylabel='# Rounds', xlabel='time (s)', title='Rounds per block', legend=False, figsize=(10,6), grid=True), False), - (( 'rate(' + fork + '_consensus_height[20s])*60', time_window[0], time_window[1], '1s'), 'block_rate_regular', dict(ylabel='Blocks/min', xlabel='time (s)', title='Rate of block creation', legend=False, figsize=(10,6), grid=True), True), - #(( 'avg(rate(cometbft_consensus_height[20s])*60)', time_window[0], time_window[1], '1s'), 'block_rate_avg_reg', dict(ylabel='Blocks/min', xlabel='time (s)', title='Rate of block creation', legend=False, figsize=(10,6), grid=True), False), - #(( 'cometbft_consensus_total_txs', time_window[0], time_window[1], '1s'), 'total_txs_regular', dict(ylabel='# TXs', xlabel='time (s)', title='Transactions in time', legend=False, figsize=(10,6), grid=True), False), - (( 'rate(' + fork + '_consensus_total_txs[20s])*60', time_window[0], time_window[1], '1s'), 'total_txs_rate_regular', dict(ylabel='TXs/min', xlabel='time (s)', title='Rate of transaction processing', legend=False, figsize=(10,6), grid=True), True), - #(( 'avg(rate(cometbft_consensus_total_txs[20s])*60)', time_window[0], time_window[1], '1s'), 'total_txs_rate_avg_reg', dict(ylabel='TXs/min', xlabel='time (s)', title='Rate of transaction processing', legend=False, figsize=(10,6), grid=True), False), - (( 'process_resident_memory_bytes', time_window[0], time_window[1], '1s'), 'memory', dict(ylabel='Memory (bytes)', xlabel='time (s)', title='Memory usage', legend=False, figsize=(10,6), grid=True), False), - (( 'avg(process_resident_memory_bytes)', time_window[0], time_window[1], '1s'), 'avg_memory', dict(ylabel='Memory (bytes)', xlabel='time (s)', title='Average Memory usage', legend=False, figsize=(10,6), grid=True), False), - (( 'node_load1', time_window[0], time_window[1], '1s'), 'cpu', dict(ylabel='Load', xlabel='time (s)', title='Node load', legend=False, figsize=(10,6), grid=True), False), - (( 'avg(node_load1)', time_window[0], time_window[1], '1s'), 'avg_cpu', dict(ylabel='Load', xlabel='time (s)', title='Average Node load', legend=False, figsize=(10,6), grid=True), False), - #extended window metrics - (( fork + '_consensus_height', ext_time_window[0], ext_time_window[1], '1s'), 'blocks', dict(ylabel='# Blocks', xlabel='time (s)', title='Blocks in time', legend=False, figsize=(10,6), grid=True), False), - (( 'rate(' + fork + '_consensus_height[20s])*60', ext_time_window[0], ext_time_window[1], '1s'), 'block_rate', dict(ylabel='Blocks/min', xlabel='time (s)', title='Rate of block creation', legend=False, figsize=(10,6), grid=True), True), - (( fork + 
'_consensus_total_txs', ext_time_window[0], ext_time_window[1], '1s'), 'total_txs', dict(ylabel='# TXs', xlabel='time (s)', title='Transactions in time', legend=False, figsize=(10,6), grid=True), False),
-    (( 'rate(' + fork + '_consensus_total_txs[20s])*60', ext_time_window[0], ext_time_window[1], '1s'), 'total_txs_rate', dict(ylabel='TXs/min', xlabel='time (s)', title='Rate of transaction processing', legend=False, figsize=(10,6), grid=True), True),
-]
-
-queriesRotating = [
-    (( 'rate(' + fork + '_consensus_height[20s])*60', time_window[0], time_window[1], '1s'), 'rotating_block_rate', dict(ylabel='blocks/min', xlabel='time', title='Rate of Block Creation', legend=False, figsize=(10,6), grid=True), False),
-    (( 'rate(' + fork + '_consensus_total_txs[20s])*60', time_window[0], time_window[1], '1s'), 'rotating_txs_rate', dict(ylabel='TXs/min', xlabel='time', title='Rate of Transaction processing', legend=False, figsize=(10,6), grid=True), False),
-    (( fork + '_consensus_height{job=~"ephemeral.*"} or ' + fork + '_blocksync_latest_block_height{job=~"ephemeral.*"}',
+from prometheus_pandas import query as prometheus_query
+
+
+PROMETHEUS_URL = 'http://localhost:9090'
+IMAGES_DIR = 'imgs'
+TEST_CASES = ['200_nodes', 'rotating', 'vote_extensions']
+
+
+def usage():
+    print("Usage:")
+    print(f"\t{sys.argv[0]} release_name start_time window_size test_case")
+    print("where:")
+    print(f"- start_time is a UTC time in '%Y-%m-%dT%H:%M:%SZ' format")
+    print(f"- window_size is in seconds")
+    print(f"- test_case is one of {TEST_CASES}")
+    print(f"Example: \t{sys.argv[0]} v1.0.0-alpha.2 2024-03-21T08:45:23Z 180 200_nodes")
+    exit(1)
+
+
+def queries_200_nodes(time_window, ext_time_window):
+    return [
+        (( 'cometbft_mempool_size', time_window[0], time_window[1], '1s'), 'mempool_size', dict(ylabel='TXs', xlabel='time (s)', title='Mempool Size', legend=False, figsize=(10,6), grid=True, kind='area',stacked=True), False),
+        (( 'cometbft_p2p_peers', time_window[0], time_window[1], '1s'), 'peers', dict(ylabel='# Peers', xlabel='time (s)', title='Peers', legend=False, figsize=(10,6), grid=True), True),
+        (( 'avg(cometbft_mempool_size)', time_window[0], time_window[1], '1s'), 'avg_mempool_size', dict(ylabel='TXs', xlabel='time (s)', title='Average Mempool Size', legend=False, figsize=(10,6), grid=True), False),
+        #(( 'cometbft_consensus_height', time_window[0], time_window[1], '1s'), 'blocks_regular', dict(ylabel='# Blocks', xlabel='time (s)', title='Blocks in time', legend=False, figsize=(10,6), grid=True), False),
+        (( 'cometbft_consensus_rounds', time_window[0], time_window[1], '1s'), 'rounds', dict(ylabel='# Rounds', xlabel='time (s)', title='Rounds per block', legend=False, figsize=(10,6), grid=True), False),
+        (( 'rate(cometbft_consensus_height[20s])*60', time_window[0], time_window[1], '1s'), 'block_rate_regular', dict(ylabel='Blocks/min', xlabel='time (s)', title='Rate of block creation', legend=False, figsize=(10,6), grid=True), True),
+        #(( 'avg(rate(cometbft_consensus_height[20s])*60)', time_window[0], time_window[1], '1s'), 'block_rate_avg_reg', dict(ylabel='Blocks/min', xlabel='time (s)', title='Rate of block creation', legend=False, figsize=(10,6), grid=True), False),
+        #(( 'cometbft_consensus_total_txs', time_window[0], time_window[1], '1s'), 'total_txs_regular', dict(ylabel='# TXs', xlabel='time (s)', title='Transactions in time', legend=False, figsize=(10,6), grid=True), False),
+        (( 'rate(cometbft_consensus_total_txs[20s])*60', time_window[0], time_window[1], '1s'), 'total_txs_rate_regular', 
dict(ylabel='TXs/min', xlabel='time (s)', title='Rate of transaction processing', legend=False, figsize=(10,6), grid=True), True), + #(( 'avg(rate(cometbft_consensus_total_txs[20s])*60)', time_window[0], time_window[1], '1s'), 'total_txs_rate_avg_reg', dict(ylabel='TXs/min', xlabel='time (s)', title='Rate of transaction processing', legend=False, figsize=(10,6), grid=True), False), + (( 'process_resident_memory_bytes', time_window[0], time_window[1], '1s'), 'memory', dict(ylabel='Memory (bytes)', xlabel='time (s)', title='Memory usage', legend=False, figsize=(10,6), grid=True), False), + (( 'avg(process_resident_memory_bytes)', time_window[0], time_window[1], '1s'), 'avg_memory', dict(ylabel='Memory (bytes)', xlabel='time (s)', title='Average Memory usage', legend=False, figsize=(10,6), grid=True), False), + (( 'node_load1', time_window[0], time_window[1], '1s'), 'cpu', dict(ylabel='Load', xlabel='time (s)', title='Node load', legend=False, figsize=(10,6), grid=True), False), + (( 'avg(node_load1)', time_window[0], time_window[1], '1s'), 'avg_cpu', dict(ylabel='Load', xlabel='time (s)', title='Average Node load', legend=False, figsize=(10,6), grid=True), False), + + # Extended window metrics + (( 'cometbft_consensus_height', ext_time_window[0], ext_time_window[1], '1s'), 'blocks', dict(ylabel='# Blocks', xlabel='time (s)', title='Blocks in time', legend=False, figsize=(10,6), grid=True), False), + (( 'rate(cometbft_consensus_height[20s])*60', ext_time_window[0], ext_time_window[1], '1s'), 'block_rate', dict(ylabel='Blocks/min', xlabel='time (s)', title='Rate of block creation', legend=False, figsize=(10,6), grid=True), True), + (( 'cometbft_consensus_total_txs', ext_time_window[0], ext_time_window[1], '1s'), 'total_txs', dict(ylabel='# TXs', xlabel='time (s)', title='Transactions in time', legend=False, figsize=(10,6), grid=True), False), + (( 'rate(cometbft_consensus_total_txs[20s])*60', ext_time_window[0], ext_time_window[1], '1s'), 'total_txs_rate', dict(ylabel='TXs/min', xlabel='time (s)', title='Rate of transaction processing', legend=False, figsize=(10,6), grid=True), True), + ] + + +def queries_rotating(time_window): + return [ + (( 'rate(cometbft_consensus_height[20s])*60', time_window[0], time_window[1], '1s'), 'rotating_block_rate', dict(ylabel='blocks/min', xlabel='time', title='Rate of Block Creation', legend=False, figsize=(10,6), grid=True), False), + (( 'rate(cometbft_consensus_total_txs[20s])*60', time_window[0], time_window[1], '1s'), 'rotating_txs_rate', dict(ylabel='TXs/min', xlabel='time', title='Rate of Transaction processing', legend=False, figsize=(10,6), grid=True), False), + (( 'cometbft_consensus_height{job=~"ephemeral.*"} or cometbft_blocksync_latest_block_height{job=~"ephemeral.*"}', time_window[0], time_window[1], '1s'), 'rotating_eph_heights', dict(ylabel='height', xlabel='time', title='Heights of Ephemeral Nodes', legend=False, figsize=(10,6), grid=True), False), - (( fork + '_p2p_peers', time_window[0], time_window[1], '1s'), 'rotating_peers', dict(ylabel='# peers', xlabel='time', title='Peers', legend=False, figsize=(10,6), grid=True), False), - (( 'avg(process_resident_memory_bytes)', time_window[0], time_window[1], '1s'), 'rotating_avg_memory', dict(ylabel='memory (bytes)', xlabel='time', title='Average Memory Usage', legend=False, figsize=(10,6), grid=True), False), - (( 'node_load1', time_window[0], time_window[1], '1s'), 'rotating_cpu', dict(ylabel='load', xlabel='time', title='Node Load', legend=False, figsize=(10,6), grid=True), False), -] - 
-queriesVExtension= [ - (( fork + '_mempool_size', time_window[0], time_window[1], '1s'), 'mempool_size', dict(ylabel='TXs', xlabel='time (s)', title='Mempool Size', legend=False, figsize=(10,6), grid=True, kind='area',stacked=True), False), - (( fork + '_mempool_size', time_window[0], time_window[1], '1s'), 'mempool_size_not_stacked', dict(ylabel='TXs', xlabel='time (s)', title='Mempool Size', legend=False, figsize=(10,6), grid=True, stacked=False), False), - (( fork + '_p2p_peers', time_window[0], time_window[1], '1s'), 'peers', dict(ylabel='# Peers', xlabel='time (s)', title='Peers', legend=False, figsize=(10,6), grid=True), True), - (( 'avg(' + fork + '_mempool_size)', time_window[0], time_window[1], '1s'), 'avg_mempool_size', dict(ylabel='TXs', xlabel='time (s)', title='Average Mempool Size', legend=False, figsize=(10,6), grid=True), False), - (( fork + '_consensus_rounds', time_window[0], time_window[1], '1s'), 'rounds', dict(ylabel='# Rounds', xlabel='time (s)', title='Rounds per block', legend=False, figsize=(10,6), grid=True), False), - (( 'process_resident_memory_bytes', time_window[0], time_window[1], '1s'), 'memory', dict(ylabel='Memory (bytes)', xlabel='time (s)', title='Memory usage', legend=False, figsize=(10,6), grid=True), False), - (( 'avg(process_resident_memory_bytes)', time_window[0], time_window[1], '1s'), 'avg_memory', dict(ylabel='Memory (bytes)', xlabel='time (s)', title='Average Memory usage', legend=False, figsize=(10,6), grid=True), False), - (( 'node_load1', time_window[0], time_window[1], '1s'), 'cpu', dict(ylabel='Load', xlabel='time (s)', title='Node load', legend=False, figsize=(10,6), grid=True), False), - (( 'avg(node_load1)', time_window[0], time_window[1], '1s'), 'avg_cpu', dict(ylabel='Load', xlabel='time (s)', title='Average Node load', legend=False, figsize=(10,6), grid=True), False), - (( fork + '_consensus_height', time_window[0], time_window[1], '1s'), 'blocks', dict(ylabel='# Blocks', xlabel='time (s)', title='Blocks in time', legend=False, figsize=(10,6), grid=True), False), - (( 'rate(' + fork + '_consensus_height[20s])*60', time_window[0], time_window[1], '1s'), 'block_rate', dict(ylabel='Blocks/min', xlabel='time (s)', title='Rate of block creation', legend=False, figsize=(10,6), grid=True), True), - (( fork + '_consensus_total_txs', time_window[0], time_window[1], '1s'), 'total_txs', dict(ylabel='# TXs', xlabel='time (s)', title='Transactions in time', legend=False, figsize=(10,6), grid=True), False), - (( 'rate(' + fork + '_consensus_total_txs[20s])*60', time_window[0], time_window[1], '1s'), 'total_txs_rate', dict(ylabel='TXs/min', xlabel='time (s)', title='Rate of transaction processing', legend=False, figsize=(10,6), grid=True), True), -] - -#queries = queries200Nodes -#queries = queriesRotating -queries = queriesVExtension - - -for (query, file_name, pandas_params, plot_average) in queries: - print(query) - - data_frame = prometheus.query_range(*query) - #Tweak the x ticks - data_frame = data_frame.set_index(md.date2num(data_frame.index)) - - - pandas_params["title"] += " - " + release - ax = data_frame.plot(**pandas_params) - if plot_average: - average = data_frame.mean(axis=1) - data_frame['__average__'] = average - pandas_params['lw'] = 8 - pandas_params['style'] = ['--'] - pandas_params['color'] = ['red'] - ax = data_frame['__average__'].plot(**pandas_params) - - ax.xaxis.set_major_formatter(md.DateFormatter('%H:%M:%S')) - plt.savefig(os.path.join(path, file_name + '.png')) - plt.plot() - -plt.show() + (( 'cometbft_p2p_peers', 
time_window[0], time_window[1], '1s'), 'rotating_peers', dict(ylabel='# peers', xlabel='time', title='Peers', legend=False, figsize=(10,6), grid=True), False), + (( 'avg(process_resident_memory_bytes)', time_window[0], time_window[1], '1s'), 'rotating_avg_memory', dict(ylabel='memory (bytes)', xlabel='time', title='Average Memory Usage', legend=False, figsize=(10,6), grid=True), False), + (( 'node_load1', time_window[0], time_window[1], '1s'), 'rotating_cpu', dict(ylabel='load', xlabel='time', title='Node Load', legend=False, figsize=(10,6), grid=True), False), + ] + + +def queries_vote_extensions(time_window): + return [ + (( 'cometbft_mempool_size', time_window[0], time_window[1], '1s'), 'mempool_size', dict(ylabel='TXs', xlabel='time (s)', title='Mempool Size', legend=False, figsize=(10,6), grid=True, kind='area',stacked=True), False), + (( 'cometbft_mempool_size', time_window[0], time_window[1], '1s'), 'mempool_size_not_stacked', dict(ylabel='TXs', xlabel='time (s)', title='Mempool Size', legend=False, figsize=(10,6), grid=True, stacked=False), False), + (( 'cometbft_p2p_peers', time_window[0], time_window[1], '1s'), 'peers', dict(ylabel='# Peers', xlabel='time (s)', title='Peers', legend=False, figsize=(10,6), grid=True), True), + (( 'avg(cometbft_mempool_size)', time_window[0], time_window[1], '1s'), 'avg_mempool_size', dict(ylabel='TXs', xlabel='time (s)', title='Average Mempool Size', legend=False, figsize=(10,6), grid=True), False), + (( 'cometbft_consensus_rounds', time_window[0], time_window[1], '1s'), 'rounds', dict(ylabel='# Rounds', xlabel='time (s)', title='Rounds per block', legend=False, figsize=(10,6), grid=True), False), + (( 'process_resident_memory_bytes', time_window[0], time_window[1], '1s'), 'memory', dict(ylabel='Memory (bytes)', xlabel='time (s)', title='Memory usage', legend=False, figsize=(10,6), grid=True), False), + (( 'avg(process_resident_memory_bytes)', time_window[0], time_window[1], '1s'), 'avg_memory', dict(ylabel='Memory (bytes)', xlabel='time (s)', title='Average Memory usage', legend=False, figsize=(10,6), grid=True), False), + (( 'node_load1', time_window[0], time_window[1], '1s'), 'cpu', dict(ylabel='Load', xlabel='time (s)', title='Node load', legend=False, figsize=(10,6), grid=True), False), + (( 'avg(node_load1)', time_window[0], time_window[1], '1s'), 'avg_cpu', dict(ylabel='Load', xlabel='time (s)', title='Average Node load', legend=False, figsize=(10,6), grid=True), False), + (( 'cometbft_consensus_height', time_window[0], time_window[1], '1s'), 'blocks', dict(ylabel='# Blocks', xlabel='time (s)', title='Blocks in time', legend=False, figsize=(10,6), grid=True), False), + (( 'rate(cometbft_consensus_height[20s])*60', time_window[0], time_window[1], '1s'), 'block_rate', dict(ylabel='Blocks/min', xlabel='time (s)', title='Rate of block creation', legend=False, figsize=(10,6), grid=True), True), + (( 'cometbft_consensus_total_txs', time_window[0], time_window[1], '1s'), 'total_txs', dict(ylabel='# TXs', xlabel='time (s)', title='Transactions in time', legend=False, figsize=(10,6), grid=True), False), + (( 'rate(cometbft_consensus_total_txs[20s])*60', time_window[0], time_window[1], '1s'), 'total_txs_rate', dict(ylabel='TXs/min', xlabel='time (s)', title='Rate of transaction processing', legend=False, figsize=(10,6), grid=True), True), + ] + + +def main(release, start_time, window_size, test_case): + prometheus = prometheus_query.Prometheus(PROMETHEUS_URL) + + end_time = pd.to_datetime(start_time) + pd.Timedelta(**dict(seconds=window_size)) + 
time_window = (start_time, end_time.strftime('%Y-%m-%dT%H:%M:%SZ')) + + ext_end_time = pd.to_datetime(start_time) + pd.Timedelta(**dict(seconds=window_size+50)) + ext_time_window = (start_time, ext_end_time.strftime('%Y-%m-%dT%H:%M:%SZ')) + + # Select queries depending on the test case. + match test_case: + case "200_nodes": + queries = queries_200_nodes(time_window, ext_time_window) + case "rotating": + queries = queries_rotating(time_window) + case "vote_extensions": + queries = queries_vote_extensions(time_window) + case _: + print(f"Error: Unknown test case {test_case}") + return + + imgs_dir = os.path.join(IMAGES_DIR, test_case) + if not os.path.exists(imgs_dir): + os.makedirs(imgs_dir) + + # Query Prometheus and plot images. + for (query, file_name, pandas_params, plot_average) in queries: + print(f"query: {query}") + + df = prometheus.query_range(*query) + #Tweak the x ticks + df = df.set_index(md.date2num(df.index)) + + if df.empty: + print('No data found! Check the timestamps or the query.') + continue + + pandas_params["title"] += " - " + release + ax = df.plot(**pandas_params) + if plot_average: + average = df.mean(axis=1) + df['__average__'] = average + pandas_params['lw'] = 8 + pandas_params['style'] = ['--'] + pandas_params['color'] = ['red'] + ax = df['__average__'].plot(**pandas_params) + + ax.xaxis.set_major_formatter(md.DateFormatter('%H:%M:%S')) + plt.savefig(os.path.join(imgs_dir, file_name + '.png')) + plt.plot() + + plt.show() + + +if __name__ == "__main__": + if len(sys.argv) < 4 or not (sys.argv[1] and sys.argv[2] and sys.argv[3] and sys.argv[4]): + usage() + + release = sys.argv[1] + start_time = sys.argv[2] + window_size = sys.argv[3] + test_case = sys.argv[4] + main(release, start_time, int(window_size), test_case) From 53154027b0baec597617d4d5588cd93c33e0970a Mon Sep 17 00:00:00 2001 From: hvanz Date: Thu, 21 Mar 2024 17:32:23 +0100 Subject: [PATCH 2/9] comment --- scripts/qa/reporting/prometheus_plotter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/qa/reporting/prometheus_plotter.py b/scripts/qa/reporting/prometheus_plotter.py index 1c5e6bcc4d..317e6a77a6 100644 --- a/scripts/qa/reporting/prometheus_plotter.py +++ b/scripts/qa/reporting/prometheus_plotter.py @@ -1,4 +1,4 @@ -# pip install numpy pandas matplotlib requests +# pip install requests matplotlib numpy pandas prometheus-pandas import os import requests import sys From 063edcd2b7529652d5167ec3efdcedcd832867dc Mon Sep 17 00:00:00 2001 From: hvanz Date: Fri, 22 Mar 2024 00:32:23 +0100 Subject: [PATCH 3/9] update latency scripts --- scripts/qa/reporting/README.md | 51 ++-- scripts/qa/reporting/latency_plotter.py | 292 +++++++++++---------- scripts/qa/reporting/latency_throughput.py | 50 ++-- 3 files changed, 198 insertions(+), 195 deletions(-) diff --git a/scripts/qa/reporting/README.md b/scripts/qa/reporting/README.md index 1bd0067a6b..2f5e6e70ac 100644 --- a/scripts/qa/reporting/README.md +++ b/scripts/qa/reporting/README.md @@ -17,8 +17,8 @@ This directory contains some utility scripts used in the reporting/QA. ## Setup -Execute the following within this directory (the same directory as the -`latency_throughput.py` file). +Before running the Python scripts, execute the following within this directory (the same directory +as the `latency_throughput.py` file). 
 ```bash
 # Create a virtual environment into which to install your dependencies
@@ -32,46 +32,35 @@ pip install -r requirements.txt
 ```
 
 ## Latency vs Throughput Plotting
-To show the instructions and parameter options, execute
+To show the instructions and parameter options, execute 
 ```bash
-./latency_throughput.py --help
+python3 latency_throughput.py --help
 ```
+Be sure that the virtual environment is activated before running the script.
 
-Example:
-
+For example, the following command will generate a PNG file called `cmt_v1.png` in the current
+directory based on the `raw.csv` file generated by the reporting tool. The `-t` flag overrides the
+default title at the top of the plot.
 ```bash
-# Do the following while ensuring that the virtual environment is activated (see
-# the Setup steps).
-#
-# This will generate a plot in a PNG file called 'tm034.png' in the current
-# directory based on the reporting tool CSV output in the "raw.csv" file. The
-# '-t' flag overrides the default title at the top of the plot.
-
-./latency_throughput.py \
-    -t 'CometBFT v0.34.x Latency vs Throughput' \
-    ./tm034.png \
-    /path/to/csv/files/raw.csv
+./latency_throughput.py -t 'CometBFT v1.x Latency vs Throughput' ./cmt_v1.png /path/to/results/raw.csv
 ```
 
 ## Latency vs Throughput Plotting (version 2)
 
-Example:
+The `latency_plotter.py` script generates a series of plots in the `imgs` folder.
+Plots include combined experiment plots and experiments as subplots.
+- `all_experiments.png`: plots of all experiments as individual subplots.
+- `all_configs.png`: plots of all experiments, grouped by configuration (c, r).
+- `cXrY.png`: independent plot of the experiments of configuration (c=X, r=Y) as different curves.
+- `cXrY_merged.png`: independent plot of the experiments of configuration (c=X, r=Y) combined into a single curve.
+- `e_ID.png`: independent plot of just the experiment with id `ID` as a single curve.
+
+Example:
 ```bash
-# Do the following while ensuring that the virtual environment is activated (see
-# the Setup steps).
-#
-# This will generate a series of plots in the `imgs` folder.
-# Plots include combined experiment plots and experiments as subplots.
-#  - all_experiments - plots of all experiments as individual subplots.
-#  - all_configs - plots of all experiments, grouped by configuration (r,c).
-#    cXrY.png - Independent plot of experiments of configuration (c=X,r=Y) as different curves.
-#    cXrY_merged.png - Independent plot of experiments of configuration (c=X,r=Y) combined as single curve.
-#    e_ID.png - independent plot with just experiment with id ID as a single curve.
-
-mkdir -p imgs
-python3 latency_plotter.py /path/to/csv/files/raw.csv
+python3 latency_plotter.py v1.0.0-alpha.2 /path/to/results/raw.csv
 ```
+Be sure that the virtual environment is activated before running the script.
 
 ## Prometheus metrics
 
diff --git a/scripts/qa/reporting/latency_plotter.py b/scripts/qa/reporting/latency_plotter.py
index 3b42eedff8..426d53c931 100644
--- a/scripts/qa/reporting/latency_plotter.py
+++ b/scripts/qa/reporting/latency_plotter.py
@@ -1,150 +1,168 @@
 import sys
 import os
-from datetime import datetime
 import pytz
+from datetime import datetime
 
 import matplotlib as mpl
 import matplotlib.pyplot as plt
-
 import numpy as np
 import pandas as pd
 
-release = 'v0.38.0-alpha2'
+IMAGES_DIR = 'imgs'
+
+
+def usage():
+    print(f"Usage: {sys.argv[0]} release_name raw_csv_path")
+    exit(1)
+
 #FIXME: figure out in which timezone prometheus was running to adjust to UTC. 
-tz = pytz.timezone('America/Sao_Paulo') - -if len(sys.argv) != 2: - print('Pls provide the raw.csv file') - exit() -else: - csvpath = sys.argv[1] - if not os.path.exists(csvpath): - print('Pls provide a valid the raw.csv file') - exit() +tz = pytz.timezone('UTC') + + +def plot_all_experiments(release, csv): + # Group by experiment + groups = csv.groupby(['experiment_id']) + + # number of rows and columns in the graph + ncols = 2 if groups.ngroups > 1 else 1 + nrows = int( np.ceil(groups.ngroups / ncols)) if groups.ngroups > 1 else 1 + fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(6*ncols, 4*nrows), sharey=False) + fig.tight_layout(pad=5.0) + + # Plot experiments as subplots + for (key,ax) in zip(groups.groups.keys(), [axes] if ncols == 1 else axes.flatten()): + group = groups.get_group(key) + ax.set_ylabel('latency (s)') + ax.set_xlabel('experiment time (s)') + ax.set_title(key) + ax.grid(True) + + # Group by connection number and transaction rate + paramGroups = group.groupby(['connections','rate']) + for (subKey) in paramGroups.groups.keys(): + subGroup = paramGroups.get_group(subKey) + startTime = subGroup.block_time.min() + endTime = subGroup.block_time.max() + localStartTime = tz.localize(datetime.fromtimestamp(startTime)).astimezone(pytz.utc) + localEndTime = tz.localize(datetime.fromtimestamp(endTime)).astimezone(pytz.utc) + subGroup.block_time.apply(lambda x: x - startTime ) + mean = subGroup.duration_ns.mean() + print('exp', key ,'start', localEndTime.strftime("%Y-%m-%dT%H:%M:%SZ"), 'end', localStartTime.strftime("%Y-%m-%dT%H:%M:%SZ"), 'duration', endTime - startTime, "mean", mean) + + (con,rate) = subKey + label = 'c='+str(con) + ' r='+ str(rate) + ax.axhline(y = mean, color = 'r', linestyle = '-', label="mean") + ax.scatter(subGroup.block_time, subGroup.duration_ns, label=label) + ax.legend() + + # Save individual axes + extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) + img_path = os.path.join(IMAGES_DIR, f'e_{key}.png') + fig.savefig(img_path, bbox_inches=extent.expanded(1.2, 1.3)) + + fig.suptitle('Vote Extensions Testnet - ' + release) + + # Save the figure with subplots + fig.savefig(os.path.join(IMAGES_DIR, 'all_experiments.png')) + + +def plot_all_configs(release, csv): + # Group by configuration + groups = csv.groupby(['connections','rate']) + + # number of rows and columns in the graph + ncols = 2 if groups.ngroups > 1 else 1 + nrows = int( np.ceil(groups.ngroups / ncols)) if groups.ngroups > 1 else 1 + fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(6*ncols, 4*nrows), sharey=True) + fig.tight_layout(pad=5.0) + + # Plot configurations as subplots + for (key,ax) in zip(groups.groups.keys(), [axes] if ncols == 1 else axes.flatten()): + group = groups.get_group(key) + ax.set_ylabel('latency (s)') + ax.set_xlabel('experiment time (s)') + ax.grid(True) + (con,rate) = key + label = 'c='+str(con) + ' r='+ str(rate) + ax.set_title(label) + + # Group by experiment + paramGroups = group.groupby(['experiment_id']) + for (subKey) in paramGroups.groups.keys(): + subGroup = paramGroups.get_group((subKey,)) + startTime = subGroup.block_time.min() + subGroupMod = subGroup.block_time.apply(lambda x: x - startTime) + ax.scatter(subGroupMod, subGroup.duration_ns, label=label) + #ax.legend() - print(csvpath) - -path = os.path.join('imgs') - -#Load the CSV -csv = pd.read_csv(csvpath) - -#Transform ns to s in the latency/duration -csv['duration_ns'] = csv['duration_ns'].apply(lambda x: x/10**9) -csv['block_time'] = 
csv['block_time'].apply(lambda x: x/10**9) - -#Group by experiment -groups = csv.groupby(['experiment_id']) - -#number of rows and columns in the graph -ncols = 2 if groups.ngroups > 1 else 1 -nrows = int( np.ceil(groups.ngroups / ncols)) if groups.ngroups > 1 else 1 -fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(6*ncols, 4*nrows), sharey=False) -fig.tight_layout(pad=5.0) - - -#Plot experiments as subplots -for (key,ax) in zip(groups.groups.keys(), [axes] if ncols == 1 else axes.flatten()): - group = groups.get_group(key) - ax.set_ylabel('latency (s)') - ax.set_xlabel('experiment time (s)') - ax.set_title(key) - ax.grid(True) - - #Group by connection number and transaction rate - paramGroups = group.groupby(['connections','rate']) - for (subKey) in paramGroups.groups.keys(): - subGroup = paramGroups.get_group(subKey) - startTime = subGroup.block_time.min() - endTime = subGroup.block_time.max() - localStartTime = tz.localize(datetime.fromtimestamp(startTime)).astimezone(pytz.utc) - localEndTime = tz.localize(datetime.fromtimestamp(endTime)).astimezone(pytz.utc) - subGroup.block_time.apply(lambda x: x - startTime ) - mean = subGroup.duration_ns.mean() - print('exp', key ,'start', localEndTime.strftime("%Y-%m-%dT%H:%M:%SZ"), 'end', localStartTime.strftime("%Y-%m-%dT%H:%M:%SZ"), 'duration', endTime - startTime, "mean", mean) - - (con,rate) = subKey + + #Save individual axes + extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) + img_path = os.path.join(IMAGES_DIR, f'c{con}r{rate}.png') + fig.savefig(img_path, bbox_inches=extent.expanded(1.2, 1.3)) + + fig.suptitle('Vote Extensions Testnet - ' + release) + + # Save the figure with subplots + fig.savefig(os.path.join(IMAGES_DIR, 'all_configs.png')) + + +def plot_merged(release, csv): + # Group by configuration + groups = csv.groupby(['connections','rate']) + + # number of rows and columns in the graph + ncols = 2 if groups.ngroups > 1 else 1 + nrows = int( np.ceil(groups.ngroups / ncols)) if groups.ngroups > 1 else 1 + fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(6*ncols, 4*nrows), sharey=True) + fig.tight_layout(pad=5.0) + + # Plot configurations as subplots + for (key,ax) in zip(groups.groups.keys(), [axes] if ncols == 1 else axes.flatten()): + group = groups.get_group(key) + ax.set_ylabel('latency (s)') + ax.set_xlabel('experiment time (s)') + ax.grid(True) + (con,rate) = key label = 'c='+str(con) + ' r='+ str(rate) - ax.axhline(y = mean, color = 'r', linestyle = '-', label="mean") - ax.scatter(subGroup.block_time, subGroup.duration_ns, label=label) - ax.legend() - - #Save individual axes - extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) - fig.savefig(os.path.join(path,'e_'+key + '.png'), bbox_inches=extent.expanded(1.2, 1.3)) - -fig.suptitle('Vote Extensions Testnet - ' + release) - -# Save the figure with subplots -fig.savefig(os.path.join(path,'all_experiments.png')) - - - -#Group by configuration -groups = csv.groupby(['connections','rate']) - -#number of rows and columns in the graph -ncols = 2 if groups.ngroups > 1 else 1 -nrows = int( np.ceil(groups.ngroups / ncols)) if groups.ngroups > 1 else 1 -fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(6*ncols, 4*nrows), sharey=True) -fig.tight_layout(pad=5.0) - -#Plot configurations as subplots -for (key,ax) in zip(groups.groups.keys(), [axes] if ncols == 1 else axes.flatten()): - group = groups.get_group(key) - ax.set_ylabel('latency (s)') - ax.set_xlabel('experiment time (s)') - ax.grid(True) - 
(con,rate) = key
-    label = 'c='+str(con) + ' r='+ str(rate)
-    ax.set_title(label)
-
-    #Group by experiment
-    paramGroups = group.groupby(['experiment_id'])
-    for (subKey) in paramGroups.groups.keys():
-        subGroup = paramGroups.get_group(subKey)
-        startTime = subGroup.block_time.min()
-        subGroupMod = subGroup.block_time.apply(lambda x: x - startTime)
-        ax.scatter(subGroupMod, subGroup.duration_ns, label=label)
-        #ax.legend()
-
-
-    #Save individual axes
-    extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
-    fig.savefig(os.path.join(path,'c'+str(con) + 'r'+ str(rate) + '.png'), bbox_inches=extent.expanded(1.2, 1.3))
-
-fig.suptitle('Vote Extensions Testnet - ' + release)
-
-
-# Save the figure with subplots
-fig.savefig(os.path.join(path,'all_configs.png'))
-
-
-fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(6*ncols, 4*nrows), sharey=True)
-fig.tight_layout(pad=5.0)
-
-#Plot configurations as subplots
-for (key,ax) in zip(groups.groups.keys(), [axes] if ncols == 1 else axes.flatten()):
-    group = groups.get_group(key)
-    ax.set_ylabel('latency (s)')
-    ax.set_xlabel('experiment time (s)')
-    ax.grid(True)
-    (con,rate) = key
-    label = 'c='+str(con) + ' r='+ str(rate)
-    ax.set_title(label)
-
-    #Group by experiment, but merge them as a single experiment
-    paramGroups = group.groupby(['experiment_id'])
-    for (subKey) in paramGroups.groups.keys():
-        subGroup = paramGroups.get_group(subKey)
-        startTime = subGroup.block_time.min()
-        subGroupMod = subGroup.block_time.apply(lambda x: x - startTime)
-        ax.scatter(subGroupMod, subGroup.duration_ns, marker='o',c='#1f77b4')
-
-    #Save individual axes
-    extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
-    (con,rate) = key
-    fig.savefig(os.path.join(path,'c'+str(con) + 'r'+ str(rate) + '_merged.png'), bbox_inches=extent)
-
-plt.show()
+        ax.set_title(label)
+
+        # Group by experiment, but merge them as a single experiment
+        paramGroups = group.groupby(['experiment_id'])
+        for (subKey) in paramGroups.groups.keys():
+            subGroup = paramGroups.get_group((subKey,))
+            startTime = subGroup.block_time.min()
+            subGroupMod = subGroup.block_time.apply(lambda x: x - startTime)
+            ax.scatter(subGroupMod, subGroup.duration_ns, marker='o',c='#1f77b4')
+
+        # Save individual axes
+        extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
+        (con, rate) = key
+        img_path = os.path.join(IMAGES_DIR, f'c{con}r{rate}_merged.png')
+        fig.savefig(img_path, bbox_inches=extent)
+
+    plt.show()
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 3 or not (sys.argv[1] and sys.argv[2]):
+        usage()
+    release = sys.argv[1]
+    csv_path = sys.argv[2]
+
+    if not os.path.exists(csv_path):
+        print('Please provide a valid raw.csv file')
+        exit()
+    csv = pd.read_csv(csv_path)
+
+    # Transform ns to s in the latency/duration
+    csv['duration_ns'] = csv['duration_ns'].apply(lambda x: x/10**9)
+    csv['block_time'] = csv['block_time'].apply(lambda x: x/10**9)
+
+    if not os.path.exists(IMAGES_DIR):
+        os.makedirs(IMAGES_DIR)
+
+    plot_all_experiments(release, csv)
+    plot_all_configs(release, csv)
+    plot_merged(release, csv)
diff --git a/scripts/qa/reporting/latency_throughput.py b/scripts/qa/reporting/latency_throughput.py
index adaa4b76ca..75bb744fdc 100755
--- a/scripts/qa/reporting/latency_throughput.py
+++ b/scripts/qa/reporting/latency_throughput.py
@@ -15,33 +15,8 @@
 import matplotlib.pyplot as plt
 import numpy as np
 
-DEFAULT_TITLE = "CometBFT latency vs throughput"
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        
description="Renders a latency vs throughput diagram " - "for a set of transactions provided by the loadtime reporting tool", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('-t', - '--title', - default=DEFAULT_TITLE, - help='Plot title') - parser.add_argument('output_image', - help='Output image file (in PNG format)') - parser.add_argument( - 'input_csv_file', - nargs='+', - help="CSV input file from which to read transaction data " - "- must have been generated by the loadtime reporting tool") - args = parser.parse_args() - logging.basicConfig(format='%(levelname)s\t%(message)s', - stream=sys.stdout, - level=logging.INFO) - plot_latency_vs_throughput(args.input_csv_file, - args.output_image, - title=args.title) +DEFAULT_TITLE = "CometBFT latency vs throughput" def plot_latency_vs_throughput(input_files, output_image, title=DEFAULT_TITLE): @@ -167,4 +142,25 @@ def compute_experiments_stats(experiments): if __name__ == "__main__": - main() + parser = argparse.ArgumentParser( + description="Renders a latency vs throughput diagram " + "for a set of transactions provided by the loadtime reporting tool", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('-t', + '--title', + default=DEFAULT_TITLE, + help='Plot title') + parser.add_argument('output_image', + help='Output image file (in PNG format)') + parser.add_argument( + 'input_csv_file', + nargs='+', + help="CSV input file from which to read transaction data " + "- must have been generated by the loadtime reporting tool") + args = parser.parse_args() + + logging.basicConfig(format='%(levelname)s\t%(message)s', + stream=sys.stdout, + level=logging.INFO) + + plot_latency_vs_throughput(args.input_csv_file, args.output_image, title=args.title) From ce8451724179dc1b8f9586a4c6c6d203ce71a913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hern=C3=A1n=20Vanzetto?= <15466498+hvanz@users.noreply.github.com> Date: Mon, 25 Mar 2024 16:04:09 +0100 Subject: [PATCH 4/9] Update scripts/qa/reporting/README.md Co-authored-by: lasaro --- scripts/qa/reporting/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/qa/reporting/README.md b/scripts/qa/reporting/README.md index 2f5e6e70ac..4d85ec7ced 100644 --- a/scripts/qa/reporting/README.md +++ b/scripts/qa/reporting/README.md @@ -43,7 +43,7 @@ For example, the following command will generate a PNG file called `cmt_v1.png` directory based on the `raw.csv` file generated by the reporting tool. The `-t` flag overrides the default title at the top of the plot. 
```bash -./latency_throughput.py -t 'CometBFT v1.x Latency vs Throughput' ./cmt_v1.png /path/to/results/raw.csv +python3 latency_throughput.py -t 'CometBFT v1.x Latency vs Throughput' ./cmt_v1.png /path/to/results/raw.csv ``` ## Latency vs Throughput Plotting (version 2) From c1eca942b9ab6368f777f2c5ac3a1157f8316905 Mon Sep 17 00:00:00 2001 From: hvanz Date: Tue, 26 Mar 2024 08:45:25 +0100 Subject: [PATCH 5/9] Make python files executable --- scripts/qa/reporting/README.md | 10 +++++----- scripts/qa/reporting/latency_plotter.py | 2 ++ scripts/qa/reporting/latency_throughput.py | 1 + scripts/qa/reporting/prometheus_plotter.py | 3 +++ 4 files changed, 11 insertions(+), 5 deletions(-) mode change 100644 => 100755 scripts/qa/reporting/latency_plotter.py mode change 100644 => 100755 scripts/qa/reporting/prometheus_plotter.py diff --git a/scripts/qa/reporting/README.md b/scripts/qa/reporting/README.md index 4d85ec7ced..a285d8c2f0 100644 --- a/scripts/qa/reporting/README.md +++ b/scripts/qa/reporting/README.md @@ -1,6 +1,6 @@ # Reporting Scripts -This directory contains some utility scripts used in the reporting/QA. +This directory contains some utility scripts used for generating reports of QA processes. * [`latency_throughput.py`](./latency_throughput.py) is a Python script that uses [matplotlib] to plot a graph of transaction latency vs throughput rate based on @@ -35,7 +35,7 @@ pip install -r requirements.txt To show the instructions and parameter options, execute ```bash -python3 latency_throughput.py --help +./latency_throughput.py --help ``` Be sure that the virtual environment is enabled before running the script. @@ -43,7 +43,7 @@ For example, the following command will generate a PNG file called `cmt_v1.png` directory based on the `raw.csv` file generated by the reporting tool. The `-t` flag overrides the default title at the top of the plot. ```bash -python3 latency_throughput.py -t 'CometBFT v1.x Latency vs Throughput' ./cmt_v1.png /path/to/results/raw.csv +./latency_throughput.py -t 'CometBFT v1.x Latency vs Throughput' ./cmt_v1.png /path/to/results/raw.csv ``` ## Latency vs Throughput Plotting (version 2) @@ -58,7 +58,7 @@ Plots include combined experiment plots and experiments as subplots. Example: ```bash -python3 latency_plotter.py v1.0.0-alpha.2 /path/to/results/raw.csv +./latency_plotter.py v1.0.0-alpha.2 /path/to/results/raw.csv ``` Be sure that the virtual environment is enabled before running the script. @@ -70,7 +70,7 @@ Before running the script, check that a Prometheus server in `localhost:9090`. T Run the script from the virtual environment as follows: ```bash -python3 prometheus_plotter.py +./prometheus_plotter.py ``` For details and examples of how to run the script, just run `python3 prometheus_plotter.py` diff --git a/scripts/qa/reporting/latency_plotter.py b/scripts/qa/reporting/latency_plotter.py old mode 100644 new mode 100755 index 426d53c931..a5c6b320d6 --- a/scripts/qa/reporting/latency_plotter.py +++ b/scripts/qa/reporting/latency_plotter.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import sys import os import pytz diff --git a/scripts/qa/reporting/latency_throughput.py b/scripts/qa/reporting/latency_throughput.py index 75bb744fdc..c048068171 100755 --- a/scripts/qa/reporting/latency_throughput.py +++ b/scripts/qa/reporting/latency_throughput.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 + """ A simple script to parse the CSV output from the loadtime reporting tool (see https://github.com/cometbft/cometbft/tree/main/test/loadtime/cmd/report). 
diff --git a/scripts/qa/reporting/prometheus_plotter.py b/scripts/qa/reporting/prometheus_plotter.py old mode 100644 new mode 100755 index 317e6a77a6..8dbe0ed3a0 --- a/scripts/qa/reporting/prometheus_plotter.py +++ b/scripts/qa/reporting/prometheus_plotter.py @@ -1,3 +1,6 @@ +#!/usr/bin/env python3 + +# Requirements: # pip install requests matplotlib numpy pandas prometheus-pandas import os import requests From 10b97c8fa92cd9c484556f2b22cd752d04c0bcb2 Mon Sep 17 00:00:00 2001 From: lasarojc Date: Tue, 26 Mar 2024 12:13:14 -0300 Subject: [PATCH 6/9] Fix start time of experiments to 0. --- scripts/qa/reporting/latency_plotter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/qa/reporting/latency_plotter.py b/scripts/qa/reporting/latency_plotter.py index a5c6b320d6..80684cb1e1 100755 --- a/scripts/qa/reporting/latency_plotter.py +++ b/scripts/qa/reporting/latency_plotter.py @@ -46,10 +46,10 @@ def plot_all_experiments(release, csv): subGroup = paramGroups.get_group(subKey) startTime = subGroup.block_time.min() endTime = subGroup.block_time.max() + subGroup.block_time = subGroup.block_time.apply(lambda x: x - startTime ) + mean = subGroup.duration_ns.mean() localStartTime = tz.localize(datetime.fromtimestamp(startTime)).astimezone(pytz.utc) localEndTime = tz.localize(datetime.fromtimestamp(endTime)).astimezone(pytz.utc) - subGroup.block_time.apply(lambda x: x - startTime ) - mean = subGroup.duration_ns.mean() print('exp', key ,'start', localEndTime.strftime("%Y-%m-%dT%H:%M:%SZ"), 'end', localStartTime.strftime("%Y-%m-%dT%H:%M:%SZ"), 'duration', endTime - startTime, "mean", mean) (con,rate) = subKey From 76a7c3c719f5a35a76ad2149d44a38265995b423 Mon Sep 17 00:00:00 2001 From: hvanz Date: Wed, 27 Mar 2024 08:53:56 +0100 Subject: [PATCH 7/9] fix logging message --- scripts/qa/reporting/latency_plotter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/qa/reporting/latency_plotter.py b/scripts/qa/reporting/latency_plotter.py index 80684cb1e1..2cedc24106 100755 --- a/scripts/qa/reporting/latency_plotter.py +++ b/scripts/qa/reporting/latency_plotter.py @@ -50,7 +50,7 @@ def plot_all_experiments(release, csv): mean = subGroup.duration_ns.mean() localStartTime = tz.localize(datetime.fromtimestamp(startTime)).astimezone(pytz.utc) localEndTime = tz.localize(datetime.fromtimestamp(endTime)).astimezone(pytz.utc) - print('exp', key ,'start', localEndTime.strftime("%Y-%m-%dT%H:%M:%SZ"), 'end', localStartTime.strftime("%Y-%m-%dT%H:%M:%SZ"), 'duration', endTime - startTime, "mean", mean) + print('experiment', key ,'start', localStartTime.strftime("%Y-%m-%dT%H:%M:%SZ"), 'end', localEndTime.strftime("%Y-%m-%dT%H:%M:%SZ"), 'duration', endTime - startTime, "mean", mean) (con,rate) = subKey label = 'c='+str(con) + ' r='+ str(rate) From baa63780c2630ce868698b5e6bc79e52dd869f07 Mon Sep 17 00:00:00 2001 From: hvanz Date: Wed, 27 Mar 2024 11:18:27 +0100 Subject: [PATCH 8/9] fix arguments check --- scripts/qa/reporting/prometheus_plotter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/qa/reporting/prometheus_plotter.py b/scripts/qa/reporting/prometheus_plotter.py index 8dbe0ed3a0..0f80625e74 100755 --- a/scripts/qa/reporting/prometheus_plotter.py +++ b/scripts/qa/reporting/prometheus_plotter.py @@ -142,7 +142,7 @@ def main(release, start_time, window_size, test_case): if __name__ == "__main__": - if len(sys.argv) < 4 or not (sys.argv[1] and sys.argv[2] and sys.argv[3] and sys.argv[4]): + if len(sys.argv) < 5 or not (sys.argv[1] and 
sys.argv[2] and sys.argv[3] and sys.argv[4]): usage() release = sys.argv[1] From 50e7e72d4b6c567632b4631db5f56d8bf346a2b2 Mon Sep 17 00:00:00 2001 From: hvanz Date: Thu, 4 Apr 2024 18:20:34 +0200 Subject: [PATCH 9/9] revert changes to method.md --- docs/references/qa/method.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/references/qa/method.md b/docs/references/qa/method.md index e55ba5d947..4127473fe5 100644 --- a/docs/references/qa/method.md +++ b/docs/references/qa/method.md @@ -159,14 +159,12 @@ The CometBFT team should improve it at every iteration to increase the amount of [`latency_throughput.py`]: ../../../scripts/qa/reporting/README.md#Latency-vs-Throughput-Plotting [`latency_plotter.py`]: ../../../scripts/qa/reporting/README.md#Latency-vs-Throughput-Plotting-version-2 -#### Extract Prometheus Metrics +#### Extracting Prometheus Metrics 1. Stop the prometheus server if it is running as a service (e.g. a `systemd` unit). -2. Unzip the prometheus database retrieved from the testnet. +2. Unzip the prometheus database retrieved from the testnet, and move it to replace the + local prometheus database. 3. Start the prometheus server and make sure no error logs appear at start up. - ```bash - prometheus --storage.tsdb.path=path/to/prometheus/data --config.file=path/to/prometheus.yml - ``` 4. Identify the time window you want to plot in your graphs. 5. Execute the [`prometheus_plotter.py`] script for the time window.