feat(qa): take parameters from command arguments in python scripts #2665

Merged: 9 commits, Apr 4, 2024 (showing changes from 1 commit)
51 changes: 20 additions & 31 deletions scripts/qa/reporting/README.md
This directory contains some utility scripts used in the reporting/QA.

## Setup

Before running the Python scripts, execute the following within this directory (the same directory
as the `latency_throughput.py` file).

```bash
# Create a virtual environment into which to install your dependencies
python3 -m venv .venv

# Activate the virtual environment
source .venv/bin/activate

# Install the dependencies
pip install -r requirements.txt
```
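
To verify that the dependencies were installed correctly, you can, for example, try importing the
modules used by the scripts:

```bash
python3 -c "import matplotlib, numpy, pandas, pytz; print('dependencies OK')"
```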

## Latency vs Throughput Plotting

To show the instructions and parameter options, execute
```bash
python3 latency_throughput.py --help
```
Be sure that the virtual environment is activated before running the script.

For example, the following command will generate a PNG file called `cmt_v1.png` in the current
directory based on the `raw.csv` file generated by the reporting tool. The `-t` flag overrides the
default title at the top of the plot.
```bash
./latency_throughput.py -t 'CometBFT v1.x Latency vs Throughput' ./cmt_v1.png /path/to/results/raw.csv
```

## Latency vs Throughput Plotting (version 2)

The `latency_plotter.py` script generates a series of plots in the `imgs` folder.
Plots include combined experiment plots and experiments as subplots.
- `all_experiments`: plots of all experiments as individual subplots.
- `all_configs`: plots of all experiments, grouped by configuration (r,c).
- `cXrY.png`: Independent plot of experiments of configuration (c=X,r=Y) as different curves.
- `cXrY_merged.png`: Independent plot of experiments of configuration (c=X,r=Y) combined as single curve.
- `e_ID.png`: independent plot with just experiment with id ID as a single curve.

Example:
```bash
mkdir -p imgs
python3 latency_plotter.py v1.0.0-alpha.2 /path/to/results/raw.csv
```
Be sure that the virtual environment is activated before running the script.
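
After a successful run, the `imgs` folder contains one file per plot described above. The
configuration and experiment values below are only illustrative:

```
imgs/
├── all_experiments.png        # all experiments as individual subplots
├── all_configs.png            # all experiments grouped by configuration
├── c1r200.png                 # experiments of configuration c=1, r=200 as separate curves
├── c1r200_merged.png          # same configuration, experiments merged into a single curve
└── e_<experiment_id>.png      # one plot per experiment id
```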

## Prometheus metrics

292 changes: 155 additions & 137 deletions scripts/qa/reporting/latency_plotter.py
import sys
import os
from datetime import datetime

import pytz

import matplotlib as mpl
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

IMAGES_DIR = 'imgs'


def usage():
    print(f"Usage: {sys.argv[0]} release_name raw_csv_path")
    exit(1)


#FIXME: figure out in which timezone prometheus was running to adjust to UTC.
tz = pytz.timezone('UTC')


def plot_all_experiments(release, csv):
    # Group by experiment
    groups = csv.groupby(['experiment_id'])

    # Number of rows and columns in the graph
    ncols = 2 if groups.ngroups > 1 else 1
    nrows = int(np.ceil(groups.ngroups / ncols)) if groups.ngroups > 1 else 1
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(6*ncols, 4*nrows), sharey=False)
    fig.tight_layout(pad=5.0)

    # Plot experiments as subplots
    for (key, ax) in zip(groups.groups.keys(), [axes] if ncols == 1 else axes.flatten()):
        group = groups.get_group(key)
        ax.set_ylabel('latency (s)')
        ax.set_xlabel('experiment time (s)')
        ax.set_title(key)
        ax.grid(True)

        # Group by connection number and transaction rate
        paramGroups = group.groupby(['connections', 'rate'])
        for subKey in paramGroups.groups.keys():
            subGroup = paramGroups.get_group(subKey)
            startTime = subGroup.block_time.min()
            endTime = subGroup.block_time.max()
            localStartTime = tz.localize(datetime.fromtimestamp(startTime)).astimezone(pytz.utc)
            localEndTime = tz.localize(datetime.fromtimestamp(endTime)).astimezone(pytz.utc)
            mean = subGroup.duration_ns.mean()
            print('exp', key, 'start', localStartTime.strftime("%Y-%m-%dT%H:%M:%SZ"), 'end', localEndTime.strftime("%Y-%m-%dT%H:%M:%SZ"), 'duration', endTime - startTime, 'mean', mean)

            (con, rate) = subKey
            label = 'c=' + str(con) + ' r=' + str(rate)
            ax.axhline(y=mean, color='r', linestyle='-', label="mean")
            ax.scatter(subGroup.block_time, subGroup.duration_ns, label=label)
            ax.legend()

        # Save individual axes
        extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
        img_path = os.path.join(IMAGES_DIR, f'e_{key}.png')
        fig.savefig(img_path, bbox_inches=extent.expanded(1.2, 1.3))

    fig.suptitle('Vote Extensions Testnet - ' + release)

    # Save the figure with subplots
    fig.savefig(os.path.join(IMAGES_DIR, 'all_experiments.png'))


def plot_all_configs(release, csv):
    # Group by configuration
    groups = csv.groupby(['connections', 'rate'])

    # Number of rows and columns in the graph
    ncols = 2 if groups.ngroups > 1 else 1
    nrows = int(np.ceil(groups.ngroups / ncols)) if groups.ngroups > 1 else 1
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(6*ncols, 4*nrows), sharey=True)
    fig.tight_layout(pad=5.0)

    # Plot configurations as subplots
    for (key, ax) in zip(groups.groups.keys(), [axes] if ncols == 1 else axes.flatten()):
        group = groups.get_group(key)
        ax.set_ylabel('latency (s)')
        ax.set_xlabel('experiment time (s)')
        ax.grid(True)
        (con, rate) = key
        label = 'c=' + str(con) + ' r=' + str(rate)
        ax.set_title(label)

        # Group by experiment
        paramGroups = group.groupby(['experiment_id'])
        for subKey in paramGroups.groups.keys():
            subGroup = paramGroups.get_group((subKey,))
            startTime = subGroup.block_time.min()
            subGroupMod = subGroup.block_time.apply(lambda x: x - startTime)
            ax.scatter(subGroupMod, subGroup.duration_ns, label=label)
        #ax.legend()

        # Save individual axes
        extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
        img_path = os.path.join(IMAGES_DIR, f'c{con}r{rate}.png')
        fig.savefig(img_path, bbox_inches=extent.expanded(1.2, 1.3))

    fig.suptitle('Vote Extensions Testnet - ' + release)

    # Save the figure with subplots
    fig.savefig(os.path.join(IMAGES_DIR, 'all_configs.png'))


def plot_merged(release, csv):
    # Group by configuration
    groups = csv.groupby(['connections', 'rate'])

    # Number of rows and columns in the graph
    ncols = 2 if groups.ngroups > 1 else 1
    nrows = int(np.ceil(groups.ngroups / ncols)) if groups.ngroups > 1 else 1
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(6*ncols, 4*nrows), sharey=True)
    fig.tight_layout(pad=5.0)

    # Plot configurations as subplots
    for (key, ax) in zip(groups.groups.keys(), [axes] if ncols == 1 else axes.flatten()):
        group = groups.get_group(key)
        ax.set_ylabel('latency (s)')
        ax.set_xlabel('experiment time (s)')
        ax.grid(True)
        (con, rate) = key
        label = 'c=' + str(con) + ' r=' + str(rate)
        ax.set_title(label)

        # Group by experiment, but merge them as a single experiment
        paramGroups = group.groupby(['experiment_id'])
        for subKey in paramGroups.groups.keys():
            subGroup = paramGroups.get_group((subKey,))
            startTime = subGroup.block_time.min()
            subGroupMod = subGroup.block_time.apply(lambda x: x - startTime)
            ax.scatter(subGroupMod, subGroup.duration_ns, marker='o', c='#1f77b4')

        # Save individual axes
        extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
        img_path = os.path.join(IMAGES_DIR, f'c{con}r{rate}_merged.png')
        fig.savefig(img_path, bbox_inches=extent)

    plt.show()


if __name__ == "__main__":
    if len(sys.argv) < 3 or not (sys.argv[1] and sys.argv[2]):
        usage()
    release = sys.argv[1]
    csv_path = sys.argv[2]

    if not os.path.exists(csv_path):
        print('Please provide a valid raw.csv file')
        exit(1)
    csv = pd.read_csv(csv_path)

    # Transform ns to s in the latency/duration
    csv['duration_ns'] = csv['duration_ns'].apply(lambda x: x/10**9)
    csv['block_time'] = csv['block_time'].apply(lambda x: x/10**9)

    if not os.path.exists(IMAGES_DIR):
        os.makedirs(IMAGES_DIR)

    plot_all_experiments(release, csv)
    plot_all_configs(release, csv)
    plot_merged(release, csv)