This notebook reads data from the `out` folder and visualizes the results of the JoularJX .csv files with matplotlib and seaborn.

First, we import the necessary libraries.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob

We want to read all `joularjx-*-filtered-methods-energy.csv` files from the `out` folder and its subfolders.

In [None]:
# Root directory that holds the results of every experiment.
out_folder = 'out'

# Walk the tree below out_folder and keep every per-method energy file,
# stored as a path relative to out_folder.
csv_files = [
    os.path.relpath(os.path.join(dirpath, filename), out_folder)
    for dirpath, _subdirs, filenames in os.walk(out_folder)
    for filename in filenames
    if filename.endswith('-filtered-methods-energy.csv')
]
print(f'Found {len(csv_files)} .csv files:')

# Sort in place so the listing (and everything that iterates csv_files
# afterwards) has a stable order.
csv_files.sort()
for relative_path in csv_files:
    print(relative_path)

The `out` folder contains the results of all experiments.
Each experiment folder contains multiple test executions, each of which contains multiple .csv files, one for each LakesideMutual service:

1. customer-core
2. customer-management
3. customer-self-service
4. policy-management

The generated files are named as follows: joularjx-\<id\>-filtered-methods-energy.csv, where \<id\> is a generated joularjx id.
Each .csv file has two columns, the first column contains method names in the format com.lakesidemutual.\<servicename\>.interfaces.\<class\>.\<method\> and the second column contains the total energy consumption in Joules for that method.
Each row represents a method and the energy consumption for its execution.

We want to read all joularjx-\<id\>-filtered-methods-energy.csv files from the `out/<experiment-id>/<test-run-id>` folders, then access the first column to extract the service name and the method name, and the second column to access the energy consumption.
The delimiter in the .csv files is a comma.
We store the data in a four-level nested dictionary keyed, in order, by experiment id, test run id, service name, and method name; the value is the energy consumption for that method.

In [None]:
# Nested result dictionary:
#   experiments_data[experiment_id][test_run_id][service_name][method_name] -> energy
experiments_data = {}

for csv_file in csv_files:
    # csv_file is relative to out_folder; its first two path components are
    # taken as the experiment id and the test run id.
    path_parts = csv_file.split(os.sep)
    if len(path_parts) < 2:
        continue  # skip any unexpected file structure

    experiment_id, test_run_id = path_parts[:2]

    # Create the experiment and test-run levels on first use.
    run_data = experiments_data.setdefault(experiment_id, {}).setdefault(test_run_id, {})

    # Read the CSV: no header row, column 0 = qualified method name, column 1 = energy.
    df = pd.read_csv(os.path.join(out_folder, csv_file), delimiter=',', header=None, names=['method_full', 'energy'])

    for method_full, energy in zip(df['method_full'], df['energy']):
        # A blank cell is parsed as NaN (a float), which has no .split().
        if not isinstance(method_full, str):
            continue  # skip malformed rows

        # Expected shape: com.lakesidemutual.<service>.….<class>.<method>
        name_parts = method_full.split('.')
        if len(name_parts) < 3:
            continue  # skip malformed rows
        service_name = name_parts[2]
        method_name = name_parts[-1]

        service_methods = run_data.setdefault(service_name, {})

        # Accumulate instead of overwrite: several classes of one service can
        # share a simple method name (e.g. <init>), and overwriting would drop
        # all but the last row's energy from every later sum.
        service_methods[method_name] = service_methods.get(method_name, 0.0) + energy

# Example: print energy consumption for every experiment and test run
for exp_id, test_runs in experiments_data.items():
    for test_id, services in test_runs.items():
        print(f"Experiment: {exp_id}, Test run: {test_id}")
        for svc, methods in services.items():
            print(f"  Service: {svc}, Methods: {len(methods)}")
            for mth, energy in methods.items():
                print(f"    {mth}: {energy} J")

We want to visualize the energy consumption for the entire application in a boxplot.
The energy consumption of each test run is aggregated by summing the energy consumption of all methods in all services.

In [None]:
# Build one row per test run holding the energy summed over all methods of
# all services in that run.
plot_data = []

for exp_id, test_runs in experiments_data.items():
    for test_id, services in test_runs.items():
        total_energy = sum(
            energy
            for svc_methods in services.values()
            for energy in svc_methods.values()
        )
        plot_data.append({
            "experiment": exp_id,
            "test_run": test_id,
            "total_energy": total_energy
        })

# Name the columns explicitly so the frame still has an "experiment" column
# when no data was parsed (an empty list would otherwise give a column-less
# frame and the lookup below would raise KeyError).
df_plot = pd.DataFrame(plot_data, columns=["experiment", "test_run", "total_energy"])

if df_plot.empty:
    print("No experiment data found; skipping total-energy boxplots.")

# White background, accessible colors
sns.set_theme(style="whitegrid", context="talk", palette="colorblind")

# One figure per experiment; sort=False keeps the experiments in the order
# in which they were read.
for exp_id, df_exp in df_plot.groupby("experiment", sort=False):
    fig, ax = plt.subplots(figsize=(12, 7), dpi=120)

    # Boxplot (one per experiment)
    sns.boxplot(x="experiment", y="total_energy", data=df_exp, ax=ax, width=0.6)

    # Overlay all test runs as points
    sns.swarmplot(x="experiment", y="total_energy", data=df_exp,
                  color="black", size=5, alpha=0.7, ax=ax)

    ax.set_ylabel("Total Energy (Joules)")
    ax.set_xlabel("Experiment")
    ax.set_title(f"Total Energy Consumption – {exp_id}", fontsize=16, fontweight="bold")

    sns.despine(ax=ax, offset=10, trim=True)
    fig.tight_layout()
    plt.show()

    # Create output folder: out/<experiment-id>/plots
    plot_folder = os.path.join(out_folder, exp_id, "plots")
    os.makedirs(plot_folder, exist_ok=True)

    # Save as PNG and SVG
    png_path = os.path.join(plot_folder, f"{exp_id}_total_energy_boxplot.png")
    svg_path = os.path.join(plot_folder, f"{exp_id}_total_energy_boxplot.svg")

    fig.savefig(png_path, dpi=300, bbox_inches="tight")
    fig.savefig(svg_path, bbox_inches="tight")
    plt.close(fig)

    print(f"Saved plots for experiment '{exp_id}' to {plot_folder}")


In [None]:
# White background, accessible colors, good for documentation
sns.set_theme(style="whitegrid", context="talk", palette="colorblind")

# Compare all experiments side by side in a single figure. With no rows
# there is nothing to draw, so report that instead of letting the plotting
# call fail on the missing data.
if df_plot.empty:
    print("No experiment data found; skipping the cross-experiment boxplot.")
else:
    fig, ax = plt.subplots(figsize=(12, 7), dpi=120)

    # Boxplot (one box per experiment)
    sns.boxplot(x="experiment", y="total_energy", data=df_plot, ax=ax, width=0.6)

    # Overlay all test runs as points
    sns.swarmplot(x="experiment", y="total_energy", data=df_plot,
                  color="black", size=5, alpha=0.7, ax=ax)

    ax.set_ylabel("Total Energy (Joules)")
    ax.set_xlabel("Experiment")
    ax.set_title("Total Energy Consumption per Experiment", fontsize=16, fontweight="bold")

    sns.despine(ax=ax, offset=10, trim=True)
    fig.tight_layout()
    plt.show()

Now we want to visualize the energy consumption for each service in a boxplot.
The energy consumption of each service is aggregated by summing the energy consumption of all methods in that service.
Again, each test run should be represented as a point in the boxplot.

In [None]:
# Build one row per (experiment, test run, service) holding the energy
# summed over all methods of that service in that run.
service_plot_data = []

for exp_id, test_runs in experiments_data.items():
    for test_id, services in test_runs.items():
        for svc, methods in services.items():
            service_plot_data.append({
                "experiment": exp_id,
                "test_run": test_id,
                "service": svc,
                "total_energy": sum(methods.values())
            })

# Name the columns explicitly so the frame still has an "experiment" column
# when no data was parsed (otherwise the lookup below would raise KeyError).
df_service_plot = pd.DataFrame(
    service_plot_data,
    columns=["experiment", "test_run", "service", "total_energy"],
)

if df_service_plot.empty:
    print("No experiment data found; skipping service-level boxplots.")

sns.set_theme(style="whitegrid", context="talk", palette="colorblind")

# One figure per experiment; sort=False keeps the experiments in the order
# in which they were read.
for exp_id, df_exp in df_service_plot.groupby("experiment", sort=False):
    fig, ax = plt.subplots(figsize=(14, 7), dpi=120)

    # Boxplot: one per service
    sns.boxplot(x="service", y="total_energy", data=df_exp, ax=ax, width=0.6)

    # Overlay points (each test run)
    sns.swarmplot(x="service", y="total_energy", data=df_exp,
                  color="black", size=5, alpha=0.7, ax=ax)

    ax.set_ylabel("Total Energy (Joules)")
    ax.set_xlabel("Service")
    ax.set_title(f"Service-Level Energy Consumption – {exp_id}",
                 fontsize=16, fontweight="bold")
    ax.tick_params(axis="x", rotation=45)  # rotate service labels
    sns.despine(ax=ax, offset=10, trim=True)
    fig.tight_layout()
    plt.show()

    # Save into the experiment's plots folder: out/<experiment-id>/plots
    plot_folder = os.path.join(out_folder, exp_id, "plots")
    os.makedirs(plot_folder, exist_ok=True)

    png_path = os.path.join(plot_folder, f"{exp_id}_all_services_total_energy_boxplot.png")
    svg_path = os.path.join(plot_folder, f"{exp_id}_all_services_total_energy_boxplot.svg")

    fig.savefig(png_path, dpi=300, bbox_inches="tight")
    fig.savefig(svg_path, bbox_inches="tight")
    plt.close(fig)

    print(f"Saved service-level plots for experiment '{exp_id}' in {plot_folder}")


We want to aggregate the energy consumption for each method across all test runs and calculate the average energy consumption.
We create a single DataFrame with one row per experiment, service, and method, containing the average energy consumption of that method over the test runs in which it was recorded. Methods named `<init>`, `setCallbacks`, `apply`, `lambda…`, or `CGLIB$…` are filtered out, and the remaining methods are shown in one grouped bar chart per experiment.

In [None]:
# Build one row per (experiment, service, method) holding the method's mean
# energy over the test runs in which it appears.
method_avg_data = []

for exp_id, test_runs in experiments_data.items():
    # Temporary accumulator: {service: {method: [energies...]}}
    service_method_acc = {}

    for services in test_runs.values():
        for svc, methods in services.items():
            svc_acc = service_method_acc.setdefault(svc, {})
            for method, energy in methods.items():
                svc_acc.setdefault(method, []).append(energy)

    # Compute averages per service/method. Every list has at least one entry,
    # so the division cannot hit an empty list.
    for svc, methods in service_method_acc.items():
        for method, energies in methods.items():
            method_avg_data.append({
                "experiment": exp_id,
                "service": svc,
                "method": method,
                "avg_energy": sum(energies) / len(energies)
            })

# Name the columns explicitly so the frame still has its columns when no
# data was parsed (otherwise the lookups below would raise KeyError).
df_method_avg = pd.DataFrame(
    method_avg_data,
    columns=["experiment", "service", "method", "avg_energy"],
)

# Combined regex for all methods to exclude; anchored on both ends, so it
# only matches complete method names.
pattern = r"^(?:<init>|setCallbacks|apply|lambda.*|CGLIB\$.*)$"

if df_method_avg.empty:
    # Nothing to filter; keep an empty frame with the same columns.
    df_method_avg_filtered = df_method_avg.copy()
    print("No experiment data found; skipping method-level grouped bar plots.")
else:
    # Filter DataFrame using regex, keeping only rows that do NOT match
    df_method_avg_filtered = df_method_avg[~df_method_avg["method"].str.contains(pattern, regex=True)]

sns.set_theme(style="whitegrid", context="talk", palette="colorblind")

# One figure per experiment; sort=False keeps the experiments in the order
# in which they were read.
for exp_id, df_exp in df_method_avg_filtered.groupby("experiment", sort=False):
    fig, ax = plt.subplots(figsize=(14, 7), dpi=120)

    # Grouped bar plot: services on x-axis, methods as hue
    sns.barplot(x="service", y="avg_energy", hue="method", data=df_exp, ax=ax)

    ax.set_ylabel("Average Energy (Joules)")
    ax.set_xlabel("Service")
    ax.set_title(f"Average Method Energy Consumption per Service – {exp_id}",
                 fontsize=16, fontweight="bold")
    ax.tick_params(axis="x", rotation=45)
    sns.despine(ax=ax, offset=10, trim=True)
    # Place the legend outside the axes so it does not cover the bars.
    ax.legend(title="Method", bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()
    plt.show()

    # Save into the experiment's plots folder: out/<experiment-id>/plots
    plot_folder = os.path.join(out_folder, exp_id, "plots")
    os.makedirs(plot_folder, exist_ok=True)

    png_path = os.path.join(plot_folder, f"{exp_id}_all_services_methods_avg_energy_barplot.png")
    svg_path = os.path.join(plot_folder, f"{exp_id}_all_services_methods_avg_energy_barplot.svg")

    fig.savefig(png_path, dpi=300, bbox_inches="tight")
    fig.savefig(svg_path, bbox_inches="tight")
    plt.close(fig)

    print(f"Saved method-level grouped bar plots for experiment '{exp_id}' in {plot_folder}")

Now we want to visualize the average energy consumption for each method in each service using a grouped bar chart.
The horizontal axis represents the energy consumption in Joules, and the vertical axis represents the method names.

In [None]:
sns.set_theme(style="whitegrid", context="talk", palette="colorblind")

# One figure per (experiment, service); sort=False keeps both in the order
# in which they appear in the data.
for exp_id, df_exp in df_method_avg_filtered.groupby("experiment", sort=False):
    for svc, df_svc in df_exp.groupby("service", sort=False):
        # Let the figure grow with the number of methods so the labels on
        # the vertical axis stay readable.
        fig_height = max(6, 0.4 * len(df_svc))
        fig, ax = plt.subplots(figsize=(10, fig_height), dpi=120)

        # Horizontal bars: energy on the horizontal axis, one bar per method
        # on the vertical axis. Passing both x and y (and no hue) avoids the
        # "hue without x and y" error raised by seaborn releases before 0.13,
        # and the axis labels make a separate legend unnecessary.
        sns.barplot(x="avg_energy", y="method", data=df_svc, ax=ax)

        ax.set_xlabel("Average Energy (Joules)")
        ax.set_ylabel("Method")
        ax.set_title(f"Method Energy Consumption – {exp_id} / {svc}",
                     fontsize=16, fontweight="bold")
        sns.despine(ax=ax, offset=10, trim=True)

        fig.tight_layout()
        plt.show()

        # Save into the experiment's plots folder: out/<experiment-id>/plots
        # (the service name is part of the file name, not the folder).
        plot_folder = os.path.join(out_folder, exp_id, "plots")
        os.makedirs(plot_folder, exist_ok=True)

        png_path = os.path.join(plot_folder, f"{exp_id}_{svc}_methods_avg_energy_barplot.png")
        svg_path = os.path.join(plot_folder, f"{exp_id}_{svc}_methods_avg_energy_barplot.svg")

        fig.savefig(png_path, dpi=300, bbox_inches="tight")
        fig.savefig(svg_path, bbox_inches="tight")
        plt.close(fig)

        print(f"Saved method-level grouped bar plot for experiment '{exp_id}', service '{svc}' in {plot_folder}")
