In [1]:
import importlib
from main import TimeLoopExperimentController
import architectures.architecture_strategy
import architectures.architecture_constants
from architecture_results.derived_metrics_evaluator import DerivedMetricsEvaluator
import matplot_results_plotter.matplot_results_plotter
from architectures.architecture_constants import Architecture, GPUMemoryScale, RackSize, PEsConfig, base_config, resnet_18_layers

import matplotlib.pyplot as plt
from typing import Sequence, Tuple, Optional
from matplotlib.ticker import ScalarFormatter

importlib.reload(architectures.architecture_strategy)
importlib.reload(architectures.architecture_constants)
importlib.reload(matplot_results_plotter.matplot_results_plotter)

<module 'matplot_results_plotter.matplot_results_plotter' from '/home/workspace/matplot_results_plotter/matplot_results_plotter.py'>

In [2]:
def plot_grouped_bar_chart(
    x_labels: Sequence[str],
    series_1: Sequence[float],
    series_2: Sequence[float],
    series_labels: Tuple[str, str] = ("Series 1", "Series 2"),
    colors: Tuple[str, str] = ("tab:red", "tab:green"),
    chart_title: str = "",
    y_axis_title: str = "",
    use_scientific: bool = True
):
    """
    Draw a grouped‑bar chart with two bars per category.

    Parameters
    ----------
    x_labels : list[str]
        Category names shown on the x‑axis (length N).
    series_1, series_2 : list[float]
        Heights for the two series (each of length N, same order as x_labels).
    series_labels : (str, str), optional
        Legend labels for the two series.
    colors : (str, str), optional
        Matplotlib color specs for the bars.
    chart_title : str, optional
        Title displayed above the chart.
    y_axis_title : str, optional
        Label for the y‑axis.
    use_scientific : bool, optional
        • True  → show axis in scientific notation (e.g. 1 e6)  
        • False → show full integers (e.g. 1000000) with no commas
    """
    if len(series_1) != len(series_2) or len(series_1) != len(x_labels):
        raise ValueError("x_labels, series_1, and series_2 must all be the same length.")
    if len(series_1) == 0:
        raise ValueError("Provide at least one category.")

    n = len(x_labels)
    bar_width = 0.35
    x = range(n)

    fig, ax = plt.subplots()

    # Side‑by‑side bars ---------------------------------------------------------
    ax.bar([p - bar_width / 2 for p in x], series_1,
           width=bar_width, label=series_labels[0], color=colors[0])
    ax.bar([p + bar_width / 2 for p in x], series_2,
           width=bar_width, label=series_labels[1], color=colors[1])

    # Axis & title formatting ---------------------------------------------------
    ax.set_xticks(list(x))
    ax.set_xticklabels(x_labels)
    ax.set_ylabel(y_axis_title)
    ax.set_title(chart_title)
    ax.legend()

    # --- control y‑axis number formatting -------------------------------------
    if use_scientific:
        ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))  # 1 e6
    else:
        ax.ticklabel_format(axis='y', style='plain')                  # 1000000
        fmt = ScalarFormatter(useOffset=False)
        fmt.set_scientific(False)
        ax.yaxis.set_major_formatter(fmt)

    # Layout tweaks ------------------------------------------------------------
    ax.margins(y=0.1)
    plt.tight_layout()
    plt.show()


def plot_three_bar_chart(x_labels, y_values, chart_title, y_axis_title):
    """
    Draw a simple bar chart with three bars.

    Parameters
    ----------
    x_labels : list[str]
        The categorical labels shown on the x‑axis (length must be 3).
    y_values : list[float] | tuple[float]
        Heights of the three bars (same length as x_labels).
    chart_title : str
        Title displayed above the chart.
    """
    if len(x_labels) != 3 or len(y_values) != 3:
        raise ValueError("Provide exactly three x labels and three y values.")

    fig, ax = plt.subplots()

    bar_positions = range(3)    
    ax.bar(bar_positions, y_values)

    # Axis & title formatting
    ax.set_xticks(bar_positions)
    ax.set_xticklabels(x_labels)
    ax.set_ylabel(y_axis_title)
    ax.set_title(chart_title)

    # Nice layout tweaks
    ax.margins(y=0.1)
    plt.tight_layout()
    plt.show()

In [3]:
#goal: find latency that match, compare the memories -> result should be that TP uses much less memory to achieve a similar latency. 
all_dp_star_latencies = {}
all_tp_star_latencies = {}
all_dp_ring_latencies = {}
all_tp_ring_latencies = {}


for memory in GPUMemoryScale:
    for rack_size in RackSize:
        for pe_config in PEsConfig:
           
            dp_estimator = DerivedMetricsEvaluator(Architecture.Data_Parallel, memory, rack_size, pe_config,'persisted_results/results_May 03, 2025 04:16:57 PM EST')
            dp_config_str = f"{Architecture.Data_Parallel.name}, {gpu_architecture.name}, {num_gpus.name}, {pe_config.name}"
            tp_estimator = DerivedMetricsEvaluator(Architecture.Tensor_Parallel, memory, rack_size, pe_config,'persisted_results/results_May 03, 2025 04:16:57 PM EST')
            tp_config_str = f"{Architecture.Tensor_Parallel.name}, {gpu_architecture.name}, {num_gpus.name}, {pe_config.name}"
            
            all_dp_star_latencies[dp_config_str] = dp_estimator.derive_total_star_results()['bottlenecked_latency']
            all_tp_star_latencies[tp_config_str] = tp_estimator.derive_total_star_results()['bottlenecked_latency']
            all_dp_ring_latencies[dp_config_str] = dp_estimator.derive_total_ring_results()['bottlenecked_latency']
            all_tp_ring_latencies[tp_config_str] = tp_estimator.derive_total_ring_results()['bottlenecked_latency']


            

DerivedMetricsEvaluator created for Data_Parallel, MEMORY_4MB, RACK_1, PE_1


NameError: name 'gpu_architecture' is not defined

In [4]:
from collections import defaultdict

# Store latency and memory as tuples
dp_star_latencies = defaultdict(list)
tp_star_latencies = defaultdict(list)
dp_ring_latencies = defaultdict(list)
tp_ring_latencies = defaultdict(list)

for memory in [GPUMemoryScale.MEMORY_16MB, GPUMemoryScale.MEMORY_16MB, GPUMemoryScale.MEMORY_1024MB]:
    for rack_size in [RackSize.RACK_4, RackSize.RACK_8]:
        for pe_config in PEsConfig:
            # try:
                dp_estimator = DerivedMetricsEvaluator(Architecture.Data_Parallel, memory, rack_size, pe_config, 'persisted_results/results_May 03, 2025 04:16:57 PM EST')
                tp_estimator = DerivedMetricsEvaluator(Architecture.Tensor_Parallel, memory, rack_size, pe_config, 'persisted_results/results_May 03, 2025 04:16:57 PM EST')
                
                config_key = (rack_size.name, pe_config.name)  # Use same rack & PE config to match
                
                dp_star = dp_estimator.derive_total_star_results()['bottlenecked_latency']
                tp_star = tp_estimator.derive_total_star_results()['bottlenecked_latency']
                dp_ring = dp_estimator.derive_total_ring_results()['bottlenecked_latency']
                tp_ring = tp_estimator.derive_total_ring_results()['bottlenecked_latency']
    
                dp_star_latencies[config_key].append((dp_star, memory))
                tp_star_latencies[config_key].append((tp_star, memory))
                dp_ring_latencies[config_key].append((dp_ring, memory))
                tp_ring_latencies[config_key].append((tp_ring, memory))
            # except:
            #     continue

DerivedMetricsEvaluator created for Data_Parallel, MEMORY_16MB, RACK_4, PE_1
DerivedMetricsEvaluator created for Tensor_Parallel, MEMORY_16MB, RACK_4, PE_1
seeing num_gpus:  4
seeing num_gpus:  4
DerivedMetricsEvaluator created for Data_Parallel, MEMORY_16MB, RACK_4, PE_4
DerivedMetricsEvaluator created for Tensor_Parallel, MEMORY_16MB, RACK_4, PE_4
seeing num_gpus:  4
seeing num_gpus:  4
DerivedMetricsEvaluator created for Data_Parallel, MEMORY_16MB, RACK_4, PE_16
DerivedMetricsEvaluator created for Tensor_Parallel, MEMORY_16MB, RACK_4, PE_16
seeing num_gpus:  4
seeing num_gpus:  4
DerivedMetricsEvaluator created for Data_Parallel, MEMORY_16MB, RACK_8, PE_1
DerivedMetricsEvaluator created for Tensor_Parallel, MEMORY_16MB, RACK_8, PE_1
seeing num_gpus:  8
seeing num_gpus:  8
DerivedMetricsEvaluator created for Data_Parallel, MEMORY_16MB, RACK_8, PE_4
DerivedMetricsEvaluator created for Tensor_Parallel, MEMORY_16MB, RACK_8, PE_4
seeing num_gpus:  8
seeing num_gpus:  8
DerivedMetricsEval

In [23]:
# print(dp_star_latencies)
# print(tp_star_latencies)
# print(dp_ring_latencies)
# print(tp_ring_latencies)

def latency_in_cyles_to_ms(x):

    return x/1000000
    
def compare_latencies(dp_dict, tp_dict, threshold=0.5):
    match_count = 0
    tp_better_count = 0

    print("\n--- Matching Latencies (within ±{}%) ---".format(threshold * 100))
    print("{:<20} {:>10} {:>15} {:>10} {:>15} {:>10}".format(
        "Config", "DP Lat", "DP Mem (MB)", "TP Lat", "TP Mem (MB)", "TP Better?"
    ))
    print("-" * 80)

    for config_key in dp_dict:
        dp_results = dp_dict[config_key]
        tp_results = tp_dict.get(config_key, [])
        
        for dp_latency, dp_memory in dp_results:
            for tp_latency, tp_memory in tp_results:
                if abs(dp_latency - tp_latency) / dp_latency <= threshold:
                    match_count += 1
                    tp_better = tp_memory.size_in_mb < dp_memory.size_in_mb
                    if tp_better:
                        tp_better_count += 1

                        print("{:<20} {:>10.2f} ms {:>15.1f} {:>10.2f} ms {:>15.1f} {:>10}".format(
                            str(config_key),
                            latency_in_cyles_to_ms(dp_latency),
                            dp_memory.size_in_mb,
                            latency_in_cyles_to_ms(tp_latency),
                            tp_memory.size_in_mb,
                            "✅" if tp_better else "❌"
                        ))

    print("\nSummary:")
    print(f"- Total matching configurations: {match_count}")
    print(f"- TP used less memory in {tp_better_count} cases ({(tp_better_count / match_count * 100 if match_count else 0):.1f}%)")


print("-------STAR-------")
compare_latencies(dp_star_latencies, tp_star_latencies)
print("-------RING-------")
compare_latencies(dp_ring_latencies, tp_ring_latencies)

-------STAR-------

--- Matching Latencies (within ±50.0%) ---
Config                   DP Lat     DP Mem (MB)     TP Lat     TP Mem (MB) TP Better?
--------------------------------------------------------------------------------
('RACK_4', 'PE_1')     13911.20 ms          1024.0   13911.20 ms            16.0          ✅
('RACK_4', 'PE_1')     13911.20 ms          1024.0   13911.20 ms            16.0          ✅
('RACK_4', 'PE_4')      3949.85 ms          1024.0    3949.85 ms            16.0          ✅
('RACK_4', 'PE_4')      3949.85 ms          1024.0    3949.85 ms            16.0          ✅
('RACK_4', 'PE_16')      908.79 ms          1024.0    1223.49 ms            16.0          ✅
('RACK_4', 'PE_16')      908.79 ms          1024.0    1223.49 ms            16.0          ✅
('RACK_8', 'PE_1')      6955.60 ms          1024.0    6955.60 ms            16.0          ✅
('RACK_8', 'PE_1')      6955.60 ms          1024.0    6955.60 ms            16.0          ✅
('RACK_8', 'PE_4')      1974.93 ms