In [1]:
import numpy as np
import matplotlib.pyplot as plt
import glob
import re
import os
import seaborn as sns


# Global matplotlib defaults shared by every figure in this notebook.
plt.rcParams.update({
    # legend: opaque white box with a black border
    'legend.frameon': True,
    'legend.facecolor': 'white',
    'legend.edgecolor': 'black',
    'legend.framealpha': 1,
    # line width
    'lines.linewidth': 2.0,
    # marker size
    'lines.markersize': 8,
    # cap size of error bars
    'errorbar.capsize': 5,
    # base font size
    'font.size': 16,
    # default format for savefig when the file name carries no extension
    'savefig.format': 'pdf',
})

# figure dpi is deliberately left at the matplotlib default
#plt.rcParams['figure.dpi'] = 192

# seaborn theme. The second set_style call is the one left in effect.
# NOTE(review): set_context presumably also rewrites font.size, lines.linewidth
# and lines.markersize, which would override the values assigned above --
# confirm which precedence is intended before reordering anything here.
sns.set_style("whitegrid")
sns.set_style("ticks")
sns.set_context("paper")


In [3]:
# Backends whose benchmark files are plotted (one marker / bar colour each).
targets = ["llvm", "opencl", "cuda"]

# Network names; each one is used as the prefix of its benchmark files and of
# its hls4ml project directory.
networks = [
    "mlp", "mlp_wide", "mlp_deep", "mlp_deep_wide",
    "mlp_conv", "conv1d", "conv2d", "depthwise_conv1d",
    "separable_conv2d", "separable_conv1d", "mlp_piecewise", "mlp_repeat",
    "mlp_big", "mlp_piecewise2", "lstm", "mlp_small",
]

# Identifier embedded in the benchmark file names. The plotting cells label
# 7476308 as float32 and any other value as float16.
old_cluster = 7476309
old_clusters = [7476308, 7476309]

# Identifier embedded in the hls4ml project directory names; old_cluster2s and
# versions are zipped together pairwise by the sweep cell.
old_cluster2 = 7473667
old_cluster2s = [7473664, 7473665, 7473666, 7473667]

# Single configuration used by the non-sweep cells.
version = "0.0.1"
versions = ["0.0.0", "0.1.0", "0.1.1", "0.0.1"]

# The character at index 2 of the version string (the middle digit) selects
# the window size reported in the figure titles.
windowsize = 3 if version[2] == "0" else 4

In [None]:
# One runtime-vs-batch-size figure per network for the single configuration
# selected by `version`, `old_cluster` and `old_cluster2`.
#
# Results collected for the bar-plot cell:
#   best    - lowest mean time per sample, one entry per (network, target)
#             in loop order
#   beststd - std of the timings at that best batch size, same order as `best`
#   fpga    - FPGA latency, exactly one entry per network
# Missing data is stored as NaN so all three lists stay index-aligned with
# `networks` / `targets`.
best = []
beststd = []
fpga = []
markers = ['s', 'o', '*']
for i, network in enumerate(networks):
    fig, ax = plt.subplots(figsize=(10, 6))
    for j, target in enumerate(targets):
        files = glob.glob(f'../../final/benchmarks/{network}-*{version}-{target}-{old_cluster}*d.perf')
        if not files:
            best.append(np.nan)
            beststd.append(np.nan)
            continue
        # The batch size is the first "-<digits>" group of the file name; only
        # the base name is searched so directory names can never match.
        batch_size = np.array([int(re.findall(r'-\d+', os.path.basename(file))[0][1:]) for file in files])
        timings = [np.loadtxt(file, delimiter=',') for file in files]
        # Per-sample mean / std, scaled by 1e6 (plotted as microseconds;
        # presumably the files store seconds per batch -- TODO confirm).
        mean_us = np.array([np.mean(t) for t in timings]) / batch_size * 1e6
        std_us = np.array([np.std(t) for t in timings]) / batch_size * 1e6
        fastest = np.argmin(mean_us)
        ax.errorbar(batch_size, mean_us, yerr=std_us, fmt=markers[j % len(markers)], label=target)
        # Boxed label with the best value of this target, stacked at x = 4096.
        ax.text(4096, np.exp(j*0.35+1), fr"{target}: {mean_us[fastest]:.2f} $\mu$ s", ha='center', va='center', bbox=dict(facecolor='white', edgecolor='black', alpha=1))
        best.append(mean_us[fastest])
        beststd.append(std_us[fastest])
        print(f"Best {mean_us[fastest]} with std {std_us[fastest]} for {network} {target}")

    # FPGA latency from the hls4ml synthesis report. `fpga_us` stays NaN when
    # the report is missing, has no "+ Latency:" section, or is too short, so
    # that exactly one value per network is appended below.
    hls4ml_rpt = f"hls4ml/{network}_{version}_{old_cluster2}_hls4ml_prj/myproject_prj/solution1/syn/report/myproject_csynth.rpt"
    fpga_us = np.nan
    if os.path.exists(hls4ml_rpt):
        with open(hls4ml_rpt) as f:
            report_lines = f.readlines()
        for k, line in enumerate(report_lines):
            if "+ Latency:" not in line:
                continue
            # The value row sits 6 lines below the marker; the last integer on
            # that row is used.
            # NOTE(review): verify that this is the intended column of the report.
            if k + 6 < len(report_lines):
                numbers = re.findall(r'\d+', report_lines[k + 6])
                if numbers:
                    print(f"Found latency in {hls4ml_rpt}")
                    # presumably cycles * 5 ns clock period, converted to microseconds
                    fpga_us = int(numbers[-1])*5e-9*1e6
            break  # only the first latency section is used
    if not np.isnan(fpga_us):
        # Horizontal reference line across the whole axis (safe on a log x-axis).
        ax.axhline(fpga_us, color='r', label="FPGA")
        ax.text(4096, np.exp(3*0.35+1), fr"FPGA: {fpga_us:.2f} $\mu$ s", ha='center', va='center', bbox=dict(facecolor='white', edgecolor='black', alpha=1))
    fpga.append(fpga_us)

    title = f"{network} performance for window size of {windowsize}" + (" inside-out" if version[-1] == "0" else " outside-in") +  (" with float32" if old_cluster == 7476308 else " with float16")
    ax.set_title(title)
    ax.set_xlim(0.6, 50000)
    ax.legend()
    ax.set_xlabel("Batch size")
    ax.set_ylabel(r"Time per sample ($\mu$s)")
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.grid(True)
    fig.tight_layout()
    # float16 runs get an "_fp16" suffix so they do not overwrite the float32 figures.
    if old_cluster == 7476308:
        fig.savefig(f"../../final/runtime/{network}_runtime_{version}.png")
    else:
        fig.savefig(f"../../final/runtime/{network}_runtime_{version}_fp16.png")
    plt.close(fig)



In [None]:
# Grouped bar plot of the best time per sample of every (network, target)
# pair, plus the FPGA latency, for the configuration of the previous cell.
import pandas as pd
def custom_errorbar(data):
    """Return the 'STD' entry of ``data`` (not referenced by the plotting code in this cell)."""
    return data['STD']

# `best`/`beststd` are flat lists in (network, target) order and `fpga` holds
# one value per network; fail loudly if the producing cell left them misaligned.
n_targets = len(targets)
assert len(best) == len(beststd) == len(networks)*n_targets, "best/beststd are not aligned with networks x targets"
assert len(fpga) == len(networks), "fpga must hold exactly one entry per network"

data = []
for i, network in enumerate(networks):
    for j, target in enumerate(targets):
        k = i*n_targets + j
        if np.isnan(best[k]):
            continue
        data.append([network, target, best[k], beststd[k]])
    if not np.isnan(fpga[i]):
        data.append([network, "FPGA", fpga[i], 0])
df = pd.DataFrame(data, columns=['Network', 'Target', 'Time', 'sd'])

fig, ax = plt.subplots(1, figsize=(18, 6))
# NOTE(review): errorbar='sd' makes seaborn compute the spread across the rows
# of each bar; every bar has a single row here, so the 'sd' column is
# presumably never drawn -- confirm whether error bars are wanted.
sns.barplot(data=df, x="Network", y="Time", ax=ax, errorbar='sd', linewidth=1.5, edgecolor="black", hue="Target")
# The last character of the version selects inside-out / outside-in, matching
# the titles of the per-network figures.
ax.set_title(f"Best performance per network for window size of {windowsize}"+ (" inside-out" if version[-1] == "0" else " outside-in") + (" with float32" if old_cluster == 7476308 else " with float16"), fontsize=20)
ax.set_xlabel("Network", fontsize=18)
ax.set_ylabel(r"Time per sample ($\mu$s)", fontsize=18)
# Rotate the labels seaborn generated instead of overwriting them with
# `networks`: networks without any data are absent from the frame, so the two
# sequences can differ in length.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
ax.grid()
for p in ax.patches:
    height = p.get_height()
    # Skip patches whose height, x and width are all zero, and bars without a
    # finite height: there is nothing to label.
    if (not height and not p.get_x() and not p.get_width()) or np.isnan(height):
        continue
    # Tall bars get a white label inside the bar (capped at the y-limit of 1),
    # short bars a black label above it.
    inside = height > 0.1
    y_text = np.amin([height, 1])-0.01 if inside else height+0.1
    ax.annotate(format(height, '.2g'),
                (p.get_x() + p.get_width() / 2., y_text),
                ha = 'center', va = 'center',
                xytext = (0, -10),
                textcoords = 'offset points',
                color='white' if inside else 'black', rotation=90)
plt.tight_layout()
# set the size of the labels
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
ax.legend(fontsize=14)
sns.despine(top=True, right=False)
ax.set_ylim(0,1)
# Save before plt.show(): once the figure has been shown by the inline backend
# a later savefig would write an empty canvas.
if old_cluster == 7476308:
    fig.savefig(f"../../final/runtime/best_performance_{version}.png", bbox_inches='tight')
else:
    fig.savefig(f"../../final/runtime/best_performance_{version}_fp16.png", bbox_inches='tight')
plt.show()
plt.close(fig)




In [None]:
# Sweep over every (old_cluster, version) combination and write, for each one,
# a runtime-vs-batch-size figure per network and a summary bar plot.
# NOTE: the loops rebind the notebook-level `old_cluster`, `old_cluster2`,
# `version` and `windowsize`; after this cell they hold the last combination.
import pandas as pd


def custom_errorbar(data):
    """Return the 'STD' entry of ``data`` (not referenced by the plotting code in this cell)."""
    return data['STD']


def _load_per_sample_times(files):
    """Load benchmark files and return ``(batch_size, mean_us, std_us)`` arrays.

    Each array has one entry per file. The batch size is the first
    "-<digits>" group of the file's base name. Mean and std of the file's
    values are divided by the batch size and scaled by 1e6 (plotted as
    microseconds; presumably the files store seconds per batch -- TODO confirm).
    """
    batch_size = np.array([int(re.findall(r'-\d+', os.path.basename(file))[0][1:]) for file in files])
    timings = [np.loadtxt(file, delimiter=',') for file in files]
    mean_us = np.array([np.mean(t) for t in timings]) / batch_size * 1e6
    std_us = np.array([np.std(t) for t in timings]) / batch_size * 1e6
    return batch_size, mean_us, std_us


def _read_fpga_latency_us(report_path):
    """Return the FPGA latency read from an hls4ml synthesis report.

    The value is the last integer on the line 6 lines below the first
    "+ Latency:" marker, multiplied by 5e-9 * 1e6 (presumably cycles times a
    5 ns clock period, in microseconds -- NOTE(review): verify the column and
    the clock period). Returns NaN when the report does not exist, has no
    latency section, or is too short / malformed to contain the value.
    """
    if not os.path.exists(report_path):
        return np.nan
    with open(report_path) as f:
        report_lines = f.readlines()
    for k, line in enumerate(report_lines):
        if "+ Latency:" not in line:
            continue
        if k + 6 >= len(report_lines):
            return np.nan
        numbers = re.findall(r'\d+', report_lines[k + 6])
        if not numbers:
            return np.nan
        print(f"Found latency in {report_path}")
        return int(numbers[-1])*5e-9*1e6
    return np.nan


markers = ['s', 'o', '*']
n_targets = len(targets)
for old_cluster in old_clusters:
    for (old_cluster2,version) in zip(old_cluster2s,versions):
        print(version)
        # Middle digit of the version -> window size; last digit -> ordering.
        windowsize = 3 if version[2] == "0" else 4
        ordering = " inside-out" if version[-1] == "0" else " outside-in"
        precision = " with float32" if old_cluster == 7476308 else " with float16"
        # float16 runs get a suffix so they do not overwrite the float32 figures.
        suffix = "" if old_cluster == 7476308 else "_fp16"

        # best/beststd: one entry per (network, target) in loop order;
        # fpga: exactly one entry per network. NaN marks missing data.
        best = []
        beststd = []
        fpga = []
        for i, network in enumerate(networks):
            fig, ax = plt.subplots(figsize=(10, 6))
            for j, target in enumerate(targets):
                files = glob.glob(f'../../final/benchmarks/{network}-*{version}-{target}-{old_cluster}*d.perf')
                if not files:
                    best.append(np.nan)
                    beststd.append(np.nan)
                    continue
                batch_size, mean_us, std_us = _load_per_sample_times(files)
                fastest = np.argmin(mean_us)
                ax.errorbar(batch_size, mean_us, yerr=std_us, fmt=markers[j % len(markers)], label=target)
                # Boxed label with the best value of this target, stacked at x = 4096.
                ax.text(4096, np.exp(j*0.35+1), fr"{target}: {mean_us[fastest]:.2f} $\mu$ s", ha='center', va='center', bbox=dict(facecolor='white', edgecolor='black', alpha=1))
                best.append(mean_us[fastest])
                beststd.append(std_us[fastest])
                print(f"Best {mean_us[fastest]} with std {std_us[fastest]} for {network} {target}")

            fpga_us = _read_fpga_latency_us(f"hls4ml/{network}_{version}_{old_cluster2}_hls4ml_prj/myproject_prj/solution1/syn/report/myproject_csynth.rpt")
            if not np.isnan(fpga_us):
                # Horizontal reference line across the whole axis (safe on a log x-axis).
                ax.axhline(fpga_us, color='r', label="FPGA")
                ax.text(4096, np.exp(3*0.35+1), fr"FPGA: {fpga_us:.2f} $\mu$ s", ha='center', va='center', bbox=dict(facecolor='white', edgecolor='black', alpha=1))
            fpga.append(fpga_us)

            title = f"{network} performance for window size of {windowsize}" + ordering + precision
            ax.set_title(title, fontsize=18)
            ax.set_xlim(0.6, 50000)
            ax.legend()
            ax.set_xlabel("Batch size", fontsize=16)
            ax.set_ylabel(r"Time per sample ($\mu$s)", fontsize=16)
            ax.set_xscale('log')
            ax.set_yscale('log')
            ax.grid(True)
            ax.tick_params(axis='both', which='major', labelsize=16)
            ax.tick_params(axis='both', which='minor', labelsize=16)
            fig.tight_layout()
            fig.savefig(f"../../final/runtime/{network}_runtime_{version}{suffix}.png")
            plt.close(fig)

        # Horizontal bar plot of the best time per (network, target) plus FPGA.
        data = []
        for i, network in enumerate(networks):
            for j, target in enumerate(targets):
                k = i*n_targets + j
                if np.isnan(best[k]):
                    continue
                data.append([network, target, best[k], beststd[k]])
            if not np.isnan(fpga[i]):
                data.append([network, "FPGA", fpga[i], 0])
        df = pd.DataFrame(data, columns=['Network', 'Target', 'Time', 'sd'])
        fig, ax = plt.subplots(1, figsize=(10, 24))
        # NOTE(review): errorbar='sd' makes seaborn compute the spread across
        # the rows of each bar; every bar has a single row here, so the 'sd'
        # column is presumably never drawn -- confirm whether error bars are wanted.
        sns.barplot(data=df, x="Time", y="Network", ax=ax, errorbar='sd', linewidth=1.5, edgecolor="black", hue="Target")
        ax.set_title(f"Best performance per network for window size of {windowsize}" + ordering + precision, fontsize=20)
        ax.set_ylabel("Network", fontsize=18)
        ax.set_xlabel(r"Time per sample ($\mu$s)", fontsize=18)
        ax.grid()
        for p in ax.patches:
            width = p.get_width()
            # Skip patches whose height, x and width are all zero, and bars
            # without a finite length: there is nothing to label.
            if (not p.get_height() and not p.get_x() and not width) or np.isnan(width):
                continue
            # Label at the bar end, capped at 0.9 so it stays inside the
            # x-limit of 1; capped labels sit on the bar and are drawn white.
            ax.annotate(
                format(width, '.3g'),
                (np.amin([width,0.9]), p.get_y() + p.get_height() / 2),
                ha='left', va='center',
                xytext=(5, 0),
                textcoords='offset points',
                color='black' if width<0.9 else 'white', fontsize=16
            )
        # set the size of the labels
        ax.legend(fontsize=16)
        ax.tick_params(axis='both', which='major', labelsize=16)
        ax.tick_params(axis='both', which='minor', labelsize=16)
        sns.despine(top=False, right=True)
        ax.set_xlim(0,1)
        fig.savefig(f"../../final/runtime/best_performance_{version}{suffix}.png", bbox_inches='tight')
        plt.close(fig)




0.0.0
Best 0.18998639180083818 with std 0.01183186193862318 for mlp llvm
Best 0.0891574991861979 with std 1.2482988812159646e-05 for mlp opencl
Best 0.08768797452714709 with std 1.8953538392327445e-05 for mlp cuda
Found latency in hls4ml/mlp_0.0.0_7473664_hls4ml_prj/myproject_prj/solution1/syn/report/myproject_csynth.rpt
Best 0.6566710707855223 with std 0.036135541672952534 for mlp_wide llvm
Best 0.3426804846191406 with std 1.8184356468586166e-05 for mlp_wide opencl
Best 0.34123679016113284 with std 8.735119012879098e-05 for mlp_wide cuda
Found latency in hls4ml/mlp_wide_0.0.0_7473664_hls4ml_prj/myproject_prj/solution1/syn/report/myproject_csynth.rpt
Best 0.2618947772502899 with std 0.008988020523140112 for mlp_deep llvm
Best 0.174031441921658 with std 1.8390907689453648e-05 for mlp_deep opencl
Best 0.1724416987101237 with std 1.4268995752758505e-05 for mlp_deep cuda
Found latency in hls4ml/mlp_deep_0.0.0_7473664_hls4ml_prj/myproject_prj/solution1/syn/report/myproject_csynth.rpt
Best 0