In [1]:
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource
from bokeh.layouts import column,row
from bokeh.models import Div
from bokeh.palettes import Spectral6

import json
import pandas as pd
import matplotlib.pyplot as plt

import numpy as np

import os

# Bokeh toolbar tools shared by every figure created in this notebook.
TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select"

# Route Bokeh output to the notebook; the trailing semicolon hides the cell's return value.
output_notebook();


----------------------------------------
# Functions
----------------------------------------

In [2]:
def load_data(server, names):
    """Load the benchmark result files of one server.

    For every entry of ``names`` the file ``<server>/results_<name>.json`` is
    parsed.

    Example:
        names = ['cpu', 'gpu', 'sycl_cpu', 'sycl_gpu']
        data = load_data("chifflot-8", names)

    Args:
        server: Directory that contains the ``results_*.json`` files.
        names: Iterable of result-set names.

    Returns:
        dict mapping each name to ``[parsed_json, labels]`` where ``labels``
        is the list of top-level keys of the parsed JSON object.
    """
    data = dict()
    for name in names:
        # The context manager closes the handle as soon as parsing is done
        # (the previous json.load(open(...)) left it to the garbage collector).
        with open(server + '/results_' + name + '.json', 'r') as results_file:
            data_aux = json.load(results_file)
        data[name] = [data_aux, list(data_aux.keys())]
    return data

In [3]:
def papi_load_data(server, names):
    """Load the PAPI measurement regions of one server.

    For every entry of ``names`` the file ``<server>/papi/<name>.json`` is
    parsed and its ``['threads']['0']['regions']`` entry is kept.

    Example:
        names = ['avx2', 'mkl', 'sycl_cpu']
        data = papi_load_data("chifflot-8", names)

    Args:
        server: Directory that contains the ``papi`` sub-directory.
        names: Iterable of implementation names (file stems).

    Returns:
        dict mapping each name to the ``regions`` object of thread '0'.
    """
    data = dict()
    for name in names:
        # Close the file deterministically instead of leaking the handle.
        with open(server + '/papi/' + name + '.json', 'r') as papi_file:
            data[name] = json.load(papi_file)['threads']['0']['regions']
    return data

In [4]:
def perf_load_data(path, names):
    """Collect the energy/time figures of every perf report found in ``path``.

    Each regular file in ``path`` is read as a text report: the first
    whitespace-separated field (with thousands separators removed) of lines
    6, 7 and 9 is taken as package energy, RAM energy and elapsed time.
    The file stem is expected to look like ``<prefix>_<impl>_<size>`` or
    ``<prefix>_<impl-part-1>_<impl-part-2>_<size>``; the implementation name
    selects the column and the size selects the row.

    Args:
        path: Directory containing the perf reports (sub-directories are
            skipped). A trailing separator is optional.
        names: Column names of the resulting DataFrame.

    Returns:
        DataFrame sorted by size whose cells hold the list
        ``[energy_pkg, energy_ram, time_elapsed]`` (values kept as strings).

    Raises:
        ValueError: if a report has fewer than 9 lines, so the expected
            figures cannot be read.
    """
    def first_field(line):
        # "1,234.56 Joules ..." -> "1234.56"
        return line.strip().split()[0].replace(',', '')

    perf_df = pd.DataFrame(columns=names)
    for file in os.listdir(path):
        path_tmp = os.path.join(path, file)
        if os.path.isdir(path_tmp):
            continue

        with open(path_tmp, "r") as myfile:
            lines = myfile.readlines()

        # Fail loudly instead of silently reusing the values of the previous file.
        if len(lines) < 9:
            raise ValueError("perf report '%s' has %d lines, expected at least 9"
                             % (path_tmp, len(lines)))

        energy_pkg = first_field(lines[5])
        energy_ram = first_field(lines[6])
        time_elapsed = first_field(lines[8])

        # Derive the key from the file name itself so the result does not
        # depend on how many directory levels ``path`` contains.
        index = file.strip().split('.')[0].split('_')[1:4]
        if len(index) == 2:
            row, column = int(index[1]), index[0]
        else:
            row, column = int(index[2]), index[0] + "_" + index[1]
        perf_df.loc[row, column] = [energy_pkg, energy_ram, time_elapsed]

    perf_df = perf_df.sort_index(ascending=True)

    return perf_df

In [5]:
def extract_data(data_):
    """Turn one loaded result set into per-label DataFrames.

    Args:
        data_: Two-element sequence ``[results, labels]`` as stored by
            load_data: ``results[label][size]`` is a mapping with 'time' and
            'error' entries, ``labels`` lists the labels to convert.

    Returns:
        dict mapping each label to a DataFrame with columns 'times' and
        'errors', indexed by ``int(size)`` and sorted by that index.
    """
    frames = dict()
    for label in data_[1]:
        per_size = data_[0][label]
        frame = pd.DataFrame(columns=['times', 'errors'])
        for size_key in list(per_size.keys()):
            row = int(size_key)
            frame.loc[row, 'times'] = per_size[size_key]['time']
            frame.loc[row, 'errors'] = per_size[size_key]['error']
        frames[label] = frame.sort_index(ascending=True)
    return frames

In [6]:
def extract_data_papi(iterations, sizes, data_source):
    """Convert raw PAPI regions into one DataFrame per implementation.

    Original notes: GPU readings are in milliwatts, CPU readings in nanojoules.
    Every recognised counter is converted to int, multiplied by its factor and
    divided by ``iterations``.

    Args:
        iterations: Divisor applied to every scaled reading.
        sizes: Sequence indexed by ``int(region key)``; the element found there
            becomes the row label.
        data_source: dict ``label -> {region key -> {counter name -> value}}``.

    Returns:
        dict mapping each label to a DataFrame with the columns 'energy_0',
        'energy_1', 'dram_energy_0', 'dram_energy_1' and 'real_time_sec';
        a GPU power column is added only when its counter is present.
    """
    # counter name -> (destination column, factor applied to the raw reading)
    counter_columns = {
        'rapl:::PACKAGE_ENERGY:PACKAGE0': ('energy_0', 1e-9),
        'rapl:::PACKAGE_ENERGY:PACKAGE1': ('energy_1', 1e-9),
        'rapl:::DRAM_ENERGY:PACKAGE0': ('dram_energy_0', 1e-9),
        'rapl:::DRAM_ENERGY:PACKAGE1': ('dram_energy_1', 1e-9),
        'nvml:::Tesla_V100-PCIE-32GB:device_0:power': ('gpu_tesla_v100_power', 1e-3),
        'nvml:::NVIDIA_A100-PCIE-40GB:device_0:power': ('gpu_amper_A100_power', 1e-3),
        'real_time_nsec': ('real_time_sec', 1e-9),
    }
    base_columns = ['energy_0', 'energy_1', 'dram_energy_0', 'dram_energy_1', 'real_time_sec']

    frames = dict()
    for label, regions in data_source.items():
        frame = pd.DataFrame(columns=base_columns)
        for region_key, counters in regions.items():
            for counter in counters:
                target = counter_columns.get(counter)
                if target is None:
                    # counters that are not listed above are ignored
                    continue
                column, factor = target
                frame.loc[sizes[int(region_key)], column] = (int(counters[counter])*factor)/iterations
        frames[label] = frame

    return frames

In [7]:
def plot_data(data_list=None, custom=0, names=None):
    """Plot execution time and error against problem size.

    Two Bokeh figures ("Time" and "Error") are built, laid out side by side
    and shown.

    Args:
        data_list: List of dicts mapping an implementation label to a
            DataFrame indexed by problem size with 'times' and 'errors'
            columns (the structure returned by extract_data). Defaults to a
            list holding one empty dict.
        custom: 0 plots every label except 'serial'; any other value plots
            only the labels given in ``names`` (in that order).
        names: Labels to plot when ``custom`` is not 0. When it is empty an
            error message is printed and nothing is shown.

    Returns:
        None.
    """
    colors = {'avx2':'darkcyan', 'avx512':'mediumturquoise', 'mkl':'blue', 'openMP':'orange', 'sycl_cpu':'magenta',
              'cuBLAS':'mediumseagreen', 'cuBLAS_Tensor':'brown', 'cuBLAS_async':'dodgerblue', 'cudaNaive':'deeppink',
              'cudaTiled':'darkviolet', 'sycl_gpu': 'goldenrod'}

    if custom != 0 and not names:
        print("Error, list of names is empty!")
        return

    if data_list is None:
        data_list = [dict()]

    p1 = figure(title="Time", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
    p2 = figure(title="Error", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")

    p1.xaxis.axis_label = 'N'
    p1.yaxis.axis_label = 'Time ( ms )'
    p2.xaxis.axis_label = 'N'
    p2.yaxis.axis_label = 'MSE'

    def draw(frame, label):
        # assign() returns a copy, so the caller's DataFrame does not gain a
        # 'size' column as a side effect of plotting.
        data_source = ColumnDataSource(frame.assign(size=frame.index))
        # Labels without a dedicated colour are drawn in black instead of raising KeyError.
        color = colors.get(label, 'black')
        # scatter(marker="square") replaces the deprecated figure.square().
        p1.scatter(source=data_source, x='size', y='times', marker="square", legend_label=label,
                   fill_color=color, line_color=color)
        p1.line(source=data_source, x='size', y='times', line_color=color)
        p2.scatter(source=data_source, x='size', y='errors', marker="square", legend_label=label, color=color)

    for data in data_list:
        if custom == 0:
            selected = [key for key in data.keys() if key != 'serial']
        else:
            selected = [name for name in names if name in data]
        for label in selected:
            draw(data[label], label)

    p1.legend.location = "top_left"
    p2.legend.location = "top_left"
    show(gridplot([p1, p2], ncols=2))
# show(column(Div(text=" Intel Xeon Gold 6126 (Skylake), x86_64, 2.60GHz, 2 CPUs/node, 12 cores/CPU"), row(p1, p2)))    

In [8]:
def plot_data_papi(data_source, names=None, pos=0):
    """Plot average energy and power consumption from PAPI measurements.

    For every selected implementation the four RAPL energy columns are summed
    and divided by 4 (left figure, labelled in Joules); dividing that value by
    'real_time_sec' gives the right figure (labelled in Watts). For GPU
    implementations that carry a GPU power column, the GPU power is added to
    the right figure and GPU power times elapsed time to the left one.

    Args:
        data_source: dict ``label -> DataFrame`` as returned by
            extract_data_papi (index = problem size).
        names: Labels to plot; when empty or None every key of
            ``data_source`` is plotted.
        pos: Legend placement of the power figure: 0 -> "top_right",
            1 -> "bottom_right", 2 -> "center_right". The energy legend is
            placed at "top_left" for these values; any other value leaves
            both legends at their default location.

    Returns:
        None.
    """
    colors = {'avx2':'darkcyan', 'avx512':'mediumturquoise', 'mkl':'blue', 'openmp':'orange', 'sycl_cpu':'magenta',
              'cublas':'mediumseagreen', 'cublas_tensor':'brown', 'cublas_async':'dodgerblue', 'cuda_naive':'deeppink',
              'cuda_tiled':'darkviolet', 'sycl_gpu': 'goldenrod'}

    names_gpu = ['cuda_naive', 'cuda_tiled', 'cublas', 'cublas_async', 'cublas_tensor', 'sycl_gpu']
    gpu_power_columns = ('gpu_tesla_v100_power', 'gpu_amper_A100_power')
    power_legend_location = {0: "top_right", 1: "bottom_right", 2: "center_right"}

    p1 = figure(title="Energy Average", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
    p2 = figure(title="Power Consumption", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
    p1.xaxis.axis_label = 'N'
    p1.yaxis.axis_label = 'Energy ( Joules )'
    p2.xaxis.axis_label = 'N'
    p2.yaxis.axis_label = 'Power Consumption ( Watts )'

    # An empty selection means "plot everything"; one loop serves both cases.
    selected = names if names else list(data_source.keys())

    for key in selected:
        data = data_source[key]
        plot_frame = pd.DataFrame(index=data.index)
        plot_frame['size'] = data.index
        # Columns are named after what they hold: 'energy' feeds the Joules
        # figure and 'power' (energy / elapsed time) feeds the Watts figure.
        plot_frame['energy'] = ( (data['energy_0'] + data['energy_1'] + data['dram_energy_0'] + data['dram_energy_1']) / 4.0 ).tolist()
        plot_frame['power'] = ( plot_frame['energy'] / data['real_time_sec'] ).tolist()

        if key in names_gpu:
            # First GPU power column present wins (same precedence as before).
            gpu_column = next((column for column in gpu_power_columns if column in data.columns), None)
            if gpu_column is not None:
                plot_frame['energy'] = ( plot_frame['energy'] + (data[gpu_column] * data['real_time_sec']) ).tolist()
                plot_frame['power'] = ( plot_frame['power'] + data[gpu_column] ).tolist()

        data_plot = ColumnDataSource(plot_frame)

        # scatter(marker="square") replaces the deprecated figure.square().
        p1.scatter(source=data_plot, x='size', y='energy', marker="square", legend_label=key, fill_color=colors[key], line_color=colors[key])
        p1.line(source=data_plot, x='size', y='energy', line_color=colors[key])
        p2.scatter(source=data_plot, x='size', y='power', marker="square", legend_label=key, fill_color=colors[key], line_color=colors[key])
        p2.line(source=data_plot, x='size', y='power', line_color=colors[key])

    if pos in power_legend_location:
        p1.legend.location = "top_left"
        p2.legend.location = power_legend_location[pos]

    show(gridplot([p1, p2], ncols=2))

In [9]:
def plot_data_perf(data_source, names, sizes, iterations, pos=0):
    """Plot energy and power consumption from perf measurements.

    Notes carried over from the original cell:
      - power/energy-pkg/ and power/energy-ram/ are reported in Joules
      - time elapsed is reported in seconds
      - power/energy-pkg  : consumption of all cores + LLC cache
      - power/energy-dram : consumption of DRAM (servers only)

    Args:
        data_source: DataFrame as returned by perf_load_data; the cell at
            ``[size, name]`` holds ``[energy_pkg, energy_ram, time_elapsed]``.
        names: Implementation labels (columns of ``data_source``) to plot.
        sizes: Problem sizes (row labels of ``data_source``) to plot.
        iterations: Accepted for interface compatibility but not used.
            NOTE(review): the notebook comments say the measurements were
            taken 20 times and must be averaged; confirm whether the perf
            figures are already averaged.
        pos: Accepted for interface compatibility but not used; both legends
            are placed at "top_left".

    Returns:
        None.
    """
    colors = {'avx2':'darkcyan', 'avx512':'mediumturquoise', 'mkl':'blue', 'openmp':'orange', 'sycl_cpu':'magenta',
              'cublas':'mediumseagreen', 'cublas_tensor':'brown', 'cublas_async':'dodgerblue', 'cuda_naive':'deeppink',
              'cuda_tiled':'darkviolet', 'sycl_gpu': 'goldenrod'}

    p1 = figure(title="Energy Average", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
    p2 = figure(title="Power Consumption", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
    p1.xaxis.axis_label = 'N'
    p1.yaxis.axis_label = 'Energy ( Joules )'
    p2.xaxis.axis_label = 'N'
    p2.yaxis.axis_label = 'Power Consumption ( Watts )'

    size_labels = [int(size) for size in sizes]

    for name in names:
        measurements = data_source[name]
        # energy = package + RAM energy; computed once per implementation
        # instead of being rebuilt on every iteration of a per-size loop.
        energy = [float(measurements[size][0]) + float(measurements[size][1]) for size in sizes]
        elapsed = [float(measurements[size][2]) for size in sizes]

        # The x values come from ``sizes`` itself, so every point is paired
        # with the size it was read for even if ``data_source`` has extra rows.
        plot_frame = pd.DataFrame({'size': size_labels, 'energy': energy, 'time': elapsed}, index=size_labels)
        plot_frame['power'] = plot_frame['energy'] / plot_frame['time']
        data_plot = ColumnDataSource(plot_frame)

        # scatter(marker="square") replaces the deprecated figure.square().
        p1.scatter(source=data_plot, x='size', y='energy', marker="square", legend_label=name, fill_color=colors[name], line_color=colors[name])
        p1.line(source=data_plot, x='size', y='energy', line_color=colors[name])
        p2.scatter(source=data_plot, x='size', y='power', marker="square", legend_label=name, fill_color=colors[name], line_color=colors[name])
        p2.line(source=data_plot, x='size', y='power', line_color=colors[name])

    p1.legend.location = "top_left"
    p2.legend.location = "top_left"

    show(gridplot([p1, p2], ncols=2))

----------------------------------------
# chifflot-8.lille

- PowerEdge R740
- Intel(R) Xeon(R) Gold 6126 CPU @ 2.60GHz
- L1 cache: size: 768KiB
- L2 cache: size: 12MiB
- L3 cache: size: 19MiB
- Memory: size: 192GiB, capabilities: ecc, DDR4 Synchronous Registered (Buffered) 2666 MHz (0.4 ns)
- pci: PCIe 3.0 x16 16 GB/s
- Tesla V100-PCIE-32GB 
- Driver Version: 550.54.14
- CUDA Version: 12.4
----------------------------------------

## Data sources - Performance

In [10]:
# Load results_<name>.json for each result set stored under chifflot-8/.
names = ['cpu', 'gpu', 'sycl_cpu', 'sycl_gpu']
data = load_data("chifflot-8", names)

# Convert each result set into per-label DataFrames (times/errors indexed by size).
data_cpu = extract_data(data['cpu'])
data_sycl_cpu = extract_data(data['sycl_cpu'])

data_gpu = extract_data(data['gpu'])
data_sycl_gpu = extract_data(data['sycl_gpu'])

### CPU

In [11]:
# Time and error plots for every label in the CPU result sets (all keys except 'serial').
plot_data([data_cpu, data_sycl_cpu])

In [12]:
# custom=1: restrict the plots to the listed labels.
plot_data([data_cpu, data_sycl_cpu], 1, ['mkl', 'openMP', 'sycl_cpu'])

### GPU

In [13]:
# GPU: Nvidia Tesla V100-PCIE-32GB (32 GiB).
# Time and error plots for every label in the GPU result sets (all keys except 'serial').
plot_data([data_gpu, data_sycl_gpu])

In [14]:
# custom=1: restrict the GPU plots to the listed labels.
plot_data([data_gpu, data_sycl_gpu], 1, ['cuBLAS_Tensor', 'cuBLAS_async', 'cuBLAS', 'sycl_gpu'])

### Results comparison CPU Vs GPU - Best times

In [15]:
# CPU and GPU result sets on the same figures, restricted to the listed labels.
plot_data([data_cpu, data_sycl_cpu, data_gpu, data_sycl_gpu], 1, ['mkl', 'openMP', 'sycl_cpu', 'cuBLAS_Tensor', 'cuBLAS_async', 'cuBLAS'])

In [16]:
# Narrower comparison: mkl against the two listed cuBLAS labels.
plot_data([data_cpu, data_gpu], 1, ['mkl', 'cuBLAS_Tensor', 'cuBLAS'])

In [17]:
# Head-to-head comparison of the 'mkl' and 'cuBLAS' labels only.
plot_data([data_cpu, data_gpu], 1, ['mkl', 'cuBLAS'])

## Data sources - Energy and Power Consumption

In [18]:
# perf reports are read from chifflot-8/perf/ and PAPI regions from chifflot-8/papi/<name>.json.
names = ['avx2', 'avx512', 'mkl', 'openmp', 'cuda_naive', 'cuda_tiled', 'cublas', 'cublas_async', 'cublas_tensor', 'sycl_cpu', 'sycl_gpu']
perf_chifflot8_data = perf_load_data("chifflot-8/perf/", names)
papi_chifflot8_data = papi_load_data("chifflot-8", names)

###  Energy Consumption - PAPI

In [19]:
# --------------------------------------------------------------------------------------------------------------
# Notes:
# - The data were collected 20 times, so they must be averaged
# - GPU data are in milliwatts
# - CPU data are in nanojoules
# --------------------------------------------------------------------------------------------------------------
# extract_data_papi divides every reading by `iterations` and uses sizes[i] as the row label of PAPI region i.
iterations = 20
sizes = [32, 48, 64, 80, 96, 112, 128, 256, 384, 512, 640, 768, 896, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192]
papi_chifflot8_data_plot = extract_data_papi(iterations, sizes, papi_chifflot8_data)

In [20]:
# PAPI energy/power plots for the listed CPU labels (default legend placement, pos=0).
names = ['avx2', 'avx512', 'mkl', 'openmp', 'sycl_cpu']
plot_data_papi(papi_chifflot8_data_plot, names)

In [21]:
# Subset of CPU labels; pos=1 puts the power-plot legend at bottom_right.
names = ['mkl', 'openmp', 'sycl_cpu']
plot_data_papi(papi_chifflot8_data_plot, names, 1)

In [22]:
# GPU labels; pos=1 puts the power-plot legend at bottom_right.
names = ['cublas', 'cublas_async', 'sycl_gpu']
plot_data_papi(papi_chifflot8_data_plot, names, 1)

In [23]:
# The two listed cuBLAS labels (default legend placement, pos=0).
names = ['cublas', 'cublas_tensor']
plot_data_papi(papi_chifflot8_data_plot, names)

#### Final

In [24]:
# CPU and GPU labels together; pos=2 puts the power-plot legend at center_right.
names = ['mkl', 'sycl_cpu', 'cublas', 'cublas_tensor']
plot_data_papi(papi_chifflot8_data_plot, names, 2)

###  Energy Consumption/CPU - PERF

In [25]:
# --------------------------------------------------------------------------------------------------------------
# Notes:
# - The data were collected 20 times, so they must be averaged
# - power/energy-pkg/ Joules
# - power/energy-ram/ Joules
# - time elapsed seconds
# - power/energy-pkg  : power consumption of all cores + LLC cache
# - power/energy-dram : power consumption of DRAM (servers only)
# NOTE(review): plot_data_perf does not use `iterations`; confirm whether the perf figures are already averaged.
# --------------------------------------------------------------------------------------------------------------
names = ['avx2', 'avx512', 'mkl', 'openmp', 'sycl_cpu']
sizes = [32, 48, 64, 80, 96, 112, 128, 256, 384, 512, 640, 768, 896, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192]
iterations = 20
plot_data_perf(perf_chifflot8_data, names, sizes, iterations)

In [26]:
def plot_data_papi(data_source, names=None, pos=0):
    """Plot average energy and power consumption from PAPI measurements.

    NOTE(review): this cell redefines plot_data_papi, which is already defined
    in an earlier cell of this notebook, and replaces it for every later call.
    It is kept here with the same behaviour as that earlier definition
    (including pos=2); consider deleting one of the two copies.

    For every selected implementation the four RAPL energy columns are summed
    and divided by 4 (left figure, labelled in Joules); dividing that value by
    'real_time_sec' gives the right figure (labelled in Watts). For GPU
    implementations that carry a GPU power column, the GPU power is added to
    the right figure and GPU power times elapsed time to the left one.

    Args:
        data_source: dict ``label -> DataFrame`` as returned by
            extract_data_papi (index = problem size).
        names: Labels to plot; when empty or None every key of
            ``data_source`` is plotted.
        pos: Legend placement of the power figure: 0 -> "top_right",
            1 -> "bottom_right", 2 -> "center_right". The energy legend is
            placed at "top_left" for these values; any other value leaves
            both legends at their default location.

    Returns:
        None.
    """
    colors = {'avx2':'darkcyan', 'avx512':'mediumturquoise', 'mkl':'blue', 'openmp':'orange', 'sycl_cpu':'magenta',
              'cublas':'mediumseagreen', 'cublas_tensor':'brown', 'cublas_async':'dodgerblue', 'cuda_naive':'deeppink',
              'cuda_tiled':'darkviolet', 'sycl_gpu': 'goldenrod'}

    names_gpu = ['cuda_naive', 'cuda_tiled', 'cublas', 'cublas_async', 'cublas_tensor', 'sycl_gpu']
    gpu_power_columns = ('gpu_tesla_v100_power', 'gpu_amper_A100_power')
    power_legend_location = {0: "top_right", 1: "bottom_right", 2: "center_right"}

    p1 = figure(title="Energy Average", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
    p2 = figure(title="Power Consumption", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
    p1.xaxis.axis_label = 'N'
    p1.yaxis.axis_label = 'Energy ( Joules )'
    p2.xaxis.axis_label = 'N'
    p2.yaxis.axis_label = 'Power Consumption ( Watts )'

    # An empty selection means "plot everything"; one loop serves both cases.
    selected = names if names else list(data_source.keys())

    for key in selected:
        data = data_source[key]
        plot_frame = pd.DataFrame(index=data.index)
        plot_frame['size'] = data.index
        # Columns are named after what they hold: 'energy' feeds the Joules
        # figure and 'power' (energy / elapsed time) feeds the Watts figure.
        plot_frame['energy'] = ( (data['energy_0'] + data['energy_1'] + data['dram_energy_0'] + data['dram_energy_1']) / 4.0 ).tolist()
        plot_frame['power'] = ( plot_frame['energy'] / data['real_time_sec'] ).tolist()

        if key in names_gpu:
            # First GPU power column present wins (same precedence as before).
            gpu_column = next((column for column in gpu_power_columns if column in data.columns), None)
            if gpu_column is not None:
                plot_frame['energy'] = ( plot_frame['energy'] + (data[gpu_column] * data['real_time_sec']) ).tolist()
                plot_frame['power'] = ( plot_frame['power'] + data[gpu_column] ).tolist()

        data_plot = ColumnDataSource(plot_frame)

        # scatter(marker="square") replaces the deprecated figure.square().
        p1.scatter(source=data_plot, x='size', y='energy', marker="square", legend_label=key, fill_color=colors[key], line_color=colors[key])
        p1.line(source=data_plot, x='size', y='energy', line_color=colors[key])
        p2.scatter(source=data_plot, x='size', y='power', marker="square", legend_label=key, fill_color=colors[key], line_color=colors[key])
        p2.line(source=data_plot, x='size', y='power', line_color=colors[key])

    if pos in power_legend_location:
        p1.legend.location = "top_left"
        p2.legend.location = power_legend_location[pos]

    show(gridplot([p1, p2], ncols=2))

----------------------------------------
# IntelXeon4

- Intel Corporation D50DNP1SBB
- Intel(R) Xeon(R) Platinum 8480+ 2.0 GHz 112 cores, 2 sockets
- L1 cache: size: 5.3 MiB
- L2 cache: size: 224 MiB
- L3 cache: size: 210 MiB
- Memory: size: 512 GiB memory, DIMM Synchronous Registered (Buffered) 4800 MHz (0.2 ns)
- pci: PCIe 5.0 x32 128 GB/s
----------------------------------------

## Data sources - Performance

In [27]:
# Load results_<name>.json for each result set stored under IntelXeon4/.
names = ['cpu', 'sycl_cpu']
data = load_data("IntelXeon4", names)

# Convert each result set into per-label DataFrames (times/errors indexed by size).
data_cpu = extract_data(data['cpu'])
data_sycl_cpu = extract_data(data['sycl_cpu'])

### CPU

In [28]:
# Time and error plots for every label in the CPU result sets (all keys except 'serial').
plot_data([data_cpu, data_sycl_cpu])

In [29]:
# custom=1: restrict the plots to the listed labels.
plot_data([data_cpu, data_sycl_cpu], 1, ['mkl', 'openMP', 'sycl_cpu'])

## Data sources - Energy and Power Consumption

In [30]:
# Only perf reports are loaded for this server (read from IntelXeon4/perf/).
names = ['avx2', 'avx512', 'mkl', 'openmp', 'sycl_cpu']
perf_IntelXeon4_data = perf_load_data("IntelXeon4/perf/", names)

###  Energy Consumption/CPU - PERF

In [31]:
# --------------------------------------------------------------------------------------------------------------
# Notes:
# - The data were collected 20 times, so they must be averaged
# - power/energy-pkg/ Joules
# - power/energy-ram/ Joules
# - time elapsed seconds
# - power/energy-pkg  : power consumption of all cores + LLC cache
# - power/energy-dram : power consumption of DRAM (servers only)
# NOTE(review): plot_data_perf does not use `iterations`; confirm whether the perf figures are already averaged.
# --------------------------------------------------------------------------------------------------------------
names = ['avx2', 'avx512', 'mkl', 'openmp', 'sycl_cpu']
sizes = [32, 48, 64, 80, 96, 112, 128, 256, 384, 512, 640, 768, 896, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192]
iterations = 20
plot_data_perf(perf_IntelXeon4_data, names, sizes, iterations)

----------------------------------------
# pacca01
- HPE ProLiant XL220n Gen10 Plus (P19879-B21)
- Intel(R) Xeon(R) Gold 5320 CPU @ 2.20GHz, 104 cores, 2 sockets
- L1 cache: size: 48 K
- L2 cache: size: 1280K
- L3 cache: size: 39936K
- Memory: size: 256 GiB memory, DIMM DDR4 Synchronous Registered (Buffered) 3200 MHz (0.3 ns)
- pci: PCIe 4.0 x16 64 GB/s
----------------------------------------

## Data sources - Performance

In [32]:
# Load results_<name>.json for each result set stored under pacca01/.
names = ['cpu', 'sycl_cpu']
data = load_data("pacca01", names)

# Convert each result set into per-label DataFrames (times/errors indexed by size).
data_cpu = extract_data(data['cpu'])
data_sycl_cpu = extract_data(data['sycl_cpu'])

### CPU

In [33]:
# Time and error plots for every label in the CPU result sets (all keys except 'serial').
plot_data([data_cpu, data_sycl_cpu])

In [34]:
# custom=1: restrict the plots to the listed labels.
plot_data([data_cpu, data_sycl_cpu], 1, ['mkl', 'openMP', 'sycl_cpu'])

## Data sources - Energy and Power Consumption

In [35]:
# perf reports are read from pacca01/perf/ and PAPI regions from pacca01/papi/<name>.json.
names = ['avx2', 'avx512', 'mkl', 'openmp', 'sycl_cpu']
perf_pacca01_data = perf_load_data("pacca01/perf/", names)
papi_pacca01_data = papi_load_data("pacca01", names)

###  Energy Consumption - PAPI

In [36]:
# --------------------------------------------------------------------------------------------------------------
# Notes:
# - The data were collected 20 times, so they must be averaged
# - GPU data are in milliwatts
# - CPU data are in nanojoules
# --------------------------------------------------------------------------------------------------------------
# extract_data_papi divides every reading by `iterations` and uses sizes[i] as the row label of PAPI region i.
iterations = 20
sizes = [32, 48, 64, 80, 96, 112, 128, 256, 384, 512, 640, 768, 896, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192]
papi_pacca01_data_plot = extract_data_papi(iterations, sizes, papi_pacca01_data)

In [37]:
# PAPI energy/power plots for the listed CPU labels; pos=1 puts the power-plot legend at bottom_right.
names = ['avx2', 'avx512', 'mkl', 'openmp', 'sycl_cpu']
plot_data_papi(papi_pacca01_data_plot, names, 1)

In [38]:
# Subset of CPU labels; pos=1 puts the power-plot legend at bottom_right.
names = ['mkl', 'openmp', 'sycl_cpu']
plot_data_papi(papi_pacca01_data_plot, names, 1)

In [39]:
###  Energy Consumption/CPU - PERF
# --------------------------------------------------------------------------------------------------------------
# Notes:
# - The data were collected 20 times, so they must be averaged
# - power/energy-pkg/ Joules
# - power/energy-ram/ Joules
# - time elapsed seconds
# - power/energy-pkg  : power consumption of all cores + LLC cache
# - power/energy-dram : power consumption of DRAM (servers only)
# NOTE(review): plot_data_perf does not use `iterations`; confirm whether the perf figures are already averaged.
# --------------------------------------------------------------------------------------------------------------
names = ['avx2', 'avx512', 'mkl', 'openmp', 'sycl_cpu']
sizes = [32, 48, 64, 80, 96, 112, 128, 256, 384, 512, 640, 768, 896, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192]
iterations = 20
plot_data_perf(perf_pacca01_data, names, sizes, iterations)

----------------------------------------
# paccaA100
- HPE ProLiant XL290n Gen10 Plus (P19880-B21)
- Intel(R) Xeon(R) Gold 5315Y CPU @ 3.20GHz, 32 cores, 2 sockets
- L1 cache: size: 48 K
- L2 cache: size: 1280K
- L3 cache: size: 12288K
- Memory: size: 256 GiB memory, DIMM DDR4 Synchronous Registered (Buffered) 3200 MHz (0.3 ns)
- pci: PCIe 4.0 x16 64 GB/s
- NVIDIA A100-PCIE-40GB
- Driver Version: 550.54.14
- CUDA Version: 12.4
----------------------------------------

## Data sources - Performance

In [40]:
# Load results_<name>.json for each result set stored under paccaA100/.
names = ['cpu', 'gpu', 'sycl_cpu', 'sycl_gpu']
data = load_data("paccaA100", names)

# Convert each result set into per-label DataFrames (times/errors indexed by size).
data_cpu = extract_data(data['cpu'])
data_sycl_cpu = extract_data(data['sycl_cpu'])

data_gpu = extract_data(data['gpu'])
data_sycl_gpu = extract_data(data['sycl_gpu'])

### CPU

In [41]:
# Time and error plots for every label in the CPU result sets (all keys except 'serial').
plot_data([data_cpu, data_sycl_cpu])

In [42]:
# custom=1: restrict the plots to the listed labels.
plot_data([data_cpu, data_sycl_cpu], 1, ['mkl', 'openMP', 'sycl_cpu'])

### GPU

In [43]:
# Time and error plots for every label in the GPU result sets (all keys except 'serial').
plot_data([data_gpu, data_sycl_gpu])

In [44]:
# custom=1: restrict the GPU plots to the listed labels.
plot_data([data_gpu, data_sycl_gpu], 1, ['cuBLAS_Tensor', 'cuBLAS_async', 'cuBLAS', 'sycl_gpu'])

### Results comparison CPU Vs GPU - Best times

In [45]:
# CPU and GPU result sets on the same figures, restricted to the listed labels.
plot_data([data_cpu, data_sycl_cpu, data_gpu, data_sycl_gpu], 1, ['mkl', 'openMP', 'sycl_cpu', 'cuBLAS_Tensor', 'cuBLAS_async', 'cuBLAS'])

In [46]:
# Narrower comparison: mkl against the two listed cuBLAS labels.
plot_data([data_cpu, data_gpu], 1, ['mkl', 'cuBLAS_Tensor', 'cuBLAS'])

In [47]:
# Head-to-head comparison of the 'mkl' and 'cuBLAS' labels only.
plot_data([data_cpu, data_gpu], 1, ['mkl', 'cuBLAS'])

## Data sources - Energy and Power Consumption

In [48]:
# perf reports are read from paccaA100/perf/ and PAPI regions from paccaA100/papi/<name>.json.
names = ['avx2', 'avx512', 'mkl', 'openmp', 'cuda_naive', 'cuda_tiled', 'cublas', 'cublas_async', 'cublas_tensor', 'sycl_cpu', 'sycl_gpu']
perf_paccaA100_data = perf_load_data("paccaA100/perf/", names)
papi_paccaA100_data = papi_load_data("paccaA100", names)

###  Energy Consumption - PAPI

In [49]:
# --------------------------------------------------------------------------------------------------------------
# Notes:
# - The data were collected 20 times, so they must be averaged
# - GPU data are in milliwatts
# - CPU data are in nanojoules
# --------------------------------------------------------------------------------------------------------------
# extract_data_papi divides every reading by `iterations` and uses sizes[i] as the row label of PAPI region i.
iterations = 20
sizes = [32, 48, 64, 80, 96, 112, 128, 256, 384, 512, 640, 768, 896, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192]
papi_paccaA100_data_plot = extract_data_papi(iterations, sizes, papi_paccaA100_data)

In [50]:
# PAPI energy/power plots for the listed CPU labels; pos=1 puts the power-plot legend at bottom_right.
names = ['avx2', 'avx512', 'mkl', 'openmp', 'sycl_cpu']
plot_data_papi(papi_paccaA100_data_plot, names, 1)

In [51]:
# Subset of CPU labels; pos=1 puts the power-plot legend at bottom_right.
names = ['mkl', 'openmp', 'sycl_cpu']
plot_data_papi(papi_paccaA100_data_plot, names, 1)

In [52]:
# GPU labels; pos=0 puts the power-plot legend at top_right.
names = ['cublas', 'cublas_async', 'sycl_gpu']
plot_data_papi(papi_paccaA100_data_plot, names, 0)

In [53]:
# The two listed cuBLAS labels; pos=1 puts the power-plot legend at bottom_right.
names = ['cublas', 'cublas_tensor']
plot_data_papi(papi_paccaA100_data_plot, names, 1)

#### Final

In [54]:
# CPU and GPU labels together; pos=1 puts the power-plot legend at bottom_right.
names = ['mkl', 'sycl_cpu', 'cublas', 'cublas_tensor']
plot_data_papi(papi_paccaA100_data_plot, names, 1)

###  Energy Consumption/CPU - PERF

In [55]:
# --------------------------------------------------------------------------------------------------------------
# Notes:
# - The data were collected 20 times, so they must be averaged
# - power/energy-pkg/ Joules
# - power/energy-ram/ Joules
# - time elapsed seconds
# - power/energy-pkg  : power consumption of all cores + LLC cache
# - power/energy-dram : power consumption of DRAM (servers only)
# NOTE(review): plot_data_perf does not use `iterations`; confirm whether the perf figures are already averaged.
# --------------------------------------------------------------------------------------------------------------
names = ['avx2', 'avx512', 'mkl', 'openmp', 'sycl_cpu']
sizes = [32, 48, 64, 80, 96, 112, 128, 256, 384, 512, 640, 768, 896, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192]
iterations = 20
# Plot the perf data loaded for this server (paccaA100); the cell previously
# plotted perf_chifflot8_data, a copy-paste leftover from the chifflot-8 section.
plot_data_perf(perf_paccaA100_data, names, sizes, iterations)

----------------------------------------
# roazhon13-2
- PowerEdge C6420 (SKU=0757;ModelName=PowerEdge C6420)
- Intel(R) Xeon(R) Gold 6254 CPU @ 3.10GHz, 72 cores, 2 sockets
- L1 cache: size: 1.1 MiB
- L2 cache: size: 36 MiB
- L3 cache: size: 49.5 MiB
- Memory: size: 384 GiB memory, DIMM DDR4 Synchronous Registered (Buffered) 2933 MHz (0.3 ns)
- pci: PCIe 3.0 x16 16 GB/s
----------------------------------------

## Data sources - Performance

In [56]:
# Load results_<name>.json for each result set stored under roazhon13-2/.
names = ['cpu', 'sycl_cpu']
data = load_data("roazhon13-2", names)

# Convert each result set into per-label DataFrames (times/errors indexed by size).
data_cpu = extract_data(data['cpu'])
data_sycl_cpu = extract_data(data['sycl_cpu'])

### CPU

In [57]:
# Time and error plots for every label in the CPU result sets (all keys except 'serial').
plot_data([data_cpu, data_sycl_cpu])

In [58]:
# custom=1: restrict the plots to the listed labels.
plot_data([data_cpu, data_sycl_cpu], 1, ['mkl', 'openMP', 'sycl_cpu'])

## Data sources - Energy and Power Consumption

In [59]:
# perf reports are read from roazhon13-2/perf/ and PAPI regions from roazhon13-2/papi/<name>.json.
names = ['avx2', 'avx512', 'mkl', 'openmp', 'sycl_cpu']
perf_roazhon13_data = perf_load_data("roazhon13-2/perf/", names)
papi_roazhon13_data = papi_load_data("roazhon13-2", names)

###  Energy Consumption - PAPI

In [60]:
# --------------------------------------------------------------------------------------------------------------
# Notes:
# - The data were collected 20 times, so they must be averaged
# - GPU data are in milliwatts
# - CPU data are in nanojoules
# --------------------------------------------------------------------------------------------------------------
# extract_data_papi divides every reading by `iterations` and uses sizes[i] as the row label of PAPI region i.
iterations = 20
sizes = [32, 48, 64, 80, 96, 112, 128, 256, 384, 512, 640, 768, 896, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192]
papi_roazhon13_data_plot = extract_data_papi(iterations, sizes, papi_roazhon13_data)

In [61]:
# PAPI energy/power plots for the listed CPU labels (default legend placement, pos=0).
names = ['avx2', 'avx512', 'mkl', 'openmp', 'sycl_cpu']
plot_data_papi(papi_roazhon13_data_plot, names)

In [62]:
# Subset of CPU labels; pos=1 puts the power-plot legend at bottom_right.
names = ['mkl', 'openmp', 'sycl_cpu']
plot_data_papi(papi_roazhon13_data_plot, names, 1)

###  Energy Consumption/CPU - PERF

In [63]:
# --------------------------------------------------------------------------------------------------------------
# Notes:
# - The data were collected 20 times, so they must be averaged
# - power/energy-pkg/ Joules
# - power/energy-ram/ Joules
# - time elapsed seconds
# - power/energy-pkg  : power consumption of all cores + LLC cache
# - power/energy-dram : power consumption of DRAM (servers only)
# NOTE(review): plot_data_perf does not use `iterations`; confirm whether the perf figures are already averaged.
# --------------------------------------------------------------------------------------------------------------
names = ['avx2', 'avx512', 'mkl', 'openmp', 'sycl_cpu']
sizes = [32, 48, 64, 80, 96, 112, 128, 256, 384, 512, 640, 768, 896, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192]
iterations = 20
plot_data_perf(perf_roazhon13_data, names, sizes, iterations)

----------------------------------------
# montcalm-10
- HPE ProLiant DL360 Gen10 Plus (P28948-B21)
- Intel(R) Xeon(R) Silver 4314 CPU @ 2.40GHz, 64 cores, 2 sockets
- L1 cache: size: 1.5 MiB
- L2 cache: size: 40 MiB
- L3 cache: size: 48 MiB
- Memory: size: 256 GiB memory, DIMM DDR4 Synchronous Registered (Buffered) 3200 MHz (0.3 ns)
- pci: PCIe 4.0 x16 64 GB/s
----------------------------------------

## Data sources - Performance

In [64]:
# Load results_<name>.json for each result set stored under montcalm-10/.
names = ['cpu', 'sycl_cpu']
data = load_data("montcalm-10", names)

# Convert each result set into per-label DataFrames (times/errors indexed by size).
data_cpu = extract_data(data['cpu'])
data_sycl_cpu = extract_data(data['sycl_cpu'])

### CPU

In [65]:
# Time and error plots for every label in the CPU result sets (all keys except 'serial').
plot_data([data_cpu, data_sycl_cpu])

In [66]:
# custom=1: restrict the plots to the listed labels.
plot_data([data_cpu, data_sycl_cpu], 1, ['mkl', 'openMP', 'sycl_cpu'])

## Data sources - Energy and Power Consumption

In [67]:
# perf reports are read from montcalm-10/perf/ and PAPI regions from montcalm-10/papi/<name>.json.
names = ['avx2', 'avx512', 'mkl', 'openmp', 'sycl_cpu']
perf_montcalm10_data = perf_load_data("montcalm-10/perf/", names)
papi_montcalm10_data = papi_load_data("montcalm-10", names)

###  Energy Consumption - PAPI

In [68]:
# --------------------------------------------------------------------------------------------------------------
# Notes:
# - The data were collected 20 times, so they must be averaged
# - GPU data are in milliwatts
# - CPU data are in nanojoules
# --------------------------------------------------------------------------------------------------------------
# extract_data_papi divides every reading by `iterations` and uses sizes[i] as the row label of PAPI region i.
iterations = 20
sizes = [32, 48, 64, 80, 96, 112, 128, 256, 384, 512, 640, 768, 896, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192]
papi_montcalm10_data_plot = extract_data_papi(iterations, sizes, papi_montcalm10_data)

In [69]:
# PAPI energy/power plots for the listed CPU labels (default legend placement, pos=0).
names = ['avx2', 'avx512', 'mkl', 'openmp', 'sycl_cpu']
plot_data_papi(papi_montcalm10_data_plot, names)

In [70]:
# Subset of CPU labels; pos=1 puts the power-plot legend at bottom_right.
names = ['mkl', 'openmp', 'sycl_cpu']
plot_data_papi(papi_montcalm10_data_plot, names, 1)

###  Energy Consumption/CPU - PERF

In [71]:
# --------------------------------------------------------------------------------------------------------------
# Notes:
# - The data were collected 20 times, so they must be averaged
# - power/energy-pkg/ Joules
# - power/energy-ram/ Joules
# - time elapsed seconds
# - power/energy-pkg  : power consumption of all cores + LLC cache
# - power/energy-dram : power consumption of DRAM (servers only)
# NOTE(review): plot_data_perf does not use `iterations`; confirm whether the perf figures are already averaged.
# --------------------------------------------------------------------------------------------------------------
names = ['avx2', 'avx512', 'mkl', 'openmp', 'sycl_cpu']
sizes = [32, 48, 64, 80, 96, 112, 128, 256, 384, 512, 640, 768, 896, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192]
iterations = 20
plot_data_perf(perf_montcalm10_data, names, sizes, iterations)