In [1]:
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource
from bokeh.layouts import column,row
from bokeh.models import Div
from bokeh.palettes import Spectral6

import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Toolbar tools passed as `tools=TOOLS` to every Bokeh figure created below.
TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select"

# Send Bokeh output to the notebook; the trailing ';' keeps the cell from
# echoing a result.
output_notebook();

# chifflot-8.lille

- PowerEdge R740
- Intel(R) Xeon(R) Gold 6126 CPU @ 2.60GHz
- L1 cache: size: 768KiB
- L2 cache: size: 12MiB
- L3 cache: size: 19MiB
- Memory: size: 192GiB, capabilities: ecc, DDR4 Synchronous Registered (Buffered) 2666 MHz (0.4 ns)
- pci: 3.0 16x 16 GB/s
- Tesla V100-PCIE-32GB 
- Driver Version: 550.54.14
- CUDA Version: 12.4

## Data sources - Performance

In [2]:
# Load the benchmark results for chifflot-8. Each file is read inside a
# `with` block so the file handle is closed as soon as parsing is done.
# The top-level keys of each result dict are kept as the series labels that
# the plotting cells iterate over.
with open('chifflot-8/results_cpu.json', 'r') as results_file:
    data_chifflot8_cpu = json.load(results_file)
labels_chifflot8_cpu = list(data_chifflot8_cpu)

with open('chifflot-8/results_gpu.json', 'r') as results_file:
    data_chifflot8_gpu = json.load(results_file)
labels_chifflot8_gpu = list(data_chifflot8_gpu)

with open('chifflot-8/results_sycl_cpu.json', 'r') as results_file:
    data_chifflot8_sycl_cpu = json.load(results_file)
labels_chifflot8_sycl_cpu = list(data_chifflot8_sycl_cpu)

with open('chifflot-8/results_sycl_gpu.json', 'r') as results_file:
    data_chifflot8_sycl_gpu = json.load(results_file)
labels_chifflot8_sycl_gpu = list(data_chifflot8_sycl_gpu)

### CPU

In [8]:
# Intel Xeon Gold 6126 (Skylake), x86_64, 2.60GHz, 2 CPUs/node, 12 cores/CPU
#
# Plots time (p1) and error (p2) against problem size N for every series in
# data_chifflot8_cpu and for the first series of data_chifflot8_sycl_cpu.
# The 'serial' series is read but not drawn. The data_*/error_* DataFrames
# saved along the way are read again by the comparison cells further down.

colors = ['darkcyan', 'mediumturquoise', 'blue', 'orange', 'red']

p1 = figure(title="Time", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
p2 = figure(title="Error", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")

p1.xaxis.axis_label = 'N'
p1.yaxis.axis_label = 'Time ( ms )'
p2.xaxis.axis_label = 'N'
p2.yaxis.axis_label = 'MSE'

# One raw error list per series, in the order the series are read. Feeds the
# error tables and the y-range of the error plot.
errors_total = list()

for i, label in enumerate(labels_chifflot8_cpu):
    data_err = data_chifflot8_cpu[label]
    size_keys = list(data_err)
    times = [data_err[size]['time'] for size in size_keys]
    errors = [data_err[size]['error'] for size in size_keys]
    sizes = list(map(int, size_keys))

    data_plot = pd.DataFrame({'size': sizes, 'time': times, 'error': errors})

    errors_total.append(errors)

    # Error table: the current sizes plus one column per series read so far.
    # NOTE(review): earlier series are paired with the current sizes by
    # position; this assumes every series lists the same sizes in the same order.
    errors_t = {'sizes': sizes}
    errors_t.update(zip(labels_chifflot8_cpu, errors_total))

    source_chifflot8_cpu_errors = ColumnDataSource(pd.DataFrame(errors_t))

    error_values = np.array([float(err) for err in errors])
    data_plot['error_low'] = data_plot['time'] - error_values
    data_plot['error_high'] = data_plot['time'] + error_values
    data_plot = data_plot.sort_values(by=['size'], ascending=True).reset_index(drop=True)

    source_chifflot8_cpu = ColumnDataSource(data_plot)

    if label != 'serial':
        p1.square(source=source_chifflot8_cpu, x='size', y='time', legend_label=label, fill_color=colors[i], line_color=colors[i])
        p1.line(source=source_chifflot8_cpu, x='size', y='time', line_color=colors[i])
        p2.square(source=source_chifflot8_cpu_errors, x='sizes', y=label, legend_label=label, color=colors[i])

    # Keep copies of the series that later cells compare against each other.
    if label == 'mkl':
        data_mkl_chifflot8 = data_plot.copy(deep=True)
        error_mkl_chifflot8 = pd.DataFrame(errors_t)

    if label == 'openMP':
        data_openMP_chifflot8 = data_plot.copy(deep=True)
        error_openMP_chifflot8 = pd.DataFrame(errors_t)

# SYCL on CPU: only the first series of the SYCL result file is plotted.
label = labels_chifflot8_sycl_cpu[0]
data_err = data_chifflot8_sycl_cpu[label]
size_keys = list(data_err)
times = [data_err[size]['time'] for size in size_keys]
errors = [data_err[size]['error'] for size in size_keys]
sizes = list(map(int, size_keys))

data_plot = pd.DataFrame({'size': sizes, 'time': times, 'error': errors})

errors_total.append(errors)

# The SYCL column must hold the SYCL errors. Zipping the SYCL labels with
# errors_total would pair this label with the first CPU series instead,
# because errors_total starts with the CPU series.
errors_t = {'sizes': sizes, label: errors}

source_chifflot8_sycl_cpu_errors = ColumnDataSource(pd.DataFrame(errors_t))

error_values = np.array([float(err) for err in errors])
data_plot['error_low'] = data_plot['time'] - error_values
data_plot['error_high'] = data_plot['time'] + error_values
data_plot = data_plot.sort_values(by=['size'], ascending=True).reset_index(drop=True)

source_sycl_chifflot8 = ColumnDataSource(data_plot)

p1.square(source=source_sycl_chifflot8, x='size', y='time', legend_label=label, fill_color='magenta', line_color='magenta')
p1.line(source=source_sycl_chifflot8, x='size', y='time', line_color='magenta')
p2.square(source=source_chifflot8_sycl_cpu_errors, x='sizes', y=label, legend_label=label, color='magenta')

if label == 'sycl_cpu':
    data_sycl_cpu_chifflot8 = data_plot.copy(deep=True)
    error_sycl_cpu_chifflot8 = pd.DataFrame(errors_t)

# Largest error over all series. Calling max() on the list of lists compares
# the lists lexicographically and would then look inside only one of them.
maxi = max(float(err) for series in errors_total for err in series)

p2.y_range.end = maxi*1.1
p1.legend.location = "top_left"
p2.legend.location = "top_left"

show(column(Div(text=" Intel Xeon Gold 6126 (Skylake), x86_64, 2.60GHz, 2 CPUs/node, 12 cores/CPU"), row(p1, p2)))

In [9]:
# SyCL
# Time and error panels for mkl, openMP and SYCL on CPU only. The data and
# error DataFrames come from the CPU cell above; the ColumnDataSource objects
# created here are reused by the CPU-vs-GPU comparison cells.
p1 = figure(title="Time", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
p2 = figure(title="Error", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")

p1.xaxis.axis_label = 'N'
p1.yaxis.axis_label = 'Time ( ms )'
p2.xaxis.axis_label = 'N'
p2.yaxis.axis_label = 'MSE'

source_mkl_chifflot8 = ColumnDataSource(data_mkl_chifflot8)
source_mkl_errors_chifflot8 = ColumnDataSource(error_mkl_chifflot8)

source_openmp_chifflot8 = ColumnDataSource(data_openMP_chifflot8)
source_openmp_errors_chifflot8 = ColumnDataSource(error_openMP_chifflot8)

source_sycl_cpu_chifflot8 = ColumnDataSource(data_sycl_cpu_chifflot8)
source_sycl_cpu_errors_chifflot8 = ColumnDataSource(error_sycl_cpu_chifflot8)

# (label, colour, time source, error source). Each series uses the same label
# and colour in both panels so the two legends agree.
cpu_series = (
    ('mkl', 'blue', source_mkl_chifflot8, source_mkl_errors_chifflot8),
    ('openMP', 'orange', source_openmp_chifflot8, source_openmp_errors_chifflot8),
    ('sycl_cpu', 'magenta', source_sycl_cpu_chifflot8, source_sycl_cpu_errors_chifflot8),
)

for series_label, series_color, time_source, error_source in cpu_series:
    p1.square(source=time_source, x='size', y='time', legend_label=series_label, fill_color=series_color, line_color=series_color)
    p1.line(source=time_source, x='size', y='time', line_color=series_color)

p1.legend.location = "top_left"

for series_label, series_color, time_source, error_source in cpu_series:
    p2.square(source=error_source, x='sizes', y=series_label, legend_label=series_label, color=series_color, fill_alpha=0.4)

p2.legend.location = "top_left"

show(column(Div(text=" Intel Xeon Gold 6126 (Skylake), x86_64, 2.60GHz, 2 CPUs/node, 12 cores/CPU"), row(p1, p2)))

### GPU

In [10]:
#Nvidia Tesla V100-PCIE-32GB (32 GiB)
#
# Plots time (p1) and error (p2) against problem size N for every series in
# data_chifflot8_gpu and for the first series of data_chifflot8_sycl_gpu.
# The data_*/error_* DataFrames and the SYCL sources saved here are read
# again by the comparison cells further down.

colors = ['mediumseagreen', 'brown', 'dodgerblue', 'deeppink', 'darkviolet']

p1 = figure(title="Time", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
p2 = figure(title="Error", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")

p1.xaxis.axis_label = 'N'
p1.yaxis.axis_label = 'Time ( ms )'
p2.xaxis.axis_label = 'N'
p2.yaxis.axis_label = 'MSE'

# One raw error list per series, in the order the series are read. Feeds the
# error tables and the y-range of the error plot.
errors_total = list()

for i, label in enumerate(labels_chifflot8_gpu):
    data_err = data_chifflot8_gpu[label]
    size_keys = list(data_err)
    times = [data_err[size]['time'] for size in size_keys]
    errors = [data_err[size]['error'] for size in size_keys]
    sizes = list(map(int, size_keys))

    data_plot = pd.DataFrame({'size': sizes, 'time': times, 'error': errors})

    errors_total.append(errors)

    # Error table: the current sizes plus one column per series read so far.
    # NOTE(review): earlier series are paired with the current sizes by
    # position; this assumes every series lists the same sizes in the same order.
    errors_t = {'sizes': sizes}
    errors_t.update(zip(labels_chifflot8_gpu, errors_total))

    source_chifflot8_gpu_errors = ColumnDataSource(pd.DataFrame(errors_t))

    # Lower/upper bounds: time shifted down/up by the error value.
    error_values = np.array([float(err) for err in errors])
    data_plot['error_low'] = data_plot['time'] - error_values
    data_plot['error_high'] = data_plot['time'] + error_values
    data_plot = data_plot.sort_values(by=['size'], ascending=True).reset_index(drop=True)

    source_chifflot8_gpu = ColumnDataSource(data_plot)

    p1.square(source=source_chifflot8_gpu, x='size', y='time', legend_label=label, fill_color=colors[i], line_color=colors[i])
    p1.line(source=source_chifflot8_gpu, x='size', y='time', line_color=colors[i])

    p2.square(source=source_chifflot8_gpu_errors, x='sizes', y=label, legend_label=label, color=colors[i])

    # Keep copies of the series that later cells compare against each other.
    if label == 'cuBLAS_Tensor':
        data_cublas_tensor_chifflot8 = data_plot.copy(deep=True)
        error_cublas_tensor_chifflot8 = pd.DataFrame(errors_t)
    if label == 'cuBLAS':
        data_cublas_chifflot8 = data_plot.copy(deep=True)
        error_cublas_chifflot8 = pd.DataFrame(errors_t)
    if label == 'cuBLAS_async':
        data_cublas_async_chifflot8 = data_plot.copy(deep=True)
        error_cublas_async_chifflot8 = pd.DataFrame(errors_t)

# SYCL on GPU: only the first series of the SYCL result file is plotted.
label = labels_chifflot8_sycl_gpu[0]
data_err = data_chifflot8_sycl_gpu[label]
size_keys = list(data_err)
times = [data_err[size]['time'] for size in size_keys]
errors = [data_err[size]['error'] for size in size_keys]
sizes = list(map(int, size_keys))

data_plot = pd.DataFrame({'size': sizes, 'time': times, 'error': errors})

errors_total.append(errors)

# The SYCL column must hold the SYCL errors. Zipping the SYCL labels with
# errors_total would pair this label with the first GPU series instead,
# because errors_total starts with the GPU series.
errors_t = {'sizes': sizes, label: errors}

source_sycl_gpu_chifflot8_errors = ColumnDataSource(pd.DataFrame(errors_t))

# Lower/upper bounds: time shifted down/up by the error value.
error_values = np.array([float(err) for err in errors])
data_plot['error_low'] = data_plot['time'] - error_values
data_plot['error_high'] = data_plot['time'] + error_values
data_plot = data_plot.sort_values(by=['size'], ascending=True).reset_index(drop=True)

source_sycl_gpu_chifflot8 = ColumnDataSource(data_plot)

p1.square(source=source_sycl_gpu_chifflot8, x='size', y='time', legend_label=label, fill_color='goldenrod', line_color='goldenrod')
p1.line(source=source_sycl_gpu_chifflot8, x='size', y='time', line_color='goldenrod')
p2.square(source=source_sycl_gpu_chifflot8_errors, x='sizes', y=label, legend_label=label, color='goldenrod', fill_alpha=0.4)

# Largest error over all series. Calling max() on the list of lists compares
# the lists lexicographically and would then look inside only one of them.
maxi = max(float(err) for series in errors_total for err in series)

p2.y_range.end = maxi*1.1

p1.legend.location = "top_left"
p2.legend.location = "top_left"

show(column(Div(text="Intel Xeon Gold 6126 (Skylake), x86_64, 2.60GHz, 2 CPUs/node, 12 cores/CPU - Nvidia Tesla V100-PCIE-32GB (32 GiB)"), row(p1, p2)))

In [6]:
# Time and error panels for the GPU implementations only: the three cuBLAS
# variants plus the SYCL GPU series. The cuBLAS sources created here are
# reused by the CPU-vs-GPU comparison cells.
p1 = figure(title="Time", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
p1.xaxis.axis_label = 'N'
p1.yaxis.axis_label = 'Time ( ms )'

p2 = figure(title="Error", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
p2.xaxis.axis_label = 'N'
p2.yaxis.axis_label = 'MSE'

# Timing tables.
source_cublasT_chifflot8 = ColumnDataSource(data_cublas_tensor_chifflot8)
source_cublasA_chifflot8 = ColumnDataSource(data_cublas_async_chifflot8)
source_cublas_chifflot8 = ColumnDataSource(data_cublas_chifflot8)

# Error tables.
source_cublasT_errors_chifflot8 = ColumnDataSource(error_cublas_tensor_chifflot8)
source_cublasA_errors_chifflot8 = ColumnDataSource(error_cublas_async_chifflot8)
source_cublas_errors_chifflot8 = ColumnDataSource(error_cublas_chifflot8)

# (label, colour, time source, error source), in drawing order.
cublas_series = (
    ('cuBLAS_Tensor', 'brown', source_cublasT_chifflot8, source_cublasT_errors_chifflot8),
    ('cuBLAS_async', 'dodgerblue', source_cublasA_chifflot8, source_cublasA_errors_chifflot8),
    ('cuBLAS', 'mediumseagreen', source_cublas_chifflot8, source_cublas_errors_chifflot8),
)

for series_label, series_color, time_source, error_source in cublas_series:
    p1.square(source=time_source, x='size', y='time', legend_label=series_label, fill_color=series_color, line_color=series_color)
    p1.line(source=time_source, x='size', y='time', line_color=series_color)

# The SYCL GPU sources were built by the GPU cell above.
p1.square(source=source_sycl_gpu_chifflot8, x='size', y='time', legend_label=labels_chifflot8_sycl_gpu[0], fill_color='goldenrod', line_color='goldenrod')
p1.line(source=source_sycl_gpu_chifflot8, x='size', y='time', line_color='goldenrod')

p1.legend.location = "top_left"

for series_label, series_color, time_source, error_source in cublas_series:
    p2.square(source=error_source, x='sizes', y=series_label, legend_label=series_label, color=series_color, fill_alpha=0.4)
p2.square(source=source_sycl_gpu_chifflot8_errors, x='sizes', y='sycl_gpu', legend_label='sycl_gpu', color='goldenrod', fill_alpha=0.4)

p2.legend.location = "top_left"

show(column(Div(text="Intel Xeon Gold 6126 (Skylake), x86_64, 2.60GHz, 2 CPUs/node, 12 cores/CPU - Nvidia Tesla V100-PCIE-32GB (32 GiB)"), row(p1, p2)))

### Results comparison CPU Vs GPU - Best times

In [16]:
# CPU vs GPU: mkl and SYCL-CPU against cuBLAS (Tensor and plain) and SYCL-GPU.
# All ColumnDataSource objects used here were created by earlier cells.
p1 = figure(title="Time", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
p2 = figure(title="Error", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")

p1.xaxis.axis_label = 'N'
p1.yaxis.axis_label = 'Time ( ms )'
p2.xaxis.axis_label = 'N'
p2.yaxis.axis_label = 'MSE'

# (label, colour, time source, error source). Each series uses the same
# legend label in both panels; the SYCL GPU series is handled separately
# below because its time-panel label is read from the result file.
compared_series = (
    ('mkl', 'blue', source_mkl_chifflot8, source_mkl_errors_chifflot8),
    ('sycl_cpu', 'magenta', source_sycl_cpu_chifflot8, source_sycl_cpu_errors_chifflot8),
    ('cuBLAS_Tensor', 'brown', source_cublasT_chifflot8, source_cublasT_errors_chifflot8),
    ('cuBLAS', 'mediumseagreen', source_cublas_chifflot8, source_cublas_errors_chifflot8),
)

for series_label, series_color, time_source, error_source in compared_series:
    p1.square(source=time_source, x='size', y='time', legend_label=series_label, fill_color=series_color, line_color=series_color)
    p1.line(source=time_source, x='size', y='time', line_color=series_color)

p1.square(source=source_sycl_gpu_chifflot8, x='size', y='time', legend_label=labels_chifflot8_sycl_gpu[0], fill_color='goldenrod', line_color='goldenrod')
p1.line(source=source_sycl_gpu_chifflot8, x='size', y='time', line_color='goldenrod')

p1.legend.location = "top_left"

for series_label, series_color, time_source, error_source in compared_series:
    p2.square(source=error_source, x='sizes', y=series_label, legend_label=series_label, color=series_color, fill_alpha=0.4)
p2.square(source=source_sycl_gpu_chifflot8_errors, x='sizes', y='sycl_gpu', legend_label='sycl_gpu', color='goldenrod', fill_alpha=0.4)

p2.legend.location = "top_left"

show(column(Div(text="Intel Xeon Gold 6126 (Skylake), x86_64, 2.60GHz, 2 CPUs/node, 12 cores/CPU - Nvidia Tesla V100-PCIE-32GB (32 GiB)"), row(p1, p2)))

In [17]:
# CPU vs GPU without the SYCL series: mkl against cuBLAS_Tensor and cuBLAS.
# All ColumnDataSource objects used here were created by earlier cells.
p1 = figure(title="Time", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
p1.xaxis.axis_label = 'N'
p1.yaxis.axis_label = 'Time ( ms )'

p2 = figure(title="Error", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
p2.xaxis.axis_label = 'N'
p2.yaxis.axis_label = 'MSE'

# (label, colour, time source, error source), in drawing order.
compared_series = (
    ('mkl', 'blue', source_mkl_chifflot8, source_mkl_errors_chifflot8),
    ('cuBLAS_Tensor', 'brown', source_cublasT_chifflot8, source_cublasT_errors_chifflot8),
    ('cuBLAS', 'mediumseagreen', source_cublas_chifflot8, source_cublas_errors_chifflot8),
)

# Time panel: square markers joined by a line for each series.
for series_label, series_color, time_source, error_source in compared_series:
    p1.square(source=time_source, x='size', y='time', legend_label=series_label, fill_color=series_color, line_color=series_color)
    p1.line(source=time_source, x='size', y='time', line_color=series_color)
p1.legend.location = "top_left"

# Error panel: markers only, read from the column named after the series.
for series_label, series_color, time_source, error_source in compared_series:
    p2.square(source=error_source, x='sizes', y=series_label, legend_label=series_label, color=series_color, fill_alpha=0.4)
p2.legend.location = "top_left"

show(column(Div(text="Intel Xeon Gold 6126 (Skylake), x86_64, 2.60GHz, 2 CPUs/node, 12 cores/CPU - Nvidia Tesla V100-PCIE-32GB (32 GiB)"), row(p1, p2)))

In [18]:
# Head-to-head comparison of mkl (CPU) and cuBLAS (GPU).
# Both pairs of ColumnDataSource objects were created by earlier cells.
p1 = figure(title="Time", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
p1.xaxis.axis_label = 'N'
p1.yaxis.axis_label = 'Time ( ms )'

p2 = figure(title="Error", tools=TOOLS, width=450, height=450, background_fill_color="#fafafa")
p2.xaxis.axis_label = 'N'
p2.yaxis.axis_label = 'MSE'

# (label, colour, time source, error source), in drawing order.
compared_series = (
    ('mkl', 'blue', source_mkl_chifflot8, source_mkl_errors_chifflot8),
    ('cuBLAS', 'mediumseagreen', source_cublas_chifflot8, source_cublas_errors_chifflot8),
)

# Time panel: square markers joined by a line for each series.
for series_label, series_color, time_source, error_source in compared_series:
    p1.square(source=time_source, x='size', y='time', legend_label=series_label, fill_color=series_color, line_color=series_color)
    p1.line(source=time_source, x='size', y='time', line_color=series_color)
p1.legend.location = "top_left"

# Error panel: markers only, read from the column named after the series.
for series_label, series_color, time_source, error_source in compared_series:
    p2.square(source=error_source, x='sizes', y=series_label, legend_label=series_label, color=series_color, fill_alpha=0.4)
p2.legend.location = "top_left"

show(column(Div(text="Intel Xeon Gold 6126 (Skylake), x86_64, 2.60GHz, 2 CPUs/node, 12 cores/CPU - Nvidia Tesla V100-PCIE-32GB (32 GiB)"), row(p1, p2)))

### Energy Consumption - PAPI - Best times