In [2]:
import dill as pickle  # Use dill instead of pickle

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.tree import export_graphviz
import graphviz
import matplotlib.pyplot as plt

In [9]:
# Load the objects from the file
with open('objects.pkl', 'rb') as f:
    objects_to_load = pickle.load(f)

ranges_dev = objects_to_load['ranges_dev']
ranges_dev_backup = objects_to_load['ranges_dev_backup']
y_limit_dictionary = objects_to_load['y_limit_dictionary']
ranges_impl_arch = objects_to_load['ranges_impl_arch']
ranges_impl_arch_backup = objects_to_load['ranges_impl_arch_backup']
ranges_memr = objects_to_load['ranges_memr']
ranges_anr = objects_to_load['ranges_anr']
ranges_skew = objects_to_load['ranges_skew']
ranges_ann = objects_to_load['ranges_ann']
ranges_crs = objects_to_load['ranges_crs']
ranges_size = objects_to_load['ranges_size']
ranges_regularity = objects_to_load['ranges_regularity']
cat_list = objects_to_load['cat_list']
ranges_list = objects_to_load['ranges_list']

# functions
set_category = objects_to_load['set_category']
set_category2 = objects_to_load['set_category2']
filter_ranges_impl_arch = objects_to_load['filter_ranges_impl_arch']
calculate_format_wins = objects_to_load['calculate_format_wins']

---
# Load benchmark dataframes

In [5]:
%%time
groupreps = pd.read_csv('synthetic_benchmarks_all-devices_all.csv')
print('groupreps:', groupreps.shape)
group_system_best = pd.read_csv('synthetic_benchmarks_all-devices_best-of.csv')
print('group_system_best:', group_system_best.shape)

def remove_devices(df, devices_to_discard):
    for dtd in devices_to_discard:
        df = df[df['System'] != dtd]
    return df

devices_to_discard = []

groupreps = remove_devices(groupreps, devices_to_discard)
group_system_best = remove_devices(group_system_best, devices_to_discard)

print('\nAfter discarding specific devices')
print('groupreps:', groupreps.shape)
print('group_system_best:', group_system_best.shape)

groupreps: (1222089, 38)
group_system_best: (190519, 38)

After discarding specific devices
groupreps: (1222089, 38)
group_system_best: (190519, 38)
CPU times: user 5.16 s, sys: 702 ms, total: 5.86 s
Wall time: 6.01 s


# Decision tree classifier (format_name)

In [12]:
ranges_dev = ranges_dev_backup
# ranges_dev = ['AMD-EPYC-64']
# ranges_dev = ['Grace-H100']

features_list = ['mem_footprint', 'avg_nnz_per_row', 'skew', 'avg_num_neighbours', 'cross_row_similarity']
groupdata = group_system_best
print('Max performance per System')
for system in ranges_dev:
    tmp = groupdata[groupdata['System']==system]
    # tmp = tmp[tmp['format_name'].isin(['cu-CSR', 'Ada-CSR', 'cu-COO', 'DASP'])]
    # tmp = tmp[tmp['format_name'].isin(['Ada-CSR', 'cu-COO', 'DASP'])]
    X_train, X_test, y_train, y_test = train_test_split(tmp[features_list], tmp['format_name'], random_state=0)
    clf = DecisionTreeClassifier(random_state=0)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(system, '\t', accuracy_score(y_test, y_pred))

#     fig = plt.figure(figsize=(15,10))
#     _ = tree.plot_tree(clf, 
#                        feature_names=features_list,  
#                        class_names='format_name',
#                        filled=True)

#     dot_data = export_graphviz(clf, out_file=None, 
#                                feature_names=features_list,  
#                                class_names=clf.classes_,  
#                                filled=True, rounded=True,  
#                                special_characters=True)  
#     graph = graphviz.Source(dot_data)  
#     graph.render("decision_tree")
#     graph.view()

Max performance per System
NVIDIA-P100 	 0.8806451612903226
NVIDIA-V100 	 0.8729908603844941
NVIDIA-A100 	 0.9096589429835981
NVIDIA-H100 	 0.8932599003409389
AMD-MI250 	 0.8418568056648308
AMD-EPYC-24 	 0.8100104821802935
AMD-EPYC-64 	 0.7789142407553108
INTEL-XEON-14 	 0.8071278825995807
INTEL-ICY-16 	 0.8397587201678468
INTEL-SAPPHIRE-56 	 0.8502491476527668
ARM-NEON-80 	 0.7455450733752621
ARM-GRACE-72 	 0.7435090479937058
IBM-POWER9-32 	 0.835167714884696


# Decision tree regressor (gflops)

In [15]:
features_list = ['mem_footprint', 'avg_nnz_per_row', 'skew', 'avg_num_neighbours', 'cross_row_similarity']
groupdata = group_system_best

validation_features_df = pd.read_csv('../benchmark_results/validation_matrices_features.csv', sep='\t')
validation_features_df = validation_features_df[['matrix', 'mem_footprint', 'nnz-r-avg', 'skew_coeff', 'num-neigh-avg', 'cross_row_sim-avg']]
validation_features_df.rename(columns={'nnz-r-avg':'avg_nnz_per_row', 
                                       'skew_coeff':'skew', 
                                       'num-neigh-avg':'avg_num_neighbours', 
                                       'cross_row_sim-avg':'cross_row_similarity'}, 
                              inplace=True)
# print(validation_features_df)

print('Max performance per System')
for system in ranges_dev:
    tmp = groupdata[groupdata['System']==system]  
    X_train, X_test, y_train, y_test = train_test_split(tmp[features_list], tmp['gflops'], random_state=0)
    clf = DecisionTreeRegressor(random_state=0)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    y_pred2 = clf.predict(validation_features_df[features_list])
    # print(X_test)
    np.set_printoptions(precision=2)
    # print(y_pred)
    # print(np.asarray(y_test))
    r2 = r2_score(y_pred, y_test)
    print(system, r2)
#     mse = mean_squared_error(y_test, y_pred)
#     rmse = np.sqrt(mse)
#     tmp_max = tmp['gflops'].max()
#     print(f'RMSE: {rmse:.2f} (while max perf for {system} is {tmp_max:.2f})')

#     fig = plt.figure(figsize=(15,10))
#     _ = tree.plot_tree(clf, 
#                        feature_names=features_list,  
#                        class_names='format_name',
#                        filled=True)

#     dot_data = export_graphviz(clf, out_file=None, 
#                                feature_names=features_list,  
#                                filled=True, rounded=True,  
#                                special_characters=True)  
#     graph = graphviz.Source(dot_data)  
#     graph.render("decision_tree")
#     graph.view()

Max performance per System
NVIDIA-P100 0.920778334485196
NVIDIA-V100 0.88202719893277
NVIDIA-A100 0.8998111282109865
NVIDIA-H100 0.9488798138422984
AMD-MI250 0.892949671353204
AMD-EPYC-24 0.9822253313125049
AMD-EPYC-64 0.957222001297772
INTEL-XEON-14 0.9543999427971586
INTEL-ICY-16 0.968659750284039
INTEL-SAPPHIRE-56 0.9698380506420259
ARM-NEON-80 0.9646741625867808
ARM-GRACE-72 0.9612445482899971
IBM-POWER9-32 0.949290902597442


In [None]:
for index, row in validation_features_df.iterrows():
    print(round(y_pred2[index],2), '\t', row['matrix'])