# SGEMM GPU kernel performance


This data set measures the running time of a matrix-matrix product A*B = C, where all matrices have size 2048 x 2048, using a parameterizable SGEMM GPU kernel with 241600 possible parameter combinations. For each tested combination, 4 runs were performed and their results are reported as the last 4 columns. All times are measured in milliseconds.
      
Attribute Information:<br>
Independent variables:<br>
1-2 **MWG, NWG:** per-matrix 2D tiling at workgroup level: {16, 32, 64, 128} (integer)<br>
3 **KWG:** inner dimension of 2D tiling at workgroup level: {16, 32} (integer)<br>
4-5 **MDIMC, NDIMC:** local workgroup size: {8, 16, 32} (integer)<br>
6-7 **MDIMA, NDIMB:** local memory shape: {8, 16, 32} (integer)<br>
8 **KWI:** kernel loop unrolling factor: {2, 8} (integer)<br>
9-10 **VWM, VWN:** per-matrix vector widths for loading and storing: {1, 2, 4, 8} (integer)<br>
11-12 **STRM, STRN**: enable stride for accessing off-chip memory within a single thread: {0, 1} (categorical)<br>
13-14 **SA, SB**: per-matrix manual caching of the 2D workgroup tile: {0, 1} (categorical)<br>

Output:<br>
15-18 **Run1, Run2, Run3, Run4:** performance times in milliseconds for 4 independent runs using the same parameters. They range between 13.25 and 3397.08.   

Preprocessing:<br>
Compute the mean of the four Output columns in one column "Run"

### Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
from scipy import stats
from sklearn import preprocessing, datasets, linear_model, metrics
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, Normalizer
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

import sys
sys.path.insert(1, '../RegressionAlgorithms/')
from knn import *
import linearRegressionNumpy

### Get the Data

In [None]:
# Load the raw SGEMM measurements (comma-separated) into a DataFrame.
data = pd.read_csv('sgemm_product.csv', sep=',', engine='python')

In [None]:
# Bare expression: the notebook renders the loaded frame as the cell output.
data

### Basic Data Information 

In [None]:
# Print column names, non-null counts and dtypes of the loaded frame.
data.info()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) per numeric column.
data.describe()

*Check for missing values*

In [None]:
# Count the missing entries in every column and show the per-column totals.
missing_per_column = data.isnull().sum(axis=0)
print(missing_per_column)

*Merge the Runs Columns*

In [None]:
# Collapse the four repeated measurements into one target column 'Run'
# holding their row-wise mean.
run_columns = ['Run1 (ms)', 'Run2 (ms)', 'Run3 (ms)', 'Run4 (ms)']
df = data[run_columns]
av_row = df.mean(axis=1)

# The original cell called data.drop(columns=...) and threw the result away,
# which copied the whole frame without changing `data`. That dead statement is
# removed. NOTE(review): the raw run columns are deliberately left in `data`,
# because the custom KNN section further down still reads them from `data`;
# they must NOT be used as model inputs, since 'Run' is their mean.
data['Run'] = av_row

data

### Exploratory Data Analysis

**Run Time**

*Histogram of the Mean Run Time Distribution*

In [None]:
# Histogram of the mean run time in 100 ms wide bins.
sns.set_style('darkgrid')
plt.figure(figsize=(20, 5))

# Derive the last bin edge from the data. With the previous fixed edges
# (0..3300) every configuration slower than 3300 ms fell outside the bins
# and was silently left out of the plot.
bin_width = 100
upper_edge = (np.floor(data['Run'].max() / bin_width) + 1) * bin_width
bins = np.arange(0, upper_edge + bin_width, bin_width).tolist()

data['Run'].hist(bins=bins)
plt.xticks(bins)
plt.xlabel('Mean run time (ms)')
plt.ylabel('Number of configurations')
plt.show()

**Run Time vs MWG & NWG**

*Box Plot Run Time vs MWG & NWG*

In [None]:
# Side-by-side box plots of the mean run time for both workgroup tiling
# parameters; previously only MWG was drawn although the section covers both.
fig, axes = plt.subplots(1, 2, figsize=(16, 6), sharey=True)
for ax, column in zip(axes, ['MWG', 'NWG']):
    sns.boxplot(x=data[column], y=data['Run'], ax=ax)
    ax.set_title('Run vs ' + column)
plt.tight_layout()
plt.show()

**Run Time vs KWG**

*Box Plot on Run Time vs KWG*

In [None]:
# Distribution of the mean run time for each KWG setting.
plt.figure(figsize=(4, 6))
sns.boxplot(data=data, x='KWG', y='Run')
plt.show()

**Run Time vs MDIMC & NDIMC**

*Box Plot on Run Time vs MDIMC & NDIMC*

In [None]:
# Side-by-side box plots of the mean run time for both local workgroup size
# parameters; previously only NDIMC was drawn although the section covers both.
fig, axes = plt.subplots(1, 2, figsize=(12, 6), sharey=True)
for ax, column in zip(axes, ['MDIMC', 'NDIMC']):
    sns.boxplot(x=data[column], y=data['Run'], ax=ax)
    ax.set_title('Run vs ' + column)
plt.tight_layout()
plt.show()

**Run Time vs MDIMA & NDIMB**

*Box Plot on Run Time vs MDIMA & NDIMB*

In [None]:
# Box plots of the mean run time for the local memory shape parameters.
# The original cell plotted 'VWM' here (copy-paste from the vector-width
# section), so MDIMA and NDIMB were never shown.
fig, axes = plt.subplots(1, 2, figsize=(16, 6), sharey=True)
for ax, column in zip(axes, ['MDIMA', 'NDIMB']):
    sns.boxplot(x=data[column], y=data['Run'], ax=ax)
    ax.set_title('Run vs ' + column)
plt.tight_layout()
plt.show()

**Run Time vs KWI**

*Box Plot on Run Time vs KWI*

In [None]:
# Distribution of the mean run time for each loop-unrolling factor (KWI).
plt.figure(figsize=(8, 6))
sns.boxplot(data=data, x='KWI', y='Run')
plt.show()

**Run Time vs VWM & VWN**

*Box Plot on Run Time vs VWM & VWN*

In [None]:
# Side-by-side box plots of the mean run time for both vector-width
# parameters; previously only VWM was drawn although the section covers both.
fig, axes = plt.subplots(1, 2, figsize=(16, 6), sharey=True)
for ax, column in zip(axes, ['VWM', 'VWN']):
    sns.boxplot(x=data[column], y=data['Run'], ax=ax)
    ax.set_title('Run vs ' + column)
plt.tight_layout()
plt.show()

**Run Time vs STRM & STRN**

*Box Plot on Run Time vs STRM & STRN*

In [None]:
# Side-by-side box plots of the mean run time for both stride flags;
# previously only STRM was drawn although the section covers both.
fig, axes = plt.subplots(1, 2, figsize=(16, 6), sharey=True)
for ax, column in zip(axes, ['STRM', 'STRN']):
    sns.boxplot(x=data[column], y=data['Run'], ax=ax)
    ax.set_title('Run vs ' + column)
plt.tight_layout()
plt.show()

**Run Time vs SA & SB**

*Box Plot on Run Time vs SA & SB*

In [None]:
# Side-by-side box plots of the mean run time for both manual-caching flags;
# previously only SA was drawn although the section covers both.
fig, axes = plt.subplots(1, 2, figsize=(16, 6), sharey=True)
for ax, column in zip(axes, ['SA', 'SB']):
    sns.boxplot(x=data[column], y=data['Run'], ax=ax)
    ax.set_title('Run vs ' + column)
plt.tight_layout()
plt.show()

## Data Pre-processing

*The data set contains no nominal (string-valued) attributes, so no encoding is needed.*

**Data Preparation**

In [None]:
# Feature matrix: every column except the target AND the four raw run
# measurements. 'Run' is the mean of Run1..Run4, so leaving those columns in X
# leaks the target into the inputs and makes every model look near-perfect.
# errors='ignore' keeps this cell working if the raw columns were already
# removed from `data`.
raw_run_columns = ['Run1 (ms)', 'Run2 (ms)', 'Run3 (ms)', 'Run4 (ms)']
X = data.drop(columns=['Run'] + raw_run_columns, errors='ignore')

# Target: mean run time in milliseconds.
y = data['Run']

*Training and Test Split*

In [None]:
# Hold out 30% of the rows as the test set. The seed is fixed so that the
# split, and therefore every metric reported below, is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42)

## Regression Tasks

*Regression Algorithms from Sklearn*

### Linear Regression

In [None]:
# Fit scikit-learn's ordinary least squares model and time the training step.
start = time.time()
model = linear_model.LinearRegression()
model.fit(X_train, y_train)
end = time.time()

# Predictions on the held-out rows; kept as y_pred1 because the custom
# regression cells later compare against this reference.
y_pred1 = model.predict(X_test)

# Learned coefficients followed by the intercept
print('Coefficients: \n', model.coef_, model.intercept_)

**Evaluation metrics**

In [None]:
# 10-fold cross-validation must be scored against the TRUE targets. The
# original call passed the model's own predictions (y_pred1) as the target,
# which only measures how well the model reproduces itself. The training
# split is used so the test set stays reserved for the hold-out metrics below.
cv_scores = cross_val_score(model, X_train, y_train, cv=10)
print('cross validation score: ', cv_scores)

# Hold-out metrics on the test set.
# The coefficient of determination: 1 is perfect prediction
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred1))
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred1))
mse = metrics.mean_squared_error(y_test, y_pred1)
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))
print("Time: %0.2f" % (end - start), "seconds")

### KNN Regression

In [None]:
# Fit scikit-learn's 3-nearest-neighbours regressor and time the training step.
start = time.time()
model = KNeighborsRegressor(n_neighbors=3)
model.fit(X_train, y_train)
end = time.time()

# Predictions on the held-out rows
y_pred = model.predict(X_test)

**Evaluation metrics**

In [None]:
# 10-fold cross-validation must be scored against the TRUE targets. The
# original call passed the model's own predictions (y_pred) as the target,
# which only measures how well the model reproduces itself. The training
# split is used so the test set stays reserved for the hold-out metrics below.
cv_scores = cross_val_score(model, X_train, y_train, cv=10)
print('cross validation score: ', cv_scores)

# Hold-out metrics on the test set.
# The coefficient of determination: 1 is perfect prediction
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
mse = metrics.mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))
print("Time: %0.2f" % (end - start), "seconds")

### Decision Tree Regression

In [None]:
# Fit a seeded decision tree regressor and time the training step.
start = time.time()
model = DecisionTreeRegressor(random_state=0)
model.fit(X_train, y_train)
end = time.time()

# Predictions on the held-out rows
y_pred = model.predict(X_test)

**Evaluation metrics**

In [None]:
# 10-fold cross-validation must be scored against the TRUE targets. The
# original call passed the model's own predictions (y_pred) as the target,
# which only measures how well the model reproduces itself. The training
# split is used so the test set stays reserved for the hold-out metrics below.
cv_scores = cross_val_score(model, X_train, y_train, cv=10)
print('cross validation score: ', cv_scores)

# Hold-out metrics on the test set.
# The coefficient of determination: 1 is perfect prediction
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
mse = metrics.mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))
print("Time: %0.2f" % (end - start), "seconds")

### Random Forest Regressor

In [None]:
# Fit a random forest regressor and time the training step. The forest is
# seeded (like the decision tree cell) so that repeated runs of the notebook
# report the same metrics.
start = time.time()
model = RandomForestRegressor(random_state=0).fit(X_train, y_train)
end = time.time()

# Make predictions using the testing set
y_pred = model.predict(X_test)

**Evaluation metrics**

In [None]:
# 10-fold cross-validation must be scored against the TRUE targets. The
# original call passed the model's own predictions (y_pred) as the target,
# which only measures how well the model reproduces itself. The training
# split is used so the test set stays reserved for the hold-out metrics below.
cv_scores = cross_val_score(model, X_train, y_train, cv=10)
print('cross validation score: ', cv_scores)

# Hold-out metrics on the test set.
# The coefficient of determination: 1 is perfect prediction
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
mse = metrics.mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))
print("Time: %0.2f" % (end - start), "seconds")

# Our Regression Algorithms

### Linear Regression Function (MSE)

In [None]:
# Remove a leftover 'bias' column from each feature frame before running the
# custom regression. NOTE(review): presumably linearRegressionNumpy adds this
# column in place -- confirm against that module.
for frame_name, frame in (('X_train', X_train), ('X_test', X_test), ('X', X)):
    try:
        del frame['bias']
    except KeyError:
        # Only "column not present" is expected here; any other error should
        # surface instead of being hidden by a bare except.
        print('no bias to remove ' + frame_name)


print('\n GPU: Linear Regression Function (MSE):')
alphaMethod = 'const'
mu = 1
# Convergence thresholds, from coarse to fine; one training run per threshold.
convCritList = [1e5, 1e4, 1e3, 1e2, 1e1, 1e0, 1e-1, 1e-2]
print('epsilon       | sum total error:   | sum relative error:  | iterations | Rsquare |    time/s')
for convergenceCriterion in convCritList:
    start = time.time()
    weights, score, iterations = linearRegressionNumpy.linearRegression(
        X_train, y_train, mu=mu,
        convergenceCriterion=convergenceCriterion, lossFunction='MSE',
        alphaMethod=alphaMethod, printOutput=False)
    end = time.time()
    yPred2 = linearRegressionNumpy.predictLinearRegression(X_test, weights)

    # Deviation from scikit-learn's linear regression predictions (y_pred1).
    # These are signed sums, so positive and negative deviations cancel.
    deviation = yPred2 - y_pred1
    print('{:13.0E} | {:19}| {:21}| {:11}| {:8.4f}| {:10.5f}'.format(
        convergenceCriterion,
        str(np.sum(deviation)),
        str(np.sum(deviation / y_pred1)),
        str(iterations),
        r2_score(y_test, yPred2),
        end - start))

# After the loop, `weights` and `yPred2` belong to the last (smallest) epsilon.
print('\nFinal weigths for smallest epsilon = {:2.0E}:'.format(convCritList[-1]))
print('weights = ', weights, '\n')

# (title, plotted values, y label, output file) for each figure.
figures = [
    ('GPUPerformance: scikit prediction', y_pred1,
     'performance time (ms)', 'GPUPerformance_scikit_prediction_MSE.jpeg'),
    ('GPUPerformance: our prediction (MSE)', yPred2,
     'performance time (ms)', 'GPUPerformance_our_prediction_MSE.jpeg'),
    ('GPUPerformance: our prediction (MSE) vs. scikit prediction', yPred2 - y_pred1,
     'total error', 'GPUPerformance_total_error_MSE.jpeg'),
    ('GPUPerformance: our prediction (MSE) vs. scikit prediction', (yPred2 - y_pred1) / y_pred1,
     'relative error', 'GPUPerformance_relative_error_MSE.jpeg'),
]
for title, values, y_label, file_name in figures:
    plt.title(title)
    plt.plot(values)
    plt.ylabel(y_label)
    plt.savefig(file_name, bbox_inches='tight')
    plt.show()


**Evaluation metrics**

In [None]:
# Hold-out metrics of the custom MSE regression (predictions of the last run).
print('\n GPU: Linear Regression Function (MSE):')
r_squared = r2_score(y_test, yPred2)  # 1 would be a perfect prediction
mean_abs_err = metrics.mean_absolute_error(y_test, yPred2)
mean_sq_err = metrics.mean_squared_error(y_test, yPred2)
print('Coefficient of determination: %.2f' % r_squared)
print('Mean Absolute Error:', mean_abs_err)
print('Mean Squared Error:', mean_sq_err)
print('Root Mean Squared Error:', np.sqrt(mean_sq_err))

### Linear Regression Function (MAE)

In [None]:
# Remove a leftover 'bias' column from each feature frame before running the
# custom regression. NOTE(review): presumably linearRegressionNumpy adds this
# column in place -- confirm against that module.
for frame_name, frame in (('X_train', X_train), ('X_test', X_test), ('X', X)):
    try:
        del frame['bias']
    except KeyError:
        # Only "column not present" is expected here; any other error should
        # surface instead of being hidden by a bare except.
        print('no bias to remove ' + frame_name)


print('\n GPU: Linear Regression Function (MAE):')
alphaMethod = 'const'
mu = 1
# Convergence thresholds, from coarse to fine; one training run per threshold.
convCritList = [1e5, 1e4, 1e3, 1e2, 1e1, 1e0, 1e-1, 1e-2, 1e-3, 1e-4]
print('epsilon       | sum total error:   | sum relative error:  | iterations | Rsquare |    time/s')
for convergenceCriterion in convCritList:
    start = time.time()
    weights, score, iterations = linearRegressionNumpy.linearRegression(
        X_train, y_train, mu=mu,
        convergenceCriterion=convergenceCriterion, lossFunction='MAE',
        alphaMethod=alphaMethod, printOutput=False)
    end = time.time()
    yPred2 = linearRegressionNumpy.predictLinearRegression(X_test, weights)

    # Deviation from scikit-learn's linear regression predictions (y_pred1).
    # These are signed sums, so positive and negative deviations cancel.
    deviation = yPred2 - y_pred1
    print('{:13.0E} | {:19}| {:21}| {:11}| {:8.4f}| {:10.5f}'.format(
        convergenceCriterion,
        str(np.sum(deviation)),
        str(np.sum(deviation / y_pred1)),
        str(iterations),
        r2_score(y_test, yPred2),
        end - start))

# After the loop, `weights` and `yPred2` belong to the last (smallest) epsilon.
print('\nFinal weigths for smallest epsilon = {:2.0E}:'.format(convCritList[-1]))
print('weights = ', weights, '\n')

# (title, plotted values, y label, output file) for each figure.
# The stray '(' in the two comparison titles ("((MAE)") is fixed. The output
# file names still contain '(MAE' on purpose: they are kept byte-identical in
# case something outside this notebook references them.
figures = [
    ('GPUPerformance: scikit prediction', y_pred1,
     'performance time (ms)', 'GPUPerformance_scikit_prediction_MAE.jpeg'),
    ('GPUPerformance: our prediction (MAE)', yPred2,
     'performance time (ms)', 'GPUPerformance_our_prediction_(MAE.jpeg'),
    ('GPUPerformance: our prediction (MAE) vs. scikit prediction', yPred2 - y_pred1,
     'total error', 'GPUPerformance_total_error_(MAE.jpeg'),
    ('GPUPerformance: our prediction (MAE) vs. scikit prediction', (yPred2 - y_pred1) / y_pred1,
     'relative error', 'GPUPerformance_relative_error_(MAE.jpeg'),
]
for title, values, y_label, file_name in figures:
    plt.title(title)
    plt.plot(values)
    plt.ylabel(y_label)
    plt.savefig(file_name, bbox_inches='tight')
    plt.show()


**Evaluation metrics**

In [None]:
# Hold-out metrics of the custom MAE regression (predictions of the last run).
print('\n GPU: Linear Regression Function (MAE):')
r_squared = r2_score(y_test, yPred2)  # 1 would be a perfect prediction
mean_abs_err = metrics.mean_absolute_error(y_test, yPred2)
mean_sq_err = metrics.mean_squared_error(y_test, yPred2)
print('Coefficient of determination: %.2f' % r_squared)
print('Mean Absolute Error:', mean_abs_err)
print('Mean Squared Error:', mean_sq_err)
print('Root Mean Squared Error:', np.sqrt(mean_sq_err))

### Linear Regression Function (RMSE)

In [None]:
# Remove a leftover 'bias' column from each feature frame before running the
# custom regression. NOTE(review): presumably linearRegressionNumpy adds this
# column in place -- confirm against that module.
for frame_name, frame in (('X_train', X_train), ('X_test', X_test), ('X', X)):
    try:
        del frame['bias']
    except KeyError:
        # Only "column not present" is expected here; any other error should
        # surface instead of being hidden by a bare except.
        print('no bias to remove ' + frame_name)


print('\n GPU: Linear Regression Function (RMSE):')
alphaMethod = 'const'
mu = 1
# Only the coarsest threshold is run for RMSE; the finer thresholds
# (1e4 ... 1e-4) were already disabled in the original notebook.
convCritList = [1e5]
print('epsilon       | sum total error:   | sum relative error:  | iterations | Rsquare |    time/s')
for convergenceCriterion in convCritList:
    start = time.time()
    weights, score, iterations = linearRegressionNumpy.linearRegression(
        X_train, y_train, mu=mu,
        convergenceCriterion=convergenceCriterion, lossFunction='RMSE',
        alphaMethod=alphaMethod, printOutput=False)
    end = time.time()
    yPred2 = linearRegressionNumpy.predictLinearRegression(X_test, weights)

    # Deviation from scikit-learn's linear regression predictions (y_pred1).
    # These are signed sums, so positive and negative deviations cancel.
    deviation = yPred2 - y_pred1
    print('{:13.0E} | {:19}| {:21}| {:11}| {:8.4f}| {:10.5f}'.format(
        convergenceCriterion,
        str(np.sum(deviation)),
        str(np.sum(deviation / y_pred1)),
        str(iterations),
        r2_score(y_test, yPred2),
        end - start))

# After the loop, `weights` and `yPred2` belong to the last (smallest) epsilon.
print('\nFinal weigths for smallest epsilon = {:2.0E}:'.format(convCritList[-1]))
print('weights = ', weights, '\n')

# (title, plotted values, y label, output file) for each figure.
# The second title previously read 'SeoulBikeSharing: ...', copied from
# another notebook; this data set is the GPU performance one.
figures = [
    ('GPUPerformance: scikit prediction', y_pred1,
     'performance time(ms)', 'GPUPerformance_scikit_prediction_RMSE.jpeg'),
    ('GPUPerformance: our prediction (RMSE)', yPred2,
     'performance time (ms)', 'GPUPerformance_our_prediction_RMSE.jpeg'),
    ('GPUPerformance: our prediction (RMSE) vs. scikit prediction', yPred2 - y_pred1,
     'total error', 'GPUPerformance_total_error_RMSE.jpeg'),
    ('GPUPerformance: our prediction (RMSE) vs. scikit prediction', (yPred2 - y_pred1) / y_pred1,
     'relative error', 'GPUPerformance_relative_error_RMSE.jpeg'),
]
for title, values, y_label, file_name in figures:
    plt.title(title)
    plt.plot(values)
    plt.ylabel(y_label)
    plt.savefig(file_name, bbox_inches='tight')
    plt.show()


**Evaluation metrics**

In [None]:
# Hold-out metrics of the custom RMSE regression (predictions of the last run).
print('\n GPU: Linear Regression Function (RMSE):')
r_squared = r2_score(y_test, yPred2)  # 1 would be a perfect prediction
mean_abs_err = metrics.mean_absolute_error(y_test, yPred2)
mean_sq_err = metrics.mean_squared_error(y_test, yPred2)
print('Coefficient of determination: %.2f' % r_squared)
print('Mean Absolute Error:', mean_abs_err)
print('Mean Squared Error:', mean_sq_err)
print('Root Mean Squared Error:', np.sqrt(mean_sq_err))

### KNN

**Dictionary creation to apply the mathematical functions of the algorithm**

Training Data Option:
- 0: All Data (except the target)
- 1: X_train/y_train (train_test_split)

In [None]:
# Selects the data the custom KNN is built from:
# 0 = the whole frame, 1 = only the rows of the train/test split.
training_data_option = 1

In [None]:
# Pick the rows the custom KNN works with, according to training_data_option.
if training_data_option == 0:
    # Option 0: use the complete frame as training data.
    training_data = data
elif training_data_option == 1:
    # Option 1: use the rows of the existing train/test split, matched by
    # index label (row order of `data` is kept).
    in_train = data.index.isin(X_train.index)
    in_test = data.index.isin(X_test.index)
    training_data = data[in_train]
    test_data = data[in_test]

training_data

In [None]:
# Convert the frames into plain Python structures for the custom KNN:
# the training rows become a list of row dicts, and (for the split option)
# the test rows become a dict keyed by index label.
if training_data_option in (0, 1):
    training_dictionary = training_data.to_dict('records')
if training_data_option == 1:
    test_dictionary = test_data.to_dict('index')

In [None]:
# Bare expression: the notebook shows the list of training row dicts.
training_dictionary

In [None]:
# Number of training rows handed to the custom KNN.
len(training_dictionary)

**Forecasting instances**

In [None]:
# Bare expression: the notebook shows the true targets of the rows to forecast.
y_test

**Algorithm parameters**

In [None]:
# Configuration of the custom KNN.
mode = 1  # 1 = KNeighbors; 2 = RadiusNeighbors
n_neighbours = 5
distance_function = 1  # 1 = Euclidean Distance; 2 = Manhattan Distance
radius = 0  # 0 indicates no radius
label = 'Run'
# Use the 14 kernel parameters as inputs. The previous list contained the four
# raw run columns, whose mean IS the label, so the model was effectively given
# the answer (target leakage).
# NOTE(review): assumes `features` names the input columns used for the
# distance computation -- confirm against knn.py.
features = ['MWG', 'NWG', 'KWG', 'MDIMC', 'NDIMC', 'MDIMA', 'NDIMB',
            'KWI', 'VWM', 'VWN', 'STRM', 'STRN', 'SA', 'SB']

**Algorithm initialization**

In [None]:
# Build the project-local KNN (from the wildcard `knn` import) from the
# training records and the parameters of the "Algorithm parameters" cell.
# NOTE(review): argument meanings are inferred from the variable names;
# confirm the positional order against knn.py.
knn = KNN(training_dictionary, label, features, mode, n_neighbours, distance_function, radius)

**Execution of the algorithm (forecasting)**

In [None]:
# Forecast every row of the test split with the custom KNN and time the run.
results = []

start = time.time()

if training_data_option == 0:
    # training_dictionary is a positional list built from the full frame,
    # whose index is the default 0-based RangeIndex from read_csv, so index
    # label x sits at position x. The previous `x-1` looked up the wrong row
    # (and wrapped to the last row for label 0).
    results = [knn.run(training_dictionary[x]) for x in y_test.index]
elif training_data_option == 1:
    # test_dictionary is keyed by index label, so the label is used directly.
    results = [knn.run(test_dictionary[x]) for x in y_test.index]

end = time.time()

**Predictions**

In [None]:
# Align the KNN forecasts with the index labels of the true targets.
predictions = pd.Series(data=results, index=y_test.index)

In [None]:
# Bare expression: the notebook shows the forecast series.
predictions

**Evaluation metrics**

In [None]:
# The coefficient of determination: 1 is perfect prediction
print('Coefficient of determination: %.2f' % r2_score(y_test, predictions))
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, predictions))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, predictions))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, predictions)))
print("Time: %0.2f" % (end - start), "seconds")