In [44]:
import pandas as pd
import numpy as np

# Read in Data

In [45]:
# Load the master dataframe: all columns, including `memory_bound` and
# `master_index`. Each row corresponds to a distinct architecture and run.
# `master_index` is cast to int64 so it can be used as an integer index.

df_master = (
    pd.read_parquet('df_master.parquet')
    .astype({'master_index': 'int64'})
)
df_master.shape

(78330, 122)

In [46]:
# Joined table: the V100 and P100 measurements of the same run share one row.
# Runs that are missing on either architecture have been removed.
# Every column name carries the architecture as a suffix (e.g. "_V100"),
# including the master index (e.g. `master_index_V100`).
# Both master-index columns are cast to int64 so they can be used as integer indices.

df_joined = pd.read_parquet('df_master_joined.parquet').astype(
    {'master_index_P100': 'int64', 'master_index_V100': 'int64'}
)
df_joined.shape

(32291, 244)

In [47]:
# This is an "empty" dataframe (meaning no rows), containing
# column names for numerical data only.
# The column names can be used to index the columns of the
# scaled data (in master_scaled_data.npy)

df_columns_only = pd.read_parquet('df_column_reference.parquet')
df_columns_only

Unnamed: 0,inst_per_warp,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,inst_replay_overhead,shared_load_transactions_per_request,shared_store_transactions_per_request,local_load_transactions_per_request,local_store_transactions_per_request,gld_transactions_per_request,...,cf_fu_utilization,special_fu_utilization,half_precision_fu_utilization,single_precision_fu_utilization,double_precision_fu_utilization,flop_hp_efficiency,flop_sp_efficiency,flop_dp_efficiency,sysmem_read_utilization,sysmem_write_utilization


In [48]:
# This is a 2-D numpy array corresponding to the numerical data in 'df_master.parquet'
# The data has been scaled using the StandardScaler in scikit-learn

# Notes:
#   - The row indices correspond to the `master_index` column of 'df_master.parquet'
#   - The columns correspond to the columns in 'df_column_reference.parquet'.
#     (e.g. can use `df_columns_only.columns.get_loc(column_name)` to get the column index)

master_data_scaled = np.load('master_scaled_data.npy')
master_data_scaled.shape

(78330, 116)

# P100 to V100 Memory-bound Classifier

In [49]:
from sklearn.model_selection import train_test_split

# Work on a copy of the joined (P100 + V100) table.
df = df_joined.copy()

# Features: rows of the scaled matrix, selected through the P100 master index
# (these values ARE in master_data_scaled).
data_index = df['master_index_P100'].values
data = master_data_scaled[data_index]

# Labels: the V100 memory-bound column (these values are NOT in
# master_data_scaled), plus the V100 master index of the same run.
target_index = df['master_index_V100'].values
target = df['memory_bound_V100'].values

# Keep every feature column except the two DRAM throughput columns.
excluded_columns = ['dram_read_throughput', 'dram_write_throughput']
indices = [
    position
    for position, name in enumerate(df_columns_only.columns)
    if name not in excluded_columns
]
data = data[:, indices]

In [50]:
# Split features and labels into train and test parts: 33% of the rows are
# held out for testing, and the fixed random_state keeps the split repeatable.
# (data_index / target_index can be passed as additional arrays here when the
# master indices of each split are needed.)
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=.33, random_state=42
)

In [51]:
from sklearn.ensemble import RandomForestClassifier

# Fit a 1000-tree random forest on the training split, using 2 parallel jobs.
# random_state is pinned so the fitted forest, and every metric computed from
# it below, is reproducible on Restart & Run All; without a seed each run
# trains a different model.
clf = RandomForestClassifier(n_jobs=2, n_estimators=1000, random_state=42)
model = clf.fit(X_train, y_train)  # fit() returns the estimator, so `model` refers to `clf`

In [52]:
# Simple test-set accuracy calculation: the fraction of held-out rows whose
# predicted label equals the true label.
y_test_predict = clf.predict(X_test)

# Element-wise comparison yields a boolean array; summing it counts the hits.
correct = int((y_test_predict == y_test).sum())
accuracy = correct / len(y_test)
accuracy

0.9717556535610397

In [53]:
from sklearn.metrics import confusion_matrix

# Confusion matrix on the test split, normalised per true class:
# each row is divided by its own total, so every row sums to 1.
cm_counts = confusion_matrix(y_test, y_test_predict)
row_totals = cm_counts.sum(axis=1, keepdims=True)
cm = cm_counts.astype('float') / row_totals
print(cm)

[[0.98778833 0.01221167]
 [0.17100372 0.82899628]]


In [54]:
# Predict on every joined row (this includes the rows used for training) and
# show the sum of the predicted labels divided by the number of rows, i.e.
# the share of positive predictions when the labels are 0/1.
all_prediction = clf.predict(data)
prediction_total = all_prediction.sum()
prediction_total / len(all_prediction)

0.0981078319036264

In [55]:
# Inspect the P100 master indices that were used to select the feature rows.
data_index

array([    0,     1,     2, ..., 67322, 67323, 67324])

In [56]:
# Collect the master indices of every joined row predicted as label 1.
# For each such row both the P100 index (from data_index) and the V100 index
# (from target_index) are kept, interleaved as [p100, v100, p100, v100, ...].
# The vectorised form replaces a Python loop and keeps the integer dtype of
# the index arrays even when no row is predicted positive.
predicted_positive = np.asarray(all_prediction) == 1
true_indices = np.column_stack(
    (data_index[predicted_positive], target_index[predicted_positive])
).ravel()


# Save true_indices to 'predicted_v100_memory_bound_indices.npy'
np.save('predicted_v100_memory_bound_indices.npy', true_indices)

In [57]:
# Inspect the collected master indices (P100 and V100 entries of each predicted pair).
true_indices

array([   18, 20014,    20, ..., 68255, 45123, 68258])

# Generate "Predicted" Dataframe

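Let's create a dataframe of the cases that the classifier predicts to be memory-bound on the V100. For every such case, both the P100 row and the matching V100 row are included.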

In [58]:
# Select the rows of df_master whose `master_index` appears in true_indices.
# `isin` replaces a per-row `val in true_indices` test, which scanned the whole
# numpy array once for every row of df_master; the selected rows and their
# order are unchanged.
is_predicted = df_master['master_index'].isin(true_indices).to_numpy()
real_index = np.flatnonzero(is_predicted).tolist()  # positional row numbers, as a list

df_predicted = df_master.iloc[real_index].copy()

# Drop `master_index` and `memory_bound` before writing (drop() already
# returns a new frame), then write one CSV file per architecture.
df_write = df_predicted.drop(['master_index', 'memory_bound'], axis=1)
df_write[df_write['architecture'] == 'V100'].to_csv('predicted_true_v100.csv', index=False)
df_write[df_write['architecture'] == 'P100'].to_csv('predicted_true_p100.csv', index=False)

In [59]:
# Read the two exported CSV files back in and show the shape of the V100 one.
df_test_v100 = pd.read_csv('predicted_true_v100.csv')
df_test_p100 = pd.read_csv('predicted_true_p100.csv')
df_test_v100.shape

(3168, 120)

In [60]:
# Show the current working directory; the relative file names used in this
# notebook are resolved against it.
# NOTE(review): the saved output exposes an absolute local path - consider
# clearing it before sharing the notebook.
pwd

'/Users/yzamora/power/nvidia_gpus/all_apps'

In [61]:
# Display the P100 rows that were read back from 'predicted_true_p100.csv'.
df_test_p100

Unnamed: 0,inst_per_warp,kernelname,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,inst_replay_overhead,shared_load_transactions_per_request,shared_store_transactions_per_request,local_load_transactions_per_request,local_store_transactions_per_request,...,single_precision_fu_utilization,double_precision_fu_utilization,flop_hp_efficiency,flop_sp_efficiency,flop_dp_efficiency,sysmem_read_utilization,sysmem_write_utilization,architecture,application_name,input
0,56.000559,bpnn_adjust_weights_cuda,1.0,0.999995,0.999994,0.002107,0.0,0.0,0.0,0.0,...,2,1,0.0,0.000000,0.047838,0,1,P100,backprop,-100128_bpnn_adjust_weights_cuda
1,56.000559,bpnn_adjust_weights_cuda,1.0,0.999995,0.999994,0.002062,0.0,0.0,0.0,0.0,...,2,1,0.0,0.000000,0.047830,0,1,P100,backprop,-100144_bpnn_adjust_weights_cuda
2,56.000559,bpnn_adjust_weights_cuda,1.0,0.999995,0.999994,0.003885,0.0,0.0,0.0,0.0,...,2,1,0.0,0.000000,0.047925,0,1,P100,backprop,-100176_bpnn_adjust_weights_cuda
3,56.000559,bpnn_adjust_weights_cuda,1.0,0.999995,0.999994,0.002053,0.0,0.0,0.0,0.0,...,2,1,0.0,0.000000,0.047793,0,1,P100,backprop,-100224_bpnn_adjust_weights_cuda
4,56.000558,bpnn_adjust_weights_cuda,1.0,0.999995,0.999994,0.002061,0.0,0.0,0.0,0.0,...,2,1,0.0,0.000000,0.048214,0,1,P100,backprop,-100400_bpnn_adjust_weights_cuda
5,56.000558,bpnn_adjust_weights_cuda,1.0,0.999995,0.999994,0.002044,0.0,0.0,0.0,0.0,...,2,1,0.0,0.000000,0.048588,0,1,P100,backprop,-100432_bpnn_adjust_weights_cuda
6,56.000555,bpnn_adjust_weights_cuda,1.0,0.999995,0.999994,0.002052,0.0,0.0,0.0,0.0,...,2,1,0.0,0.000000,0.048593,0,1,P100,backprop,-100832_bpnn_adjust_weights_cuda
7,56.000554,bpnn_adjust_weights_cuda,1.0,0.999995,0.999994,0.002062,0.0,0.0,0.0,0.0,...,2,1,0.0,0.000000,0.048200,0,1,P100,backprop,-100992_bpnn_adjust_weights_cuda
8,56.000554,bpnn_adjust_weights_cuda,1.0,0.999995,0.999994,0.002062,0.0,0.0,0.0,0.0,...,2,1,0.0,0.000000,0.047534,0,1,P100,backprop,-101136_bpnn_adjust_weights_cuda
9,56.000548,bpnn_adjust_weights_cuda,1.0,0.999995,0.999994,0.003801,0.0,0.0,0.0,0.0,...,2,1,0.0,0.000000,0.047556,0,1,P100,backprop,-102112_bpnn_adjust_weights_cuda
