In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv('all_data.csv', index_col = 0)

# Drop columns with NaN
df = df.dropna(axis=1,how='any')

if False:
    # Turn confusing index string into a unique hash, and drop the string
    # (NOT USED FOR NOW.. )
    import hashlib
    def _my_hash(s):
        return int(
            hashlib.sha256(s.encode('utf-8')).hexdigest(), 16
        ) % 10**12
    hash_list = [_my_hash(val) for val in df.index]
    df['unique_hash'] = hash_list
    df.reset_index(drop=True, inplace=True)
    df = df.set_index('unique_hash')
else:
    df.reset_index(drop=True, inplace=True)

df.head()

Unnamed: 0,inst_per_warp,kernelname,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,inst_replay_overhead,shared_load_transactions_per_request,shared_store_transactions_per_request,local_load_transactions_per_request,local_store_transactions_per_request,...,single_precision_fu_utilization,double_precision_fu_utilization,flop_hp_efficiency,flop_sp_efficiency,flop_dp_efficiency,sysmem_read_utilization,sysmem_write_utilization,architecture,application_name,input
0,56.00056,bpnn_adjust_weights_cuda,1.0,0.999995,0.999994,0.002058,0.0,0.0,0.0,0.0,...,2,1,0.0,0.0,0.047434,0,1,P100,backprop,-100000_bpnn_adjust_weights_cuda
1,184.0,bpnn_layerforward_CUDA,1.0,0.943953,0.761888,0.000633,0.645833,0.696429,0.0,0.0,...,6,0,0.0,0.004695,0.0,0,1,P100,backprop,-100000_bpnn_layerforward_CUDA
2,56.0056,bpnn_adjust_weights_cuda,1.0,0.99995,0.999948,0.016234,0.0,0.0,0.0,0.0,...,3,2,0.0,0.0,0.043993,0,1,P100,backprop,-10000_bpnn_adjust_weights_cuda
3,184.0,bpnn_layerforward_CUDA,1.0,0.943953,0.761888,0.006918,0.645833,0.696429,0.0,0.0,...,6,0,0.0,0.003017,0.0,0,1,P100,backprop,-10000_bpnn_layerforward_CUDA
4,56.00056,bpnn_adjust_weights_cuda,1.0,0.999995,0.999994,0.002492,0.0,0.0,0.0,0.0,...,2,1,0.0,0.0,0.048306,0,1,P100,backprop,-100016_bpnn_adjust_weights_cuda


In [15]:
# Define peak memory bandwidth
peak_mem_bw = {
    "V100": 898.048 * (1024*1024*1024),
    "P100": 549.0 * (1024*1024*1024),
}
mem_bw_thresh = 0.75

# Add a column specifying if the case is memory bound
df_archs = []
for arch in peak_mem_bw.keys():
    df_tmp = df[df["architecture"] == arch].copy()
    new_col = (
        df_tmp["dram_read_throughput"] + df_tmp["dram_write_throughput"]
    ) / peak_mem_bw[arch]
    new_col = new_col > mem_bw_thresh
    df_tmp["memory_bound"] = new_col
    df_archs.append(df_tmp.copy())
df_merged = pd.concat(df_archs).sort_index()
df_merged.head()

Unnamed: 0,inst_per_warp,kernelname,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,inst_replay_overhead,shared_load_transactions_per_request,shared_store_transactions_per_request,local_load_transactions_per_request,local_store_transactions_per_request,...,double_precision_fu_utilization,flop_hp_efficiency,flop_sp_efficiency,flop_dp_efficiency,sysmem_read_utilization,sysmem_write_utilization,architecture,application_name,input,memory_bound
0,56.00056,bpnn_adjust_weights_cuda,1.0,0.999995,0.999994,0.002058,0.0,0.0,0.0,0.0,...,1,0.0,0.0,0.047434,0,1,P100,backprop,-100000_bpnn_adjust_weights_cuda,False
1,184.0,bpnn_layerforward_CUDA,1.0,0.943953,0.761888,0.000633,0.645833,0.696429,0.0,0.0,...,0,0.0,0.004695,0.0,0,1,P100,backprop,-100000_bpnn_layerforward_CUDA,False
2,56.0056,bpnn_adjust_weights_cuda,1.0,0.99995,0.999948,0.016234,0.0,0.0,0.0,0.0,...,2,0.0,0.0,0.043993,0,1,P100,backprop,-10000_bpnn_adjust_weights_cuda,False
3,184.0,bpnn_layerforward_CUDA,1.0,0.943953,0.761888,0.006918,0.645833,0.696429,0.0,0.0,...,0,0.0,0.003017,0.0,0,1,P100,backprop,-10000_bpnn_layerforward_CUDA,False
4,56.00056,bpnn_adjust_weights_cuda,1.0,0.999995,0.999994,0.002492,0.0,0.0,0.0,0.0,...,1,0.0,0.0,0.048306,0,1,P100,backprop,-100016_bpnn_adjust_weights_cuda,False


In [16]:
# Convert bool "memory_bound" column to integers
df_merged["memory_bound"]= df_merged["memory_bound"].astype('int')

In [17]:
# Save intermediate dataframe to parquet file
df_merged.to_parquet('df.clean.parquet.gzip', compression='gzip')

## How to read it back (very fast), if needed:
#df_merged = pd.read_parquet('df.clean.parquet.gzip')

In [18]:
# Dropping everything not "V100"
df_dataset = df_merged[df_merged['architecture'] == 'P100']

# Note: To include "P100" and "V100", use `df_dataset = df_merged.copy()`

In [19]:
# List of columns we want to drop...

# User specified
drop_cols = [
    'memory_bound',
    'dram_read_throughput',
    'dram_write_throughput',
]
# Automatically detect non numerical columns
for col in df_dataset:
    if df_dataset[col].dtype == 'object':
        drop_cols.append(col)

In [20]:
# Get prediction target (memory_bound column):
target = df_dataset['memory_bound'].values
        
# Convert numerical columns to out training/test
data = df_dataset.drop(drop_cols, axis=1).values

In [21]:
# Create reandom list of indices
np.random.seed(42)

# Choose fraction of data for training
# (rest will be for tests, for now)
frac_train = 0.7

# Shuffle data using random permutation
ind = np.random.permutation([i for i in range(len(df_dataset.index))])
n_train = int(frac_train * len(df_dataset.index))
n_test = int((1.0 - frac_train) * len(df_dataset.index))

In [22]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(data)
scaled_data = scaler.transform(data)

# Define training set
X_train = scaled_data[ind[:n_train]]
y_train = target[ind[:n_train]]

# Define test set
X_test = scaled_data[ind[n_train:]]
y_test = target[ind[n_train:]]

In [23]:
# Returns a list of weights for the specified dataset
# indices (`indices`), given the parent dataframe (`df`)
# and a dictionary of weights to apply. Application names
# that are not included in `weights` will be set to 1.0
def get_w_vec(df, indices, weights):
    w = []
    for ind in indices:
        default = 1.0
        name = df.iloc[ind]["application_name"]
        w.append(weights.get(name, default))
    return w

train_weights = get_w_vec(
    df_dataset,
    ind[:n_train],
    {
        "backprop": 1.0,
        "srad": 2.0,
    },
)   

In [24]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Create a random forest Classifier. By convention, clf means 'Classifier'
#clf = RandomForestClassifier(random_state=42)
clf = GridSearchCV(
    RandomForestClassifier(n_estimators=100),
    param_grid=dict(max_features=[110]),
    scoring='accuracy',
    cv=2
)

# Train the Classifier to take the training features and learn how they relate
# to the training y (the species)
clf.fit(X_train, y_train)

GridSearchCV(cv=2, error_score='raise-deprecating',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'max_features': [110]}, pre_dispatch='2*n_jobs',
       refit=True, return_train_score='warn', scoring='accuracy',
       verbose=0)

In [25]:
# Simple training-set accuracy calculation
y_test_predict = clf.predict(X_test)

correct = 0
for p, t in zip(y_test_predict, y_test):
    if p == t:
        correct += 1
accuracy = correct / len(y_test)
accuracy

1.0

In [26]:
sum(y_test) / len(y_test)

0.00010319917440660474