In [1]:
# `display` and `HTML` are imported from the public IPython.display module;
# importing `display` from IPython.core.display is deprecated.
from IPython.display import HTML, display

# Widen the notebook container to 80% of the browser window.
display(HTML("<style>.container { width:80% !important; }</style>"))

In [11]:
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import StandardScaler
from scipy.spatial import distance
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
from sys import argv
import gzip
import copy
import json
from pandas import DataFrame
from statsmodels.tsa.arima_model import ARIMA
from ipywidgets import interact_manual, widgets, interactive
from scipy import stats
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from random import randint
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
import random
from scipy import stats
from scipy.spatial import distance
import statistics
import scipy
import paths
import RKS
import RGS
import pickle

def save_variables(proc, filename):
    """Serialize ``proc`` with pickle into ``filename + '.pickle'``.

    Parameters
    ----------
    proc : object
        Any picklable object.
    filename : str
        Path of the output file *without* the ``.pickle`` suffix; the suffix
        is appended here. An existing file is overwritten.
    """
    target_path = filename + '.pickle'
    with open(target_path, 'wb') as handle:
        pickle.dump(proc, handle)
        
def open_variables(filename):
    """Load and return the object pickled in ``filename + '.pickle'``.

    Parameters
    ----------
    filename : str
        Path of the file *without* the ``.pickle`` suffix; the suffix is
        appended here.

    Returns
    -------
    object
        Whatever object was stored in the pickle file.
    """
    # NOTE: unpickling can execute arbitrary code; only open trusted files.
    source_path = filename + '.pickle'
    with open(source_path, 'rb') as handle:
        return pickle.load(handle)

# RKS --------------------------------------------------------------
# Nsight Compute has no dedicated export-to-CSV option, so the CSV is
# captured from its stdout buffer instead. That output starts with some
# non-CSV lines that must be skipped; this function finds how many.
def ignore_lines(file):
    """Find the line that contains the Nsight Compute CSV header marker.

    The file is scanned from the top for the phrase
    "device__attribute_async_engine_count" (always printed by nsight-compute).

    Parameters
    ----------
    file : str
        Path of the text file to scan.

    Returns
    -------
    int
        0-based index of the first line containing the phrase, or -99 when
        the phrase is not found. The scan gives up once the line index
        exceeds 1000; a file that ends before the phrase appears also yields
        -99 (previously this case fell through and returned None).
    """
    with open(file) as in_file:
        for i, line in enumerate(in_file):
            if "device__attribute_async_engine_count" in line:
                return i
            if i > 1000:
                # Marker is not near the top of the file; stop scanning.
                break
    # Not found: either the scan limit was hit or the file ended first.
    return -99
            
# General open file function, takes in a nsight-compute csv filename, 
# returns a pandas table.
def open_file(filename):
    """Read an nsight-compute CSV dump into a pandas table.

    Leading lines reported by ``ignore_lines`` are skipped (none when it
    returns -99). If the 'ID' value of the first row is not 0.0 that row is
    dropped and the index renumbered from 0 -- presumably this is the units
    row emitted after the header; TODO confirm. Finally every comma inside
    string cells is removed, because on some systems nsight-compute prints
    comma-delimited numbers (e.g. 1,000 instead of 1000).

    Parameters
    ----------
    filename : str
        Path to the nsight-compute CSV file.

    Returns
    -------
    pandas.DataFrame
    """
    skip_count = ignore_lines(filename)
    if skip_count == -99:
        skip_count = 0
    frame = pd.read_csv(filename, skiprows=skip_count, low_memory=False)
    if frame.loc[0, 'ID'] != 0.0:
        # First row is not kernel 0: discard it and renumber the rows.
        frame = frame.drop(0).reset_index(drop=True)
    # Strip thousands separators so numeric columns can be parsed later.
    return frame.replace(',', '', regex=True)

# List of "agnostic" features: the metric column names that generate_DF and
# generate_PCA select from the profile table (table.loc[:, agnostic_features]).
# Only these columns are scaled / fed to the PCA; entries that are commented
# out below are excluded from the analysis.
agnostic_features = ['l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum',
                     'l1tex__t_sectors_pipe_lsu_mem_global_op_st.sum',
                     #'l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum',
                     'smsp__inst_executed.sum','smsp__thread_inst_executed_per_inst_executed.ratio',
                     #'smsp__sass_inst_executed_op_global_atom.sum',
                     'smsp__inst_executed_op_global_ld.sum',
                     'smsp__inst_executed_op_global_st.sum',
                     'smsp__inst_executed_op_shared_ld.sum',
                     'smsp__inst_executed_op_shared_st.sum',
                     #'smsp__inst_executed_op_surface_atom.sum',
                     #'smsp__inst_executed_op_surface_ld.sum',
                     #'smsp__inst_executed_op_surface_red.sum',
                     #'smsp__inst_executed_op_surface_st.sum',
                     'sass__inst_executed_global_loads',
                     'sass__inst_executed_global_stores',
                     'launch__grid_size']

# Opens the cycles-profiled-only csvs generated by
# the nsight-compute utility, obtained using the 
# run_hw.py under the accel-sim utility.
def open_cycles(filename):
    """Read a cycles-only nsight-compute CSV into a pandas table.

    Leading lines reported by ``ignore_cycle_lines`` are skipped; when that
    helper returns -99 nothing is skipped.

    Parameters
    ----------
    filename : str
        Path to the cycles-only CSV file.

    Returns
    -------
    pandas.DataFrame
    """
    header_row = ignore_cycle_lines(filename)
    skip_count = 0 if header_row == -99 else header_row
    return pd.read_csv(filename, skiprows=skip_count, low_memory=False)

def ignore_cycle_lines(file):
    """Find the line that contains the "Section Name" header.

    Same idea as ``ignore_lines`` but for the cycles-only CSV output.

    Parameters
    ----------
    file : str
        Path of the text file to scan.

    Returns
    -------
    int
        0-based index of the first line containing "Section Name", or -99
        when the phrase is not found. The scan gives up once the line index
        exceeds 1000; a file that ends before the phrase appears also yields
        -99 (previously this case fell through and returned None).
    """
    with open(file) as in_file:
        for i, line in enumerate(in_file):
            if "Section Name" in line:
                return i
            if i > 1000:
                # Header is not near the top of the file; stop scanning.
                break
    # Not found: either the scan limit was hit or the file ended first.
    return -99
            
# Generates Pandas DataFrame, normalizing data, correcting for NaNs, in some systems
# Nsight Compute might return numbers with commas which are interpreted by pandas as strings
# rather than numbers
# Also concatenate Kernel Name, Cycles and Kernel ID to the table.
# No PCA is performed
def generate_DF(table, appendable_columns=['Kernel Name', 'gpc__cycles_elapsed.avg', 'ID']):
    """Standardize the agnostic features and append bookkeeping columns (no PCA).

    Parameters
    ----------
    table : pandas.DataFrame
        Profile table containing every column in ``agnostic_features`` and
        every column in ``appendable_columns``.
    appendable_columns : list of str
        Columns of ``table`` appended after the scaled features. Columns whose
        name contains "Name" are copied as-is; all others are converted with
        ``pd.to_numeric``.

    Returns
    -------
    pandas.DataFrame
        The StandardScaler-transformed features (integer column labels
        0..n_features-1) followed by the appended columns. The frame carries
        ``table``'s index so the appended columns line up row by row even when
        that index is not 0..n-1. A DataFrame is returned even when
        ``appendable_columns`` is empty.
    """
    features = table.loc[:, agnostic_features]
    # Report (by name) every feature that contains missing values before
    # scaling, so the diagnostic is visible even if the scaler rejects the data.
    for value in agnostic_features:
        if features[value].isnull().values.any():
            print(value)
    scaled = StandardScaler().fit_transform(features.values)
    # Wrap the ndarray up front; concatenating a bare ndarray raises, which the
    # previous implementation relied on through a bare ``except``.
    x_large = pd.DataFrame(scaled, index=table.index)
    for new_col in appendable_columns:
        if "Name" in new_col:
            extra = table[[new_col]]
        else:
            extra = pd.to_numeric(table[new_col])
        x_large = pd.concat([x_large, extra], axis=1)
    return x_large

# Generates Pandas DataFrame, normalizing data, correcting for NaNs, in some systems
# Nsight Compute might return numbers with commas which are interpreted by pandas as strings
# rather than numbers
# Also concatenate Kernel Name, Cycles and Kernel ID to the table. 
# Same as above but performs PCA
def generate_PCA(table, pca_variation=0.9995, appendable_columns=['Kernel Name', 'gpc__cycles_elapsed.avg', 'ID'], debug=False):
    """Standardize the agnostic features, run PCA and append bookkeeping columns.

    Parameters
    ----------
    table : pandas.DataFrame
        Profile table containing every column in ``agnostic_features`` and
        every column in ``appendable_columns``.
    pca_variation : float or int
        Passed unchanged as the first argument of ``sklearn``'s ``PCA``
        (``n_components``).
    appendable_columns : list of str
        Columns of ``table`` appended after the principal components. Columns
        whose name contains "Name" are copied as-is; all others are converted
        with ``pd.to_numeric``.
    debug : bool
        Unused; kept so existing callers keep working.

    Returns
    -------
    (pandas.DataFrame, numpy.ndarray)
        The DataFrame holds the columns 'principal component 1..k' followed by
        the appended columns and carries ``table``'s index so rows stay
        aligned; the array is the raw PCA projection of the scaled features.
    """
    features = table.loc[:, agnostic_features]
    # Report (by name) every feature that contains missing values.
    for value in agnostic_features:
        if features[value].isnull().values.any():
            print(value)
    x_large = StandardScaler().fit_transform(features.values)
    pca = PCA(pca_variation)
    principalComponents_Large = pca.fit_transform(x_large)
    component_names = ['principal component '+str(x+1) for x in range(pca.n_components_)]
    # Reuse table's index: with the default 0..n-1 index of the projection the
    # concat below would misalign rows whenever table is indexed differently.
    principalDf_Large = pd.DataFrame(data=principalComponents_Large,
                                     columns=component_names,
                                     index=table.index)
    for new_col in appendable_columns:
        if "Name" in new_col:
            extra = table[[new_col]]
        else:
            extra = pd.to_numeric(table[new_col])
        principalDf_Large = pd.concat([principalDf_Large, extra], axis=1)
    return principalDf_Large, principalComponents_Large

# Clusters the DataFrame and principal-component array returned by generate_PCA above.
# Runs KMeans while sweeping the number of clusters over range(starting_from, ending_at).
# column_variable is the column used to compute the projected runtime and its error.

def kmeans_clustering(dataFrame,principalComponents_dataFrame,starting_from=1, ending_at=20, column_variable='gpc__cycles_elapsed.avg',print_output=False):
    """Sweep the KMeans cluster count and project the total of ``column_variable``.

    For every cluster count in ``range(starting_from, ending_at)`` the rows of
    ``principalComponents_dataFrame`` are clustered with
    ``KMeans(random_state=4)``. One representative row is picked per group
    (first row, random row, median row, maximum row); its ``column_variable``
    value times the group size is that group's projected total.

    Side effect: ``dataFrame['Segments']`` is overwritten with the labels of
    the most recent cluster count.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        One row per kernel; must contain ``column_variable``, 'Kernel Name'
        and 'ID'. Rows must be in the same order as
        ``principalComponents_dataFrame``.
    principalComponents_dataFrame : array-like
        Data handed to ``KMeans.fit``.
    starting_from, ending_at : int
        Bounds of the sweep; ``ending_at`` is exclusive and is capped at
        ``len(dataFrame) + 1``.
    column_variable : str
        Column used for the projections, errors and speedups.
    print_output : bool
        Print a per-cluster-count report when True.

    Returns
    -------
    dict
        Lists with one entry per cluster count. 'errors' and 'speedups' are
        based on the *first* row of each group. 'random_choices_id' and the
        'random_ids' inside 'random_per_K' are positions within the group
        sorted by ``column_variable`` (not kernel IDs). The 'center_choices_*'
        and 'random_choices_speedups' keys are never filled by this function.
    """
    total_runtime = dataFrame[column_variable].sum()
    complete_groups = []
    random_per_K = []
    if(ending_at > len(dataFrame)):
        ending_at = len(dataFrame)+1
    # 'random_per_K' is registered up front so the key exists even when the
    # sweep is empty; it refers to the list that is filled in below.
    results = {'random_choices_projections':[], 'random_choices_vectors':[], 'random_choices_names':[],
               'random_choices_id':[], 'random_choices_speedups':[],'first_choices_projections':[],
               'center_choices_projections':[], 'center_choices_vectors':[], 'center_choices_names':[],
               'center_choices_id':[], 'center_choices_speedups':[], 'center_choices_errors': [],
               'complete_groups':[],
               'first_choices_vectors':[], 'first_choices_names':[], 'first_choices_id':[], 'group_count':[],
               'total_runtime':total_runtime, 'group_number': [], 'errors': [], 'speedups': [], 'number_of_kernels': [],
               'random_per_K': random_per_K}
    # The sweep variable gets its own name: the previous inner ``for i in
    # range(10)`` overwrote it, so the printed "For i groups" was wrong.
    for n_clusters in range(starting_from, ending_at):
        random_choices = []       # Value of a randomly selected kernel per group
        random_choices_name = []
        random_choices_id = []
        mean_choices = []         # Middle element of the group sorted by column_variable
        max_choices = []          # Largest value of the group
        first_choices = []        # First chronological value of the group
        first_choices_name = []
        first_choices_id = []
        group_count = []          # Number of elements inside each group
        complete_groups_df = []
        per_group_random = []
        k_means = KMeans(n_clusters=n_clusters, random_state=4).fit(principalComponents_dataFrame)
        dataFrame['Segments'] = k_means.labels_
        for group in np.unique(k_means.labels_):
            temp_df = dataFrame.loc[dataFrame['Segments'] == group].reset_index(drop=True)
            complete_groups_df.append(temp_df)
            group_size = len(temp_df)
            group_count.append(group_size)
            first_choices.append(temp_df.loc[0,column_variable])
            first_choices_name.append(temp_df.loc[0,'Kernel Name'])
            first_choices_id.append(temp_df.loc[0,'ID'])
            temp_df_sorted = temp_df.sort_values(column_variable).reset_index(drop=True)
            # randint is inclusive on both ends, so the upper bound must be
            # group_size - 1; the old bound of group_size produced invalid
            # positions that a bare except silently mapped to position 0.
            random_choice = random.randint(0, group_size-1)
            random_vals = []
            random_names = []
            random_ids = []
            for _ in range(10):
                sample = random.randint(0, group_size-1)
                # Value, name and id now all describe the same sampled row.
                random_vals.append(temp_df_sorted.loc[sample,column_variable] * group_size)
                random_names.append(temp_df_sorted.loc[sample, 'Kernel Name'])
                random_ids.append(sample)
            # All positions below are valid for a non-empty group, so no
            # exception handling is needed.
            random_choices.append(temp_df_sorted.loc[random_choice,column_variable])
            random_choices_id.append(random_choice)
            random_choices_name.append(temp_df_sorted.loc[random_choice, 'Kernel Name'])
            mean_choices.append(temp_df_sorted.loc[group_size//2,column_variable])
            max_choices.append(temp_df_sorted.loc[group_size-1, column_variable])
            per_group_random.append({'random_vals': random_vals, 'random_names': random_names, 'random_ids': random_ids})
        complete_groups.append(complete_groups_df)
        random_runtime = [value * count for value, count in zip(random_choices, group_count)]
        mean_runtime = [value * count for value, count in zip(mean_choices, group_count)]
        max_runtime = [value * count for value, count in zip(max_choices, group_count)]
        first_runtime = [value * count for value, count in zip(first_choices, group_count)]
        random_per_K.append(per_group_random)
        if(print_output):
            print('For '+str(n_clusters)+ ' groups')
            print('----------------------------------------------')
            print('The actual run time is: '+str(total_runtime))
            print('----------------- Projections ----------------')
            print(' ')
            print('The random value run time is: '+str(np.sum(random_runtime)))
            print('The error is '+ str(np.sum(random_runtime) / total_runtime ))
            print('The mean value run time is: '+str(np.sum(mean_runtime)))
            print('The error is '+ str(np.sum(mean_runtime) / total_runtime ))
            print('The max value run time is: '+str(np.sum(max_runtime)))
            print('The error is '+ str(np.sum(max_runtime) / total_runtime ))
            print('The first value run time is: '+str(np.sum(first_runtime)))
            print('The error is '+ str(np.sum(first_runtime) / total_runtime ))
            print('The speedup is '+ str(total_runtime / np.sum(first_choices)))
            print('The reduced number of kernels '+str(len(first_runtime)))
            print('The total number of kernels '+str(len(dataFrame)))
            print('The first choice vector is '+str(first_choices))
            print('The number of elements per group is '+str(group_count))
            print('The names of the first choices are '+str(first_choices_name))
            print('The kernel IDs of the first choices are '+str(first_choices_id))
            print(' ')
        results['errors'].append(np.abs((np.sum(first_runtime)-total_runtime)) / total_runtime)
        results['speedups'].append(total_runtime / np.sum(first_choices))
        results['first_choices_vectors'].append(first_choices)
        results['first_choices_projections'].append(first_runtime)
        results['first_choices_names'].append(first_choices_name)
        results['first_choices_id'].append(first_choices_id)
        results['random_choices_vectors'].append(random_choices)
        results['random_choices_projections'].append(random_runtime)
        results['random_choices_names'].append(random_choices_name)
        results['random_choices_id'].append(random_choices_id)
        results['group_count'].append(group_count)
        # NOTE(review): this records the last group *label* of the sweep step
        # (not the number of groups); kept as-is for existing consumers.
        results['group_number'].append(group)
        # NOTE(review): this appends the same cumulative list on every step
        # (per-step groups live in complete_groups[k]); kept as-is for
        # existing consumers -- confirm whether complete_groups_df was meant.
        results['complete_groups'].append(complete_groups)
        results['number_of_kernels'].append(len(dataFrame))
    return results
#kmeans_clustering(finalDf_sorted, principalComponents_Large)

# Returns the ID of the element closest to the group's centers
def cluster_centers(dataFrame, principalComponents_dataFrame, k_means, debug=False):
    """Return, per cluster, the index label of the row nearest to its center.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Must contain a 'Segments' column with the cluster label of each row
        and the coordinate columns, selected by the regex 'principal'.
    principalComponents_dataFrame :
        Not used by this function.
    k_means :
        Fitted clustering object exposing ``labels_`` and ``cluster_centers_``.
    debug : bool
        Print intermediate distances when True.

    Returns
    -------
    list
        One ``dataFrame`` index label per unique value of ``k_means.labels_``
        (in sorted label order): the member with the smallest euclidean
        distance to that cluster's center. Ties keep the earliest row.
    """
    centroids = k_means.cluster_centers_
    nearest_ids = []
    for label in np.unique(k_means.labels_):
        members = dataFrame.loc[dataFrame['Segments'] == label].filter(regex='principal')
        row_labels = members.index.values.tolist()
        # Work on positional row numbers; row_labels maps them back.
        members = members.reset_index(drop=True)
        first_point = members.loc[0, :].values.tolist()
        best_label = row_labels[0]
        if debug:
            print('The clusters centers are: '+str(centroids[label]))
            print('The first data point is : '+str(first_point))
        best_distance = distance.euclidean(first_point, centroids[label])
        if debug:
            print(best_distance)
        for position, row_label in enumerate(row_labels):
            candidate = distance.euclidean(members.loc[position, :].values.tolist(),
                                           centroids[label])
            if candidate < best_distance:
                best_distance, best_label = candidate, row_label
        if debug:
            print(best_distance, best_label)
        nearest_ids.append(best_label)
    if debug:
        print('Returns from function \nNew iteration\n\n')
    return nearest_ids

def point_closest_to_all(dataFrame, principalComponents_dataFrame, k_means, debug=False):
    """Return, per cluster, the index label of the member closest to all others.

    For every group the member whose summed euclidean distance to the other
    members of the same group is smallest is selected.
    NOTE(review): the original body was an unfinished stub that raised
    AttributeError (``.index.value``) and returned nothing; the selection
    criterion implemented here is inferred from the function name -- confirm.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Must contain a 'Segments' column with the cluster label of each row
        and the coordinate columns, selected by the regex 'principal'.
    principalComponents_dataFrame :
        Not used by this function (kept for signature parity with
        ``cluster_centers``).
    k_means :
        Fitted clustering object exposing ``labels_``.
    debug : bool
        Print the winning total distance and index label per group when True.

    Returns
    -------
    list
        One ``dataFrame`` index label per unique value of ``k_means.labels_``
        (in sorted label order). Ties keep the earliest row.

    Raises
    ------
    ValueError
        If a label in ``k_means.labels_`` has no rows in ``dataFrame``.
    """
    closest_ids = []
    for group in np.unique(k_means.labels_):
        temp_df = dataFrame.loc[dataFrame['Segments'] == group]
        temp_grouped_pca_df = temp_df.filter(regex='principal')
        original_indeces = temp_grouped_pca_df.index.values.tolist()
        if not original_indeces:
            raise ValueError('No rows with Segments == '+str(group))
        points = temp_grouped_pca_df.values.astype(float)
        # Row i of the pairwise matrix summed = total distance from member i
        # to every member of the group.
        total_distances = distance.cdist(points, points, metric='euclidean').sum(axis=1)
        best_position = int(np.argmin(total_distances))
        if(debug):
            print(total_distances[best_position], original_indeces[best_position])
        closest_ids.append(original_indeces[best_position])
    return closest_ids
    
def check_results(dataFrame, best_choices, group_counts, column_variable='gpc__cycles_elapsed.avg',print_debug=True):
    """Project the total of ``column_variable`` from a set of representative rows.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Table containing ``column_variable``; rows are looked up with
        ``.loc`` using the integer value of each entry of ``best_choices``.
    best_choices : sequence
        Index labels of the representative rows (converted with ``int``).
    group_counts : sequence
        Weight of each representative, in the same order as ``best_choices``.
    column_variable : str
        Column holding the measured value.
    print_debug : bool
        Print the actual total, the projection and their ratio when True.

    Returns
    -------
    dict
        'total_runtime', 'projected_runtime', 'error' (relative absolute
        error of the projection), 'speedup' (total divided by the summed
        representative values), 'reduced_runtime' and 'runtime_list'.
    """
    total_runtime = pd.to_numeric(dataFrame[column_variable]).sum()
    runtime_list = []
    projected_runtime = 0.0
    for position, choice in enumerate(best_choices):
        representative = float(dataFrame.loc[int(choice), column_variable])
        runtime_list.append(representative)
        projected_runtime += representative * group_counts[position]
    if print_debug:
        print('The actual runtime is: '+str(total_runtime))
        print('The projected runtime with the borrowed K-Means is: '+str(projected_runtime))
        print('The ratio is '+str(projected_runtime/total_runtime))
    reduced_runtime = np.sum(runtime_list)
    return {
        'total_runtime': total_runtime,
        'projected_runtime': projected_runtime,
        'error': np.abs(projected_runtime - total_runtime)/total_runtime,
        'speedup': total_runtime / reduced_runtime,
        'reduced_runtime': reduced_runtime,
        'runtime_list': runtime_list,
    }

In [12]:
# Open the Nsight Compute CSV
table = open_file("tiny_test.csv")
# Apply PCA
# The pca_variation argument of generate_PCA (default 0.9995) is forwarded to
# sklearn's PCA and controls how many principal components are kept.
# `df` is the labelled DataFrame; `pca` is the raw array of principal components.
df, pca = generate_PCA(table)
#results = kmeans_clustering(df, pca,print_output = True)
# By default the number of clusters is swept over range(1, 20), i.e. from 1 to
# 19 clusters (the upper end is capped at the number of kernels).
results = kmeans_clustering(df, pca)

In [13]:
# Print, for every cluster count of the sweep, the IDs of the kernels chosen
# to represent each group and the group weights/counts.
per_cluster_summary = zip(results['first_choices_id'], results['group_count'])
for cluster_total, (principal_ids, counts) in enumerate(per_cluster_summary, start=1):
    print("For {} clusters ".format(cluster_total))
    print("Principal Kernel: {}".format(principal_ids))
    print("Group Count: {}".format(counts))

For 1 clusters 
Principal Kernel: [0.0]
Group Count: [34]
For 2 clusters 
Principal Kernel: [1.0, 0.0]
Group Count: [32, 2]
For 3 clusters 
Principal Kernel: [1.0, 0.0, 12.0]
Group Count: [31, 2, 1]
For 4 clusters 
Principal Kernel: [6.0, 0.0, 12.0, 1.0]
Group Count: [26, 2, 1, 5]
For 5 clusters 
Principal Kernel: [6.0, 4.0, 0.0, 12.0, 2.0]
Group Count: [25, 1, 2, 1, 5]
For 6 clusters 
Principal Kernel: [1.0, 6.0, 4.0, 12.0, 8.0, 0.0]
Group Count: [4, 25, 1, 1, 2, 1]
For 7 clusters 
Principal Kernel: [1.0, 6.0, 4.0, 12.0, 8.0, 0.0, 26.0]
Group Count: [4, 23, 1, 1, 2, 1, 2]
For 8 clusters 
Principal Kernel: [2.0, 9.0, 4.0, 12.0, 1.0, 0.0, 8.0, 26.0]
Group Count: [5, 21, 1, 1, 1, 1, 2, 2]
For 9 clusters 
Principal Kernel: [13.0, 4.0, 12.0, 2.0, 0.0, 26.0, 8.0, 6.0, 1.0]
Group Count: [16, 1, 1, 3, 1, 2, 2, 7, 1]
For 10 clusters 
Principal Kernel: [13.0, 0.0, 12.0, 5.0, 4.0, 1.0, 8.0, 2.0, 26.0, 9.0]
Group Count: [16, 1, 1, 3, 1, 1, 2, 2, 2, 5]
For 11 clusters 
Principal Kernel: [13.0, 4.0

In [14]:
# Cluster the principal components (`pca`) with KMeans for one chosen number
# of clusters (n_clusters) and show which group every kernel falls into.
n_clusters = 5
fitted_kmeans = KMeans(n_clusters=n_clusters, random_state=4).fit(pca)
labels = fitted_kmeans.labels_
unique_labels = np.unique(labels)
print("Group vector: {!s}".format(labels))
print("Unique groups: {!s}".format(unique_labels))
print("There are {} different groups".format(len(unique_labels)))

Group vector: [2 2 4 4 1 4 0 0 4 0 4 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Unique groups: [0 1 2 3 4]
There are 5 different groups


In [15]:
# List every key available in the results dictionary, one per line
for result_key in results:
    print(result_key)

random_choices_projections
random_choices_vectors
random_choices_names
random_choices_id
random_choices_speedups
first_choices_projections
center_choices_projections
center_choices_vectors
center_choices_names
center_choices_id
center_choices_speedups
center_choices_errors
complete_groups
first_choices_vectors
first_choices_names
first_choices_id
group_count
total_runtime
group_number
errors
speedups
number_of_kernels
random_per_K


In [16]:
# Errors as a function of the number of clusters (one entry per cluster
# count of the sweep), computed from the first kernel of each group.
# kmeans_clustering fits KMeans with a fixed random_state=4, so the group
# composition is seeded rather than drawn freshly on every run.
# NOTE(review): the original note said different runs give different errors
# because KMeans is non-deterministic -- confirm whether that still applies
# (e.g. across library versions or platforms).

results['errors']

[0.6029342539891235,
 0.2515317369681213,
 0.2480030403286521,
 0.20360076841346875,
 0.1417213902736887,
 0.06342859241343914,
 0.01290918881733483,
 0.19104994533078895,
 0.00047700711594153686,
 0.09569361682959641,
 0.09694929209562891,
 0.1121320005702361,
 0.046034898902943586,
 0.023535689554819796,
 0.0757274831887754,
 0.04074227748500013,
 0.029817404386681706,
 0.0024332196266228564,
 0.008450794197165594]