In [34]:
import pandas as pd
import seaborn as sns
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import os
import shutil

# Folder that holds result.csv; figures are written to its "fig/" subfolder.
output_folder = 'data/output/'
# output_folder = 'run_1/mpi/data/output/'
# output_folder = 'run_2(mac)/data/output/'

# Start each run from an empty figure folder: remove any previous one
# recursively, then recreate it. makedirs also creates missing parent folders,
# so this cell works even when output_folder does not exist yet.
fig_folder = output_folder + "fig/"
if os.path.exists(fig_folder):
    shutil.rmtree(fig_folder)
os.makedirs(fig_folder, exist_ok=True)

# Font sizes used by the plotting cells: axis labels and tick labels.
fontsize = 22
labelsize = 18

In [35]:
# Load the experiment results from the configured output folder.
df = pd.read_csv(f"{output_folder}result.csv")

# Relabel the columns by position; the number of names must match the
# number of columns in the file, otherwise pandas raises ValueError.
df.columns = [
    'algo',
    'bucket update time',
    'core',
    'dataset',
    'degree call time',
    'execution time',
    'neighborhood call time',
    'num bucket update',
    'num degree computation',
    'num neighborhood computation',
    'num subgraph call',
    'param_s',
    'subgraph computation time',
]
print(df.shape)
# df.head()

(142, 13)


Unnamed: 0,algo,bucket update time,core,dataset,degree call time,execution time,neighborhood call time,num bucket update,num degree computation,num neighborhood computation,num subgraph call,param_s,subgraph computation time
0,naive_nbr,0.092375,"{'3': 186, '2': 186, '13': 186, '10': 186, '17...",bin_2,0.0,18.734513,1.280646,30995.0,0.0,30995.0,500.0,1.0,17.204791
1,naive_nbr,0.085289,"{'3': 186, '2': 186, '13': 186, '10': 186, '17...",bin_2,0.0,18.199238,1.236897,30995.0,0.0,30995.0,500.0,1.0,16.72401
2,naive_nbr,0.087244,"{'3': 186, '2': 186, '13': 186, '10': 186, '17...",bin_2,0.0,18.236245,1.240288,30995.0,0.0,30995.0,500.0,1.0,16.756796
3,naive_nbr,0.084747,"{'3': 186, '2': 186, '13': 186, '10': 186, '17...",bin_2,0.0,17.923768,1.219865,30995.0,0.0,30995.0,500.0,1.0,16.471322
4,improved_nbr,0.045308,"{'3': 186, '2': 186, '10': 186, '13': 186, '17...",bin_2,0.0,25.368895,0.895802,21207.0,0.0,21207.0,500.0,1.0,16.612531


In [18]:
# Disabled loader, presumably for an earlier result.csv layout: it names only 11
# columns (no 'num subgraph call' / 'subgraph computation time'). Kept for reference.
# # df = pd.read_csv("data/output/result.csv", header=None)
# df = pd.read_csv(output_folder+"result.csv")
# df.columns = ['algo', 'bucket update time', 'core', 'dataset', 'degree call time', 'execution time', 'neighborhood call time', 'num bucket update', 'num degree computation', 'num neighborhood computation', 'param_s']
# print(df.shape)
# # df

(663, 11)


### Sample plotting: per-dataset comparison of the algorithms

In [33]:
# When True, every figure is written as a PDF under output_folder + "fig/".
# When False, figures are shown inline and only the first dataset is plotted.
save = True

sns.set(rc={'figure.figsize': (5, 4)})
sns.set_style("whitegrid", {'axes.grid': True})

# Axis labels for the measured columns.
good_name = {
    'bucket update time': 'Bucket time (s)',
    'execution time': "Time (s)",
    'neighborhood call time': "NB time (s)",
    'subgraph computation time': "Sub time(s)",
    'num bucket update': "#bucket",
    'num neighborhood computation': "#nb",
    'num subgraph call': "#subgraph"
}

# Display names for the identifiers stored in the 'algo' column.
good_name_algo = {
    'naive_nbr': "NBR",
    'improved_nbr': "NBR(1)",
    'improved2_nbr': "NBR(2)",
    'naive_degree': "DEG"
}

# One bar chart per (dataset, metric): x = algorithm, y = metric.
group_list = ['dataset']
for key, item in df.groupby(group_list, as_index=False):
    # Grouping by a one-element list yields a 1-tuple key on pandas >= 2.0 and
    # a bare value on older versions; unwrap it so the title and the filename
    # are built from the dataset name on both.
    dataset = key[0] if isinstance(key, tuple) else key

    # Build a new frame instead of assigning into the group slice
    # (avoids SettingWithCopyWarning), then map algo ids to display names.
    item = item.assign(param_s=item['param_s'].astype(int))
    item = item.replace({"algo": good_name_algo})

    for y_axis in ['bucket update time', 'execution time', 'neighborhood call time', 'num bucket update', 'num neighborhood computation', 'subgraph computation time', 'num subgraph call']:

        # NOTE(review): 'DEG' is not listed in `order`, so naive_degree rows
        # are not drawn here -- confirm this is intended.
        sns.barplot(x='algo', y=y_axis, data=item, palette='colorblind', order=['NBR', 'NBR(1)', 'NBR(2)'])
        plt.xlabel('Algorithm', fontsize=fontsize)
        plt.ylabel(good_name[y_axis], fontsize=fontsize)
        # plt.yscale("log")
        plt.xticks(fontsize=labelsize)
        plt.yticks(fontsize=labelsize)
        plt.title(dataset)
        plt.tight_layout()
        filename = f"{dataset} {y_axis}".replace(" ", "_")
        if save:
            plt.savefig(output_folder + "fig/" + filename + ".pdf")
        else:
            print(filename)
            plt.show()
        # Clear the current figure so the next metric starts from a blank canvas.
        plt.clf()

    # Comment out the following to see full results
    if not save:
        break

<Figure size 360x288 with 0 Axes>

### Effect of parameter "s" in improved2_nbr

In [5]:
# When True, every figure is written as a PDF under output_folder + "fig/".
# When False, figures are shown inline and only the first dataset is plotted.
save = True

# One bar chart per (dataset, metric) for improved2_nbr only: x = s, y = metric.
group_list = ['dataset']
for key, item in df[df['algo'] == 'improved2_nbr'].groupby(group_list, as_index=False):
    # Grouping by a one-element list yields a 1-tuple key on pandas >= 2.0 and
    # a bare value on older versions; unwrap it so the title and the filename
    # are built from the dataset name on both.
    dataset = key[0] if isinstance(key, tuple) else key

    # Integer s values give clean tick labels; build a new frame instead of
    # assigning into the group slice (avoids SettingWithCopyWarning).
    item = item.assign(param_s=item['param_s'].astype(int))

    for y_axis in ['bucket update time', 'execution time', 'neighborhood call time', 'num bucket update', 'num neighborhood computation']:

        sns.barplot(x='param_s', y=y_axis, data=item, palette='colorblind')
        plt.xlabel(r'$s$', fontsize=fontsize)
        plt.ylabel(good_name[y_axis], fontsize=fontsize)
        # plt.yscale("log")
        plt.xticks(fontsize=labelsize)
        plt.yticks(fontsize=labelsize)

        plt.title(dataset)
        # plt.legend(loc='best', fontsize=labelsize-4)
        plt.tight_layout()
        filename = f"{dataset} param_s {y_axis}".replace(" ", "_")
        if save:
            plt.savefig(output_folder + "fig/" + filename + ".pdf")
        else:
            print(filename)
            plt.show()
        # Clear the current figure so the next metric starts from a blank canvas.
        plt.clf()

    # Comment out the following to see full results
    if not save:
        break


<Figure size 360x288 with 0 Axes>

In [28]:
# Inspect the column labels currently set on df.
df.columns

Index(['algo', 'bucket update time', 'core', 'dataset', 'degree call time',
       'execution time', 'neighborhood call time', 'num bucket update',
       'num degree computation', 'num neighborhood computation', 'param_s'],
      dtype='object')

### Statistics of the run: number of result rows per (dataset, algo, param_s) group

In [30]:
# Print how many result rows exist for each (dataset, algo, param_s) combination.
group_list = ['dataset', 'algo', 'param_s']
for combo, rows in df.groupby(group_list, as_index=False):
    n_rows = len(rows)
    print(combo, n_rows)

('default', 'improved2_nbr', 16.0) 100
('default', 'improved_nbr', 13.0) 100
('default', 'naive_degree', 13.0) 100
('default', 'naive_nbr', 1.0) 100


In [31]:
# df[(df['algo'] == "improved2_nbr") & (df['dataset'] == 'bin_4') & (df['param_s'] == 6)]