In [1]:
from matplotlib import pyplot as plt

%matplotlib ipympl

plt.rcParams['figure.figsize'] = [10, 5]
plt.rcParams['font.size'] = 7.5
# 用来正常显示中文标签
plt.rcParams['font.family'] = 'sans-serif' 
plt.rcParams['figure.autolayout'] = True
# 用来正常显示负号
plt.rcParams['axes.unicode_minus'] = False 
plt.rcParams['axes.xmargin'] = 0
plt.rcParams['axes.ymargin'] = 0

In [2]:
import numpy as np
import pandas as pd
import pylatex
pd.set_option('precision', 2)

from calculate_increases import calculate_increases
from line_chart_with_multiple_lines import line_chart_with_multiple_lines
from grouped_bar_chart import grouped_bar_chart
from parse_data import *

In [3]:
from collections import Counter
import re
import json
import glob
import os
import os.path

import pandas as pd


def parse_json_format_profile_data_directory(directory):
    types = os.listdir(directory)
    
    for type in types:
        type_directory = os.path.join(directory, type)
        
        for benchmark in os.listdir(type_directory):
            if not benchmark.endswith('.npy'):
                benchmark_filepath = os.path.join(type_directory, benchmark)

                with open(benchmark_filepath, 'r') as fp:
                    # print(benchmark_filepath)
                    benchmark_json_data = json.load(fp)

                    yield type, benchmark, benchmark_json_data


def parse_profile_data_in_directory_to_dataframe(parser, key_names, include_average=False):
    data = {
        'executable_type': [],
        'benchmark': []
    }
    
    for key_name in key_names:
        data[key_name] = []
    
    for executable_type, benchmark, key_data in parser:
        data['executable_type'].append(executable_type)
        data['benchmark'].append(benchmark)
        for key_name in key_names:
            data[key_name].append(key_data[key_name])
    
    if include_average:
        for key_name in key_names:
            executable_types_to_key_data_sums = Counter()
            executable_types_to_number_of_benchmarks = Counter()

            for executable_type, key_data in zip(data['executable_type'], data[key_name]):
                executable_types_to_key_data_sums[executable_type] += key_data
                executable_types_to_number_of_benchmarks[executable_type] += 1

            for executable_type, key_data_sum in executable_types_to_key_data_sums.items():
                data['executable_type'].append(executable_type)
                data['benchmark'].append('average')
                data[key_name].append(key_data_sum / executable_types_to_number_of_benchmarks[executable_type])
    
    return pd.DataFrame(data)

In [4]:
cache_data = parse_profile_data_in_directory_to_dataframe(
    parse_json_format_profile_data_directory('writes_on_hottest_stack_address_with_cache'),
    ["writes", "mean", "median", "std", "cache_hits", "cache_misses"]
)

In [5]:
cache_data

Unnamed: 0,executable_type,benchmark,writes,mean,median,std,cache_hits,cache_misses
0,new_loop2recursion-recursion_depth_limit-64,bf,15592,4155,628,5140,25597536,4624328
1,new_loop2recursion-recursion_depth_limit-64,bitcnts,14064,8459,11012,4624,56743620,1847476
2,new_loop2recursion-recursion_depth_limit-64,dijkstra,30169,13906,13503,10446,57084262,9670863
3,new_loop2recursion-recursion_depth_limit-64,patricia,10400,2364,2520,1525,26274965,1673930
4,new_loop2recursion-recursion_depth_limit-64,fft,1095,486,514,370,12928693,233731
5,new_loop2recursion-recursion_depth_limit-64,rawcaudio,21902,18240,20914,6062,41791437,9019772
6,new_loop2recursion-recursion_depth_limit-64,susan,439,213,224,124,43016028,228496
7,new_loop2recursion-recursion_depth_limit-64,crc_32,668,132,2,257,34090017,349372
8,new_loop2recursion-recursion_depth_limit-64,pbmsrch,6,3,4,1,60839,3344
9,new_loop2recursion-recursion_depth_limit-64,sha,156,98,104,45,15041564,42351


In [6]:
executable_type_renaming = {
    'baseline': 'No WL',
    'new_loop2recursion-recursion_depth_limit-64': 'Non Selective',
    'selective_loop2recursion': 'Selective'
}

In [7]:
filtered_cache_data = cache_data[
    cache_data['executable_type'].isin(executable_type_renaming)
].replace(
    to_replace=executable_type_renaming
).sort_values(
    by=['benchmark', 'executable_type']
)

filtered_cache_data

Unnamed: 0,executable_type,benchmark,writes,mean,median,std,cache_hits,cache_misses
25,No WL,basicmath,301,27,2,57,18804069,117496
12,Non Selective,basicmath,785,421,495,202,18755547,187997
38,Selective,basicmath,163,50,52,26,18882059,123878
13,No WL,bf,15309,365,4,2050,23344631,1775335
0,Non Selective,bf,15592,4155,628,5140,25597536,4624328
26,Selective,bf,15592,3389,1724,3823,23279182,4416604
14,No WL,bitcnts,2,1,1,0,53959018,277
1,Non Selective,bitcnts,14064,8459,11012,4624,56743620,1847476
27,Selective,bitcnts,2,1,2,0,53958770,511
20,No WL,crc_32,668,109,2,238,34155628,91254


In [8]:
import pandas as pd


def get_writes_on_the_hottest_stack_address_dataframe(dataframe, executable_types):
    extracted_and_sorted_dataframe = dataframe[dataframe['executable_type'].isin({*executable_types})].sort_values(by=['benchmark', 'executable_type'])
    
    benchmarks = extracted_and_sorted_dataframe['benchmark'].unique()
    
    dataframe_data = {
        'benchmark': benchmarks
    }
    
    for executable_type in executable_types:
        dataframe_data[executable_type] = [
            value
            for value in extracted_and_sorted_dataframe[extracted_and_sorted_dataframe['executable_type'] == executable_type]['writes'].values
        ]
    
    return pd.DataFrame(
        dataframe_data
    )


In [9]:
writes_on_the_hottest_stack_address_dataframe = get_writes_on_the_hottest_stack_address_dataframe(filtered_cache_data, ['No WL', 'Non Selective', 'Selective'])

In [10]:
writes_on_the_hottest_stack_address_dataframe

Unnamed: 0,benchmark,No WL,Non Selective,Selective
0,basicmath,301,785,163
1,bf,15309,15592,15592
2,bitcnts,2,14064,2
3,crc_32,668,668,668
4,dijkstra,6120,30169,652
5,fft,46,1095,740
6,patricia,20864,10400,9080
7,pbmsrch,3,6,6
8,qsort,240,645,641
9,rawcaudio,0,21902,22


In [11]:
writes_on_the_hottest_stack_address_dataframe_latex = writes_on_the_hottest_stack_address_dataframe.to_latex(
    caption='Writes on the Hottest Stack Address',
    label='table:writes_on_the_hottest_stack_address_no_wl_non_selective_selective',
    index=False
)

print(writes_on_the_hottest_stack_address_dataframe_latex)

\begin{table}
\centering
\caption{Writes on the Hottest Stack Address}
\label{table:writes_on_the_hottest_stack_address_no_wl_non_selective_selective}
\begin{tabular}{lrrr}
\toprule
benchmark &  No WL &  Non Selective &  Selective \\
\midrule
basicmath &    301 &            785 &        163 \\
       bf &  15309 &          15592 &      15592 \\
  bitcnts &      2 &          14064 &          2 \\
   crc\_32 &    668 &            668 &        668 \\
 dijkstra &   6120 &          30169 &        652 \\
      fft &     46 &           1095 &        740 \\
 patricia &  20864 &          10400 &       9080 \\
  pbmsrch &      3 &              6 &          6 \\
    qsort &    240 &            645 &        641 \\
rawcaudio &      0 &          21902 &         22 \\
rawdaudio &      0 &          21902 &         22 \\
      sha &     39 &            156 &        154 \\
    susan &      7 &            439 &         20 \\
\bottomrule
\end{tabular}
\end{table}



In [12]:
sum(writes_on_the_hottest_stack_address_dataframe['Selective'])

27762

In [13]:
sum(writes_on_the_hottest_stack_address_dataframe['No WL'])

43599

In [14]:
filtered_cache_data['cache_hit_ratio'] = (filtered_cache_data['cache_hits']) / (filtered_cache_data['cache_hits'] + filtered_cache_data['cache_misses'])

In [15]:
filtered_cache_data

Unnamed: 0,executable_type,benchmark,writes,mean,median,std,cache_hits,cache_misses,cache_hit_ratio
25,No WL,basicmath,301,27,2,57,18804069,117496,0.99
12,Non Selective,basicmath,785,421,495,202,18755547,187997,0.99
38,Selective,basicmath,163,50,52,26,18882059,123878,0.99
13,No WL,bf,15309,365,4,2050,23344631,1775335,0.93
0,Non Selective,bf,15592,4155,628,5140,25597536,4624328,0.85
26,Selective,bf,15592,3389,1724,3823,23279182,4416604,0.84
14,No WL,bitcnts,2,1,1,0,53959018,277,1.0
1,Non Selective,bitcnts,14064,8459,11012,4624,56743620,1847476,0.97
27,Selective,bitcnts,2,1,2,0,53958770,511,1.0
20,No WL,crc_32,668,109,2,238,34155628,91254,1.0


In [16]:
def get_cache_hit_ratio_dataframe(dataframe, executable_types):
    extracted_and_sorted_dataframe = dataframe[dataframe['executable_type'].isin({*executable_types})].sort_values(by=['benchmark', 'executable_type'])
    
    benchmarks = extracted_and_sorted_dataframe['benchmark'].unique()
    
    dataframe_data = {
        'benchmark': benchmarks
    }
    
    for executable_type in executable_types:
        dataframe_data[executable_type] = [
            value
            for value in extracted_and_sorted_dataframe[extracted_and_sorted_dataframe['executable_type'] == executable_type]['cache_hit_ratio'].values
        ]
    
    return pd.DataFrame(
        dataframe_data
    )

In [17]:
cache_hit_ratio_dataframe = get_cache_hit_ratio_dataframe(filtered_cache_data, ['No WL', 'Non Selective', 'Selective'])

In [18]:
cache_hit_ratio_dataframe

Unnamed: 0,benchmark,No WL,Non Selective,Selective
0,basicmath,0.99,0.99,0.99
1,bf,0.93,0.85,0.84
2,bitcnts,1.0,0.97,1.0
3,crc_32,1.0,0.99,0.99
4,dijkstra,0.97,0.86,0.96
5,fft,1.0,0.98,0.99
6,patricia,0.95,0.94,0.94
7,pbmsrch,0.99,0.95,0.93
8,qsort,0.96,0.95,0.95
9,rawcaudio,1.0,0.82,1.0


In [19]:
cache_hit_ratio_dataframe['No WL'].mean()

0.9828972857786703

In [20]:
cache_hit_ratio_dataframe['Selective'].mean()

0.968679952006203

In [21]:
cache_hit_ratio_dataframe_latex = cache_hit_ratio_dataframe.to_latex(
    caption='Cache Hit Ratio',
    label='table:cache_hit_ratio_no_wl_non_selective_selective',
    index=False
)

print(cache_hit_ratio_dataframe_latex)

\begin{table}
\centering
\caption{Cache Hit Ratio}
\label{table:cache_hit_ratio_no_wl_non_selective_selective}
\begin{tabular}{lrrr}
\toprule
benchmark &  No WL &  Non Selective &  Selective \\
\midrule
basicmath &   0.99 &           0.99 &       0.99 \\
       bf &   0.93 &           0.85 &       0.84 \\
  bitcnts &   1.00 &           0.97 &       1.00 \\
   crc\_32 &   1.00 &           0.99 &       0.99 \\
 dijkstra &   0.97 &           0.86 &       0.96 \\
      fft &   1.00 &           0.98 &       0.99 \\
 patricia &   0.95 &           0.94 &       0.94 \\
  pbmsrch &   0.99 &           0.95 &       0.93 \\
    qsort &   0.96 &           0.95 &       0.95 \\
rawcaudio &   1.00 &           0.82 &       1.00 \\
rawdaudio &   1.00 &           0.81 &       1.00 \\
      sha &   1.00 &           1.00 &       1.00 \\
    susan &   1.00 &           0.99 &       1.00 \\
\bottomrule
\end{tabular}
\end{table}

