In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.stats import ttest_ind, ttest_rel, mannwhitneyu, norm, normaltest

from analysis.analysis_utils import add_tt_mals_runtime_cols, line_plot_padding_tile_size_tt_mals_runtime_per_matrix, get_percentage_change_per_category, get_percentage_change_per_double_category, line_plot_tile_size_rank_percentage_per_matrix, normality_check_histogram


In [2]:
df1 = pd.read_csv('../../data/sweep_0_10_data1.csv')
df2 = pd.read_csv('../../data/sweep_0_10_data2.csv')
df = pd.concat([df1, df2], axis=0)
df.drop(columns=["run_id", "run_name", "_runtime", "_step", "_timestamp", "gauss_threshold"], inplace=True)
df.shape

(28604, 11)

In [3]:
df.tail()

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,n,rank,z_reduced,z_full,max_mode_size,tile_size
9403,True,True,2,Pres_Poisson,5,14819.0,1625.0,718845.0,623417.0,73.0,73.0
9404,True,True,2,Pres_Poisson,5,14819.0,325.0,718845.0,623417.0,203.0,203.0
9405,True,True,2,Pres_Poisson,5,14819.0,85.0,718845.0,623417.0,511.0,511.0
9406,True,True,2,Pres_Poisson,5,14819.0,19.0,718845.0,623417.0,2117.0,2117.0
9407,True,True,2,Pres_Poisson,5,14819.0,1.0,718845.0,623417.0,14819.0,14819.0


In [4]:
df = add_tt_mals_runtime_cols(df)

In [104]:
# important: we only care about best tile size choice rows that get enabled by these approaches (unless we want to plot the complete picture)
# so drop the rest of the rows for this analysis
# idx = df.groupby(by=['amd', 'rcm', "padding", "matrix_name", "partial_gauss"])['log_obj_func'].idxmin()
# df = df.loc[idx].reset_index(drop=True)

In [5]:
df

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,n,rank,z_reduced,z_full,max_mode_size,tile_size,log_obj_func,obj_func
0,False,True,8,ex10hs,5,2551.0,1.0,57237.0,42957.0,2551.0,2551.0,47.065444,2.755896e+20
1,False,True,8,ex10hs,8,2548.0,16886.0,57244.0,43009.0,13.0,2.0,24.599249,4.823008e+10
2,False,True,8,ex10hs,8,2548.0,6453.0,57244.0,43009.0,13.0,4.0,22.677196,7.056368e+09
3,False,True,8,ex10hs,8,2548.0,4009.0,57244.0,43009.0,13.0,7.0,21.727500,2.729816e+09
4,False,True,8,ex10hs,8,2548.0,1752.0,57244.0,43009.0,13.0,13.0,20.083512,5.274221e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9403,True,True,2,Pres_Poisson,5,14819.0,1625.0,718845.0,623417.0,73.0,73.0,25.835484,1.660383e+11
9404,True,True,2,Pres_Poisson,5,14819.0,325.0,718845.0,623417.0,203.0,203.0,31.879337,6.998744e+13
9405,True,True,2,Pres_Poisson,5,14819.0,85.0,718845.0,623417.0,511.0,511.0,37.418218,1.780433e+16
9406,True,True,2,Pres_Poisson,5,14819.0,19.0,718845.0,623417.0,2117.0,2117.0,45.946532,9.001713e+19


In [6]:
# no module is applied - baseline
baseline_df = df[(df['amd'] == False) & (df['rcm'] == False) & (df['partial_gauss'] == 0) & (df['padding'] == 0)]

# different levels of padding applied - padding_df
padding_df = df[(df['amd'] == False) & (df['rcm'] == False) & (df['partial_gauss'] == 0)]

# keep amd on and off - amd_df
amd_df = df[(df['rcm'] == False) & (df['partial_gauss'] == 0) & (df['padding'] == 0)]

# keep rcm on and off - rcm_df
rcm_df = df[(df['amd'] == False) & (df['partial_gauss'] == 0) & (df['padding'] == 0)]

# combined approaches
pg_df = df[(df['amd'] == False) & (df['rcm'] == False) & (df['padding'] == 0)]

rcm_padding_df = df[(df['amd'] == False) & (df['partial_gauss'] == 0)]

pg_padding_df = df[(df['amd'] == False) & (df['rcm'] == False)]

amd_pg_df = df[(df['rcm'] == False) & (df['padding'] == 0)]

pg_rcm_df = df[(df['amd'] == False) & (df['padding'] == 0)]

# three-way approaches
amd_pg_padding_df = df[df['rcm'] == False]

amd_pg_rcm_df = df[df['padding'] == 0]

pg_padding_rcm_df = df[df['amd'] == False]

# 4-way
all_combined_df = df

df_list = [baseline_df, padding_df, amd_df, rcm_df, pg_df, rcm_padding_df, pg_padding_df, amd_pg_df, pg_rcm_df, amd_pg_padding_df, amd_pg_rcm_df, pg_padding_rcm_df, all_combined_df]
df_str_list = ["baseline_df", "padding_df", "amd_df", "rcm_df", "pg_df", "rcm_padding_df", "pg_padding_df", "amd_pg_df", "pg_rcm_df", "amd_pg_padding_df", "amd_pg_rcm_df", "pg_padding_rcm_df", "all_combined_df"]


In [7]:
df.tail()

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,n,rank,z_reduced,z_full,max_mode_size,tile_size,log_obj_func,obj_func
9403,True,True,2,Pres_Poisson,5,14819.0,1625.0,718845.0,623417.0,73.0,73.0,25.835484,166038300000.0
9404,True,True,2,Pres_Poisson,5,14819.0,325.0,718845.0,623417.0,203.0,203.0,31.879337,69987440000000.0
9405,True,True,2,Pres_Poisson,5,14819.0,85.0,718845.0,623417.0,511.0,511.0,37.418218,1.780433e+16
9406,True,True,2,Pres_Poisson,5,14819.0,19.0,718845.0,623417.0,2117.0,2117.0,45.946532,9.001713e+19
9407,True,True,2,Pres_Poisson,5,14819.0,1.0,718845.0,623417.0,14819.0,14819.0,57.621993,1.059042e+25


In [57]:
# setup color map for each matrix
matrix_names = df["matrix_name"].unique().tolist()
num_matrices = len(matrix_names)

colorscale = px.colors.qualitative.Plotly
colors = [colorscale[i % len(colorscale)] for i in range(num_matrices)]

matrix_color_map = {matrix:color for matrix, color in zip(matrix_names, colors)}

In [9]:
# normality tests - tests the null hypothesis that a sample comes from a normal distribution
# small p-value == this may be taken as evidence against the null hypothesis in favor of the alternative: the weights were not drawn from a normal distribution. 
# Note that:
# The inverse is not true; that is, the test is not used to provide evidence for the null hypothesis.

for df, df_str in zip(df_list, df_str_list):
    res = normaltest(df["obj_func"])
    print("{} normality, p-value: {}".format(df_str, res.pvalue))


baseline_df normality, p-value: 3.7723043882066966e-28
padding_df normality, p-value: 6.902223603464561e-180
amd_df normality, p-value: 1.2930279380468725e-43
rcm_df normality, p-value: 1.2930277107838582e-43
pg_df normality, p-value: 1.0134892235234248e-195
rcm_padding_df normality, p-value: 0.0
pg_padding_df normality, p-value: 0.0
amd_pg_df normality, p-value: 0.0
pg_rcm_df normality, p-value: 0.0
amd_pg_padding_df normality, p-value: 0.0
amd_pg_rcm_df normality, p-value: 0.0
pg_padding_rcm_df normality, p-value: 0.0
all_combined_df normality, p-value: 0.0


In [10]:
for df, df_str in zip(df_list, df_str_list):
    res = normaltest(df["log_obj_func"])
    print("{} normality, p-value: {}".format(df_str, res.pvalue))

baseline_df normality, p-value: 0.16613301510622253
padding_df normality, p-value: 2.377986567580468e-10
amd_df normality, p-value: 0.03339991741992095
rcm_df normality, p-value: 0.03423745058813351
pg_df normality, p-value: 1.3674048313261408e-12
rcm_padding_df normality, p-value: 2.621572735581987e-20
pg_padding_df normality, p-value: 3.433831947995044e-116
amd_pg_df normality, p-value: 7.660226118447551e-25
pg_rcm_df normality, p-value: 4.675559412811609e-25
amd_pg_padding_df normality, p-value: 4.8974808395512204e-234
amd_pg_rcm_df normality, p-value: 1.0044891657676653e-49
pg_padding_rcm_df normality, p-value: 4.519210140649153e-234
all_combined_df normality, p-value: 0.0


Conclusion: except possibly the baseline, AMD and RCM, the (log) runtime estimates do not follow a normal distribution. Therefore, I need to work with non-parametric tests when comparing the approaches.

In [11]:
# Next: check if different approaches give better results than baseline
# important: we only care about best tile size choice rows that get enabled by these approaches

# null-hyp: no difference
_, p_value = mannwhitneyu(rcm_df[rcm_df["rcm"] == False]["log_obj_func"], rcm_df[rcm_df["rcm"] == True]["log_obj_func"], alternative='two-sided')
print(p_value)

0.9254958873875053


In [12]:
rcm_df[rcm_df["rcm"] == False]["log_obj_func"].describe()

count    57.000000
mean     34.362466
std       9.320864
min      20.271063
25%      28.135807
50%      32.908784
75%      40.513622
max      57.623207
Name: log_obj_func, dtype: float64

In [13]:
rcm_df[rcm_df["rcm"] == True]["log_obj_func"].describe()

count    57.000000
mean     34.296210
std       9.395152
min      19.922774
25%      28.125540
50%      32.908784
75%      40.513622
max      57.623207
Name: log_obj_func, dtype: float64

In [14]:
# get best tile size choices
idx = rcm_padding_df.groupby(by=['matrix_name', 'rcm'])['log_obj_func'].idxmin()
rcm_padding_tile_agg_df = rcm_padding_df.loc[idx].reset_index(drop=True)
rcm_padding_tile_agg_df

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,n,rank,z_reduced,z_full,max_mode_size,tile_size,log_obj_func,obj_func
0,False,False,1,Pres_Poisson,0,14823.0,2599.0,715804.0,715804.0,61.0,61.0,25.070247,77244910000.0
1,False,True,1,Pres_Poisson,0,14823.0,2105.0,715804.0,715804.0,61.0,61.0,24.949895,68486010000.0
2,False,False,6,bcsstk13,0,2009.0,569.0,83883.0,83883.0,41.0,41.0,22.397285,5333563000.0
3,False,True,6,bcsstk13,0,2009.0,781.0,83883.0,83883.0,41.0,41.0,22.486159,5829276000.0
4,False,False,10,ex10,0,2420.0,1031.0,54840.0,54840.0,20.0,20.0,20.02497,497432400.0
5,False,True,10,ex10,0,2420.0,907.0,54840.0,54840.0,20.0,20.0,19.807764,400315600.0
6,False,False,2,ex10hs,0,2550.0,1410.0,57308.0,57308.0,17.0,17.0,20.221773,605625800.0
7,False,True,2,ex10hs,0,2550.0,1202.0,57308.0,57308.0,17.0,17.0,19.919391,447591400.0
8,False,False,6,ex13,0,2574.0,1209.0,75628.0,75628.0,18.0,18.0,20.058993,514647800.0
9,False,True,6,ex13,0,2574.0,1235.0,75628.0,75628.0,18.0,18.0,20.098502,535387600.0


Explore effects of rcm followed by padding

do hypothesis testing: 
- null hypothesis: applying RCM on top of padding does not influence the runtime estimate
- significance level: 0.01
- need to check assumptions: observations independent, from sample to sample, samples were obtained via random sampling, data should be approximately normally distributed, variances should be approximately equal (if not, just use Welch’s t-test).

In [38]:
# considering only the best tile size choices
rcm_on_group = rcm_padding_tile_agg_df[rcm_padding_tile_agg_df['rcm'] == True]
rcm_off_group = rcm_padding_tile_agg_df[rcm_padding_tile_agg_df['rcm'] == False]

print("mean, std for log_obj_func without rcm: ", rcm_on_group["log_obj_func"].mean(), rcm_on_group["log_obj_func"].std())
print("mean, std for log_obj_func with rcm: ", rcm_off_group["log_obj_func"].mean(), rcm_off_group["log_obj_func"].std())

mean, std for log_obj_func without rcm:  21.090700359472148 1.9757366476920277
mean, std for log_obj_func with rcm:  22.15626094631789 3.0394536086430586


In [42]:
# considering all tile size choices
rcm_on_group = rcm_padding_df[rcm_padding_df['rcm'] == True]
rcm_off_group = rcm_padding_df[rcm_padding_df['rcm'] == False]

print("mean, std for log_obj_func without rcm: ", rcm_on_group["log_obj_func"].mean(), rcm_on_group["log_obj_func"].std())
print("mean, std for log_obj_func with rcm: ", rcm_off_group["log_obj_func"].mean(), rcm_off_group["log_obj_func"].std())

mean, std for log_obj_func without rcm:  34.1233067451281 9.374179941538983
mean, std for log_obj_func with rcm:  34.178634029709 9.327544387403918


In [43]:
rcm_on_group

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,n,rank,z_reduced,z_full,max_mode_size,tile_size,log_obj_func,obj_func
123,False,True,8,ex10,0,2418.0,15759.0,54840.0,54840.0,31.0,2.0,26.203978,2.400176e+11
124,False,True,8,ex10,0,2418.0,12630.0,54840.0,54840.0,31.0,3.0,25.763845,1.545595e+11
125,False,True,8,ex10,0,2418.0,4590.0,54840.0,54840.0,31.0,6.0,23.780596,2.127069e+10
126,False,True,8,ex10,0,2418.0,1722.0,54840.0,54840.0,31.0,13.0,22.055221,3.788442e+09
127,False,True,8,ex10,0,2418.0,599.0,54840.0,54840.0,31.0,26.0,20.946534,1.250156e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5368,False,True,3,Pres_Poisson,0,14825.0,66067.0,715804.0,715804.0,593.0,5.0,38.346162,4.503254e+16
5369,False,True,3,Pres_Poisson,0,14825.0,8627.0,715804.0,715804.0,593.0,25.0,38.311809,4.351184e+16
5370,False,True,3,Pres_Poisson,0,14825.0,73.0,715804.0,715804.0,593.0,593.0,38.311167,4.348388e+16
5371,False,True,3,Pres_Poisson,0,14825.0,13.0,715804.0,715804.0,2965.0,2965.0,47.967794,6.794354e+20


In [44]:
rcm_padding_diff_df = pd.merge(rcm_on_group, rcm_off_group, on=['amd', 'padding', 'matrix_name', 'partial_gauss', 'tile_size'], suffixes=('_on', '_off'))
rcm_padding_diff_df["log_obj_func_diff"] = rcm_padding_diff_df["log_obj_func_on"] - rcm_padding_diff_df["log_obj_func_off"]
rcm_padding_diff_df["log_obj_func_diff"].describe()

count    615.000000
mean      -0.055327
std        0.188870
min       -1.147567
25%       -0.006678
50%        0.000000
75%        0.000000
max        0.534270
Name: log_obj_func_diff, dtype: float64

In [63]:
fig = px.histogram(rcm_padding_diff_df, x="log_obj_func_diff", nbins=100, log_y=True, 
                   color="matrix_name",
                #  histnorm="probability",  # won't add up to one overall when multiple groups are used
                  color_discrete_map=matrix_color_map,
                  labels={
                      "log_obj_func_diff": r'$\Delta \log(I^6 + rI^3 + r^2I^2)$',
                      "matrix_name": "Matrix name",
                      "tile_size": "Tile size",
                  })
fig.update_layout(
        title={
            'text': "Change in runtime estimates after adding RCM to Padding",
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        },
        plot_bgcolor='white',  # Plot area background color
        paper_bgcolor='white',  # Entire figure background color
        font=dict(color='black'),  # Font color
    )
fig.show()
fig.write_image("plots/rcm_padding_log_obj_func_diff_plot.pdf")

In [None]:
# this initial analysis indicates that RCM on top of padding generally helps a bit, sometimes it makes things worse. Most often, it does not lead to significant changes. For more understanding, would need to look at separate cases in more detail, skip for now.
# Now I just have to make this visually nice and write it down in the report.

In [None]:
# todo: perhaps I can do the above difference plot as follows:
# - consider one improvement (e.g. rcm)
# - against a set of different baselines - use color for these, see which of the baselines improve the most with added stuff
# - this also allows us to evaluate combined baselines easier