In [45]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from scipy import stats

from analysis.analysis_utils import add_tt_mals_runtime_cols, line_plot_padding_tile_size_tt_mals_runtime_per_matrix, get_percentage_change_per_category, get_percentage_change_per_double_category, line_plot_tile_size_rank_percentage_per_matrix


In [46]:
df = pd.read_csv('../../data/sweep_0_10_data1.csv')
# df2 = pd.read_csv('../../data/sweep_0_10_data2.csv')
# df = pd.concat([df1, df2], axis=0)
df.drop(columns=["run_id", "run_name", "_runtime", "_step", "_timestamp", "gauss_threshold"], inplace=True)
df.shape

(19184, 11)

In [47]:
df.tail()

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,z_reduced,rank,tile_size,max_mode_size,n,z_full
19179,True,False,6,ex3,7,52620.0,4.0,910.0,910.0,1820.0,43947.0
19180,True,False,6,ex3,7,52620.0,1.0,1820.0,1820.0,1820.0,43947.0
19181,True,False,0,ex3,7,52620.0,18451.0,2.0,907.0,1814.0,43947.0
19182,True,False,0,ex3,7,52620.0,4.0,907.0,907.0,1814.0,43947.0
19183,True,False,0,ex3,7,52620.0,1.0,1814.0,1814.0,1814.0,43947.0


In [48]:
df = add_tt_mals_runtime_cols(df)

In [49]:
# no module is applied - baseline
baseline_df = df[(df['amd'] == False) & (df['rcm'] == False) & (df['partial_gauss'] == 0) & (df['padding'] == 0)]

# different levels of padding applied - padding_df
padding_df = df[(df['amd'] == False) & (df['rcm'] == False) & (df['partial_gauss'] == 0)]

# keep amd on and off - amd_df
amd_df = df[(df['rcm'] == False) & (df['partial_gauss'] == 0) & (df['padding'] == 0)]

# keep rcm on and off - rcm_df
rcm_df = df[(df['amd'] == False) & (df['partial_gauss'] == 0) & (df['padding'] == 0)]

# different levels of partial gauss - partial_gauss_df
partial_gauss_df = df[(df['amd'] == False) & (df['rcm'] == False) & (df['padding'] == 0)]

# 2-modules combined: compare effects of both amd and partial_gauss together
amd_partial_gauss_df = df[(df['rcm'] == False) & (df['padding'] == 0)]

In [50]:
df.head()

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,z_reduced,rank,tile_size,max_mode_size,n,z_full,log_obj_func,obj_func
0,False,False,9,ex10hs,2,57280.0,7782.0,5.0,73.0,2555.0,42944.0,26.890957,477083300000.0
1,False,False,9,ex10hs,2,57280.0,4846.0,7.0,73.0,2555.0,42944.0,26.352196,278364100000.0
2,False,False,9,ex10hs,2,57280.0,529.0,35.0,73.0,2555.0,42944.0,25.753908,153031300000.0
3,False,False,9,ex10hs,2,57280.0,155.0,73.0,73.0,2555.0,42944.0,25.744,151522600000.0
4,False,False,9,ex10hs,2,57280.0,19.0,365.0,365.0,2555.0,42944.0,35.399385,2364598000000000.0


In [51]:
# setup color map for each matrix
matrix_names = df["matrix_name"].unique().tolist()
num_matrices = len(matrix_names)

colorscale = px.colors.sequential.Viridis
colors = px.colors.sample_colorscale(colorscale, num_matrices)

matrix_color_map = {matrix:color for matrix, color in zip(matrix_names, colors)}

Tile size choice effects (baseline)

In [52]:
# Tile size choice influence on TT-MALS runtime
fig = px.line(baseline_df, x="tile_size", y="log_obj_func", color='matrix_name', symbol="matrix_name", markers=True, log_x=True,
              color_discrete_map=matrix_color_map,
              labels={
                     "tile_size": "Tile size",
                     "matrix_name": "Matrix name",
                 }
              )
fig.update_layout(
    title={
        'text': "Influence of tile size choice on TT-MALS runtime",
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    plot_bgcolor='white',   # Plot area background color
    paper_bgcolor='white',  # Entire figure background color
    font=dict(color='black'), # Font color
    yaxis_title=r'$\log(I^6 + rI^3 + r^2I^2)$'
)
fig.show()
fig.write_image("plots/baseline_tile_size_vs_log_obj_func.pdf")

In [53]:
# trade-off between rank and max mode size - baseline
fig = px.line(baseline_df, x="rank", y="max_mode_size", color="matrix_name", symbol="matrix_name", log_x=True, log_y=True, color_discrete_map=matrix_color_map,
                 labels={
                     "rank": "Rank (r)",
                     "matrix_name": "Matrix name",
                     "max_mode_size": "Maximum mode size (I)",
                 })
fig.update_layout(
    title={
        'text': "Trade-off between rank and maximum mode size",
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    plot_bgcolor='white',   # Plot area background color
    paper_bgcolor='white',  # Entire figure background color
    font=dict(color='black') # Font color
)
fig.show()

fig.write_image("plots/baseline_max_mode_size_vs_rank.pdf")

Explore effects of padding

In [54]:
# show different cases: when it helps and when it cannot improve the situation
line_plot_padding_tile_size_tt_mals_runtime_per_matrix(padding_df, "ex3")

In [55]:
line_plot_padding_tile_size_tt_mals_runtime_per_matrix(padding_df, "ex10hs")

In [56]:
line_plot_padding_tile_size_tt_mals_runtime_per_matrix(padding_df, "bcsstk13")

In [57]:
line_plot_padding_tile_size_tt_mals_runtime_per_matrix(padding_df, "Pres_Poisson")

Effects of RCM

In [58]:
rcm_df = get_percentage_change_per_double_category(data_frame=rcm_df, result_column="rank_percentage", variable="rank", baseline_col="rcm", baseline_value=False, category1="matrix_name", category2="tile_size")
rcm_df.sort_values(by=["matrix_name", "tile_size"], inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [62]:
line_plot_tile_size_rank_percentage_per_matrix(rcm_df[rcm_df["rcm"] == True], "rcm", matrix_color_map=matrix_color_map)

Assess AMD effects

In [63]:
amd_df = get_percentage_change_per_double_category(data_frame=amd_df, result_column="rank_percentage", variable="rank", baseline_col="amd", baseline_value=False, category1="matrix_name", category2="tile_size")
amd_df.sort_values(by=["matrix_name", "tile_size"], inplace=True)
line_plot_tile_size_rank_percentage_per_matrix(amd_df[amd_df["amd"] == True], "amd", matrix_color_map=matrix_color_map)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [64]:
# do some preprocessing to be able to show nonzero entry change for different matrices together - do it in terms of percentage

partial_gauss_df = get_percentage_change_per_category(data_frame=partial_gauss_df, result_column="z_percentage", variable="z_reduced", baseline_col="partial_gauss", baseline_value=0, category="matrix_name")
partial_gauss_df.sort_values(by="partial_gauss", inplace=True)

amd_partial_gauss_df = get_percentage_change_per_category(data_frame=amd_partial_gauss_df, result_column="z_percentage", variable="z_reduced", baseline_col="partial_gauss", baseline_value=0, category="matrix_name")
amd_partial_gauss_df.sort_values(by="partial_gauss", inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [65]:
fig = px.line(partial_gauss_df, x="partial_gauss", y="z_percentage", color="matrix_name", symbol="matrix_name", log_y=True, color_discrete_map=matrix_color_map,
                 labels={
                     "partial_gauss": "Number of eliminated variables",
                     "z_percentage": "Change in nonzero entries",
                     "matrix_name": "Matrix name",
                 })
fig.update_layout(
    title={
        'text': "Effect of variable elimination on nonzero entry count",
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    plot_bgcolor='white',   # Plot area background color
    paper_bgcolor='white',  # Entire figure background color
    font=dict(color='black'), # Font color
)
fig.show()

fig.write_image("plots/partial_gauss_nonzero_entries.pdf")

In [66]:
amd_partial_gauss_df

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,z_reduced,rank,tile_size,max_mode_size,n,z_full,log_obj_func,obj_func,z_percentage
4940,False,False,0,ex10hs,0,57308.0,265.0,52.0,52.0,2548.0,57308.0,23.718886,1.999776e+10,1.000000
4937,False,False,0,ex10hs,0,57308.0,780.0,26.0,26.0,2548.0,57308.0,20.413888,7.339035e+08,1.000000
4936,False,False,0,ex10hs,0,57308.0,1784.0,14.0,14.0,2548.0,57308.0,20.271063,6.362254e+08,1.000000
4935,False,False,0,ex10hs,0,57308.0,1974.0,13.0,13.0,2548.0,57308.0,20.319352,6.677019e+08,1.000000
4934,False,False,0,ex10hs,0,57308.0,4830.0,7.0,13.0,2548.0,57308.0,22.099010,3.958022e+09,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15644,True,False,0,ex10hs,10,57258.0,740.0,27.0,47.0,2538.0,42861.0,23.213632,1.206569e+10,0.999128
3713,False,False,0,bcsstk13,10,84231.0,1.0,1993.0,1993.0,1993.0,70732.0,45.584378,6.266771e+19,1.004149
15643,True,False,0,ex10hs,10,57258.0,1215.0,18.0,47.0,2538.0,42861.0,23.374135,1.416634e+10,0.999128
15651,True,False,0,ex10hs,10,57258.0,7.0,846.0,846.0,2538.0,42861.0,40.443116,3.666251e+17,0.999128


In [67]:
fig = px.line(amd_partial_gauss_df, x="partial_gauss", y="z_percentage", color="matrix_name", symbol="amd", log_y=True, color_discrete_map=matrix_color_map,
                 labels={
                     "partial_gauss": "Number of eliminated variables",
                     "z_percentage": "Change in nonzero entries",
                     "matrix_name": "Matrix name",
                 })
fig.update_layout(
    title={
        'text': "Effect of variable elimination with AMD on nonzero entry count",
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    plot_bgcolor='white',   # Plot area background color
    paper_bgcolor='white',  # Entire figure background color
    font=dict(color='black'), # Font color
)
fig.show()

fig.write_image("plots/amd_partial_gauss_nonzero_entries.pdf")

In [68]:
amd_partial_gauss_df[amd_partial_gauss_df["matrix_name"] == "ex10"]

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,z_reduced,rank,tile_size,max_mode_size,n,z_full,log_obj_func,obj_func,z_percentage
566,False,False,0,ex10,0,54840.0,2553.0,10.0,241.0,2410.0,54840.0,32.910894,1.963449e+14,1.000000
565,False,False,0,ex10,0,54840.0,6754.0,5.0,241.0,2410.0,54840.0,32.922689,1.986746e+14,1.000000
564,False,False,0,ex10,0,54840.0,27097.0,2.0,241.0,2410.0,54840.0,33.107299,2.389557e+14,1.000000
568,False,False,0,ex10,0,54840.0,13.0,482.0,482.0,2410.0,54840.0,37.067665,1.253956e+16,1.000000
567,False,False,0,ex10,0,54840.0,28.0,241.0,241.0,2410.0,54840.0,32.908784,1.959310e+14,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2863,False,False,0,ex10,10,54734.0,22.0,300.0,300.0,2400.0,43552.0,34.222696,7.290006e+14,0.998067
2862,False,False,0,ex10,10,54734.0,28.0,240.0,240.0,2400.0,43552.0,32.883836,1.911034e+14,0.998067
2865,False,False,0,ex10,10,54734.0,13.0,480.0,480.0,2400.0,43552.0,37.042717,1.223059e+16,0.998067
2866,False,False,0,ex10,10,54734.0,10.0,600.0,600.0,2400.0,43552.0,38.381578,4.665600e+16,0.998067


In [70]:
fig = px.bar(amd_partial_gauss_df[(amd_partial_gauss_df["matrix_name"] == "ex13") & (amd_partial_gauss_df["rank"] == 1)], x="partial_gauss", y="z_reduced", color="amd", log_y=True, barmode='group', 
                 labels={
                     "partial_gauss": "Number of eliminated variables",
                     "z_percentage": "Change in nonzero entries",
                     "matrix_name": "Matrix name",
                 })
fig.update_layout(
    title={
        'text': "Effect of variable elimination with AMD on nonzero entry count",
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    plot_bgcolor='white',   # Plot area background color
    paper_bgcolor='white',  # Entire figure background color
    font=dict(color='black'), # Font color
)
fig.show()

fig.write_image("plots/amd_partial_gauss_nonzero_entries.pdf")