In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from scipy import stats

from analysis.analysis_utils import add_tt_mals_runtime_cols, line_plot_padding_tile_size_tt_mals_runtime_per_matrix, get_percentage_change_per_category, get_percentage_change_per_double_category, line_plot_tile_size_rank_percentage_per_matrix


In [2]:
# Load the sweep results and drop the run bookkeeping columns before analysis.
# Only sweep_0_10_data1.csv is read; a second file (sweep_0_10_data2.csv) used to be
# concatenated here but is currently left out.
bookkeeping_columns = ["run_id", "run_name", "_runtime", "_step", "_timestamp", "gauss_threshold"]
df = pd.read_csv('../../data/sweep_0_10_data1.csv').drop(columns=bookkeeping_columns)
df.shape

(19911, 11)

In [3]:
# Show the last rows as a quick sanity check of the loaded frame.
df.tail()

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,max_mode_size,n,z_reduced,rank,z_full,tile_size
19906,True,False,0,ex3,0,607.0,1821.0,52685.0,9.0,52685.0,607.0
19907,True,False,0,ex3,0,1821.0,1821.0,52685.0,1.0,52685.0,1821.0
19908,True,True,0,ex3,0,607.0,1821.0,52685.0,10567.0,52685.0,3.0
19909,True,True,0,ex3,0,607.0,1821.0,52685.0,7.0,52685.0,607.0
19910,True,True,0,ex3,0,1821.0,1821.0,52685.0,1.0,52685.0,1821.0


In [4]:
# Augment df through the project helper. The df.head() output further down lists two
# extra columns (log_obj_func, obj_func), presumably added by this call — TODO confirm.
# NOTE(review): df is reassigned from itself, so re-running only this cell feeds the
# already-augmented frame back into the helper.
df = add_tt_mals_runtime_cols(df)

In [5]:
# Masks for "this preprocessing module is switched off". They are combined below so
# that each subset varies exactly one module (or one pair of modules).
no_amd = df["amd"] == False  # noqa: E712 - element-wise comparison, not an identity test
no_rcm = df["rcm"] == False  # noqa: E712
no_partial_gauss = df["partial_gauss"] == 0
no_padding = df["padding"] == 0

# Each subset is materialised with .copy(): later cells pass these frames to helpers
# that assign new columns, and doing that on a plain boolean-mask slice of df raises
# pandas' SettingWithCopyWarning.

# no module is applied - baseline
baseline_df = df[no_amd & no_rcm & no_partial_gauss & no_padding].copy()

# different levels of padding applied - padding_df
padding_df = df[no_amd & no_rcm & no_partial_gauss].copy()

# keep amd on and off - amd_df
amd_df = df[no_rcm & no_partial_gauss & no_padding].copy()

# keep rcm on and off - rcm_df
rcm_df = df[no_amd & no_partial_gauss & no_padding].copy()

# different levels of partial gauss - partial_gauss_df
partial_gauss_df = df[no_amd & no_rcm & no_padding].copy()

# 2-modules combined: compare effects of both amd and partial_gauss together
amd_partial_gauss_df = df[no_rcm & no_padding].copy()

In [6]:
# Show the first rows of df as it stands after add_tt_mals_runtime_cols was applied.
df.head()

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,max_mode_size,n,z_reduced,rank,z_full,tile_size,log_obj_func,obj_func
0,False,False,10,ex15,10,109.0,6867.0,98623.0,33252.0,98560.0,3.0,30.329488,14856930000000.0
1,False,False,10,ex15,10,109.0,6867.0,98623.0,8766.0,98560.0,7.0,28.587079,2601421000000.0
2,False,False,10,ex15,10,109.0,6867.0,98623.0,6816.0,98560.0,9.0,28.436556,2237893000000.0
3,False,False,10,ex15,10,109.0,6867.0,98623.0,2921.0,98560.0,21.0,28.2089,1782254000000.0
4,False,False,10,ex15,10,109.0,6867.0,98623.0,539.0,98560.0,63.0,28.150559,1681250000000.0


In [29]:
# Give every matrix one fixed colour so that all figures share the same colour coding.
matrix_names = df["matrix_name"].unique().tolist()
num_matrices = len(matrix_names)

# Draw num_matrices colours from the sequential Viridis scale.
colorscale = px.colors.sequential.Viridis
colors = px.colors.sample_colorscale(colorscale, num_matrices)

# matrix name -> colour string, in order of first appearance in df
matrix_color_map = dict(zip(matrix_names, colors))

Tile size choice effects (baseline)

In [30]:
# Baseline runs only: log_obj_func against tile size (log-scaled x axis), one line per matrix.
fig = px.line(
    baseline_df,
    x="tile_size",
    y="log_obj_func",
    color="matrix_name",
    symbol="matrix_name",
    markers=True,
    log_x=True,
    color_discrete_map=matrix_color_map,
    labels=dict(tile_size="Tile size", matrix_name="Matrix name"),
).update_layout(
    title=dict(
        text="Influence of tile size choice on TT-MALS runtime",
        x=0.5,
        xanchor="center",
        yanchor="top",
    ),
    plot_bgcolor="white",    # plot area background
    paper_bgcolor="white",   # whole-figure background
    font={"color": "black"},
    # the y axis carries the formula instead of the raw column name
    yaxis_title=r"$\log(I^6 + rI^3 + r^2I^2)$",
)
fig.show()
# Save a PDF copy next to the other exported figures.
fig.write_image("plots/baseline_tile_size_vs_log_obj_func.pdf")

In [8]:
# Baseline runs only: maximum mode size (I) against rank (r) on log-log axes,
# one line per matrix.
fig = px.line(
    baseline_df,
    x="rank",
    y="max_mode_size",
    color="matrix_name",
    symbol="matrix_name",
    log_x=True,
    log_y=True,
    color_discrete_map=matrix_color_map,
    labels=dict(
        rank="Rank (r)",
        matrix_name="Matrix name",
        max_mode_size="Maximum mode size (I)",
    ),
).update_layout(
    title=dict(
        text="Trade-off between rank and maximum mode size",
        x=0.5,
        xanchor="center",
        yanchor="top",
    ),
    plot_bgcolor="white",    # plot area background
    paper_bgcolor="white",   # whole-figure background
    font={"color": "black"},
)
fig.show()

# Save a PDF copy next to the other exported figures.
fig.write_image("plots/baseline_max_mode_size_vs_rank.pdf")

Explore effects of padding

In [9]:
# Padding plots for individual matrices, chosen to show different cases: where padding
# helps and where it cannot improve the situation.
# padding_df contains only runs with AMD, RCM and partial Gauss switched off.
line_plot_padding_tile_size_tt_mals_runtime_per_matrix(padding_df, "ex3")

In [10]:
# Padding plot (same helper and padding_df as for "ex3") for matrix "ex10hs".
line_plot_padding_tile_size_tt_mals_runtime_per_matrix(padding_df, "ex10hs")

In [11]:
# Padding plot (same helper and padding_df as for "ex3") for matrix "bcsstk13".
line_plot_padding_tile_size_tt_mals_runtime_per_matrix(padding_df, "bcsstk13")

In [12]:
# Padding plot (same helper and padding_df as for "ex3") for matrix "Pres_Poisson".
line_plot_padding_tile_size_tt_mals_runtime_per_matrix(padding_df, "Pres_Poisson")

Effects of RCM

In [13]:
# Add a "rank_percentage" column via the project helper, using the rcm == False rows as
# the baseline within each (matrix_name, tile_size) group — presumably the relative
# change of rank against that baseline; confirm in analysis_utils.
#
# The helper receives an explicit copy and the result is sorted without inplace=True:
# rcm_df is a boolean-mask slice of df, and assigning columns to it or sorting it in
# place is what raised pandas' SettingWithCopyWarning.
# NOTE(review): rcm_df is rebound from itself, so this cell is meant to run once per
# kernel session, after the cell that defines the subsets.
rcm_df = get_percentage_change_per_double_category(
    data_frame=rcm_df.copy(),
    result_column="rank_percentage",
    variable="rank",
    baseline_col="rcm",
    baseline_value=False,
    category1="matrix_name",
    category2="tile_size",
).sort_values(by=["matrix_name", "tile_size"])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [14]:
# Plot only the RCM-enabled rows; the rcm == False rows served as the baseline when
# rank_percentage was computed.
rcm_enabled = rcm_df["rcm"] == True  # noqa: E712 - element-wise comparison
line_plot_tile_size_rank_percentage_per_matrix(
    rcm_df[rcm_enabled],
    "rcm",
    matrix_color_map=matrix_color_map,
)

Assess AMD effects

In [16]:
# Add a "rank_percentage" column via the project helper, using the amd == False rows as
# the baseline within each (matrix_name, tile_size) group — presumably the relative
# change of rank against that baseline; confirm in analysis_utils.
#
# The helper receives an explicit copy and the result is sorted without inplace=True:
# amd_df is a boolean-mask slice of df, and assigning columns to it or sorting it in
# place is what raised pandas' SettingWithCopyWarning.
# NOTE(review): amd_df is rebound from itself, so this cell is meant to run once per
# kernel session, after the cell that defines the subsets.
amd_df = get_percentage_change_per_double_category(
    data_frame=amd_df.copy(),
    result_column="rank_percentage",
    variable="rank",
    baseline_col="amd",
    baseline_value=False,
    category1="matrix_name",
    category2="tile_size",
).sort_values(by=["matrix_name", "tile_size"])

# Plot only the AMD-enabled rows; the amd == False rows are the baseline.
line_plot_tile_size_rank_percentage_per_matrix(amd_df[amd_df["amd"] == True], "amd", matrix_color_map=matrix_color_map)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [17]:
# Preprocessing so that the change in nonzero entries can be shown for different
# matrices together: express it as a percentage ("z_percentage") per matrix, with the
# partial_gauss == 0 rows as the baseline.
#
# Both helper calls receive an explicit copy and the results are sorted without
# inplace=True: the inputs are boolean-mask slices of df, and assigning columns to them
# or sorting them in place is what raised pandas' SettingWithCopyWarning.
# NOTE(review): variable="z" is passed, but the df.head()/df.tail() outputs earlier in
# the notebook list z_reduced and z_full rather than z — confirm where the "z" column
# comes from (the helper, or a different version of the data file).
# NOTE(review): both frames are rebound from themselves, so this cell is meant to run
# once per kernel session, after the cell that defines the subsets.

partial_gauss_df = get_percentage_change_per_category(
    data_frame=partial_gauss_df.copy(),
    result_column="z_percentage",
    variable="z",
    baseline_col="partial_gauss",
    baseline_value=0,
    category="matrix_name",
).sort_values(by="partial_gauss")

amd_partial_gauss_df = get_percentage_change_per_category(
    data_frame=amd_partial_gauss_df.copy(),
    result_column="z_percentage",
    variable="z",
    baseline_col="partial_gauss",
    baseline_value=0,
    category="matrix_name",
).sort_values(by="partial_gauss")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [18]:
# Partial Gauss only (AMD, RCM and padding off): z_percentage against the number of
# eliminated variables, log-scaled y axis, one line per matrix.
fig = px.line(
    partial_gauss_df,
    x="partial_gauss",
    y="z_percentage",
    color="matrix_name",
    symbol="matrix_name",
    log_y=True,
    color_discrete_map=matrix_color_map,
    labels=dict(
        partial_gauss="Number of eliminated variables",
        z_percentage="Change in nonzero entries",
        matrix_name="Matrix name",
    ),
).update_layout(
    title=dict(
        text="Effect of variable elimination on nonzero entry count",
        x=0.5,
        xanchor="center",
        yanchor="top",
    ),
    plot_bgcolor="white",    # plot area background
    paper_bgcolor="white",   # whole-figure background
    font={"color": "black"},
)
fig.show()

# Save a PDF copy next to the other exported figures.
fig.write_image("plots/partial_gauss_nonzero_entries.pdf")

In [20]:
# Partial Gauss with AMD both off and on (RCM and padding off): z_percentage against
# the number of eliminated variables, log-scaled y axis, one line per matrix.
fig = px.line(
    amd_partial_gauss_df,
    x="partial_gauss",
    y="z_percentage",
    color="matrix_name",
    symbol="matrix_name",
    log_y=True,
    color_discrete_map=matrix_color_map,
    labels=dict(
        partial_gauss="Number of eliminated variables",
        z_percentage="Change in nonzero entries",
        matrix_name="Matrix name",
    ),
).update_layout(
    title=dict(
        text="Effect of variable elimination with AMD on nonzero entry count",
        x=0.5,
        xanchor="center",
        yanchor="top",
    ),
    plot_bgcolor="white",    # plot area background
    paper_bgcolor="white",   # whole-figure background
    font={"color": "black"},
)
fig.show()

# Save a PDF copy next to the other exported figures.
fig.write_image("plots/amd_partial_gauss_nonzero_entries.pdf")

In [32]:
# Inspect the rows of the AMD + partial-Gauss subset that belong to matrix "ex10".
amd_partial_gauss_df.loc[amd_partial_gauss_df["matrix_name"] == "ex10"]

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,rank,tile_size,max_mode_size,n,z,log_obj_func,obj_func
7860,False,False,0,ex10,10,26658.0,2.0,5.0,2400.0,48752.0,23.600753,1.776957e+10
7861,False,False,0,ex10,10,14450.0,3.0,5.0,2400.0,48752.0,22.376124,5.221884e+09
7862,False,False,0,ex10,10,9346.0,4.0,5.0,2400.0,48752.0,21.504825,2.184877e+09
7863,False,False,0,ex10,10,6732.0,5.0,5.0,2400.0,48752.0,20.848887,1.133853e+09
7864,False,False,0,ex10,10,5142.0,6.0,6.0,2400.0,48752.0,20.675129,9.530032e+08
...,...,...,...,...,...,...,...,...,...,...,...,...
18050,True,False,0,ex10,0,2341.0,10.0,241.0,2410.0,54840.0,32.910572,1.962817e+14
18051,True,False,0,ex10,0,44.0,241.0,241.0,2410.0,54840.0,32.908785,1.959313e+14
18052,True,False,0,ex10,0,17.0,482.0,482.0,2410.0,54840.0,37.067665,1.253956e+16
18053,True,False,0,ex10,0,4.0,1205.0,1205.0,2410.0,54840.0,42.565409,3.061416e+18


In [31]:
# Grouped bar chart for matrix "ex10" only: absolute nonzero count (z) per number of
# eliminated variables, with AMD off and on side by side.
# Only rank == 1 rows are kept — presumably the runs where tile_size equals n, which
# leaves one row per (partial_gauss, amd) setting; TODO confirm.
ex10_rank_one = amd_partial_gauss_df[
    (amd_partial_gauss_df["matrix_name"] == "ex10") & (amd_partial_gauss_df["rank"] == 1)
]
fig = px.bar(
    ex10_rank_one,
    x="partial_gauss",
    y="z",
    color="amd",
    log_y=False,
    barmode="group",
    # Label the columns this figure actually uses (x, y and colour). The previous
    # mapping was copied from the line plots and named z_percentage / matrix_name.
    labels={
        "partial_gauss": "Number of eliminated variables",
        "z": "Nonzero entries",
        "amd": "AMD",
    },
)
fig.update_layout(
    title={
        'text': "Effect of variable elimination with AMD on nonzero entry count (ex10)",
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    plot_bgcolor='white',   # Plot area background color
    paper_bgcolor='white',  # Entire figure background color
    font=dict(color='black'),  # Font color
)
fig.show()

# Use a file name of its own: the previous target, amd_partial_gauss_nonzero_entries.pdf,
# is also written by the line-plot cell, so this bar chart silently overwrote it.
fig.write_image("plots/amd_partial_gauss_nonzero_entries_ex10.pdf")