In [75]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from scipy import stats


In [76]:
# df1 = pd.read_csv('../../data/sweep_0_10_data.csv')
# df2 = pd.read_csv('../../data/sweep_0_10_data2.csv')
# df = pd.concat([df1, df2], axis=0)
df = pd.read_csv('../../data/sweep_0_10_7_matrices.csv')
df.drop(columns=["run_id", "run_name", "_runtime", "_step", "_timestamp"], inplace=True)

In [77]:
df.tail()

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,n,rank,max_mode_size,z,tile_size
8673,True,False,0,Pres_Poisson,0,14822.0,4.0,7411.0,715804.0,7411.0
8674,True,False,0,Pres_Poisson,0,14822.0,1.0,14822.0,715804.0,14822.0
8675,True,True,0,Pres_Poisson,0,14822.0,567235.0,7411.0,715804.0,2.0
8676,True,True,0,Pres_Poisson,0,14822.0,4.0,7411.0,715804.0,7411.0
8677,True,True,0,Pres_Poisson,0,14822.0,1.0,14822.0,715804.0,14822.0


In [90]:
def add_tt_mals_runtime_cols(df: pd.DataFrame) -> pd.DataFrame:
    df["log_obj_func"] = np.log(df["max_mode_size"]**6 + df["max_mode_size"]**3 *  df["rank"] + df["max_mode_size"]**2 *  df["rank"]**2)
    df["obj_func"] = np.exp(df["log_obj_func"])
    return df
df = add_tt_mals_runtime_cols(df)

In [79]:
baseline_df = df[(df['amd'] == False) & (df['rcm'] == False) & (df['partial_gauss'] == 0) & (df['padding'] == 0)]
padding_df = df[(df['amd'] == False) & (df['rcm'] == False) & (df['partial_gauss'] == 0)]
amd_df = df[(df['rcm'] == False) & (df['partial_gauss'] == 0) & (df['padding'] == 0)]
rcm_df = df[(df['amd'] == False) & (df['partial_gauss'] == 0) & (df['padding'] == 0)]
partial_gauss_df = df[(df['amd'] == False) & (df['rcm'] == False) & (df['padding'] == 0)]

In [80]:
df.head()

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,n,rank,max_mode_size,z,tile_size,log_obj_func,obj_func
0,False,False,10,ex15,10,6867.0,33252.0,109.0,98623.0,3.0,30.329488,14856930000000.0
1,False,False,10,ex15,10,6867.0,8766.0,109.0,98623.0,7.0,28.587079,2601421000000.0
2,False,False,10,ex15,10,6867.0,6816.0,109.0,98623.0,9.0,28.436556,2237893000000.0
3,False,False,10,ex15,10,6867.0,2921.0,109.0,98623.0,21.0,28.2089,1782254000000.0
4,False,False,10,ex15,10,6867.0,539.0,109.0,98623.0,63.0,28.150559,1681250000000.0


In [33]:
# Tile size choice influence on TT-MALS runtime
fig = px.line(baseline_df, x="tile_size", y="log_obj_func", color='matrix_name', symbol="matrix_name", markers=True, log_x=True,
              labels={
                     "tile_size": "Tile size",
                     "matrix_name": "Matrix name",
                 }
              )
fig.update_layout(
    title={
        'text': "Influence of tile size choice on TT-MALS runtime",
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    plot_bgcolor='white',   # Plot area background color
    paper_bgcolor='white',  # Entire figure background color
    font=dict(color='black'), # Font color
    yaxis_title=r'$\log(I^6 + rI^3 + r^2I^2)$'
)
fig.show()
fig.write_image("plots/baseline_tile_size_vs_log_obj_func.pdf")

In [34]:
# trade-off between rank and max mode size - baseline
fig = px.line(baseline_df, x="rank", y="max_mode_size", color="matrix_name", symbol="matrix_name", log_x=True, log_y=True, 
                 labels={
                     "rank": "Rank (r)",
                     "matrix_name": "Matrix name",
                     "max_mode_size": "Maximum mode size (I)",
                 })
fig.update_layout(
    title={
        'text': "Trade-off between rank and maximum mode size",
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    plot_bgcolor='white',   # Plot area background color
    paper_bgcolor='white',  # Entire figure background color
    font=dict(color='black') # Font color
)
fig.show()

fig.write_image("plots/baseline_max_mode_size_vs_rank.pdf")

In [35]:
# explore effects of padding: 
def line_plot_padding_tile_size_tt_mals_runtime(matrix_str: str):
    default_colorscale = px.colors.sequential.Jet
    colors = px.colors.sample_colorscale(default_colorscale, 11)
    fig = px.line(padding_df[padding_df["matrix_name"] == matrix_str], x="tile_size", y="log_obj_func", color="padding", symbol="padding", log_x=True, color_discrete_sequence=colors,
                     labels={
                         "tile_size": "Tile size",
                     })
    fig.update_layout(
        title={
            'text': "Influence of tile size choice and padding on TT-MALS runtime ({})".format(matrix_str),
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        },
        plot_bgcolor='white',   # Plot area background color
        paper_bgcolor='white',  # Entire figure background color
        font=dict(color='black'), # Font color
        yaxis_title=r'$\log(I^6 + rI^3 + r^2I^2)$'
    )
    fig.show()
    
    fig.write_image("plots/{}_padding_tile_size_vs_log_obj_func.pdf".format(matrix_str))

In [36]:
# show different cases: when it helps and when it cannot improve the situation
line_plot_padding_tile_size_tt_mals_runtime("ex3")

In [37]:
line_plot_padding_tile_size_tt_mals_runtime("ex10hs")

In [38]:
line_plot_padding_tile_size_tt_mals_runtime("bcsstk13")

In [39]:
line_plot_padding_tile_size_tt_mals_runtime("Pres_Poisson")

In [40]:
fig = px.line(rcm_df[rcm_df["matrix_name"] == "bcsstk13"], x="tile_size", y="rank", color="rcm", symbol="matrix_name", log_x=True, 
                 labels={
                     "rank": "Maximum Rank (r)",
                     "matrix_name": "Matrix Name",
                     "max_mode_size": "Maximum Mode Size (I)",
                 })
fig.update_layout(
    title={
        'text': "Influence of tile size choice and RCM on TT-MALS runtime",
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    plot_bgcolor='white',   # Plot area background color
    paper_bgcolor='white',  # Entire figure background color
    font=dict(color='black'), # Font color
    yaxis_title=r'$\log(I^6 + rI^3 + r^2I^2)$'
)
fig.show()

fig.write_image("plots/rcm_max_mode_size_vs_max_rank.pdf")

In [56]:
# let's look at partial-Gauss before more in depth analysis of variable reordering

df3

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,tile_size,rank,max_mode_size,z,_timestamp,_runtime,n,_step,run_id,run_name,z_percentage
0,False,False,0,ex15,10,6857.0,1.0,6857.0,98617.0,1.718615e+09,3.859293,6857.0,0.0,pe540qd1,vibrant-sweep-110,0.999453
1,False,False,0,ex15,9,2.0,47685.0,127.0,98600.0,1.718615e+09,6.027897,6858.0,0.0,lcn00hzp,sandy-sweep-109,0.999280
2,False,False,0,ex15,9,3.0,19810.0,127.0,98600.0,1.718615e+09,7.216445,6858.0,1.0,lcn00hzp,sandy-sweep-109,0.999280
3,False,False,0,ex15,9,6.0,7851.0,127.0,98600.0,1.718615e+09,7.861686,6858.0,2.0,lcn00hzp,sandy-sweep-109,0.999280
4,False,False,0,ex15,9,9.0,6168.0,127.0,98600.0,1.718615e+09,8.345176,6858.0,3.0,lcn00hzp,sandy-sweep-109,0.999280
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
935,True,False,0,ex3,3,202.0,59.0,202.0,53766.0,1.718615e+09,2.864133,1818.0,6.0,ojl1h137,deep-sweep-4,1.020518
936,True,False,0,ex3,3,303.0,28.0,303.0,53766.0,1.718615e+09,2.878522,1818.0,7.0,ojl1h137,deep-sweep-4,1.020518
937,True,False,0,ex3,3,606.0,9.0,606.0,53766.0,1.718615e+09,2.891512,1818.0,8.0,ojl1h137,deep-sweep-4,1.020518
938,True,False,0,ex3,3,909.0,4.0,909.0,53766.0,1.718615e+09,2.907719,1818.0,9.0,ojl1h137,deep-sweep-4,1.020518


In [81]:
# do some preprocessing to be able to show nonzero entry change for different matrices together - do it in terms of percentage

def get_percentage_change_per_matrix(data_frame: pd.DataFrame, result_column: str, variable: str, baseline_col: str):
    """
    
    :param data_frame: complete df
    :param result_column: column name to store results
    :param variable: name of column to check for change
    :param baseline_col: column name which determines baseline (row should be zero in this column per matrix to give baseline)
    :return: df
    """
    # Initialize the z_percentage column with NaN
    data_frame[result_column] = np.nan
    
    # Calculate z_percentage for each matrix_name
    for matrix_name in data_frame["matrix_name"].unique():
        partial_gauss_df_per_matrix = data_frame[data_frame["matrix_name"] == matrix_name]
        original_z = partial_gauss_df_per_matrix[partial_gauss_df_per_matrix[baseline_col] == 0][variable].unique()
        
        # Ensure there is exactly one original_z value
        if len(original_z) == 1:
            original_z_value = original_z[0]
            data_frame.loc[data_frame["matrix_name"] == matrix_name, result_column] = (
                partial_gauss_df_per_matrix[variable] / original_z_value
            ).values
        else:
            # Handle case where there are no original_z values
            data_frame.loc[data_frame["matrix_name"] == matrix_name, result_column] = np.nan
    return data_frame


partial_gauss_df = get_percentage_change_per_matrix(data_frame=df3[df3["amd"] == False], result_column="z_percentage", variable="z", baseline_col="partial_gauss")
partial_gauss_df.sort_values(by="partial_gauss", inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [82]:
fig = px.line(partial_gauss_df, x="partial_gauss", y="z_percentage", color="matrix_name", symbol="matrix_name", log_y=True,
                 labels={
                     "partial_gauss": "Number of eliminated variables",
                     "z_percentage": "Change in nonzero entries",
                     "matrix_name": "Matrix name",
                 })
fig.update_layout(
    title={
        'text': "Effect of variable elimination on nonzero entry count",
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    plot_bgcolor='white',   # Plot area background color
    paper_bgcolor='white',  # Entire figure background color
    font=dict(color='black'), # Font color
)
fig.show()

fig.write_image("plots/partial_gauss_nonzero_entries.pdf")

In [86]:
# check effect with AMD with rounding
amd_partial_gauss_df = df3[(df3['amd'] == True) & (df3['rcm'] == False) & (df3['padding'] == 0)]
amd_partial_gauss_df = get_percentage_change_per_matrix(data_frame=amd_partial_gauss_df, result_column="z_percentage", variable="z", baseline_col="partial_gauss")
amd_partial_gauss_df.sort_values(by="partial_gauss", inplace=True)
fig = px.line(amd_partial_gauss_df, x="partial_gauss", y="z_percentage", color="matrix_name", symbol="matrix_name", log_y=True,
                 labels={
                     "partial_gauss": "Number of eliminated variables",
                     "z_percentage": "Change in nonzero entries",
                     "matrix_name": "Matrix name",
                 })
fig.update_layout(
    title={
        'text': "Effect of variable elimination on nonzero entry count (with AMD)",
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    plot_bgcolor='white',   # Plot area background color
    paper_bgcolor='white',  # Entire figure background color
    font=dict(color='black'), # Font color
)
fig.show()

fig.write_image("plots/partial_gauss_nonzero_entries.pdf")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [91]:
# explore effects of padding: 
def line_plot_partial_gauss_tile_size_tt_mals_runtime(matrix_str: str, pg_df: pd.DataFrame):
    default_colorscale = px.colors.sequential.Jet
    colors = px.colors.sample_colorscale(default_colorscale, 11)
    fig = px.line(pg_df[pg_df["matrix_name"] == matrix_str], x="tile_size", y="log_obj_func", color="partial_gauss", symbol="partial_gauss", log_x=True, color_discrete_sequence=colors,
                     labels={
                         "tile_size": "Tile size",
                     })
    fig.update_layout(
        title={
            'text': "Influence of tile size choice and partial Gauss on TT-MALS runtime ({})".format(matrix_str),
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        },
        plot_bgcolor='white',   # Plot area background color
        paper_bgcolor='white',  # Entire figure background color
        font=dict(color='black'), # Font color
        yaxis_title=r'$\log(I^6 + rI^3 + r^2I^2)$'
    )
    fig.show()
    
    fig.write_image("plots/{}_partial_gauss_tile_size_vs_log_obj_func.pdf".format(matrix_str))

In [100]:
df3.sort_values(by=["partial_gauss", "tile_size"], inplace=True)
df3 = df3[df3["amd"] == False]
line_plot_partial_gauss_tile_size_tt_mals_runtime("ex13", df3)

In [94]:
df3

Unnamed: 0,amd,rcm,padding,matrix_name,partial_gauss,_step,n,rank,_runtime,max_mode_size,z,tile_size,_timestamp,run_id,run_name,log_obj_func,obj_func
410,False,False,0,ex10hs,8,0.0,2540.0,29634.0,2.056748,127.0,50996.0,2.0,1.718616e+09,n09cw7py,rare-sweep-108,30.544493,1.842064e+13
755,True,False,0,Pres_Poisson,10,0.0,14812.0,367168.0,47.960515,23.0,724758.0,2.0,1.718616e+09,f7w7zlui,fine-sweep-66,31.898203,7.132034e+13
1033,True,False,0,ex13,6,0.0,2562.0,28379.0,2.371121,61.0,76008.0,2.0,1.718616e+09,ysu76sl5,floral-sweep-40,28.747714,3.054735e+12
220,False,False,0,ex15,7,0.0,6860.0,47692.0,7.051578,7.0,98480.0,2.0,1.718616e+09,57m6zczj,revived-sweep-129,25.437006,1.114683e+11
739,True,False,0,bcsstk13,1,0.0,2002.0,55321.0,2.683708,13.0,75478.0,2.0,1.718616e+09,lzvbifir,frosty-sweep-68,26.971959,5.173362e+11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
865,True,False,0,Pres_Poisson,2,46.0,14820.0,1.0,202.692586,14820.0,717338.0,14820.0,1.718616e+09,lq4r0in1,kind-sweep-58,57.622397,1.059471e+25
194,False,False,0,Pres_Poisson,1,0.0,14821.0,1.0,11.234797,14821.0,716761.0,14821.0,1.718616e+09,debbj3wq,celestial-sweep-134,57.622802,1.059900e+25
866,True,False,0,Pres_Poisson,1,0.0,14821.0,1.0,10.992875,14821.0,716739.0,14821.0,1.718616e+09,uhookibs,lilac-sweep-57,57.622802,1.059900e+25
869,True,False,0,Pres_Poisson,0,2.0,14822.0,1.0,44.091390,14822.0,715804.0,14822.0,1.718616e+09,ivl5iyq2,celestial-sweep-56,57.623207,1.060330e+25
