In [None]:
import seaborn as sns
import sys
import matplotlib.pyplot as plt
from itertools import product

%matplotlib inline
%reload_ext autoreload
%autoreload 2
sys.path.append('../src/')

from model_data_loader import MNISTDataLoader, AGNewsDataLoader
from data_splitter import DataSplitOnOff
from models_with_training_and_evaluation import TwoBidirectionalLSTM, TwoByTwoConvLayeredNN
from nn_compression_pipeline import *
from compression_pipeline_test_runner import *
from configuration_creator.compression_different_algorithms_per_layers_combination_creator import CompressionDifferentAlgorithmsPerLayersCombinationCreator
import np_utils
from np_utils import ProcessingCache
from visualization_utils import *

# Output folders for figures and tables. The trailing slash matters for
# FIG_FOLDER: plot_show_and_save_if_filename concatenates FIG_FOLDER + file_name.
FIG_FOLDER = './figs_for_thesis/'
TABLE_FOLDER = './tables_for_thesis/'
# Folder handed to ProcessingCache below.
CACHE_FOLDER = './cached_processing_data/'
# Folder passed to both model constructors further down in the notebook.
OFFLINE_MODEL_FOLDER = './model_offline_save'
# Folder passed to the test runner created inside run_on_selected_model.
ONLINE_MODEL_AND_EVALUATION_RESULTS_FOLDER = './model_save_runs'
# Root of the training data; the dataset-specific folders are derived from it.
TRAINING_DATA_FOLDER = '../data'
MNIST_DATA_FOLDER = f'{TRAINING_DATA_FOLDER}/mnist'
AG_NEWS_DATA_FOLDER = f'{TRAINING_DATA_FOLDER}/ag_news'

# Cache object that the split-parameter sweep cells pass to
# get_run_data_for_different_params_for_all_labels.
save_values = ProcessingCache(file_folder=CACHE_FOLDER, debug_print=True)


def plot_show_and_save_if_filename(file_name=None):
    """Show the current matplotlib figure, saving it below FIG_FOLDER first if named.

    Args:
        file_name: Name of the output file, appended to ``FIG_FOLDER``. When
            ``None`` (the default) nothing is written and the figure is only
            displayed.
    """
    if file_name is not None:
        # Local import so this definition does not depend on the import cell.
        import os

        target_path = FIG_FOLDER + file_name
        # savefig does not create missing directories; create the target folder
        # so a fresh checkout does not fail after an expensive evaluation run.
        os.makedirs(os.path.dirname(target_path) or '.', exist_ok=True)
        # Save before plt.show(): after showing there may be no current figure
        # left to write.
        plt.savefig(target_path, bbox_inches='tight', dpi=1200)
    plt.show()


# Global seaborn theme applied to the figures produced below.
sns.set(style='white', context='notebook', palette='deep')
# Seed forwarded to both DataSplitOnOff instances through their random_seed argument.
random_seed = 2

## Load Used Datasets

In [None]:
# MNIST loader reading from MNIST_DATA_FOLDER.
mnist_data_loader = MNISTDataLoader(path_to_data=MNIST_DATA_FOLDER)
# Both values are forwarded positionally to AGNewsDataLoader.
# NOTE(review): presumably the vocabulary cap and the fixed token length per
# sample - confirm in model_data_loader.
vocab_size = 10000
sentence_length = 100
ag_news_data_loader = AGNewsDataLoader(vocab_size, sentence_length, path_to_data=AG_NEWS_DATA_FOLDER)

# Print the label distribution of both datasets.
mnist_data_loader.print_label_distribution()
ag_news_data_loader.print_label_distribution()


In [None]:
# Default split settings shared by both models. reset_model_split_settings
# reads these globals to restore the defaults after a parameter sweep.
# NOTE(review): the meaning of UPMonline / TBonline / UPoff is defined in
# data_splitter - confirm there.
upm_online = 0.5
tb_online = 150
# UPoff differs per model: one value for the MNIST CNN, one for the AG News LSTM.
u_poff_mnist = 0.0001
u_poff_lstm = 0.001
# Passed unchanged to both DataSplitOnOff instances.
# NOTE(review): None presumably means "no limit"; the commented-out value below
# looks like a shortcut for quick trial runs - confirm.
number_of_online_batches = None
# number_of_online_batches = 10
# Split configuration and model for the MNIST convolutional network.
data_split_mnist = DataSplitOnOff(mnist_data_loader,
                                  number_classes=10, batch_size=86,
                                  random_seed=random_seed,
                                  UPMonline=upm_online,
                                  TBonline=tb_online,
                                  UPoff=u_poff_mnist,
                                  model_name=TwoByTwoConvLayeredNN.__name__,
                                  number_of_online_batches=number_of_online_batches,
                                  number_of_labels_to_use=4,
                                  # debug_assert=True,
                                  # debug_print=True,
                                  )
# NOTE(review): the meaning of the third positional argument (10) is not
# visible here - confirm in models_with_training_and_evaluation.
model_cnn = TwoByTwoConvLayeredNN(data_split_mnist, OFFLINE_MODEL_FOLDER, 10)
# Split configuration and model for the AG News bidirectional LSTM.
data_split_lstm = DataSplitOnOff(ag_news_data_loader,
                                 number_classes=4, batch_size=256,
                                 random_seed=random_seed,
                                 UPMonline=upm_online,
                                 TBonline=tb_online,
                                 UPoff=u_poff_lstm,
                                 model_name=TwoBidirectionalLSTM.__name__,
                                 number_of_online_batches=number_of_online_batches,
                                 number_of_labels_to_use=4,
                                 # debug_assert=True,
                                 # debug_print=True,
                                 )
model_lstm = TwoBidirectionalLSTM(data_split_lstm, OFFLINE_MODEL_FOLDER, 10)


def reset_model_split_settings(label_number=0):
    """Restore the notebook-wide default split settings on both models.

    For the CNN and then the LSTM, selects the given label group and overrides
    the split settings with the global defaults ``upm_online`` and
    ``tb_online`` plus the model-specific UPoff value.

    Args:
        label_number: Group passed to ``set_selected_group`` (default 0).
    """
    defaults_per_model = (
        (model_cnn, u_poff_mnist),
        (model_lstm, u_poff_lstm),
    )
    for nn_model, default_u_poff in defaults_per_model:
        nn_model.data_split.set_selected_group(label_number)
        nn_model.data_split.override_split_settings(
            UPMonline=upm_online, TBonline=tb_online, UPoff=default_u_poff)


# Model selection: wrap both models; the evaluation cells below fetch the
# models to run through models_selection.get_models().

models_selection = ModelLabelAndIterationSelection([model_cnn, model_lstm])
selected_models = models_selection.get_models()


def run_on_selected_model(models_to_use, algorithm_combinations, rerun=False):
    """Run the given pipeline combinations on every model and collect the results.

    One TestRunnerForCompressionPipelinesAndDifferentDataAndModels is created
    per model, executed, and asked for its metrics data frame.

    Args:
        models_to_use: Iterable of models to evaluate.
        algorithm_combinations: Pipeline combinations handed to each runner.
        rerun: Forwarded unchanged to the runner's ``run`` method.
            NOTE(review): presumably forces recomputation instead of reusing
            stored results - confirm in compression_pipeline_test_runner.

    Returns:
        Tuple ``(metrics_frames, runners)``: two lists with one entry per
        model, in the order of ``models_to_use``.
    """
    collected = []
    for current_model in models_to_use:
        runner = TestRunnerForCompressionPipelinesAndDifferentDataAndModels(
            ONLINE_MODEL_AND_EVALUATION_RESULTS_FOLDER,
            current_model,
            algorithm_combinations)
        runner.run(rerun)
        collected.append((runner.get_metrics_df(), runner))

    metrics_frames = [frame for frame, _ in collected]
    executed_runners = [runner for _, runner in collected]
    return metrics_frames, executed_runners


## Accuracy and Architecture of Used Models
(Used in '4.4 Selected Models and Data Sets')
Shows whether the chosen models are representative of other current models.
For each model the size, architecture, and accuracy are reported.

In [None]:
# Collect one tuple per selected model:
#   (validation accuracy, offline-only model name, result of get_total_params(True)).
accuracy_test_data_of_offline_model_with_all_data_for_comparison \
    = [(model_loader.get_validation_accuracy_for_full_and_offline_only_model_with_all_data(),
        model_loader.data_split.get_offline_only_model_name(), model_loader.get_total_params(True))
       for model_loader in models_selection.get_models()]

# The third tuple element is unpacked into
# (params_total, params_per_layer, shapes_per_layer, summary); only
# params_total and summary are printed.
for accuracy, model_name, (params_total, params_per_layer, shapes_per_layer,
                           summary) in accuracy_test_data_of_offline_model_with_all_data_for_comparison:
    # Error rate in percent, i.e. (1 - accuracy) * 100, shown with 3 significant digits.
    error_rate = (1 - accuracy) * 100
    print(f'-- {error_rate:0.3}% Error Rate of {model_name} Model {params_total}')
    print(summary)
    print('\n\n')


# Determine Parameters for Drift Simulation
(used in '4.5 Determine Data Split Parameter Settings for Virtual
Drift Simulation')

## Determine TBOn Setting
(Used in '4.5.1 Determine TBOn Setting')

In [None]:
# get accuracy for different TBOn settings
models = models_selection.get_models()
# Baseline for this sweep: group 0, UPMonline 0.5, the default TBonline and UPoff set to 0.
for model in models:
    model.data_split.set_selected_group(0)
    model.data_split.override_split_settings(UPMonline=0.5, TBonline=tb_online, UPoff=0)

# Sweep the 'TBonline' parameter over the candidate values; save_values is
# passed along as the processing cache.
tb_online_accuracy_for_evaluations = get_run_data_for_different_params_for_all_labels('TBonline', [100, 125, 150, 160],
                                                                                      models,
                                                                                      save_values)
# Restore the default split settings so later cells are not affected by the sweep.
reset_model_split_settings()


In [None]:
# visualize loaded data
# Line graph restricted to the TBonline values 100 and 150 for label number 2.
show_line_graph_of_table(tb_online_accuracy_for_evaluations, selected_value=[100, 150], label_number=2)
plot_show_and_save_if_filename('online_split_params_determine_tbonline_extreme_compare_single_label')

# Overview table per TBonline value, printed and handed to the table writer
# together with TABLE_FOLDER and the output name.
tb_online_table = process_table_to_overview_per_param(tb_online_accuracy_for_evaluations, 'TBonline')
print_and_save_if_file_present_multi_column(tb_online_table, TABLE_FOLDER, 'online_split_params_determine_tbonline')


## Determine UPOff Setting
(Used in '4.5.2 Determine UPOff Setting')

In [None]:
# get accuracy for different UPOff settings
models = models_selection.get_models()
# Baseline for this sweep: group 0, UPMonline 0.5, TBonline 150 and UPoff set to 0.
for model in models:
    model.data_split.set_selected_group(0)
    model.data_split.override_split_settings(UPMonline=0.5, TBonline=150, UPoff=0)

# Sweep the 'UPoff' parameter over the two candidate values; save_values is
# passed along as the processing cache.
up_off_accuracy_for_evaluations = get_run_data_for_different_params_for_all_labels('UPoff', [0.0001, 0.001], models,
                                                                                   save_values)
# Restore the default split settings so later cells are not affected by the sweep.
reset_model_split_settings()


In [None]:
# visualize loaded data
# Line graph of the UPoff sweep for label number 2.
show_line_graph_of_table(up_off_accuracy_for_evaluations, label_number=2)
plot_show_and_save_if_filename('online_split_params_determine_upoff_extreme_compare_single_label')

# Cross table of the sweep; index and column names are converted from
# fractions to percent strings before printing/saving.
u_poff_table = create_cross_table_from_run(up_off_accuracy_for_evaluations)
u_poff_table = index_and_column_names_to_string_percent_from_fraction(u_poff_table)
print_and_save_if_file_present_multi_column(u_poff_table, TABLE_FOLDER, 'online_split_params_determine_upoff')


## Determine UPMOn Setting
(Used in '4.5.3 Determine UPMOn Setting')

In [None]:
# get accuracy for different UPMOn settings
# The per-model UPoff values (u_poff_mnist / u_poff_lstm) and the shared
# defaults (upm_online, tb_online) come from the cell that creates the data
# splits. They are deliberately not re-assigned here: reset_model_split_settings
# reads the same globals, so a second definition could silently drift from the
# first one depending on which cells were executed.
models = models_selection.get_models()
for model in models:
    model.data_split.set_selected_group(0)
    model_name = model.data_split.get_offline_only_model_name()
    # Pick the UPoff value that belongs to the model type (CNN vs. LSTM).
    upoff = u_poff_mnist if NN_CONV in model_name else u_poff_lstm
    model.data_split.override_split_settings(UPMonline=upm_online, TBonline=tb_online, UPoff=upoff)

# Sweep the 'UPMonline' parameter over the candidate values; save_values is
# passed along as the processing cache.
u_pmon_accuracy_for_evaluations = get_run_data_for_different_params_for_all_labels('UPMonline', [0.4, 0.5, 0.6], models,
                                                                                   save_values)
# Restore the default split settings so later cells are not affected by the sweep.
reset_model_split_settings()

In [None]:
# visualize loaded data
# Line graph of the UPMonline sweep for label number 2.
show_line_graph_of_table(u_pmon_accuracy_for_evaluations, label_number=2)
plot_show_and_save_if_filename('online_split_params_determine_upmonline_extreme_compare_single_label')

# Cross table of the sweep; index and column names are converted from
# fractions to percent strings before printing/saving.
u_pmon_table = create_cross_table_from_run(u_pmon_accuracy_for_evaluations)
u_pmon_table = index_and_column_names_to_string_percent_from_fraction(u_pmon_table)
print_and_save_if_file_present_multi_column(u_pmon_table, TABLE_FOLDER, 'online_split_params_determine_upmonline')


## Define Baselines for Upcoming Evaluations
Since this block defines all baselines in one place, `lower_bl_lossless_for_lossy_evaluation` and `upper_bl_general_purpose_compression` are defined here ahead of time, although they result from the 'Evaluation Lossless Configurations' sections below.

In [None]:
# Every baseline is a list holding exactly one pipeline (a list of steps), so
# it can be appended with `+` to the generated lists of combinations.

# Baseline consisting of the PickleToFile step only.
upper_bl_pipeline_save_weights_directly = [[
    PickleToFile(),
]]

# Baseline: PickleDump, ZSTD with parameter 1, then PickleToFile.
upper_bl_general_purpose_compression = [[
    PickleDump(),
    ZSTDWithMeasurement(1),
    PickleToFile(),
]]

# Baseline for the lossy evaluations: the same steps as the ZSTD configuration
# of the final lossless evaluation with fixed_number_of_iterations=28.
lower_bl_lossless_for_lossy_evaluation = [[
    Combine(np_utils.float_byte_wise_xor),
    SplitFloatAndStackByByteSegments(),
    PickleDump(),
    ZSTDWithMeasurement(1),
    PickleToFile(),
    DifResetSaver(fixed_number_of_iterations=28),
]]


# Evaluation Lossless Configurations
(Used in '4.7 Evaluation Lossless Configurations')

## General Compression for Further Testing
(Used in '4.7.1 General Compression for Further Testing')

In [None]:
# evaluate general compression for further testing
# Each inner list holds the alternatives for one pipeline stage; only the
# compressor stage has more than one alternative.
# NOTE(review): the integer arguments are presumably compression levels - confirm.
general_compression_further_testing_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    [PickleDump()],
    [*[ZSTDWithMeasurement(p) for p in [1, 5, 10, 15, 22]],
     *[BrotliWithMeasurement(p) for p in [1, 9, 10, 11]],
     LZMAWithMeasurement(),
     *[BZ2WithMeasurement(p) for p in [1, 9]],
     *[ZLibWithMeasurement(p) for p in [1, 9]],
     ],
    [PickleToFile()],
])
# Run the generated combinations plus the save-weights-directly baseline on the
# selected models. The result is the (metrics frames, runners) tuple returned
# by run_on_selected_model.
general_compression_further_testing_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    []
    + general_compression_further_testing_configurations.get_combinations_of_product_per_layer()
    + upper_bl_pipeline_save_weights_directly
)


In [None]:
# show table of evaluation
# The (metrics frames, runners) tuple is unpacked into the table builder.
general_compression_further_testing_table = create_grouped_comparison_table_for_different_nn(
    *general_compression_further_testing_evaluation_results)

# NOTE(review): presumably sorts by COMPRESSION_TIME_COL of the NN_CONV model
# and fills missing values - confirm in visualization_utils.
general_compression_further_testing_table_sorted = sort_and_fill_na(general_compression_further_testing_table,
                                                                    COMPRESSION_TIME_COL, NN_CONV)
print_and_save_if_file_present_multi_column(
    general_compression_further_testing_table_sorted, TABLE_FOLDER,
    'general_compression_float_comparison'
)


## Reset Point Frequency for Delta Creator
(Used in '4.7.2 Reset Point Frequency for Delta Creator')

In [None]:
# evaluate reset point frequency for delta creator

# Fixed pipeline; only the DifResetSaver stage varies over the listed
# fixed_number_of_iterations values.
reset_point_frequency_for_delta_creator_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    [Combine(np_utils.float_byte_wise_xor)],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=t) for t in
     [4, 5, 6, 8, 10, 12, 13, 14, 15, 16, 17, 18, 20, 24, 28, 32, 36, 46, 49, 74, 148]],  # 149 is with save at end
])

# Run the combinations plus the general-purpose-compression baseline.
reset_point_frequency_for_delta_creator_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    []
    + reset_point_frequency_for_delta_creator_configurations.get_combinations_of_product_per_layer()
    + upper_bl_general_purpose_compression
)


In [None]:
# show decompression time to compression ratio graph of evaluation

# Split the result tuple; only df_vis is used in this cell, runners is not.
(df_vis, runners) = reset_point_frequency_for_delta_creator_evaluation_results

reset_point_frequency_for_delta_creator_df_for_visualization = get_grouped_df_without_baselines_and_reset_frequency_as_algorithm_name_sorted(
    df_vis)

scatterplot_for_nn_types_grouped(reset_point_frequency_for_delta_creator_df_for_visualization)
plot_show_and_save_if_filename('reset_point_delta-decompression_max_to_decompression_ratio-lossless')

# show table of evaluation
reset_point_frequency_for_delta_creator_table = create_grouped_comparison_table_for_different_nn(
    *reset_point_frequency_for_delta_creator_evaluation_results)
# Reduce the table to rows matching the substrings '28' and '148' (two of the
# evaluated reset frequencies) plus the baseline.
reset_point_frequency_for_delta_creator_table = only_keep_table_rows_containing_substrings_and_baseline(
    reset_point_frequency_for_delta_creator_table, ['28', '148'])

reset_point_frequency_for_delta_creator_table_sorted = sort_and_fill_na(reset_point_frequency_for_delta_creator_table,
                                                                        COMPRESSION_TIME_COL, NN_CONV)
print_and_save_if_file_present_multi_column(
    reset_point_frequency_for_delta_creator_table_sorted, TABLE_FOLDER,
    'lossless-chosen_reset_points-table')


## Different Bytewise Segmentation for Float Matrices Algorithms
(Used in '4.7.3 Different Bytewise Segmentation for Float Matrices Algorithms')

In [None]:
# evaluate bytewise segmentation

# Only the first stage varies: three alternative float-splitting steps are
# compared in front of the same PickleDump -> ZSTD(1) -> PickleToFile pipeline.
bytewise_segmentation_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    [SplitFloatAndStack(), SplitFloatAndStackByByteSegmentsSplitLater(), SplitFloatAndStackByByteSegments()],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
])

# Run the combinations plus the general-purpose-compression baseline.
bytewise_segmentation_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    []
    + bytewise_segmentation_configurations.get_combinations_of_product_per_layer()
    + upper_bl_general_purpose_compression
)


In [None]:
# show table of evaluation
# The (metrics frames, runners) tuple is unpacked into the table builder.
bytewise_segmentation_table = create_grouped_comparison_table_for_different_nn(
    *bytewise_segmentation_evaluation_results)
# This table is ordered using COMPRESSION_RATIO_COL and NN_CONV.
bytewise_segmentation_table_sorted = sort_and_fill_na(bytewise_segmentation_table, COMPRESSION_RATIO_COL, NN_CONV)
print_and_save_if_file_present_multi_column(
    bytewise_segmentation_table_sorted, TABLE_FOLDER,
    'lossless-float_split_compare-table')


## Combining General Compression and Bytewise Segmentation for Final Evaluation
(Used in '4.7.4 Combining Selected General Compression and Bytewise
Segmentation for Float Matrices Processing Steps for Full Run')

In [None]:
# evaluate general compression and bytewise segmentation
# Two stages vary here: the float-splitting step (two alternatives) and the
# general-purpose compressor (seven alternatives). All other stages are fixed.
general_compression_and_bytewise_segmentation_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator(
    [
        [Combine(np_utils.float_byte_wise_xor)],
        [SplitFloatAndStack(), SplitFloatAndStackByByteSegments()],
        [PickleDump()],
        [
            *[ZSTDWithMeasurement(p) for p in [1, 15]],
            *[BrotliWithMeasurement(p) for p in [1, 10]],
            LZMAWithMeasurement(),
            BZ2WithMeasurement(9),
            ZLibWithMeasurement(9)
        ],
        [PickleToFile()],
        [DifResetSaver(fixed_number_of_iterations=28)],
    ]
)

# Run the combinations plus the general-purpose-compression baseline.
general_compression_and_bytewise_segmentation_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    []
    + general_compression_and_bytewise_segmentation_configurations.get_combinations_of_product_per_layer()
    + upper_bl_general_purpose_compression
)


In [None]:
# show table of evaluation
general_compression_and_bytewise_segmentation_table = create_grouped_comparison_table_for_different_nn(
    *general_compression_and_bytewise_segmentation_evaluation_results)

# NOTE(review): unlike the other table cells a fourth positional argument
# (False) is passed - presumably the sort direction; confirm in visualization_utils.
general_compression_and_bytewise_segmentation_table_sorted = sort_and_fill_na(
    general_compression_and_bytewise_segmentation_table, COMPRESSION_RATIO_COL, NN_CONV, False)
print_and_save_if_file_present_multi_column(
    general_compression_and_bytewise_segmentation_table_sorted,
    TABLE_FOLDER,
    'lossless_different_split_floats_with_general_compression_algs-table'
)


## Final Evaluation Lossless Configurations
(Used in '4.7.5 Final Evaluation Lossless Configurations')

In [None]:
# evaluate final lossless configurations

# ZSTD variant: byte-segment float split + ZSTD(1), for both reset settings (28 and 148).
zst_final_lossless_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    [Combine(np_utils.float_byte_wise_xor)],
    [SplitFloatAndStackByByteSegments()],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=f) for f in [28, 148]],
])

# LZMA variant: SplitFloatAndStack + LZMA, for the same two reset settings.
lzma_final_lossless_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    [Combine(np_utils.float_byte_wise_xor)],
    [SplitFloatAndStack()],
    [PickleDump()],
    [LZMAWithMeasurement()],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=f) for f in [28, 148]],
])

# Run both variants plus the general-purpose-compression baseline.
final_lossless_configurations_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    []
    + zst_final_lossless_configurations.get_combinations_of_product_per_layer()
    + lzma_final_lossless_configurations.get_combinations_of_product_per_layer()
    + upper_bl_general_purpose_compression
)

In [None]:
# show table of evaluation
# The (metrics frames, runners) tuple is unpacked into the table builder.
final_lossless_configurations_table = create_grouped_comparison_table_for_different_nn(
    *final_lossless_configurations_evaluation_results)
# This table is ordered using COMPRESSION_RATIO_COL and NN_CONV.
final_lossless_configurations_table_sorted = sort_and_fill_na(
    final_lossless_configurations_table, COMPRESSION_RATIO_COL, NN_CONV)

print_and_save_if_file_present_multi_column(
    final_lossless_configurations_table_sorted, TABLE_FOLDER,
    'lossless_compression_all_combinations_final_run'
)

# Evaluation Lossy Configurations
(Used in '4.8 Lossy Algorithms')

## Top-K
(Used in '4.8.1 Top-K')

In [None]:
# evaluate Top-k for different ks
# Candidate k values; each becomes one TopK(k, 0) alternative below.
# NOTE(review): k is presumably the fraction of values kept - confirm in nn_compression_pipeline.
selected_ks_for_top_k_evaluation = [0.0001, 0.001, 0.0025, 0.005, 0.01, 0.0175, 0.025, 0.05, 0.075, 0.1, 0.15, 0.25]

# TopK is placed in front of the steps of lower_bl_lossless_for_lossy_evaluation.
top_k_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    []
    + [TopK(k, 0) for k in selected_ks_for_top_k_evaluation],
    [Combine(np_utils.float_byte_wise_xor)],
    [SplitFloatAndStackByByteSegments()],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=28)],
])

# Run the combinations plus the lossless baseline for lossy evaluations.
top_k_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    []
    + top_k_configurations.get_combinations_of_product_per_layer()
    + lower_bl_lossless_for_lossy_evaluation
)


In [None]:
# show table of evaluation
# Lossy evaluations use the table builder variant with accuracy columns (..._with_acc).
top_k_table = create_grouped_comparison_table_for_different_nn_with_acc(*top_k_evaluation_results)
# This table is ordered using MEAN_ACCURACY_DIFF_COL and NN_LSTM (not NN_CONV).
top_k_table_sorted = sort_and_fill_na(top_k_table, MEAN_ACCURACY_DIFF_COL, NN_LSTM)
print_and_save_if_file_present_multi_column(
    top_k_table_sorted, TABLE_FOLDER,
    'lossy_topk_all'
)


## Using GCXS for Sparse Delta Weights
(Used in '4.8.1 Top-K - Using GCXS for Sparse Delta Weights')

In [None]:
# evaluate Using GCXS for Sparse Delta Weights
# Two k values are compared. The third stage offers the four True/False
# combinations of the first two GCXS arguments (the last two fixed to False)
# and, as the non-GCXS alternative, SplitFloatAndStackByByteSegments.
gcxs_sparse_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    []
    + [TopK(k, 0) for k in [0.15, 0.0175]],
    [Combine(np_utils.float_byte_wise_xor)],
    [GCXS(l, r, False, False) for l, r in list(product([True, False], [True, False]))]
    + [SplitFloatAndStackByByteSegments()],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=28)],
])

# Run the combinations plus the lossless baseline for lossy evaluations.
gcxs_sparse_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    []
    + gcxs_sparse_configurations.get_combinations_of_product_per_layer()
    + lower_bl_lossless_for_lossy_evaluation
)


In [None]:
# show table of evaluation
gcxs_sparse_table = create_grouped_comparison_table_for_different_nn(*gcxs_sparse_evaluation_results)
# NOTE(review): TABLE_GROUP_REG_SAME_K presumably groups rows that share the
# same k before sorting within each group - confirm in visualization_utils.
gcxs_sparse_table_sorted = sort_groups_by_regex_and_fill_na(
    gcxs_sparse_table, COMPRESSION_RATIO_COL, NN_CONV, TABLE_GROUP_REG_SAME_K)
print_and_save_if_file_present_multi_column(
    gcxs_sparse_table_sorted, TABLE_FOLDER,
    'lossy_topk_gcxs'
)


## Compressing GCXS Indices
(Used in '4.8.1 Top-K - Compressing GCXS Indices')

In [None]:
# evaluate Compressing GCXS Indices
# Here the first two GCXS arguments are fixed to True and the four True/False
# combinations of the last two arguments are compared.
gxs_indices_compression_configuration = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    []
    + [TopK(k, 0) for k in [0.15, 0.0175]],
    [Combine(np_utils.float_byte_wise_xor)],
    []
    + [GCXS(True, True, l, r) for l, r in list(product([True, False], [True, False]))],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=28)],
])

# Run the combinations plus the lossless baseline for lossy evaluations.
gxs_indices_compression_evaluation_results = run_on_selected_model(models_selection.get_models(), []
                                                                   + gxs_indices_compression_configuration.get_combinations_of_product_per_layer()
                                                                   + lower_bl_lossless_for_lossy_evaluation
                                                                   )

In [None]:
# show table of evaluation
gxs_indices_compression_evaluation_results_table = create_grouped_comparison_table_for_different_nn(
    *gxs_indices_compression_evaluation_results)
# Grouped with TABLE_GROUP_REG_SAME_K; this table uses COMPRESSION_TIME_COL and NN_CONV.
gxs_indices_compression_evaluation_results_table_sorted = sort_groups_by_regex_and_fill_na(
    gxs_indices_compression_evaluation_results_table, COMPRESSION_TIME_COL, NN_CONV, TABLE_GROUP_REG_SAME_K)
print_and_save_if_file_present_multi_column(
    gxs_indices_compression_evaluation_results_table_sorted,
    TABLE_FOLDER, 'lossy_topk_gcxs_compressing_indixes'
)

## Top-K Minimum per Layer
(Used in '4.8.1 Top-K - Minimum per Layer')

In [None]:
# evaluating Minimum per Layer
selected_ks_for_minimum_per_layer = [0.0175, 0.025, 0.05, 0.075, 0.15]

# Candidate values for the second TopK argument; 0 reproduces the TopK(k, 0)
# form used in the earlier Top-K cells.
selected_minimum_per_layer = [0, 0.001, 0.0001]
# All (k, minimum) pairs, keeping only those where k is at least the minimum.
selected_k_and_min_per_layer_for_minimum_per_layer = [
    (k, min_p) for k, min_p in
    product(selected_ks_for_minimum_per_layer, selected_minimum_per_layer) if k >= min_p]

# From here on GCXS with all four arguments True is used as the fixed third stage.
minimum_per_layer_configuration = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    []
    + [TopK(k, min_p) for k, min_p in selected_k_and_min_per_layer_for_minimum_per_layer],
    [Combine(np_utils.float_byte_wise_xor)],
    [GCXS(True, True, True, True)],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=28)],
])

# Run the combinations plus the lossless baseline for lossy evaluations.
minimum_per_layer_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    []
    + minimum_per_layer_configuration.get_combinations_of_product_per_layer()
    + lower_bl_lossless_for_lossy_evaluation
)

In [None]:
# show table of evaluation
# Table builder variant with accuracy columns (..._with_acc).
minimum_per_layer_evaluation_results_table = create_grouped_comparison_table_for_different_nn_with_acc(
    *minimum_per_layer_evaluation_results)
# This table is ordered using MEAN_ACCURACY_DIFF_COL and NN_LSTM.
minimum_per_layer_evaluation_results_table_sorted = sort_and_fill_na(minimum_per_layer_evaluation_results_table,
                                                                     MEAN_ACCURACY_DIFF_COL, NN_LSTM)
print_and_save_if_file_present_multi_column(
    minimum_per_layer_evaluation_results_table_sorted, TABLE_FOLDER,
    'lossy_topk_all_with_mpl'
)

## All Layers Top-K
(Used in '4.8.1 Top-K - All Layers Top-K')

In [None]:
# evaluating All Layers Top-K
# (k, minimum per layer) pairs used for both TopK and TopKOverAllLayers.
selected_k_and_min_per_layer_for_minimum_per_layer_for_all_layers_top_k = [
    (0.0750, 0.001),
    (0.0500, 0.001),
    (0.0175, 0.001),
]

# The first stage offers TopK and TopKOverAllLayers with identical parameters,
# so both variants are compared under the same settings.
all_layers_top_k_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    []
    + [TopK(k, min_p) for k, min_p in selected_k_and_min_per_layer_for_minimum_per_layer_for_all_layers_top_k]
    + [TopKOverAllLayers(k, min_p) for k, min_p in
       selected_k_and_min_per_layer_for_minimum_per_layer_for_all_layers_top_k],
    [Combine(np_utils.float_byte_wise_xor)],
    [GCXS(True, True, True, True)],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=28)],
])

# Run the combinations plus the lossless baseline for lossy evaluations.
all_layers_top_k_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    []
    + all_layers_top_k_configurations.get_combinations_of_product_per_layer()
    + lower_bl_lossless_for_lossy_evaluation
)


In [None]:
# show table of evaluation
# Table builder variant with accuracy columns (..._with_acc).
all_layers_top_k_table = create_grouped_comparison_table_for_different_nn_with_acc(*all_layers_top_k_evaluation_results)
# Grouped with TABLE_GROUP_REG_SAME_K; uses MEAN_ACCURACY_DIFF_COL and NN_CONV.
all_layers_top_k_table_sorted = sort_groups_by_regex_and_fill_na(
    all_layers_top_k_table, MEAN_ACCURACY_DIFF_COL, NN_CONV, TABLE_GROUP_REG_SAME_K)
print_and_save_if_file_present_multi_column(
    all_layers_top_k_table_sorted, TABLE_FOLDER,
    'lossy_topk_all_layers'
)


## Loss Adaptive TkP
(Used in '4.8.1 Top-K - Loss Adaptive TkP')

In [None]:
# visualize linegraph for loss adaptive transformation function
# Settings to plot: the same values as evaluated in the next cell, except False.
# NOTE(review): presumably True selects the default convex curve and the
# numbers are curve parameters - confirm in visualization_utils / TopK.
convex_settings_to_visualize_for_loss_adaptive_k = [True, 0.1, 2, -0.5]
lineplot_for_topk_loss_adaptive_curve_settings(convex_settings_to_visualize_for_loss_adaptive_k)
# Unlike the other figure names in this notebook, this one carries an explicit '.png' suffix.
plot_show_and_save_if_filename('lossy_top-k-loss-adaptive_used-convex-settings.png')


In [None]:
# evaluating loss adaptive TkP

# (k, minimum per layer) pairs combined with every loss-adaptive setting below.
selected_k_and_min_per_layer_for_minimum_per_layer_for_loss_adaptive_k = [
    (0.1000, 0.001),
    (0.0750, 0.001),
    (0.0500, 0.001),
]

# Values passed as loss_adaptive_percentage_linear_or_convex.
# NOTE(review): the meaning of False / True / numeric values is defined by TopK - confirm there.
convex_settings_to_evaluate_for_loss_adaptive_k = [False, True, 0.1, 2, -0.5]

# One TopK alternative per (k, minimum) pair and setting: 3 * 5 = 15 alternatives.
loss_adaptive_k_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    [TopK(k, min_p, loss_adaptive_percentage_linear_or_convex=c) for k, min_p in
     selected_k_and_min_per_layer_for_minimum_per_layer_for_loss_adaptive_k for c in
     convex_settings_to_evaluate_for_loss_adaptive_k],
    [Combine(np_utils.float_byte_wise_xor)],
    [GCXS(True, True, True, True)],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=28)],
])

# Run the combinations plus the lossless baseline for lossy evaluations.
loss_adaptive_k_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    []
    + loss_adaptive_k_configurations.get_combinations_of_product_per_layer()
    + lower_bl_lossless_for_lossy_evaluation
)


In [None]:
# show table of evaluation
# Table builder variant with accuracy columns (..._with_acc).
loss_adaptive_k_table = create_grouped_comparison_table_for_different_nn_with_acc(*loss_adaptive_k_evaluation_results)
# Grouped with TABLE_GROUP_REG_SAME_K; uses MEAN_ACCURACY_DIFF_COL and NN_CONV.
loss_adaptive_k_table_sorted = sort_groups_by_regex_and_fill_na(
    loss_adaptive_k_table, MEAN_ACCURACY_DIFF_COL, NN_CONV, TABLE_GROUP_REG_SAME_K)
print_and_save_if_file_present_multi_column(
    loss_adaptive_k_table_sorted, TABLE_FOLDER,
    'eval_lossy_topk_loss_adaptive'
)


## Reset Least Significant Bits
(Used in '4.8.2 Reset Least Significant Bits')

In [None]:
# evaluate reset least significant bits when most significant bits set
selected_numb_lower_bits_to_overwrite = [8, 16]
selected_numb_higher_bits_diff = [4, 0, -4]
# All (lower bits to overwrite, diff) pairs.
selected_overwrite_bits_and_higher_diff = list(
    product(selected_numb_lower_bits_to_overwrite, selected_numb_higher_bits_diff))
# Argument pairs for RemoveLowerFractionsIfHigherTrue. The second element is
# always the number of lower bits to overwrite; the first element is either 23
# or 23 - lower_bits - diff.
# NOTE(review): 23 is presumably the float32 mantissa width - confirm.
# NOTE(review): naming is inconsistent - the loop variable `h` below iterates
# the *lower* bit counts, while the configuration unpacks each pair as (l, h).
# Confirm the parameter order of RemoveLowerFractionsIfHigherTrue.
selected_higher_and_lower_number_of_bits = (
        [(23, h) for h in selected_numb_lower_bits_to_overwrite]
        + [(23 - h - diff, h) for h, diff in selected_overwrite_bits_and_higher_diff]
)

reset_least_sig_when_most_sig_set_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    [Combine(np_utils.float_byte_wise_xor)],
    []
    + [RemoveLowerFractionsIfHigherTrue(l, h) for l, h in selected_higher_and_lower_number_of_bits],
    [SplitFloatAndStackByByteSegments()],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=28)],
])

# Run the combinations plus the lossless baseline for lossy evaluations.
reset_least_sig_when_most_sig_set_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    []
    + reset_least_sig_when_most_sig_set_configurations.get_combinations_of_product_per_layer()
    + lower_bl_lossless_for_lossy_evaluation
)

In [None]:
# show table of evaluation
# Table builder variant with accuracy columns (..._with_acc).
reset_least_sig_when_most_sig_set_table = create_grouped_comparison_table_for_different_nn_with_acc(
    *reset_least_sig_when_most_sig_set_evaluation_results)
# Grouping pattern: captures the digits between an 's' and a following 'b'.
# NOTE(review): presumably matches the bit count in the shortened algorithm name - confirm.
regex_select_least_significant_bits = r'.*s(\d+)b.*'
reset_least_sig_when_most_sig_set_table_sorted = sort_groups_by_regex_and_fill_na(
    reset_least_sig_when_most_sig_set_table, MEAN_ACCURACY_DIFF_COL, NN_CONV, regex_select_least_significant_bits)
print_and_save_if_file_present_multi_column(
    reset_least_sig_when_most_sig_set_table_sorted, TABLE_FOLDER,
    'lossy_remove_lower_fractions_check_higher_bits'
)

In [None]:
# evaluate reset least significant bits

selected_bits_to_keep_least_sig = [16, 20, 21, 22, 23]
# Argument pairs for FloatRemoveSections: the varying value paired with a fixed 32.
# NOTE(review): presumably a bit range within a 32-bit float - confirm the
# parameter semantics in nn_compression_pipeline.
selected_float_bits_to_keep = [(l, 32) for l in selected_bits_to_keep_least_sig]

reset_least_sig_bits_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    [Combine(np_utils.float_byte_wise_xor)],
    [FloatRemoveSections(l, h) for l, h in selected_float_bits_to_keep],
    [SplitFloatAndStackByByteSegments()],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=28)],
])

# Run the combinations plus the lossless baseline for lossy evaluations.
reset_least_sig_bits_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    []
    + reset_least_sig_bits_configurations.get_combinations_of_product_per_layer()
    + lower_bl_lossless_for_lossy_evaluation
)

In [None]:
# show table of evaluation
# Table builder variant with accuracy columns (..._with_acc).
reset_least_sig_bits_table = create_grouped_comparison_table_for_different_nn_with_acc(
    *reset_least_sig_bits_evaluation_results)
# This table is ordered using MEAN_ACCURACY_DIFF_COL and NN_CONV.
reset_least_sig_bits_table_sorted = sort_and_fill_na(reset_least_sig_bits_table, MEAN_ACCURACY_DIFF_COL, NN_CONV)
print_and_save_if_file_present_multi_column(
    reset_least_sig_bits_table_sorted, TABLE_FOLDER,
    'lossy_remove_lower_fractions'
)

## Save Decision through New Model
(Used in '4.8.3 Save Decision through New Model')

In [None]:
# evaluate Save Decision through New Model

# First argument values for SkipSaveWhenAccuracyStillGood.
selected_percentage_bounds_for_save_decision = [0, 0.01]

# Each bound is combined with both True and False as the second argument,
# giving four alternatives for the second stage.
# NOTE(review): the meaning of the boolean flag is defined by SkipSaveWhenAccuracyStillGood - confirm there.
save_decision_through_model_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    [Combine(np_utils.float_byte_wise_xor)],
    [SkipSaveWhenAccuracyStillGood(p, l) for p in selected_percentage_bounds_for_save_decision for l in [True, False]],
    [SplitFloatAndStackByByteSegments()],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=28)],
])

# Run the combinations plus the lossless baseline for lossy evaluations.
save_decision_through_model_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    []
    + save_decision_through_model_configurations.get_combinations_of_product_per_layer()
    + lower_bl_lossless_for_lossy_evaluation
)


In [None]:
# show table of evaluation
# Table builder variant with accuracy columns (..._with_acc).
save_decision_through_model_table = create_grouped_comparison_table_for_different_nn_with_acc(
    *save_decision_through_model_evaluation_results)
# Grouping pattern: captures the digits between a 'g' and a following 'p'.
# NOTE(review): presumably matches the percentage bound in the shortened algorithm name - confirm.
regex_select_percentage_value = r'.*g(\d+)p.*'
save_decision_through_model_table_sorted = sort_groups_by_regex_and_fill_na(save_decision_through_model_table,
                                                                            MEAN_ACCURACY_DIFF_COL, NN_CONV, regex_select_percentage_value )
print_and_save_if_file_present_multi_column(
    save_decision_through_model_table_sorted,
    TABLE_FOLDER, 'eval_lossy_skip_save_when_accuracy_still_good')

## Combine Top-k and Reset Least Significant Bits Parameters
(Used in '4.8.4 Combine Top-k and Reset Least Significant Bits Parameters')

In [None]:
# evaluate Combine Top-k and Reset Least Significant Bits Parameters

# Each entry is [k, minimum per layer, loss-adaptive setting] for TopK.
selected_best_parameters_top_k = [
    [0.0750, 0.001, -0.5],
    [0.0750, 0.001, True],
    [0.0500, 0.001, 2],
]
# First argument values for FloatRemoveSections (the second argument is fixed to 32).
# NOTE(review): the name says "to_remove" while the final-selection cell calls
# the same values "bits_to_keep" - confirm which reading is correct.
selected_float_bits_to_remove = [18, 20]

top_k_and_reset_least_significant_bits_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    []
    + [TopK(k, min_p, loss_adaptive_percentage_linear_or_convex=c) for k, min_p, c in selected_best_parameters_top_k],
    [Combine(np_utils.float_byte_wise_xor)],
    # NOTE(review): the None alternative presumably makes the creator also emit
    # pipelines without this stage - confirm in the combination creator.
    [FloatRemoveSections(l, 32) for l in selected_float_bits_to_remove] + [None],
    [GCXS(True, True, True, True)],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=28)],
])

# Run the combinations plus the lossless baseline for lossy evaluations.
top_k_and_reset_least_significant_bits_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    []
    + top_k_and_reset_least_significant_bits_configurations.get_combinations_of_product_per_layer()
    + lower_bl_lossless_for_lossy_evaluation
)


In [None]:
# show table of evaluation

# Build the grouped comparison table and pass it straight through
# `remove_shortened_algorithm_from_index_of_pivot_metrics_table` with the
# GCXS_ALL_SETTINGS_TRUE entry.
top_k_and_reset_least_significant_bits_table = remove_shortened_algorithm_from_index_of_pivot_metrics_table(
    create_grouped_comparison_table_for_different_nn_with_acc(
        *top_k_and_reset_least_significant_bits_evaluation_results
    ),
    [GCXS_ALL_SETTINGS_TRUE],
)

top_k_and_reset_least_significant_bits_table_sorted = sort_and_fill_na(
    top_k_and_reset_least_significant_bits_table,
    MEAN_ACCURACY_DIFF_COL,
    NN_CONV,
)
print_and_save_if_file_present_multi_column(
    top_k_and_reset_least_significant_bits_table_sorted,
    TABLE_FOLDER,
    'lossy_final_run_float_bits_and_top_k_combination',
)


## Selecting Fitting Lossy Configurations
(Used in '4.8.5 Selecting Fitting Lossy Configurations')

In [None]:
# evaluate Selecting Fitting Lossy Configurations

# Each row is unpacked below into TopK(k, min_p, loss_adaptive_percentage_linear_or_convex=...).
selected_best_parameters_top_k = [
    [0.0750, 0.001, -0.5],
    [0.0750, 0.001, True],
    [0.0500, 0.001, 2],
]

# Each row holds the two positional arguments of SkipSaveWhenAccuracyStillGood.
selected_best_save_decision_parameters = [
    [0, False],
    [0.01, False],
]

# Each row holds the two positional arguments of FloatRemoveSections.
selected_best_float_bits_to_keep_for_top_k = [[18, 32], [20, 32]]
selected_best_float_bits_to_keep = [[20, 32]]

# Pipeline variants that start with a TopK step. A trailing None is an extra
# alternative for that step - presumably "step omitted"; verify in the creator.
final_lossy_with_top_k_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    [
        TopK(k_value, min_percentage, loss_adaptive_percentage_linear_or_convex=adaptive_setting)
        for k_value, min_percentage, adaptive_setting in selected_best_parameters_top_k
    ],
    [Combine(np_utils.float_byte_wise_xor)],
    [
        *(FloatRemoveSections(low, high) for low, high in selected_best_float_bits_to_keep_for_top_k),
        None,
    ],
    [
        *(SkipSaveWhenAccuracyStillGood(bound, flag) for bound, flag in selected_best_save_decision_parameters),
        None,
    ],
    [GCXS(True, True, True, True)],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=28)],
])

# Pipeline variants without a TopK step; these use SplitFloatAndStackByByteSegments
# where the TopK variants use GCXS.
final_lossy_without_top_k_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    [Combine(np_utils.float_byte_wise_xor)],
    [FloatRemoveSections(low, high) for low, high in selected_best_float_bits_to_keep],
    [
        *(SkipSaveWhenAccuracyStillGood(bound, flag) for bound, flag in selected_best_save_decision_parameters),
        None,
    ],
    [SplitFloatAndStackByByteSegments()],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=28)],
])

final_lossy_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    [
        *final_lossy_with_top_k_configurations.get_combinations_of_product_per_layer(),
        *final_lossy_without_top_k_configurations.get_combinations_of_product_per_layer(),
        *lower_bl_lossless_for_lossy_evaluation,
    ],
)


In [None]:
# show table of evaluation
final_lossy_table = remove_shortened_algorithm_from_index_of_pivot_metrics_table(
    create_grouped_comparison_table_for_different_nn_with_acc_without_decompression_time(
        *final_lossy_evaluation_results
    ),
    [GCXS_ALL_SETTINGS_TRUE],
)
# Substring list used to split the table into the two views below
# (rows without it vs. rows with it; both helpers also keep the baseline).
select_fast_slow_string = ['Skisg']

# show table for Low Compression Time
final_lossy_fast_table = only_keep_table_rows_containing_baseline_and_not_substrings(
    final_lossy_table,
    select_fast_slow_string,
)
final_lossy_fast_table_sorted = sort_and_fill_na(
    final_lossy_fast_table,
    MEAN_ACCURACY_DIFF_COL,
    NN_CONV,
)
print_and_save_if_file_present_multi_column(
    final_lossy_fast_table_sorted,
    TABLE_FOLDER,
    'lossy_final_run_without_skip',
)

# show table for High General Compression
final_lossy_slow_table = only_keep_table_rows_containing_substrings_and_baseline(
    final_lossy_table,
    select_fast_slow_string,
)
final_lossy_slow_table_sorted = sort_and_fill_na(
    final_lossy_slow_table,
    MEAN_ACCURACY_DIFF_COL,
    NN_CONV,
)
print_and_save_if_file_present_multi_column(
    final_lossy_slow_table_sorted,
    TABLE_FOLDER,
    'lossy_final_run_with_skip',
)


## Selecting Fitting Lossy Configurations - Slower General Compression
(Used in '4.8.5 Selecting Fitting Lossy Configurations - Slower General Compression')

In [None]:
# evaluate Selecting Fitting Lossy Configurations (slower general compression:
# the general-purpose step offers both ZSTDWithMeasurement(1) and LZMAWithMeasurement())

# Single row unpacked below into TopK(k, min_p, loss_adaptive_percentage_linear_or_convex=...).
high_compression_parameters_top_k = [
    [0.0750, 0.001, -0.5],
]

# Single row holding the two positional arguments of SkipSaveWhenAccuracyStillGood.
highest_compression_parameters_save_decision = [
    [0.01, False],
]

# Single row holding the two positional arguments of FloatRemoveSections.
float_bits_to_keep = [[20, 32]]

# Variants that start with a TopK step. The trailing None is an extra alternative
# for the save-decision step - presumably "step omitted"; verify in the creator.
final_lossy_with_slow_general_compression_with_top_k_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator(
    [
        [
            TopK(k_value, min_percentage, loss_adaptive_percentage_linear_or_convex=adaptive_setting)
            for k_value, min_percentage, adaptive_setting in high_compression_parameters_top_k
        ],
        [Combine(np_utils.float_byte_wise_xor)],
        [FloatRemoveSections(low, high) for low, high in float_bits_to_keep],
        [
            *(
                SkipSaveWhenAccuracyStillGood(bound, flag)
                for bound, flag in highest_compression_parameters_save_decision
            ),
            None,
        ],
        [GCXS(True, True, True, True)],
        [PickleDump()],
        [ZSTDWithMeasurement(1), LZMAWithMeasurement()],
        [PickleToFile()],
        [DifResetSaver(fixed_number_of_iterations=28)],
    ])

# Variants without a TopK step; SplitFloatAndStackByByteSegments takes the place of GCXS.
final_lossy_with_slow_general_compression_without_top_k_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator(
    [
        [Combine(np_utils.float_byte_wise_xor)],
        [FloatRemoveSections(low, high) for low, high in float_bits_to_keep],
        [
            *(
                SkipSaveWhenAccuracyStillGood(bound, flag)
                for bound, flag in highest_compression_parameters_save_decision
            ),
            None,
        ],
        [SplitFloatAndStackByByteSegments()],
        [PickleDump()],
        [ZSTDWithMeasurement(1), LZMAWithMeasurement()],
        [PickleToFile()],
        [DifResetSaver(fixed_number_of_iterations=28)],
    ])

final_lossy_with_slow_general_compression_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    [
        *final_lossy_with_slow_general_compression_with_top_k_configurations.get_combinations_of_product_per_layer(),
        *final_lossy_with_slow_general_compression_without_top_k_configurations.get_combinations_of_product_per_layer(),
        *lower_bl_lossless_for_lossy_evaluation,
    ],
)

In [None]:
# show table of evaluation

# Unlike the other lossy table cells, this one uses the table builder without
# the `_with_acc` suffix and sorts by COMPRESSION_RATIO_COL.
final_lossy_with_slow_general_compression_table = remove_shortened_algorithm_from_index_of_pivot_metrics_table(
    create_grouped_comparison_table_for_different_nn(
        *final_lossy_with_slow_general_compression_evaluation_results
    ),
    [GCXS_ALL_SETTINGS_TRUE],
)

final_lossy_with_slow_general_compression_table_sorted = sort_and_fill_na(
    final_lossy_with_slow_general_compression_table,
    COMPRESSION_RATIO_COL,
    NN_CONV,
)
print_and_save_if_file_present_multi_column(
    final_lossy_with_slow_general_compression_table_sorted,
    TABLE_FOLDER,
    'lossy_final_run_slow_run_with_high_compression',
)

## Final Lossy Configurations with Different Reset Point Interval Settings
(Used in '4.8.5 Selecting Fitting Lossy Configurations - Final Lossless Configurations with Different Reset Point Interval Settings')

In [None]:
# evaluate Final Lossy Configurations with Different Reset Point Interval Settings

# NOTE(review): not referenced anywhere else in this cell; the TopK parameters
# used below are written out inline. Kept in case a later cell reads it.
best_samples_from_last_run_top_k_selected = [
    [0.0750, 0.001, -0.5],
    [0.0750, 0.001, True],
]

# Each row holds the two positional arguments of SkipSaveWhenAccuracyStillGood.
best_acc_bound_selected = [
    [0, False],
    [0.01, False],
]
# Values passed as DifResetSaver(fixed_number_of_iterations=...); every
# configuration family below is built once per value.
reset_point_iterations = [28, 148]

# Single pair holding the two positional arguments of FloatRemoveSections.
float_bits_to_keep = [(20, 32)]

# No TopK and no save-decision step.
final_lossy_rpi_fast_0_acc_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    [Combine(np_utils.float_byte_wise_xor)],
    [FloatRemoveSections(low, high) for low, high in float_bits_to_keep],
    [SplitFloatAndStackByByteSegments()],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=iterations) for iterations in reset_point_iterations],
])

# TopK with fixed parameters, FloatRemoveSections(18, 32) and GCXS; no save-decision step.
final_lossy_rpi_fast_0p1_acc_configuration = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    [TopK(0.0750, 0.001, loss_adaptive_percentage_linear_or_convex=True)],
    [Combine(np_utils.float_byte_wise_xor)],
    [FloatRemoveSections(18, 32)],
    [GCXS(True, True, True, True)],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=iterations) for iterations in reset_point_iterations],
])

# No TopK; one SkipSaveWhenAccuracyStillGood variant per row of `best_acc_bound_selected`.
final_lossy_rpi_slow_0p1_and_0_acc_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator([
    [Combine(np_utils.float_byte_wise_xor)],
    [FloatRemoveSections(low, high) for low, high in float_bits_to_keep],
    [SkipSaveWhenAccuracyStillGood(bound, flag) for bound, flag in best_acc_bound_selected],
    [SplitFloatAndStackByByteSegments()],
    [PickleDump()],
    [ZSTDWithMeasurement(1)],
    [PickleToFile()],
    [DifResetSaver(fixed_number_of_iterations=iterations) for iterations in reset_point_iterations],
])

# TopK with fixed parameters; the save-decision step has SkipSaveWhenAccuracyStillGood(0.01, False)
# and None as alternatives (None presumably means "step omitted" - verify in the creator).
final_lossy_rpi_slow_and_fast_1_acc_configurations = CompressionDifferentAlgorithmsPerLayersCombinationCreator(
    [
        [TopK(0.0750, 0.001, loss_adaptive_percentage_linear_or_convex=-0.5)],
        [Combine(np_utils.float_byte_wise_xor)],
        [FloatRemoveSections(low, high) for low, high in float_bits_to_keep],
        [SkipSaveWhenAccuracyStillGood(0.01, False), None],
        [GCXS(True, True, True, True)],
        [PickleDump()],
        [ZSTDWithMeasurement(1)],
        [PickleToFile()],
        [DifResetSaver(fixed_number_of_iterations=iterations) for iterations in reset_point_iterations],
    ])

# This run appends `upper_bl_general_purpose_compression` rather than the
# `lower_bl_lossless_for_lossy_evaluation` entries used by the preceding lossy runs.
final_lossy_rpi_slow_and_fast_evaluation_results = run_on_selected_model(
    models_selection.get_models(),
    [
        *final_lossy_rpi_fast_0_acc_configurations.get_combinations_of_product_per_layer(),
        *final_lossy_rpi_fast_0p1_acc_configuration.get_combinations_of_product_per_layer(),
        *final_lossy_rpi_slow_0p1_and_0_acc_configurations.get_combinations_of_product_per_layer(),
        *final_lossy_rpi_slow_and_fast_1_acc_configurations.get_combinations_of_product_per_layer(),
        *upper_bl_general_purpose_compression,
    ],
)

In [None]:
# show table of evaluation

# Build the grouped comparison table, then pass it through
# `remove_shortened_algorithm_from_index_of_pivot_metrics_table` with three
# entries: the GCXS and split-float constants and the literal '32r'.
final_lossy_rpi_slow_and_fast_table = remove_shortened_algorithm_from_index_of_pivot_metrics_table(
    create_grouped_comparison_table_for_different_nn_with_acc_without_decompression_time(
        *final_lossy_rpi_slow_and_fast_evaluation_results
    ),
    [GCXS_ALL_SETTINGS_TRUE, SPLIT_FLOAT_AND_STACK_BY_BYTE_SEGMENTS, '32r'],
)
final_lossy_rpi_slow_and_fast_table_sorted = sort_and_fill_na(
    final_lossy_rpi_slow_and_fast_table,
    MEAN_ACCURACY_DIFF_COL,
    NN_CONV,
)
print_and_save_if_file_present_multi_column(
    final_lossy_rpi_slow_and_fast_table_sorted,
    TABLE_FOLDER,
    'lossy_final_run_all_with_different_reset_points',
)