In [1]:
import numpy as np
import math
import copy
import datetime
import pandas as pd
import json

### Description of add_up_a_decomposition_float
The function takes as inputs:
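1. a decomposition (x_list, BN_matrices_array) output by SER 1, SER 2 or GER, where x_list is a list of weights and BN_matrices_array is a 2D NumPy array whose $k$-th row stores, for every column, the row index of the nonzero entry of the $k$-th BN matrix.
2. the dimensions row_dim and col_dim of the output TPM.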

The function then computes the weighted sum $\sum_k x_k A_k$ of the decomposition (x_list, BN_matrices_array) and outputs the resulting row_dim-by-col_dim TPM.

In [2]:
def add_up_a_decomposition_float(x_list, BN_matrices_array, row_dim, col_dim):
    """Rebuild a matrix from a weighted list of BN matrices.

    Parameters
    ----------
    x_list : list
        Weights, one per BN matrix.
    BN_matrices_array : 2D numpy array
        Entry [k, col] is the row index that BN matrix k selects in column col.
    row_dim, col_dim : int
        Shape of the matrix to return.

    Returns
    -------
    numpy.ndarray
        Float array of shape (row_dim, col_dim) in which every weight
        x_list[k] has been added at (BN_matrices_array[k, col], col) for each
        column col.
    """
    output_TPM = np.zeros((row_dim, col_dim), dtype=float)

    for k, weight in enumerate(x_list):
        for col in range(col_dim):
            selected_row = BN_matrices_array[k, col]
            output_TPM[selected_row, col] += weight

    return output_TPM

# Implementation of the arXiv Version of GER

## Order of execution: get_col_indices, first_occurrence, column_freq, find_v_list, GERESA, f_score, float_GER.

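The function float_GER requires three inputs:
1. $P$ is a $2^{n} \times 2^{n}$ numpy array.
2. $z$ is a real number greater than 1.
3. stopping_error is a real number; the iteration stops as soon as the Frobenius norm of the residual matrix falls below it.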

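This function outputs:
1. The length of the decomposition $K$.
2. A list of positive real numbers x_list.
3. A 2D numpy array representing a list of BN matrices A_array.
4. The Frobenius norm of the residual matrix that is left after the decomposition has been subtracted from $P$.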

In [9]:
def float_GER(P, z, stopping_error):
    """Greedily decompose P into a weighted sum of BN matrices.

    A residual R starts as a copy of P.  In every iteration the function
    asks find_v_list for candidate weights, builds one BN matrix per candidate
    with GERESA, scores the residual that would remain with f_score, and keeps
    the candidate with the highest score (ties go to the larger weight).  The
    chosen weight x is then subtracted from R[A[j], j] in every column j.

    Parameters
    ----------
    P : 2D numpy array
        Matrix to decompose.  It is copied and never modified.
    z : number
        Base forwarded to f_score when ranking candidates.
    stopping_error : number
        The loop runs while the Frobenius norm of R is >= stopping_error.

    Returns
    -------
    K : int
        Number of terms in the decomposition (equals len(x_list)).
    x_list : list
        The chosen weights, in the order they were extracted.
    BN_array : numpy.ndarray
        np.array of the chosen BN matrices; each one is a list holding one
        row index per column.
    residual_norm : float
        Frobenius norm of R when the loop ended.

    Notes
    -----
    If find_v_list has no candidate to offer (it returns None when R holds no
    positive entry <= B, e.g. because one column is already exhausted), the
    loop used to crash with a TypeError and the whole decomposition was lost.
    The function now emits a RuntimeWarning and returns what it has so far;
    callers can detect this through residual_norm >= stopping_error.
    """
    import warnings  # local import: only needed on the early-exit path

    row_num, col_num = P.shape
    col_range = np.arange(col_num)

    R = P.copy()  # residual; P itself stays untouched
    K = 0
    x_list = []
    BN_list = []

    while np.linalg.norm(R, ord='fro') >= stopping_error:
        # B is the smallest column maximum: the largest weight that can still
        # be subtracted from some entry of every column.
        B = np.min(np.max(R, axis=0))
        v_list = find_v_list(R, B)

        if not v_list:
            warnings.warn(
                "float_GER stopped early: no admissible weight is left although "
                "the residual norm is still >= stopping_error.",
                RuntimeWarning,
            )
            break

        x = 0
        score = -math.inf

        for v in v_list:
            temp_A = GERESA(R, v)  # list of length col_num: one row index per column

            # Residual that would remain if (v, temp_A) were chosen.
            candidate_R = R.copy()
            candidate_R[temp_A, col_range] -= v

            temp_score = f_score(candidate_R, z)

            if (temp_score > score) or ((temp_score == score) and (v > x)):
                score = temp_score
                x = v
                A = temp_A

        R[A, col_range] -= x

        x_list.append(x)
        BN_list.append(A)
        K += 1  # counted only once the term has actually been extracted

    return K, x_list, np.array(BN_list), np.linalg.norm(R, ord='fro')

This function requires two inputs:
1. an integer $v$.
2. A 2D numpy array an_array consisting of integers.

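The function outputs the column frequency of $v$ in an_array, i.e. the number of columns of an_array that contain $v$ (entries are compared with $v$ by exact equality).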

In [5]:
def column_freq(v, an_array):
    """Return the number of columns of an_array that contain the value v.

    Parameters
    ----------
    v : scalar
        Value to look for; entries are compared with it by exact equality.
    an_array : 2D numpy array
        Array whose columns are inspected.  An array with zero columns is
        allowed and yields 0.

    Returns
    -------
    int
        Count of columns having at least one entry equal to v.
    """
    # Unpacking keeps the original requirement that the input is 2D.
    _row_num, _col_num = an_array.shape

    # One vectorised comparison replaces the Python loop over columns; this
    # function is called once per matrix entry by its callers, so the loop
    # overhead dominated the run time.
    contains_v = (an_array == v).any(axis=0)
    return int(np.count_nonzero(contains_v))

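This function outputs the list of distinct positive entries of $R$ which are not greater than $B$ (upper_bound) and attain the maximum column frequency in $R$ among such entries. If $R$ has no positive entry that is at most $B$, the function returns None.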

In [6]:
def find_v_list(R, upper_bound):
    """List the candidate values of R with the highest column frequency.

    A candidate is an entry of R that is positive and not greater than
    upper_bound.  Candidates are grouped by their column frequency in R
    (see column_freq) and the distinct values of the highest non-empty group
    are returned as a list.  When R has no candidate, None is returned.
    """
    n_rows, n_cols = R.shape

    # buckets[f] holds the distinct candidate values whose column frequency is f.
    buckets = [set() for _ in range(n_cols + 1)]

    for r in range(n_rows):
        for c in range(n_cols):
            entry = R[r, c]
            if 0 < entry <= upper_bound:
                buckets[column_freq(entry, R)].add(entry)

    # Walk from the highest possible frequency down to 1.
    for freq in range(n_cols, 0, -1):
        if buckets[freq]:
            return list(buckets[freq])

    return None

In [7]:
def GERESA(R, v):
    """Choose one row per column of R for the weight v.

    Returns a list A of length col_num.  For a column j that contains v,
    A[j] is the first row where v occurs.  For every other column, A[j] is
    picked among the rows i with R[i, j] > v: the row whose remainder
    R[i, j] - v has the highest column frequency among the columns already
    processed (in their updated state); the lowest such row wins ties.

    R itself is not modified; the updates are tracked on a private copy.
    """
    n_rows, n_cols = R.shape
    chosen_rows = [0] * n_cols
    updated = copy.deepcopy(R)   # R after the selections made so far
    processed_cols = []

    cols_with_v = get_col_indices(v, R)
    cols_without_v = set(range(n_cols)) - cols_with_v

    # Columns containing v: take v itself, which leaves a zero behind.
    for j in cols_with_v:
        row = first_occurrence(v, R[:, j])
        chosen_rows[j] = row
        updated[row, j] = 0
        processed_cols.append(j)

    # Remaining columns: favour the remainder that recurs most often.
    for j in cols_without_v:
        # Maps each row of Larger(v, R(:, j)) to the column frequency of its remainder.
        freq_by_row = {
            i: column_freq(R[i, j] - v, updated[:, processed_cols])
            for i in range(n_rows)
            if R[i, j] > v
        }

        best_row = max(freq_by_row.keys(), key=freq_by_row.get)
        chosen_rows[j] = best_row

        updated[best_row, j] = R[best_row, j] - v
        processed_cols.append(j)

    return chosen_rows

This function requires two inputs:
1. an integer $v$.
2. A 2D numpy array an_array consisting of integers.

The function outputs the set $\text{Col_indices}(v, \text{an_array})$, i.e. the set of indices of the columns of an_array that contain $v$.

In [3]:
def get_col_indices(v, an_array):
    """Return the set of column indices of an_array whose column contains v.

    an_array must be a 2D numpy array; the result is a set of Python ints
    (empty when v occurs nowhere).
    """
    _, n_cols = an_array.shape
    return {j for j in range(n_cols) if v in an_array[:, j]}

This function requires two inputs:
1. An integer $v$.
2. A 1D numpy array array_1D such that v is an element of array_1D.

The function outputs the index of the first occurrence of $v$ in array_1D (or $-1$ if $v$ does not occur in array_1D).

In [4]:
def first_occurrence(v, array_1D):
    """Return the smallest index i with array_1D[i] == v, or -1 if there is none."""
    matches = (i for i in range(array_1D.size) if v == array_1D[i])
    return next(matches, -1)

This function requires two inputs:
1. An integer 2D array array_2D.
2. A positive real number $z > 1$.

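The function outputs a score: the sum of $z^{c(u)}$ over the distinct positive entries $u$ of array_2D, where $c(u)$ is the column frequency of $u$ in array_2D.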

In [8]:
def f_score(array_2D, z):
    """Score a matrix by how often its positive values recur across columns.

    Every distinct positive entry u of array_2D contributes
    z ** column_freq(u, array_2D) to the score; non-positive entries are
    ignored.  Entries are visited row by row, and each distinct value is
    counted once, at its first appearance.
    """
    n_rows, n_cols = array_2D.shape
    total = 0
    seen_values = set()

    for r in range(n_rows):
        for c in range(n_cols):
            entry = array_2D[r, c]
            if not entry > 0:
                continue
            if entry in seen_values:
                continue
            total += z ** column_freq(entry, array_2D)
            seen_values.add(entry)

    return total

# Floating-Point Matrix Experiment

In [10]:
# Custom JSON encoder that handles NumPy types (created by GPT-o3 on poe.com)
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy integers, floats and arrays."""

    def default(self, obj):
        """Convert NumPy objects to built-in types; defer everything else to the base class."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        return super().default(obj)

## $8 \times 8$ TPMs

In [70]:
# Experiment: run float_GER on the 8-by-8 floating-point TPMs with indices 0..9,
# saving each decomposition and its run time as JSON.
# NOTE(review): this cell writes to "./raw data (float)/" while the 16/32/64 cells
# write to "./raw data (float_GER)/" -- confirm which directory is intended.
z = 5  # second argument of float_GER (forwarded to f_score as the base)
stopping_error = 0.01  # float_GER loops while the residual Frobenius norm is >= this

for k in range(10):
    print('iteration', k)
    print('Time now is:', datetime.datetime.now())
    fTPM_filepath = './8-by-8/dim8_fTPM_' + str(k) + '.npy'
    fTPM = np.load(fTPM_filepath)
    
    # Only the float_GER call is timed; loading and saving are excluded.
    start_time = datetime.datetime.now()
    decom_and_error = float_GER(fTPM, z, stopping_error)  # tuple: (K, x_list, BN matrices array, residual norm)
    end_time = datetime.datetime.now()
    execution_duration = end_time - start_time
    
    exe_dur_seconds = execution_duration.total_seconds()
    exe_dur_dict = {"execution time": exe_dur_seconds}
    
    # Save the results. open(..., 'w') does not create directories, so the
    # output folder must already exist.
    decom_filepath = "./raw data (float)/dim8_decom_err_" + str(k) + ".json"
    time_filepath = "./raw data (float)/dim8_time_" + str(k) + ".json"
    
    # The tuple is written as a JSON array; NumpyEncoder converts the NumPy values in it.
    with open(decom_filepath, 'w') as out1:
        json.dump(decom_and_error, out1, cls=NumpyEncoder)
        
    with open(time_filepath, 'w') as out2:
        json.dump(exe_dur_dict, out2)
    
    print()

iteration 0
Time now is: 2025-04-10 18:07:06.798652

iteration 1
Time now is: 2025-04-10 18:07:09.766304

iteration 2
Time now is: 2025-04-10 18:07:12.911557

iteration 3
Time now is: 2025-04-10 18:07:15.961934

iteration 4
Time now is: 2025-04-10 18:07:18.375797

iteration 5
Time now is: 2025-04-10 18:07:20.787594

iteration 6
Time now is: 2025-04-10 18:07:22.943457

iteration 7
Time now is: 2025-04-10 18:07:25.115693

iteration 8
Time now is: 2025-04-10 18:07:27.264881

iteration 9
Time now is: 2025-04-10 18:07:29.436662



## $16 \times 16$ TPMs

In [73]:
# Experiment: run float_GER on the 16-by-16 floating-point TPMs with indices 0..9,
# saving each decomposition and its run time as JSON.
z = 5  # second argument of float_GER (forwarded to f_score as the base)
stopping_error = 0.01  # float_GER loops while the residual Frobenius norm is >= this

for k in range(10):
    print('iteration', k)
    print('Time now is:', datetime.datetime.now())
    fTPM_filepath = './16-by-16/dim16_fTPM_' + str(k) + '.npy'
    fTPM = np.load(fTPM_filepath)
    
    # Only the float_GER call is timed; loading and saving are excluded.
    start_time = datetime.datetime.now()
    decom_and_error = float_GER(fTPM, z, stopping_error)  # tuple: (K, x_list, BN matrices array, residual norm)
    end_time = datetime.datetime.now()
    execution_duration = end_time - start_time
    
    exe_dur_seconds = execution_duration.total_seconds()
    exe_dur_dict = {"execution time": exe_dur_seconds}
    
    # Save the results. open(..., 'w') does not create directories, so the
    # output folder must already exist.
    decom_filepath = "./raw data (float_GER)/dim16_decom_err_" + str(k) + ".json"
    time_filepath = "./raw data (float_GER)/dim16_time_" + str(k) + ".json"
    
    # The tuple is written as a JSON array; NumpyEncoder converts the NumPy values in it.
    with open(decom_filepath, 'w') as out1:
        json.dump(decom_and_error, out1, cls=NumpyEncoder)
        
    with open(time_filepath, 'w') as out2:
        json.dump(exe_dur_dict, out2)
    
    print()

iteration 0
Time now is: 2025-04-12 02:03:42.728519

iteration 1
Time now is: 2025-04-12 02:05:50.898864

iteration 2
Time now is: 2025-04-12 02:07:52.396510

iteration 3
Time now is: 2025-04-12 02:09:56.507876

iteration 4
Time now is: 2025-04-12 02:11:58.056670

iteration 5
Time now is: 2025-04-12 02:14:02.858283

iteration 6
Time now is: 2025-04-12 02:15:58.954414

iteration 7
Time now is: 2025-04-12 02:18:03.503288

iteration 8
Time now is: 2025-04-12 02:19:56.881817

iteration 9
Time now is: 2025-04-12 02:21:51.199964



## $32 \times 32$ TPMs

In [11]:
# Experiment: run float_GER on 32-by-32 floating-point TPMs, saving each
# decomposition and its run time as JSON.
# NOTE(review): the loop starts at k = 2, unlike the other cells, which cover 0..9;
# presumably matrices 0 and 1 were processed in an earlier run -- confirm before re-running.
z = 5  # second argument of float_GER (forwarded to f_score as the base)
stopping_error = 0.01  # float_GER loops while the residual Frobenius norm is >= this

for k in range(2, 10):
    print('iteration', k)
    print('Time now is:', datetime.datetime.now())
    fTPM_filepath = './32-by-32/dim32_fTPM_' + str(k) + '.npy'
    fTPM = np.load(fTPM_filepath)
    
    # Only the float_GER call is timed; loading and saving are excluded.
    start_time = datetime.datetime.now()
    decom_and_error = float_GER(fTPM, z, stopping_error)  # tuple: (K, x_list, BN matrices array, residual norm)
    end_time = datetime.datetime.now()
    execution_duration = end_time - start_time
    
    exe_dur_seconds = execution_duration.total_seconds()
    exe_dur_dict = {"execution time": exe_dur_seconds}
    
    # Save the results. open(..., 'w') does not create directories, so the
    # output folder must already exist.
    decom_filepath = "./raw data (float_GER)/dim32_decom_err_" + str(k) + ".json"
    time_filepath = "./raw data (float_GER)/dim32_time_" + str(k) + ".json"
    
    # The tuple is written as a JSON array; NumpyEncoder converts the NumPy values in it.
    with open(decom_filepath, 'w') as out1:
        json.dump(decom_and_error, out1, cls=NumpyEncoder)
        
    with open(time_filepath, 'w') as out2:
        json.dump(exe_dur_dict, out2)
    
    print()

iteration 2
Time now is: 2025-04-12 13:53:34.635337

iteration 3
Time now is: 2025-04-12 16:13:27.684780

iteration 4
Time now is: 2025-04-12 18:37:22.761187

iteration 5
Time now is: 2025-04-12 20:26:30.513999

iteration 6
Time now is: 2025-04-12 22:23:50.525757

iteration 7
Time now is: 2025-04-13 00:43:58.168202

iteration 8
Time now is: 2025-04-13 03:09:24.204496

iteration 9
Time now is: 2025-04-13 05:23:52.120285



## $64 \times 64$ TPMs

In [None]:
# Experiment: run float_GER on the 64-by-64 floating-point TPMs with indices 0..9,
# saving each decomposition and its run time as JSON.
z = 5  # second argument of float_GER (forwarded to f_score as the base)
stopping_error = 0.01  # float_GER loops while the residual Frobenius norm is >= this

for k in range(10):
    print('iteration', k)
    print('Time now is:', datetime.datetime.now())
    fTPM_filepath = './64-by-64/dim64_fTPM_' + str(k) + '.npy'
    fTPM = np.load(fTPM_filepath)
    
    # Only the float_GER call is timed; loading and saving are excluded.
    start_time = datetime.datetime.now()
    decom_and_error = float_GER(fTPM, z, stopping_error)  # tuple: (K, x_list, BN matrices array, residual norm)
    end_time = datetime.datetime.now()
    execution_duration = end_time - start_time
    
    exe_dur_seconds = execution_duration.total_seconds()
    exe_dur_dict = {"execution time": exe_dur_seconds}
    
    # Save the results. open(..., 'w') does not create directories, so the
    # output folder must already exist.
    decom_filepath = "./raw data (float_GER)/dim64_decom_err_" + str(k) + ".json"
    time_filepath = "./raw data (float_GER)/dim64_time_" + str(k) + ".json"
    
    # The tuple is written as a JSON array; NumpyEncoder converts the NumPy values in it.
    with open(decom_filepath, 'w') as out1:
        json.dump(decom_and_error, out1, cls=NumpyEncoder)
        
    with open(time_filepath, 'w') as out2:
        json.dump(exe_dur_dict, out2)
    
    print()