In [96]:
import numpy as np
import math
import copy
import datetime
import pandas as pd
import json

### Description of add_up_a_decomposition
The function takes as inputs:
1. a decomposition (x_list, BN_matrices_array) output by SER 1, SER 2, GER (x_list is a list, whereas BN_matrices_array is a 2D np array).
2. The dimension of the output TPM row_dim, col_dim

The function then sums up (x_list, BN_matrices_array) and outputs the resulting TPM (row_dim-by-col_dim).

In [2]:
def add_up_a_decomposition(x_list, BN_matrices_array, row_dim, col_dim):
    output_TPM = np.zeros((row_dim, col_dim), dtype=int)
    num_BN_matrices = len(x_list)
    
    for k in range(num_BN_matrices):
        for col in range(col_dim):
            output_TPM[BN_matrices_array[k, col], col] += x_list[k]
    
    return output_TPM

# Implementation of the arXiv Version of GER

## Order of execution: get_col_indices, first_occurrence, column_freq, find_v_list, GERESA, f_score, new_GER.

This function requires two inputs:
1. $P$ is a $2^{n} \times 2^{n}$ numpy array.
2. $z$ is a real number greater than 1.

This function outputs:
1. The length of the decomposition $K$.
2. A list of positive real numbers x_list.
3. A 2D numpy array representing a list of BN matrices A_array.

In [9]:
def new_GER(P, z):
    row_num, col_num = P.shape
    zero_matrix = np.zeros((row_num, col_num), dtype=int)
    
    R = copy.deepcopy(P)
    K = 0
    x_list = []
    BN_list = []
    
    while (not np.array_equal(R, zero_matrix)):
        K += 1
        B = np.min(np.max(R, axis=0))
        v_list = find_v_list(R, B)
        x = 0
        score = -math.inf
        
        for v in v_list:
            temp_A = GERESA(R, v) # temp_A should be a list of length col_num representing a BN matrix.
            
            R_subtract_v_temp_A = copy.deepcopy(R)
            for j in range(col_num):
                R_subtract_v_temp_A[temp_A[j], j] -= v
            
            temp_score = f_score(R_subtract_v_temp_A, z)
            
            if ((temp_score > score) or ((temp_score == score) and (v > x))):
                score = temp_score
                x = v
                A = temp_A
        
        for j in range(col_num):
            R[A[j], j] -= x
        
        x_list.append(x)
        BN_list.append(A)
        
    return K, x_list, np.array(BN_list)

This function requires two inputs:
1. an integer $v$.
2. A 2D numpy array an_array consisting of integers.

The function outputs the column frequency of $v$ in an_array.

In [5]:
def column_freq(v, an_array):
    row_num, col_num = an_array.shape
    col_freq = 0
    
    for j in range(col_num):
        if v in an_array[:, j]:
            col_freq += 1
    
    return col_freq

This function outputs the list of positive entries of $R$ which are not greater than $B$ (upper_bound) and attain the maximum column frequency in $R$.

In [6]:
def find_v_list(R, upper_bound):
    row_num, col_num = R.shape
    classify_entries = [set() for i in range(col_num + 1)]
    
    for i in range(row_num):
        for j in range(col_num):
            if (R[i, j] > 0 and R[i, j] <= upper_bound):
                classify_entries[column_freq(R[i, j], R)].add(R[i, j])
    
    empty_set = set()
    
    for freq in range(col_num, 0, -1):
        if classify_entries[freq] != empty_set:
            return list(classify_entries[freq])

In [7]:
def GERESA(R, v):
    row_num, col_num = R.shape
    A = [0] * col_num
    R_copy = copy.deepcopy(R)
    selected_columns = []
    col_indices_v_R = get_col_indices(v, R)
    col_indices_v_R_complement = set(range(col_num)) - col_indices_v_R
    
    for j in col_indices_v_R:
        A[j] = first_occurrence(v, R[:, j])
        R_copy[A[j], j] = 0
        selected_columns.append(j)
    
    for j in col_indices_v_R_complement:
        # Compute a dictionary with keys in Larger(v, R(:, j)) and values being associated column frequencies.
        dict_KLarger_VColFreq = dict()
        for i in range(row_num):
            if R[i, j] > v:
                dict_KLarger_VColFreq[i] = column_freq(R[i, j] - v, R_copy[:, selected_columns])
        
        # Set A[j] to the right choice from range(row_num).
        A[j] = max(dict_KLarger_VColFreq.keys(), key=dict_KLarger_VColFreq.get)
        
        R_copy[A[j], j] = R[A[j], j] - v
        selected_columns.append(j)
        
    return A

This function requires two inputs:
1. an integer $v$.
2. A 2D numpy array an_array consisting of integers.

The function outputs the set $\text{Col_indices}(v, \text{an_array})$.

In [3]:
def get_col_indices(v, an_array):
    row_num, col_num = an_array.shape
    col_indices = []
    
    for j in range(col_num):
        if v in an_array[:, j]:
            col_indices.append(j)
    
    return set(col_indices)

This function requires two inputs:
1. An integer $v$.
2. A 1D numpy array array_1D such that v is an element of array_1D.

The function outputs the index of the first occurrence of $v$ in array_1D.

In [4]:
def first_occurrence(v, array_1D):
    length = array_1D.size
    
    for i in range(length):
        if v == array_1D[i]:
            return i
    
    return -1

This function requires two inputs:
1. An integer 2D array array_2D.
2. A positive real number $z > 1$.

The function outputs a score.

In [8]:
def f_score(array_2D, z):
    row_num, col_num = array_2D.shape
    score = 0
    checked_positive_entries = set()
    
    for i in range(row_num):
        for j in range(col_num):
            if (array_2D[i, j] > 0) and (not array_2D[i, j] in checked_positive_entries):
                score += z ** column_freq(array_2D[i, j], array_2D)
                checked_positive_entries.add(array_2D[i, j])
    
    return score

# Define the 18 integerized p-TPMs to be tested.

#### Synthetic p-TPMs

In [10]:
P1 = np.array([[1, 5, 6,  0], 
               [4, 0, 2,  0], 
               [5, 2, 0, 10], 
               [0, 3, 2,  0]])

In [11]:
P2 = np.array([[12, 30, 22, 10, 10, 15, 54, 34],
               [10, 24, 19, 54, 30,  0,  0,  0],
               [54, 15,  0, 12, 12,  0,  0, 30],
               [ 0,  0, 24, 15, 24, 19, 10,  0],
               [ 0,  0,  0,  0, 34, 10, 12,  0],
               [19,  0, 15,  0,  0, 12,  0, 22],
               [15, 19,  0, 19,  0, 54,  0, 24],
               [ 0, 22, 30,  0,  0,  0, 34,  0]])

In [12]:
P3 = np.array([[ 0,  0,  0, 49,  0, 43,  0, 49],
               [ 0, 30, 12,  0, 30,  0, 25,  0],
               [25,  0,  0, 15,  0, 22, 12, 15],
               [ 0,  0, 10, 24, 19,  0, 30,  0],
               [30, 43, 15,  0, 24, 15,  0,  0],
               [43,  0, 49,  0,  0,  0, 19, 24],
               [ 0, 22,  0, 22, 12, 30,  0, 12],
               [12, 15, 24,  0, 25,  0, 24, 10]])

In [13]:
P4 = np.array([[ 26,   0,   0,   0,  59,   0,  49,   0,  29,   0,   0,   0,   0,   9,   0,  46],
               [  0,   0,  39,  17,   0,   0,   0,  49,  49,   0,   0,   0,   0,  98,  54,  17],
               [  0,   0,  26,  49,   0,   0,   0,   9,   0,   0,   0,  59,   9,   0,   0,   0],
               [  0,   0,   0,   0,   0,   9,  54,   0,   0,   0,   0,   0,  39,   0,  49,  26],
               [ 49,   0,   9,   0,   0,  29,   0,   0,   0,   0, 108,  63,   0,  17,  59,   0],
               [  0,   0,   0,  26,   0,   0,   0,  17,  39,  29,   0,   0,   0,   0,   0,  88],
               [ 17,  63,  88,   0,   0,   0,   0,  98,   0,   9,  37,   0,  88,   0,   0,  49],
               [  0,   0,   0,   0,  49,   0,   0,   0,  17,  37,   9,  29,  63,  49,  39,   0],
               [ 98,   0,   0,   0,  46, 108,  26,   0,   0,   0,   0,   9,   0,  63,   9,   0],
               [  0,  29,   0,   0,  17,   0,  29,   0,  46,   0,  26,   0,   0,   0,  26,   0],
               [ 29, 108,   0,  88,   0,  39,   0,   0,   0,   0,  39,   0,   0,  29,   0,   0],
               [  0,  39,   0,  39,   0,   0,   0,   0,  26,  26,  29,  39,   0,   0,   0,   0],
               [  0,   0,   0,   0,  29,   0,  98,  29,   0,  39,   0,   0,   0,   0,   0,   0],
               [ 46,   0,  49,  37,  39,  26,   9,   0,   0, 108,  17,   0,  49,   0,  29,   0],
               [  0,   9,  54,   9,   0,  54,   0,  37,   0,  17,   0,  49,  17,   0,   0,  39],
               [  0,  17,   0,   0,  26,   0,   0,  26,  59,   0,   0,  17,   0,   0,   0,   0]])

In [14]:
P5 = np.array([[0, 6, 8, 1, 7, 5, 8, 5, 6, 5, 3, 4, 10, 2, 2, 0], 
               [8, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,  0, 0, 0, 3], 
               [2, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,  0, 0, 0, 0], 
               [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0],
               [0, 1, 1, 8, 2, 4, 2, 5, 0, 3, 2, 3,  0, 3, 4, 1],
               [0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0],
               [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 3,  0, 0, 2, 1],
               [0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0,  0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,  0, 4, 2, 5],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 1, 0, 0],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0]])

#### p-TPMs from the MOMP paper

In [15]:
PA1 = np.array([[1, 3, 2, 1], 
                [2, 3, 2, 0], 
                [0, 0, 6, 4], 
                [7, 4, 0, 5]])

In [16]:
PA2 = np.array([[1, 3, 2, 1, 0, 0, 0, 0], 
                [2, 3, 2, 0, 0, 0, 0, 0], 
                [0, 0, 6, 4, 0, 0, 0, 0], 
                [7, 4, 0, 5, 0, 0, 0, 0],
                [0, 0, 0, 0, 1, 3, 2, 1],
                [0, 0, 0, 0, 2, 3, 2, 0],
                [0, 0, 0, 0, 0, 0, 6, 4],
                [0, 0, 0, 0, 7, 4, 0, 5]])

In [17]:
PA3 = np.array([[57,  0, 10,  0,  0,  4,  0,  0], 
                [14, 31,  0, 50, 12, 13, 33,  6], 
                [ 0,  8, 40, 25, 25,  0, 67,  0], 
                [ 0, 15,  0,  0,  0,  8,  0,  0],
                [ 0, 15, 30,  0,  0, 13,  0,  0],
                [29, 31, 20,  0, 25, 29,  0, 39],
                [ 0,  0,  0,  0, 38,  0,  0,  0],
                [ 0,  0,  0, 25,  0, 33,  0, 55]])

#### p-TPMs from the PBGDM paper

In [18]:
PB1 = np.array([[1, 3, 5, 6], 
                [0, 7, 0, 0], 
                [0, 0, 5, 0], 
                [9, 0, 0, 4]])

In [19]:
PB3 = np.array([[10,  0,  0, 2,  0, 0, 0,  0], 
                [ 0,  0,  0, 2,  0, 0, 0,  0], 
                [ 0,  0,  0, 0, 10, 0, 0,  0], 
                [ 0,  0,  0, 0,  0, 0, 0,  0],
                [ 0,  0,  0, 3,  0, 0, 5,  0],
                [ 0,  0,  0, 3,  0, 0, 5,  0],
                [ 0, 10, 10, 0,  0, 5, 0,  0],
                [ 0,  0,  0, 0,  0, 5, 0, 10]])

In [20]:
def perturb_PB3(d):
    output_array = np.array([[100 - d,       0,       0, 20,       0,  0,  0,       0], 
                             [      0,       0,       0, 20,       0,  0,  0,       0], 
                             [      0,       0,       0,  0, 100 - d,  0,  0,       0], 
                             [      d,       d,       d,  0,       d,  0,  0,       d],
                             [      0,       0,       0, 30,       0,  0, 50,       0],
                             [      0,       0,       0, 30,       0,  0, 50,       0],
                             [      0, 100 - d, 100 - d,  0,       0, 50,  0,       0],
                             [      0,       0,       0,  0,       0, 50,  0, 100 - d]])
    return output_array

In [21]:
PB4_d1 = perturb_PB3(1)
PB4_d2 = perturb_PB3(2)
PB4_d3 = perturb_PB3(3)
PB4_d4 = perturb_PB3(4)

In [22]:
PB6_d1 = np.zeros((16, 16), dtype=int)
PB6_d1[0:8, 0:8] = PB4_d1
PB6_d1[8:16, 8:16] = PB4_d1

PB6_d2 = np.zeros((16, 16), dtype=int)
PB6_d2[0:8, 0:8] = PB4_d2
PB6_d2[8:16, 8:16] = PB4_d2

PB6_d3 = np.zeros((16, 16), dtype=int)
PB6_d3[0:8, 0:8] = PB4_d3
PB6_d3[8:16, 8:16] = PB4_d3

PB6_d4 = np.zeros((16, 16), dtype=int)
PB6_d4[0:8, 0:8] = PB4_d4
PB6_d4[8:16, 8:16] = PB4_d4

# Execute new_GER on the 18 p-TPMs

### $P_{1}$

In [24]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(P1, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [25]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

4
4
(4, 4)
2


In [26]:
np.array_equal(P1, add_up_a_decomposition(opt_x_list, opt_A_array, 4, 4))

True

In [27]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.045192
Mean execution time: 0 days 00:00:00.003614419
Min execution time: 0 days 00:00:00


### $P_{2}$

In [28]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(P2, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [29]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

6
6
(6, 8)
2


In [30]:
np.array_equal(P2, add_up_a_decomposition(opt_x_list, opt_A_array, 8, 8))

True

In [31]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.022058
Mean execution time: 0 days 00:00:00.012390483
Min execution time: 0 days 00:00:00.004039


### $P_{3}$

In [32]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(P3, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [33]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

6
6
(6, 8)
2


In [34]:
np.array_equal(P3, add_up_a_decomposition(opt_x_list, opt_A_array, 8, 8))

True

In [35]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.048735
Mean execution time: 0 days 00:00:00.012898709
Min execution time: 0 days 00:00:00.005210


### $P_{4}$

In [36]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(P4, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [37]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

8
8
(8, 16)
2


In [38]:
np.array_equal(P4, add_up_a_decomposition(opt_x_list, opt_A_array, 16, 16))

True

In [39]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.177088
Mean execution time: 0 days 00:00:00.083961290
Min execution time: 0 days 00:00:00.064358


### $P_{5}$

In [40]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(P5, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [41]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

7
7
(7, 16)
2


In [42]:
np.array_equal(P5, add_up_a_decomposition(opt_x_list, opt_A_array, 16, 16))

True

In [43]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.019091
Mean execution time: 0 days 00:00:00.015438967
Min execution time: 0 days 00:00:00.010926


### $P^{A}_{1}$

In [44]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(PA1, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [45]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

5
5
(5, 4)
2


In [46]:
np.array_equal(PA1, add_up_a_decomposition(opt_x_list, opt_A_array, 4, 4))

True

In [47]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.002553
Mean execution time: 0 days 00:00:00.001460032
Min execution time: 0 days 00:00:00.000504


### $P^{A}_{2}$

In [48]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(PA2, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [49]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

5
5
(5, 8)
2


In [50]:
np.array_equal(PA2, add_up_a_decomposition(opt_x_list, opt_A_array, 8, 8))

True

In [51]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.006367
Mean execution time: 0 days 00:00:00.004941645
Min execution time: 0 days 00:00:00.003000


### $P^{A}_{3}$

In [52]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(PA3, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [53]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

11
11
(11, 8)
2


In [54]:
np.array_equal(PA3, add_up_a_decomposition(opt_x_list, opt_A_array, 8, 8))

True

In [55]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.036274
Mean execution time: 0 days 00:00:00.025057193
Min execution time: 0 days 00:00:00.015286


### $P^{B}_{1}$

In [56]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(PB1, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [57]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

4
4
(4, 4)
2


In [58]:
np.array_equal(PB1, add_up_a_decomposition(opt_x_list, opt_A_array, 4, 4))

True

In [59]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.002332
Mean execution time: 0 days 00:00:00.001195032
Min execution time: 0 days 00:00:00.001002


### $P^{B}_{3}$

In [60]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(PB3, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [61]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

4
4
(4, 8)
2


In [62]:
np.array_equal(PB3, add_up_a_decomposition(opt_x_list, opt_A_array, 8, 8))

True

In [63]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.004341
Mean execution time: 0 days 00:00:00.003173516
Min execution time: 0 days 00:00:00.002000


### $P^{B}_{4}(0.01)$

In [64]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(PB4_d1, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [65]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

5
5
(5, 8)
2


In [66]:
np.array_equal(PB4_d1, add_up_a_decomposition(opt_x_list, opt_A_array, 8, 8))

True

In [67]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.009049
Mean execution time: 0 days 00:00:00.006136387
Min execution time: 0 days 00:00:00


### $P^{B}_{4}(0.02)$

In [68]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(PB4_d2, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [69]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

5
5
(5, 8)
2


In [70]:
np.array_equal(PB4_d2, add_up_a_decomposition(opt_x_list, opt_A_array, 8, 8))

True

In [71]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.013448
Mean execution time: 0 days 00:00:00.007244129
Min execution time: 0 days 00:00:00


### $P^{B}_{4}(0.03)$

In [72]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(PB4_d3, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [73]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

5
5
(5, 8)
2


In [74]:
np.array_equal(PB4_d3, add_up_a_decomposition(opt_x_list, opt_A_array, 8, 8))

True

In [75]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.012235
Mean execution time: 0 days 00:00:00.005973419
Min execution time: 0 days 00:00:00


### $P^{B}_{4}(0.04)$

In [76]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(PB4_d4, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [77]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

5
5
(5, 8)
2


In [78]:
np.array_equal(PB4_d4, add_up_a_decomposition(opt_x_list, opt_A_array, 8, 8))

True

In [79]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.012137
Mean execution time: 0 days 00:00:00.006123838
Min execution time: 0 days 00:00:00


### $P^{B}_{6}(0.01)$

In [80]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(PB6_d1, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [81]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

5
5
(5, 16)
2


In [82]:
np.array_equal(PB6_d1, add_up_a_decomposition(opt_x_list, opt_A_array, 16, 16))

True

In [83]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.024935
Mean execution time: 0 days 00:00:00.017832483
Min execution time: 0 days 00:00:00.006916


### $P^{B}_{6}(0.02)$

In [84]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(PB6_d2, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [85]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

5
5
(5, 16)
2


In [86]:
np.array_equal(PB6_d2, add_up_a_decomposition(opt_x_list, opt_A_array, 16, 16))

True

In [87]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.025509
Mean execution time: 0 days 00:00:00.018626612
Min execution time: 0 days 00:00:00.012632


### $P^{B}_{6}(0.03)$

In [88]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(PB6_d3, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [89]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

5
5
(5, 16)
2


In [90]:
np.array_equal(PB6_d3, add_up_a_decomposition(opt_x_list, opt_A_array, 16, 16))

True

In [91]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.030765
Mean execution time: 0 days 00:00:00.018398483
Min execution time: 0 days 00:00:00.008318


### $P^{B}_{6}(0.04)$

In [92]:
opt_K = math.inf
opt_x_list = []
opt_A_array = []
opt_z = 0
exe_time_list = []

for z in range(2, 33):
    time_before_execution = datetime.datetime.now()
    K, x_list, A_array = new_GER(PB6_d4, z)
    time_after_execution = datetime.datetime.now()
    
    execution_duration = time_after_execution - time_before_execution
    exe_time_list.append(pd.Timedelta(execution_duration))
    
    if opt_K > K:
        opt_K = K
        opt_x_list = x_list
        opt_A_array = A_array
        opt_z = z

In [93]:
print(opt_K)
print(len(opt_x_list))
print(opt_A_array.shape)
print(opt_z)

5
5
(5, 16)
2


In [94]:
np.array_equal(PB6_d4, add_up_a_decomposition(opt_x_list, opt_A_array, 16, 16))

True

In [95]:
exe_time_pdSeries = pd.Series(exe_time_list)
print('Max execution time:', exe_time_pdSeries.max())
print('Mean execution time:', exe_time_pdSeries.mean())
print('Min execution time:', exe_time_pdSeries.min())

Max execution time: 0 days 00:00:00.024543
Mean execution time: 0 days 00:00:00.018321451
Min execution time: 0 days 00:00:00.010071


# Random Matrix Experiment

In [97]:
z_low = 2
z_high = 32
row_dim = 32
col_dim = 32

print("Start of execution:   ", datetime.datetime.now())

for z in range(z_low, z_high + 1):
    print("\nstart of z =", z)
    decompositions_lengths_list = []
    execution_times_list = []
    coefficients_lengths_list = []
    A_array_shapes_list = []
    output_eq_input_list = []
    
    for k in range(4001, 6001):
        TPM_file_path = './random_pTPMs/dim32_pTPM_' + str(k) + '.npy'
        current_TPM = np.load(TPM_file_path)
    
        time_before_execution = datetime.datetime.now()
        length_k, coefficient_list_xi, BN_matrices_list_Ai = new_GER(current_TPM, z)
        time_after_execution = datetime.datetime.now()
        execution_duration = time_after_execution - time_before_execution
    
        decompositions_lengths_list.append(length_k)
        execution_times_list.append(pd.Timedelta(execution_duration))
        coefficients_lengths_list.append(len(coefficient_list_xi))
        A_array_shapes_list.append(BN_matrices_list_Ai.shape)
    
        decomposition_sum = add_up_a_decomposition(coefficient_list_xi, BN_matrices_list_Ai, row_dim, col_dim)
        is_output_eq_input = np.array_equal(current_TPM, decomposition_sum)
        output_eq_input_list.append(is_output_eq_input)
    
    print("decom lengths == coef lengths:", decompositions_lengths_list == coefficients_lengths_list)
    
    A_array_lengths_list = [x[0] for x in A_array_shapes_list]
    print("decom lengths == A_array lengths:", decompositions_lengths_list == A_array_lengths_list)
    
    print("sum of output decoms equal input pTPMs:", all(output_eq_input_list))
    
    lengths_path = './expt_results/GER_decom_lengths_z' + str(z) + '.json'
    times_path = './expt_results/GER_exe_time_z' + str(z) + '.pkl'
    coefficients_path = './expt_results/GER_coef_lengths_z' + str(z) + '.json'
    A_array_shapes_path = './expt_results/GER_A_array_shapes_z' + str(z) + '.json'
    out_eq_in_path = './expt_results/GER_out_eq_in_z' + str(z) + '.json'

    with open(lengths_path, "w") as out_1:
        json.dump(decompositions_lengths_list, out_1)

    execution_times_pdSeries = pd.Series(execution_times_list)
    execution_times_pdSeries.to_pickle(times_path)

    with open(coefficients_path, "w") as out_2:
        json.dump(coefficients_lengths_list, out_2)

    with open(A_array_shapes_path, "w") as out_3:
        json.dump(A_array_shapes_list, out_3)
    
    with open(out_eq_in_path, "w") as out_4:
        json.dump(output_eq_input_list, out_4)
    
    print('Time:', datetime.datetime.now(), '    ', 'Completed z =', z)

Start of execution:    2024-12-24 23:35:18.661812

start of z = 2
decom lengths == coef lengths: True
decom lengths == A_array lengths: True
sum of output decoms equal input pTPMs: True
Time: 2024-12-25 04:31:11.360123      Completed z = 2

start of z = 3
decom lengths == coef lengths: True
decom lengths == A_array lengths: True
sum of output decoms equal input pTPMs: True
Time: 2024-12-25 08:42:20.066029      Completed z = 3

start of z = 4
decom lengths == coef lengths: True
decom lengths == A_array lengths: True
sum of output decoms equal input pTPMs: True
Time: 2024-12-25 13:32:22.008039      Completed z = 4

start of z = 5
decom lengths == coef lengths: True
decom lengths == A_array lengths: True
sum of output decoms equal input pTPMs: True
Time: 2024-12-25 18:29:13.651616      Completed z = 5

start of z = 6
decom lengths == coef lengths: True
decom lengths == A_array lengths: True
sum of output decoms equal input pTPMs: True
Time: 2024-12-25 23:21:54.821856      Completed z = 6


KeyboardInterrupt: 