# Determinism vs. Specificity

We look at various toy systems under forced determinism and specificity merges.

In [None]:
import numpy as np
from scipy.stats import entropy

from ce import *

### Separate `det` and `spec` calculators

In [21]:
def calc_det(tpm, P_c=None):
    """Score a transition probability matrix by its determinism only.

    Determinism is ``1 - (P_c . H_rows) / log2(n)``, where ``H_rows`` holds
    the base-2 Shannon entropy of each row of ``tpm`` and ``n`` is the number
    of rows.

    Parameters
    ----------
    tpm : array_like
        2-D matrix with one row per state. Each row is handed to
        ``scipy.stats.entropy``, which normalises it before taking the
        entropy.
    P_c : array_like, optional
        Weight given to each row when averaging the row entropies.
        Defaults to the uniform weighting ``1/n``.

    Returns
    -------
    tuple
        ``(determinism, 0, 0)``. The two trailing zeros are placeholders
        (presumably so the result has the same 3-tuple shape as the other
        scorers -- confirm against ``createPath``). A matrix with a single
        row short-circuits to ``(1.0, 1.0, 1.0)``.
    """
    # Accept nested lists as well as ndarrays; a no-op for ndarray input.
    tpm = np.asarray(tpm)
    n = tpm.shape[0]

    # special case: log2(1) == 0 would make the normalisation divide by zero
    if n == 1:
        return 1.0, 1.0, 1.0

    if P_c is None:
        P_c = np.full(n, 1.0 / n)  # uniform prior if none given
    else:
        P_c = np.asarray(P_c)

    log2n = np.log2(n)  # strictly positive since n > 1

    # Entropy is taken row by row, exactly as before, so scores stay
    # bit-for-bit identical for ndarray inputs.
    row_entropies = np.apply_along_axis(
        lambda distribution: entropy(distribution, base=2), 1, tpm
    )
    weighted_average_row_entropy = P_c @ row_entropies
    determinism = 1.0 - (weighted_average_row_entropy / log2n)

    # Specificity and the combined score are deliberately not computed here:
    # this scorer only ever reports determinism.
    return determinism, 0, 0

In [22]:
def calc_spec(tpm, P_c=None):
    """Score a transition probability matrix by its specificity only.

    The ``P_c``-weighted sum of the rows (``P_c @ tpm``) is treated as a
    distribution over columns. Its base-2 entropy, normalised by ``log2(n)``
    with ``n`` the number of rows, gives ``degeneracy = 1 - H / log2(n)``,
    and the reported specificity is ``1 - degeneracy``.

    Parameters
    ----------
    tpm : array_like
        2-D matrix with one row per state.
    P_c : array_like, optional
        Weight given to each row when forming the weighted column sums.
        Defaults to the uniform weighting ``1/n``.

    Returns
    -------
    tuple
        ``(specificity, 0, 0)``. The two trailing zeros are placeholders
        (presumably so the result has the same 3-tuple shape as the other
        scorers -- confirm against ``createPath``). A matrix with a single
        row short-circuits to ``(1.0, 1.0, 1.0)``.
    """
    # Accept nested lists as well as ndarrays; a no-op for ndarray input.
    tpm = np.asarray(tpm)
    n = tpm.shape[0]

    # special case: log2(1) == 0 would make the normalisation divide by zero
    if n == 1:
        return 1.0, 1.0, 1.0

    if P_c is None:
        P_c = np.full(n, 1.0 / n)  # uniform prior if none given
    else:
        P_c = np.asarray(P_c)

    log2n = np.log2(n)  # strictly positive since n > 1

    weighted_column_sums = P_c @ tpm
    weighted_column_entropy = entropy(weighted_column_sums, base=2)

    # Kept as the two-step "1 - degeneracy" rather than simplified, so the
    # floating-point result is identical to what this scorer always returned.
    degeneracy = 1.0 - (weighted_column_entropy / log2n)
    specificity = 1.0 - degeneracy

    # Determinism and the combined score are deliberately not computed here:
    # this scorer only ever reports specificity.
    return specificity, 0, 0

### Hardware...

In [23]:
def print_matrices_side_by_side(mats, titles=None, labels_list=None, sep='   '):
    """Print several matrices next to each other, one text column per matrix.

    Parameters
    ----------
    mats : iterable of 2-D array_like
        Matrices to show. Every entry is formatted with two decimals.
        Nothing is printed when ``mats`` is empty.
    titles : sequence of str, optional
        Header shown centred above each matrix. A column is widened to fit
        its title so that the columns to its right stay aligned. Titles
        beyond the number of matrices are appended to the header unpadded.
    labels_list : sequence, optional
        ``labels_list[k]`` is a list/tuple of row labels for matrix ``k``.
        Labels are sorted by their first element and prefixed to the rows
        in that order. Missing, empty, or non-list/tuple entries leave the
        matrix unlabelled.
    sep : str, optional
        Text placed between neighbouring columns.

    Returns
    -------
    None
        The layout is written to standard output.
    """
    # stringify each matrix: one text line per matrix row
    columns = [
        [' '.join(f'{float(x):.2f}' for x in row) for row in mat]
        for mat in mats
    ]
    if not columns:
        return  # nothing to lay out (max() below would fail on no input)

    # prefix sorted labels if provided
    if labels_list:
        for idx, lines in enumerate(columns):
            lbls = labels_list[idx] if idx < len(labels_list) else None
            if not lbls or not isinstance(lbls, (list, tuple)):
                continue
            sorted_lbls = sorted(lbls, key=lambda t: t[0])
            columns[idx] = [
                f'{lbl} {line}' for lbl, line in zip(sorted_lbls, lines)
            ]

    # column width = widest (possibly labelled) line; 0 for a row-less matrix
    widths = [max((len(line) for line in lines), default=0) for lines in columns]

    # print titles centered
    if titles:
        header = []
        for i, title in enumerate(titles):
            if i < len(widths):
                # A title wider than its matrix would push every following
                # column out of alignment, so grow the column to fit it.
                widths[i] = max(widths[i], len(title))
                header.append(title.center(widths[i]))
            else:
                header.append(title)  # no matrix underneath this title
        print(sep.join(header))

    # determine max rows and print each; shorter matrices are blank-padded
    max_rows = max(len(lines) for lines in columns)
    for i in range(max_rows):
        row_parts = [
            lines[i].rjust(width) if i < len(lines) else ' ' * width
            for lines, width in zip(columns, widths)
        ]
        print(sep.join(row_parts))

In [24]:
def analyze_system(microstate):
    """Build and print three merge paths for ``microstate`` side by side.

    ``createPath`` is run three times: with its default scorer (CE), with
    ``calc_det`` and with ``calc_spec``. For every step of the CE path the
    three matrices at that step are printed next to each other, followed by
    a line saying which of the single-metric paths holds the same matrix as
    the CE path at that step.

    Each path entry is read at index 0 (the matrix), index 1 (the score
    shown in the header) and index 3 (the row labels).

    Returns the tuple ``(ce, det, spec)`` of the three paths.
    """
    ce = createPath(microstate)
    det = createPath(microstate, scorer=calc_det)
    spec = createPath(microstate, scorer=calc_spec)

    # keyed by (CE matrix equals DET matrix, CE matrix equals SPEC matrix)
    verdicts = {
        (True, True): "Merged for BOTH",
        (False, True): "Merged for SPEC",
        (True, False): "Merged for DET",
        (False, False): "Neither",
    }

    for step, ce_entry in enumerate(ce):
        ce_matrix = ce_entry[0]

        # D and S in the headers are scored on the CE-path matrix, not on the
        # DET/SPEC matrices they are printed above.
        _, d, s = calculate_cp_score(ce_matrix)

        det_entry = det[step]
        spec_entry = spec[step]

        headers = [
            f"CE={ce_entry[1]:.2f}",
            f"DET={det_entry[1]:.2f}, S={s:.2f}",
            f"D={d:.2f}, SPEC={spec_entry[1]:.2f}",
        ]
        print_matrices_side_by_side(
            [ce_matrix, det_entry[0], spec_entry[0]],
            titles=headers,
            labels_list=[ce_entry[3], det_entry[3], spec_entry[3]],
        )

        same_as_det = bool(np.array_equal(ce_matrix, det_entry[0]))
        same_as_spec = bool(np.array_equal(ce_matrix, spec_entry[0]))
        print(verdicts[(same_as_det, same_as_spec)], '\n')

    return ce, det, spec

## Examples

In [25]:
from ce_systems import *

### Determinism

Larger jumps in determinism occur when merging nodes lowers average row entropy.

Note: as we merge, we average over fewer rows. If two rows have the same high entropy, merging them may leave the absolute entropy values unchanged, but averaging over fewer rows still results in an uptick in the `det` score.

In [None]:
# Print the CE / DET-scored / SPEC-scored merge paths for `merge_for_det`
# (not defined in this notebook; presumably provided by `ce_systems`).
# The returned paths are discarded; only the printed comparison is wanted.
_ = analyze_system(merge_for_det)

        CE=0.62                DET=0.25, S=1.00          D=0.25, SPEC=1.00    
(0,) 0.50 0.50 0.00 0.00   (0,) 0.50 0.50 0.00 0.00   (0,) 0.50 0.50 0.00 0.00
(1,) 0.00 0.00 0.50 0.50   (1,) 0.00 0.00 0.50 0.50   (1,) 0.00 0.00 0.50 0.50
(2,) 0.25 0.25 0.25 0.25   (2,) 0.25 0.25 0.25 0.25   (2,) 0.25 0.25 0.25 0.25
(3,) 0.25 0.25 0.25 0.25   (3,) 0.25 0.25 0.25 0.25   (3,) 0.25 0.25 0.25 0.25
Merged for BOTH 

       CE=0.71             DET=0.47, S=0.95       D=0.47, SPEC=0.95  
  (0,) 0.50 0.50 0.00     (0,) 0.50 0.50 0.00   (0, 1) 0.50 0.25 0.25
  (1,) 0.00 0.00 1.00     (1,) 0.00 0.00 1.00     (2,) 0.50 0.25 0.25
(2, 3) 0.25 0.25 0.50   (2, 3) 0.25 0.25 0.50     (3,) 0.50 0.25 0.25
Merged for DET 

      CE=0.61           DET=0.52, S=0.70    D=0.52, SPEC=1.00
(0, 2, 3) 0.62 0.38   (0, 2, 3) 0.62 0.38   (0, 1) 0.50 0.50
     (1,) 1.00 0.00        (1,) 1.00 0.00   (2, 3) 0.50 0.50
Merged for DET 

     CE=1.00         DET=1.00, S=1.00   D=1.00, SPEC=1.00
(0, 1, 2, 3) 1.00   (0, 1, 2, 3

At our first merge, the uptick in DET is due to two things: [1] merging nodes 2 and 3 (to get an entropy of 0 rather than 0.5 for this row) and [2] eliminating one of the worst-contributing summands: a row of `0.25, 0.25, 0.25, 0.25`.

In [None]:
# Three-state toy system: rows 0 and 1 each put 0.90 on the other state and
# 0.10 on state 2, while row 2 is spread evenly over all three states.
# NOTE: row 2 sums to 0.999999 rather than exactly 1.
simpler_det = np.array([
    [0.00, 0.90, 0.10],
    [0.90, 0.00, 0.10],
    [0.333333, 0.333333, 0.333333]
], dtype=float)

# Print the three merge paths; the returned paths are discarded.
_ = analyze_system(simpler_det)

      CE=0.71           DET=0.47, S=0.94     D=0.47, SPEC=0.94 
(0,) 0.00 0.90 0.10   (0,) 0.00 0.90 0.10   (0,) 0.00 0.90 0.10
(1,) 0.90 0.00 0.10   (1,) 0.90 0.00 0.10   (1,) 0.90 0.00 0.10
(2,) 0.33 0.33 0.33   (2,) 0.33 0.33 0.33   (2,) 0.33 0.33 0.33
Merged for BOTH 

    CE=0.71        DET=0.52, S=0.89   D=0.52, SPEC=0.89
(0, 2) 0.38 0.62   (0, 2) 0.38 0.62   (0, 2) 0.38 0.62
  (1,) 1.00 0.00     (1,) 1.00 0.00     (1,) 1.00 0.00
Merged for BOTH 

   CE=1.00       DET=1.00, S=1.00   D=1.00, SPEC=1.00
(0, 1, 2) 1.00   (0, 1, 2) 1.00   (0, 1, 2) 1.00
Merged for BOTH 



Sometimes we merge to redirect or simplify the out-edges of another node. In this case, we merge (0, 2) so that node 1 becomes deterministic.

### Specificity

In [30]:
# Three-state toy system in which rows 0 and 1 are identical (0.8 on state 0),
# while row 2 puts 0.8 on state 2.
row_identical_tpm = np.array([
    [0.8, 0.1, 0.1],
    [0.8, 0.1, 0.1],
    [0.1, 0.1, 0.8]
])

# Print the three merge paths; the returned paths are discarded.
_ = analyze_system(row_identical_tpm)

      CE=0.63           DET=0.42, S=0.84     D=0.42, SPEC=0.84 
(0,) 0.80 0.10 0.10   (0,) 0.80 0.10 0.10   (0,) 0.80 0.10 0.10
(1,) 0.80 0.10 0.10   (1,) 0.80 0.10 0.10   (1,) 0.80 0.10 0.10
(2,) 0.10 0.10 0.80   (2,) 0.10 0.10 0.80   (2,) 0.10 0.10 0.80
Merged for BOTH 

    CE=0.70        DET=0.53, S=0.99   D=0.40, SPEC=0.99
(0, 1) 0.90 0.10   (0, 2) 0.90 0.10   (0, 1) 0.90 0.10
  (2,) 0.20 0.80     (1,) 0.90 0.10     (2,) 0.20 0.80
Merged for SPEC 

   CE=1.00       DET=1.00, S=1.00   D=1.00, SPEC=1.00
(0, 1, 2) 1.00   (0, 1, 2) 1.00   (0, 1, 2) 1.00
Merged for BOTH 



Do we always eliminate the extreme column? Is there a rule?

We see a large jump in spec by eliminating the (relatively) low chance of transitioning to node 1, distributing the likelihood of transitions more evenly.

In [28]:
# Print the CE / DET-scored / SPEC-scored merge paths for `merge_for_spec`
# (not defined in this notebook; presumably provided by `ce_systems`).
# The returned paths are discarded; only the printed comparison is wanted.
_ = analyze_system(merge_for_spec)

        CE=0.88                DET=1.00, S=0.75          D=1.00, SPEC=0.75    
(0,) 0.00 1.00 0.00 0.00   (0,) 0.00 1.00 0.00 0.00   (0,) 0.00 1.00 0.00 0.00
(1,) 0.00 0.00 1.00 0.00   (1,) 0.00 0.00 1.00 0.00   (1,) 0.00 0.00 1.00 0.00
(2,) 0.00 0.00 0.00 1.00   (2,) 0.00 0.00 0.00 1.00   (2,) 0.00 0.00 0.00 1.00
(3,) 0.00 0.00 0.00 1.00   (3,) 0.00 0.00 0.00 1.00   (3,) 0.00 0.00 0.00 1.00
Merged for BOTH 

       CE=0.86             DET=1.00, S=0.92       D=0.79, SPEC=0.92  
(0, 2) 0.00 0.50 0.50     (0,) 0.00 1.00 0.00   (0, 2) 0.00 0.50 0.50
  (1,) 1.00 0.00 0.00     (1,) 0.00 0.00 1.00     (1,) 1.00 0.00 0.00
  (3,) 0.00 0.00 1.00   (2, 3) 0.00 0.00 1.00     (3,) 0.00 0.00 1.00
Merged for SPEC 

      CE=0.77           DET=1.00, S=0.95     D=0.59, SPEC=0.95 
(0, 1, 2) 0.75 0.25        (0,) 0.00 1.00   (0, 1, 2) 0.75 0.25
     (3,) 0.00 1.00   (1, 2, 3) 0.00 1.00        (3,) 0.00 1.00
Merged for SPEC 

     CE=1.00         DET=1.00, S=1.00   D=1.00, SPEC=1.00
(0, 1, 2, 3) 1.00   (

### More...

Want to show $\delta$ `det`, $\delta$ `spec`.

In [29]:
# Same comparison on `rand_20x20` (not defined in this notebook; presumably a
# 20-state system provided by `ce_systems`). The returned paths are discarded.
_ = analyze_system(rand_20x20)

                                                 CE=0.53                                                                                                 DET=0.06, S=1.00                                                                                           D=0.06, SPEC=1.00                                            
 (0,) 0.04 0.06 0.09 0.02 0.02 0.07 0.01 0.07 0.03 0.02 0.03 0.07 0.07 0.09 0.02 0.07 0.06 0.03 0.07 0.06    (0,) 0.04 0.06 0.09 0.02 0.02 0.07 0.01 0.07 0.03 0.02 0.03 0.07 0.07 0.09 0.02 0.07 0.06 0.03 0.07 0.06    (0,) 0.04 0.06 0.09 0.02 0.02 0.07 0.01 0.07 0.03 0.02 0.03 0.07 0.07 0.09 0.02 0.07 0.06 0.03 0.07 0.06
 (1,) 0.00 0.01 0.09 0.06 0.08 0.01 0.08 0.00 0.08 0.09 0.09 0.03 0.03 0.04 0.07 0.05 0.05 0.04 0.07 0.04    (1,) 0.00 0.01 0.09 0.06 0.08 0.01 0.08 0.00 0.08 0.09 0.09 0.03 0.03 0.04 0.07 0.05 0.05 0.04 0.07 0.04    (1,) 0.00 0.01 0.09 0.06 0.08 0.01 0.08 0.00 0.08 0.09 0.09 0.03 0.03 0.04 0.07 0.05 0.05 0.04 0.07 0.04
 (2,) 0.01 0.09 0.11 0.03 0.03 0.1