# How many bits do we get from modulus knowledge for the top of the lattice?

In [1]:
import sys, io
import math
import numpy as np
import pandas as pd
import random
import re
from scipy.optimize import nnls
from fractions import Fraction
from sympy import factorint
from itertools import product
from sympy.ntheory import factorint
from typing import List, Optional, Tuple

In [2]:
def T(n):
    """ Apply one step of the simplified (shortcut) Collatz map.

    Even n is halved; odd n is sent to (3n + 1)/2.
    """
    is_even = (n & 1 == 0)
    return n//2 if is_even else (3*n + 1)//2
#
def L_T(n):
    """ Compute binary label-string for a given Collatz number.

    The label records the steps taken until the sequence reaches 1:
    "1" for a halving step (n -> n/2) and "0" for an odd step
    (n -> (3n + 1)/2).  n == 1 is special-cased and returns "1".

    Raises ValueError when n < 1: 0 maps to itself and negative values
    stay negative, so the loop below could never reach 1.
    """
    if n < 1:
        raise ValueError(f"L_T requires a positive integer, got {n!r}")
    if n == 1:
        return "1"

    # Collect the step characters and join once instead of growing a string.
    steps = []
    while n != 1:
        if n & 1 == 0:
            n = n//2
            steps.append("1")
        else:
            n = (3*n + 1)//2
            steps.append("0")
    return "".join(steps)
#
def Ay_L(L):
    """ Build the linear system (A, y) described by a label string.

    The system has len(L) + 2 unknowns.  Every label character except the
    last contributes one row k:
        "1"          ->  -x[k] + 2*x[k+1] = 0
        anything else -> -3*x[k] + 2*x[k+1] = 1
    The final character of L is not inspected; the three closing rows are
    fixed: a halving row, an odd-step row, and a row stating
    x[last] == x[last - 2].

    Returns the tuple (A, y) of float numpy arrays.
    """
    size = len(L) + 2
    A = np.zeros((size, size))
    y = np.zeros((size))

    # Rows driven by the label (all characters but the last one).
    for k, bit in enumerate(L[:-1]):
        halving = (bit == "1")
        A[k, k] = -1.0 if halving else -3.0
        A[k, k + 1] = 2.0
        y[k] = 0.0 if halving else 1.0

    # Two fixed step rows: halving, then odd step (same order as written
    # originally, which matters for very short labels).
    for back, diagonal, rhs in ((3, -1, 0), (2, -3, 1)):
        r = size - back
        A[r, r] = diagonal
        A[r, r + 1] = 2
        y[r] = rhs

    # Closing row ties the last unknown to the one two places earlier.
    r = size - 1
    A[r, r] = -1
    A[r, r - 2] = 1
    y[r] = 0

    return A, y
#
def solve_Ay_L(L):
    """ Solve A x = y for the system built from a label string.

    Returns (A, x, y), where A and y come from Ay_L(L) and x is the
    floating-point solution from np.linalg.solve.
    """
    matrix, rhs = Ay_L(L)
    solution = np.linalg.solve(matrix, rhs)
    return matrix, solution, rhs
#
def x0_L(L):
    """ Return the first unknown, x[0], of the system for a label string.

    The solve is done in floating point, so the result is rounded to the
    nearest integer to discard mantissa noise.
    """
    _, solution, _ = solve_Ay_L(L)
    first = solution[0]
    return round(first)  # clean up mantissa garbage
#

def countZeros(label):
    """ Count how many characters of the label are "0".
    """
    return sum(1 for bit in label if bit == "0")
#

def Z(L):
    """ Yield, in increasing order, each index at which the label has a "0".
    """
    for position, bit in enumerate(L):
        if bit == "0":
            yield position
#
def a_b_c_L(L):
    """ Summarise a label as the tuple (a, b, c, S).

    a -- length of the label (power of two)
    b -- number of "0" characters (power of three)
    c -- zero-sum accumulator: sum over the j-th zero (at index i) of
         3**(b - j - 1) * 2**i
    S -- list of the zero indexes, in increasing order
    """
    zero_positions = list(Z(L))
    b = len(zero_positions)
    c = 0
    for j, i in enumerate(zero_positions):
        c += (3 ** (b - j - 1)) * (2 ** i)
    return (len(L), b, c, zero_positions)
#
def val_a_b_c(a_b_c):
    """ Evaluate (2**a - c) / 3**b for the triple (a, b, c).

    Returns the reduced fraction as a (numerator, denominator) tuple.
    """
    two_exp, three_exp, accumulator = a_b_c
    numerator = (2**two_exp) - accumulator
    value = Fraction(numerator, 3**three_exp)
    return (value.numerator, value.denominator)
#
def val_a_b_c_L(a_b_c_L):
    """ Evaluate a node from an (a, b, c, ...) tuple; only the first three items are used.
    """
    return val_a_b_c(a_b_c_L[:3])
#
def val_L(L):
    """ Return the (numerator, denominator) value of the node with label L.
    """
    summary = a_b_c_L(L)
    return val_a_b_c_L(summary)
#
def collatzPath(collatzNumber):
    """ Return the label-string of the shortcut Collatz path from a number down to 1.

    Each step adds one character: "1" for a halving step and "0" for an
    odd step (n -> (3n + 1)/2).  The path of 1 itself is the empty string.

    Raises ValueError when collatzNumber < 1: 0 maps to itself and negative
    values stay negative, so the loop below could never reach 1.
    """
    if collatzNumber < 1:
        raise ValueError(
            f"collatzPath requires a positive integer, got {collatzNumber!r}"
        )
    path = []
    while collatzNumber != 1:
        if (collatzNumber & 1) == 0:
            collatzNumber = collatzNumber // 2
            path.append("1")
        else:
            collatzNumber = (3 * collatzNumber + 1) // 2
            path.append("0")
    return "".join(path)
#

def collatzPath2(n_d_tup, truncate_at=100):
    """
    Collatz path for a rational given as a (numerator, denominator) tuple.

    Steps are chosen by the parity of the numerator: an even numerator is
    halved ("1"); otherwise the value becomes ((3*num + den)//2) / den,
    reduced ("0").  Iteration stops when the value is (1, 1).

    Two early exits prefix the returned path with a marker:
      "↺"  the new value was already in the chain (loop detected)
      "∀"  truncate_at steps have been taken (too long)
    The cut-off is tested right after each step, before the loop's own
    termination test, so a path that arrives at (1, 1) on exactly the
    truncate_at-th step is still marked "∀".

    Returns (path_string, chain), where chain lists the values visited;
    the value that triggered an early exit is not appended.
    """
    chain = [n_d_tup]
    steps = []
    remaining = truncate_at
    current = n_d_tup
    while current != (1, 1):
        num, den = current[0], current[1]
        if (num & 1) == 0:
            current = (num//2, den)
            steps.append("1")
        else:
            reduced = Fraction((3 * num + den)//2, den)
            current = (reduced.numerator, reduced.denominator)
            steps.append("0")

        if current in chain:
            return ("↺" + "".join(steps), chain)

        remaining -= 1
        if remaining == 0:
            return ("∀" + "".join(steps), chain)

        chain.append(current)

    return ("".join(steps), chain)
#

    
#
#  Mixed Radix form and functions
#

# Starting tuple used by mrTupFromPath.  Layout: ((a, b), terms), where
# mrTupValue evaluates 2**a * 3**b minus the sum of 2**c * 3**d over every
# (c, d) in terms; with a = b = 0 and no terms this is the value 1.
N_ = ((0,0), [])

def mr_TupItemValue(a_b, a_0):
    """ Return 2**a * 3**(a_0 + b) for the pair a_b == (a, b).

    a_0 is an extra power of three applied on top of b.
    """
    two_exp, three_exp = a_b
    return (2**two_exp)*(3**(a_0 + three_exp))
#
def mrTupValue(mr_tup):
    """ Evaluate a mixed-radix tuple ((a, b), terms) as a reduced fraction.

    Every item is first scaled by 3**a (a being the head's power of two)
    so the arithmetic stays in integers; the head item minus all listed
    terms is then divided by 3**a again.

    Returns a (numerator, denominator) tuple.
    """
    head, terms = mr_tup[0], mr_tup[1]
    a_0 = head[0]
    subtracted = sum(mr_TupItemValue(term, a_0) for term in terms)
    scaled_total = mr_TupItemValue(head, a_0) - subtracted
    value = Fraction(scaled_total, 3**a_0)
    return (value.numerator, value.denominator)
#

def F_0(mr_tup):
    """ Extend a mixed-radix tuple by one step that adds a subtracted term.

    The head (a, b) becomes (a + 1, b - 1) and a new term
    (a, -(number of existing terms + 1)) is appended to a fresh copy of the
    term list.  mrTupFromPath applies this for every label character other
    than "1".
    """
    head, terms = mr_tup[0], mr_tup[1]
    new_term = (head[0], -(len(terms) + 1))
    new_head = (head[0] + 1, head[1] - 1)
    return (new_head, terms + [new_term])
#

def F_1(mr_tup):
    """ Extend a mixed-radix tuple by one step that only raises the power of two.

    The head (a, b) becomes (a + 1, b); the terms are unchanged.
    mrTupFromPath applies this for every "1" in a label.

    The term list is copied rather than passed through, so the result never
    shares a list object with its input.  Without the copy, any path made
    only of "1" steps would alias the list held by the module constant N_,
    and a caller mutating the result would corrupt every later tuple.
    """
    head = mr_tup[0]
    return ((head[0] + 1, head[1]), list(mr_tup[1]))
#
def mrTupFromPath(label):
    """ Build the mixed-radix tuple for a label string.

    Starts from N_ and applies F_1 for each "1" and F_0 for any other
    character, in label order.
    """
    node = N_
    for bit in label:
        step = F_1 if bit == "1" else F_0
        node = step(node)
    return node
#
def mrTupToPath(T):
    """
    Convert a mixed-radix tuple back to its node label.

    The label has T[0][0] characters, all "1" except at the positions given
    by the first item of each term in T[1], which are "0".
    """
    chars = ["1"] * T[0][0]
    for term in T[1]:
        chars[term[0]] = "0"
    return "".join(chars)
#

def mrTupFromValue(n):
    """ Return the mixed-radix tuple for the Collatz path of the number n.
    """
    return mrTupFromPath(collatzPath(n))
#

def strip_01(label):
    """ Remove trailing "01" pairs from a label.

    Stripping stops as soon as the label has two characters or fewer, so a
    bare "01" is returned unchanged.
    """
    trimmed = label
    while len(trimmed) > 2 and trimmed.endswith("01"):
        trimmed = trimmed[:-2]
    return trimmed
#

# Integer-candidate shape: shortest possible prefix, then "111", then zero
# or more "01" pairs running to the end of the label.
LABEL_RX = re.compile('^(?P<prefix>.*?)((?P<inttag>111)(?P<tail>((01)*)))$')

def split_int_label(s):
    """
    Split a label that matches the integer-candidate pattern.

    Returns (prefix, tail) -- the text before the "111" tag and the run of
    "01" pairs after it -- or None when the label does not match.
    """
    found = LABEL_RX.search(s)
    if found is None:
        return None
    return found.group('prefix', 'tail')
#

def generationLabels(a):
    """ Yield every label of length a over the alphabet "1", "0".

    Labels come out in itertools.product order, "1" before "0" in each
    position.  For a == 0 nothing is yielded.
    """
    if a == 0:
        # NOTE(review): inside a generator this return yields nothing, so
        # the empty root label is never produced; if it was meant to be
        # emitted this would need to be a yield -- confirm intent.
        return ""
    for bits in product('10', repeat=(a)):
        yield "".join(bits)
#
def generationTups(a):
    """ Yield (label, mixed-radix tuple, value) for every label of generation a.

    The value is the (numerator, denominator) pair from mrTupValue.
    """
    for label in generationLabels(a):
        node = mrTupFromPath(label)
        value = mrTupValue(node)
        yield (label, node, value)
    
def generationIntCandidateLabels(a):
    """ Yield integer-candidate labels found among all labels of length a.

    Each label that split_int_label accepts is emitted with its trailing
    "01" pairs removed, i.e. as prefix + "111".
    """
    for bits in product('10', repeat=(a)):
        parts = split_int_label("".join(bits))
        if not parts:
            continue
        # return int candidate with stripped tail
        yield parts[0] + "111"
#

def mrIntTupsForGeneration(aa):
    """ Yield (label, mixed-radix tuple, value) for each integer-candidate label of generation aa.
    """
    for candidate in generationIntCandidateLabels(aa):
        node = mrTupFromPath(candidate)
        value = mrTupValue(node)
        yield (candidate, node, value)
#
def generationGenNums(a):
    """ Print the values of generation a as integers over the common denominator 3**a.

    Values are listed largest first, one per line, each followed by a tab
    and the gap to the next value (None on the last line).
    """
    common_den = 3**(a)
    scaled = sorted(
        (num * (common_den//den) for _label, _tup, (num, den) in generationTups(a)),
        reverse=True,
    )
    # Pair each value with its successor; the last one is paired with None.
    for val, following in zip(scaled, scaled[1:] + [None]):
        delta = None if following is None else val - following
        print(f'{val}\t{delta}')
#        
def generationPairGenNums(a):
    """ Print the merged values of generations a and a + 1 over the denominator 3**(a + 1).

    Values are listed largest first, one per line, each followed by a tab
    and the gap to the next value (None on the last line).
    """
    common_den = 3**(a+1)
    scaled = [
        num * (common_den//den)
        for depth in (a, a + 1)
        for _label, _tup, (num, den) in generationTups(depth)
    ]
    scaled.sort(reverse=True)
    # Pair each value with its successor; the last one is paired with None.
    for val, following in zip(scaled, scaled[1:] + [None]):
        delta = None if following is None else val - following
        print(f'{val}\t{delta}')
#
def mr2Nplus_1(T):
    """ Transform a mixed-radix tuple ((a, b), terms); returns a new tuple or None.

    Presumably this maps the tuple of n to the tuple of 2n + 1 (per the
    name) -- confirm against the derivation.  Mechanically:
      * the new term list starts with (0, -1);
      * leading terms whose first item equals their index are kept as
        (c + 1, d - 1);
      * from the first term that breaks that run onward, the first term
        with c == -d is dropped and every other term becomes (c + 1, d);
      * the head becomes (a + 1, b).
    None is returned when no term with c == -d was found to drop.
    """
    terms = T[1]
    shifted = [(0, -1)]

    # Keep initial zeros.  `start` ends at the index of the first term that
    # breaks the run, or at the last index if the run never breaks.
    start = 0
    for start, term in enumerate(terms):
        if term[0] != start:
            break
        shifted.append((term[0] + 1, term[1] - 1))

    # Remove the first tuple where (a, -a) is true; shift the rest.
    removed = False
    for term in terms[start:]:
        if (not removed) and (term[0] == -term[1]):
            removed = True
            continue
        shifted.append((term[0] + 1, term[1]))

    if not removed:
        return None
    return ((T[0][0] + 1, T[0][1]), shifted)
#

'''
Do not think this works yet:
def mr2Nplus_1_inv(T):
    """
    Find the inverse tuple of mr2Nplus_1

    We know the zero was removed when L item index matched numerator and denominator
    """
    B = len(T[1])  
    L = []

    # Keep initial zeros after poping (0, -1) off the front
    idx = 0
    for idx, val in enumerate(T[1]):
        if idx == 0:
            if val == (0, -1):
                continue
            else:
                # This tuple was not generated by mr2Nplus_1
                return None
        else:
            L.append( (T[1][idx][0] - 1, T[1][idx][1] + 1) )
    inserted = False
    for i in range(idx, B, 1):
        if (not inserted) and i == (len(L)+1) and (T[0][i][0] > i):
            # Next zero in 2n+1 L list is larger than i ... insert the (i, -i) term
            L.append( (i, -i) )    
            inserted == True
        L.append( (T[1][i][0]-1, T[1][i][1]) )
        
    return ( (T[0][0] - 1, T[0][1]), L)
#
'''

def lattice2N_plus1_pairs(a):
    """
    For a given depth in the tree, generate all 2n+1 pairs in the tree

    Every label of length a is stripped of trailing "01" pairs and
    evaluated to n = p/q; the partner value 2n + 1 is then traced with
    collatzPath2.

    Yields (len(label), d, (n, label), (2n + 1, label_)) where d is
      * len(label) when the partner's path is empty,
      * 100 + len(label) when the partner's path was cut short (loop or
        too long),
      * distance(label, label_) otherwise.
    """
    seqs = product('10', repeat=(a))
    for bit_tup in seqs:
        label = strip_01("".join(bit_tup))
        val = mrTupValue(mrTupFromPath(label))
        # For n = p/q, 2n + 1 is (2p + q)/q.  Adding a bare 1 to the
        # numerator is only right when q == 1, and most lattice values
        # have a power-of-three denominator.
        f = Fraction(2 * val[0] + val[1], val[1])
        val_ = (f.numerator, f.denominator)
        label_, _chain = collatzPath2(val_)
        if (len(label_) == 0):
            d = len(label)
        elif label_[0] in ["↺", "∀"]:
            d = 100 + len(label)
        else:
            d = distance(label, label_)
        yield (len(label), d, (val, label), (val_, label_))
#

def mrTupToLaTex(T):
    """ Render a mixed-radix tuple as an inline LaTeX expression.

    The head becomes one fraction 2^a / 3^(-b); when there are terms they
    follow as a parenthesised sum that is subtracted from the head.
    """
    def frac(pair):
        two_exp, three_exp = pair
        return "\\frac{2^{%d}}{3^{%d}}"%(two_exp, -three_exp)

    pieces = [frac(T[0])]
    terms = T[1]
    if len(terms) > 0:
        pieces.append(" - ( ")
        pieces.append("  ")
        pieces.append(" + ".join(frac(term) for term in terms))
        pieces.append(" )")
    return "$ " + "".join(pieces) + " $"
#

In [9]:
# diffs: how many times each delta was seen.
# diff_examples: the example recorded the first time each delta appeared.
diffs = {}
diff_examples = []
def collect_diff(delta, example):
    """ Count one occurrence of delta, keeping the example only on first sight.
    """
    first_sighting = delta not in diffs
    if first_sighting:
        diff_examples.append(example)
    diffs[delta] = diffs.get(delta, 0) + 1
#
# Scan the multiples of 8 from 8 to 8184 and compare, for each one, the
# length of its binary representation with the length of its Collatz label.
for i in range(1, 1024, 1):
    n = 8*i
    label = collatzPath(n)
    bit_str = bin(n)[2:]  # binary digits without the "0b" prefix
    bits = len(bit_str)
    # delta > 0: the label is shorter than the binary form; delta < 0: longer.
    delta = bits - len(label)
    collect_diff(delta , (delta, f"{n} = {bit_str}, label={label}"))
#
# One example per distinct delta, most negative delta first.
print(sorted(diff_examples))
# Cell result: (delta, count) pairs, largest delta first.
sorted(diffs.items(), reverse=True)

[(-103, '6968 = 1101100111000, label=11100010000001000000101110100100111011011001010110000011100100010000101100010010000001100001110101011101100011110111'), (-100, '7496 = 1110101001000, label=11101000000101000000100101000110111000010000010001000110101100010000111100000111010111110110010011100110010110111'), (-98, '5624 = 1010111111000, label=111000000101000000100101000110111000010000010001000110101100010000111100000111010111110110010011100110010110111'), (-89, '8136 = 1111111001000, label=111010010010001010100011111010100011000010100100010000101100010010000001100001110101011101100011110111'), (-87, '6104 = 1011111011000, label=1110010010001010100011111010100011000010100100010000101100010010000001100001110101011101100011110111'), (-84, '6872 = 1101011011000, label=1110010001010100011111010100011000010100100010000101100010010000001100001110101011101100011110111'), (-82, '2616 = 101000111000, label=111000101000001001010111101100001010010001000010110001001000000110000111010101110110001111

[(1, 10),
 (-1, 20),
 (-3, 35),
 (-4, 31),
 (-6, 41),
 (-7, 4),
 (-8, 46),
 (-9, 47),
 (-11, 56),
 (-12, 38),
 (-14, 42),
 (-15, 1),
 (-16, 43),
 (-17, 30),
 (-19, 36),
 (-20, 24),
 (-22, 31),
 (-23, 1),
 (-24, 35),
 (-25, 15),
 (-27, 25),
 (-28, 12),
 (-30, 13),
 (-31, 4),
 (-32, 7),
 (-33, 8),
 (-35, 9),
 (-36, 8),
 (-38, 12),
 (-39, 4),
 (-41, 4),
 (-42, 3),
 (-44, 6),
 (-46, 8),
 (-47, 7),
 (-49, 11),
 (-50, 6),
 (-51, 2),
 (-52, 12),
 (-54, 17),
 (-55, 7),
 (-57, 11),
 (-58, 10),
 (-60, 15),
 (-62, 22),
 (-63, 18),
 (-65, 27),
 (-66, 20),
 (-68, 26),
 (-69, 14),
 (-71, 17),
 (-73, 23),
 (-74, 8),
 (-76, 12),
 (-77, 2),
 (-79, 3),
 (-81, 8),
 (-82, 5),
 (-84, 5),
 (-87, 2),
 (-89, 1),
 (-98, 1),
 (-100, 1),
 (-103, 1)]