# Random BigInt Matches

Do some sequence matches against pure random numbers.

N.B. Julia does support properly generating random numbers in `BigInt` ranges; however, even with this we still see a higher number of non-matches for repeated digit strings... hence the two functions used for generating the random digits, to cross-check that nothing fishy is happening.

(*Spoiler:* nothing fishy is happening, the two methods produce the same results.)

In [1]:
using Random
using JSON
using Printf
using ProgressBars

In [2]:
"""
    create_bigint_string_by_pieces(digits, rng; slice=10)

Create a long random decimal digit string piecewise: draw `slice` digits at a
time from `rng`, zero-pad each piece to its full width, and finish with one
shorter piece when `digits` is not a multiple of `slice`.

This is basically a cross-check that Julia's native random generator for
BigInts is working correctly.

# Arguments
- `digits`: total number of digits to produce; values `<= 0` give `""`.
- `rng`: random number generator handed to `rand`.

# Keywords
- `slice`: number of digits drawn per call to `rand`. It must be at least 1 and
  small enough that `10^slice` does not overflow `Int` (at most 18 when `Int`
  is 64 bits wide).

# Throws
- `ArgumentError` if `slice` is outside the supported range.
"""
function create_bigint_string_by_pieces(digits, rng; slice=10)
    # 10^slice is computed in Int arithmetic; past this width it wraps around
    # silently and the sampling range below would be wrong
    max_slice = ndigits(typemax(Int)) - 1
    1 <= slice <= max_slice ||
        throw(ArgumentError("slice must be between 1 and $max_slice, got $slice"))

    # Accumulate in a buffer: repeated `*=` on a String recopies it every time
    buf = IOBuffer()
    # Getting sprintf to work with a predefined string is tricky
    fmt_string = Printf.Format("%0$(slice)d")
    n_full = max(digits ÷ slice, 0)
    for _ in 1:n_full
        print(buf, Printf.format(fmt_string, rand(rng, 0:10^slice-1)))
    end
    # Digits still missing after the full-width pieces (always fewer than slice)
    extra_digits = digits - n_full * slice
    if extra_digits > 0
        fmt_string = Printf.Format("%0$(extra_digits)d")
        print(buf, Printf.format(fmt_string, rand(rng, 0:10^extra_digits-1)))
    end
    return String(take!(buf))
end

create_bigint_string_by_pieces

In [3]:
"""
    create_bigint_string(digits, rng)

Draw one random `BigInt` between `0` and `10^digits - 1` from `rng` and return
it as a decimal string, left-padded with zeros to `digits` characters.

This is the standard Julia way to create a random BigInt string.
"""
function create_bigint_string(digits, rng)
    upper = BigInt(10)^digits - 1
    value = rand(rng, 0:upper)
    return string(value; base = 10, pad = digits)
end

create_bigint_string

In [4]:
# Fixed seed, so that every run draws the same sequence of random numbers
seed = 1_234_567_890
mt = Random.MersenneTwister(seed)

MersenneTwister(1234567890)

In [5]:
# Warning - only base 10 supported at the moment
base = 10
# Checked explicitly rather than with `@assert`, which Julia documents as a
# debugging aid that may be disabled at some optimization levels
base == 10 ||
    throw(ArgumentError("only base 10 is supported at the moment, got base = $base"))
# Length of the digit sequences that are searched for
seq_len = 5

5

In [6]:
# Every digit sequence of length `seq_len`, zero-padded, in ascending numeric order
seq_matches = map(0:(base^seq_len - 1)) do n
    string(BigInt(n); base = base, pad = seq_len)
end;

In [7]:
# Hit counter per sequence, starting at zero for every sequence
match_counts = Dict{String, Int}()
foreach(seq_matches) do seq
    match_counts[seq] = 0
end
# One slot per sequence, in the same order as `seq_matches`
n_nonapocalypse = fill(0, length(seq_matches));

In [8]:
# Number of decimal digits in each random number
number_length = 300
# 1 and 10^number_length as BigInts
start_n, end_n = big(1), big(10)^number_length;

In [9]:
# Number of random numbers to draw
max_i = 10^6

1000000

Now decide which random generator we will use...

In [10]:
# Select the generator by leaving exactly one of these two lines active; both
# functions take (digits, rng) and return a digit string.
random_string_generator = create_bigint_string
#random_string_generator = create_bigint_string_by_pieces

create_bigint_string (generic function with 1 method)

In [11]:

# The sampling loop lives in a function so that it does not run on untyped
# globals, and it counts the sequences that ARE seen in each sample (at most
# one per window) instead of scanning every sequence after every sample.

"""
    count_nonmatches!(n_nonmatch, seq_matches, generator, rng;
                      n_samples, number_length, seq_len)

Draw `n_samples` random digit strings with `generator(number_length, rng)` and,
for every entry of `seq_matches`, add to the corresponding slot of `n_nonmatch`
the number of strings in which that sequence does not occur.

The entries of `seq_matches` are assumed to be distinct strings of length
`seq_len`. Returns `n_nonmatch`.
"""
function count_nonmatches!(n_nonmatch, seq_matches, generator, rng;
                           n_samples, number_length, seq_len)
    # Position of every target sequence, so a window maps straight to its slot
    seq_index = Dict(seq => k for (k, seq) in enumerate(seq_matches))
    # n_seen[k]: number of samples that contained sequence k at least once
    n_seen = zeros(Int, length(seq_matches))
    # last_seen[k]: index of the latest sample containing sequence k, so that a
    # sequence occurring several times in one sample is only counted once
    last_seen = zeros(Int, length(seq_matches))

    for i ∈ ProgressBar(1:n_samples)
        i_str = generator(number_length, rng)

        # March over the string and check off matches
        for j in 1:(length(i_str)-(seq_len-1))
            m_str = SubString(i_str, j, j + (seq_len - 1))
            k = get(seq_index, m_str, 0)
            if k != 0 && last_seen[k] != i
                last_seen[k] = i
                n_seen[k] += 1
            end
        end
    end

    # A sequence is a non-match in every sample in which it was not seen
    n_nonmatch .+= n_samples .- n_seen
    return n_nonmatch
end

count_nonmatches!(n_nonapocalypse, seq_matches, random_string_generator, mt;
    n_samples = max_i, number_length = number_length, seq_len = seq_len);

0.0%┣                                      ┫ 0/1.0M [00:01<-338:-53:-34, -1s/it]
0.0%┣                                         ┫ 1/1.0M [00:01<Inf:Inf, InfGs/it]
0.0%┣                                          ┫ 2/1.0M [00:02<433:50:38, 2s/it]
0.0%┣                                          ┫ 4/1.0M [00:02<150:14:21, 2it/s]
0.0%┣                                           ┫ 6/1.0M [00:02<93:24:16, 3it/s]
0.0%┣                                           ┫ 8/1.0M [00:02<69:00:52, 4it/s]
0.0%┣                                          ┫ 10/1.0M [00:02<55:20:33, 5it/s]
0.0%┣                                          ┫ 12/1.0M [00:02<46:41:25, 6it/s]
0.0%┣                                          ┫ 14/1.0M [00:02<40:36:09, 7it/s]
0.0%┣                                          ┫ 16/1.0M [00:02<36:09:38, 8it/s]
0.0%┣                                          ┫ 18/1.0M [00:02<32:44:13, 8it/s]
0.0%┣                                          ┫ 20/1.0M [00:02<30:03:10, 9it/s]
0.0%┣                       

In [None]:
"""
    save_sweep_results(results; number_length, base, seq_len, start, stop,
                       filename = nothing, rng_seed = seed)

Write `results` together with the parameters of the run to a JSON file.

# Arguments
- `results`: the counts to store; `sum(results)` is reported in the log message.

# Keywords
- `number_length`, `base`, `seq_len`, `start`, `stop`: run parameters, stored
  under the keys "length", "base", "seq_len", "start" and "stop".
- `filename`: output path. When `nothing`, a name built from `base`,
  `number_length` and `seq_len` inside `../results` is used.
- `rng_seed`: value stored under the key "seed"; defaults to the global `seed`.
"""
function save_sweep_results(results; number_length, base, seq_len, start, stop,
                            filename = nothing, rng_seed = seed)
    if isnothing(filename)
        filename = joinpath("..", "results",
            "n-non-matches-v4-base-$(base)-length-$(number_length)-seq-$(seq_len).json")
    end
    # Create the output directory up front, so the write cannot fail (and the
    # computed results be lost) merely because it does not exist yet
    out_dir = dirname(filename)
    isempty(out_dir) || mkpath(out_dir)
    @info "Saving results to $filename at n=$stop (total non-matches: $(sum(results)))"
    # Separate name for the payload rather than rebinding the `results` argument
    payload = Dict("power" => 0, "base" => base, "seq_len" => seq_len,
        "start" => start, "stop" => stop, "results" => results, "length" => number_length,
        "seed" => rng_seed, "format" => "v4", "method" => "random")
    return open(filename, "w") do io
        JSON.print(io, payload, 2)
    end
end

In [None]:
# Store the non-match counts together with the parameters of this run
save_sweep_results(
    n_nonapocalypse;
    number_length = number_length,
    base = base,
    seq_len = seq_len,
    start = 1,
    stop = max_i,
)