In [345]:
from line_profiler import LineProfiler

In [346]:
%load_ext line_profiler

## Speed up modulo by using remainder instead of entire number

Testing if we can use the remainder of a division with a large multiple of 10 to speed up the computation of modulo 10.

NOTE: There seems to be something fishy going on with the last digit, depending on which exponent we choose. All exponents of two ending in 0 seem to be ending in 6 once calculated. All with 1 end with 2, all with 2 end with 4 and so on. Check this out later!

In [11]:
N = 1<<1001

In [12]:
one = N % 10**1

In [13]:
two = N % 10**2

In [14]:
three = N % 10**3

In [15]:
four = N % 10**4

In [16]:
five = N % 10**5

In [17]:
six = N % 10**6

In [18]:
seven = N % 10**7

In [19]:
eight = N % 10**8

In [20]:
one, two, three, four, five, six, seven, eight

(2, 52, 752, 8752, 38752, 138752, 6138752, 36138752)

In [122]:
N

42860344287450692837937001962400072422456192468221344297750015534814042044997444899727935152627834325103786916702125873007485811427692561743938310298794299215738271099296923941684298420249484567511816728612185899934327765069595070236662175784308251658284785910746168670641719326610497547348822672277504

In [123]:
N % 10**399

42860344287450692837937001962400072422456192468221344297750015534814042044997444899727935152627834325103786916702125873007485811427692561743938310298794299215738271099296923941684298420249484567511816728612185899934327765069595070236662175784308251658284785910746168670641719326610497547348822672277504

It seems like any time we do a `n mod 10**k`, we're really grabbing about the last k decimal digits of the number n. So if we then do a mod 10 afterwards on that value, we will get the result of n mod 10, without having to do the whole computation again.  
With this realisation it's obvious that we can just take the modulo of the remainder instead of the entire number, saving massively on computation.

In [209]:
profiler = LineProfiler()
@profiler
def count_sixes(N, D, Cache):
    SixCounter    = 0
    SixCounterMax = 0
    j = 0
    while N > 0:
        # TODO: use continued division for this as well? Or can we just use the remainder instead of the number?
        Digit = N % 10
        if Digit == 6:
            SixCounter   += 1
            SixCounterMax = max(SixCounterMax, SixCounter)
        else:
            SixCounter = 0
        try:
            Q, R = Cache[j]
            R    = (R << 1) | (N & 1)
            if R >= D:
                R = R - D
                N = (Q << 1) | 1
            else:
                N = (Q << 1)
        except KeyError:
            R = N  % D
            N = N // D
        Cache[j] = N, R
        j += 1
    return SixCounterMax

In [213]:
# This is the new version
profiler = LineProfiler()
@profiler
def count_sixes(N, D, Cache):
    SixCounter    = 0
    SixCounterMax = 0
    j = 0
    while N > 0:
        try:
            Q, R = Cache[j]
            R    = (R << 1) | (N & 1)
            if R >= D:
                R = R - D
                N = (Q << 1) | 1
            else:
                N = (Q << 1)
        except KeyError:
            R = N  % D
            N = N // D
        Cache[j] = N, R
        j += 1
        
        Digit = R % 10
        if Digit == 6:
            SixCounter   += 1
            SixCounterMax = max(SixCounterMax, SixCounter)
        else:
            SixCounter = 0
        
    return SixCounterMax

In [214]:
%%time
i        = 100_000
Number   = 1<<(i - 1)
print('Building Cache...')
Divisor = 10**399
Cache   = {}
count_sixes(Number, Divisor, Cache)
Number = Number << 1
print('Done!')

Building Cache...
Done!
CPU times: total: 31.2 ms
Wall time: 18 ms


In [211]:
%%time
output = []
for _ in range(1000):
    output.append(count_sixes(Number, Divisor, Cache))
    i      += 1
    Number  = Number<<1
print(output)

[2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 3, 2, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 2, 3, 1, 1, 1, 1, 1, 1, 2, 2, 3, 2, 3, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 6, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 3, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 3, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 3, 1, 1, 3, 2, 1, 2, 1, 1, 3, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 2, 2, 1, 2, 3, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 1, 3, 2, 1, 2, 1, 2, 2, 1, 3, 1, 1, 2, 2, 1, 2, 1, 3, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 1, 1, 2, 1, 1, 2, 3, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 3, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 3, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 1, 1, 1, 3, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 4, 1, 1, 3, 1, 2, 1, 1, 1, 1, 3, 2, 1, 2, 2, 

In [215]:
%%time
output = []
for _ in range(1000):
    output.append(count_sixes(Number, Divisor, Cache))
    i      += 1
    Number  = Number<<1
print(output)

[2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 3, 2, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 2, 3, 1, 1, 1, 1, 1, 1, 2, 2, 3, 2, 3, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 6, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 3, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 3, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 3, 1, 1, 3, 2, 1, 2, 1, 1, 3, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 2, 2, 1, 2, 3, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 1, 3, 2, 1, 2, 1, 2, 2, 1, 3, 1, 1, 2, 2, 1, 2, 1, 3, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 1, 1, 2, 1, 1, 2, 3, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 3, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 3, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 1, 1, 1, 3, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 4, 1, 1, 3, 1, 2, 1, 1, 1, 1, 3, 2, 1, 2, 2, 

In [207]:
[2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 3, 2, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 2, 3, 1, 1, 1, 1, 1, 1, 2, 2, 3, 2, 3, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 6, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 3, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 3, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 3, 1, 1, 3, 2, 1, 2, 1, 1, 3, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 2, 2, 1, 2, 3, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 1, 3, 2, 1, 2, 1, 2, 2, 1, 3, 1, 1, 2, 2, 1, 2, 1, 3, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 1, 1, 2, 1, 1, 2, 3, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 3, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 3, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 1, 1, 1, 3, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 4, 1, 1, 3, 1, 2, 1, 1, 1, 1, 3, 2, 1, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 3, 2, 3, 4, 1, 1, 1, 2, 1, 2, 1, 1, 2, 2, 1, 1, 1, 2, 1, 3, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 3, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 3, 2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 3, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 3, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 3, 1, 3, 1, 1, 2, 1, 1, 1, 2, 4, 1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 3, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 1, 2, 3, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 3, 2, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 3, 1, 2, 1, 1, 1, 4, 3, 1, 1, 2, 2, 1, 3, 1, 1, 1, 1, 2, 3, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 3, 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 3, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 3, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 1, 3, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 4, 2, 1, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 3, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 3, 2, 3, 1, 1, 1, 1, 2, 3, 1, 3, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 3, 1, 2, 2, 2, 3, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 3, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, 1, 1, 3, 3, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 3, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 3, 1, 2, 2, 2, 3, 1, 3, 1, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 1, 1, 1, 1, 3, 1, 2, 3, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2] == [2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 3, 2, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 2, 3, 1, 1, 1, 1, 1, 1, 2, 2, 3, 2, 3, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 6, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 3, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 3, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 3, 1, 1, 3, 2, 1, 2, 1, 1, 3, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 2, 2, 1, 2, 3, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 1, 3, 2, 1, 2, 1, 2, 2, 1, 3, 1, 1, 2, 2, 1, 2, 1, 3, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 1, 1, 2, 1, 1, 2, 3, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 3, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 3, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 1, 1, 1, 3, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 4, 1, 1, 3, 1, 2, 1, 1, 1, 1, 3, 2, 1, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 3, 2, 3, 4, 1, 1, 1, 2, 1, 2, 1, 1, 2, 2, 1, 1, 1, 2, 1, 3, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 3, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 3, 2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 3, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 3, 2, 3, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 3, 1, 3, 1, 1, 2, 1, 1, 1, 2, 4, 1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 3, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 1, 2, 3, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 3, 2, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 3, 1, 2, 1, 1, 1, 4, 3, 1, 1, 2, 2, 1, 3, 1, 1, 1, 1, 2, 3, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 3, 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 3, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 3, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 1, 3, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 4, 2, 1, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 3, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 3, 2, 3, 1, 1, 1, 1, 2, 3, 1, 3, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 3, 1, 2, 2, 2, 3, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 3, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, 1, 1, 3, 3, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 3, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 3, 1, 2, 2, 2, 3, 1, 3, 1, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 1, 1, 1, 1, 3, 1, 2, 3, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2]

True

In [212]:
profiler.print_stats()

Timer unit: 1e-07 s

Total time: 0.7823 s
File: C:\Users\Florian\AppData\Local\Temp\ipykernel_16104\356962432.py
Function: count_sixes at line 2

Line #      Hits         Time  Per Hit   % Time  Line Contents
     2                                           @profiler
     3                                           def count_sixes(N, D, Cache):
     4      1001       2489.0      2.5      0.0      SixCounter    = 0
     5      1001       2437.0      2.4      0.0      SixCounterMax = 0
     6      1001       2445.0      2.4      0.0      j = 0
     7     77342     190859.0      2.5      2.4      while N > 0:
     8                                                   # TODO: use continued division for this as well? Or can we just use the remainder instead of the number?
     9     76341    4213638.0     55.2     53.9          Digit = N % 10
    10     76341     188286.0      2.5      2.4          if Digit == 6:
    11      7925      21975.0      2.8      0.3              SixCounter   += 1
 

In [216]:
profiler.print_stats()

Timer unit: 1e-07 s

Total time: 0.388465 s
File: C:\Users\Florian\AppData\Local\Temp\ipykernel_16104\3149308276.py
Function: count_sixes at line 2

Line #      Hits         Time  Per Hit   % Time  Line Contents
     2                                           @profiler
     3                                           def count_sixes(N, D, Cache):
     4      1001       2521.0      2.5      0.1      SixCounter    = 0
     5      1001       2312.0      2.3      0.1      SixCounterMax = 0
     6      1001       2215.0      2.2      0.1      j = 0
     7     77342     194824.0      2.5      5.0      while N > 0:
     8     76341     157660.0      2.1      4.1          try:
     9     76341     265909.0      3.5      6.8              Q, R = Cache[j]
    10     76264     315681.0      4.1      8.1              R    = (R << 1) | (N & 1)
    11     76264     196525.0      2.6      5.1              if R >= D:
    12     37555     124209.0      3.3      3.2                  R = R - D
    13    

## Improve Candidate selection

So far we're checking every 399th digit of the number for a six, and if we find five such sixes in a row anywhere in the number, we declare the number a candidate for more thorough checking. This works very good and has given us a massive speedup already. However since the numbers are growing bigger and bigger, the chance of finding a candidate in this way also go up. At the start the probability was about 0.3%, but now it's become over 2%. This is not ideal since the thorough check is very expensive, and we want to do as little of them as possible.  
To reduce the likelyhood of detecting a candidate, we can simply check more digits. The more digits we check, the slower we progress however.  
What we've done here is to check more digits, but only if we've already detected 5 sixes in the usual way. On the fast pass this is only a single branch more whenever we hit a six, and on the slow pass, we now just start checking k digits between the 4th and 5th segment where we have already found the sixes at the start and end. All of these digits need to be sixes in order to be a goliath number, so a single non six immediately stops our search, and we condinue with taking large strides through the digits of the number.

In [149]:
def count_sixes_old(N, D, Cache):
    SixCounter    = 0
    SixCounterMax = 0
    j = 0
    while N > 0:
        try:
            Q, R = Cache[j]
            R    = (R << 1) | (N & 1)
            if R >= D:
                R = R - D
                N = (Q << 1) | 1
            else:
                N = (Q << 1)
        except KeyError:
            R = N  % D
            N = N // D
        Cache[j] = N, R
        j += 1
        
        Digit = R % 10
        if Digit == 6:
            SixCounter   += 1
            SixCounterMax = max(SixCounterMax, SixCounter)
        else:
            SixCounter = 0
        
    return SixCounterMax

In [151]:
profiler = LineProfiler()
@profiler
def count_sixes(N, D, Cache, k=1):
    SixCounter    = 0
    SixCounterMax = 0
    j = 0
    while N > 0:
        try:
            Q, R = Cache[j]
            R    = (R << 1) | (N & 1)
            if R >= D:
                R = R - D
                N = (Q << 1) | 1
            else:
                N = (Q << 1)
        except KeyError:
            R = N  % D
            N = N // D
        Cache[j] = N, R
        
        Digit = R % 10
        if Digit == 6:
            SixCounter += 1
            if SixCounter == 5:
                # Go back and run a more expensive check to reduce likelyhood of selecting (wrong) candidates.
                # If this is a goliath number, then this needs to now be all sixes.
                R = (Cache[j-1][1]) // 10 # We already know the last digit is a six, so skip it.
                for _ in range(k):
                    Digit = R % 10
                    if Digit == 6:
                        R = R // 10
                    else:
                        SixCounter = 1 # Set to one because the digit here at j is actually a 6.
                        break
            SixCounterMax = max(SixCounterMax, SixCounter) # This is not in the else block to support k = 0
        else:
            SixCounter = 0
        j += 1
    return SixCounterMax

In [54]:
%%time
i        = 100_000
Divisor = 10**399

Building Cache...
1
Done!
CPU times: total: 15.6 ms
Wall time: 17.1 ms


In [66]:
G = int('666'*666)

In [153]:
%%time
i      = 1_400_052
Number = 1<<(i)
count_sixes(Number, Divisor, {}, k=2)

CPU times: total: 3.3 s
Wall time: 3.3 s


4

In [154]:
count_sixes_old(Number, Divisor, {})

5

## Only store the last bit of each number, and the remainder?

Currently, for each step we take through the 399 size chunks of the number, we're storing the entire number left at that point, as well as the remainder. This is already a red flag, since if our cache is p segments long, that means that the very last segment (the very beginning of the number) is stored p times. The p-1th segment is stored p-1 times and so on, essentially creating a huge triangle if we were to visualise the data.  
When we look at the algorithm, we really only need the remainder and the last bit of the previous number. Even when we miss the cache, we have a pretty good idea of what the next number should be. Do some more testing, but a new segment added to the cache should always be a zero at first, and some remainder. 

In [423]:
def count_sixes_old(N, D, Cache, k=1):
    SixCounter    = 0
    SixCounterMax = 0
    j = 0
    while N > 0:
        try:
            Q, R = Cache[j]
            R    = (R << 1) | (N & 1)
            if R >= D:
                R = R - D
                N = (Q << 1) | 1
            else:
                N = (Q << 1)
        except KeyError:
            R = N  % D
            N = N // D
        Cache[j] = N, R
        
        Digit = R % 10
        if Digit == 6:
            SixCounter += 1
            if SixCounter == 5:
                R = (Cache[j-1][1]) // 10
                for _ in range(k):
                    Digit = R % 10
                    if Digit == 6:
                        R = R // 10
                    else:
                        SixCounter = 1
                        break
            SixCounterMax = max(SixCounterMax, SixCounter)
        else:
            SixCounter = 0
        j += 1
    return SixCounterMax

In [424]:
def RebuildCacheOld(LastNumber):
    """If the cache gets corrupted, use this to rebuild it.
    Be warned, this can take multiple seconds!"""
    Cache = {}
    i      = LastNumber
    Number = 1<<(i - 1)
    N = Number
    D = 10**399
    j = 0
    while N > 0:
        R = N  % D
        N = N // D
        Cache[j] = N, R
        j += 1
    return i, (Number<<1), D, Cache

In [425]:
def VerifyCacheOld(i, Number, Divisor, Cache):
    'If all of these checks pass, then it should be fine to use these values to continue the search.'
    assert Number  == 1<<i
    assert Divisor == 10**399
    assert (Number>>1) // Divisor == Cache[0][0]
    assert (Number>>1)  % Divisor == Cache[0][1]
    j            = len(Cache) - 1
    j0thDivision = (Number>>1) // (10**(399*(j  )))
    j1thDivision = (Number>>1) // (10**(399*(j+1)))
    assert j0thDivision != 0
    assert j0thDivision           == Cache[j-1][0]
    assert j1thDivision           == Cache[j][0] == 0
    assert j0thDivision % Divisor == Cache[j][1]
    assert j+1 not in Cache

In [428]:
def RebuildCache(LastNumber):
    """If the cache gets corrupted, use this to rebuild it.
    Be warned, this can take multiple seconds!"""
    i = LastNumber
    N = 1<<(i - 1)
    D = 10**399
    # j = 0
    Cache = []
    while N > 0:
        R = N  % D
        N = N // D
        Cache.append(R)
        # Cache[j] = R
        # j += 1
    return i, D, Cache

In [429]:
def VerifyCache(i, Divisor, Cache):
    'If all of these checks pass, then it should be fine to use these values to continue the search.'
    Number = 1<<(i - 1)
    assert Divisor == 10**399
    assert Number % Divisor == Cache[0]
    j            = len(Cache) - 1
    j0thDivision = Number // (10**(399*(j  )))
    j1thDivision = Number // (10**(399*(j+1)))
    assert j0thDivision != 0
    assert j1thDivision == 0
    assert j0thDivision % Divisor == Cache[j]
    assert Cache[-1] != 0

We iterate until N is no longer larger than 0, which means that at the last step, N will have to have been 0 for the loop to end.  
Let's say it is, then it can either be already in the cache, or it can be missing from the cache. In the case where we have a cache hit, we will increase R as usual and then check if it's become larger than D or not. If it hasn't, then N doesn't change. Shifting a zero to the left doesn't change the number, so it's still a zero, and the loop ends.  
If R has become larger than D, then we decrease R again, again shift the zero to the left, but now also "and" it with one. This means N is now 1, which means that the loop will continue.  
We will now enter the case where we have a cache miss.  
The new N and R are calculated by dividing the previous N, and taking the modulo of N respectively, each with the given Divisor. But we already know that the previous N must be a 1. Since D is larger than one, a floor division between 1 and D will always give 0 as the answer. Similarly, 1 % D will always give 1 as the answer.  
This means the new N will be 0, and the new R will be 1, no matter what happened in the iteration before.

In [437]:
# profiler = LineProfiler()
# @profiler
def count_sixes(D, Cache, k=1):
    SixCounter    = 0
    SixCounterMax = 0
    N = 0
    J = len(Cache)
    for j in range(J):
        R = (Cache[j] << 1) | N
        if R >= D:
            R = R - D
            N = 1
        else:
            N = 0
        Cache[j] = R
        
        Digit = R % 10
        if Digit == 6:
            SixCounter += 1
            if SixCounter == 5:
                R = (Cache[j-1]) // 10
                for _ in range(k):
                    Digit = R % 10
                    if Digit == 6:
                        R = R // 10
                    else:
                        SixCounter = 1
                        break
            SixCounterMax = max(SixCounterMax, SixCounter)
        else:
            SixCounter = 0
    if N > 0:
        # print('hi')
        # Cache[J] = 1
        Cache.append(1)
    return SixCounterMax

In [447]:
%%time
i, Number, Divisor, Cache = RebuildCacheOld(1_000_000)
VerifyCacheOld(i, Number, Divisor, Cache)

CPU times: total: 1.64 s
Wall time: 1.64 s


In [448]:
%%time
output = []
for _ in range(1000):
    output.append(count_sixes_old(Number, Divisor, Cache))
    i      += 1
    Number  = Number<<1

CPU times: total: 9.94 s
Wall time: 9.96 s


In [449]:
%%time
i, Divisor, Cache = RebuildCache(1_000_000)
VerifyCache(i, Divisor, Cache)

CPU times: total: 1.62 s
Wall time: 1.64 s


In [450]:
%%time
output2 = []
for _ in range(1_000):
    output2.append(count_sixes(Divisor, Cache))
    i      += 1

CPU times: total: 297 ms
Wall time: 305 ms


In [451]:
output == output2

True

In [455]:
VerifyCache(i, Divisor, Cache)

Crushing defeat!!!  

The new version is at least 5x faster than the old one.

In [418]:
profiler.print_stats()

Timer unit: 1e-07 s

Total time: 7.88987 s
File: C:\Users\Florian\AppData\Local\Temp\ipykernel_20112\3990742229.py
Function: count_sixes at line 2

Line #      Hits         Time  Per Hit   % Time  Line Contents
     2                                           @profiler
     3                                           def count_sixes(D, Cache, k=1):
     4      1000       4742.0      4.7      0.0      SixCounter    = 0
     5      1000       3561.0      3.6      0.0      SixCounterMax = 0
     6      1000       2571.0      2.6      0.0      N = 0
     7      1000       8541.0      8.5      0.0      J = len(Cache)
     8   3019792    7982991.0      2.6     10.1      for j in range(J):
     9   3018792   12152451.0      4.0     15.4          R = (Cache[j] << 1) | N
    10   3018792    7493703.0      2.5      9.5          if R >= D:
    11   1508042    4779038.0      3.2      6.1              R = R - D
    12   1508042    3723418.0      2.5      4.7              N = 1
    13               