In [1]:
from itertools import count
from IPython.display import clear_output
from pathlib import Path
from time import time, sleep
from line_profiler import LineProfiler
import pickle

In [2]:
%load_ext line_profiler

In [3]:
PATH = Path('./data/last_number.txt')

In [4]:
PATH_DIVISORS = Path('./data/Divisors.pkl')

In [5]:
PATH_CACHE = Path('./data/Cache.pkl')

In [6]:
PATH_CHECKPOINT = Path('./data/Checkpoint.pkl')

## Version 1

In [3]:
def print_digits(number):
    while number > 0:
        digit  = number % 10
        number = number // 10
        print(digit)

In [5]:
# Sadly, this approach of going backwards in the digits to check further is too slow.
# Mostly (probably) because we need to allocate and track a bunch of additional data.
# And we can't just check two or three digits, since that happens way too often.
def soft_check_goliath(number):
    interval = goliath_amount // 2
    divisor  = 10**interval
    original_number = number
    prev_was_six    = False
    prev_location   = -interval
    while number > 0:
        digit  = number % 10
        number = number // divisor
        if digit == 6:
            if prev_was_six:
                return True
                # candidate found, check more intensely
                tmp_number = original_number // 10**(prev_location + 1)
                digit      = tmp_number % 10
                if digit == 6:
                    tmp_number = tmp_number // 10
                    digit      = tmp_number % 10
                    if digit == 6:
                        tmp_number = tmp_number // 10
                        digit      = tmp_number % 10
                        if digit == 6:
                            tmp_number = tmp_number // 10
                            digit      = tmp_number % 10
                            if digit == 6:
                                # we now know of six sixes, which we take as enough evidence to trigger an even more expensive check.
                                return True
            prev_was_six = True
        else:
            prev_was_six = False
        prev_location += interval
    return False

In [35]:
profiler = LineProfiler()
@profiler
def count_sixes_naive(number, divisor):
    six_counter     = 0
    six_counter_max = 0
    while number > 0:
        digit    = number % 10
        number //= divisor
        if digit == 6:
            six_counter    += 1
            six_counter_max = max(six_counter_max, six_counter)
        else:
            six_counter = 0
    return six_counter_max

https://stackoverflow.com/questions/1686004/fastest-way-to-convert-binary-to-decimal

In [6]:
LastNumber = int(PATH.read_text()); LastNumber

570123

In [7]:
goliath = 666*'666'
goliath_amount = len(goliath)

In [8]:
goliath_amount

1998

These divisors are let us only check if ever nth digit is a 6, where n is the exponent $10^n$. The threshold is how many 6s we would expect to see if the number we're checking were to contain a goliath amount of 6s in a row.  

In [54]:
# 1995 / 3 = 665
fat_divisor = 10**665
candidate_treshold = 3

In [20]:
# 1995 / 5 = 399 <-------------- This makes about 0.33% of numbers trigger an expensive check
fat_divisor = 10**399
candidate_treshold = 5

In [34]:
# 1995 / 7 = 285
fat_divisor = 10**285
candidate_treshold = 7

In [37]:
i = LastNumber
Number = 1<<i
tStart = time()
tExpensiveDuration = 0
ExpensiveChecks    = 0
while True:
    t1 = time()
    sixes = count_sixes(Number, fat_divisor)
    t2 = time()
    if sixes >= candidate_treshold:
        ExpensiveChecks += 1
        t3 = time()
        if goliath in str(Number):
            print(f'Found one: {i}')
            break
        t4 = time()
    Number = Number<<1
    i += 1
    
    tEnd = time()
    clear_output(wait=True)
    print(i)
    print(f'#Expensive checks : {ExpensiveChecks} ({round(100* ExpensiveChecks / (i - LastNumber), 2)}%)')
    print(f'Total Duration    : {round(tEnd - tStart, 4)} seconds')
    print(f'Cheap Duration    : {round(t2 - t1, 4)} seconds')
    print(f'Expensive Duration: {round(t4 - t3, 4)} seconds')
    # if (i - LastNumber) > 100: break
    tStart = tEnd

570124
#Expensive checks : 123 (0.36%)
Total Duration    : 0.2803 seconds
Cheap Duration    : 0.2793 seconds
Expensive Duration: 0.4039 seconds


KeyboardInterrupt: 

In [1]:
i

570124

In [25]:
PATH.write_text(f'{i-1}');

In [13]:
profiler.print_stats()

Timer unit: 1e-07 s

Total time: 21.1059 s
File: C:\Users\Florian\AppData\Local\Temp\ipykernel_2900\4117333934.py
Function: count_sixes at line 2

Line #      Hits         Time  Per Hit   % Time  Line Contents
     2                                           @profiler
     3                                           def count_sixes(number, divisor):
     4       101        696.0      6.9      0.0      six_counter     = 0
     5       101        329.0      3.3      0.0      six_counter_max = 0
     6     38279     111773.0      2.9      0.1      while number > 0:
     7     38178    9941772.0    260.4      4.7          digit  = number % 10
     8     38178  200726980.0   5257.7     95.1          number = number // divisor
     9     38178     128177.0      3.4      0.1          if digit == 6:
    10      3793      15622.0      4.1      0.0              six_counter    += 1
    11      3793      43681.0     11.5      0.0              six_counter_max = max(six_counter_max, six_counter)
   

## Version 2

In [7]:
# def ContinueDivision(Quotient, Remainder, Divisor):
#     Remainder = (Remainder << 1) # Shift in a zero, since we know that the last bit of 2^x will always be a zero.
#     if Remainder >= Divisor:
#         Remainder = Remainder - Divisor
#         Quotient  = (Quotient << 1) | 1
#     else:
#         Quotient  = (Quotient << 1)
#     return Quotient, Remainder

In [8]:
# profiler = LineProfiler()
# @profiler
def count_sixes(Number, Divisors, Cache):
    SixCounter    = 0
    SixCounterMax = 0
    j = 0
    N = Number
    while N > 0:
        # TODO: Could divide first, which would allow us to remove the (j+1) from the divisor.
        # TODO: use continued division for this as well?
        Digit = N % 10
        if Digit == 6:
            SixCounter   += 1
            SixCounterMax = max(SixCounterMax, SixCounter)
        else:
            SixCounter = 0
        try:
            # TODO: remove divisors from caching, and instead grab last bit from the numerator
            D    = Divisors[j]
            N, R = Cache[j]
            # N, R = ContinueDivision(Q, R, D) # inline
            R = (R << 1)
            if R >= D:
                R = R - D
                N = (N << 1) | 1
            else:
                N = (N << 1)
        except KeyError:
            D = 10**(399*(j+1))
            N = Number // D
            R = Number  % D
            Divisors[j] = D
        Cache[j] = N, R
        j += 1
    return SixCounterMax

In [47]:
profiler = LineProfiler()
@profiler
def count_sixes(N, D, Cache):
    SixCounter    = 0
    SixCounterMax = 0
    j = 0
    while N > 0:
        # TODO: Could divide first, which would allow us to remove the (j+1) from the divisor.
        # TODO: use continued division for this as well? Or can't we just use the remainder instead of the number?
        Digit = N % 10
        if Digit == 6:
            SixCounter   += 1
            SixCounterMax = max(SixCounterMax, SixCounter)
        else:
            SixCounter = 0
        try:
            Q, R = Cache[j]
            R    = (R << 1) | (N & 1)
            if R >= D:
                R = R - D
                N = (Q << 1) | 1
            else:
                N = (Q << 1)
        except KeyError:
            R = N  % D
            N = N // D
        Cache[j] = N, R
        j += 1
    return SixCounterMax

### state v1

In [22]:
LastNumber = int(PATH.read_text()); LastNumber

1750000

### state v2

In [9]:
LastNumber = int(PATH.read_text()); LastNumber

1400000

In [18]:
i      = LastNumber
Number = 1<<i

In [13]:
with PATH_DIVISORS.open(mode='rb') as f:
    Divisors = pickle.load(f)

In [14]:
with PATH_CACHE.open(mode='rb') as f:
    Cache = pickle.load(f)

### state v3

In [9]:
class checkpoint:
    def __init__(self, LastNumber, Divisors, Cache):
        self.LastNumber = LastNumber
        self.Divisors = Divisors
        self.Cache = Cache

In [10]:
with PATH_CHECKPOINT.open(mode='rb') as f:
    Checkpoint = pickle.load(f)

In [11]:
LastNumber = Checkpoint.LastNumber
Divisors   = Checkpoint.Divisors
Cache      = Checkpoint.Cache
i          = LastNumber
Number     = 1<<i
LastNumber

1640000

### state v4

In [40]:
class checkpoint:
    def __init__(self, LastNumber, Cache):
        self.LastNumber = LastNumber
        self.Cache = Cache

In [41]:
with PATH_CHECKPOINT.open(mode='rb') as f:
    Checkpoint = pickle.load(f)

In [42]:
LastNumber = Checkpoint.LastNumber
Cache      = Checkpoint.Cache
i          = LastNumber
Number     = 1<<i
LastNumber

1750000

### Rebuild cache

In [26]:
# If the caches get corrupted, use this to rebuild them.
# Be warned, this can take multiple seconds!
%%time
if 0:
    i        = LastNumber
    Number   = 1<<(i - 1)
    print('Building Cache...')
    Divisor = 10**399
    Cache   = {}
    count_sixes(Number, Divisor, Cache)
    Number = Number << 1
    print('Done!')

Building Cache...
Done!
CPU times: total: 5.08 s
Wall time: 5.11 s


In [27]:
profiler.print_stats()

Timer unit: 1e-07 s

Total time: 5.09137 s
File: C:\Users\Florian\AppData\Local\Temp\ipykernel_1912\369381277.py
Function: count_sixes at line 2

Line #      Hits         Time  Per Hit   % Time  Line Contents
     2                                           @profiler
     3                                           def count_sixes(N, D, Cache):
     4         1         11.0     11.0      0.0      SixCounter    = 0
     5         1          4.0      4.0      0.0      SixCounterMax = 0
     6         1         12.0     12.0      0.0      j = 0
     7      1322       4659.0      3.5      0.0      while N > 0:
     8                                                   # TODO: Could divide first, which would allow us to remove the (j+1) from the divisor.
     9                                                   # TODO: use continued division for this as well? Or can't we just use the remainder instead of the number?
    10      1321    1316625.0    996.7      2.6          Digit = N % 10
    11

### Main

In [49]:
# Verify cache
assert Number  == 1<<i
assert Divisor == 10**399
assert (Number>>1) // Divisor == Cache[0][0]
assert (Number>>1)  % Divisor == Cache[0][1]
j            = len(Cache) - 1
j0thDivision = (Number>>1) // (10**(399*(j  )))
j1thDivision = (Number>>1) // (10**(399*(j+1)))
assert j0thDivision != 0
assert j1thDivision           == Cache[j][0] == 0
assert j0thDivision % Divisor == Cache[j][1]
assert j+1 not in Cache
del j, j0thDivision, j1thDivision

In [44]:
len(Cache)

1321

In [45]:
goliath = 666*'666'
goliath_amount = len(goliath); goliath_amount

1998

In [46]:
# 1995 / 5 = 399 <-------------- This makes about 0.33% of numbers trigger an expensive check
# Exponent    = 399
# fat_divisor = 10**Exponent
candidate_treshold = 5

In [50]:
## WARNING!!!
## This cell can NOT be restarted after a keyboard interrupt due to inconsistent caches!
t0 = time()
tt = 1 / 60
t3 = 0
t4 = 0
CandidatesFound = 0
while True:
    for _ in range(5_000):
        t1 = time()
        sixes = count_sixes(Number, Divisor, Cache)
        t2 = time()
        if sixes >= candidate_treshold:
            CandidatesFound += 1
            Path(f'./candidates/{i}').touch() # do the expensive check offline
        Number = Number<<1
        i += 1
        clear_output(wait=True)
        print(i)
        print(f'#Candidates found : {CandidatesFound} ({round(100* CandidatesFound / (i - LastNumber), 2)}%)')
        print(f'Cheap Duration    : {round(t2 - t1, 4)} seconds')
        print(f'Total Duration    : {round(tt * (t1 - t0), 4)} minutes')
    
    
    # Verify caches
    assert Number  == 1<<i
    assert Divisor == 10**399
    assert (Number>>1) // Divisor == Cache[0][0]
    assert (Number>>1)  % Divisor == Cache[0][1]
    j            = len(Cache) - 1
    j0thDivision = (Number>>1) // (10**(399*(j  )))
    j1thDivision = (Number>>1) // (10**(399*(j+1)))
    assert j0thDivision != 0
    assert j1thDivision           == Cache[j][0] == 0
    assert j0thDivision % Divisor == Cache[j][1]
    assert j+1 not in Cache
    del j, j0thDivision, j1thDivision
    # Dump Cache to disk
    PATH.write_text(f'{i}');
    Checkpoint = checkpoint(i, Cache)
    with PATH_CHECKPOINT.open(mode='wb') as f:
        pickle.dump(Checkpoint, f)
    print('Checkpoint saved to disk!')
    # sleep(10)
    break

1755000
#Candidates found : 65 (1.3%)
Cheap Duration    : 0.1711 seconds
Total Duration    : 14.8831 minutes


TypeError: __init__() takes 3 positional arguments but 4 were given

In [51]:
i

1755000

In [52]:
# Verify cache
assert Number  == 1<<i
assert Divisor == 10**399
assert (Number>>1) // Divisor == Cache[0][0]
assert (Number>>1)  % Divisor == Cache[0][1]
j            = len(Cache) - 1
j0thDivision = (Number>>1) // (10**(399*(j  )))
j1thDivision = (Number>>1) // (10**(399*(j+1)))
assert j0thDivision != 0
assert j1thDivision           == Cache[j][0] == 0
assert j0thDivision % Divisor == Cache[j][1]
assert j+1 not in Cache
del j, j0thDivision, j1thDivision

### save state v1

In [147]:
PATH.write_text(f'{i}');

### Save state v2

In [148]:
PATH.write_text(f'{i}');

In [149]:
with PATH_DIVISORS.open(mode='wb') as f:
    pickle.dump(Divisors, f)

In [150]:
with PATH_CACHE.open(mode='wb') as f:
    pickle.dump(Cache, f)

### Save state v3

In [151]:
Checkpoint = checkpoint(i, Divisors, Cache)

In [152]:
with PATH_CHECKPOINT.open(mode='wb') as f:
    pickle.dump(Checkpoint, f)

### Save state v4

In [53]:
Checkpoint = checkpoint(i, Cache)

In [54]:
with PATH_CHECKPOINT.open(mode='wb') as f:
    pickle.dump(Checkpoint, f)

### Profiling

In [55]:
profiler.print_stats()

Timer unit: 1e-07 s

Total time: 869.917 s
File: C:\Users\Florian\AppData\Local\Temp\ipykernel_1912\369381277.py
Function: count_sixes at line 2

Line #      Hits         Time  Per Hit   % Time  Line Contents
     2                                           @profiler
     3                                           def count_sixes(N, D, Cache):
     4      5001      44272.0      8.9      0.0      SixCounter    = 0
     5      5001      19684.0      3.9      0.0      SixCounterMax = 0
     6      5001      17250.0      3.4      0.0      j = 0
     7   6618373   21280476.0      3.2      0.2      while N > 0:
     8                                                   # TODO: Could divide first, which would allow us to remove the (j+1) from the divisor.
     9                                                   # TODO: use continued division for this as well? Or can't we just use the remainder instead of the number?
    10   6613373 6513440895.0    984.9     74.9          Digit = N % 10
    11

In [131]:
profiler.print_stats()

Timer unit: 1e-07 s

Total time: 6860.08 s
File: C:\Users\Florian\AppData\Local\Temp\ipykernel_7708\2766715933.py
Function: count_sixes at line 2

Line #      Hits         Time  Per Hit   % Time  Line Contents
     2                                           @profiler
     3                                           def count_sixes(Number, Divisors, Cache):
     4     49999     331421.0      6.6      0.0      SixCounter    = 0
     5     49999     134709.0      2.7      0.0      SixCounterMax = 0
     6     49999     153801.0      3.1      0.0      j = 0
     7     49999     137143.0      2.7      0.0      N = Number
     8  53829177  150020560.0      2.8      0.2      while N > 0:
     9  53779178        4e+10    740.6     58.1          Digit = N % 10
    10  53779178  148807493.0      2.8      0.2          if Digit == 6:
    11   5385654   16825128.0      3.1      0.0              SixCounter   += 1
    12   5385654   39580574.0      7.3      0.1              SixCounterMax = max(SixCo

In [84]:
profiler.print_stats()

Timer unit: 1e-07 s

Total time: 22106.9 s
File: C:\Users\Florian\AppData\Local\Temp\ipykernel_12220\4211618779.py
Function: count_sixes at line 2

Line #      Hits         Time  Per Hit   % Time  Line Contents
     2                                           @profiler
     3                                           def count_sixes(Number, Divisors, Cache):
     4    195347    1271589.0      6.5      0.0      SixCounter    = 0
     5    195347     629752.0      3.2      0.0      SixCounterMax = 0
     6    195347     448973.0      2.3      0.0      i = 0
     7    195347     554875.0      2.8      0.0      N = Number
     8 192232259  547501112.0      2.8      0.2      while N > 0:
     9 192036912        1e+11    672.5     58.4          Digit = N % 10
    10 192036912  528765814.0      2.8      0.2          if Digit == 6:
    11  19237145   59780868.0      3.1      0.0              SixCounter   += 1
    12  19237145  139016938.0      7.2      0.1              SixCounterMax = max(SixC

In [43]:
profiler.print_stats()

Timer unit: 1e-07 s

Total time: 94.5 s
File: C:\Users\Florian\AppData\Local\Temp\ipykernel_12220\4211618779.py
Function: count_sixes at line 2

Line #      Hits         Time  Per Hit   % Time  Line Contents
     2                                           @profiler
     3                                           def count_sixes(Number, Divisors, Cache):
     4      1000       6186.0      6.2      0.0      SixCounter    = 0
     5      1000       2747.0      2.7      0.0      SixCounterMax = 0
     6      1000       2364.0      2.4      0.0      i = 0
     7      1000       3094.0      3.1      0.0      N = Number
     8    902424    2429788.0      2.7      0.3      while N > 0:
     9    901424  553922105.0    614.5     58.6          Digit = N % 10
    10    901424    2409905.0      2.7      0.3          if Digit == 6:
    11     90467     277185.0      3.1      0.0              SixCounter   += 1
    12     90467     679282.0      7.5      0.1              SixCounterMax = max(SixCoun

In [33]:
profiler.print_stats()

Timer unit: 1e-07 s

Total time: 95.9172 s
File: C:\Users\Florian\AppData\Local\Temp\ipykernel_12220\3907487493.py
Function: count_sixes at line 2

Line #      Hits         Time  Per Hit   % Time  Line Contents
     2                                           @profiler
     3                                           def count_sixes(Number, Divisors, Cache):
     4      1000       7890.0      7.9      0.0      SixCounter    = 0
     5      1000       3153.0      3.2      0.0      SixCounterMax = 0
     6      1000       2561.0      2.6      0.0      i = 0
     7      1000       2442.0      2.4      0.0      N = Number
     8    901749    2333698.0      2.6      0.2      while N > 0:
     9    900749  557088560.0    618.5     58.1          Digit = N % 10
    10    900749    2485078.0      2.8      0.3          if Digit == 6:
    11     90253     288829.0      3.2      0.0              SixCounter   += 1
    12     90253     715613.0      7.9      0.1              SixCounterMax = max(SixC

In [15]:
profiler.print_stats()

Timer unit: 1e-07 s

Total time: 2697.76 s
File: C:\Users\Florian\AppData\Local\Temp\ipykernel_1796\3027381636.py
Function: count_sixes at line 2

Line #      Hits         Time  Per Hit   % Time  Line Contents
     2                                           @profiler
     3                                           def count_sixes(Number, Divisors, Cache):
     4     94909     681102.0      7.2      0.0      SixCounter    = 0
     5     94909     338009.0      3.6      0.0      SixCounterMax = 0
     6     94909     249670.0      2.6      0.0      i = 0
     7     94909     301435.0      3.2      0.0      N = Number
     8  44363833  118510216.0      2.7      0.4      while N > 0:
     9  44268925        1e+10    329.3     54.0          Digit    = N % 10
    10  44268925  665760860.0     15.0      2.5          Divisor  = Divisors[i]
    11  44268925  122325401.0      2.8      0.5          if i in Cache:
    12  44268423  317412938.0      7.2      1.2              Q, R = Cache[i]
    1