In [2]:
import numpy as np

In [86]:
BATCH_SIZE = 1024

In [87]:
points = np.random.rand(BATCH_SIZE * 3, 2)

In [88]:
edges = np.abs(np.roll(points, BATCH_SIZE, axis=0) - points)

In [89]:
areas = (edges[:, 0] * edges[:, 1]).reshape((BATCH_SIZE, -1))

In [90]:
areas.sort(axis=1)

In [97]:
areas[:, -1].mean()

0.2204088464559777

In [195]:
def compute_batch_mean_area(batch_size=BATCH_SIZE, mean=True):
    points = np.random.rand(batch_size * 3, 2)
    edges = np.abs(np.roll(points, batch_size, axis=0) - points)
    areas = (edges[:, 0] * edges[:, 1]).reshape((batch_size, -1))
    areas.sort(axis=1)
    if mean:
        return areas[:, 1].mean()
    else:
        return areas[:, 1]

In [102]:
for i in range(5):
    print(compute_batch_mean_area())

0.08741139079024891
0.08580751709236588
0.08721988291811844
0.08781836672669532
0.087446106754198


In [163]:
%timeit compute_batch_mean_area(128)
%timeit compute_batch_mean_area(256)
%timeit compute_batch_mean_area(512)
%timeit compute_batch_mean_area(1024)

57.7 µs ± 444 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
71.2 µs ± 1.44 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
95.8 µs ± 791 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
147 µs ± 6.66 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [164]:
%timeit -n 1000 compute_batch_mean_area(8192)
%timeit -n 1000 compute_batch_mean_area(16384)
%timeit -n 1000 compute_batch_mean_area(65536)

844 µs ± 20.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.14 ms ± 42.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
11.4 ms ± 402 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [123]:
from difflib import SequenceMatcher

def num_leading_digits_in_common(n1, n2):
    s1 = str(n1)
    s2 = str(n2)
    match = SequenceMatcher(None, s1, s2).find_longest_match(0, len(s1), 0, len(s2))
    return match.size - 2 if match.a + match.b == 0 else 0  # For 0.

In [137]:
BATCH_SIZE = 8192
MIN_DIGITS = 10
MIN_MINOR_DIFFS = 3

In [145]:
samples = 0
running_avg = 0
num_minor_diffs = 0

while True:
    samples += 1
    if samples > 1220703:
        break
        
    sample_avg = compute_batch_mean_area(BATCH_SIZE)
    new_avg = running_avg + (sample_avg - running_avg) / samples
    if num_leading_digits_in_common(new_avg, running_avg) >= MIN_DIGITS:
        print(f'[{samples}] new avg. common to {MIN_DIGITS} digits: {running_avg:.010f}, {new_avg:.010f}')
        num_minor_diffs += 1
        if num_minor_diffs >= MIN_MINOR_DIFFS:
            print('[{samples}] final!')
            break
    else:
        if num_minor_diffs > 0:
            print(f'[{samples}] resetting! saw {new_avg:.010f}')
        num_minor_diffs = 0
    running_avg = new_avg

[8444] new avg. common to 10 digits: 0.0880660769, 0.0880660769
[8445] resetting! saw 0.0880660771
[9673] new avg. common to 10 digits: 0.0880662167, 0.0880662167
[9674] resetting! saw 0.0880661481
[13887] new avg. common to 10 digits: 0.0880664072, 0.0880664072
[13888] resetting! saw 0.0880664970
[15521] new avg. common to 10 digits: 0.0880660371, 0.0880660371
[15522] resetting! saw 0.0880660626
[16349] new avg. common to 10 digits: 0.0880670128, 0.0880670127
[16350] resetting! saw 0.0880669879
[18433] new avg. common to 10 digits: 0.0880696634, 0.0880696634
[18434] resetting! saw 0.0880695521
[19509] new avg. common to 10 digits: 0.0880691102, 0.0880691103
[19510] resetting! saw 0.0880690529
[19943] new avg. common to 10 digits: 0.0880694649, 0.0880694649
[19944] resetting! saw 0.0880694944
[21454] new avg. common to 10 digits: 0.0880700634, 0.0880700635
[21455] resetting! saw 0.0880700884
[21749] new avg. common to 10 digits: 0.0880710945, 0.0880710945
[21750] resetting! saw 0.08807

KeyboardInterrupt: 

In [156]:
(10 ** MIN_DIGITS) / 8192

1220703.125

In [194]:
for i in range(10):
    print(compute_batch_mean_area(1))

0.02324668512555124
0.1379408271881305
0.01972411069789717
0.17160346694157755
0.14565455520427817
0.014668016067845855
0.14618422016281837
0.2035242087391498
0.048259311699909435
0.05207778043964428


In [203]:
BATCH_SIZE = 8192
samples = 10000000000
running_avg = 0.0880714201
num_minor_diffs = 0
max_samples = 10 ** (MIN_DIGITS + 1)

while True:    
    for sample_avg in compute_batch_mean_area(BATCH_SIZE, mean=False):
        samples += 1
        delta = (sample_avg - running_avg) / samples
        running_avg += delta
        if samples % 10000000 == 0:
            print(f'[{samples}] running avg: {running_avg:.010f}, delta: {delta:.010f}')
        if samples > max_samples:
            print(f'[{samples}] final! {running_avg:.010f}')
            break

[10010000000] running avg: 0.0880714127, delta: -0.0000000000
[10020000000] running avg: 0.0880713902, delta: -0.0000000000
[10030000000] running avg: 0.0880713542, delta: -0.0000000000
[10040000000] running avg: 0.0880713422, delta: 0.0000000000
[10050000000] running avg: 0.0880713472, delta: -0.0000000000
[10060000000] running avg: 0.0880713554, delta: -0.0000000000
[10070000000] running avg: 0.0880713449, delta: -0.0000000000
[10080000000] running avg: 0.0880713249, delta: -0.0000000000
[10090000000] running avg: 0.0880713154, delta: -0.0000000000


KeyboardInterrupt: 

In [143]:
[11690000000] running avg: 0.0880712192
[11700000000] running avg: 0.0880712118
[11710000000] running avg: 0.0880711937
[11720000000] running avg: 0.0880711876
[11730000000] running avg: 0.0880712180
[11740000000] running avg: 0.0880712113
[11750000000] running avg: 0.0880712169
[11760000000] running avg: 0.0880712352
[11770000000] running avg: 0.0880712686
[11780000000] running avg: 0.0880712331

1220703.125

In [152]:
(10**10) / 8192

1220703.125