In [130]:
import numpy as np

# Model of how much heap a workload of uniformly sized objects consumes when
# objects at or above half a region ("humongous" objects) must be placed in
# whole regions that they cannot share with other objects.
#
# Object sizes are treated as uniformly distributed between min_obj_mb and
# max_obj_mb, so the data volume held by objects in a size range [a, b] is
# proportional to the integral of the size over that range: (b + a) * (b - a) / 2.


def percent_of(part, whole):
    """Return ``part`` as a percentage of ``whole``, or 0.0 when ``whole`` is 0.

    Used for the humongous-region ratios, whose denominator is zero whenever
    the workload contains no humongous objects.
    """
    if whole == 0:
        return 0.0
    return part / whole * 100


def count_humongous_regions(min_hum_obj_mb, max_obj_mb, region_size_mb, num_mb_humongous):
    """Return the number of regions needed to hold the humongous data.

    The humongous size interval [min_hum_obj_mb, max_obj_mb] is split into
    ranges of objects that need the same number of regions each: objects up to
    ``k * region_size_mb`` need ``k`` regions. For every range the share of
    humongous data is derived from the size integral, converted to an object
    count via the average object size of the range, and multiplied by the
    regions per object. Per-range statistics are printed as a side effect.

    Args:
        min_hum_obj_mb: Smallest humongous object size in MB.
        max_obj_mb: Largest object size in MB.
        region_size_mb: Heap region size in MB.
        num_mb_humongous: Total live humongous data in MB.

    Returns:
        Total regions needed (0 when the humongous size interval is empty).
    """
    total_humongous_integral = (
        (max_obj_mb + min_hum_obj_mb) * (max_obj_mb - min_hum_obj_mb) / 2
    )
    num_hum_regions_needed = 0
    if total_humongous_integral <= 0:
        # No humongous size interval, so nothing to place.
        return num_hum_regions_needed

    max_regions_per_humongous_object = np.ceil(max_obj_mb / region_size_mb)
    # The tiny offset makes an object of exactly k regions count as k + 1,
    # which keeps the first range non-empty when min_hum_obj_mb sits exactly
    # on a region boundary.
    min_regions_per_humongous_object = np.ceil(
        (min_hum_obj_mb + 0.000000001) / region_size_mb
    )

    num_regions_for_objects_in_range = min_regions_per_humongous_object
    min_obj_in_range_mb = min_hum_obj_mb
    # Objects needing k regions are at most k * region_size_mb large. Capping
    # the first range this way (rather than at max_obj_mb) keeps larger
    # objects from being charged too few regions when the smallest humongous
    # object already spans more than one region.
    max_obj_in_range_mb = min(
        num_regions_for_objects_in_range * region_size_mb, max_obj_mb
    )

    # Iterate through size ranges of humongous objects
    while num_regions_for_objects_in_range <= max_regions_per_humongous_object:
        range_integral = (
            (max_obj_in_range_mb + min_obj_in_range_mb)
            * (max_obj_in_range_mb - min_obj_in_range_mb)
            / 2
        )
        pct_humongous_data_in_this_range = range_integral / total_humongous_integral * 100
        num_mb_humongous_in_this_range = (
            num_mb_humongous * pct_humongous_data_in_this_range / 100
        )
        avg_hum_obj_mb_in_this_range = (max_obj_in_range_mb + min_obj_in_range_mb) / 2
        # data volume / average size = object count; each object takes
        # num_regions_for_objects_in_range regions.
        num_hum_regions_needed_from_this_range = np.ceil(
            num_regions_for_objects_in_range
            * num_mb_humongous_in_this_range
            / avg_hum_obj_mb_in_this_range
        )
        # Print stats for humongous objects in this size range
        print(
            "%0.2f%% of humongous data (%0.1f MB) lives in objects between %0.1f MB and %0.1f MB"
            % (
                pct_humongous_data_in_this_range,
                num_mb_humongous_in_this_range,
                min_obj_in_range_mb,
                max_obj_in_range_mb,
            )
        )
        print(
            "These objects take up %d region(s) each (%d regions total)."
            % (num_regions_for_objects_in_range, num_hum_regions_needed_from_this_range)
        )
        # Add stats for this range to our running totals
        num_hum_regions_needed += num_hum_regions_needed_from_this_range
        # Set for next range: it starts where this one ended and extends to
        # the next region boundary (or the largest object size).
        num_regions_for_objects_in_range += 1
        min_obj_in_range_mb = max_obj_in_range_mb
        max_obj_in_range_mb = min(
            num_regions_for_objects_in_range * region_size_mb, max_obj_mb
        )

    return num_hum_regions_needed


# Set args like this for now
min_obj_mb = 30
max_obj_mb = 32
region_size_mb = 32
heap_size_gb = 80
heap_live_gb = 60

# Objects of at least half a region are humongous; if every object is already
# larger than that, the smallest object is the smallest humongous object.
min_hum_obj_mb = max(region_size_mb / 2, min_obj_mb)
num_heap_regions = np.ceil(heap_size_gb * 1024 / region_size_mb)
pct_heap_live = heap_live_gb / heap_size_gb * 100

# So we can handle all objects being the same size
# (a zero-width size interval would make the integrals below divide by zero)
if min_obj_mb == max_obj_mb:
    max_obj_mb += 0.00001

# Average object size
avg_obj_mb = (max_obj_mb + min_obj_mb) / 2
overall_heap_integral = (max_obj_mb + min_obj_mb) * (max_obj_mb - min_obj_mb) / 2

if max_obj_mb < min_hum_obj_mb:
    # No humongous objects
    pct_hum_objects = 0
    pct_heap_humongous = 0
    avg_hum_obj_mb = 0
else:
    # Percent of objects that are humongous
    pct_hum_objects = 100 * (max_obj_mb - min_hum_obj_mb) / (max_obj_mb - min_obj_mb)

    # Capacity taken up by humongous objects
    humongous_integral = (
        (max_obj_mb + min_hum_obj_mb) * (max_obj_mb - min_hum_obj_mb) / 2
    )
    pct_heap_humongous = 100 * humongous_integral / overall_heap_integral

    # Average humongous object size
    avg_hum_obj_mb = (max_obj_mb + min_hum_obj_mb) / 2

# Number of GB humongous and non humongous
num_gb_humongous = pct_heap_humongous * heap_live_gb / 100
num_gb_non_humongous = (100 - pct_heap_humongous) * heap_live_gb / 100
num_mb_humongous = num_gb_humongous * 1024
num_mb_non_humongous = num_gb_non_humongous * 1024

# Humongous objects can span multiple regions.
# Humongous objects can not share regions (one humongous object per region)
num_hum_regions_needed = count_humongous_regions(
    min_hum_obj_mb, max_obj_mb, region_size_mb, num_mb_humongous
)

# Number of heap regions needed
num_non_hum_regions_needed = np.ceil(num_mb_non_humongous / region_size_mb)
total_regions_needed = num_non_hum_regions_needed + num_hum_regions_needed

# Heap space taken
space_taken_by_humongous_gb = num_hum_regions_needed * region_size_mb / 1024
unreachable_space_in_humongous_regions_gb = space_taken_by_humongous_gb - num_gb_humongous
# percent_of() reports 0% instead of dividing by zero when no humongous
# regions are used at all.
pct_humongous_regions_live = percent_of(num_gb_humongous, space_taken_by_humongous_gb)
pct_humongous_regions_unreachable = percent_of(
    unreachable_space_in_humongous_regions_gb, space_taken_by_humongous_gb
)
total_space_taken_gb = num_gb_non_humongous + space_taken_by_humongous_gb
pct_heap_taken = total_space_taken_gb / heap_size_gb * 100
pct_heap_wasted = unreachable_space_in_humongous_regions_gb / heap_size_gb * 100

# Report results
print("----")
print(
    "%d GB heap with %d MB regions (%d total regions). Humongous objects are >= %0.1f MB"
    % (heap_size_gb, region_size_mb, num_heap_regions, min_hum_obj_mb)
)
print(
    "Min object size = %0.1f MB. Max object size = %0.1f MB."
    % (min_obj_mb, max_obj_mb)
)
print(
    "Avg object size = %0.2f MB. Avg humongous object size = %0.2f MB"
    % (avg_obj_mb, avg_hum_obj_mb)
)
print(
    "%0.2f%% of objects are humongous. %0.2f%% of data volume is in humongous objects."
    % (pct_hum_objects, pct_heap_humongous)
)
print(
    "%d GB data is live (%0.2f GB not humongous, %0.2f GB humongous)."
    % (heap_live_gb, num_gb_non_humongous, num_gb_humongous)
)
print(
    "%0.2f GB of humongous objects take %0.2f GB of heap space. Used humongous regions are %0.2f%% live data and %0.2f%% unreachable space."
    % (
        num_gb_humongous,
        space_taken_by_humongous_gb,
        pct_humongous_regions_live,
        pct_humongous_regions_unreachable
    )
)
print(
    "Total heap space taken = %0.2f/%d GB. (%0.2f non-humongous, %0.2f humongous)."
    % (
        total_space_taken_gb,
        heap_size_gb,
        num_gb_non_humongous,
        space_taken_by_humongous_gb
    )
)
print(
    "Total heap regions used = %d/%d regions. (%d non-humongous, %d humongous)."
    % (
        total_regions_needed,
        num_heap_regions,
        num_non_hum_regions_needed,
        num_hum_regions_needed,
    )
)
print(
    "The heap is at %0.2f%% capacity. %d GB live data takes %0.02f%% of heap capacity and %0.02f GB trapped in humongous regions makes %0.02f%% of the heap unreachable."
    % (
        pct_heap_taken,
        heap_live_gb,
        pct_heap_live,
        unreachable_space_in_humongous_regions_gb,
        pct_heap_wasted
    )
)
# Humongous regions that are only partly filled can exhaust the heap even
# when the live data alone would fit.
if pct_heap_taken >= 100:
    print("YOU WILL RUN OUT OF MEMORY!!!")

100.00% of humongous data (61440.0 MB) lives in objects between 30.0 MB and 32.0 MB
These objects take up 1 region(s) each (1982 regions total).
----
80 GB heap with 32 MB regions (2560 total regions). Humongous objects are >= 30.0 MB
Min object size = 30.0 MB. Max object size = 32.0 MB.
Avg object size = 31.00 MB. Avg humongous object size = 31.00 MB
100.00% of objects are humongous. 100.00% of data volume is in humongous objects.
60 GB data is live (0.00 GB not humongous, 60.00 GB humongous).
60.00 GB of humongous objects take 61.94 GB of heap space. Used humongous regions are 96.87% live data and 3.13% unreachable space.
Total heap space taken = 61.94/80 GB. (0.00 non-humongous, 61.94 humongous).
Total heap regions used = 1982/2560 regions. (0 non-humongous, 1982 humongous).
The heap is at 77.42% capacity. 60 GB live data takes 75.00% of heap capacity and 1.94 GB trapped in humongous regions makes 2.42% of the heap unreachable.
