In [13]:
# For investigating lower and upper bound problems
N = 128
NP = 3
NCORES = 1
P = NP * NCORES

lower_bounds = []
upper_bounds = []
ub_diff_lb_greater_than_one = []

# Q: the result of the multiplication can look strange -- e.g. if N//P is 1,
#    how is it that a range like (30, 32) is possible?
# A: only the last rank deviates. Its upper bound is pinned to N, so on top of
#    its N//P share it also absorbs the remainder N % P.
chunk = N // P
last_rank = P - 1
for rank in range(P):
    # Intended loads: equal chunks, with the last (unbalanced) load running to N.
    lb = rank * chunk
    ub = N if rank == last_rank else lb + chunk
    lower_bounds.append(lb)
    upper_bounds.append(ub)

    # Only ranges wider than a single element are reported and recorded.
    if ub - lb <= 1:
        continue
    print(rank, lb, ub)
    ub_diff_lb_greater_than_one.append((lb, ub))

ub_diff_lb_greater_than_one

0 0 42
1 42 84
2 84 128


[(0, 42), (42, 84), (84, 128)]

In [14]:
# This is the most evenly load-balanced split of the data, but how can it be determined programmatically?
N = 128 

# Most evenly balanced recvcounts
def load_balanced_recvcounts(N, P):
    """Split N items over P ranks so that no two ranks differ by more than one.

    Every rank starts with the floor share ``N // P``. The ``N % P`` leftover
    items are then handed out one apiece to ranks ``1 .. N % P``; rank 0 never
    receives an extra item.

    Args:
        N: Total number of items to distribute.
        P: Number of ranks.

    Returns:
        A list of length P whose entries sum to N.

    Raises:
        ZeroDivisionError: If P is 0.
    """
    base, remainder = divmod(N, P)
    counts = [base] * P
    # remainder < P, so the highest index touched here is at most P - 1.
    for rank in range(1, remainder + 1):
        counts[rank] = base + 1
    return counts

# Sweep every (NP, NCORES) combination -- NP from 1 to 8, NCORES in 1/4/16 --
# and confirm that the per-rank counts always add back up to N.
for NP, NCORES in (
    (np_count, core_count)
    for np_count in range(1, 9)
    for core_count in [1, 4, 16]
):
    P = NP * NCORES
    recvcounts = load_balanced_recvcounts(N, P)
    assert sum(recvcounts) == N
    print(recvcounts)


[128]
[32, 32, 32, 32]
[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8]
[64, 64]
[16, 16, 16, 16, 16, 16, 16, 16]
[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
[42, 43, 43]
[10, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10, 10]
[2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
[32, 32, 32, 32]
[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8]
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
[25, 26, 26, 26, 25]
[6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6]
[1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [18]:
# Given the recvcounts (each one is the difference between a rank's ub and lb),
# compute the lb and ub for the processes
N = 128
P = 3 

# Displacements are the cumulative sum of the preceding recvcounts
recvcounts = load_balanced_recvcounts(N=N, P=P)

def get_displacements_from_recvcounts(recvcounts):
    """Return the starting offset of every rank, given its item count.

    The displacement of rank ``r`` is the sum of the counts of all ranks before
    it, so rank 0 always starts at 0 (an exclusive cumulative sum).

    The number of ranks is taken from ``recvcounts`` itself, not from a
    notebook-level ``P``, so the result depends only on the argument.

    Args:
        recvcounts: Iterable with the number of items owned by each rank.

    Returns:
        A list with one displacement per entry of ``recvcounts``; empty input
        yields an empty list.
    """
    displacements = []
    offset = 0
    for count in recvcounts:
        # Record where this rank starts before advancing past its items.
        displacements.append(offset)
        offset += count
    return displacements

# Turn the per-rank counts into starting offsets, then show both lists.
displacements = get_displacements_from_recvcounts(recvcounts)

print(recvcounts, displacements, sep="\n")

[42, 43, 43]
[0, 42, 85]


In [21]:
def bounds_from_displacements_and_recvcounts(displacements, recvcounts):
    """Pair each rank's lower bound with its upper bound.

    For rank ``r`` the lower bound is ``displacements[r]`` and the upper bound
    is ``displacements[r] + recvcounts[r]``. One pair is produced per entry of
    ``recvcounts``; surplus entries of ``displacements`` are ignored.

    Args:
        displacements: Indexable sequence with each rank's starting offset.
        recvcounts: Sequence with each rank's item count.

    Returns:
        A ``zip`` iterator of ``(lower, upper)`` tuples. It can be consumed
        only once.

    Raises:
        IndexError: If ``displacements`` is shorter than ``recvcounts``.
    """
    # Index explicitly (rather than zipping the inputs) so that a too-short
    # `displacements` fails loudly instead of being silently truncated.
    starts = [displacements[rank] for rank in range(len(recvcounts))]
    stops = [start + count for start, count in zip(starts, recvcounts)]
    return zip(starts, stops)

# Show the (lower, upper) pair computed for every rank, one per line.
for bounds in bounds_from_displacements_and_recvcounts(displacements, recvcounts):
    print(bounds)

(0, 42)
(42, 85)
(85, 128)


In [None]:
# Floor share of items per rank, i.e. the quotient before any remainder is handed out
N//P

42