In [2]:
import random
import time
from celery import group
from mergesort import sort, merge


In [3]:
# Create a list of 1,000,000 elements in random order.
# A dedicated, seeded generator makes the permutation identical on every
# Restart & Run All (so timings are taken on the same input each run) and
# leaves the global `random` module state untouched.
N_ELEMENTS = 1000000
SEED = 42
sequence = list(range(N_ELEMENTS))
random.Random(SEED).shuffle(sequence)


In [11]:

# perf_counter() is the clock intended for measuring elapsed durations;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
t0 = time.perf_counter()

# Split the sequence into `n` contiguous chunks and process those
# independently. The first n - 1 chunks hold exactly `chunk_size` elements;
# the last chunk also absorbs the remainder when len(sequence) is not a
# multiple of n.
n = 4
chunk_size = len(sequence) // n
subseqs = [sequence[i * chunk_size:(i + 1) * chunk_size] for i in range(n - 1)]
subseqs.append(sequence[(n - 1) * chunk_size:])

# Ask the Celery workers to sort each sub-sequence.
# Use a group to run the individual independent tasks as a unit of work.
# celery.group creates a group of tasks to be executed in parallel;
# calling the group dispatches the sort tasks to the remote workers.
lazy_partials = group(sort.s(seq) for seq in subseqs)()

# Block until every task in the group has returned its sorted chunk.
partials = lazy_partials.get()

# Merge all the individual sorted sub-lists into our final result.
# The merge runs locally, folding one partial at a time into the accumulator.
result = partials[0]
for partial in partials[1:]:
    result = merge(result, partial)

dt = time.perf_counter() - t0
print('Distributed mergesort took %.02fs' % (dt))



Distributed mergesort took 8.58s


In [12]:
# Do the same thing locally and compare the times.
# perf_counter() is the clock intended for measuring elapsed durations;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
t0 = time.perf_counter()
truth = sort(sequence)  # direct call: sorts the whole sequence locally, no workers involved
dt = time.perf_counter() - t0
print('Local mergesort took %.02fs' % (dt))



Local mergesort took 21.97s


In [6]:
# Final sanity checks: the distributed result must match the local mergesort
# output and, independently, Python's built-in sorted(). The second comparison
# is only evaluated when the first one holds.
assert result == truth and result == sorted(sequence)


In [10]:
# Inspect the size of the first sorted chunk returned by the workers.
# With 1,000,000 elements split into 4 chunks this is expected to be 250000.
len(partials[0])

250000

In [17]:
# Time only the dispatch step: building the group and sending the sort tasks
# to the remote workers, without waiting for their results.
# perf_counter() is used because it is the clock intended for measuring
# elapsed durations; time.time() can jump if the system clock is adjusted.
t0 = time.perf_counter()
lazy_partials = group(sort.s(seq) for seq in subseqs)()  # call remote workers to run the sort task
dt = time.perf_counter() - t0
print(' took %.02fs' % (dt))


 took 0.85s


In [18]:
# Time only the wait for results: how long .get() blocks until all
# previously dispatched sort tasks have returned.
# perf_counter() is used because it is the clock intended for measuring
# elapsed durations; time.time() can jump if the system clock is adjusted.
t0 = time.perf_counter()
partials = lazy_partials.get()  # will wait for the tasks to return
dt = time.perf_counter() - t0
print('took %.02fs' % (dt))


took 5.71s
