# Multiprocessing & Multi-Threading
__Belle Peng__          |      __Sept 2018__

In [1]:
import os
import time
import threading
import multiprocessing as mp
import random

random.seed(42)

In [102]:
NUM_WORKERS = 4
output = mp.Queue()

def only_sleep():
    """ Do nothing, wait for a timer to expire """
    print("PID: %s, Process Name: %s, Thread Name: %s" % (
        os.getpid(),
        mp.current_process().name,
        threading.current_thread().name)
    )
    time.sleep(1)
 
 
def crunch_numbers(pos, output):
    """ Do some computations """
    print("PID: %s, Process Name: %s, Thread Name: %s" % (
        os.getpid(),
        mp.current_process().name,
        threading.current_thread().name)
    )
    x = 0
    while x < 10000000:
        x += 1
    output.put((pos, x))
    time.sleep(1)

In [103]:
## Run tasks serially
start_time = time.time()
for _ in range(NUM_WORKERS):
    only_sleep()
end_time = time.time()
print("Serial time=", end_time - start_time)
print()

# Run tasks using threads
start_time = time.time()
threads = [threading.Thread(target=only_sleep) for _ in range(NUM_WORKERS)]
[thread.start() for thread in threads]
[thread.join() for thread in threads]
end_time = time.time()
print("Threads time=", end_time - start_time)
print()

# Run tasks using processes
start_time = time.time()
processes = [mp.Process(target=only_sleep()) for _ in range(NUM_WORKERS)]
[process.start() for process in processes]
[process.join() for process in processes]
end_time = time.time() 
print("Parallel time=", end_time - start_time)
print()

PID: 15088, Process Name: MainProcess, Thread Name: MainThread
PID: 15088, Process Name: MainProcess, Thread Name: MainThread
PID: 15088, Process Name: MainProcess, Thread Name: MainThread
PID: 15088, Process Name: MainProcess, Thread Name: MainThread
Serial time= 4.007478713989258

PID: 15088, Process Name: MainProcess, Thread Name: Thread-148
PID: 15088, Process Name: MainProcess, Thread Name: Thread-149
PID: 15088, Process Name: MainProcess, Thread Name: Thread-150
PID: 15088, Process Name: MainProcess, Thread Name: Thread-151
Threads time= 1.005012035369873

PID: 15088, Process Name: MainProcess, Thread Name: MainThread
PID: 15088, Process Name: MainProcess, Thread Name: MainThread
PID: 15088, Process Name: MainProcess, Thread Name: MainThread
PID: 15088, Process Name: MainProcess, Thread Name: MainThread
Parallel time= 4.043342113494873



In [104]:
# Seriel Processing
start_time = time.time()
for _ in range(NUM_WORKERS):
    output = mp.Queue()
    crunch_numbers(pos=1, output=output)
end_time = time.time()
 
print("Serial time=", end_time - start_time)
print()

# Thread Concurrent Processing
start_time = time.time()
threads = [threading.Thread(target=crunch_numbers, args=(i, output)) for i in range(NUM_WORKERS)]
[thread.start() for thread in threads]
[thread.join() for thread in threads]
end_time = time.time()

results_thread = [output.get() for p in processes]
print("Threads time=", end_time - start_time)
print()

# Parallel Processing
start_time = time.time()
processes = [mp.Process(target=crunch_numbers, args=(i, output)) for i in range(NUM_WORKERS)]
[process.start() for process in processes]
[process.join() for process in processes]
end_time = time.time()

results_parallel = [output.get() for p in processes]
print("Parallel time=", end_time - start_time)
print()

# Retrieve Results
print("Results")
print("Threading Results", results_thread)
print("Parallel Results", results_parallel)

PID: 15088, Process Name: MainProcess, Thread Name: MainThread
PID: 15088, Process Name: MainProcess, Thread Name: MainThread
PID: 15088, Process Name: MainProcess, Thread Name: MainThread
PID: 15088, Process Name: MainProcess, Thread Name: MainThread
Serial time= 7.150249004364014

PID: 15088, Process Name: MainProcess, Thread Name: Thread-152
PID: 15088, Process Name: MainProcess, Thread Name: Thread-153
PID: 15088, Process Name: MainProcess, Thread Name: Thread-154
PID: 15088, Process Name: MainProcess, Thread Name: Thread-155
Threads time= 3.880600929260254

PID: 15487, Process Name: Process-146, Thread Name: MainThread
PID: 15488, Process Name: Process-147, Thread Name: MainThread
PID: 15490, Process Name: Process-149, Thread Name: MainThread
PID: 15489, Process Name: Process-148, Thread Name: MainThread
Parallel time= 2.9316160678863525

Results
Threading Results [(1, 10000000), (0, 10000000), (2, 10000000), (1, 10000000)]
Parallel Results [(3, 10000000), (0, 10000000), (2, 10000

The order of the obtained results does not necessarily have to match the order of the processes (in the processes list). Since we eventually use the .get() method to retrieve the results from the Queuesequentially, the order in which the processes finished determines the order of our results.  


If our application required us to retrieve results in a particular order, one possibility would be to refer to the processes’ ._identity attribute.

__Pooling__  
The Pool.map and Pool.apply will lock the main program until all processes are finished, which is quite useful if we want to obtain results in a particular order for certain applications. 
In contrast, the async variants will submit all processes at once and retrieve the results as soon as they are finished. One more difference is that we need to use the get method after the apply_async() call in order to obtain the return values of the finished processes.

In [93]:
def cube(x):
    return x**3

In [94]:
pool = mp.Pool(processes=4)
results = [pool.apply(cube, args=(x,)) for x in range(1,7)]
print(results)

[1, 8, 27, 64, 125, 216]


In [95]:
pool = mp.Pool(processes=4)
results = pool.map(cube, range(1,7))
print(results)

[1, 8, 27, 64, 125, 216]


In [96]:
pool = mp.Pool(processes=4)
results = [pool.apply_async(cube, args=(x,)) for x in range(1,7)]
output = [p.get() for p in results]
print(output)

[1, 8, 27, 64, 125, 216]
