# Threading and multiprocessing

## Threading: running tasks in the background of the current process

In [1]:
import threading

### Method 1: defining your background task as a function

In [2]:
def do_in_background(sleep):
    """Announce the start, pause for `sleep` seconds, then announce the end.

    Parameters
    ----------
    sleep : int or float
        Number of seconds to pause between the two messages.

    Returns
    -------
    None
        The function only prints and sleeps.
    """
    # Aliased on import because the parameter is itself called `sleep`.
    from time import sleep as pause

    print('Background process starting...')
    pause(sleep)
    print('Background process finished!')

In [9]:
# Run do_in_background with sleep=1 on its own thread. The thread is started
# but not joined here, so the cell does not wait for it to finish.
task = threading.Thread(target=do_in_background, kwargs={'sleep': 1})
task.start()

Background process starting...
Background process finished!

### Method 2: defining your task as a subclass of threading.Thread

In [10]:
class MyTask(threading.Thread):
    """Thread that prints its name, sleeps, then prints its name again.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to ``threading.Thread``.
    duration : int or float, keyword-only, optional
        Seconds to sleep between the "started" and "finished" messages.
        Defaults to 10.
    """

    def __init__(self, *args, duration=10, **kwargs):
        super().__init__(*args, **kwargs)
        self.duration = duration

    def run(self):
        """Body of the thread; invoked by ``start()`` on the new thread."""
        import time

        # Use the `name` attribute: getName() is deprecated since Python 3.10.
        print(self.name, 'started.')
        time.sleep(self.duration)
        print(self.name, 'finished.')




In [11]:
# Handles of the threads we launch, kept so we can join them afterwards.
tasks = []

# Create and start three MyTask threads one after another.
for i in range(3):
    worker = MyTask()
    worker.start()
    tasks.append(worker)

print('Tasks started.')
print('These statements will be executed even though the ones above have not finished!')

# Block until every thread has finished running.
for worker in tasks:
    worker.join()
print('Done!')

Thread-7 started.
Thread-8 started.
Tasks started.
These statements will be executed even though the ones above have not finished!
Thread-9 started.
Thread-7 finished.
Thread-8 finished.
Thread-9 finished.
Done!


## Multiprocessing: running tasks as separate processes (i.e. allowing you to use more than one core)

### Pool of workers

In [1]:
from multiprocessing import Pool

def f(x):
    """Return `x` multiplied by itself."""
    squared = x * x
    return squared

# Use the pool as a context manager so the worker processes are shut down
# when the block exits, even if one of the calls below raises.
with Pool(processes=4) as pool:            # start 4 worker processes
    print(pool.map(f, range(10)))          # prints "[0, 1, 4,..., 81]"

    result = pool.map_async(f, range(10))
    print("Results of map function are being calculated in the background (asynchronously)...")
    # Waits for the results to become available; raises
    # multiprocessing.TimeoutError if they are not ready within 1 second.
    print(result.get(timeout=1))

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
Results of map function are being calculated in the background (asynchronously)...
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


### Processes

In [7]:
import multiprocessing

# Ask multiprocessing for the CPU count and report it.
cpu_count = multiprocessing.cpu_count()
message = "{} cores available.".format(cpu_count)
print(message)

def my_background_task():
    """Sleep for three seconds and return None."""
    from time import sleep

    sleep(3)

# Run my_background_task in a separate process and wait here until it exits.
proc = multiprocessing.Process(target=my_background_task, args=())
proc.start()
proc.join()

8 cores available.


#### N.B. Printing in a background process might crash the IPython Notebook!

## Communication between threads or processes

<h3>Queues</h3>
<ul>
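<li>queue.Queue (FIFO)</li>
<li>queue.LifoQueue (LIFO)</li>
<li>queue.PriorityQueue (lowest-valued entry is retrieved first)</li>
<li>multiprocessing.Queue (FIFO queue that can be shared between processes; used in the example below)</li>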
</ul>

In [6]:
from multiprocessing import Process, Queue

def task(q):
    """Put the value 42 on the queue `q`.

    Parameters
    ----------
    q : queue-like object
        Anything with a ``put(item)`` method.
    """
    # The literal stands in for the result of some calculation.
    q.put(42)

    
# Queue through which the child process hands its result back to us
q = Queue()
# The child process runs task() with the queue as its only argument
p = Process(target=task, args=(q,))
p.start()
# get() blocks until a value is available on the queue;
# only the first value is retrieved and printed.
answer = q.get()
print(answer)
# Wait for the child process to exit
p.join()

42


### Pipes: direct, two-way communication channel

In [10]:
from multiprocessing import Process, Pipe

def child_task(pipe):
    """Receive one value from `pipe`, add 1 to it and send the result back.

    Parameters
    ----------
    pipe : connection-like object
        The child's end of the pipe; must provide ``recv()``, ``send()``
        and ``close()``.

    Whatever happens, this end of the pipe is closed before returning;
    exceptions raised by ``recv()`` or ``send()`` propagate to the caller.
    """
    try:
        # Receive input value at child's end of the pipe (sent by the parent).
        # Named `value` rather than `input` to avoid shadowing the builtin.
        value = pipe.recv()
        # Calculate some output value
        output = value + 1
        # Send output from child's end of the pipe (to the parent)
        pipe.send(output)
    finally:
        # Close this end of the pipe, even if receiving or sending failed
        pipe.close()

# Pipe() returns both ends of the channel: one for the parent, one for the child
pipe_parent, pipe_child = Pipe()
# The child process runs child_task() with its own end of the pipe
p = Process(target=child_task, args=(pipe_child,))
p.start()
# Hand the child its input value
pipe_parent.send(10)
# recv() blocks until the child has sent its answer back
reply = pipe_parent.recv()
print(reply)
# Wait for the child process to exit
p.join()

11


#### N.B.: two different processes/threads should not try to read from or write to the same end of a pipe at the same time, because the data in the pipe may become corrupted!

## Synchronization between processes using locks

In [3]:
from multiprocessing import Process, Lock

def print_task(lock):
    """Run a (placeholder) critical section while holding `lock`.

    Parameters
    ----------
    lock : lock-like object
        Must support the context-manager protocol, as the locks from
        ``multiprocessing`` and ``threading`` do.
    """
    # `with` acquires the lock on entry and releases it on exit, including
    # when the critical section raises, so the lock is never left held.
    with lock:
        # Do something which requires locking,
        # e.g. writing to a file.
        pass


# One lock shared by all worker processes
lock = Lock()

# Keep a handle on every process so that each one can be joined afterwards.
workers = [Process(target=print_task, args=(lock,)) for _ in range(10)]
for worker in workers:
    worker.start()

# Make sure all processes terminate
for worker in workers:
    worker.join()

### Many other modules are available that allow you to exploit different kinds of parallelism. See for instance: <a href="https://wiki.python.org/moin/ParallelProcessing" target="_blank">Python Wiki page about Parallel Processing</a>