# Parallel in Python
The cannonical implemntion of Python, `CPython`, uses a *Global Interpreter Lock*, which only allows one thread to interpret the binaries at a time. Hence it is easier to be multithreaded but cannot take advantage of multiprocessor architecture (or also multicore?). To get around that we need some specialized libraries that instead of using the usual Python threads use *subprocesses*, like `multiprocessing`, see [docs](https://docs.python.org/2/library/multiprocessing.html).

Another possible module is `threading`, but it doesn't have the `p = Pool(processes=4)` which is useful to **parallelize the execution of a function accross different inputs** by then using `

In [1]:
import multiprocessing as mp
def f(x):
    return x*x

p = mp.Pool(4)
res = p.map(f,range(10))
print res

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [22]:
mp.cpu_count()

8

In [20]:
from multiprocessing import Process
import os

def info(title):
    print title
    print 'module name:', __name__
    if hasattr(os, 'getppid'):  # only available on Unix
        print 'parent process:', os.getppid()
    print 'process id:', os.getpid(),'\n'

def f(name):
    info('function f')
    print 'hello', name,'\n----\n'

if __name__ == '__main__':
    info('main line')
    p = Process(target=f, args=([1,2],))
    info('main line')
    p.start()
    info('main line')
    p.join()
    info('main line')
    p.run()
    p.join()

main line
module name: __main__
parent process: 90430
process id: 95479 

main line
module name: __main__
parent process: 90430
process id: 95479 

function f
module name: __main__
parent process: 95479
process id: 95540 

hello [1, 2] 
----

main line
module name: __main__
parent process: 90430
process id: 95479 

main line
module name: __main__
parent process: 90430
process id: 95479 

function f
module name: __main__
parent process: 90430
process id: 95479 

hello [1, 2] 
----



In [6]:
from __future__ import print_function
import numpy as np
import theano
import theano.tensor as TT
import multiprocessing as mp
from contextlib import closing


def new_shared_mem_array(init_val):
    typecode = init_val.dtype.char
    arr = mp.Array(typecode, np.prod(init_val.shape))
    nparr = np.frombuffer(arr.get_obj())
    nparr.shape = init_val.shape
    return nparr


def init_worker(x):
    global x_
    x_ = x
    x_var = theano.shared(np.zeros_like(x), borrow=True)
    x_var.set_value(x_, borrow=True)
    global f_update
    f_update = theano.function(
        inputs=[],
        outputs=[],
        updates=[(x_var, x_var + TT.ones_like(x_var))]
    )


def update_x(*args, **kwargs):
    global f_update
    f_update()


def main():
    x = np.zeros(10)
    x = new_shared_mem_array(x)
    with closing(mp.Pool(initializer=init_worker, initargs=(x,))) as p:
        p.map(update_x, xrange(10000))
    print(x)

main()

[ 10000.  10000.  10000.  10000.  10000.  10000.  10000.  10000.  10000.
  10000.]


In [10]:
a = np.zeros(10, dtype=int)
a.dtype.char

'l'

How to launch different processes simultaneously?? This seems that even without the lock, the processes are started only one after the other!

In [11]:
from multiprocessing import Process, Lock

def f(l, i):
#     l.acquire()
    print 'hello world', i
#     l.release()

if __name__ == '__main__':
    lock = Lock()

    for num in range(10):
        Process(target=f, args=(lock, num)).start()

hello world 0
hello world 1
hello world 2
hello world 3
hello world 4
hello world 5
hello world 6
hello world 7
hello world 8
hello world 9


## Shared memory
It explicitly says "These shared objects will be process and thread-safe."!! Hence there is no way to do hogwild..