| | Process | Threading |
| - | - | - |
| Definition | Instance of a program | Execution of a process |
| OS | Unit of source distribution\nContext switching, PID | Unit of execution |
| Shared memory | X | O |
| True parallel | O | X |
| Python lib | subprocess, multiprocessing | threading |
| Python feature | Good at both CPU-bound and I/O-bound, but use more memory | Good at I/O bound |

### Subprocess

In [1]:
import subprocess # new version of os.system / os.popen()

subprocess.call(["ls", "-l"])
subprocess.call('sleep 5', shell=True) # non-parallel, no output
print(5)

total 64
-rw-r--r--  1 jameschao  staff  4332 Dec  3 20:18 Iteration_iterator.ipynb
-rw-r--r--  1 jameschao  staff  3635 Dec  3 20:18 Synchronization.ipynb
-rw-r--r--  1 jameschao  staff  7902 Dec  3 20:18 closure_decorators.ipynb
-rw-r--r--  1 jameschao  staff  3574 Dec  3 20:18 parallel_multiprocessing.ipynb
-rw-r--r--  1 jameschao  staff  3047 Dec  3 20:18 parallel_subprocess_threading.ipynb
-rw-r--r--  1 jameschao  staff  2972 Dec  3 20:18 re.ipynb
drwxr-xr-x  8 jameschao  staff   256 Dec  3 20:18 [1m[36msocket[m[m
5


In [2]:
p = subprocess.Popen('sleep 5 && touch a.txt', shell=True) # parallel, no output
# p.wait()
print(5)

5


In [3]:
r = subprocess.check_output('sleep 5 && ls', shell=True) # non-parallel # has output # str(.)[2:-1].split('\\n')
print(r, 5)

b'Iteration_iterator.ipynb\nSynchronization.ipynb\nclosure_decorators.ipynb\nparallel_multiprocessing.ipynb\nparallel_subprocess_threading.ipynb\nre.ipynb\n\x1b[1m\x1b[36msocket\x1b[m\x1b[m\n' 5


### Multiprocessing

In [2]:
import multiprocessing as mp
import time

print(mp.cpu_count()) # process should < cpu_count

4


In [None]:
def square(n):
    time.sleep(1)
    return n*n

inputs = list(range(10))
pool = mp.Pool(processes=10)
pool_outputs = pool.map(square, inputs)
print("parent process do wait child processes")
print(pool_outputs)

parent process do wait child processes
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [None]:
def square(n):
    time.sleep(1)
    return n*n

inputs = list(range(10))
pool = mp.Pool(processes=10)
pool_outputs = pool.map_async(square, inputs)
print("parent process do not wait child processes")
pool.close()
pool.join()
print(pool_outputs.get())

parent process do not wait child processes
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [None]:
def mySum(x,y):
    time.sleep(1)
    return x+y

inputs = [ (i,i+1) for i in range(10) ]
pool = mp.Pool(processes=10)
pool_outputs = pool.starmap(mySum, inputs)
print("parent process do wait child processes")
print(pool_outputs)
print([0, 1, 4, 9, 16, 25, 36, 49, 64, 81])

parent process do wait child processes
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [None]:
def mySum(x,y):
    time.sleep(1)
    return x+y

inputs = [ (i,i+1) for i in range(10) ]
pool = mp.Pool(processes=10)
pool_outputs = pool.starmap_async(mySum, inputs)
print("parent process do not wait child processes")
print(pool_outputs.get())

parent process do not wait child processes
[1, 3, 5, 7, 9, 11, 13, 15, 17, 19]


### Threading and lock
+ lock: owner thread cannot acquire again -> recursion not suitable 
+ rlock: owner thread can acquire again -> recursion suitable
+ semaphore: allow at most n processes enter

In [1]:
import threading, time

def job(t, msg):
    for i in range(5):
        print(msg, end=" ")
        time.sleep(t)

ta = threading.Thread(target=job, args=(0.01,"a"))
tb = threading.Thread(target=job, args=(0.02,"b"))

In [2]:
ta.start()
tb.start()

tb.join()
print("Done")

ab  a a b a a b b b Done


In [3]:
lock = threading.Lock()
def job(t, msg):
    for i in range(5):
        lock.acquire()
        print(msg, end=" ")
        lock.release()
        time.sleep(t)

ta = threading.Thread(target=job, args=(0.01,"a"))
tb = threading.Thread(target=job, args=(0.02,"b"))

In [4]:
ta.start()
tb.start()

tb.join()
print("Done")

a b a a b a a b b b Done


In [9]:
semaphore = threading.Semaphore(2)
lock = threading.Lock()
def job(t, msg):
    semaphore.acquire()
    for i in range(5):
        lock.acquire()
        print(msg, end=" ")
        lock.release()
        time.sleep(t)
    semaphore.release()

In [10]:
ta = threading.Thread(target=job, args=(0.01,"a"))
tb = threading.Thread(target=job, args=(0.02,"b"))
tc = threading.Thread(target=job, args=(0.01,"c"))
td = threading.Thread(target=job, args=(0.02,"d"))
ta.start()
tb.start()
tc.start()
td.start()

a b 

a a b a a b c c b c c b c d d d d d 