In [1]:
from multiprocessing import Pool, cpu_count, Process
import numpy as np
import time
from itertools import repeat
from pprint import pprint
import matplotlib.pyplot as plt
import glob
import os
from PIL import Image, ImageFilter
import threading
import requests

### Example 1: processing multiple images. Mix multiprocessing and multithreading.
The idea is that the images are split across several processes: each process handles two images and launches two threads, one per image.


In [2]:
def img_proc_thread(img_name, results_path):
    """Upscale one image, run edge detection on it and save the result.

    The image is loaded in grayscale, enlarged 8x along each side, filtered
    with PIL's FIND_EDGES kernel and written to ``results_path`` under the
    same file name as the input.

    Parameters
    ----------
    img_name : str
        Path of the image file to process.
    results_path : str
        Folder where the filtered image is written; created if missing.
    """
    img = Image.open(img_name).convert('L')  # load image in grayscale
    img = img.resize((img.size[0]*8, img.size[1]*8))  # 8x bigger along each side

    img_fil = img.filter(ImageFilter.FIND_EDGES)  # find edges in the image

    # exist_ok=True keeps this safe when several threads/processes race to
    # create the same folder.
    os.makedirs(results_path, exist_ok=True)

    # Name the output after *this* thread's input file. Relying on the
    # notebook-level `img_names[i]` made every thread write to the same file.
    img_fil.save(os.path.join(results_path, os.path.basename(img_name)))

In [3]:
def img_filter(img_names, idx, results_path):
    """Process a subset of images concurrently, one thread per image.

    Parameters
    ----------
    img_names : sequence of str
        Paths of all candidate images.
    idx : iterable of int
        Positions in ``img_names`` of the images to process in this call.
    results_path : str
        Folder handed to ``img_proc_thread`` for the output files.
    """
    # Build every worker first so that all of them are launched back to back.
    workers = [
        threading.Thread(target=img_proc_thread, args=(img_names[k], results_path))
        for k in idx
    ]

    for worker in workers:
        worker.start()

    # Block until every image of this batch has been handled.
    for worker in workers:
        worker.join()

In [4]:
# Input and output locations
img_path = './data'
results_path = './results'

# Collect every PNG in the input folder, in alphabetical order
png_pattern = os.path.join(img_path, '*.png')
img_names = glob.glob(png_pattern)
img_names.sort()

# Positions of the images to process: all the images in the data folder
idx = [*range(len(img_names))]
pprint(img_names)

['./data/barbara_gray512.png',
 './data/boats_gray.png',
 './data/goldhill_gray.png',
 './data/horse.png',
 './data/lena_gray_512.png',
 './data/lighthouse_gray.png',
 './data/mandrill_gray.png',
 './data/peppers_gray.png']


In [5]:
# Parallel processing:
NUM_PROC = 4 # number of processes to launch (ideally <= number of cores)
# NUM_PROC = cpu_count() # alternative: use every core in the PC

# Split the image indices into NUM_PROC contiguous chunks. Plain integer
# division would silently drop the last len(idx) % NUM_PROC images, so the
# remainder is handed out one extra image at a time to the first processes.
imgs_per_proc, leftover = divmod(len(idx), NUM_PROC)

t2 = time.time()
jobs = []
first = 0
for i in range(NUM_PROC):
    last = first + imgs_per_proc + (1 if i < leftover else 0)
    chunk = idx[first:last]
    first = last
    if not chunk:
        continue  # fewer images than processes: nothing left for this one
    print(f'Processor {i}: img_names{chunk}')
    # each process runs the target function on its own chunk of images.
    process = Process(target=img_filter, args=(img_names, chunk, results_path))
    jobs.append(process)

for j in jobs:
    j.start()

# Wait for every process before stopping the clock.
for j in jobs:
    j.join()

tp = time.time() - t2
print("Parallel execution time: ", tp)

Processor 0: img_names[0, 1]
Processor 1: img_names[2, 3]
Processor 2: img_names[4, 5]
Processor 3: img_names[6, 7]
Parallel execution time:  1.8403654098510742


### Example 2: downloading images using multithreading.

In [6]:
# URLs of the sample photos to download (hosted on Unsplash).
images = [
    # Photo credits: https://unsplash.com/photos/IKUYGCFmfw4
    'https://images.unsplash.com/photo-1509718443690-d8e2fb3474b7',
    # Photo credits: https://unsplash.com/photos/vpOeXr5wmR4
    'https://images.unsplash.com/photo-1587620962725-abab7fe55159',
    # Photo credits: https://unsplash.com/photos/iacpoKgpBAM
    'https://images.unsplash.com/photo-1493119508027-2b584f234d6c',
    # Photo credits: https://unsplash.com/photos/b18TRXc8UPQ
    'https://images.unsplash.com/photo-1482062364825-616fd23b8fc1',
    # Photo credits: https://unsplash.com/photos/XMFZqrGyV-Q
    'https://images.unsplash.com/photo-1521185496955-15097b20c5fe',
    # Photo credits: https://unsplash.com/photos/9SoCnyQmkzI
    'https://images.unsplash.com/photo-1510915228340-29c85a43dcfe',
]

def http_get_request(url, file_name, timeout=30):
    """Download ``url`` and store the response body in ``file_name``.

    Parameters
    ----------
    url : str
        Address to fetch with an HTTP GET.
    file_name : str
        Path of the file that receives the raw response bytes.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot block its thread/process forever.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. Nothing is written to
        disk in that case, instead of saving an error page as an image.
    """
    req = requests.get(url, timeout=timeout)
    req.raise_for_status()
    # The context manager closes the file even if the write fails.
    with open(file_name, "wb") as f:
        f.write(req.content)

In [7]:
# Serial processing: fetch the images one after another (baseline timing)
start_time = time.time()
for i, download_url in enumerate(images):
    file_name = f'img{i}.jpg'
    http_get_request(download_url, file_name)
ts = time.time() - start_time  # elapsed wall-clock seconds
print("Serial execution time: ", ts)

Serial execution time:  10.49658989906311


In [8]:
# Parallel processing using multi-threading: one thread per image
threads_list = []

start_time = time.time()
for i, download_url in enumerate(images):
    file_name = f'img{i}.jpg'
    download_thread = threading.Thread(
        target=http_get_request,
        args=(download_url, file_name),
    )
    threads_list.append(download_thread)
    download_thread.start()  # launch right away; joined below

# Wait for every download to finish before stopping the clock
for download_thread in threads_list:
    download_thread.join()

tp_mth = time.time() - start_time
print("Parallel execution time: ", tp_mth)

Parallel execution time:  4.777956008911133


In [9]:
# Ratio of the serial time to the multi-threaded time
speedup_mth = ts / tp_mth
print("Speed-up (multi-threading): ", speedup_mth)

Speed-up (multi-threading):  2.196878723765232


In [10]:
# Parallel processing using multi-processing: one process per image
processes_list = []

start_time = time.time()
for i, download_url in enumerate(images):
    file_name = f'img{i}.jpg'
    download_proc = Process(
        target=http_get_request,
        args=(download_url, file_name),
    )
    processes_list.append(download_proc)
    download_proc.start()  # launch right away; joined below

# Wait for every download to finish before stopping the clock
for download_proc in processes_list:
    download_proc.join()

tp_mp = time.time() - start_time
print("Parallel execution time: ", tp_mp)

Parallel execution time:  5.518742799758911


In [11]:
# Ratio of the serial time to the multi-process time
speedup_mp = ts / tp_mp
print("Speed-up (multi-processing): ", speedup_mp)

Speed-up (multi-processing):  1.9019893261779945
