<a href="https://colab.research.google.com/github/a0pro0b/Implementing-Multi-threading-And-Multi-processing-In-Data-Science-Github-/blob/main/Implementing%20Multi-threading%20And%20Multi-processing%20In%20Data%20Science.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import time
import cv2
import dlib
import glob
import random
import nltk
import numpy as np
from functools import reduce
import concurrent.futures
import xml.etree.ElementTree as ET
from nltk.stem import PorterStemmer, LancasterStemmer
from nltk.tokenize import sent_tokenize, word_tokenize

Simple Implementation of Multithreading

In [None]:
def sleep_fun(seconds):
    """Announce a pause on stdout, then block the caller for that long.

    Args:
        seconds: Duration, passed unchanged to ``time.sleep``.

    Returns:
        None.
    """
    announcement = "Sleeping for {} second(s)".format(seconds)
    print(announcement)
    time.sleep(seconds)

In [None]:
# Baseline: perform the sleeps one after another and time the whole loop.
sleep_times = [1, 2, 3]
start = time.time()
for pause in sleep_times:
    sleep_fun(pause)
end = time.time()
elapsed = end - start
print("Normal computation: {} secs ".format(elapsed))

Sleeping for 1 second(s)
Sleeping for 2 second(s)
Sleeping for 3 second(s)
Normal computation: 6.004661798477173 secs 


In [None]:
# Same sleeps as the sequential baseline, but dispatched to a thread pool.
start = time.time()
with concurrent.futures.ThreadPoolExecutor() as executor:
    # map() only re-raises a worker's exception when that result is read from
    # the iterator it returns. Drain the iterator so a failing task cannot
    # hide behind a plausible-looking timing.
    list(executor.map(sleep_fun, sleep_times))
end = time.time()
print("Multithreading computation: {} secs ".format(end - start))

Sleeping for 1 second(s)
Sleeping for 2 second(s)Sleeping for 3 second(s)

Multithreading computation: 3.0062460899353027 secs 


Simple Implementation of Multiprocessing

In [None]:
def calculation(number, population=10000000):
    """Multiply together ``number`` distinct random integers.

    The integers are drawn without replacement from ``range(population)``.

    Args:
        number: How many integers to sample and multiply.
        population: Exclusive upper bound of the sampling range. Defaults to
            10000000, the value this function previously hard-coded.

    Returns:
        The product of the sampled integers, or ``1`` (the empty product)
        when ``number`` is 0.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``number`` is
            negative or larger than ``population``.
    """
    random_list = random.sample(range(population), number)
    # The explicit initial value keeps reduce() from raising TypeError when
    # the sample is empty.
    return reduce(lambda x, y: x*y, random_list, 1)

In [None]:
# Baseline: evaluate every workload in turn inside this single process.
numbers = [200000] * 3
start = time.time()
for sample_size in numbers:
    result = calculation(sample_size)
end = time.time()
elapsed = end - start
print("Normal computation: {} sec".format(elapsed))

Normal computation: 64.69023323059082 sec


In [None]:
# Same workloads as the sequential baseline, but dispatched to a process pool.
start = time.time()
with concurrent.futures.ProcessPoolExecutor() as executor:
    # map() only re-raises a worker's exception when that result is read from
    # the iterator it returns. Drain the iterator so a failed worker surfaces
    # here instead of yielding a misleading timing.
    list(executor.map(calculation, numbers))
end = time.time()
print("MultiProcessing computation: {} secs ".format(end - start))

MultiProcessing computation: 57.81829500198364 secs 


Implementing Computer Vision using Multithreading & Multiprocessing

In [None]:
def face_detection(image_path, output_dir="./Datasets/face_processed/"):
    """Detect faces in one image and save an annotated copy if any are found.

    Every detected face is outlined with a rectangle on a copy of the image,
    and that copy is written to ``output_dir`` under the input's base name.
    An image in which no face is detected produces no output file.

    Relies on the module-level ``face_detector`` callable, which must be
    defined before this function is called.

    Args:
        image_path: Path of the image file to read.
        output_dir: Directory that receives the annotated image; created if
            it does not exist. Defaults to the path this function previously
            hard-coded.

    Returns:
        None. Unreadable inputs and failed writes are reported on stdout.
    """
    image_name = os.path.basename(image_path)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable or missing file by returning None
        # instead of raising; skip it rather than fail on image.copy().
        print("Skipping unreadable image: {}".format(image_path))
        return

    faces = face_detector(image)
    if len(faces) == 0:
        return

    face_rect = image.copy()
    for face in faces:
        top_left = (face.left(), face.top())
        bottom_right = (face.right(), face.bottom())
        face_rect = cv2.rectangle(face_rect, top_left, bottom_right, (255,0,0), 5)

    # cv2.imwrite does not create missing directories and signals failure
    # only through its return value, so create the folder and check the result.
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, image_name)
    if not cv2.imwrite(output_path, face_rect):
        print("Could not write annotated image: {}".format(output_path))

In [None]:
# Baseline: run face detection over every image one at a time.
face_detector = dlib.get_frontal_face_detector()
images = sorted(glob.glob("./Datasets/face_raw/*.jpg"))
if not images:
    # An empty match would make every timing in this section measure an
    # empty loop, so say so explicitly.
    print("No images matched ./Datasets/face_raw/*.jpg; the timings below are not meaningful.")
start = time.time()
for image_path in images:
    face_detection(image_path)
end = time.time()
print("Normal computation: {} seconds".format(end - start))

Normal computation: 8.20159912109375e-05 seconds


In [None]:
# Same images as the sequential baseline, but dispatched to a process pool.
start = time.time()
with concurrent.futures.ProcessPoolExecutor() as executor:
    # map() only re-raises a worker's exception when that result is read from
    # the iterator it returns. Drain the iterator so a failed worker surfaces
    # here instead of yielding a misleading timing.
    list(executor.map(face_detection, images))
end = time.time()
print("Multiprocessing computation: {} sec".format(end - start))

Multiprocessing computation: 0.002071380615234375 sec


In [None]:
# Same images as the sequential baseline, but dispatched to a thread pool.
start = time.time()
with concurrent.futures.ThreadPoolExecutor() as executor:
    # map() only re-raises a worker's exception when that result is read from
    # the iterator it returns. Drain the iterator so a failing task cannot
    # hide behind a plausible-looking timing.
    list(executor.map(face_detection, images))
end = time.time()
print("Multithreading computation: {} sec".format(end - start))

Multithreading computation: 0.00023293495178222656 sec


Implementing NLP using Multithreading & Multiprocessing

In [None]:
def stemSentence(sentence, stemmer):
    """Tokenize ``sentence`` and return its stemmed tokens as one string.

    Each token produced by ``word_tokenize`` is passed through
    ``stemmer.stem`` and followed by a single space, so any non-empty result
    ends with a trailing space.

    Args:
        sentence: Text to tokenize.
        stemmer: Object exposing a ``stem(word)`` method.

    Returns:
        The stemmed tokens concatenated, each followed by one space.
    """
    tokens = word_tokenize(sentence)
    return "".join(
        piece
        for token in tokens
        for piece in (stemmer.stem(token), " ")
    )
def xml_process(xml_path, output_dir="./Datasets/blog_processed/"):
    """Stem every ``<post>`` of one XML file and append the result to a text file.

    The text of each ``post`` element is stemmed with ``stemSentence`` using
    a ``PorterStemmer`` and written as one line to
    ``<output_dir>/<xml base name>.txt``. Nothing is written when the file
    contains no post text.

    The output file is opened in append mode, so processing the same XML file
    again adds the lines a second time.

    Args:
        xml_path: Path of the XML file to process.
        output_dir: Directory that receives the ``.txt`` file; created if it
            does not exist. Defaults to the path this function previously
            hard-coded.

    Returns:
        None. A file that cannot be processed is reported on stdout and
        skipped, so one bad file does not stop a batch.
    """
    try:
        root = ET.parse(xml_path).getroot()
        file_name = os.path.splitext(os.path.basename(xml_path))[0] + ".txt"

        # An element without text has ``text`` set to None, which cannot be
        # tokenized; leave such posts out instead of aborting the whole file.
        posts = [post.text for post in root.iter("post") if post.text is not None]

        porter = PorterStemmer()
        # Stem everything before touching the output file so a failure part
        # way through does not leave a half-written result behind.
        sentences = [stemSentence(post, porter) for post in posts]
        if not sentences:
            return

        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, file_name)
        # Open once for the whole file rather than once per sentence, and pin
        # the encoding so non-ASCII text does not depend on the locale.
        with open(output_path, "a", encoding="utf-8") as file:
            for sentence in sentences:
                file.write(sentence)
                file.write("\n")
    except Exception as error:
        # Still best-effort, but no longer silent; KeyboardInterrupt and
        # SystemExit are deliberately left to propagate.
        print("Skipping {}: {!r}".format(xml_path, error))

In [None]:
# Baseline: process every XML file one at a time.
xml_files = glob.glob("./Datasets/blog_xml/*.xml")
if not xml_files:
    # An empty match would make every timing in this section measure an
    # empty loop, so say so explicitly.
    print("No files matched ./Datasets/blog_xml/*.xml; the timings below are not meaningful.")
start = time.time()
for xml_path in xml_files:
    xml_process(xml_path)
end = time.time()
print("Normal computation: {} seconds".format(end - start))

Normal computation: 7.295608520507812e-05 seconds


In [None]:
# Same XML files as the sequential baseline, but dispatched to a process pool.
start = time.time()
with concurrent.futures.ProcessPoolExecutor() as executor:
    # map() only re-raises a worker's exception when that result is read from
    # the iterator it returns. Drain the iterator so a failed worker surfaces
    # here instead of yielding a misleading timing.
    list(executor.map(xml_process, xml_files))
end = time.time()
print("Multiprocessing computation: {} sec".format(end - start))

Multiprocessing computation: 0.0013365745544433594 sec


In [None]:
# Same XML files as the sequential baseline, but dispatched to a thread pool.
start = time.time()
with concurrent.futures.ThreadPoolExecutor() as executor:
    # map() only re-raises a worker's exception when that result is read from
    # the iterator it returns. Drain the iterator so a failing task cannot
    # hide behind a plausible-looking timing.
    list(executor.map(xml_process, xml_files))
end = time.time()
print("Multithreading computation: {} sec".format(end - start))

Multithreading computation: 0.00017404556274414062 sec
