In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import scipy.io
import collections
import pandas as pd

In [2]:
def loadMatFile(file_path, file_name, key):
    """
    Load a MATLAB .mat file and return one variable stored in it.

    Parameters
    ----------
    file_path : str
        Directory that contains the file. A trailing path separator is
        accepted but no longer required.
    file_name : str
        Name of the .mat file inside `file_path`.
    key : str
        Name of the variable to extract from the loaded dictionary.

    Returns
    -------
    The object stored under `key` in the dictionary returned by
    `scipy.io.loadmat`.

    Raises
    ------
    KeyError
        If `key` is not a variable of the file; the message lists the
        variables that are available.
    """
    # os.path.join inserts a separator only when needed, so both
    # 'dir' and 'dir/' resolve to the same file
    full_path = os.path.join(file_path, file_name)

    # read mat file dictionary
    dictionary = scipy.io.loadmat(full_path)

    # access item of a dictionary
    try:
        return dictionary[key]
    except KeyError:
        # hide loadmat's bookkeeping entries ('__header__', ...) from the hint
        available = sorted(k for k in dictionary if not k.startswith('__'))
        raise KeyError(
            'Variable {!r} not found in {!r}; available variables: {}'.format(
                key, full_path, available)
        ) from None

In [3]:
# Location of the hologram dataset.
# NOTE: machine-specific absolute path; adjust it before running elsewhere.
path = 'C:\\Users\\ferna\\Desktop\\computerGeneratedHolography\\'
# path = 'C:\\Users\\flucasamar\\Desktop\\Github\\computerGeneratedHolography\\'
file_path = path + '\\output\\dataset\\'
file_name = 'hDataset.mat'
key = 'hDataset'

# Number of classes in the dataset
nb_class = 5

# Read the array stored under `key` in the .mat file
dat = loadMatFile(file_path, file_name, key)
print(dat.shape)

# The holograms are indexed along the third axis of the loaded array
nb_holograms = dat.shape[2]

# Holograms per class (integer division of the total by the class count)
nb_holograms_class = nb_holograms // nb_class

print('Total number of holograms: {}'.format(nb_holograms))
print('Number of holograms per class: {}'.format(nb_holograms_class))

(200, 200, 1500)
Total number of holograms: 1500
Number of holograms per class: 300


In [4]:
# Dimension
rows = dat.shape[0]
columns = dat.shape[1]

# Reshape the dataset so that the first dimension is the number of holograms:
# (rows, columns, nb_holograms) -> (nb_holograms, rows, columns).
# A single vectorized transpose replaces the per-hologram Python copy loop;
# np.array(...) always returns a fresh, C-contiguous complex copy, so `dat`
# is left untouched and `dat_r` does not share memory with it.
dat_r = np.array(np.transpose(dat, (2, 0, 1)), dtype=complex, order='C')
print(dat_r.shape)

(1500, 200, 200)


In [7]:
# Flatten every hologram into one row:
# (nb_holograms, rows, columns) -> (nb_holograms, rows * columns).
# The shape is derived from `dat_r` instead of being hard-coded, so the cell
# keeps working when the dataset size or hologram resolution changes.
data_1D = np.reshape(dat_r, (dat_r.shape[0], -1))
print(data_1D.shape)

(1500, 40000)


***Parallel processing in Python***

In [8]:
import multiprocessing as mp

In [9]:
# Number of CPUs reported by multiprocessing for this machine
cpu_total = mp.cpu_count()
print('Number of processors: ', cpu_total)

Number of processors:  4


In [10]:
import numpy as np
from time import time

# Prepare data
# Draw from an explicitly seeded generator. Merely constructing
# np.random.RandomState(100) and discarding it does NOT seed the global
# np.random functions, so the data used to change on every run.
rng = np.random.RandomState(100)
arr = rng.randint(0, 10, size=[200000, 5])

# Plain nested lists: one list of 5 integers per row
data = arr.tolist()
data[:5]

[[7, 4, 8, 2, 6],
 [3, 5, 5, 4, 4],
 [4, 4, 0, 1, 5],
 [7, 0, 7, 4, 7],
 [9, 5, 9, 3, 6]]

In [11]:
%%time
# Solution without parallelization

def howmany_within_range(row, minimum, maximum):
    """Count the items of `row` that lie in the closed interval [minimum, maximum].

    Both bounds are inclusive. An empty `row` yields 0.
    """
    # one tick per element that satisfies minimum <= value <= maximum
    return sum(1 for value in row if minimum <= value <= maximum)

# Serial baseline: apply the counter to every row of `data`, one after another
results = [howmany_within_range(row, minimum=4, maximum=8) for row in data]

print(results[:10])
#> e.g. [3, 1, 4, 4, 4, 2, 1, 1, 3, 3] (actual values depend on the contents of `data`)

[4, 4, 3, 4, 2, 3, 3, 3, 2, 1]
Wall time: 200 ms


In [None]:
%%time
# Parallelizing using Pool.apply()

import multiprocessing as mp

# NOTE(review): the multiprocessing docs warn that Pool examples may not work
# from an interactive session, because worker processes must be able to import
# the target function. If this cell hangs, move `howmany_within_range` into an
# importable .py module.

# Step 1: Init multiprocessing.Pool()
# The context manager tears the pool down even if a call raises, so worker
# processes are never leaked (Step 3 of the manual recipe: "don't forget to close").
with mp.Pool(mp.cpu_count()) as pool:

    # Step 2: `pool.apply` the `howmany_within_range()`
    # pool.apply blocks until each result is ready, so the rows are still
    # processed one call at a time; every result is complete before the
    # pool is shut down on exit from the `with` block.
    results = [pool.apply(howmany_within_range, args=(row, 4, 8)) for row in data]

print(results[:10])
#> e.g. [3, 1, 4, 4, 4, 2, 1, 1, 3, 3] (actual values depend on the contents of `data`)

In [3]:
import workers
import multiprocessing as mp

In [35]:
%%time

if __name__ ==  '__main__': 
    
    # Number of worker processes in the pool
    num_processors = 3

    # The context manager shuts the pool down once the work is finished;
    # previously the pool was never closed, leaking worker processes on
    # every run of this cell. `map` blocks until all results are ready, so
    # `output` is complete before the pool is torn down.
    with mp.Pool(processes = num_processors) as p:
        # range() is passed directly; wrapping it in a list comprehension
        # only built a throwaway copy of the same values
        output = p.map(workers.worker, range(0, 100000))
    #print(output)

Wall time: 324 ms


In [36]:
def w(x):
    """Return `x` multiplied by itself."""
    squared = x * x
    return squared

In [37]:
%%time

# Serial run: apply `w` to every integer in [0, 100000), in order
output2 = [w(value) for value in range(0, 100000)]

#print(output2)

Wall time: 44 ms
