In [5]:
import numpy as np
import threading
import queue
# Small symmetric 2x2 matrix used as the shared input for the NMF demos below.
testArray = np.array(
    [
        [1, 2],
        [2, 1],
    ]
)

(2, 2)

In [6]:
def simple_NMF(a, k, i):
    """Approximate ``a`` by a rank-``k`` product using multiplicative updates.

    Two factors are drawn with ``np.random.rand`` (first the left factor of
    shape ``(m, k)``, then the right factor of shape ``(k, n)``, where
    ``(m, n) = np.shape(a)``) and then refined ``i`` times by alternating
    elementwise multiplicative updates.

    Parameters
    ----------
    a : array_like
        Two-dimensional matrix to factorize.
    k : int
        Inner dimension of the factorization.
    i : int
        Number of update sweeps (each sweep updates the left factor, then the
        right factor using the freshly updated left factor).

    Returns
    -------
    numpy.ndarray
        The reconstructed product ``left @ right`` of shape ``(m, n)``; the
        individual factors are not returned.
    """
    n_rows, n_cols = np.shape(a)
    left = np.random.rand(n_rows, k)
    right = np.random.rand(k, n_cols)

    for _sweep in range(i):
        # Left-factor update: left <- left * (a right^T) / (left right right^T)
        right_t = right.T
        left_num = left * (a @ right_t)
        left_den = left @ right @ right_t
        left = left_num / left_den

        # Right-factor update uses the left factor computed just above.
        left_t = left.T
        right_num = right * (left_t @ a)
        right_den = left_t @ left @ right
        right = right_num / right_den

    return left @ right

# Demo: rank-2 factorization of the 2x2 test matrix with 1000 update sweeps;
# the reconstructed product is displayed as the cell output.
simple_NMF(testArray, k=2, i=1000)

array([[1., 2.],
       [2., 1.]])

In [54]:
def thread_function_w(a, w, h, q, i):
    """Thread worker: compute the multiplicative update for one piece of ``w``.

    Evaluates ``w * (a @ h.T) / (w @ h @ h.T)`` (elementwise product and
    quotient) and puts the tuple ``(i, updated_w)`` on the queue ``q`` so the
    caller can tell which piece the result belongs to.
    """
    h_t = h.T
    numerator = w * (a @ h_t)
    denominator = w @ h @ h_t
    tagged_result = (i, numerator / denominator)
    q.put(tagged_result)

def thread_function_h(a, w, h, q, i):
    """Thread worker: compute the multiplicative update for one piece of ``h``.

    Evaluates ``h * (w.T @ a) / (w.T @ w @ h)`` (elementwise product and
    quotient) and puts the tuple ``(i, updated_h)`` on the queue ``q`` so the
    caller can tell which piece the result belongs to.
    """
    w_t = w.T
    numerator = h * (w_t @ a)
    denominator = w_t @ w @ h
    tagged_result = (i, numerator / denominator)
    q.put(tagged_result)

def naive_parallel_NMF(a, k, p, numIter):
    """Multiplicative-update NMF where each factor update is spread over ``p`` threads.

    ``w`` (shape ``(m, k)``) and ``h`` (shape ``(k, n)``) are drawn with
    ``np.random.rand``. In every iteration ``w`` is cut into ``p`` row blocks
    of shape ``(m/p, k)`` that are updated by ``thread_function_w`` against the
    matching row block of ``a``; then ``h`` is cut into ``p`` column blocks of
    shape ``(k, n/p)`` that are updated by ``thread_function_h`` against the
    matching column block of ``a`` and the freshly updated ``w``.

    Parameters
    ----------
    a : array_like
        Two-dimensional matrix to factorize.
    k : int
        Inner dimension of the factorization.
    p : int
        Number of threads / pieces per update; must divide both dimensions.
    numIter : int
        Number of (w update, h update) rounds.

    Returns
    -------
    numpy.ndarray
        The reconstructed product ``w @ h``.

    Raises
    ------
    TypeError
        If either dimension of ``a`` is not divisible by ``p``.
    """
    m, n = np.shape(a)
    if m % p > 0:
        raise TypeError('Input first dimension not divisible by number of threads')
    if n % p > 0:
        raise TypeError('Input second dimension not divisible by number of threads')

    w = np.random.rand(m, k)
    h = np.random.rand(k, n)
    a_row_blocks = np.split(a, p, 0)  # p blocks of shape m/p x n
    a_col_blocks = np.split(a, p, 1)  # p blocks of shape m x n/p

    def update_pieces(worker, arg_sets, pieces):
        """Run ``worker`` once per argument set in its own thread, then write each
        queued ``(index, value)`` result back into ``pieces[index]``."""
        results = queue.Queue()
        workers = [
            threading.Thread(target=worker, args=(*args, results, idx))
            for idx, args in enumerate(arg_sets)
        ]
        for t in workers:
            t.start()
        for t in workers:  # wait for every worker before reading the queue
            t.join()
        while not results.empty():
            idx, updated = results.get()
            pieces[idx] = updated

    for _ in range(numIter):
        # w update: row block j of w only needs row block j of a and all of h.
        w_parts = np.split(w, p, 0)  # p pieces of shape m/p x k
        update_pieces(
            thread_function_w,
            [(a_row_blocks[j], w_parts[j], h) for j in range(p)],
            w_parts,
        )
        w = np.concatenate(w_parts, 0)

        # h update: column block j of h needs column block j of a and the new w.
        h_parts = np.split(h, p, 1)  # p pieces of shape k x n/p
        update_pieces(
            thread_function_h,
            [(a_col_blocks[j], w, h_parts[j]) for j in range(p)],
            h_parts,
        )
        h = np.concatenate(h_parts, 1)

    return w @ h
    
# Demo: rank-2 factorization of the 2x2 test matrix using 2 threads and
# 10000 iterations; the reconstructed product is displayed as the cell output.
naive_parallel_NMF(testArray, k=2, p=2, numIter=10000)

array([[1., 2.],
       [2., 1.]])

In [85]:
def vertical_thread_function_u(h, q):
    """Thread worker: put the product ``h @ h.T`` for one piece of ``h`` on ``q``.

    The result is enqueued bare (no index tag).
    """
    partial_gram = h @ h.T
    q.put(partial_gram)

def vertical_thread_function_v(a, h, q, i):
    """Thread worker: put ``(i, a @ h.T)`` on ``q`` for one block of ``a`` and
    the matching piece of ``h``; ``i`` tags the result for the caller."""
    partial_product = a @ h.T
    tagged_result = (i, partial_product)
    q.put(tagged_result)

def vertical_thread_function_w(w, u, v, q, i, j):
    """Thread worker: compute the update ``w * v / (w @ u)`` for one piece of ``w``.

    Parameters
    ----------
    w : numpy.ndarray
        Piece of the left factor to update.
    u : numpy.ndarray
        Matrix right-multiplied onto ``w`` in the denominator.
    v : numpy.ndarray
        Elementwise numerator factor, same shape as ``w``.
    q : queue.Queue
        Queue that receives the result.
    i, j : int
        Indices identifying the piece; passed through unchanged.

    The result is enqueued as ONE tuple ``(i, j, updated_w)``.
    """
    updated_w = w * v / (w @ u)
    # Queue.put's signature is put(item, block, timeout): the three values
    # must be wrapped in a single tuple, otherwise only ``i`` is enqueued and
    # the computed array is silently consumed as the ``timeout`` argument.
    q.put((i, j, updated_w))

def vertical_thread_function_x(w, q, i, j):
    """Thread worker: put ``(i, j, w.T @ w)`` on ``q`` for one piece of ``w``;
    ``i`` and ``j`` are passed through as tags for the caller."""
    partial_gram = w.T @ w
    tagged_result = (i, j, partial_gram)
    q.put(tagged_result)

def vertical_thread_function_y(a, w, q, i, j):
    """Thread worker: put ``(i, j, w.T @ a)`` on ``q`` for one block of ``a`` and
    the matching piece of ``w``; ``i`` and ``j`` are passed through as tags."""
    partial_product = w.T @ a
    tagged_result = (i, j, partial_product)
    q.put(tagged_result)

def vertical_HPC_NMF(a, k, p_row, p_col, numIter):
    """Work-in-progress NMF iteration on a ``p_row x p_col`` grid of threads.

    Only the first two reductions of each iteration are implemented:

    * ``u = h @ h.T``, summed from ``p_row * p_col`` column pieces of ``h``;
    * ``v = a @ h.T``, held as ``p_row`` row blocks, where row block ``r`` is
      the sum over grid columns ``c`` of ``a[r][c] @ h_c.T``.

    ``w`` and ``h`` are not updated yet, so every iteration recomputes the same
    ``u`` and ``v``. After the loop the function prints ``u`` and the shape of
    the assembled ``v``.

    Parameters
    ----------
    a : array_like
        Two-dimensional ``(m, n)`` matrix.
    k : int
        Inner dimension of the factorization.
    p_row, p_col : int
        Grid dimensions; ``p_row * p_col`` threads are used per step.
    numIter : int
        Number of iterations. ``u`` and ``v`` are only assigned inside the
        loop, so this must be at least 1 for the final print to work.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If ``m`` or ``n`` is not divisible by ``p_row * p_col``.
    """
    m, n = np.shape(a)
    n_threads = p_row * p_col
    if m % n_threads > 0:
        raise TypeError('Input first dimension not divisible by number of threads')
    if n % n_threads > 0:
        raise TypeError('Input second dimension not divisible by number of threads')
    # w is not used yet, but it is still drawn first so that h receives the
    # same random values as before.
    w = np.random.rand(m, k)
    h = np.random.rand(k, n)

    # Cut a into a p_row x p_col grid of blocks of shape m/p_row x n/p_col.
    # (The columns were previously split into p_row pieces, giving a
    # p_row x p_row grid that did not match the thread layout.)
    a_pieces = [np.split(row_block, p_col, 1) for row_block in np.split(a, p_row, 0)]

    for _ in range(numIter):
        # --- u = h @ h.T, one column slice of h per thread ---
        u = np.zeros((k, k))
        h_pieces_u = np.split(h, n_threads, 1)  # pieces of shape k x n/(p_row*p_col)
        threads_u = []
        thread_queue_u = queue.Queue()
        for i in range(n_threads):
            newThread = threading.Thread(target = vertical_thread_function_u, args = (h_pieces_u[i], thread_queue_u))
            newThread.start()
            threads_u.append(newThread)
        for thread in threads_u:  # wait for all threads to complete
            thread.join()
        while not thread_queue_u.empty():  # sum the partial products into u
            u += thread_queue_u.get()

        # --- v = a @ h.T, one grid block of a per thread ---
        v_pieces = [np.zeros((m // p_row, k)) for _ in range(p_row)]  # p_row blocks of shape m/p_row x k
        h_pieces_v = np.split(h, p_col, 1)  # p_col pieces of shape k x n/p_col, one per grid column
        threads_v = []
        thread_queue_v = queue.Queue()
        for i in range(n_threads):
            # Thread i handles grid cell (row, col) in row-major order.
            row, col = divmod(i, p_col)
            newThread = threading.Thread(target = vertical_thread_function_v, args = (a_pieces[row][col], h_pieces_v[col], thread_queue_v, i))
            newThread.start()
            threads_v.append(newThread)
        for thread in threads_v:  # wait for all threads to complete
            thread.join()
        while not thread_queue_v.empty():
            i, val = thread_queue_v.get()
            # Results from the same grid row add up to that row block of v.
            v_pieces[i // p_col] += val

        # Placeholders for steps that are not implemented yet; presumably the
        # reductions w.T @ w (k x k) and w.T @ a (k x n) -- TODO confirm.
        x = np.zeros((k, k))
        y = np.zeros((k, n))

    print(f"u:\n{u}\n\nv:\n{np.shape(np.concatenate(v_pieces, 0))}")
    return

# Demo: one iteration on a random 64x32 matrix with a 4x2 thread grid;
# the function prints u and the shape of v.
vertical_HPC_NMF(np.random.rand(64, 32), k=2, p_row=4, p_col=2, numIter=1)

u:
[[12.61798205  9.75981838]
 [ 9.75981838 10.62633052]]

v:
(64, 2)
