In [5]:
import numpy as np
import threading
import queue
# Small symmetric 2x2 matrix used as a sanity-check input for the factorization routines.
testArray = np.array([[1, 2],
                      [2, 1]])
testArray.shape

(2, 2)

In [6]:
def simple_NMF(A, k, i, eps=np.finfo(float).eps):
    """Approximate ``A`` by a rank-``k`` product ``W @ H`` using multiplicative updates.

    Starting from uniformly random factors ``W`` (m x k) and ``H`` (k x n),
    each sweep applies the element-wise rules
    ``W <- W * (A H^T) / (W H H^T)`` and ``H <- H * (W^T A) / (W^T W H)``.

    Parameters
    ----------
    A : array_like, shape (m, n)
        Matrix to factorize. Non-negativity is not checked.
    k : int
        Inner dimension (rank) of the factorization.
    i : int
        Number of update sweeps to perform.
    eps : float, optional
        Small positive constant added to every denominator so that a zero
        denominator (e.g. an all-zero ``A``) yields 0 instead of NaN/inf.

    Returns
    -------
    numpy.ndarray, shape (m, n)
        The reconstruction ``W @ H`` after ``i`` sweeps. The result depends on
        the global NumPy random state used for the initial factors.
    """
    A = np.asarray(A)
    m, n = A.shape
    W = np.random.rand(m, k)
    H = np.random.rand(k, n)
    for _ in range(i):
        # eps keeps the element-wise division finite when a denominator entry is 0.
        W = W * (A @ H.T) / (W @ H @ H.T + eps)
        H = H * (W.T @ A) / (W.T @ W @ H + eps)
    return W @ H

# Sanity check: rank-2 factorization of the 2x2 test matrix with 1000 update iterations.
simple_NMF(testArray, 2, 1000)

array([[1., 2.],
       [2., 1.]])

In [7]:
def thread_function_w(A, w, h, q, i, eps=np.finfo(float).eps):
    """Compute one multiplicative update of ``w`` and put ``(i, new_w)`` on ``q``.

    Parameters
    ----------
    A : numpy.ndarray
        Data block with as many rows as ``w`` (naive_parallel_NMF passes a
        row slice of the full matrix).
    w : numpy.ndarray
        Current block of the left factor to be updated.
    h : numpy.ndarray
        Right factor used in the update.
    q : queue.Queue
        Queue that receives the ``(i, updated_block)`` tuple.
    i : int
        Tag passed through unchanged so the consumer can reorder results.
    eps : float, optional
        Small positive constant added to the denominator so that a zero
        denominator yields 0 instead of NaN/inf.
    """
    numerator = A @ h.T
    denominator = w @ h @ h.T + eps  # eps guards against 0/0
    q.put((i, w * numerator / denominator))

def thread_function_h(A, w, h, q, i, eps=np.finfo(float).eps):
    """Compute one multiplicative update of ``h`` and put ``(i, new_h)`` on ``q``.

    Parameters
    ----------
    A : numpy.ndarray
        Data block with as many columns as ``h`` (naive_parallel_NMF passes a
        column slice of the full matrix).
    w : numpy.ndarray
        Left factor used in the update.
    h : numpy.ndarray
        Current block of the right factor to be updated.
    q : queue.Queue
        Queue that receives the ``(i, updated_block)`` tuple.
    i : int
        Tag passed through unchanged so the consumer can reorder results.
    eps : float, optional
        Small positive constant added to the denominator so that a zero
        denominator yields 0 instead of NaN/inf.
    """
    numerator = w.T @ A
    denominator = w.T @ w @ h + eps  # eps guards against 0/0
    q.put((i, h * numerator / denominator))

def _run_update_threads(target, per_thread_args):
    """Run ``target`` on one thread per argument tuple and return results in index order.

    Each thread is invoked as ``target(*args, result_queue, index)`` and is
    expected to put exactly one ``(index, value)`` pair on the queue.
    Raises RuntimeError if any thread finished without delivering a result
    (for example because it raised), instead of silently reusing stale data.
    """
    result_queue = queue.Queue()
    workers = []
    for index, args in enumerate(per_thread_args):
        worker = threading.Thread(target=target, args=(*args, result_queue, index))
        worker.start()
        workers.append(worker)
    for worker in workers:  # wait for every block update to finish
        worker.join()

    results = [None] * len(workers)
    while not result_queue.empty():  # all producers are joined, so empty() is reliable here
        index, value = result_queue.get()
        results[index] = value
    missing = [index for index, value in enumerate(results) if value is None]
    if missing:
        raise RuntimeError(f'Worker thread(s) {missing} produced no result')
    return results


def naive_parallel_NMF(A, k, p, numIter):
    """Rank-``k`` multiplicative-update NMF with the W and H updates split over ``p`` threads.

    In every iteration W is cut into ``p`` row blocks that are updated by
    ``thread_function_w`` (one thread each) and concatenated again; then H is
    cut into ``p`` column blocks updated by ``thread_function_h`` using the
    freshly updated W.

    Parameters
    ----------
    A : array_like, shape (m, n)
        Matrix to factorize; ``m`` and ``n`` must both be divisible by ``p``.
    k : int
        Inner dimension (rank) of the factorization.
    p : int
        Number of threads / blocks per factor; must be at least 1.
    numIter : int
        Number of W-then-H update sweeps.

    Returns
    -------
    numpy.ndarray, shape (m, n)
        The reconstruction ``w @ h`` after ``numIter`` sweeps.

    Raises
    ------
    ValueError
        If ``p`` is smaller than 1.
    TypeError
        If ``m`` or ``n`` is not divisible by ``p`` (exception type kept for
        backward compatibility).
    RuntimeError
        If a worker thread did not deliver its updated block.
    """
    A = np.asarray(A)
    m, n = A.shape
    if p < 1:
        raise ValueError('Number of threads must be a positive integer')
    if m % p > 0:
        raise TypeError('Input first dimension not divisible by number of threads')
    if n % p > 0:
        raise TypeError('Input second dimension not divisible by number of threads')
    w = np.random.rand(m, k)
    h = np.random.rand(k, n)

    # Integer block boundaries; divisibility was verified above.
    rows_per_block = m // p
    cols_per_block = n // p
    row_slices = [slice(j * rows_per_block, (j + 1) * rows_per_block) for j in range(p)]
    col_slices = [slice(j * cols_per_block, (j + 1) * cols_per_block) for j in range(p)]
    a_row_blocks = [A[rows, :] for rows in row_slices]
    a_col_blocks = [A[:, cols] for cols in col_slices]

    for _ in range(numIter):
        # Each thread updates one row block of w against the full h.
        w_blocks = _run_update_threads(
            thread_function_w,
            [(a_row_blocks[j], w[row_slices[j], :], h) for j in range(p)],
        )
        w = np.concatenate(w_blocks, 0)

        # Same procedure for h, column-wise, using the updated w.
        h_blocks = _run_update_threads(
            thread_function_h,
            [(a_col_blocks[j], w, h[:, col_slices[j]]) for j in range(p)],
        )
        h = np.concatenate(h_blocks, 1)
    return w @ h
    
# Sanity check: rank-2 factorization of the 2x2 test matrix using 2 threads and 10000 iterations.
naive_parallel_NMF(testArray, 2, 2, 10000)

array([[1., 2.],
       [2., 1.]])

In [42]:
def vertical_thread_function_u(h, q):
    """Put the Gram matrix ``h @ h.T`` of the given block on queue ``q``."""
    gram = h @ h.T
    q.put(gram)

def vertical_thread_function_v(A, h, q, i, j):
    """Put ``(i, j, A @ h.T)`` on queue ``q``; ``i`` and ``j`` are passed through as tags."""
    product = A @ h.T
    tagged = (i, j, product)
    q.put(tagged)

def vertical_thread_function_x(w, q, i, j):
    """Put ``(i, j, w.T @ w)`` on queue ``q``; ``i`` and ``j`` are passed through as tags."""
    gram = w.T @ w
    tagged = (i, j, gram)
    q.put(tagged)

def vertical_thread_function_y(A, w, q, i, j):
    """Put ``(i, j, w.T @ A)`` on queue ``q``; ``i`` and ``j`` are passed through as tags."""
    product = w.T @ A
    tagged = (i, j, product)
    q.put(tagged)

def vertical_HPC_NMF(A, k, p1, p2, numIter):
    """Proof-of-concept for a grid-parallel NMF; currently only computes and prints ``u``.

    Each iteration splits the random factor ``h`` (k x n) column-wise into
    ``p1 * p2`` equal blocks, lets one thread per block compute that block's
    Gram matrix via ``vertical_thread_function_u``, and sums the partial
    results so that ``u == h @ h.T``. The remaining products (the ``v``
    placeholder in the printout) are not implemented yet.

    Parameters
    ----------
    A : array_like, shape (m, n)
        Input matrix; only its shape is used so far. ``m`` and ``n`` must be
        divisible by ``p1 * p2``.
    k : int
        Inner dimension (rank) of the factorization.
    p1, p2 : int
        Dimensions of the thread grid (``p2`` rows of ``p1`` threads).
    numIter : int
        Number of iterations; with 0 iterations ``u`` is reported as zeros.

    Returns
    -------
    None
        The function prints ``u`` and the ``v`` placeholder.

    Raises
    ------
    TypeError
        If ``m`` or ``n`` is not divisible by ``p1 * p2``.
    RuntimeError
        If a worker thread did not deliver its partial result.
    """
    m, n = np.shape(A)
    num_threads = p1 * p2
    if m % num_threads > 0:
        raise TypeError('Input first dimension not divisible by number of threads')
    if n % num_threads > 0:
        raise TypeError('Input second dimension not divisible by number of threads')
    w = np.random.rand(m, k)  # not used yet: the W-side products are still to be implemented
    h = np.random.rand(k, n)
    cols_per_block = n // num_threads
    u = np.zeros((k, k))  # bound up front so the report below also works when numIter == 0

    for _ in range(numIter):
        u = np.zeros((k, k)) # temporary, for proof-of-concept
        # chop up h into full grid: block (i, j) holds columns of h, numbered row-major
        h_pieces_u = [
            [
                h[:, (i * p1 + j) * cols_per_block:(i * p1 + j + 1) * cols_per_block]
                for j in range(p1)
            ]
            for i in range(p2)
        ]
        thread_grid_u = [[] for _ in range(p2)]
        thread_queue_u = queue.Queue()
        for i in range(p2): # keep threads in a p1 by p2 grid, like the paper does
            for j in range(p1):
                newThread = threading.Thread(target = vertical_thread_function_u, args = (h_pieces_u[i][j], thread_queue_u))
                newThread.start()
                thread_grid_u[i].append(newThread)
        for grid_row in thread_grid_u:
            for thread in grid_row:
                thread.join()

        received = 0
        while not thread_queue_u.empty():  # all producers are joined, so empty() is reliable here
            u += thread_queue_u.get()
            received += 1
        if received != num_threads:
            raise RuntimeError(f'Expected {num_threads} partial results for u, got {received}')

    print(f"u:\n{u}\n\nv:\n{[]}")
    return

# Smoke test: 16x8 zero matrix, rank 2, p1=4 and p2=2 (8 threads), a single iteration.
vertical_HPC_NMF(np.zeros((16, 8)), 2, 4, 2, 1)

u:
[[3.11894438 2.92345926]
 [2.92345926 3.2851652 ]]

v:
[]
