In [1]:
import time
import operator
import functools
import dask 
import dask.array as da
import numpy as np 

In [2]:
# Master switch for the expensive benchmark / regression cells further down:
# they are wrapped in `if REPEAT_OLD:` and are skipped while this is False.
REPEAT_OLD = False

## warmstart vector x0

In [3]:
def cg_init_dsk(A, b, state0, x_init=None):
    """ Build the raw dask graph for the initial CG state (x, r, p).

    The graph only *references* the blocks of `A`, `b` (and `x_init`) by
    name; the caller must merge it with the graphs of those arrays.

    Args:
        A: 2-D dask array (reads `A.name`, `A.numblocks`).
        b: 1-D dask array, right-hand side (reads `b.name`).
        state0 (str): suffix for the generated keys, e.g. 'cg-iter0' yields
            'x-cg-iter0', 'r-cg-iter0', 'p-cg-iter0'.
        x_init: optional 1-D dask array used as warm start. When omitted the
            initial iterate is `0 * b`, i.e. a zero vector shaped like `b`.

    Returns:
        dict: dask graph with one task per block for x, r and p, where
        r = b - A * x and p = r.
    """
    x0, r0, p0 = map(lambda nm: nm + '-' + state0, ('x', 'r', 'p'))
    scal_x = float(x_init is not None)
    vec_x = b if x_init is None else x_init
    def init_x(x_or_b):
        # scal_x is 0.0 for a cold start, so this zeroes the `b` block
        return scal_x * x_or_b
    def init_p(ri): return 1 * ri
    dsk = dict()
    vblocks, hblocks = A.numblocks
    for i in range(vblocks):
        dsk[(x0, i)] = (init_x, (vec_x.name, i))
        # Residual must be b - A*x (not A*x - b): the CG updates
        # x += alpha * p, r -= alpha * A*p are only consistent with that sign.
        dsk[(r0, i)] = (operator.sub,
                (b.name, i),
                (da.core.dotmany, [(A.name, i, j) for j in range(hblocks)], [(x0, j) for j in range(hblocks)]))
        dsk[(p0, i)] = (init_p, (r0, i))
    return dsk

In [4]:
def cg_calcs_proto(shape, chunks, dtype, dsk, key):
    """ Materialize the CG state stored under `key` in graph `dsk`.

    Wraps the 'x-<key>', 'r-<key>' and 'p-<key>' entries of `dsk` as dask
    arrays with the given shape/chunks/dtype, persists all three without
    graph optimization, and computes the 2-norm of the residual.

    Returns:
        tuple: (x, r, p, res) -- the persisted arrays and the residual norm.
    """
    def as_array(prefix):
        return da.Array(dsk, prefix + '-' + key, shape=shape, chunks=chunks, dtype=dtype)

    state = [as_array(prefix) for prefix in ('x', 'r', 'p')]
    x, r, p = dask.persist(*state, optimize_graph=False, traverse=False)
    (res,) = dask.compute(da.linalg.norm(r))
    return x, r, p, res

In [5]:
# Problem size m and dask chunk size mc.
m, mc = 1000, 100
# Random dense matrix turned into the symmetric Gram matrix Arand' * Arand.
Arand = np.random.random((m, m))
Arand = Arand.T.dot(Arand)
# Random right-hand side and warm-start vector (unseeded: values differ per run).
brand, xrand = np.random.random(m), np.random.random(m)
# Explicit names so that hand-built graphs can reference blocks as
# ('A', i, j), ('b', i) and ('x0', i).
AA = da.from_array(Arand, chunks=mc, name='A')
bb = da.from_array(brand, chunks=mc, name='b')
xx = da.from_array(xrand, chunks=mc, name='x0')
# Bind the vector metadata so later cells only pass (graph, state key).
cg_calcs = functools.partial(cg_calcs_proto, bb.shape, bb.chunks, bb.dtype)

In [6]:
# Cold start: no x_init is passed, so the initial iterate is 0 * b.
dsk = dask.sharedict.merge(AA.dask, bb.dask, cg_init_dsk(AA, bb, 'cg-iter0'))

In [7]:
# Persist the initial state and compute the residual norm.
x, r, p, res = cg_calcs(dsk, 'cg-iter0')

In [8]:
# Warm start: the graph of xx must be merged too, because the init graph
# refers to its blocks by name.
dsk = dask.sharedict.merge(AA.dask, bb.dask, xx.dask, cg_init_dsk(AA, bb, 'cg-iter0', x_init=xx))

In [9]:
# Persist the warm-started initial state and compute the residual norm.
x, r, p, res = cg_calcs(dsk, 'cg-iter0')

In [10]:
# Same warm start as above, assembling the list of graphs step by step
# before merging (the pattern reused by the later cells).
dsk = cg_init_dsk(AA, bb, 'cg-iter0', x_init=xx)
dasks = [AA.dask, bb.dask, dsk]
dasks += [xx.dask]
dsk = dask.sharedict.merge(*dasks)
x, r, p, res = cg_calcs(dsk, 'cg-iter0')

## linear operator A 

In [11]:
def graph_dot(A, key_in, key_out):
    """ Graph for the blocked matrix-vector product `key_out = A * key_in`.

    For A <: dask.array.Array. Output block `row` is the dot product of the
    `row`-th block row of A with all blocks of the input vector.

    Returns:
        dict: one `(key_out, row)` task per block row of A.
    """
    n_row_blocks, n_col_blocks = A.numblocks
    return {
        (key_out, row): (
            da.core.dotmany,
            [(A.name, row, col) for col in range(n_col_blocks)],
            [(key_in, col) for col in range(n_col_blocks)])
        for row in range(n_row_blocks)}

In [12]:
def graph_gemv(key_alpha, A, key_x, key_beta, key_y, key_out):
    """ Graph for `key_out = alpha * (A * x) + beta * y`, block by block.

    For A <: dask.array.Array. The intermediate product A * x is stored under
    the key `A.name + '-mul-' + key_x` and is part of the returned graph.
    `key_alpha` / `key_beta` are placed in the task tuples as given (graph
    keys or literal scalars).
    """
    n_row_blocks, _ = A.numblocks
    key_Ax = A.name + '-mul-' + key_x

    def gemv(alpha, Ax_block, beta, y_block):
        return alpha * Ax_block + beta * y_block

    dsk = {
        (key_out, row): (gemv, key_alpha, (key_Ax, row), key_beta, (key_y, row))
        for row in range(n_row_blocks)}
    dsk.update(graph_dot(A, key_x, key_Ax))
    return dsk

In [13]:
def cg_init_graph1(A, b, state0, x_init=None):
    """ Initial CG state graph, with A * x0 generated by :func:`graph_dot`.

    Args:
        A: 2-D dask array (reads `A.name`, `A.numblocks`).
        b: 1-D dask array, right-hand side (reads `b.name`).
        state0 (str): key suffix, e.g. 'cg-iter0'.
        x_init: optional warm-start vector; when omitted x0 = 0 * b.

    Returns:
        dict: graph defining 'x-<state0>', 'r-<state0>' = b - A * x0 and
        'p-<state0>' = r, one task per block.
    """
    x0, r0, p0 = map(lambda nm: nm + '-' + state0, ('x', 'r', 'p'))
    scal_x = float(x_init is not None)
    vec_x = b if x_init is None else x_init
    def init_x(x_or_b): return scal_x * x_or_b
    def init_p(ri): return 1 * ri
    dsk = dict()
    vblocks, hblocks = A.numblocks
    dsk_Ax = graph_dot(A, x0, 'Ax')
    for i in range(vblocks):
        dsk[(x0, i)] = (init_x, (vec_x.name, i))
        dsk[(p0, i)] = (init_p, (r0, i))
        # The A*x0 task is inlined by value, so no 'Ax' key enters the graph.
        # Residual is b - A*x0: the CG update rules assume this sign.
        dsk[(r0, i)] = (operator.sub, (b.name, i), dsk_Ax[('Ax', i)])
    return dsk

In [14]:
# Smoke test of cg_init_graph1 with a warm start.
dsk = cg_init_graph1(AA, bb, 'cg-iter0', x_init=xx)
dasks = [AA.dask, bb.dask, dsk]
dasks += [xx.dask]
dsk = dask.sharedict.merge(*dasks)
x, r, p, res = cg_calcs(dsk, 'cg-iter0')

In [15]:
def cg_init_graph2(A, b, state0, x_init=None):
    """ Initial CG state graph, with the residual generated by :func:`graph_gemv`.

    Args:
        A: 2-D dask array (reads `A.name`, `A.numblocks`).
        b: 1-D dask array, right-hand side (reads `b.name`).
        state0 (str): key suffix, e.g. 'cg-iter0'.
        x_init: optional warm-start vector; when omitted x0 = 0 * b.

    Returns:
        dict: graph defining 'x-<state0>', 'r-<state0>' = b - A * x0 and
        'p-<state0>' = r, plus the intermediate A * x0 blocks.
    """
    x0, r0, p0 = map(lambda nm: nm + '-' + state0, ('x', 'r', 'p'))
    scal_x = float(x_init is not None)
    vec_x = b if x_init is None else x_init
    def init_x(x_or_b): return scal_x * x_or_b
    def init_p(ri): return 1 * ri
    dsk = dict()
    vblocks, hblocks = A.numblocks
    for i in range(vblocks):
        dsk[(x0, i)] = (init_x, (vec_x.name, i))
        dsk[(p0, i)] = (init_p, (r0, i))
    # r0 = -1 * (A * x0) + 1 * b. The CG updates require the residual
    # b - A*x0; with the opposite sign the iterates move away from the solution.
    dsk.update(graph_gemv(-1, A, x0, 1, b.name, r0))
    return dsk

In [16]:
# Smoke test of cg_init_graph2 with a warm start.
dsk = cg_init_graph2(AA, bb, 'cg-iter0', x_init=xx)
dasks = [AA.dask, bb.dask, dsk]
dasks += [xx.dask]
dsk = dask.sharedict.merge(*dasks)
x, r, p, res = cg_calcs(dsk, 'cg-iter0')

In [17]:
def cg_iterate_graph1(A, state0, state1):
    """ Graph for one unpreconditioned CG step from `state0` to `state1`.

    Reads the keys 'x-', 'r-', 'p-' + state0 and defines the same three keys
    with suffix state1, together with the scalars 'gamma-<state>' = r'r,
    'pAp-<state0>' = p'Ap and the vector 'Ap-<state0>'.
    """
    def tagged(state):
        return [nm + '-' + state for nm in ('x', 'r', 'p', 'gamma')]

    x0, r0, p0, gamma0 = tagged(state0)
    x1, r1, p1, gamma1 = tagged(state1)
    Ap = 'Ap-' + state0
    pAp = 'pAp-' + state0

    def update_x(x, gamma, pAp, p): return x + (gamma / pAp) * p
    def update_r(r, gamma, pAp, Ap): return r - (gamma / pAp) * Ap
    def update_p(p, gamma, gamma_next, r): return r + (gamma_next / gamma) * p

    vblocks, hblocks = A.numblocks

    def block_keys(nm):
        return [(nm, i) for i in range(vblocks)]

    dsk = dict(graph_dot(A, p0, Ap))
    # scalar reductions over all blocks
    dsk[gamma0] = (da.core.dotmany, block_keys(r0), block_keys(r0))
    dsk[pAp] = (da.core.dotmany, block_keys(p0), block_keys(Ap))
    dsk[gamma1] = (da.core.dotmany, block_keys(r1), block_keys(r1))
    # per-block vector updates
    for i in range(vblocks):
        dsk[(x1, i)] = (update_x, (x0, i), gamma0, pAp, (p0, i))
        dsk[(r1, i)] = (update_r, (r0, i), gamma0, pAp, (Ap, i))
        dsk[(p1, i)] = (update_p, (p0, i), gamma0, gamma1, (r1, i))
    return dsk

In [18]:
# Advance the persisted iter0 state by one CG step and evaluate iter1.
dsk = cg_iterate_graph1(AA, 'cg-iter0', 'cg-iter1')
dasks = [AA.dask, x.dask, r.dask, p.dask, dsk]
dsk = dask.sharedict.merge(*dasks)
x, r, p, res = cg_calcs(dsk, 'cg-iter1')

### preconditioned CG

In [19]:
def cg_init_graph3(A, b, state0, x_init=None):
    """ Initial state graph for preconditioned CG.

    Args:
        A: object exposing `.linear_operator` (2-D dask array) and
            `.preconditioner` (operator accepted by :func:`graph_dot`).
        b: 1-D dask array, right-hand side (reads `b.name`).
        state0 (str): key suffix, e.g. 'cg-iter0'.
        x_init: optional warm-start vector; when omitted x0 = 0 * b.

    Returns:
        dict: graph defining 'x-<state0>', 'r-<state0>' = b - A * x0 and
        'p-<state0>' = M * r.
    """
    M = A.preconditioner
    A = A.linear_operator
    x0, r0, p0 = map(lambda nm: nm + '-' + state0, ('x', 'r', 'p'))
    scal_x = float(x_init is not None)
    vec_x = b if x_init is None else x_init
    def init_x(x_or_b): return scal_x * x_or_b
    dsk = dict()
    vblocks, hblocks = A.numblocks
    for i in range(vblocks):
        dsk[(x0, i)] = (init_x, (vec_x.name, i))
    # r0 = b - A*x0 (the sign the CG recurrences assume).
    dsk.update(graph_gemv(-1, A, x0, 1, b.name, r0))
    # p0 = M * r0; no separate "p0 = r0" entries are written since they
    # would be overwritten by these keys anyway.
    dsk.update(graph_dot(M, r0, p0))
    return dsk

In [20]:
def list_blocks(nm, blocks=0):
    """ Return the block keys [(nm, 0), ..., (nm, blocks - 1)]. """
    keys = []
    for i in range(blocks):
        keys.append((nm, i))
    return keys
def dict_blocks(dictionary, nm, blocks=0):
    """ Return the values stored under (nm, 0), ..., (nm, blocks - 1). """
    values = []
    for i in range(blocks):
        values.append(dictionary[(nm, i)])
    return values
    
def cg_iterate_graph2(A, state0, state1):
    """ Graph for one preconditioned CG step from `state0` to `state1`.

    `A` must expose `.linear_operator` and `.preconditioner`. The products
    M * r are not stored under their own keys: their tasks are inlined into
    the 'rMr-<state>' reductions and into the update of p.
    """
    M, A = A.preconditioner, A.linear_operator

    def tagged(state):
        return [nm + '-' + state for nm in ('x', 'r', 'p', 'rMr')]

    x0, r0, p0, rMr0 = tagged(state0)
    x1, r1, p1, rMr1 = tagged(state1)
    Ap = 'Ap-' + state0
    pAp = 'pAp-' + state0

    def update_x(x, rMr, pAp, p): return x + (rMr / pAp) * p
    def update_r(r, rMr, pAp, Ap): return r - (rMr / pAp) * Ap
    def update_p(p, rMr, rMr_next, Mr): return Mr + (rMr_next / rMr) * p

    vblocks, hblocks = A.numblocks

    def block_keys(nm):
        return [(nm, i) for i in range(vblocks)]

    dsk = dict(graph_dot(A, p0, Ap))
    dsk_Mr0 = graph_dot(M, r0, 'Mr0')
    dsk_Mr1 = graph_dot(M, r1, 'Mr1')
    # task tuples (not keys) for the blocks of M * r0 and M * r1
    Mr0_tasks = [dsk_Mr0[('Mr0', i)] for i in range(vblocks)]
    Mr1_tasks = [dsk_Mr1[('Mr1', i)] for i in range(vblocks)]

    dsk[rMr0] = (da.core.dotmany, block_keys(r0), Mr0_tasks)
    dsk[rMr1] = (da.core.dotmany, block_keys(r1), Mr1_tasks)
    dsk[pAp] = (da.core.dotmany, block_keys(p0), block_keys(Ap))
    for i, Mr1_i in enumerate(Mr1_tasks):
        dsk[(x1, i)] = (update_x, (x0, i), rMr0, pAp, (p0, i))
        dsk[(r1, i)] = (update_r, (r0, i), rMr0, pAp, (Ap, i))
        dsk[(p1, i)] = (update_p, (p0, i), rMr0, rMr1, Mr1_i)
    return dsk

In [21]:
# Dense diagonal matrix holding the reciprocals of the diagonal of Arand,
# chunked like AA, used as preconditioner.
MM = da.from_array(np.diag(1. / np.diag(Arand)), chunks=AA.chunks)

In [22]:
class PrecondLinop(object):
    """ Pair a linear operator with its preconditioner.

    Attributes:
        linear_operator: the operator A.
        preconditioner: the preconditioner M.
    """
    def __init__(self, linop, preconditioner):
        self.linear_operator = linop
        self.preconditioner = preconditioner

# Operator / preconditioner pair consumed by cg_init_graph3 and cg_iterate_graph2.
P = PrecondLinop(AA, MM)

In [23]:
# Smoke test of the preconditioned initial state with a warm start.
dsk = cg_init_graph3(P, bb, 'cg-iter0', x_init=xx)
dasks = [AA.dask, MM.dask, bb.dask, dsk]
dasks += [xx.dask]
dsk = dask.sharedict.merge(*dasks)
x, r, p, res = cg_calcs(dsk, 'cg-iter0')

In [24]:
# One preconditioned CG step on top of the persisted iter0 state.
dsk = cg_iterate_graph2(P, 'cg-iter0', 'cg-iter1')
dasks = [AA.dask, MM.dask, x.dask, r.dask, p.dask, dsk]
dsk = dask.sharedict.merge(*dasks)
x, r, p, res = cg_calcs(dsk, 'cg-iter1')

In [25]:
def iter_options(graph_iters=1, verbose=0, print_iters=0, time_iters=0):
    """ Normalize the iteration-control options of the CG drivers.

    Args:
        graph_iters: number of CG iterations chained into one graph before it
            is evaluated (clamped to >= 1).
        verbose: verbosity level, used only when `print_iters` is not set:
            1 -> print every 100 iterations, 2 -> every 10, >= 3 -> every
            iteration.
        print_iters: print the residual every this many iterations
            (<= 0 disables printing unless `verbose` > 0).
        time_iters: print the elapsed time every this many iterations
            (<= 0 disables timing).

    Returns:
        tuple: (graph_iters, print_iters, time_iters) as non-negative ints.
        A non-zero `print_iters` is raised to at least `graph_iters`, since
        residuals only exist at graph evaluations.
    """
    graph_iters = max(1, int(graph_iters))
    time_iters = max(0, int(time_iters))
    print_iters = max(0, int(print_iters))
    if print_iters == 0 and verbose > 0:
        # Clamp the exponent: for verbose >= 4 the interval 10**(3 - verbose)
        # would truncate to 0 and silently disable printing altogether.
        print_iters = max(1, int(10 ** max(0, 3 - verbose)))
    if print_iters > 0:
        print_iters = max(print_iters, graph_iters)
    return graph_iters, print_iters, time_iters

def cg_graph_dense(A, M, b, x_init=None, tol=1e-5, maxiter=500, **options):
    """ Preconditioned CG on dense dask arrays, built from hand-written graphs.

    Args:
        A: 2-D dask array, the system matrix.
        M: 2-D dask array, the preconditioner.
        b: 1-D dask array, right-hand side.
        x_init: optional 1-D dask array used as warm start.
        tol: stop once the residual norm drops below this value.
        maxiter: maximum number of CG iterations.
        **options: `graph_iters`, `verbose`, `print_iters`, `time_iters`
            (see :func:`iter_options`).

    Returns:
        tuple: (x, res, iters) -- persisted solution estimate, last computed
        residual norm, number of iterations performed.
    """
    P = PrecondLinop(A, M)
    cg_calcs = functools.partial(cg_calcs_proto, b.shape, b.chunks, b.dtype)
    graph_iters, print_iters, time_iters = iter_options(**options)
    key_init = 'cg-iter0'
    dsks = [A.dask, M.dask, b.dask, cg_init_graph3(P, b, key_init, x_init=x_init)]
    if x_init is not None:
        # the init graph references the warm-start blocks by name
        dsks.append(x_init.dask)
    x, r, p, res = cg_calcs(dask.sharedict.merge(*dsks), key_init)
    if res < tol or maxiter <= 0:
        # already converged, or no iterations requested
        return x, res, 0
    if time_iters > 0:
        start = time.time()
    dsk = dict()
    for i in range(1, maxiter + 1):
        key0 = 'cg-iter{}'.format(i - 1)
        key1 = 'cg-iter{}'.format(i)
        calculate = bool(i % graph_iters == 0)
        dsk.update(cg_iterate_graph2(P, key0, key1))
        if calculate:
            dsk = dask.sharedict.merge(A.dask, M.dask, x.dask, r.dask, p.dask, dsk)
            x, r, p, res = cg_calcs(dsk, key1)
            dsk = dict()
            if print_iters > 0 and i % print_iters == 0:
                print('\t\t\t{}: residual = {:.1e}'.format(i, res))
            if res < tol:
                break
        if time_iters > 0 and i % time_iters == 0:
            print('{}: {:.1e} seconds'.format(i, time.time() - start))
            start = time.time()
    if dsk:
        # maxiter is not a multiple of graph_iters: evaluate the iterations
        # that were queued but not yet computed
        dsk = dask.sharedict.merge(A.dask, M.dask, x.dask, r.dask, p.dask, dsk)
        x, _, _, res = cg_calcs(dsk, key1)
    return x, res, i

In [26]:
# Banded, diagonally weighted test matrix: unit off-diagonals at offsets
# +/-1 and +/-(m // 10), plus a growing positive main diagonal.
diagA = 0.5 + np.sqrt(range(1, m + 1))
# Explicit floor division keeps the offsets integral on Python 3 too, and
# keeps the two outer bands symmetric for any m.
band = m // 10
Acg = (
    np.eye(m, m, -1) 
    + np.eye(m, m, 1) 
    + np.eye(m, m, -band)
    + np.eye(m, m, band)
    + np.diag(diagA))
# Inverse of the main diagonal, used as preconditioner.
Mcg = np.diag(1. / diagA)

In [27]:
# Dask wrappers for the banded system, its diagonal preconditioner and the
# identity (used as a "no preconditioning" baseline).
AA = da.from_array(Acg, chunks=mc)
MM = da.from_array(Mcg, chunks=mc)
II = da.from_array(np.eye(m), chunks=mc)

In [28]:
# Benchmark: dense CG with the inverse-diagonal preconditioner MM.
if REPEAT_OLD:
    start = time.time()
    x, res, iters = cg_graph_dense(AA, MM, bb, verbose=2, time_iters=5, graph_iters=10)
    print "TOTAL", time.time() - start, iters

In [29]:
# Benchmark: same solve with the identity II as preconditioner.
if REPEAT_OLD:
    start = time.time()
    x, res, iters = cg_graph_dense(AA, II, bb, verbose=2, time_iters=5, graph_iters=10)
    print "TOTAL", time.time() - start, iters

In [30]:
import sys
import os
# Make the parent of the current working directory importable so that the
# `scs_dask` package used below can be found.
PROJECT_PATH = os.path.realpath(os.path.join(os.getcwd(), '..'))
print PROJECT_PATH
sys.path.append(PROJECT_PATH)

/Users/Baris/Documents/Thesis/modules/scs-dask


In [31]:
from scs_dask.linalg import linear_operator as linop
from scs_dask.linalg import atoms2

In [134]:
import multipledispatch

# Private multipledispatch namespace: the `graph_dot` overloads registered
# through `dispatch` below are kept in this dict.
namespace_atoms2 = dict()
dispatch = functools.partial(multipledispatch.dispatch, namespace=namespace_atoms2)

@dispatch(da.Array, str, str)
def graph_dot(array, input_key, output_key, transpose=False, **options):
    """ Implementation of :func:`graph_dot` for a dask array.

    Builds the graph of the blocked product `output = array * input`, or
    `output = array' * input` when `transpose` is set. In the transposed case
    block (j, i) of `array` is used for output block i and `np.transpose` is
    applied to it through `dotmany`'s `leftfunc`.

    Returns:
        dict: one `(output_key, i)` task per output block.
    """
    if transpose:
        matvec = functools.partial(da.core.dotmany, leftfunc=np.transpose)
        blocks_out, blocks_in = array.numblocks[::-1]
        def block_key(i, j): return (array.name, j, i)
    else:
        matvec = da.core.dotmany
        blocks_out, blocks_in = array.numblocks
        def block_key(i, j): return (array.name, i, j)

    return {
        (output_key, i): (
            matvec,
            [block_key(i, j) for j in range(blocks_in)],
            [(input_key, j) for j in range(blocks_in)])
        for i in range(blocks_out)}

@dispatch(linop.DLODense, str, str)
def graph_dot(dense_op, input_key, output_key, transpose=False, **options):
    """ Implementation of :func:`graph_dot` for a dense linear operator.

    Delegates to the overload registered for the operator's `.data`.
    """
    data = dense_op.data
    return graph_dot(data, input_key, output_key, transpose=transpose)

@dispatch(linop.DLODiagonal, str, str)
def graph_dot(diag_op, input_key, output_key, **options):
    """ Implementation of :func:`graph_dot` for a diagonal linear operator.

    Each output block is the elementwise product of the matching block of
    the operator's `.data` vector with the input block.
    """
    diag = diag_op.data
    return {
        (output_key, i): (operator.mul, (diag.name, i), (input_key, i))
        for i in range(diag.numblocks[0])}

@dispatch(linop.DLOGram, str, str)
def graph_dot(gram_op, input_key, output_key, **options):
    """ Implementation of :func:`graph_dot` for a gram operator.

    Chains two products with the operator's `.data`: the first with
    `transpose=gram_op.transpose`, the second with the opposite flag. The
    intermediate vector is stored under
    `gram_op.name + '-gramA-' + input_key`.
    """
    first_transposed = gram_op.transpose
    mid_key = gram_op.name + '-gramA-' + input_key
    dsk = dict()
    dsk.update(graph_dot(gram_op.data, input_key, mid_key, transpose=first_transposed))
    dsk.update(graph_dot(gram_op.data, mid_key, output_key, transpose=(not first_transposed)))
    return dsk

@dispatch(linop.DLORegularizedGram, str, str)
def graph_dot(gram_op, input_key, output_key, **options):
    """ Implementation of :func:`graph_dot` for a regularized operator.

    Computes `gram * x` into `gram_op.name + '-gramAA-' + input_key`, then
    adds `gram_op.regularization * x` block by block. The regularization
    value is read when the task runs, not when the graph is built.
    """
    mid_key = gram_op.name + '-gramAA-' + input_key

    inner = gram_op.data
    if not isinstance(inner, linop.DLOGram):
        inner = linop.DLOGram(inner, transpose=gram_op.transpose)

    def add_regularization(AAxi, xi):
        return AAxi + gram_op.regularization * xi

    dsk = graph_dot(inner, input_key, mid_key)
    dsk.update({
        (output_key, i): (add_regularization, (mid_key, i), (input_key, i))
        for i in range(gram_op.numblocks[0])})
    return dsk


def list_blocks(nm, blocks=0):
    """ Return the block keys [(nm, 0), ..., (nm, blocks - 1)]. """
    return list(zip([nm] * blocks, range(blocks)))
def dict_blocks(dictionary, nm, blocks=0):
    """ Return the values stored under (nm, 0), ..., (nm, blocks - 1). """
    found = []
    for block in range(blocks):
        found.append(dictionary[(nm, block)])
    return found

def cg_init_graph(A, b, state0, x_init=None, M=None, M12=None):
    """ Initial CG state graph for any operator supported by :func:`graph_dot`.

    Defines 'x-<state0>' (the warm start, or 0 * b), 'r-<state0>' =
    b - A * x and 'p-<state0>' (= r without preconditioner, M * r otherwise).

    Raises:
        ValueError: if a warm start and a preconditioner are given without
            `M12`.
    """
    warm_start = x_init is not None
    if warm_start and M is not None and M12 is None:
        raise ValueError('warm start (x0) and preconditioner (M) given, M^{1/2} required')

    x0, r0, p0 = [nm + '-' + state0 for nm in ('x', 'r', 'p')]
    scal_x = float(warm_start)
    vec_x = x_init if warm_start else b

    def init_x(x_or_b): return scal_x * x_or_b
    def init_p(ri): return 1 * ri

    vblocks, hblocks = A.numblocks
    dsk = {(x0, i): (init_x, (vec_x.name, i)) for i in range(vblocks)}
    dsk.update(graph_gemv(-1, A, x0, 1, b.name, r0))
    if M is not None:
        dsk.update(graph_dot(M, r0, p0))
    else:
        dsk.update({(p0, i): (init_p, (r0, i)) for i in range(vblocks)})
    return dsk

def cg_iterate_graph(A, state0, state1, M=None):
    """ Graph for one (optionally preconditioned) CG step, state0 -> state1.

    Without preconditioner, M * r is simply r (block keys are reused).
    With one, the M * r tasks from :func:`graph_dot` are inlined into the
    'rMr-<state>' reductions and into the update of p.
    """
    def tagged(state):
        return [nm + '-' + state for nm in ('x', 'r', 'p', 'rMr')]

    x0, r0, p0, rMr0 = tagged(state0)
    x1, r1, p1, rMr1 = tagged(state1)
    Ap = 'Ap-' + state0
    pAp = 'pAp-' + state0

    def update_x(x, rMr, pAp, p): return x + (rMr / pAp) * p
    def update_r(r, rMr, pAp, Ap): return r - (rMr / pAp) * Ap
    def update_p(p, rMr, rMr_next, Mr): return Mr + (rMr_next / rMr) * p

    vblocks, hblocks = A.numblocks

    def block_keys(nm):
        return [(nm, i) for i in range(vblocks)]

    dsk = dict(graph_dot(A, p0, Ap))
    if M is not None:
        dsk_Mr0 = graph_dot(M, r0, 'Mr0')
        dsk_Mr1 = graph_dot(M, r1, 'Mr1')
        Mr0_blocks = [dsk_Mr0[('Mr0', i)] for i in range(vblocks)]
        Mr1_blocks = [dsk_Mr1[('Mr1', i)] for i in range(vblocks)]
    else:
        Mr0_blocks = block_keys(r0)
        Mr1_blocks = block_keys(r1)

    dsk[rMr0] = (da.core.dotmany, block_keys(r0), Mr0_blocks)
    dsk[rMr1] = (da.core.dotmany, block_keys(r1), Mr1_blocks)
    dsk[pAp] = (da.core.dotmany, block_keys(p0), block_keys(Ap))
    for i, Mr1_i in enumerate(Mr1_blocks):
        dsk[(x1, i)] = (update_x, (x0, i), rMr0, pAp, (p0, i))
        dsk[(r1, i)] = (update_r, (r0, i), rMr0, pAp, (Ap, i))
        dsk[(p1, i)] = (update_p, (p0, i), rMr0, rMr1, Mr1_i)
    return dsk

def cg_calcs_proto(shape, chunks, dtype, dsk, key, optimize=False, **options):
    """ Materialize the CG state stored under `key` in graph `dsk`.

    Args:
        shape, chunks, dtype: metadata of the state vectors x, r, p.
        dsk: dask graph containing 'x-<key>', 'r-<key>' and 'p-<key>'.
        key (str): state suffix, e.g. 'cg-iter12'.
        optimize (bool): run `dask.optimize` on the three arrays before
            persisting them.
        **options: only `finish` is read. When true, the state is aliased to
            the fixed names 'x-cg-output', 'r-cg-output', 'p-cg-output'
            before being evaluated; other entries are ignored.

    Returns:
        tuple: (x, r, p, res) -- the persisted arrays and the residual norm.
    """
    if options.pop('finish', False):
        key_final = 'cg-output'
        dsk_final = dict()
        for i in range(len(chunks[0])):
            for prefix in ('x-', 'r-', 'p-'):
                dsk_final[(prefix + key_final, i)] = (prefix + key, i)
        # merge once, after all alias entries exist (not once per block)
        dsk = dask.sharedict.merge(dsk, dsk_final)
        key = key_final
    x = da.Array(dsk, 'x-' + key, shape=shape, chunks=chunks, dtype=dtype)
    r = da.Array(dsk, 'r-' + key, shape=shape, chunks=chunks, dtype=dtype)
    p = da.Array(dsk, 'p-' + key, shape=shape, chunks=chunks, dtype=dtype)
    if optimize:
        (x, r, p) = dask.optimize(x, r, p)
    (x, r, p) = dask.persist(x, r, p, optimize_graph=False, traverse=False)
    (res,) = dask.compute(da.linalg.norm(r))
    return x, r, p, res

def cg_graph(A, b, preconditioner=None, x_init=None, tol=1e-5, maxiter=500, **options):
    """ Conjugate gradient solve of A x = b built from hand-written dask graphs.

    Args:
        A: operator supported by :func:`graph_dot` (reads `.dask`, `.name`,
            `.numblocks`).
        b: 1-D dask array, right-hand side.
        preconditioner: optional operator M supported by :func:`graph_dot`.
        x_init: optional 1-D dask array used as warm start.
        tol: stop once the residual norm drops below this value.
        maxiter: maximum number of CG iterations.
        **options: `preconditioner12` and `optimize` are consumed here; the
            rest (`graph_iters`, `verbose`, `print_iters`, `time_iters`) goes
            to :func:`iter_options`.

    Returns:
        tuple: (x, res, iters). When the initial residual is already below
        `tol`, or no iterations are requested, the initial state is returned
        with iters == 0.
    """
    M = preconditioner
    M12 = options.pop('preconditioner12', None)
    optimize = options.pop('optimize', False)
    cg_calcs = functools.partial(cg_calcs_proto, b.shape, b.chunks, b.dtype, optimize=optimize)
    graph_iters, print_iters, time_iters = iter_options(**options)
    key_init = 'cg-iter0'
    dsk = cg_init_graph(A, b, key_init, x_init=x_init, M=M, M12=M12)
    dsks = [A.dask, b.dask, dsk]
    dsks += [x_init.dask] if x_init is not None else []
    dsks += [M.dask] if M is not None else []
    x, r, p, res0 = cg_calcs(dask.sharedict.merge(*dsks), key_init, **options)
    # `<= 0` (not `== 0`): a negative maxiter would skip the loop and leave
    # `i` / `key1` unbound below.
    if res0 < tol or maxiter <= 0:
        return x, res0, 0

    if time_iters > 0:
        start = time.time()
    dsk = dict()
    for i in range(1, maxiter + 1):
        key0 = 'cg-iter{}'.format(i - 1)
        key1 = 'cg-iter{}'.format(i)
        calculate = bool(i % graph_iters == 0)
        # queue iteration i; it is only evaluated every `graph_iters` steps
        dsk.update(cg_iterate_graph(A, key0, key1, M=M))
        if calculate:
            dsks = [A.dask, x.dask, r.dask, p.dask, dsk]
            dsks += [M.dask] if M is not None else []
            x, r, p, res = cg_calcs(dask.sharedict.merge(*dsks), key1)
            dsk = dict()
            if print_iters > 0 and i % print_iters == 0:
                print('\t\t\t{}: residual = {:.1e}'.format(i, res))
            if res < tol:
                break
        if time_iters > 0 and i % time_iters == 0:
            print('{}: {:.1e} seconds'.format(i, time.time() - start))
            start = time.time()
    # Final evaluation under the fixed 'cg-output' names. If the loop ran to
    # maxiter there may be queued iterations, which need A (and M) again.
    dsks = [x.dask, r.dask, p.dask]
    dsks += [A.dask, dsk] if i == maxiter else []
    dsks += [M.dask] if (i == maxiter and M is not None) else []
    x, _, _, res = cg_calcs(dask.sharedict.merge(*dsks), key1, finish=True)
    return x, res, i

In [135]:
# Re-create the banded system with explicit names, an all-ones right-hand
# side, and the inverse diagonal wrapped as a diagonal linear operator.
AA = da.from_array(Acg, chunks=mc, name='A')
bb = da.from_array(np.ones(m), chunks=mc, name='b')
mdiag = da.from_array(1. / diagA, chunks=mc, name='M')
MM = linop.DLODiagonal(mdiag)
print "done"

done


In [136]:
# Benchmark: unpreconditioned cg_graph on the dask array.
if REPEAT_OLD:
    start = time.time()
    x, res, iters = cg_graph(AA, bb, time_iters=10, graph_iters=10)
    print "TOTAL", time.time() - start, iters

In [137]:
# Benchmark: same solve with dask.optimize applied before each evaluation.
if REPEAT_OLD:
    start = time.time()
    x, res, iters = cg_graph(AA, bb, time_iters=10, graph_iters=10, optimize=True)
    print "TOTAL", time.time() - start, iters

In [138]:
# Benchmark: cg_graph with the diagonal operator MM as preconditioner.
if REPEAT_OLD:
    start = time.time()
    x, res, iters = cg_graph(AA, bb, preconditioner=MM, verbose=2, time_iters=5, graph_iters=10)
    print "TOTAL", time.time() - start, iters

In [139]:
# Benchmark: same preconditioned solve with AA wrapped in a DLODense operator.
if REPEAT_OLD:
    start = time.time()
    x, res, iters = cg_graph(linop.DLODense(AA), bb, preconditioner=MM, verbose=2, time_iters=5, graph_iters=10)
    print "TOTAL", time.time() - start, iters

In [140]:
# Tall random matrix ((m + mc) x m) and its Gram matrix Apy' * Apy, exposed
# three ways: as a plain dask array, as a dense operator over the explicit
# Gram matrix, and as a Gram operator over the tall matrix itself.
Apy = np.random.normal(0, 1. / np.sqrt(m), (m + mc, m))
AApy = Apy.T.dot(Apy)
A_tall = da.from_array(Apy, chunks=mc, name='Atall')
AA_dense = linop.DLODense(da.from_array(AApy, chunks=mc), name='gram2daskmat')
AA_dlo = linop.DLOGram(da.from_array(Apy, chunks=mc), name='mat2daskgram')

In [141]:
# Transposed product through the DLODense overload of graph_dot.
dsk = graph_dot(AA_dense, bb.name, 'x_out', transpose=True)
dsk = dask.sharedict.merge(AA_dense.dask, bb.dask, dsk)
x_out = da.Array(dsk, 'x_out', dtype=bb.dtype, chunks=bb.chunks, shape=bb.shape)
x_out.persist()

dask.array<x_out, shape=(1000,), dtype=float64, chunksize=(100,)>

In [142]:
# Forward product with the tall array: the output takes the row chunking
# and row count of A_tall.
dsk = graph_dot(A_tall, bb.name, 'x_out')
dsk = dask.sharedict.merge(A_tall.dask, bb.dask, dsk)
x_out = da.Array(dsk, 'x_out', dtype=A_tall.dtype, chunks=(A_tall.chunks[0],), shape=(A_tall.shape[0],))
x_out.persist()

dask.array<x_out, shape=(1100,), dtype=float64, chunksize=(100,)>

In [143]:
# Adjoint product with the tall array: the output takes the column chunking
# and column count of A_tall. The keyword must be spelled `transpose`;
# a misspelling is swallowed by **options and yields the forward product.
dsk = graph_dot(A_tall, x_out.name, 'y_out', transpose=True)
dsk = dask.sharedict.merge(A_tall.dask, x_out.dask, dsk)
y_out = da.Array(dsk, 'y_out', dtype=A_tall.dtype, chunks=(A_tall.chunks[1],), shape=(A_tall.shape[1],))
y_out.persist()

dask.array<y_out, shape=(1000,), dtype=float64, chunksize=(100,)>

In [144]:
# Cold-start initial state for the dense Gram operator; only x is evaluated.
dsk = cg_init_graph(AA_dense, bb, 'cg-iter0', M=None)
dsk = dask.sharedict.merge(AA_dense.dask, bb.dask, dsk)
x_out = da.Array(dsk, 'x-cg-iter0', dtype=bb.dtype, chunks=bb.chunks, shape=bb.shape)
x_out.persist()

dask.array<x-cg, shape=(1000,), dtype=float64, chunksize=(100,)>

In [145]:
# Benchmark: CG on the explicit (dense) Gram matrix.
if REPEAT_OLD:
    start = time.time()
    x, res, iters = cg_graph(AA_dense, bb, print_iters=10, time_iters=10, graph_iters=10)
    print "TOTAL", time.time() - start, iters

In [146]:
# Cold-start initial state for the Gram operator, evaluated for x, r and p.
get_array = functools.partial(da.Array, dtype=bb.dtype, chunks=bb.chunks, shape=bb.shape)
dsk = cg_init_graph(AA_dlo, bb, 'cg-iter0', M=None)
dsk = dask.sharedict.merge(AA_dlo.dask, bb.dask, dsk)
x = get_array(dsk, 'x-cg-iter0')
r = get_array(dsk, 'r-cg-iter0')
p = get_array(dsk, 'p-cg-iter0')
(x, r, p) = dask.persist(x, r, p)

In [147]:
# Benchmark: CG on the implicit Gram operator, 10 iterations per graph.
if REPEAT_OLD:
    start = time.time()
    x, res, iters = cg_graph(AA_dlo, bb, print_iters=10, time_iters=10, graph_iters=10)
    print "TOTAL", time.time() - start, iters

In [148]:
# Benchmark: 30 iterations per graph, with graph optimization.
if REPEAT_OLD:
    start = time.time()
    x, res, iters = cg_graph(AA_dlo, bb, print_iters=10, time_iters=10, graph_iters=30, optimize=True)
    print "TOTAL", time.time() - start, iters

In [149]:
# Benchmark: 30 iterations per graph, without graph optimization.
if REPEAT_OLD:
    start = time.time()
    x, res, iters = cg_graph(AA_dlo, bb, print_iters=10, time_iters=10, graph_iters=30)
    print "TOTAL", time.time() - start, iters

In [150]:
# Benchmark: Gram operator preconditioned with the inverse diagonal of AApy.
if REPEAT_OLD:
    MM_dlo = linop.DLODiagonal(da.from_array(1. / np.diag(AApy), chunks=mc))
    start = time.time()
    x, res, iters = cg_graph(AA_dlo, bb, preconditioner=MM_dlo, print_iters=10, time_iters=10, graph_iters=30)
    print "TOTAL", time.time() - start, iters

In [151]:
# Benchmark: regularized Gram operator with a diagonal preconditioner,
# 30 iterations per graph. Note that AA_dlo is rebound here.
if REPEAT_OLD:
    AA_dlo = linop.DLORegularizedGram(A_tall)
    MM_dlo = linop.DLODiagonal(da.from_array(1. / (1 + np.diag(AApy)), chunks=mc))
    start = time.time()
    x, res, iters = cg_graph(AA_dlo, bb, preconditioner=MM_dlo, print_iters=10, time_iters=10, graph_iters=30)
    print "TOTAL", time.time() - start, iters

In [152]:
# Same benchmark, 10 iterations per graph.
if REPEAT_OLD:
    AA_dlo = linop.DLORegularizedGram(A_tall)
    MM_dlo = linop.DLODiagonal(da.from_array(1. / (1 + np.diag(AApy)), chunks=mc))
    start = time.time()
    x, res, iters = cg_graph(AA_dlo, bb, preconditioner=MM_dlo, print_iters=10, time_iters=10, graph_iters=10)
    print "TOTAL", time.time() - start, iters

In [153]:
# Same benchmark, 20 iterations per graph.
if REPEAT_OLD:
    AA_dlo = linop.DLORegularizedGram(A_tall)
    MM_dlo = linop.DLODiagonal(da.from_array(1. / (1 + np.diag(AApy)), chunks=mc))
    start = time.time()
    x, res, iters = cg_graph(AA_dlo, bb, preconditioner=MM_dlo, print_iters=10, time_iters=10, graph_iters=20)
    print "TOTAL", time.time() - start, iters

In [154]:
# Same benchmark, 5 iterations per graph.
if REPEAT_OLD:
    AA_dlo = linop.DLORegularizedGram(A_tall)
    MM_dlo = linop.DLODiagonal(da.from_array(1. / (1 + np.diag(AApy)), chunks=mc))
    start = time.time()
    x, res, iters = cg_graph(AA_dlo, bb, preconditioner=MM_dlo, print_iters=10, time_iters=10, graph_iters=5)
    print "TOTAL", time.time() - start, iters

### warm start

- ensure variables built correctly

In [155]:
# Check every intermediate of the warm-started init graph against the same
# quantity computed with plain dask array operations.
# NOTE(review): the hard-coded key names assume AA_dlo is the regularized
# Gram operator created from A_tall in the benchmark cells above -- confirm.
if REPEAT_OLD:
    x, res, iters = cg_graph(AA_dlo, bb, graph_iters=1, maxiter=300)
    dsk = cg_init_graph(AA_dlo, bb, 'cg-iter0', x_init=x, M=None, M12=None)

    build_array = functools.partial(da.Array, shape=x.shape, chunks=x.chunks, dtype=x.dtype)
    build_array_m = functools.partial(da.Array, shape=(A_tall.shape[0],), chunks=(A_tall.chunks[0],), dtype=x.dtype)
    dmerge = dask.sharedict.merge(AA_dlo.dask, bb.dask, x.dask, dsk)
    x0 = build_array(dmerge, 'x-cg-iter0')
    assert da.linalg.norm(x - x0).compute() < 1e-15 * (1 + np.sqrt(x0.size))
    Ax0 = build_array_m(dmerge, 'DLO-gram-Atall-gramA-x-cg-iter0')
    Ax0d = A_tall.dot(x)
    assert da.linalg.norm(Ax0d - Ax0).compute() < 1e-15 * (1 + np.sqrt(Ax0.size))
    AAx0 = build_array(dmerge, 'DLO-regularized-gram-Atall-gramAA-x-cg-iter0')
    AAx0d = A_tall.T.dot(Ax0d)
    assert da.linalg.norm(AAx0d - AAx0).compute() < 1e-15 * (1 + np.sqrt(AAx0.size))
    IAAx0 = build_array(dmerge, 'DLO-regularized-gram-Atall-mul-x-cg-iter0')
    IAAx0d = x + AAx0d
    assert da.linalg.norm(IAAx0d - IAAx0).compute() < 1e-15 * (1 + np.sqrt(IAAx0.size))
    r0 = build_array(dmerge, 'r-cg-iter0')
    # cg_init_graph defines r0 = b - A * x0, so the reference uses that sign
    r0d = bb - IAAx0d
    assert da.linalg.norm(r0 - r0d).compute() < 1e-15 * (1 + np.sqrt(r0.size))
    print('all pass')

- ensure residual small on warmstart 

In [156]:
# The initial residual of a solve warm-started at the CG solution should
# match the residual reported by that solve.
# NOTE(review): the comparison is one-sided (no abs()), and `build_array`
# comes from the previous cell -- confirm both are intended.
if REPEAT_OLD:
    x, res, iters = cg_graph(AA_dlo, bb, graph_iters=1, maxiter=300)
    dsk = cg_init_graph(AA_dlo, bb, 'cg-iter0', x_init=x, M=None, M12=None)
    dmerge = dask.sharedict.merge(AA_dlo.dask, bb.dask, x.dask, dsk)
    r0 = build_array(dmerge, 'r-cg-iter0')
    assert da.linalg.norm(r0).compute() - res < 1e-15 * (1 + np.sqrt(r0.size))

- ensure 0 iterations on exact warmstart

In [157]:
# Warm-starting at the converged solution must return after 0 iterations.
if REPEAT_OLD:
    x, res, iters = cg_graph(AA_dlo, bb, graph_iters=1, maxiter=300)
    x_ws, res_ws, iters_ws = cg_graph(AA_dlo, bb, x_init=x, graph_iters=1, maxiter=300)
    assert iters_ws == 0 

- ensure iterations deterministic w/ warmstart

In [158]:
# Stopping after 10 iterations and resuming from that iterate is expected
# to need exactly the remaining number of iterations.
if REPEAT_OLD:
    x, res, iters = cg_graph(AA_dlo, bb, graph_iters=1, maxiter=300)
    x_partial, res_partial, iters_partial = cg_graph(AA_dlo, bb, graph_iters=1, maxiter=10)
    x_ws, res_ws, iters_ws = cg_graph(AA_dlo, bb, x_init=x_partial, graph_iters=1, maxiter=300)
    assert iters_partial + iters_ws == iters

- try nearby b: $\|r_0\| = \|Ax_0 - b_{perturb}\| \approx \|b - b_{perturb}\|$

In [159]:
# With a slightly perturbed right-hand side, the initial residual at the
# old solution should be close to the size of the perturbation.
# NOTE(review): one-sided comparison, and `build_array` comes from an
# earlier cell -- confirm.
if REPEAT_OLD:
    bb_perturb = bb * (1 + da.random.normal(0, 0.001, m, chunks=mc))
    x, res, iters = cg_graph(AA_dlo, bb, graph_iters=1, maxiter=300)
    dsk = cg_init_graph(AA_dlo, bb_perturb, 'cg-iter0', x_init=x, M=None, M12=None)
    dmerge = dask.sharedict.merge(AA_dlo.dask, bb_perturb.dask, x.dask, dsk)
    r0 = build_array(dmerge, 'r-cg-iter0')
    assert da.linalg.norm(r0).compute() - da.linalg.norm(bb - bb_perturb).compute() < res 


- try nearby b: $\mathrm{iters}(A, b_{perturb}, x^\star) < \mathrm{iters}(A, b)$

In [160]:
# Solving the perturbed system from the old solution should take fewer
# iterations than the original cold-start solve.
if REPEAT_OLD:
    bb_perturb = bb * (1 + da.random.normal(0, 0.001, m, chunks=mc))
    x, res, iters = cg_graph(AA_dlo, bb, graph_iters=1, maxiter=300)
    x_perturb, res_perturb, iters_perturb = cg_graph(AA_dlo, bb_perturb, x_init=x, graph_iters=1, maxiter=300)
    print iters, iters_perturb
    assert iters > iters_perturb

- try nearby x:

In [161]:
# Warm-starting from a slightly perturbed solution should take fewer
# iterations than the cold-start solve.
if REPEAT_OLD:
    # solve first: the perturbation scale is derived from this solution
    x, res, iters = cg_graph(AA_dlo, bb, graph_iters=1, maxiter=300)
    perturb = 0.1 * da.mean(x).compute() / (x.size**0.5)
    x0 = x * (1 + da.random.normal(0, perturb, m, chunks=mc))
    # warm start from the perturbed vector built above
    x_perturb, res_perturb, iters_perturb = cg_graph(AA_dlo, bb, x_init=x0, graph_iters=1, maxiter=300)
    print('{} {}'.format(iters, iters_perturb))
    assert iters > iters_perturb

- try nearby x, b:

In [162]:
# Perturb both the solution (used as warm start) and the right-hand side;
# the warm-started solve should still take fewer iterations.
if REPEAT_OLD:
    x, res, iters = cg_graph(AA_dlo, bb, graph_iters=1, maxiter=300)
    perturb_x = 0.1 * da.mean(x).compute() / (x.size**0.5)
    perturb_b = 0.1 * da.mean(bb).compute() / (bb.size**0.5)
    xp = x * (1 + da.random.normal(0, perturb_x, m, chunks=mc))
    bbp = bb * (1 + da.random.normal(0, perturb_b, m, chunks=mc))
    x_perturb, res_perturb, iters_perturb = cg_graph(AA_dlo, bbp, x_init=xp, graph_iters=1, maxiter=300)
    print iters, iters_perturb
    assert iters > iters_perturb

### Graph projection

- solve (I + A'A)x = A'y + xi

In [163]:
# Graph projection via CG: solve (I + A'A) x = A'y + x_in with x_in = 0,
# then set y = A x.
GP_TEST0 = REPEAT_OLD
if GP_TEST0:
    AA_dlo.regularization = 1
    yy = da.random.random(A_tall.shape[0], chunks=mc)
    xx = 0
    ATyx = A_tall.T.dot(yy) + xx
    # keep the persisted right-hand side under the name that is used below
    yy, ATyx = dask.persist(yy, ATyx)
    x_out, res, iters = cg_graph(AA_dlo, ATyx, tol=1e-8, print_iters=10, graph_iters=1, maxiter=300)
    # y must come from the solution of *this* solve, not from a stale `x`
    y_out = A_tall.dot(x_out).persist()

In [164]:
# Projecting the projected point again should return the same x.
if GP_TEST0:
    # persist() returns a new collection; keep it instead of discarding it
    ATyx_next = (A_tall.T.dot(y_out) + x_out).persist()
    x_next, res_next, iters_next = cg_graph(AA_dlo, ATyx_next, tol=1e-8, print_iters=10, graph_iters=1, maxiter=300)
    assert da.linalg.norm(x_next - x_out).compute() < 1e-15 * (1 + x_out.size**0.5)

## CGLS $$\mathrm{minimize }\quad \|Ax - b\|_2^2 + \rho\|x\|_2^2$$

In [165]:
def cgls(A, b, rho, **options):
    """ Regularized least squares via CG on the normal equations.

    Builds the regularized Gram operator of `A` with regularization `rho`
    and the right-hand side A' * b, then hands both to :func:`cg_graph`.
    `**options` are forwarded to :func:`cg_graph` unchanged.

    Returns:
        Whatever :func:`cg_graph` returns: (x, res, iters).
    """
    normal_op = linop.DLORegularizedGram(A, regularization=rho, transpose=False)
    normal_rhs = da.dot(A.T, b)
    return cg_graph(normal_op, normal_rhs, **options)

In [166]:
# CGLS smoke test on the tall matrix: b = y - A x for random x, y.
CGLS_TALL = REPEAT_OLD
# CGLS_TALL = True
if CGLS_TALL:
    y_in = da.random.random(A_tall.shape[0], chunks=mc)
    x_in = da.random.random(A_tall.shape[1], chunks=mc)
    Ax_in = A_tall.dot(x_in)
    b_in = y_in - Ax_in
    A_tall, b_in = dask.persist(A_tall, b_in)
    x, res, iters = cgls(A_tall, b_in, 1.)

In [167]:
# CGLS-based update x_out = x_in + x, y_out = A x_out on the tall matrix.
# NOTE(review): `Ax_in` is not recomputed here, so it still refers to the
# x_in drawn in the previous cell -- confirm this is intended.
if CGLS_TALL:
    x_in = da.random.random(A_tall.shape[1], chunks=mc)
    y_in = A_tall.dot(x_in)
    b_in = y_in - Ax_in
    A_tall, b_in = dask.persist(A_tall, b_in)
    x, res, iters = cgls(A_tall, b_in, 1.)
    x_out = x + x_in 
    y_out = A_tall.dot(x_out)
    x_out, y_out = dask.persist(x_out, y_out)

### fat CG

In [168]:
# Fat (more columns than rows) test matrix: the persisted transpose of A_tall.
A_fat = (A_tall.T).persist()

- CG fat?

In [169]:
# Gram operator of the fat matrix (default transpose setting) with an
# all-ones right-hand side; build_array wraps graph keys as vectors like b_fat.
Afat_gram = linop.DLOGram(A_fat)
b_fat = da.ones(Afat_gram.shape[1], chunks=mc)
A_fat, b_fat = dask.persist(A_fat, b_fat)
build_array = functools.partial(da.Array, dtype=b_fat.dtype, chunks=b_fat.chunks, shape=b_fat.shape)

In [170]:
# Initial CG state for the fat Gram operator.
# (x is persisted here and again together with r and p on the last line.)
dsk = cg_init_graph(Afat_gram, b_fat, 'cg-iter0')
dsk = dask.sharedict.merge(dsk, Afat_gram.dask, b_fat.dask)
x = build_array(dsk, 'x-cg-iter0').persist()
r = build_array(dsk, 'r-cg-iter0')
p = build_array(dsk, 'p-cg-iter0')
x, r, p = dask.persist(x, r, p)

In [171]:
# One CG step for the fat Gram operator. The arrays are built from the
# *iter1* keys with matching x / r / p prefixes, so the step is actually
# evaluated and r, p are not aliases of x.
dsk = cg_iterate_graph(Afat_gram, 'cg-iter0', 'cg-iter1')
dsk = dask.sharedict.merge(dsk, Afat_gram.dask, x.dask, r.dask, p.dask)
x = build_array(dsk, 'x-cg-iter1')
r = build_array(dsk, 'r-cg-iter1')
p = build_array(dsk, 'p-cg-iter1')
x, r, p = dask.persist(x, r, p)

In [172]:
# Benchmark: full CG solve with the fat Gram operator.
if REPEAT_OLD:
# if True:
    x, res, i = cg_graph(Afat_gram, b_fat, print_iters=10, graph_iters=10)
    print res, i
    print x.shape, Afat_gram.shape

- CG fat w/ forced form $$A_{gram} = A^TA$$
- convergence not expected since A'A singular for A fat

In [173]:
# Gram operator forced to the A'A form (transpose=False) for the fat matrix.
ATA_fat = linop.DLOGram(A_fat, transpose=False)
b_fat = da.ones(ATA_fat.shape[1], chunks=mc)
A_fat, b_fat = dask.persist(A_fat, b_fat)
build_array = functools.partial(da.Array, dtype=b_fat.dtype, chunks=b_fat.chunks, shape=b_fat.shape)

In [174]:
# Initial CG state for the forced A'A operator.
dsk = cg_init_graph(ATA_fat, b_fat, 'cg-iter0')
dsk = dask.sharedict.merge(dsk, ATA_fat.dask, b_fat.dask)
x = build_array(dsk, 'x-cg-iter0')
r = build_array(dsk, 'r-cg-iter0')
p = build_array(dsk, 'p-cg-iter0')
x, r, p = dask.persist(x, r, p)

In [175]:
# One CG step for the forced A'A operator; read the iter1 keys so that the
# iteration graph is actually evaluated (iter0 is already persisted).
dsk = cg_iterate_graph(ATA_fat, 'cg-iter0', 'cg-iter1')
dsk = dask.sharedict.merge(dsk, ATA_fat.dask, x.dask, r.dask, p.dask)
x = build_array(dsk, 'x-cg-iter1')
r = build_array(dsk, 'r-cg-iter1')
p = build_array(dsk, 'p-cg-iter1')
x, r, p = dask.persist(x, r, p)

In [176]:
# Benchmark: full CG solve with the forced A'A operator.
if REPEAT_OLD:
# if True:
    x, res, i = cg_graph(ATA_fat, b_fat, print_iters=10, graph_iters=10)
    print res, i
    print x.shape, ATA_fat.shape

- CG fat regularized

In [178]:
# Regularized Gram operator of the fat matrix (default transpose setting).
Afat_rgram = linop.DLORegularizedGram(A_fat)
b_fat = da.ones(Afat_rgram.shape[1], chunks=mc)
A_fat, b_fat = dask.persist(A_fat, b_fat)
build_array = functools.partial(da.Array, dtype=b_fat.dtype, chunks=b_fat.chunks, shape=b_fat.shape)

In [179]:
# Initial CG state for the regularized fat Gram operator. The graph that is
# merged in must belong to the operator the init graph was built for.
dsk = cg_init_graph(Afat_rgram, b_fat, 'cg-iter0')
dsk = dask.sharedict.merge(dsk, Afat_rgram.dask, b_fat.dask)
x = build_array(dsk, 'x-cg-iter0')
r = build_array(dsk, 'r-cg-iter0')
p = build_array(dsk, 'p-cg-iter0')
x, r, p = dask.persist(x, r, p)

In [180]:
# One CG step for the regularized fat Gram operator; read the iter1 keys so
# that the iteration graph is actually evaluated.
dsk = cg_iterate_graph(Afat_rgram, 'cg-iter0', 'cg-iter1')
dsk = dask.sharedict.merge(dsk, Afat_rgram.dask, x.dask, r.dask, p.dask)
x = build_array(dsk, 'x-cg-iter1')
r = build_array(dsk, 'r-cg-iter1')
p = build_array(dsk, 'p-cg-iter1')
x, r, p = dask.persist(x, r, p)

In [181]:
# Benchmark: full CG solve with the regularized fat Gram operator.
if REPEAT_OLD:
# if True:
    x, res, i = cg_graph(Afat_rgram, b_fat, print_iters=10, graph_iters=10)
    print res, i
    print x.shape, Afat_rgram.shape

- CG fat regularized with forced form $$A_{gram} = A^TA$$

In [182]:
# Regularized Gram operator forced to the A'A form for the fat matrix.
# build_array wraps vectors sized like the columns of A_fat, build_array_m
# vectors sized like its rows.
ATA_fatreg = linop.DLORegularizedGram(A_fat, transpose=False)
b_fatreg = da.ones(ATA_fatreg.shape[1], chunks=mc)
A_fat, b_fatreg = dask.persist(A_fat, b_fatreg)
build_array = functools.partial(da.Array, dtype=A_fat.dtype, chunks=(A_fat.chunks[1],), shape=(A_fat.shape[1],))
build_array_m = functools.partial(da.Array, dtype=A_fat.dtype, chunks=(A_fat.chunks[0],), shape=(A_fat.shape[0],))

In [183]:
# Initial CG state for the forced regularized A'A operator.
dsk = cg_init_graph(ATA_fatreg, b_fatreg, 'cg-iter0')
dsk = dask.sharedict.merge(dsk, ATA_fatreg.dask, b_fatreg.dask)
x = build_array(dsk, 'x-cg-iter0')
r = build_array(dsk, 'r-cg-iter0')
p = build_array(dsk, 'p-cg-iter0')
x, r, p = dask.persist(x, r, p)

In [184]:
# One CG step for the forced regularized A'A operator; read the iter1 keys
# so that the iteration graph is actually evaluated.
dsk = cg_iterate_graph(ATA_fatreg, 'cg-iter0', 'cg-iter1')
dsk = dask.sharedict.merge(dsk, ATA_fatreg.dask, x.dask, r.dask, p.dask)
x = build_array(dsk, 'x-cg-iter1')
r = build_array(dsk, 'r-cg-iter1')
p = build_array(dsk, 'p-cg-iter1')
x, r, p = dask.persist(x, r, p)

In [185]:
# Benchmark: full CG solve with the forced regularized A'A operator.
if REPEAT_OLD:
# if True:
    x, res, i = cg_graph(ATA_fatreg, b_fatreg, print_iters=10, graph_iters=10, maxiter=300)
    print res, i
    print x.shape, ATA_fatreg.shape

			10: residual = 3.7e-03
			20: residual = 2.7e-07
2.7277470348e-07 20
(1100,) (1100, 1100)


In [186]:
# CGLS smoke test on the fat matrix: b = y - A x for random x, y.
CGLS_FAT = REPEAT_OLD
# CGLS_FAT = True
if CGLS_FAT:
    y_in = da.random.random(A_fat.shape[0], chunks=mc)
    x_in = da.random.random(A_fat.shape[1], chunks=mc)
    Ax_in = A_fat.dot(x_in)
    b_in = y_in - Ax_in
    # rebind A_fat (not A_tall): later cells still need the tall matrix
    A_fat, b_in = dask.persist(A_fat, b_in)
    x, res, iters = cgls(A_fat, b_in, 1.)

In [187]:
# CGLS-based update x_out = x_in + x, y_out = A x_out on the fat matrix.
# NOTE(review): `Ax_in` is not recomputed here, so it still refers to the
# x_in drawn in the previous cell -- confirm this is intended.
if CGLS_FAT:
    x_in = da.random.random(A_fat.shape[1], chunks=mc)
    y_in = A_fat.dot(x_in)
    b_in = y_in - Ax_in
    A_fat, b_in = dask.persist(A_fat, b_in)
    x, res, iters = cgls(A_fat, b_in, 1.)
    x_out = x + x_in 
    y_out = A_fat.dot(x_out)
    x_out, y_out = dask.persist(x_out, y_out)

### Graph projection methods

### CGLS: update $$x = x_0 + \mathrm{argmin}_{\tilde{x}} \left( \|A\tilde{x} - (y_0 - Ax_0)\|_2^2 + \|\tilde{x}\|_2^2 \right) \\ y = Ax$$

In [188]:
def cgls_project(A, x, y, tol=1e-8, **options):
    """Update the pair (x, y) using a CGLS solve so that y_out = A.dot(x_out).

    The correction is obtained from ``cgls(A, y - A.dot(x), 1, tol=tol)`` and
    added to ``x``; ``y_out`` is then recomputed from the updated ``x``.

    Parameters
    ----------
    A : matrix-like object providing ``dot``.
    x, y : starting point for the update.
    tol : tolerance forwarded to ``cgls``.
    **options : accepted for call compatibility; not used by this function.

    Returns
    -------
    (x_out, y_out, res, iters), where ``res`` and ``iters`` are passed through
    unchanged from ``cgls``.
    """
    rhs = y - A.dot(x)
    A, rhs = dask.persist(A, rhs)
    x_step, res, iters = cgls(A, rhs, 1, tol=tol)

    x_new = x + x_step
    x_new, y_new = dask.persist(x_new, A.dot(x_new))
    return x_new, y_new, res, iters

In [189]:
# Check of cgls_project on the tall matrix: apply it to a random (x, y), then
# apply it again to its own output.
if REPEAT_OLD:
    xrand = da.random.random(A_tall.shape[1], chunks=mc)
    yrand = da.random.random(A_tall.shape[0], chunks=mc)
    start = time.time()
    xo, yo, res, iters = cgls_project(A_tall, xrand, yrand)
    print "first solve time", time.time() - start
    # Second application starts from the first application's output.
    xoo, yoo, res_, iters_ = cgls_project(A_tall, xo, yo)
    # Iteration count of the first solve.
    print iters
    # The second solve must take no iterations and leave x unchanged, up to a
    # tolerance that grows with sqrt(size).
    assert iters_ == 0
    assert da.linalg.norm(xo - xoo) < 1e-15 * (1 + xo.size**0.5)

In [190]:
# Check of cgls_project on the fat matrix: apply it to a random (x, y), then
# apply it again to its own output.
if REPEAT_OLD:
# if True:
    xrand = da.random.random(A_fat.shape[1], chunks=mc)
    yrand = da.random.random(A_fat.shape[0], chunks=mc)
    start = time.time()
    xo, yo, res, iters = cgls_project(A_fat, xrand, yrand)
    print "first solve time", time.time() - start
    # Second application starts from the first application's output.
    xoo, yoo, res_, iters_ = cgls_project(A_fat, xo, yo)
    # Iteration count of the first solve.
    print iters
    # The second solve must take no iterations and leave x unchanged, up to a
    # tolerance that grows with sqrt(size).
    assert iters_ == 0
    assert da.linalg.norm(xo - xoo) < 1e-15 * (1 + xo.size**0.5)

### CG: update $$x = (I + A^TA)^{-1}(A^Ty_0 + x_0) \\y = Ax$$


In [203]:
def cg_project(A, x, y, tol=1e-8, **options):
    """Update the pair (x, y) using a CG solve so that y_out = A.dot(x_out).

    Builds the right-hand side ``A.T.dot(y) + x`` and hands it to ``cg_graph``
    together with ``linop.DLORegularizedGram(A, transpose=False)``; ``y_out``
    is then recomputed from the returned ``x_out``.

    Parameters
    ----------
    A : matrix-like object providing ``T`` and ``dot``.
    x, y : starting point for the update.
    tol : tolerance forwarded to ``cg_graph``.
    **options : extra keyword arguments forwarded to ``cg_graph`` unchanged
        (callers in this notebook pass ``x_init``).

    Returns
    -------
    (x_out, y_out, res, iters), where ``res`` and ``iters`` are passed through
    unchanged from ``cg_graph``.
    """
    rhs = A.T.dot(y) + x
    gram_op = linop.DLORegularizedGram(A, transpose=False)
    x_new, res, iters = cg_graph(gram_op, rhs, tol=tol, **options)
    x_new, y_new = dask.persist(x_new, A.dot(x_new))
    return x_new, y_new, res, iters

In [206]:
# Check of cg_project on the tall matrix: apply it to a random (x, y), then
# apply it again to its own output. The REPEAT_OLD guard is currently
# bypassed, so this cell always runs.
# if REPEAT_OLD:
if True:
    xrand = da.random.random(A_tall.shape[1], chunks=mc)
    yrand = da.random.random(A_tall.shape[0], chunks=mc)
    start = time.time()
    xo, yo, res, iters = cg_project(A_tall, xrand, yrand)
    print "first solve time", time.time() - start
    # Second application starts from the first output; x_init=xo is forwarded
    # to cg_graph through cg_project's **options.
    xoo, yoo, res_, iters_ = cg_project(A_tall, xo, yo, x_init=xo)
    # Iteration count of the first solve.
    print iters
    # The second solve must take no iterations and leave x unchanged, up to a
    # tolerance that grows with sqrt(size).
    assert iters_ == 0
    assert da.linalg.norm(xo - xoo) < 1e-15 * (1 + xo.size**0.5)

first solve time 3.06351184845
24


In [205]:
# Check of cg_project on the fat matrix: apply it to a random (x, y), then
# apply it again to its own output. The REPEAT_OLD guard is currently
# bypassed, so this cell always runs.
# if REPEAT_OLD:
if True:
    xrand = da.random.random(A_fat.shape[1], chunks=mc)
    yrand = da.random.random(A_fat.shape[0], chunks=mc)
    start = time.time()
    xo, yo, res, iters = cg_project(A_fat, xrand, yrand)
    print "first solve time", time.time() - start
    # Second application starts from the first output; x_init=xo is forwarded
    # to cg_graph through cg_project's **options.
    xoo, yoo, res_, iters_ = cg_project(A_fat, xo, yo, x_init=xo)
    # Iteration count of the first solve.
    print iters
    # The second solve must take no iterations and leave x unchanged, up to a
    # tolerance that grows with sqrt(size).
    assert iters_ == 0
    assert da.linalg.norm(xo - xoo) < 1e-15 * (1 + xo.size**0.5)

first solve time 3.00991201401
24
