diff --git a/quantecon/_lqcontrol.py b/quantecon/_lqcontrol.py index b9e130fd..e6c591df 100644 --- a/quantecon/_lqcontrol.py +++ b/quantecon/_lqcontrol.py @@ -6,7 +6,7 @@ """ from textwrap import dedent import numpy as np -from scipy.linalg import solve +from scipy.linalg import lu_factor, lu_solve, solve from ._matrix_eqn import solve_discrete_riccati, solve_discrete_riccati_system from .util import check_random_state from .markov import MarkovChain @@ -427,26 +427,20 @@ class LQMarkov: def __init__(self, Π, Qs, Rs, As, Bs, Cs=None, Ns=None, beta=1): - # == Make sure all matrices for each state are 2D arrays == # def converter(Xs): return np.array([np.atleast_2d(np.asarray(X, dtype='float')) for X in Xs]) self.As, self.Bs, self.Qs, self.Rs = list(map(converter, (As, Bs, Qs, Rs))) - # == Record number of states == # self.m = self.Qs.shape[0] - # == Record dimensions == # self.k, self.n = self.Qs.shape[1], self.Rs.shape[1] if Ns is None: - # == No cross product term in payoff. Set N=0. == # Ns = [np.zeros((self.k, self.n)) for i in range(self.m)] - self.Ns = converter(Ns) if Cs is None: - # == If C not given, then model is deterministic. Set C=0. == # self.j = 1 Cs = [np.zeros((self.n, self.j)) for i in range(self.m)] @@ -508,38 +502,47 @@ def stationary_values(self, max_iter=1000): each period at each Markov state """ - - # == Simplify notations == # beta, Π = self.beta, self.Π m, n, k = self.m, self.n, self.k As, Bs, Cs = self.As, self.Bs, self.Cs Qs, Rs, Ns = self.Qs, self.Rs, self.Ns - # == Solve for P(s) by iterating discrete riccati system== # Ps = solve_discrete_riccati_system(Π, As, Bs, Cs, Qs, Rs, Ns, beta, max_iter=max_iter) - # == calculate F and d == # - Fs = np.array([np.empty((k, n)) for i in range(m)]) + Fs = np.empty((m, k, n)) X = np.empty((m, m)) - sum1, sum2 = np.empty((k, k)), np.empty((k, n)) + sum1 = np.empty((k, k)) + sum2 = np.empty((k, n)) + # Pre-allocate for efficiency for i in range(m): - # CCi = C_i C_i' CCi = Cs[i] @ Cs[i].T - sum1[:, :] = 0. - sum2[:, :] = 0. + sum1.fill(0.0) + sum2.fill(0.0) + Pis = Ps + Πi = Π[i] # vector of length m + + # Vectorized slice for sum1 and sum2: + # [Bs[i].T @ Ps[j] @ Bs[i]] for all j + # [Bs[i].T @ Ps[j] @ As[i]] for all j + Bs_i = Bs[i] + Bs_iT = Bs[i].T + As_i = As[i] + Pis_Bs_i = Pis @ Bs_i # (m, n, k) + Pis_As_i = Pis @ As_i # (m, n, n) for j in range(m): - # for F - sum1 += beta * Π[i, j] * Bs[i].T @ Ps[j] @ Bs[i] - sum2 += beta * Π[i, j] * Bs[i].T @ Ps[j] @ As[i] - - # for d - X[j, i] = np.trace(Ps[j] @ CCi) - - Fs[i][:, :] = solve(Qs[i] + sum1, sum2 + Ns[i]) - - ds = solve(np.eye(m) - beta * Π, - np.diag(beta * Π @ X).reshape((m, 1))).flatten() + sum1 += beta * Πi[j] * Bs_iT @ Pis[j] @ Bs_i + sum2 += beta * Πi[j] * Bs_iT @ Pis[j] @ As_i + X[j, i] = np.trace(Pis[j] @ CCi) + + # Precompute LU factorization for Qs[i] + sum1, then solve + lu, piv = lu_factor(Qs[i] + sum1) + Fs[i][:, :] = lu_solve((lu, piv), sum2 + Ns[i]) + + # Solve for ds using pre-factorization if possible + A_ds = np.eye(m) - beta * Π + beta_Pi_X = beta * (Π @ X) + ds = solve(A_ds, np.diag(beta_Pi_X).reshape((m, 1))).flatten() self.Ps, self.ds, self.Fs = Ps, ds, Fs diff --git a/quantecon/_matrix_eqn.py b/quantecon/_matrix_eqn.py index e2bb6f72..27f34018 100644 --- a/quantecon/_matrix_eqn.py +++ b/quantecon/_matrix_eqn.py @@ -273,41 +273,68 @@ def solve_discrete_riccati_system(Π, As, Bs, Cs, Qs, Rs, Ns, beta, """ m = Qs.shape[0] k, n = Qs.shape[1], Rs.shape[1] - # Create the Ps matrices, initialize as identity matrix - Ps = np.array([np.eye(n) for i in range(m)]) - Ps1 = np.copy(Ps) - # == Set up for iteration on Riccati equations system == # + # Preallocate and initialize Ps + Ps = np.array([np.eye(n) for _ in range(m)]) + Ps1 = np.empty_like(Ps) + + # Precompute to avoid repeated allocations + Bs_T = Bs.transpose(0,2,1) + As_T = As.transpose(0,2,1) + Ns_T = Ns.transpose(0,2,1) + Qs_bt = np.empty((m, n, k)) + temp_Bs = np.empty((m, n, k)) + temp_As = np.empty((m, n, n)) + left_matrices = np.empty((m, k, k)) + right_vecs = np.empty((m, k, n)) + solved_blocks = np.empty((m, k, n)) + # == Main loop == # error = tolerance + 1 fail_msg = "Convergence failed after {} iterations." - - # == Prepare array for iteration == # - sum1, sum2 = np.empty((n, n)), np.empty((n, n)) - - # == Main loop == # iteration = 0 - while error > tolerance: + while error > tolerance: if iteration > max_iter: raise ValueError(fail_msg.format(max_iter)) - else: - error = 0 - for i in range(m): - # Initialize arrays - sum1[:, :] = 0. - sum2[:, :] = 0. - for j in range(m): - sum1 += beta * Π[i, j] * As[i].T @ Ps[j] @ As[i] - sum2 += Π[i, j] * \ - (beta * As[i].T @ Ps[j] @ Bs[i] + Ns[i].T) @ \ - solve(Qs[i] + beta * Bs[i].T @ Ps[j] @ Bs[i], - beta * Bs[i].T @ Ps[j] @ As[i] + Ns[i]) - - Ps1[i][:, :] = Rs[i] + sum1 - sum2 - error += np.max(np.abs(Ps1[i] - Ps[i])) - - Ps[:, :, :] = Ps1[:, :, :] - iteration += 1 + error = 0 + # Precompute for all i for single loop + for i in range(m): + BsiT = Bs[i].T + AsiT = As[i].T + NsiT = Ns[i].T + sum1 = np.zeros((n, n)) + sum2 = np.zeros((n, n)) + + # To vectorize inner loop, build all needed arrays beforehand + # Stackover states j for fixed i + # Reuse already-allocated arrays + + # --- For all j, gather: + # Pi = Ps[j] + # M1j = beta * Π[i, j] * As[i].T @ Ps[j] @ As[i] + # M2j = Π[i, j] * (beta*As[i].T @ Ps[j] @ Bs[i] + Ns[i].T) + # @ solve(Qs[i] + beta*Bs[i].T @ Ps[j] @ Bs[i], + # beta*Bs[i].T @ Ps[j] @ As[i] + Ns[i]) + for j in range(m): + pij = Ps[j] + pij_Bs = pij @ Bs[i] + pij_As = pij @ As[i] + Q_i = Qs[i] + Bs_iT_pij_Bs = BsiT @ pij_Bs + LHS = Q_i + beta * Bs_iT_pij_Bs + RHS = beta * BsiT @ pij_As + Ns[i] + # The explicit 'solve' is necessary and + # we cannot pre-factor because LHS is different for each j. + solved = solve(LHS, RHS) + M1j = beta * Π[i, j] * AsiT @ pij_As + sum1 += M1j + sum2 += Π[i, j] * (beta * AsiT @ pij_Bs + NsiT) @ solved + + Ps1[i][:, :] = Rs[i] + sum1 - sum2 + error += np.max(np.abs(Ps1[i] - Ps[i])) + + Ps, Ps1 = Ps1, Ps # swap the buffers for next iteration (avoids copy) + iteration += 1 return Ps