# In [None]:
import numpy as np

def project_simplex(z):
    """Map a 1-D vector onto the probability simplex.

    The entries are first floored at 1e-12; the floored vector is then
    projected with the sort-and-threshold rule: find the largest prefix of the
    descending-sorted values that stays above a common shift, subtract that
    shift from every entry and clip at zero.

    Note that because of the initial floor, an input with negative entries
    yields the projection of the *floored* vector, not of the raw input.

    Returns an array with non-negative entries that sum to 1.
    """
    floored = np.maximum(z, 1e-12)
    descending = np.sort(floored)[::-1]
    running_total = np.cumsum(descending)
    ranks = np.arange(1, len(descending) + 1)
    # Positions (in sorted order) that remain strictly positive after the shift.
    in_support = np.nonzero(descending * ranks > (running_total - 1))[0]
    last = in_support[-1]
    shift = (running_total[last] - 1) / (last + 1.0)
    return np.maximum(floored - shift, 0.0)

class MultiAssetMarket:
    """Synthetic market that emits jointly Gaussian per-period asset returns.

    Expected returns are evenly spaced from 0.0002 to 0.0008 over the ``n``
    assets.  Each asset has variance 1e-4 and each pair of distinct assets has
    covariance 5e-5 (a pairwise correlation of 0.5).
    """

    def __init__(self, n=5, seed=0):
        self.n = n
        # Dedicated generator, so draws are reproducible for a given seed.
        self.rng = np.random.default_rng(seed)
        self.mu = np.linspace(0.0002, 0.0008, n)
        # 1.0 on the diagonal and 0.5 elsewhere, then scaled to return units.
        correlation = np.full((n, n), 0.5) + 0.5 * np.identity(n)
        self.Sigma = 0.0001 * correlation

    def step(self):
        """Draw one vector of asset returns, shape ``(n,)``."""
        return self.rng.multivariate_normal(self.mu, self.Sigma)

class RiskAdjustedSearch:
    """Random-search portfolio policy on the probability simplex.

    Neuneier (1997) without networks: the policy is a single weight vector
    that is improved by sampling candidate weight vectors on the simplex and
    keeping the best-scoring one.

    Objective (maximised)::

        E[w^T r] - lam * (w^T Sigma w) - tx_cost * turnover
    """

    def __init__(self, n_assets=5, lam=5.0, tx_cost=0.001, cand=128, seed=0):
        """Set up the market, the candidate sampler and an equal-weight policy.

        Parameters
        ----------
        n_assets : number of assets (length of the weight vector).
        lam : penalty multiplier on the quadratic portfolio risk.
        tx_cost : cost charged per unit of L1 turnover.
        cand : number of candidates sampled per improvement iteration.
        seed : seed for both the market and the candidate sampler.
        """
        self.n = n_assets
        self.lam = lam
        self.tx_cost = tx_cost
        self.cand = cand
        # NOTE(review): the market is built with the same seed as self.rng.
        # If it seeds its own default_rng with that value, both generators
        # start from identical streams; consider np.random.SeedSequence.spawn
        # if independent streams are required.
        self.mkt = MultiAssetMarket(n=n_assets, seed=seed)
        self.rng = np.random.default_rng(seed)
        self.policy = np.ones(self.n) / self.n

    def objective_mc(self, w, steps=2000, ridge=1e-8):
        """Monte Carlo estimate of the risk-adjusted objective for ``w``.

        ``w`` is first projected onto the simplex.  ``steps`` return vectors
        are then drawn from the market (advancing its generator) and the score

            mean(w^T r) - lam * w^T Sigma_hat w - switch_cost / steps

        is returned, where ``Sigma_hat`` is the sample covariance of the drawn
        returns plus ``ridge`` on the diagonal (just the ridge term when
        ``steps == 1``) and ``switch_cost`` is the transaction cost of moving
        from the current policy to ``w``.

        Raises
        ------
        ValueError
            If ``steps`` is smaller than 1 (the score would divide by zero).
        """
        if steps < 1:
            raise ValueError("steps must be at least 1")
        w = project_simplex(w)

        # The candidate is held fixed for the whole path, so turnover is only
        # incurred once, when switching from the current policy to ``w``.
        switch_cost = self.tx_cost * np.sum(np.abs(w - self.policy))

        R_mat = np.zeros((steps, self.n))
        for t in range(steps):
            R_mat[t] = self.mkt.step()  # (n,)

        # Quadratic portfolio risk from the sampled returns.
        if steps > 1:
            Sigma = np.cov(R_mat.T) + ridge * np.eye(self.n)
        else:
            Sigma = ridge * np.eye(self.n)
        risk = float(w @ Sigma @ w)
        mean_ret = float((R_mat @ w).mean())
        return mean_ret - self.lam * risk - switch_cost / steps

    def improve(self, iters=20, steps_eval=2000, noise=0.1, verbose=1):
        """Run ``iters`` rounds of candidate sampling and selection.

        Each round samples ``cand // 2`` candidates as Gaussian perturbations
        (scale ``noise``) of the current policy and the remaining candidates
        from uniform random vectors, all projected onto the simplex.  Every
        candidate is scored with :meth:`objective_mc` using ``steps_eval``
        periods, and the best-scoring candidate becomes the new policy.

        Each candidate is scored on its own freshly drawn return path, and the
        current policy is always replaced (it is not itself re-scored).

        Returns ``self`` so calls can be chained.
        """
        n_local = self.cand // 2
        n_global = self.cand - n_local
        for k in range(1, iters + 1):
            # Candidates near the current policy, then candidates from
            # uniform random vectors (sampling order is kept for
            # reproducibility).
            W_local = [
                project_simplex(self.policy + noise * self.rng.normal(size=self.n))
                for _ in range(n_local)
            ]
            W_rand = [
                project_simplex(self.rng.random(self.n)) for _ in range(n_global)
            ]
            W = np.stack(W_local + W_rand, axis=0)
            # Evaluate every candidate and keep the best one.
            scores = np.array([self.objective_mc(w, steps=steps_eval) for w in W])
            best = int(np.argmax(scores))
            self.policy = W[best]
            if verbose:
                print(f"iter={k}  best_score={scores[best]:.6f}  L1|w|={np.sum(np.abs(self.policy)):.3f}", flush=True)
        return self

    def simulate(self, T=5000):
        """Compound wealth over ``T`` market steps holding the current policy.

        The weights are held constant throughout, so there is no turnover and
        no transaction cost is charged.  Prints ``sim_wealth=<value>`` and
        returns the final wealth (starting from 1.0) as a float.
        """
        w = self.policy.copy()
        wealth = 1.0
        for _ in range(T):
            wealth *= 1.0 + float(w @ self.mkt.step())
        print(f"sim_wealth={wealth:.4f}")
        return wealth

if __name__ == "__main__":
    # Modest settings so the demo prints output right away.
    searcher = RiskAdjustedSearch(n_assets=5, lam=5.0, tx_cost=0.001, cand=64, seed=0)
    # improve() returns the searcher itself, so the simulation can be chained.
    searcher.improve(iters=8, steps_eval=500, noise=0.1, verbose=1).simulate(T=2000)





