In [1]:
from bwb.transports import MySinkhornTransport, MyEMDTransport

In [2]:
from pathlib import Path

import numpy as np

# Build the path from components instead of hard-coding Windows "\" separators,
# so the notebook also runs on Linux/macOS (where r"..\data\face.npy" would be
# read as one odd file name in the current directory).
data = np.load(Path("..") / "data" / "face.npy")
data

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [3]:
%%time
from bwb.distributions import DistributionDrawBuilder


CEIL = 50
n_faces = len(data)
rng = np.random.default_rng(42)

# Builder used to create the face distributions; it shares the notebook's generator.
dist_draw_builder = DistributionDrawBuilder(seed=rng)

# Original faces: every row of `data` is reshaped to 28x28 and inverted
# (255 - value) before being handed to the builder.
faces_original = [
    dist_draw_builder.create_fromarray(255 - pixels.reshape(28, 28))
    for pixels in data
]

CPU times: total: 4.44 s
Wall time: 4.44 s


In [4]:
from bwb.utils import plot_list_of_draws

# Show the face distributions built in the previous cell.
plot_list_of_draws(faces_original)

In [None]:
from matplotlib import pyplot as plt
from bwb.utils import plot_histogram_from_points
from bwb.distributions import DistributionDraw

# First row of `data`, reshaped to 28x28 and inverted, as a distribution with a fixed seed.
first_face_pixels = 255 - data[0, :].reshape(28, 28)
first_face = DistributionDraw.fromarray(first_face_pixels, seed=42)

# Sample of size 10 drawn from that face; it is the data the posterior is later built from.
x = first_face.rvs(size=10)

plot_histogram_from_points(x, histplot_kwargs={"bins": 28})
plt.show()

# Instanciando y "quemando" el MCMC

In [None]:
%%time
from bwb.distributions import GibbsPosteriorPiN

# Posterior over candidate models given the sample `x`. Index 0 is left out of
# `models`; `x` was drawn from the face built from data[0].
pi_n = GibbsPosteriorPiN(data=x, models=faces_original[1:], seed=rng)

In [None]:
%%time
# Draw 1,000 samples and discard the returned value (the heading above calls
# this "burning" the MCMC), then display the sampler object.
pi_n.rvs(size=1_000)
pi_n

In [None]:
# Print how many distinct entries the sampler's counter holds, then display them.
# NOTE(review): `counter` is presumably a collections.Counter, so the entries
# would be (item, count) pairs sorted by count - confirm.
print(f"{len(pi_n.counter.most_common()) = }")
pi_n.counter.most_common()

In [None]:
# Plot whatever `pi_n.most_common()` returns.
# NOTE(review): presumably the most frequently sampled faces - confirm.
plot_list_of_draws(pi_n.most_common())

# Algoritmo del descenso del gradiente estocástico

Ahora se programará el descenso del gradiente estocástico usando la posterior $\Pi_n$.

In [None]:
def as_matrix(weights, places, shape):
    """Accumulate point masses onto a dense grid.

    Parameters
    ----------
    weights : sequence of numbers
        Mass carried by each point; must have the same length as ``places``.
    places : numpy.ndarray
        One row of coordinates per point. Coordinates are cast with
        ``astype(int)`` (truncation toward zero) to obtain the grid cell.
    shape : tuple of int
        Shape of the grid to return.

    Returns
    -------
    numpy.ndarray
        Array of the given shape, zero everywhere except at the cells hit by
        the points, where the masses of all points in that cell are summed.

    Raises
    ------
    AssertionError
        If ``weights`` and ``places`` differ in length.
    """
    assert len(weights) == len(places), f"El tamaño de los pesos y de los puntos soporte no coinciden: {len(weights) = }, {len(places) = }"

    grid = np.zeros(shape)
    cells = places.astype(int)
    for cell, mass in zip(cells, weights):
        index = tuple(cell)
        grid[index] += mass

    return grid

# mat = as_matrix(mu_s, Xs, mu_0.shape)

from bwb.utils import plot_list_of_draws
from PIL import Image

def matrix_to_image_array(matrix: np.ndarray):
    """Convert a weight matrix into an inverted 8-bit grayscale array.

    The matrix is normalised by its maximum, so the largest entry maps to 0
    and zero entries map to 255; intermediate values are rounded up with
    ``np.ceil`` before the cast to ``uint8``.

    Parameters
    ----------
    matrix : numpy.ndarray
        Array of weights, normalised by its largest entry.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the same shape as ``matrix``.

    Notes
    -----
    A matrix whose maximum is 0 has nothing to normalise by. Dividing would
    produce NaNs, whose cast to ``uint8`` is undefined, so an all-255 array
    is returned instead.
    """
    matrix = np.asarray(matrix)
    peak = matrix.max()
    if peak == 0:
        # No mass anywhere: return the blank (all-255) image rather than NaNs.
        return np.full(matrix.shape, 255, dtype="uint8")

    matrix_: np.ndarray = np.ceil(255 - 255 * matrix / peak)
    return matrix_.astype("uint8")

# im = Image.fromarray(matrix_to_image_array(mat))
# plot_list_of_draws([DistributionDraw(im)])

In [None]:
# Stochastic gradient descent driven by one draw from the posterior `pi_n` per
# iteration. The current iterate is stored as a particle cloud: locations `Xs`
# with fixed masses `mu_s`.
eps = 1e-3  # stopping threshold for the step statistic computed below
alpha = 1. / 10  # scales how many particles each support point of mu_0 is split into
emd_transp = MyEMDTransport()
gamma = lambda k_: 1 / (k_ + 2)  # step size used at iteration k_

# Path of iterates
mu = []

# Step 1: sample mu_0
mu_0: DistributionDraw = pi_n.draw()
mu.append(mu_0)

# Compute the masses and locations: each support point of mu_0 is replicated
# ceil(alpha * w / min_w) times and its mass w is shared equally among the copies.
# This is done with np.repeat rather than a `for x, w, n in ...` loop so that
# the notebook-level sample `x` (the data behind `pi_n`) is not overwritten.
min_w, max_w = mu_0.weights.min(), mu_0.weights.max()
support_0 = np.asarray(mu_0.support)
weights_0 = np.asarray(mu_0.weights)
n_copies = np.ceil(alpha * weights_0 / min_w).astype(int)

Xs = np.repeat(support_0, n_copies, axis=0)
mu_s = np.repeat(weights_0 / n_copies, n_copies)

print(f"{len(Xs) = }")

k = 0

while True:
    print(f"{k = }")
    # Step 2: sample \tilde\mu_k
    t_mu_k: DistributionDraw = pi_n.draw()
    Xt, mu_t = np.array(t_mu_k.support), np.array(t_mu_k.weights)

    # Compute the optimal transport
    emd_transp.fit_wm(
        Xs=Xs, mu_s=mu_s,
        Xt=Xt, mu_t=mu_t,
    )

    # Compute the distribution of mu_{k+1}
    gamma_k = gamma(k)
    transport = emd_transp.transform(Xs)
    # Only the locations need updating, because the weights stay the same
    Xs_ = (1 - gamma_k) * Xs + gamma_k * transport

    # Compute the Wasserstein distance
    # NOTE(review): this combines gamma_k**2 with the un-squared Euclidean norm.
    # A squared 2-Wasserstein step would be gamma_k**2 * sum(mu_s * ||diff||**2);
    # left unchanged so the stopping behaviour is the same - confirm the intent.
    diff = Xs - transport
    W_mu_kp1_mu_k = (gamma_k ** 2) * np.sum(mu_s * np.sqrt(diff[:, 0]**2 + diff[:, 1]**2))
    print(f"{W_mu_kp1_mu_k = }")

    # Stopping condition
    if W_mu_kp1_mu_k < eps:
        break

    # Accept the step and store the new iterate, rasterised on mu_0's grid
    Xs = Xs_
    mat = as_matrix(mu_s, Xs, mu_0.shape)
    im = Image.fromarray(matrix_to_image_array(mat))
    mu.append(DistributionDraw(im))
    k += 1


Primeras iteraciones del algoritmo

In [None]:
# Plot the path of iterates from its start, capped at 36 images.
plot_list_of_draws(mu, max_images=36)

Últimas iteraciones del algoritmo

In [None]:
# Plot the last 36 entries of the path of iterates.
plot_list_of_draws(mu[-36:], max_images=36)

# Descenso del gradiente estocástico, versión con Batch

In [None]:
# Batch version of the stochastic gradient descent: every iteration averages
# the transported locations obtained from S(k) draws of the posterior `pi_n`.
# The current iterate is stored as a particle cloud: locations `Xs` with fixed
# masses `mu_s`.
eps = 1e-3  # stopping threshold for the step statistic computed below
alpha = 1. / 10  # scales how many particles each support point of mu_0 is split into
emd_transp = MyEMDTransport()
gamma = lambda k_: 1 / (k_ + 2)  # step size used at iteration k_
S = lambda k_: 5  # batch size at iteration k_ (constant here)

# Path of iterates
mu = []

# Step 1: sample mu_0
mu_0: DistributionDraw = pi_n.draw()
mu.append(mu_0)

# Compute the masses and locations: each support point of mu_0 is replicated
# ceil(alpha * w / min_w) times and its mass w is shared equally among the copies.
# This is done with np.repeat rather than a `for x, w, n in ...` loop so that
# the notebook-level sample `x` (the data behind `pi_n`) is not overwritten.
min_w, max_w = mu_0.weights.min(), mu_0.weights.max()
support_0 = np.asarray(mu_0.support)
weights_0 = np.asarray(mu_0.weights)
n_copies = np.ceil(alpha * weights_0 / min_w).astype(int)

Xs = np.repeat(support_0, n_copies, axis=0)
mu_s = np.repeat(weights_0 / n_copies, n_copies)

print(f"{len(Xs) = }")

k = 0

while True:
    print(f"{k = }")
    transports = []
    for j in range(S(k)):
        print(f"{j = }")
        # Step 2: sample \tilde\mu_k
        t_mu_k: DistributionDraw = pi_n.draw()
        Xt, mu_t = np.array(t_mu_k.support), np.array(t_mu_k.weights)

        # Compute the optimal transport
        emd_transp.fit_wm(
            Xs=Xs, mu_s=mu_s,
            Xt=Xt, mu_t=mu_t,
        )

        transports.append(emd_transp.transform(Xs))

    # Compute the distribution of mu_{k+1} from the batch-averaged transport
    gamma_k = gamma(k)
    transport = np.mean(transports, axis=0)
    # Only the locations need updating, because the weights stay the same
    Xs_ = (1 - gamma_k) * Xs + gamma_k * transport

    # Compute the Wasserstein distance
    # NOTE(review): this combines gamma_k**2 with the un-squared Euclidean norm.
    # A squared 2-Wasserstein step would be gamma_k**2 * sum(mu_s * ||diff||**2);
    # left unchanged so the stopping behaviour is the same - confirm the intent.
    diff = Xs - transport
    W_mu_kp1_mu_k = (gamma_k ** 2) * np.sum(mu_s * np.sqrt(diff[:, 0]**2 + diff[:, 1]**2))
    print(f"{W_mu_kp1_mu_k = }")

    # Stopping condition
    if W_mu_kp1_mu_k < eps:
        break

    # Accept the step and store the new iterate, rasterised on mu_0's grid
    Xs = Xs_
    mat = as_matrix(mu_s, Xs, mu_0.shape)
    im = Image.fromarray(matrix_to_image_array(mat))
    mu.append(DistributionDraw(im))
    k += 1


In [None]:
# Plot the path of iterates of the batch version from its start, capped at 36 images.
plot_list_of_draws(mu, max_images=36)

In [None]:
# Plot the last 33 entries of the path of iterates of the batch version.
# NOTE(review): the non-batch section slices the last 36 - confirm 33 is intentional.
plot_list_of_draws(mu[-33:], max_images=36)