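This notebook tests inference under the 2/10 generation scheme: data are simulated from a known data-generating process, and the model parameters are then estimated by maximum likelihood (MLE) using gradient-based optimization.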

In [3]:
from tqdm import tqdm

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns

from scipy.stats import beta
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
tfb = tfp.bijectors

Constants

In [4]:
# Simulation settings shared by the cells below.
rho = 0.7        # mean of the Beta(a, b) mixture component built in true_dgp
pop_size = 1000  # population size per timestep
epsilon = 0.05   # expected proportion of neutral words per speech
batch_size = 50  # batch size (not referenced by the cells shown in this notebook)

Data Generation

In [132]:
def true_dgp(rho, N, epsilon):
    """Coroutine for the ground-truth data-generating process.

    Meant to be wrapped in ``tfd.JointDistributionCoroutineAutoBatched``; it
    yields three nodes in order: ``u``, ``phi`` and ``y``.

    Args:
        rho: Mean of the Beta(a, b) mixture component, given as a plain
            Python scalar (it is formatted and compared eagerly below). The
            derived concentrations are positive only for roughly
            ``0.16 < rho < 1``.
        N: Number of i.i.d. draws of ``u``.
        epsilon: Constant value placed in the third column of ``phi``.

    Yields:
        u: shape ``[N]``, values in [-1, 1], drawn from the rescaled mixture
            ``0.5 * Beta(a, b) + 0.5 * Beta(b, a)``.
        phi: shape ``[N, 3]`` with columns ``(1 - p) * (1 - epsilon)``,
            ``p * (1 - epsilon)`` and ``epsilon``, where ``p = (u + 1) / 2``.
        y: shape ``[N]`` int32 indicator ``1(u >= 0)``.

    Raises:
        ValueError: If ``rho`` produces a non-positive variance or
            non-positive Beta concentrations.
    """
    # Method-of-moments Beta parameters: the component has mean `rho` and the
    # variance given by this quadratic in `rho`.
    variance = 0.175 * (rho ** 2) - 0.3625 * rho + 0.1875
    if variance <= 0:
        raise ValueError(
            f"rho={rho} gives a non-positive component variance ({variance})."
        )
    total_concentration = (rho * (1 - rho)) / variance - 1  # equals a + b
    a = rho * total_concentration
    b = (1 - rho) * total_concentration
    if a <= 0 or b <= 0:
        raise ValueError(
            f"rho={rho} gives non-positive Beta concentrations (a={a}, b={b})."
        )

    # NOTE: these prints run every time the coroutine body is executed, so the
    # banner can appear more than once per sample()/log_prob() call.
    print('=' * 10)
    print(f'True Alpha: {a}')
    print(f"True Beta: {b}")

    ### u ~ pi Beta(a, b) + (1-pi) Beta(b,a), where pi = 0.5
    bmm = tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(probs=[0.5, 0.5]),
        components_distribution=tfd.Beta(
            concentration1=[a, b],
            concentration0=[b, a]
        )
    )

    # Affine map from [0, 1] onto [-1, 1]: u -> 2*u - 1.
    # (Chain applies its bijectors from the end of the list to the front.)
    scale_transform = tfb.Chain([tfb.Shift(shift=-1.0), tfb.Scale(scale=2.0)])

    transformed_bmm = tfd.TransformedDistribution(
        distribution=bmm,
        bijector=scale_transform
    )

    # u has no upstream dependencies, so it is yielded as a Root node.
    u = yield tfd.JointDistributionCoroutineAutoBatched.Root(
        tfd.Sample(transformed_bmm, sample_shape=N, name='u')
    )

    # phi is deterministic given u and epsilon.
    p = (u + 1) / 2  # map u back from [-1, 1] to [0, 1]
    phi = tf.stack([
        (1 - p) * (1 - epsilon),
        p * (1 - epsilon),
        tf.fill([N], epsilon)
    ], axis=-1)
    yield tfd.Deterministic(loc=phi, name='phi')

    # y = 1(u >= 0)
    y = tf.cast(u >= 0, dtype=tf.int32)
    yield tfd.Deterministic(loc=y, name='y')

In [144]:
# Build the ground-truth joint distribution over (u, phi, y).
true_joint = tfd.JointDistributionCoroutineAutoBatched(
    lambda: true_dgp(rho, pop_size, epsilon)
)
# Draw one population. The explicit seed makes a fresh Restart & Run All
# reproduce the same draw instead of a new random population each time.
true_sample = true_joint.sample(seed=42)
u_samp, phi_samp, y_samp = true_sample

True Alpha: 6.838461538461533
True Beta: 2.930769230769229
True Alpha: 6.838461538461533
True Beta: 2.930769230769229


In [145]:
# Report the shape of each component of the simulated sample, one per line.
print(
    f"u sample shape: {u_samp.shape}",
    f"phi sample shape: {phi_samp.shape}",
    f"y sample shape: {y_samp.shape}",
    sep="\n",
)

u sample shape: (1000,)
phi sample shape: (1000, 3)
y sample shape: (1000,)


In [146]:
# Show the first five entries of each component of the simulated sample.
print(
    f"example u:\n {u_samp[:5]}\n",
    f"example phi:\n {phi_samp[:5]}\n",
    f"example y:\n {y_samp[:5]}\n",
    sep="\n",
)

example u:
 [ 0.10957897 -0.39057928 -0.3381759   0.10645628 -0.3497519 ]

example phi:
 [[0.42295    0.52705    0.05      ]
 [0.66052514 0.28947484 0.05      ]
 [0.6356335  0.31436646 0.05      ]
 [0.42443326 0.5255667  0.05      ]
 [0.6411322  0.30886784 0.05      ]]

example y:
 [1 0 0 1 0]



Inference

In [147]:
# Observed data for the inference step: only (phi, y) are kept; the sampled
# u values (u_samp) are deliberately left out of the dataset.
dataset = (phi_samp, y_samp)

DGP Model

In [172]:
def dgp(W, a, b):
    """Coroutine for the model to be fitted; yields ``u``, ``phi`` and ``y``.

    Meant to be wrapped in ``tfd.JointDistributionCoroutineAutoBatched``.
    Unlike ``true_dgp``, each execution describes a single draw of ``u``
    (no ``tfd.Sample`` wrapper).

    Args:
        W: Weights multiplied by ``u`` before the softmax that defines
            ``phi``. The notebook passes a length-3 tensor.
        a: First concentration of the Beta(a, b) mixture component.
        b: Second concentration of the Beta(a, b) mixture component.

    Yields:
        u: draw from ``0.5 * Beta(a, b) + 0.5 * Beta(b, a)`` rescaled to
            [-1, 1].
        phi: ``softmax(W * u)``, deterministic given ``u``.
        y: int32 indicator ``1(u >= 0)``, deterministic given ``u``.
    """
    ### u ~ pi Beta(a, b) + (1-pi) Beta(b, a), where pi = 0.5
    bmm = tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(probs=[0.5, 0.5]),
        components_distribution=tfd.Beta(
            concentration1=[a, b],
            concentration0=[b, a]
        )
    )

    # Affine map from [0, 1] onto [-1, 1]: u -> 2*u - 1.
    # (Chain applies its bijectors from the end of the list to the front.)
    scale_transform = tfb.Chain([tfb.Shift(shift=-1.0), tfb.Scale(scale=2.0)])

    transformed_bmm = tfd.TransformedDistribution(
        distribution=bmm,
        bijector=scale_transform,
        name='u'
    )

    # u has no upstream dependencies, so it is the only Root node.
    u = yield tfd.JointDistributionCoroutineAutoBatched.Root(transformed_bmm)

    # phi and y are functions of u, so they are yielded as ordinary nodes.
    # Root is reserved for dependency-free distributions; this also matches
    # how true_dgp yields its dependent nodes.
    yield tfd.Deterministic(tf.nn.softmax(W * u), name='phi')

    # y = 1(u >= 0)
    yield tfd.Deterministic(tf.cast(u >= 0, dtype=tf.int32), name='y')

Trainable Variables

In [173]:
# Positive-constrained concentration parameters (softplus bijector, initial
# value 2). The TransformedVariable handles are kept under their own names:
# they own the trainable tf.Variables, and overwriting them with a converted
# tensor would leave those variables unreachable for an optimizer.
alpha_var = tfp.util.TransformedVariable(
    2.,
    bijector=tfp.bijectors.Softplus(),
    name='alpha_fit'
)
beta_var = tfp.util.TransformedVariable(
    2.,
    bijector=tfp.bijectors.Softplus(),
    name='beta_fit'
)

# Tensor views of the current parameter values, passed to dgp(...).
# NOTE(review): these are computed once, outside any gradient tape, so
# gradients will not flow from them back to alpha_var / beta_var. Re-convert
# inside the loss (or pass the variables themselves) when optimizing.
alpha_fit = tf.convert_to_tensor(alpha_var)
beta_fit = tf.convert_to_tensor(beta_var)

# Unconstrained softmax weights, one per column of phi.
# NOTE(review): these names contain spaces and parentheses; that was accepted
# when the notebook ran eagerly, but confirm it is acceptable if the variables
# are ever created in graph mode.
w1_fit = tf.Variable(-3., name='w1 (left) fit')
w2_fit = tf.Variable(3., name='w2 (right) fit')
# The third column of phi is the neutral share, so this weight is labelled
# "neutral" (the original label duplicated w2's "right").
w3_fit = tf.Variable(0., name='w3 (neutral) fit')

# NOTE(review): like alpha_fit / beta_fit, this stack is evaluated once here,
# so W is a constant tensor rather than a live view of w1_fit..w3_fit.
W = tf.stack([w1_fit, w2_fit, w3_fit], axis=0)

In [175]:
# Wrap the fitted-model coroutine as a joint distribution over (u, phi, y),
# using the current values of W, alpha_fit and beta_fit.
dgp_fit = tfd.JointDistributionCoroutineAutoBatched(
    lambda: dgp(W, alpha_fit, beta_fit)
)
# Draw 10 joint samples from the model.
sample = dgp_fit.sample(sample_shape=10)



In [None]:
def nll():
    """Return the negative summed log-probability of `dataset` under `dgp_fit`.

    NOTE(review): `dataset` holds two parts (phi, y) while the `dgp` coroutine
    yields three nodes (u, phi, y), so this call likely fails on a structure
    mismatch -- confirm before using it as a training loss.
    """
    return -tf.reduce_sum(dgp_fit.log_prob(dataset))