In [None]:
def tf_printv(t, transform=None):
    """Return an identity copy of ``t`` that logs the tensor's value when run.

    A ``tf.py_func`` op wraps a Python callback that writes
    ``'<t.name> - <value>'`` through ``logger.info``.  The returned identity
    op is created under a control dependency on that op.

    Args:
        t: Tensor to pass through; its ``name`` and ``dtype`` are read.
        transform: Optional callable applied to the evaluated value before it
            is logged.  If it returns ``None`` nothing is logged, so it can be
            used as a pure side-effect hook.

    Returns:
        ``tf.identity(t)`` built under a control dependency on the logging op.
    """
    def log_value(x):
        # Without a transform the raw value is always logged; with one, only
        # a non-None result is logged.
        shown = x if transform is None else transform(x)
        if transform is None or shown is not None:
            logger.info('{} - {}'.format(t.name, shown))
        # py_func must hand the value back unchanged.
        return x

    # The op is named after the tensor with its ':<output index>' suffix removed.
    op_name = t.name.split(':')[0]
    outputs = tf.py_func(log_value, [t], [t.dtype], name=op_name)
    log_op = outputs[0]
    with tf.control_dependencies([log_op]):
        passthrough = tf.identity(t)
    return passthrough

## Zero-Inflated Count Models

In [None]:
from edward.models import Normal, Laplace, PointMass, NegativeBinomial, Bernoulli, Poisson
from ml.tensorflow.utilities import tf_print
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Example of using shifted poisson as positive distribution in hurdle model:
# https://www.casact.org/education/annual/2011/handouts/P2-Boucher.pdf

class ModelBuilder(object):
    """Registers priors together with their variational approximations.

    Each call to ``add`` creates a prior random variable and a matching
    approximation, then records them in two dictionaries:

    * ``latent_map``: prior random variable -> approximating random variable.
    * ``tensor_map``: ``name`` -> prior, ``name + '.q'`` -> approximation
      location (or point estimate), and for non-MAP inference
      ``name + '.s'`` -> approximation scale.
    """

    def __init__(self, inference_fn):
        """Remember whether point-mass (MAP) approximations should be built.

        Args:
            inference_fn: Inference class; compared against ``ed.MAP``.
        """
        self.map = inference_fn == ed.MAP
        self.latent_map = {}
        self.tensor_map = {}

    def add(self, dist, loc, scale, name, loc_transform=tf.identity, scale_transform=tf.nn.softplus,
            scale_coef=.1):
        """Create a prior ``dist(loc, scale)`` and its approximation.

        Args:
            dist: Random-variable class called as ``dist(loc, scale_tensor)``.
            loc: Tensor giving the prior location; its static shape is also
                the shape of the free variational parameters.
            scale: Scalar prior scale, broadcast to the shape of ``loc``.
            name: Key prefix used in ``tensor_map``.
            loc_transform: Applied to the free location variable.
            scale_transform: Applied to the free scale variable (non-MAP only).
            scale_coef: The free variables are initialised from a normal with
                standard deviation ``scale * scale_coef``.

        Raises:
            ValueError: If inference is not MAP and ``dist`` is neither
                ``Normal`` nor ``Laplace``.  The check runs before any graph
                node is created, so a rejected call leaves ``latent_map`` and
                ``tensor_map`` untouched.
        """
        # Validate first so that a rejected call has no side effects.
        if not self.map and dist not in [Normal, Laplace]:
            raise ValueError('Distribution "{}" not yet supported'.format(dist))

        shape = loc.get_shape().as_list()
        model = dist(loc, scale * tf.ones_like(loc))
        init_stddev = scale * scale_coef

        lm = self.latent_map
        tm = self.tensor_map
        if self.map:
            # Point estimate: a single free tensor per parameter.
            q = PointMass(params=loc_transform(tf.Variable(tf.random_normal(shape, stddev=init_stddev))))
            lm[model] = q
            tm[name] = model
            tm[name + '.q'] = q.params
        else:
            # Same family as the prior, with free location and scale.
            q = dist(
                loc_transform(tf.Variable(tf.random_normal(shape, stddev=init_stddev))),
                scale_transform(tf.Variable(tf.random_normal(shape, stddev=init_stddev)))
            )
            lm[model] = q
            tm[name] = model
            tm[name + '.q'] = q.loc
            tm[name + '.s'] = q.scale


class CountModel(ed_models.BayesianModel):
    """Grouped count regression with a Poisson or negative-binomial likelihood.

    The linear predictor is ``group offset + bias + X . w``.  It is mapped to
    a positive mean by ``value_link_inverse`` and used as the mean of the
    count likelihood.
    """

    def __init__(
        self, inference_fn,
        x_thresh=100., y_thresh=None,
        ys_max=10., u_sat=10.,
        link='nbinom'
    ):
        """Store hyper-parameters and create the weight builder.

        Args:
            inference_fn: Inference class handed to ``ModelBuilder``.
            x_thresh: Features are clipped to ``[-x_thresh, x_thresh]``.
            y_thresh: Optional upper bound applied to drawn samples.
            ys_max: Multiplier of the negative-binomial dispersion, which is
                ``ys_max * exp(v)`` with ``v`` initialised to
                ``log(1 / ys_max)`` (so the dispersion starts at 1).
            u_sat: Saturation level of the link (see ``value_link_inverse``).
            link: ``'nbinom'`` or ``'poisson'``.
        """
        self.x_thresh = x_thresh
        self.y_thresh = y_thresh
        self.ys_max = ys_max
        self.u_sat = u_sat
        assert link in ['poisson', 'nbinom'], 'Link must be either "nbinom" or "poisson"'
        self.link = link

        self.u_sat_ = None
        self.group_encoder_ = None
        self.model = ModelBuilder(inference_fn)

    def value_link_inverse(self, v):
        """Map the linear predictor to a mean: ``exp(u_sat * tanh(v / u_sat))``.

        The tanh bounds the exponent to ``(-u_sat, u_sat)``, so the result
        stays within ``(exp(-u_sat), exp(u_sat))``.
        """
        return tf.exp(self.u_sat * tf.nn.tanh(v / self.u_sat))
        #return tf.exp(v)

    def add(self, *args, **kwargs):
        """Forward to ``ModelBuilder.add`` on this model's builder.

        The builder is stored as ``self.model`` by ``__init__``; the previous
        implementation referenced ``self.builder``, which is never assigned in
        this class.
        """
        self.model.add(*args, **kwargs)

    def set_params(self, y):
        """Record ``log(max(y))`` in ``u_sat_``; ``y`` must be non-negative."""
        assert np.all(y >= 0)
        y_max = y.max()
        # NOTE(review): u_sat_ is not read anywhere in this class, and this is
        # -inf when every count is zero.
        self.u_sat_ = np.log(y_max)

    def inference_args(self, data, groups):
        """Build the model graph and return what the estimator needs to fit it.

        Args:
            data: Mapping with ``'X'`` (2-D feature array) and ``'Y'``
                (non-negative counts).
            groups: One group label per row of ``data['X']``.

        Returns:
            Tuple ``(input_fn, lm, tm)``: ``input_fn`` ignores its argument and
            returns the feed dict for training, ``lm`` maps priors to their
            approximations, and ``tm`` maps names to tensors/arrays.
        """
        tm, lm = {}, {}

        # Extract true X and Y values, clipping X values to be
        # <= self.x_thresh standard deviations (crucial for gradient descent to not give nans)
        dX, dY = np.clip(data['X'], -self.x_thresh, self.x_thresh), data['Y']
        tm['dX'] = dX
        self.set_params(dY)

        # Encode group labels as consecutive integers for tf.gather.
        assert groups is not None
        assert len(groups) == dX.shape[0]
        nG = len(np.unique(groups))
        self.group_encoder_ = LabelEncoder().fit(groups)
        dG = self.group_encoder_.transform(groups).astype(np.int32)

        # Placeholders
        assert np.all(dY >= 0.)
        P = dX.shape[1]
        Xp = tf.placeholder(tf.float32, [None, P])
        tm['Xp'] = Xp
        X = tf.clip_by_value(Xp, -self.x_thresh, self.x_thresh)
        G = tf.placeholder(tf.int32, [None])
        tm['G'] = G
        Yp = tf.placeholder(tf.int32, [None])
        Y = tf.identity(Yp)

        # ##### Model Weights #### #

        # wgv: per-group offsets, wv: feature weights, wbv: bias.
        self.model.add(Normal, tf.zeros([nG]), .0001, 'wgv')
        self.model.add(Normal, tf.zeros([P, 1]), 1., 'wv')
        self.model.add(Normal, tf.zeros([]), 1., 'wbv')

        tm.update(self.model.tensor_map)
        lm.update(self.model.latent_map)

        # ##### Expectations #### #

        # Count expectation
        Yv_mu_link = tf.gather(tm['wgv'], G) + tm['wbv'] + tf.reshape(tf.matmul(X, tm['wv']), [-1])
        Yv_mu = self.value_link_inverse(Yv_mu_link)
        tf.summary.histogram('Yv_mu', tf.clip_by_value(Yv_mu, -10000., 10000.))

        # ##### Error Family Mapping #### #
        # Count value error distribution
        if self.link == 'nbinom':
            # Dispersion is a free scalar, parameterised on the log scale.
            tm['Ys'] = tf.cast(self.ys_max * tf.exp(tf.Variable(np.log(1./self.ys_max))), tf.float32)
            #tm['Ys'] = 1.
            tf.summary.scalar('Ys', tm['Ys'])
            # NOTE(review): with total_count=Ys and probs=mu/(mu+Ys) the mean
            # should equal Yv_mu under TF's parameterisation -- confirm.
            cYv = NegativeBinomial(tm['Ys'] * tf.ones_like(Yv_mu), probs=Yv_mu / (Yv_mu + tm['Ys']))
        else:
            cYv = Poisson(Yv_mu)

        # Sampling
        tm['n_samp'] = tf.placeholder(tf.int32)

        # Copy of the likelihood with priors swapped for their approximations.
        tm['Yv_samp'] = ed.copy(cYv, lm).sample(tm['n_samp'])
        #tm['Yv_pred'] = ed.copy(Yv_link, lm)
        #tm['Yv_prob'] = ed.copy(cYv, lm).log_prob(dY.astype(np.float32))

        def input_fn(d):
            # Always feeds the full training set; `d` is unused.
            return {Xp: dX, Yp: dY, G: dG, cYv: dY}

        return input_fn, lm, tm

    def criticism_args(self, sess, tm):
        """Return the post-fit callbacks as a dict (only ``'sample_fn'``).

        Args:
            sess: Session used to evaluate the sampling tensor.
            tm: Tensor map returned by ``inference_args``.
        """
        def sample_fn(n, X, G):
            """Draw ``n`` samples per row of ``X``; result shape is ``(n, len(X))``."""
            G = self.group_encoder_.transform(G).astype(np.int32)
            Y = sess.run(
                tm['Yv_samp'],
                feed_dict={tm['Xp']: X, tm['G']: G, tm['n_samp']: n}
            )
            assert Y.shape == (n, X.shape[0])

            if self.y_thresh is not None:
                Y = np.clip(Y, -np.inf, self.y_thresh)
            return Y

        return {
            'sample_fn': sample_fn
        }
    
# Module-level debugging accumulators.  The callbacks add_vweight, add_zweight
# and add_yvmu defined inside HurdleCountModel.inference_args append to these
# lists; they only fire when the corresponding tf_printv hooks are enabled.
WV = []    # first column of the 'wv' weight tensor, one entry per evaluation
WZ = []    # first column of the 'wz' weight tensor, one entry per evaluation
YVMU = []  # evaluated Yv_mu values, one entry per evaluation

class HurdleCountModel(ed_models.BayesianModel):
    """Two-part (hurdle) count model with group offsets.

    A Bernoulli component models whether a count is zero.  A shifted Poisson
    or negative-binomial component models ``y - 1`` for the rows with
    ``y > 0``.  Samples are recombined as ``(1 - is_zero) * (count + 1)``.
    """

    def __init__(
        self, inference_fn,
        x_thresh=100., y_thresh=None,
        p_sat=None, p_sat_max=.3,
        ys_max=10., u_sat=10.,
        link='nbinom'
    ):
        """Store hyper-parameters and create the weight builder.

        Args:
            inference_fn: Inference class handed to ``ModelBuilder``.
            x_thresh: Features are clipped to ``[-x_thresh, x_thresh]``.
            y_thresh: Optional upper bound applied to drawn samples.
            p_sat: Fixed saturation of the zero probability; when ``None`` a
                free saturation in ``(0, p_sat_max)`` is learned instead.
            p_sat_max: Upper bound of the learned saturation.
            ys_max: Multiplier of the negative-binomial dispersion, which is
                ``ys_max * exp(v)`` with ``v`` initialised to
                ``log(1 / ys_max)`` (so the dispersion starts at 1).
            u_sat: Saturation level of the count link.
            link: ``'nbinom'`` or ``'poisson'``.
        """
        self.x_thresh = x_thresh
        self.y_thresh = y_thresh
        self.p_sat = p_sat
        self.p_sat_max = p_sat_max
        self.u_sat = u_sat
        self.ys_max = ys_max
        assert link in ['poisson', 'nbinom'], 'Link must be either "nbinom" or "poisson"'
        self.link = link

        self.u_sat_ = None
        self.group_encoder_ = None
        self.model = ModelBuilder(inference_fn)

    def zero_link_inverse(self, v, sat):
        """Sigmoid link for the zero probability, optionally squeezed.

        With ``sat`` given, the result is ``sat + (1 - 2*sat) * sigmoid(v)``
        and therefore lies in ``(sat, 1 - sat)``; otherwise it is a plain
        sigmoid.
        """
        if sat is not None:
            return sat + (1. - 2*sat) * tf.nn.sigmoid(v)
        else:
            return tf.nn.sigmoid(v)

    def value_link_inverse(self, v):
        """Map the linear predictor to a mean: ``exp(u_sat * tanh(v / u_sat))``.

        The tanh bounds the exponent to ``(-u_sat, u_sat)``.
        """
        return tf.exp(self.u_sat * tf.nn.tanh(v / self.u_sat))
        # Commented-out alternative links kept from the original:
        #return tf.exp(v)
        #return tf.exp(self.u_sat - (tf.nn.softplus(self.u_sat - v)))
        #return tf.exp(tf.clip_by_value(v, -self.u_sat, self.u_sat))
        #return tf.nn.softplus(tf.clip_by_value(v, -self.u_sat, self.u_sat))

    def add(self, *args, **kwargs):
        """Forward to ``ModelBuilder.add`` on this model's builder.

        The builder is stored as ``self.model`` by ``__init__``; the previous
        implementation referenced ``self.builder``, which is never assigned in
        this class.
        """
        self.model.add(*args, **kwargs)

    def set_params(self, y):
        """Check that ``y`` is non-negative; currently derives no parameters."""
        assert np.all(y >= 0)
        y_max = y.max()
        #self.u_sat_ = np.log(y_max)

    def inference_args(self, data, groups):
        """Build the hurdle model graph and return what the estimator needs.

        Args:
            data: Mapping with ``'X'`` (2-D feature array) and ``'Y'``
                (non-negative counts).
            groups: One group label per row of ``data['X']``.

        Returns:
            Tuple ``(input_fn, lm, tm)``: ``input_fn`` ignores its argument and
            returns the feed dict for training, ``lm`` maps priors to their
            approximations, and ``tm`` maps names to tensors/arrays.
        """
        tm, lm = {}, {}

        # Extract true X and Y values, clipping X values to be
        # <= self.x_thresh standard deviations (crucial for gradient descent to not give nans)
        dX, dY = np.clip(data['X'], -self.x_thresh, self.x_thresh), data['Y']
        tm['dX'] = dX
        self.set_params(dY)

        # Encode group labels as consecutive integers for tf.gather.
        assert groups is not None
        assert len(groups) == dX.shape[0]
        nG = len(np.unique(groups))
        self.group_encoder_ = LabelEncoder().fit(groups)
        dG = self.group_encoder_.transform(groups).astype(np.int32)

        # Placeholders
        assert np.all(dY >= 0.)
        P = dX.shape[1]
        Xp = tf.placeholder(tf.float32, [None, P])
        tm['Xp'] = Xp
        X = tf.clip_by_value(Xp, -self.x_thresh, self.x_thresh)
        G = tf.placeholder(tf.int32, [None])
        tm['G'] = G
        Yp = tf.placeholder(tf.int32, [None])
        Y = tf.identity(Yp)

        # Separate zeros from positive values.
        # tf.where on a vector returns shape [k, 1]; squeeze only that second
        # axis so the index stays a vector even when k == 1 (an unrestricted
        # squeeze would turn a single match into a scalar).
        I = tf.squeeze(tf.where(Y > 0), axis=1)
        #Yv = tf.gather(Y, I)
        Xv = tf.gather(X, I)
        Yi = tf.cast(Y <= 0, tf.int32)  # not consumed below
        Gv = tf.gather(G, I)
        tf.summary.histogram('I', I)

        # ##### Model Weights #### #

        # '*v' weights drive the positive-count part, '*z' the zero part:
        # wg* are per-group offsets, w* feature weights, wb* biases.
        self.model.add(Normal, tf.zeros([nG]), .0001, 'wgv')
        self.model.add(Normal, tf.zeros([nG]), .0001, 'wgz')
        self.model.add(Normal, tf.zeros([P, 1]), 1., 'wv')
        self.model.add(Normal, tf.zeros([P, 1]), 1., 'wz')
        self.model.add(Normal, tf.zeros([]), 10., 'wbv')
        self.model.add(Normal, tf.zeros([]), 10., 'wbz')

        tm.update(self.model.tensor_map)
        lm.update(self.model.latent_map)

        # Debug hooks: record the first weight column on each evaluation.
        # Returning None tells tf_printv not to log anything.
        def add_vweight(w):
            global WV
            WV.append(w[:,0])
            return None
        def add_zweight(w):
            global WZ
            WZ.append(w[:,0])
            return None
        #tm['wz'] = tf_printv(tm['wz'], add_zweight)
        #tm['wv'] = tf_printv(tm['wv'], add_vweight)

        # ##### Expectations #### #

        # Zero expectation
        if self.p_sat is None:
            # exp(-softplus(v)) == sigmoid(-v), so the learned saturation lies
            # in (0, p_sat_max) and starts at p_sat_max / 2.
            Ps = self.p_sat_max * tf.exp(-tf.nn.softplus(tf.Variable(0.)))
        else:
            Ps = tf.constant(self.p_sat, tf.float32)

        tf.summary.scalar('Ps', Ps)
        Pz_link = tf.gather(tm['wgz'], G) + tm['wbz'] + tf.reshape(tf.matmul(X, tm['wz']), [-1])
        #Pz_link = tm['wbz'] + tf.reshape(tf.matmul(X, tm['wz']), [-1])
        #tf.summary.histogram('Pz_link', Pz_link)
        Pz = self.zero_link_inverse(Pz_link, Ps)

        # Positive count expectation (training: rows with Y > 0 only)
        Yv_mu_link = tf.gather(tm['wgv'], Gv) + tm['wbv'] + tf.reshape(tf.matmul(Xv, tm['wv']), [-1])
        #Yv_mu_link = tm['wbv'] + tf.reshape(tf.matmul(Xv, tm['wv']), [-1])
        Yv_mu = self.value_link_inverse(Yv_mu_link)

        # Same expectation over every fed row; used for sampling, where no Y
        # is available to select positive rows.
        Yv_mu_samp = self.value_link_inverse(
            tf.gather(tm['wgv'], G) + tm['wbv'] + tf.reshape(tf.matmul(X, tm['wv']), [-1])
        )
        #Yv_mu = tf_print(Yv_mu, lambda x: pd.Series(x).quantile(q=[0, .1, .9, 1]).values)

        def add_yvmu(w):
            global YVMU
            YVMU.append(w)
            return None
        #Yv_mu = tf_printv(Yv_mu, add_yvmu)

        #Yv_mu = tf_print(Yv_mu, lambda x: [x.min(), x.max()])
        #tf.summary.histogram('Yv_mu', tf.clip_by_value(Yv_mu, -10000., 10000.))
        #tf.summary.scalar('Yv_mu', tf.reduce_mean(Yv_mu))
        #tf.summary.histogram('Yv_mu_link', Yv_mu_link)

        #tf.summary.histogram('Yv_mu_link_sat', Yv_mu_link_sat)

        # ##### Error Family Mapping #### #
        cYz = Bernoulli(probs=Pz)

        # Positive value error distribution
        if self.link == 'nbinom':
            # Dispersion is a free scalar, parameterised on the log scale.
            tm['Ys'] = tf.cast(self.ys_max * tf.exp(tf.Variable(np.log(1./self.ys_max))), tf.float32)
            #tm['Ys'] = .2
            tf.summary.scalar('Ys', tm['Ys'])
            # NOTE(review): with total_count=Ys and probs=mu/(mu+Ys) the mean
            # should equal mu under TF's parameterisation -- confirm.
            cYv = NegativeBinomial(tm['Ys'] * tf.ones_like(Yv_mu), probs=Yv_mu / (Yv_mu + tm['Ys']))
            cYv_samp = NegativeBinomial(tm['Ys'] * tf.ones_like(Yv_mu_samp), probs=Yv_mu_samp / (Yv_mu_samp + tm['Ys']))
        else:
            cYv = Poisson(Yv_mu)
            cYv_samp = Poisson(Yv_mu_samp)
            #cYv = Normal(Yv_mu, 1. * tf.ones_like(Yv_mu))
            #cYv_samp = Normal(Yv_mu_samp, 1. * tf.ones_like(Yv_mu_samp))

        # Sampling
        tm['n_samp'] = tf.placeholder(tf.int32)

        # Copies of the likelihoods with priors swapped for their approximations.
        tm['Yz_samp'] = ed.copy(cYz, lm).sample(tm['n_samp'])
        #tm['Yz_pred'] = ed.copy(Pz_samp, lm)
        #tm['Yz_prob'] = ed.copy(Yz, lm).pmf(dIz.astype(np.int32))

        tm['Yv_samp'] = ed.copy(cYv_samp, lm).sample(tm['n_samp'])
        #tm['Yv_pred'] = ed.copy(Yv_link, lm)
        #tm['Yv_prob'] = ed.copy(cYv, lm).log_prob(dY.astype(np.float32))

        def input_fn(d):
            # Always feeds the full training set; `d` is unused.
            return {
                Xp: dX, Yp: dY, G: dG,
                # Zero indicator for every row.
                cYz: (dY == 0).astype(np.int32),
                # Positive counts shifted down by one; undone in sample_fn.
                cYv: (dY[dY > 0] - 1).astype(np.int32)
            }

        return input_fn, lm, tm

    def criticism_args(self, sess, tm):
        """Return the post-fit callbacks as a dict (only ``'sample_fn'``).

        Mean-prediction and log-likelihood callbacks are not provided: the
        ``Yv_pred`` / ``Yv_prob`` tensors they would evaluate are not built
        by ``inference_args``.

        Args:
            sess: Session used to evaluate the sampling tensors.
            tm: Tensor map returned by ``inference_args``.
        """
        def sample_fn(n, X, G):
            """Draw ``n`` samples per row of ``X``; result shape is ``(n, len(X))``."""
            G = self.group_encoder_.transform(G).astype(np.int32)
            Yz, Yv = sess.run(
                [tm['Yz_samp'], tm['Yv_samp']],
                feed_dict={tm['Xp']: X, tm['G']: G, tm['n_samp']: n}
            )
            assert Yz.shape == (n, X.shape[0])
            assert Yv.shape == (n, X.shape[0])

            Yv = Yv + 1 # Unshift
            # Zero wherever the Bernoulli draw flagged a zero, else the count.
            Y = np.multiply(1. - Yz, Yv)
            if self.y_thresh is not None:
                Y = np.clip(Y, -np.inf, self.y_thresh)
            return Y

        return {
            'sample_fn': sample_fn,
        }
    

In [None]:
# Invocation
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Start from an empty default graph so that re-running this cell does not
# stack a second copy of the model on top of the previous one.
tf.reset_default_graph()

# Choose the inference class; it is passed both to the model and the estimator.
#inference_fn=ed.MAP
inference_fn=ed.KLqp

def get_model():
    """Build a negative-binomial hurdle model whose samples are capped at the
    largest count seen in `y` or `y_test` (both defined outside this cell)."""
    y_max = max(y.max(), y_test.max())
    return HurdleCountModel(inference_fn, y_thresh=y_max, p_sat_max=.1, link='nbinom')

model = get_model()
est = ed_models.BayesianModelEstimator(
    model, n_collect=1, n_print_progress=30, max_steps=1500,
    random_state=1, fail_if_not_converged=False,
    inference_fn=inference_fn, n_samples=6, 
    tol=.05 if inference_fn == ed.MAP else 1.,
    #optimizer='adagrad'
)
# Clear files left in the log directory by a previous run.
# NOTE(review): `rm` reports an error if the directory is missing or empty.
! rm /tmp/sim/zicount/*
est.set_log_dir('/tmp/sim/zicount')
# Wrap the estimator in a one-step pipeline; feature scaling is disabled.
est = Pipeline([
    #('scale', StandardScaler()),
    ('est', est)
])


# `est__groups` routes the group labels to the 'est' step's fit().
# `X` and `y` are defined outside this cell; groups come from the 'employer'
# level of X's index.
#est.fit(X.head(10000).values, y.head(10000).values, est__groups=X.head(10000).index.get_level_values('employer'))
#est.fit(X.values, y.values, est__groups=X.index.get_level_values('employer'))
est.fit(X.values, y.values, est__groups=X.index.get_level_values('employer'))

## Zero-Inflated Continuous

In [None]:
from edward.models import Normal, Gamma, Bernoulli, PointMass, Uniform, Laplace, InverseGamma
from ml.tensorflow.utilities import tf_print
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler, FunctionTransformer

class ZICostModel(ed_models.BayesianModel):
    """Two-part model for a continuous outcome with many zeros.

    A Bernoulli component models whether a value falls below
    ``ph_modeling.COST_ZERO_THRESH``.  A Normal or Gamma component models the
    remaining values.  All weights use point-mass approximations.

    NOTE(review): the distribution keywords used here (``p=``, ``mu=``/
    ``sigma=``, ``alpha=``/``beta=``) must match the argument names of the
    installed Edward version -- confirm.
    """

    def __init__(self, x_thresh=10., z_thresh=25., family='normal', link_fn=tf.identity, y_scaling=False):
        """Store hyper-parameters.

        Args:
            x_thresh: Features are clipped to ``[-x_thresh, x_thresh]``.
            z_thresh: The zero-part linear predictor is clipped to
                ``[-z_thresh, z_thresh]`` before the sigmoid.
            family: ``'normal'`` or ``'gamma'`` likelihood for non-zero values.
            link_fn: Maps the value-part linear predictor to the mean.
            y_scaling: When true, non-zero targets are divided by 10000 for
                fitting and predictions are multiplied back.
        """
        self.sample_fn_ = None
        self.x_thresh = x_thresh
        self.z_thresh = z_thresh
        assert family in ['normal', 'gamma']
        self.family = family
        self.link_fn = link_fn
        self.group_encoder_ = None
        self.y_scaling = y_scaling
        self.y_scaler_ = None

    def inference_args(self, data, groups=None):
        """Build the model graph and return what the estimator needs to fit it.

        Args:
            data: Mapping with ``'X'`` (2-D feature array) and ``'Y'``.
            groups: One group label per row of ``data['X']``; required
                despite the ``None`` default.

        Returns:
            Tuple ``(input_fn, lm, tm)``: ``input_fn`` ignores its argument and
            returns the observed-data dict, ``lm`` maps priors to point-mass
            approximations, and ``tm`` maps names to tensors/arrays.
        """
        tm, lm = {}, {}

        # Extract true X and Y values, clipping X values to be
        # <= self.x_thresh standard deviations (crucial for gradient descent to not give nans)
        dX, dY = np.clip(data['X'], -self.x_thresh, self.x_thresh), data['Y']
        tm['dX'] = dX
        # Boolean mask of rows treated as zero.
        dzi = dY < ph_modeling.COST_ZERO_THRESH
        #print(pd.Series(dzi).value_counts())
        dXv, dYv = dX[~dzi, :], dY[~dzi]
        if self.y_scaling:
            #self.y_scaler_ = MinMaxScaler((1., 100.))
            self.y_scaler_ = FunctionTransformer(func=lambda x: x / 10000., inverse_func=lambda x: x * 10000.)
            # The transformer is given a column and the result flattened back.
            dYv = self.y_scaler_.transform(np.reshape(dYv, [-1, 1]))[:, 0]

        #print(pd.Series(dYv).describe())

        # Encode group labels as consecutive integers for tf.gather.
        assert groups is not None
        assert len(groups) == dX.shape[0]
        nG = len(np.unique(groups))
        self.group_encoder_ = LabelEncoder().fit(groups)
        dG = self.group_encoder_.transform(groups).astype(np.int32)
        # Group codes of the non-zero rows (reuses the encoding above).
        dGv = dG[~dzi]

        # print(pd.Series(dYz).describe(), pd.Series(dYv).describe())

        P = dX.shape[1]
        # Training data is baked into the graph as constants.
        X = tf.constant(dX, dtype=tf.float32)
        Xv = tf.constant(dXv, dtype=tf.float32)

        # ##### Weights #### #

        def get_ranef(mu, sigma):
            # Prior for group-level (random) effects.
            return Normal(mu, sigma)

        def get_fixef(mu, sigma):
            # Prior for feature weights and biases (fixed effects).
            return Laplace(mu, sigma)

        # Group coefficients
        Gz = get_ranef(tf.zeros([nG]), 10.*tf.ones([nG]))
        qGz = PointMass(params=tf.Variable(tf.random_normal([nG], stddev=.1)))
        #qGz = PointMass(params=tf.zeros([nG]))
        bGz = tf.gather(Gz, dG)
        lm[Gz], tm['qGz'] = qGz, qGz.params
        tf.summary.histogram('qGz', qGz.params)

        Gv = get_ranef(tf.zeros([nG]), 10.*tf.ones([nG]))
        qGv = PointMass(params=tf.Variable(tf.random_normal([nG], stddev=.1)))
        #qGv = PointMass(params=tf.zeros([nG]))
        bGv = tf.gather(Gv, dGv)
        lm[Gv], tm['qGv'] = qGv, qGv.params
        tf.summary.histogram('qGv', qGv.params)

        # Zero-value coefficients
        Bz = get_fixef(tf.zeros([P, 1]), 1.*tf.ones([P, 1]))
        qBz = PointMass(params=tf.Variable(tf.random_normal([P, 1], stddev=.1)))
        lm[Bz], tm['qBz'] = qBz, qBz.params

        Bz0 = get_fixef(tf.zeros([]), 10.)
        qBz0 = PointMass(params=tf.Variable(tf.random_normal([], stddev=.1)))
        lm[Bz0], tm['qBz0'] = qBz0, qBz0.params
        tf.summary.histogram('qBz0', qBz0.params)

        # Positive-value coefficients
        Bv = get_fixef(tf.zeros([P, 1]), 1.*tf.ones([P, 1]))
        qBv = PointMass(params=tf.Variable(tf.random_normal([P, 1], stddev=.1)))
        lm[Bv], tm['qBv'] = qBv, qBv.params

        Bv0 = get_fixef(tf.zeros([]), 10.)
        qBv0 = PointMass(params=tf.Variable(tf.random_normal([], stddev=.1)))
        lm[Bv0], tm['qBv0'] = qBv0, qBv0.params
        tf.summary.histogram('qBv0', qBv0.params)

        # ##### Expectations #### #

        # Compute probability of a zero value
        Pz_link = bGz + Bz0 + tf.reshape(tf.matmul(X, Bz), [-1])
        Pz_link = tf.clip_by_value(Pz_link, -self.z_thresh, self.z_thresh)

        # Compute expectation of positive values
        Yv_link = bGv + Bv0 + tf.reshape(tf.matmul(Xv, Bv), [-1])
        Yv_mid = self.link_fn(Yv_link)
        #Yv_mid = tf_print(Yv_mid, lambda x: [x.min(), x.max()])

        # ##### Error Family Mapping #### #

        # Map positivity predictions
        Yz = Bernoulli(p=tf.nn.sigmoid(Pz_link))

        # Map non-zero value predictions
        if self.family == 'gamma':
            # Create Gamma scale parameter
            # See here for the rationale behind this parameterization:
            # http://seananderson.ca/2014/04/08/gamma-glms.html
            Ys = Uniform(.001, 100.)
            #Ys = Normal(5., 1.)
            #qYs = PointMass(params=tf.clip_by_value(tf.nn.softplus(tf.Variable(10.)), .001, 100.))
            qYs = PointMass(params=tf.exp(tf.nn.softplus(tf.Variable(1.))))
            #qYs = 5.
            # NOTE(review): alpha=Ys, beta=Ys/mean gives mean alpha/beta ==
            # Yv_mid if beta is a rate parameter -- confirm.
            Yv = Gamma(alpha=Ys * tf.ones_like(Yv_mid), beta=Ys / Yv_mid)
        elif self.family == 'normal':
            # Create Normal variance parameter
            #Ys = InverseGamma(.001, .001)
            Ys = Uniform(0., 100000.)
            qYs = PointMass(params=tf.nn.softplus(tf.Variable(1.)))
            # Ys is a variance, hence the square root for sigma.
            Yv = Normal(mu=Yv_mid, sigma=tf.sqrt(Ys * tf.ones_like(Yv_mid)))

        lm[Ys], tm['qYs'] = qYs, qYs.params
        #tm['qYs'] = qYs
        tf.summary.scalar('qYs', tm['qYs'])

        def add_sampling_graph():
            """Build a placeholder-fed graph that samples outcomes from the
            point estimates; returns ``(n, X, G, Y)``."""
            X = tf.placeholder(tf.float32, shape=[None, P])
            G = tf.placeholder(tf.int32, shape=[None])
            X_clip = tf.clip_by_value(X, -self.x_thresh, self.x_thresh)
            n = tf.placeholder(tf.int32, shape=[])

            Pz_link = tf.gather(tm['qGz'], G) + tm['qBz0'] + tf.reshape(tf.matmul(X_clip, tm['qBz']), [-1])
            Pz_link = tf.clip_by_value(Pz_link, -self.z_thresh, self.z_thresh)

            Yv_link = tf.gather(tm['qGv'], G) + tm['qBv0'] + tf.reshape(tf.matmul(X_clip, tm['qBv']), [-1])
            Yv_mid = self.link_fn(Yv_link)

            # Indicator of a NON-zero outcome, hence 1 - P(zero).
            Ynz = Bernoulli(p=1. - tf.nn.sigmoid(Pz_link)).sample(n)

            if self.family == 'gamma':
                Yv = Gamma(alpha=tm['qYs']*tf.ones_like(Yv_mid), beta=tm['qYs']/Yv_mid).sample(n)
            elif self.family == 'normal':
                Yv = Normal(mu=Yv_mid, sigma=tf.sqrt(tm['qYs'] * tf.ones_like(Yv_mid))).sample(n)

            # Zero out rows drawn as zero and floor the rest at 0.
            Y = tf.clip_by_value(tf.multiply(tf.cast(Ynz, tf.float32), Yv), 0., np.inf)
            return n, X, G, Y

        def add_prediction_graph():
            """Build a placeholder-fed graph for the expected outcome
            ``mean * (1 - P(zero))``; returns ``(X, G, Y)``."""
            X = tf.placeholder(tf.float32, shape=[None, P])
            G = tf.placeholder(tf.int32, shape=[None])
            X_clip = tf.clip_by_value(X, -self.x_thresh, self.x_thresh)

            Pz_link = tf.gather(tm['qGz'], G) + tm['qBz0'] + tf.reshape(tf.matmul(X_clip, tm['qBz']), [-1])
            Pz = tf.nn.sigmoid(tf.clip_by_value(Pz_link, -self.z_thresh, self.z_thresh))

            Yv_link = tf.gather(tm['qGv'], G) + tm['qBv0'] + tf.reshape(tf.matmul(X_clip, tm['qBv']), [-1])
            Yv_mid = self.link_fn(Yv_link)

            Y = tf.clip_by_value(tf.multiply(Yv_mid, (1.-Pz)), 0., np.inf)
            return X, G, Y

        tm['n_samp'], tm['X_samp'], tm['G_samp'], tm['Y_samp'] = add_sampling_graph()
        tm['X_pred'], tm['G_pred'], tm['Y_pred'] = add_prediction_graph()

        def input_fn(d):
            # Observed data for the two likelihood nodes; `d` is unused.
            return {
                Yz: dzi.astype(np.int64),
                Yv: dYv
            }

        return input_fn, lm, tm

    def criticism_args(self, sess, tm):
        """Return the post-fit callbacks as a dict.

        Keys: ``'sample_fn'``, ``'mean_pred_fn'`` and ``'pred_fn'`` (the last
        two are the same function).

        Args:
            sess: Session used to evaluate the sampling/prediction tensors.
            tm: Tensor map returned by ``inference_args``.
        """
        def sample_fn(n, X, G):
            """Draw ``n`` samples per row of ``X`` on the original scale."""
            G = self.group_encoder_.transform(G).astype(np.int32)
            y = sess.run(tm['Y_samp'], feed_dict={tm['X_samp']: X, tm['G_samp']: G, tm['n_samp']: n})
            if self.y_scaling:
                y = self.y_scaler_.inverse_transform(y)
            return y

        def mean_pred_fn(X, G):
            """Return the expected outcome per row of ``X`` on the original scale."""
            G = self.group_encoder_.transform(G).astype(np.int32)
            y = sess.run(tm['Y_pred'], feed_dict={tm['X_pred']: X, tm['G_pred']: G})
            if self.y_scaling:
                # Predictions are 1-D.  Pass them as a column, exactly as the
                # targets were passed in inference_args, because the
                # transformer may reject 1-D input; then flatten back.
                y = self.y_scaler_.inverse_transform(np.reshape(y, [-1, 1]))[:, 0]
            return y

        return {
            'sample_fn': sample_fn,
            'mean_pred_fn': mean_pred_fn,
            'pred_fn': mean_pred_fn
        }

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Start from an empty default graph so that re-running this cell does not
# stack a second copy of the model on top of the previous one.
tf.reset_default_graph()

# Log-scale models
model = ZICostModel(x_thresh=10., z_thresh=10., link_fn=tf.identity, family='normal')

# Original-scale models
#model = ZICostModel(x_thresh=10., z_thresh=10., link_fn=tf.nn.softplus, family='normal', y_scaling=True)
#model = ZICostModel(x_thresh=10., z_thresh=10., link_fn=tf.nn.softplus, family='gamma', y_scaling=True)

# ZICostModel only builds point-mass approximations, so it is fit with MAP.
est = ed_models.BayesianModelEstimator(
    model, n_collect=1, n_print_progress=30, max_steps=1500,
    random_state=1, fail_if_not_converged=False,
    inference_fn=ed.MAP
)
# Clear files left in the log directory by a previous run.
# NOTE(review): `rm` reports an error if the directory is missing or empty.
! rm /tmp/sim/zicost/*
est.set_log_dir('/tmp/sim/zicost')
# Standardise the features, then fit the estimator.
est = Pipeline([
    ('scale', StandardScaler()),
    ('est', est)
])
# `est__groups` routes the group labels to the 'est' step's fit().
# `X` and `y` are defined outside this cell.
est.fit(X.values, y.values, est__groups=X.index.get_level_values('employer'))