Hello, Welcome to the Internal Guide to this Uncertainy Library-- This python notebook should contain everything a man needs to use and alter this package as he sees fit.

Throughout this, we are going to use the same datasets for regression and classification. The regression dataset can be found at https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip and the classifiction dataset can be found at https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.data.gz. You will obviously need to get these to continue this notebook

In [9]:
import numpy as np
import tensorflow as tf
import types
from tensorflow.python.keras.layers.ops import core as core_ops
import mdn
from tensorflow.compat.v1.keras import layers
from tensorflow.python.keras import activations
from tensorflow_probability import distributions as tfd
from keras import backend as K
from keras import activations, initializers
import tensorflow_probability as tfp


In [10]:
'Here we make a Regression Dataset'

text_file = open("../../../YearPredictionMSD.txt", "r")
lines = text_file.readlines()
alldata = [[float(x) for x in line.split(',')] for line in lines]
text_file.close()
alldata = np.array(alldata)
Yreg = tf.convert_to_tensor(alldata[:,0], dtype = tf.float32)
Xreg = tf.convert_to_tensor(alldata[:,1:], dtype = tf.float32)
# Yreg = (Yreg - tf.math.reduce_mean(Yreg))/tf.math.reduce_std(Yreg)
# Xreg = (Xreg - tf.math.reduce_mean(Xreg, axis=0, keepdims=True))/tf.math.reduce_std(Xreg, axis=0, keepdims=True)



# Yreg = Yreg/tf.reduce_max(Yreg)

print(Xreg.shape, Yreg.shape)

'Here we make a classification Dataset'


text_file = open("../../../covtype.txt", "r")
lines = text_file.readlines()
alldata = [[float(x) for x in line.split(',')] for line in lines]
text_file.close()
alldata = np.array(alldata)
ImYclass = alldata[:,-1].astype(int)-1
Xclass = (alldata[:,:-1])
Yclass = np.zeros((ImYclass.size, ImYclass.max()+1))
Yclass[np.arange(ImYclass.size), ImYclass] = 1
Yclass = tf.convert_to_tensor(Yclass, dtype = tf.float32)
Xclass = tf.convert_to_tensor(Xclass, dtype = tf.float32)

print(Xclass.shape, Yclass.shape)





(515345, 90) (515345,)
(581012, 54) (581012, 7)


Now we're gonna make two models, very simple bois for regression and classification, respectively

In [11]:
import tensorflow as tf

class RegModel(tf.keras.Model):

    def __init__(self):
        super(RegModel, self).__init__()
        self.dense1 = tf.keras.layers.Dense(60, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(30, activation=tf.nn.relu)
        self.dense3 = tf.keras.layers.Dense(1)

    def call(self, inputs, training=False):
        x = self.dense1(inputs)
        x = self.dense2(x)
        return self.dense3(x)
    
class ClassModel(tf.keras.Model):

    def __init__(self):
        super(ClassModel, self).__init__()
        self.dense1 = tf.keras.layers.Dense(36, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(36, activation=tf.nn.relu)
        self.dense3 = tf.keras.layers.Dense(7, activation = tf.nn.softmax)

    def call(self, inputs, training=False):
        x = self.dense1(inputs)

        x = self.dense2(x)
        return self.dense3(x)
    


You could then train these guys normally: 

In [38]:
regmodel = RegModel()
regmodel.compile(optimizer = tf.keras.optimizers.Adam(), loss = tf.keras.losses.MSE)
regmodel.fit(Xreg, Yreg)
print(regmodel.evaluate(Xreg, Yreg))
print(regmodel.predict(Xreg[0:5]))







classmodel = ClassModel()
classmodel.compile(optimizer = tf.keras.optimizers.Adam(), loss = tf.keras.losses.CategoricalCrossentropy())
classmodel.fit(Xclass, Yclass)
print(classmodel.evaluate(Xclass, Yclass))
print(classmodel.predict(Xclass[0:5]))



20316.392578125
[[2073.9182]
 [2105.907 ]
 [2041.5868]
 [1959.6626]
 [2106.3499]]
0.8111305832862854
[[5.08685820e-02 9.41301823e-01 2.03644072e-06 1.36389380e-21
  7.75355520e-03 7.40167670e-05 7.17037866e-13]
 [6.34897128e-02 9.28424835e-01 2.27449868e-06 2.12558457e-21
  7.98920821e-03 9.39508463e-05 5.88551885e-13]
 [1.27062500e-01 8.71733904e-01 1.91088861e-06 1.21502717e-14
  1.19192514e-03 8.20808236e-06 1.56824558e-06]
 [2.44414702e-01 7.54212081e-01 3.07503865e-06 2.66788275e-11
  1.33916445e-03 1.12530342e-05 1.96840101e-05]
 [1.06573924e-01 8.87791336e-01 2.09131440e-06 9.73061345e-21
  5.51222684e-03 1.20390410e-04 1.87630163e-12]]


In [39]:


class Ensemble():
    def __init__(self, Model, num_ens=1):
        self.ensemble = [Model() for _ in range(num_ens)]   
    def compile(self, *args, **kwargs):
        for submodel in self.ensemble:
            submodel.compile(*args, **kwargs)        
    def fit(self, *args, **kwargs):
        for submodel in self.ensemble:
            submodel.fit(*args, **kwargs)
    def evaluate(self, *args, **kwargs):
        results = []
        for submodel in self.ensemble:
            test_scores = submodel.evaluate(*args, **kwargs)
            results.append(test_scores)
        if type(results[0]) is tuple:
            return list(zip(*results))
        return results
    def predict(self, *args, return_std = True, **kwargs):
        predictions = [submodel.predict(*args, **kwargs) for submodel in self.ensemble]
        predictions = tf.stack(predictions)
        
        mean_preds = tf.reduce_mean(predictions, axis = 0)

        if not return_std:
            return mean_preds
        
        mean_preds = tf.expand_dims(mean_preds, 1)
        std_preds = tf.math.reduce_std(predictions, axis = 0)
        std_preds = tf.expand_dims(std_preds, 1)

        return tf.concat([mean_preds, std_preds], axis = 1)
    
    


In [None]:
regmodel = Ensemble(RegModel, 2)
regmodel.compile(optimizer = tf.keras.optimizers.Adam(), loss = tf.keras.losses.MSE)
regmodel.fit(Xreg, Yreg)
print(regmodel.evaluate(Xreg, Yreg))
print(regmodel.predict(Xreg[0:5]))





classmodel = Ensemble(ClassModel, 2)
classmodel.compile(optimizer = tf.keras.optimizers.Adam(), loss = tf.keras.losses.CategoricalCrossentropy())
classmodel.fit(Xclass, Yclass)
print(classmodel.evaluate(Xclass, Yclass))
print(classmodel.predict(Xclass[0:5]))




[17288.18359375, 16119.0126953125]
tf.Tensor(
[[[2031.3912  ]
  [  30.09021 ]]

 [[2107.0083  ]
  [  18.649536]]

 [[2055.0388  ]
  [  15.017639]]

 [[2030.432   ]
  [  11.272217]]

 [[2137.886   ]
  [  35.96997 ]]], shape=(5, 2, 1), dtype=float32)
 2524/18157 [===>..........................] - ETA: 14s - loss: 4.2214

In [None]:


class Dropout():
    def __init__(self, Model, rate, dropout_layers = [tf.keras.layers.Dense]):
        self.model = Model()
        
        def adddropout(denselayer):
            def func(self, inputs, **kwargs):
                x = core_ops.dense(inputs, self.kernel, self.bias, self.activation, dtype=self._compute_dtype_object)
                return tf.nn.dropout(x, noise_shape=None, rate=rate)
            return func
        
        for layer in self.model.layers[:-1]:
            if layer.__class__ in dropout_layers:
                func = adddropout(layer)
                layer.call = types.MethodType(func, layer)
                
    def compile(self, *args, **kwargs):
        self.model.compile(*args, **kwargs)        

    def fit(self, *args, **kwargs):
        self.model.fit(*args, **kwargs)
        
    def evaluate(self, *args, trials = 3, **kwargs):
        results = []
        for _ in range(trials):
            test_scores = self.model.evaluate(*args, **kwargs)
            results.append(test_scores)
        if type(results[0]) is tuple:
            return list(zip(*results))
        return results

    def predict(self, *args, trials = 3, return_std = True, **kwargs):
#         print(*args)
        predictions = [self.model.predict(*args, **kwargs) for _ in range(trials)]
        predictions = tf.stack(predictions)
#         print(predictions.shape)
#         print(predictions)
        mean_preds = tf.reduce_mean(predictions, axis = 0)

        if not return_std:
            return mean_preds
        
        
        std_preds = tf.math.reduce_std(predictions, axis = 0)
        mean_preds = tf.expand_dims(mean_preds, 1)

        std_preds = tf.expand_dims(std_preds, 1)


        return tf.concat([mean_preds, std_preds], axis = 1)



In [None]:
regmodel = Dropout(RegModel, 0.2)
regmodel.compile(optimizer = tf.keras.optimizers.Adam(), loss = tf.keras.losses.MSE)
regmodel.fit(Xreg, Yreg)
print(regmodel.evaluate(Xreg, Yreg) )
print(regmodel.predict(Xreg[0:5], trials = 2))







classmodel = Dropout(ClassModel, 0.2)
classmodel.compile(optimizer = tf.keras.optimizers.Adam(), loss = tf.keras.losses.CategoricalCrossentropy())
classmodel.fit(Xclass, Yclass)
print(classmodel.evaluate(Xclass, Yclass))
print(classmodel.predict(Xclass[0:5], trials = 2) )






In [None]:
class Self_Predict():
    def __init__(self, Model, Std_Model = None, error_activation = None):
        if Std_Model is None:
            Std_Model = Model
        self.model = Model()
        self.std_model = Std_Model()
        
        self.error_norm = 1
        self.error_activation = error_activation
        
        
        
        layer = self.std_model.layers[-1]
#         print(layer.activation)
        layer.activation = activations.get(error_activation)
#         print(layer.activation)

        
    def compile(self, *args, **kwargs):
        self.model.compile(*args, **kwargs)
        new_kwargs = kwargs
        new_kwargs['loss'] = tf.keras.losses.MSE
        self.std_model.compile(*args, **new_kwargs)

        
        
    def fit(self, *args, **kwargs):
        self.model.fit(*args, **kwargs)       
        preds = self.model.predict(args[0])
        preds = preds.reshape(args[1].shape)
        errors = ((args[1] - preds)**2)**.5       
        new_args = list(args)
        new_args[1] = tf.reshape(errors, args[1].shape)
        new_args = tuple(new_args)
        self.std_model.fit(*new_args, **kwargs)
        
    def evaluate(self, *args, **kwargs):
        return self.model.evaluate( *args, **kwargs)
   
    def predict(self, *args, return_std = True, **kwargs):
        mean_preds = self.model.predict(*args, **kwargs)
        std_preds = self.std_model.predict(*args, **kwargs)    
        mean_preds = tf.expand_dims(mean_preds, 1)
        std_preds = (tf.expand_dims(std_preds, 1) * self.error_norm)

        if not return_std:
            return np.mean(tf.stack(predictions), 0)
        return tf.concat([mean_preds, std_preds], axis = 1)





In [None]:
regmodel = Self_Predict(RegModel)
regmodel.compile(optimizer = tf.keras.optimizers.Adam(), loss = tf.keras.losses.MSE)
regmodel.fit(Xreg, Yreg)
print(regmodel.evaluate(Xreg, Yreg)) 
print(regmodel.predict(Xreg[0:5]))






classmodel = Self_Predict(ClassModel)
classmodel.compile(optimizer = tf.keras.optimizers.Adam(), loss = tf.keras.losses.CategoricalCrossentropy())
classmodel.fit(Xclass, Yclass)
print(classmodel.evaluate(Xclass, Yclass))
print(classmodel.predict(Xclass[0:5]))






In [4]:

class Gaussian_Mixtures():
    def __init__(self, Model, num_mixtures=1):
        self.model = Model()
        
        
        layer = self.model.layers[-1]
        
        self.output_dim = layer.units
        layer.output_dim = layer.units
        self.num_mix = num_mixtures
        layer.num_mix = num_mixtures
        with tf.name_scope('MDN'):
            layer.mdn_mus = layers.Dense(layer.num_mix * layer.output_dim, name='mdn_mus')  # mix*output vals, no activation
            layer.mdn_sigmas = layers.Dense(self.num_mix * self.output_dim, activation=self.elu_plus_one_plus_epsilon, name='mdn_sigmas')  # mix*output vals exp activation
            layer.mdn_pi = layers.Dense(self.num_mix, name='mdn_pi')  # mix vals, logits
            

            
            
        
        def build(self, input_shape):
            with tf.name_scope('mus'):
                self.mdn_mus.build(input_shape)
            with tf.name_scope('sigmas'):
                self.mdn_sigmas.build(input_shape)
            with tf.name_scope('pis'):
                self.mdn_pi.build(input_shape)

        def call_func(self, x):
            with tf.name_scope('MDN'):
                mdn_out = layers.concatenate([self.mdn_mus(x),
                                              self.mdn_sigmas(x),
                                              self.mdn_pi(x)],
                                             name='mdn_outputs')
            return mdn_out

        
        def compute_output_shape(self, input_shape):
            """Returns output shape, showing the number of mixture parameters."""
            return (input_shape[0], (2 * self.output_dim * self.num_mix) + self.num_mix)
        
        def get_config(self):
            config = {
                "output_dimension": self.output_dim,
                "num_mixtures": self.num_mix
            }
            base_config = super(Dense, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))

        layer.build = types.MethodType(build, layer)
        layer.call = types.MethodType(call_func, layer)
        layer._trainable_weights = layer.mdn_mus.trainable_weights + layer.mdn_sigmas.trainable_weights + layer.mdn_pi.trainable_weights
        layer._non_trainable_weights = layer.mdn_mus.non_trainable_weights + layer.mdn_sigmas.non_trainable_weights + layer.mdn_pi.non_trainable_weights
        layer.compute_output_shape = types.MethodType(compute_output_shape, layer)
        layer.get_config = types.MethodType(get_config, layer)
        
        
    def elu_plus_one_plus_epsilon(self, x):
        """ELU activation with a very small addition to help prevent
        NaN in loss."""
        return tf.keras.backend.elu(x) + 1 + .00001

    def compile(self, *args, loss=None, **kwargs):
        if loss is None:
            loss = mdn.get_mixture_loss_func(self.output_dim, self.num_mix)
        kwargs['loss'] = loss          
        self.model.compile(*args, **kwargs)        

    def fit(self, *args, **kwargs):
        self.model.fit(*args, **kwargs)
        
    def evaluate(self, *args, **kwargs):
        return self.model.evaluate( *args, **kwargs)
    
    
    def predict(self, *args, return_std = True, **kwargs):
        all_preds = self.model.predict(*args, **kwargs)
        return self.get_dist(all_preds)

    def get_dist(self, y_pred):
        num_mix = self.num_mix
        output_dim = self.output_dim
        y_pred = tf.reshape(y_pred, [-1, (2 * num_mix * output_dim) + num_mix], name='reshape_ypreds')
        out_mu, out_sigma, out_pi = tf.split(y_pred, num_or_size_splits=[num_mix * output_dim,
                                                                         num_mix * output_dim,
                                                                         num_mix],
                                             axis=1, name='mdn_coef_split')
        cat = tfd.Categorical(logits=out_pi)
        component_splits = [output_dim] * num_mix
        mus = tf.split(out_mu, num_or_size_splits=component_splits, axis=1)
        sigs = tf.split(out_sigma, num_or_size_splits=component_splits, axis=1)
        coll = [tfd.MultivariateNormalDiag(loc=loc, scale_diag=scale) for loc, scale
                in zip(mus, sigs)]
        return tfd.Mixture(cat=cat, components=coll)
    
    
    
    
# class Mixture():
#     def __init__(self, y_pred, num_mix, output_dim):
#         self.out_dist = y_pred
#         self.num_mix = num_mix
#         self.output_dim = output_dim
        
#         y_pred = tf.reshape(y_pred, [-1, (2 * num_mix * output_dim) + num_mix], name='reshape_ypreds')
#         out_mu, out_sigma, out_pi = tf.split(y_pred, num_or_size_splits=[num_mix * output_dim,
#                                                                          num_mix * output_dim,
#                                                                          num_mix],
#                                              axis=1, name='mdn_coef_split')
#         cat = tfd.Categorical(logits=out_pi)
#         component_splits = [output_dim] * num_mix
#         mus = tf.split(out_mu, num_or_size_splits=component_splits, axis=1)
#         sigs = tf.split(out_sigma, num_or_size_splits=component_splits, axis=1)
#         coll = [tfd.MultivariateNormalDiag(loc=loc, scale_diag=scale) for loc, scale
#                 in zip(mus, sigs)]
#         self.mixture = tfd.Mixture(cat=cat, components=coll)
        
        
#     def sample(self):
#         return self.mixture.sample()
    
#     def mse(self, y_true):
#         samp = self.mixture.sample()
#         y_true = tf.reshape(y_true, [-1, self.output_dim], name='reshape_ytrue')
#         mse = tf.reduce_mean(tf.square(samp - y_true), axis=-1)
#         # Todo: temperature adjustment for sampling functon.
#         return mse
    
#     def log_likelihood(self, y_true):
#         y_true = tf.reshape(y_true, [-1, self.output_dim], name='reshape_ytrue')
#         return self.mixture.log_prob(y_true)







In [28]:
# regmodel = Gaussian_Mixtures(RegModel, 5)
# regmodel.compile(optimizer = tf.keras.optimizers.Adam())
# regmodel.fit(Xreg, Yreg)
# print(regmodel.evaluate(Xreg, Yreg)) 
# regression_dist = regmodel.predict(Xreg[0:5])
# print(regression_dist.sample())






classmodel = Gaussian_Mixtures(ClassModel, 5)
classmodel.compile(optimizer = tf.keras.optimizers.Adam())
classmodel.fit(Xclass[0:5], Yclass[0:5], epochs = 1000, verbose = False)
print(classmodel.evaluate(Xclass[0:5], Yclass[0:5]))
classification_dist = classmodel.predict(Xclass[0:5])
print(classification_dist.sample())


37651701760.0
tf.Tensor(
[[-1.83145058e+00 -7.98284607e+01  4.14751589e-01 -1.38629242e+02
  -4.64119225e+01 -1.76372206e+00  9.31161041e+01]
 [ 2.77067947e+00 -1.41249710e+02  2.63736629e+00 -1.61776779e+02
  -2.12485294e+01  7.69042611e-01 -3.10771141e+01]
 [ 1.40198410e+02 -2.64096985e+02  1.07798442e-01  2.23764465e+02
  -1.29396808e+00 -1.90287679e-01 -7.29819477e-01]
 [-3.75903778e+01 -4.78315491e+02 -1.05292045e-01  3.53546631e+02
   1.30463421e+00  1.88017413e-01  7.29207218e-01]
 [-9.36701477e-01 -5.65424271e+01 -3.07723498e+00 -7.78165665e+01
   9.32977066e+01  1.00567365e+00  8.02884521e+01]], shape=(5, 7), dtype=float32)


In [30]:
print(Yclass[0:5])

tf.Tensor(
[[0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]], shape=(5, 7), dtype=float32)


In [12]:
class SVI():
    def __init__(self, Model, kl_weight=1., prior_sigma_1=1.5, prior_sigma_2=0.1, prior_pi=0.5, SVI_Layers=[tf.keras.layers.Dense], max_var = .8, min_var=.2, normalize = True, task = 'regression', one_hot = False, **kwargs):
                
        self.model = Model()
        
        self.SVI_Layers = SVI_Layers
        
        self.train_std = 0
        
        self.max_var = max_var
        
        self.min_var = min_var
        
        self.one_hot = one_hot
        
        
        self.xmean, self.xstd = 0., 1.
        
        self.ymean, self.ystd = 0., 1.
        
        self.use_normalization = normalize


        def compute_output_shape(self, input_shape):
            return input_shape[0], self.units
        
        self.task = task
        
        if task == 'regression':
            last_layer = self.model.layers[-1]
            self.dim = last_layer.units
            last_layer.units = 2 * last_layer.units
            
            
        if task == 'classification':
            self.dim = self.model.layers[-1].units


        def kl_loss(self, w, mu, sigma):
            variational_dist = tfp.distributions.Normal(mu, sigma)
            return self.kl_weight * K.sum(variational_dist.log_prob(w) - self.log_prior_prob(w))


            
        def build(self, input_shape):
            print("WE BUILD")
            self.kernel_mu = self.add_weight(name='kernel_mu',
                                             shape=(input_shape[1], self.units),
                                             initializer=initializers.RandomNormal(stddev=self.init_sigma),
                                             trainable=True)
            self.bias_mu = self.add_weight(name='bias_mu',
                                           shape=(self.units,),
                                           initializer=initializers.RandomNormal(stddev=self.init_sigma),
                                           trainable=True)
            self.kernel_rho = self.add_weight(name='kernel_rho',
                                              shape=(input_shape[1], self.units),
                                              initializer=initializers.Constant(0.0),
                                              trainable=True)
            self.bias_rho = self.add_weight(name='bias_rho',
                                            shape=(self.units,),
                                            initializer=initializers.Constant(0.0),
                                            trainable=True)
            self._trainable_weights = [self.kernel_mu, self.bias_mu, self.kernel_rho, self.bias_rho]# 
                
                


            
        def call(self, inputs, **kwargs):
            
            if self.built == False:
                self.build(inputs.shape)
                self.built = True
            
            
            kernel_sigma = tf.math.softplus(self.kernel_rho)
            kernel = self.kernel_mu + kernel_sigma * tf.random.normal(self.kernel_mu.shape)

            bias_sigma = tf.math.softplus(self.bias_rho)
            bias = self.bias_mu + bias_sigma * tf.random.normal(self.bias_mu.shape)

            self.add_loss(self.kl_loss(kernel, self.kernel_mu, kernel_sigma) +
                          self.kl_loss(bias, self.bias_mu, bias_sigma))

            return self.activation(K.dot(inputs, kernel) + bias)


        def log_prior_prob(self, w):
            comp_1_dist = tfp.distributions.Normal(0.0, self.prior_sigma_1)
            comp_2_dist = tfp.distributions.Normal(0.0, self.prior_sigma_2)
            return K.log(self.prior_pi_1 * comp_1_dist.prob(w) +
                         self.prior_pi_2 * comp_2_dist.prob(w))


        
        for layer in self.model.layers:
            if layer.__class__ in SVI_Layers: 
                print("HELLO")
                layer.kl_weight = kl_weight
                print(layer.kl_weight)
                layer.prior_sigma_1 = prior_sigma_1
                layer.prior_sigma_2 = prior_sigma_2
                layer.prior_pi_1 = prior_pi
                layer.prior_pi_2 = 1.0 - prior_pi
                layer.init_sigma = np.sqrt(layer.prior_pi_1 * layer.prior_sigma_1 ** 2 +
                                          layer.prior_pi_2 * layer.prior_sigma_2 ** 2)

                layer.compute_output_shape = types.MethodType(compute_output_shape, layer)
                layer.build = types.MethodType(build, layer)
                layer.call = types.MethodType(call, layer)
                layer.kl_loss = types.MethodType(kl_loss, layer)
                layer.log_prior_prob = types.MethodType(log_prior_prob, layer)
                layer.built = False
                
                
                
    def fit_normalize(self, X):
        mean = tf.math.reduce_mean(X, axis=0, keepdims=True)
        std = tf.math.reduce_std(X, axis=0, keepdims=True)
        return (X - mean)/std, mean, std
    
    def unnormalize(self, Y):
        return (Y*self.ystd) + self.ymean
    
    def Y_normalize(self, Y):
        return (Y - self.ymean)/self.ystd
    
    def std_unnormalize(self, Y):
        return Y * self.ystd
    
    def normalize(self, X):
        return (X - self.xmean)/self.xstd
        
        

    def compile(self, *args, loss=None, **kwargs):
        if loss is None:
            loss = self.neg_log_likelihood
        kwargs['loss'] = loss          
        self.model.compile(*args, **kwargs)   
        
        
        
    def elu_plus_one_plus_epsilon(self, x):
        """ELU activation with a very small addition to help prevent
        NaN in loss."""
        return tf.keras.backend.elu(x) + 1 + .00001


        
    def neg_log_likelihood(self, y_obs, y_pred):
        if self.task == 'regression':
            y_means, y_stds = tf.split(y_pred, [self.dim, self.dim], axis = 1)
            if self.train_std == 1:
                y_stds = ((self.max_var - .1) * tf.math.sigmoid(y_stds)) + .1
            else:
                y_stds = tf.constant(1.0)
            dist = tfp.distributions.Normal(loc=y_means, scale=y_stds )
            return K.sum(-dist.log_prob(tf.dtypes.cast(y_obs, tf.int32)))
        
        
        if self.task == 'classification':
            y_pred = tf.reshape(y_pred, [-1, self.dim])
            y_obs = tf.reshape(y_obs, [-1, self.dim, 1])
            dist = tfp.distributions.Categorical(logits = y_pred)
#             return tf.reduce_sum(y_pred)
            return K.sum(-dist.log_prob(tf.dtypes.cast(y_obs, tf.int32)))




    def evaluate(self, *args, **kwargs):
        if self.use_normalization is True:
            args = list(args)
            args[0] = self.normalize(args[0])
            if self.task == 'regression':
                args[1] = self.Y_normalize(args[1])
            args = tuple(args)
        return self.model.evaluate( *args, **kwargs)
    
    
    def fit(self, *args, **kwargs):
        if self.use_normalization is True:
            args = list(args)
            args[0], self.xmean, self.xstd = self.fit_normalize(args[0])
            if self.task == 'regression':
                args[1], self.ymean, self.ystd = self.fit_normalize(args[1])
            args = tuple(args)
        
        if 'batch_size' not in kwargs.keys():
            kwargs['batch_size'] = 32
        
        for layer in self.model.layers:
            if layer.__class__ in self.SVI_Layers:            
                layer.kl_weight = kwargs['batch_size'] * layer.kl_weight/args[0].shape[0]
                
        if (self.one_hot == False) and self.task == 'classification':
            args = list(args)
            args[1] = tf.one_hot(y_obs, tf.dtypes.cast(tf.reduce_max(y_obs), tf.int32))
            args = tuple(args)


                
                

        self.model.fit(*args, **kwargs)
        
        self.train_std = 1
        
        self.model.fit(*args, **kwargs)
        
        self.train_std = 0
        
        for layer in self.model.layers:
            if layer.__class__ in self.SVI_Layers:            
                layer.kl_weight =  layer.kl_weight*args[0].shape[0]/kwargs['batch_size']



    
    def predict(self, *args, return_std = True, **kwargs):
        if self.use_normalization is True:
            args = list(args)
            args[0] = self.normalize(args[0])
            args = tuple(args)
        y_pred = self.model.predict(*args, **kwargs)
        
        if self.task == 'regression':
            y_means, y_stds = tf.split(y_pred, [self.dim, self.dim], axis = 1)
            if self.use_normalization is True:
                y_means = self.unnormalize(y_means)
                y_stds = self.std_unnormalize(tf.exp(y_stds))
            dist = tfp.distributions.Normal(loc=y_means, scale=y_stds)
            return dist
        
        if self.task == 'classification':
            y_pred = tf.reshape(y_pred,  [-1, self.dim])
            dist = tfp.distributions.Categorical(logits= y_pred)
            return dist

In [13]:
# regmodel = SVI(RegModel, 1)
# regmodel.compile(optimizer = tf.keras.optimizers.Adam(lr=0.001))
# regmodel.fit(Xreg, Yreg)
# print(regmodel.evaluate(Xreg, Yreg)) 
# regression_dist = regmodel.predict(Xreg[0:5])
# print(regression_dist.sample())






classmodel = SVI(ClassModel, 1, task = 'classification', one_hot = True)
classmodel.compile(optimizer = tf.keras.optimizers.Adam())
classmodel.fit(Xclass, Yclass)
print(classmodel.evaluate(Xclass, Yclass))
classification_dist = classmodel.predict(Xclass[0:5])
print(classification_dist.sample())



HELLO
1
HELLO
1
HELLO
1
WE BUILD
WE BUILD
WE BUILD

KeyboardInterrupt: 

In [None]:
print(Xreg.shape)

In [12]:
515345/15625

32.98208

In [14]:
a = tf.random.normal([3,3])
print(a)

tf.Tensor(
[[ 1.1709458  -1.0055563   0.60173005]
 [ 1.1605917  -0.58635294  0.7802697 ]
 [-0.11709109 -0.33026427  0.42474884]], shape=(3, 3), dtype=float32)


In [17]:
print(tf.reduce_mean(tf.clip_by_value((a), 1., 1.)))

tf.Tensor(1.0, shape=(), dtype=float32)


In [18]:
print(tf.constant(1.))

tf.Tensor(1.0, shape=(), dtype=float32)


In [24]:
dim = 7
y_pred = tf.ones(7*32)
y_obs = tf.ones(7*32)

y_pred = tf.reshape(y_pred, [-1, dim])
y_obs = tf.reshape(y_obs, [-1, dim, 1])
dist = tfp.distributions.Categorical(logits = y_pred)
print(K.sum(-dist.log_prob(y_obs)))



tf.Tensor(13948.345, shape=(), dtype=float32)
