In [1]:
%matplotlib inline
import re
import nltk
import string
from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag
from joblib import Parallel, delayed
from tqdm.notebook import tqdm as tqdm
from tqdm.notebook import trange
import contextlib
import joblib
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import edward2 as ed
import tensorflow as tf
from scipy.special import digamma
from pickle import dump, load
from scipy.sparse import csr_matrix
import tensorflow_probability as tfp
from gensim.corpora.dictionary import Dictionary
from gensim.models import CoherenceModel
import os
import sys
import time

os.environ['CUDA_VISIBLE_DEVICES'] = '0'
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
# tensorflow does not work with new numpy versions
assert np.__version__  < '1.20'





In [2]:
tqdm.pandas()

In [3]:
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /home/iron/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [4]:
# data_proc = pd.read_pickle('data_proc.pkl')
# data_enc = pd.read_pickle('data.pkl')
# with open('word_to_idx.pkl', 'rb') as f:
#     words_to_idx = load(f)

In [5]:
seed = 42
data = pd.read_csv('nips-papers/papers.csv')
data = data[['paper_text']]
data_n = data.sample(n=100, random_state=42)

In [6]:
# data_n.head()

In [7]:
def func(text):
    text = text.lower()
    text = re.sub(r'(\d+)', '', text)
    text = re.sub(r'(\n)|(\t)', ' ', text)
    text = text.translate({ ord(c): None for c in string.punctuation })
    text = text.strip()
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = word_tokenize(text)
    text = [i for i in tokens if not i in stop_words and len(i) > 1]
    text = [lemmatizer.lemmatize(word) for word in text]
    return text

In [8]:
data_proc = data_n['paper_text'].progress_apply(func)

  0%|          | 0/100 [00:00<?, ?it/s]

In [9]:
data_proc.head()

509     [independent, component, analysis, identificat...
2576    [nearmaximum, entropy, model, binary, neural, ...
6362    [nearestneighbor, sample, compression, efficie...
6173    [efficient, highorder, interactionaware, featu...
6552    [multioutput, polynomial, network, factorizati...
Name: paper_text, dtype: object

In [10]:
# words_to_idx = {}
# idx_to_word = {}
# def encode(text, words_to_idx, idx_to_word):
#     text_enc = np.empty(len(text), dtype=int)
#     for i, word in enumerate(text):
#         if word not in words_to_idx:
#             idx = words_to_idx[word] = len(words_to_idx)
#             idx_to_word[idx] = word
#         else:
#             idx = words_to_idx[word]
#         text_enc[i] = idx
#     return text_enc
# data_enc = data_proc.progress_apply(encode, args=(words_to_idx, idx_to_word))

In [11]:
def encode2(text, word_dict):
    return np.asarray(word_dict.doc2idx(text))

word_dict = Dictionary(data_proc)
data_enc = data_proc.progress_apply(lambda x: encode2(x, word_dict))

  0%|          | 0/100 [00:00<?, ?it/s]

In [12]:
# data_proc.to_pickle('data_proc.pkl')
# data_enc.to_pickle('data.pkl')

# with open('word_to_idx.pkl', 'wb') as f:
#     dump(words_to_idx, f)

In [13]:
K = 5
D = len(data_enc)
Ns = data_enc.apply(lambda x: len(x)).to_numpy().astype(np.int)
N = Ns.sum()
V = len(word_dict)

In [14]:
print(K, D, Ns, N, V)

5 100 [1473 2453 3310 3107 3017 1621 1727 1618 3434 1774 1454 1339 2501 1669
 1785 1426 1956 2017 2192 1927 2614 2839 1642 2324 1934 3103 3264 2957
 2776 1736 3298 2756 1244 1501 1855 1112 1979 2687 1972 2950 2946 2881
 2925 2306 2767 3010 1629 1468 2904 1105 1847 1282 1463 2645 2189 3190
 1907 1033 2197 1437 3537 2117 1484 2646 2227 2141 3498 1673 2113 2894
 2744 1479 1917 3173 2566 2640 2765 2451 2814 2804 1580 1656 1806 2439
 2567 2834 2827 2455 2306 1458 3056 2524 1648 2298 1153 3384 2860 3158
 2646 1954] 227766 18411


In [15]:
def create_sparse(data, alpha, D, K, V):
    Ns = np.empty(D, dtype=np.int)
     
    for i, doc in enumerate(data):
        Ns[i] = len(doc)
    
    N = Ns.sum()
    rows = np.empty(N, dtype=np.int64)
    cols = np.empty(N, dtype=np.int64)
    v_cols = np.empty(N, dtype=np.int64)
    
    last_idx = 0
    
    for i, doc in tqdm(enumerate(data), total=D):
        n = len(doc)
        rows[last_idx:last_idx+n] = i
        cols[last_idx:last_idx+n] = np.arange(n, dtype=np.int64)
        v_cols[last_idx:last_idx+n] = doc
        last_idx += n
        
    N_max = Ns.max()
    phi_rows = np.repeat(rows, K)
    phi_cols = np.repeat(cols, K)
    phi3 = np.tile(np.arange(K), len(rows))  
    phi_indices = np.vstack((phi_rows, phi_cols, phi3)).T
    phi = tf.sparse.SparseTensor(phi_indices, values=tf.fill((len(phi_indices),), 1/K), dense_shape=(D, N_max, K))
    phi = tf.sparse.reorder(phi)
    gamma_v_cols = np.repeat(v_cols, K)
    gamma = tf.expand_dims(alpha, 0) + (Ns / K).reshape(-1, 1)

    beta_indices = np.vstack((phi3, gamma_v_cols)).T
    gamma_indices = np.vstack((phi_rows, phi3)).T
    w_indices = np.vstack((phi_rows, phi_cols, phi3, gamma_v_cols)).T
    tmp = tf.sparse.SparseTensor(w_indices, phi.values, dense_shape=(D, N_max, K, V))
    tmp = tf.sparse.reorder(tmp)
    w_indices = tmp.indices
    lmbd = tf.fill((K, V), 1/V)
    return phi, gamma, lmbd, tf.constant(beta_indices, dtype=tf.int64), tf.constant(gamma_indices, dtype=tf.int64), tf.constant(w_indices, dtype=tf.int64), N_max

In [None]:
# alpha_n = np.random.rand(K).astype(np.float32)
# eta_n = np.random.rand(1).astype(np.float32)
# beta_n = np.random.dirichlet(np.random.rand(V).astype(np.float32), size=K).astype(np.float32)
# phi_n = [np.full((n, K), 1/K).astype(np.float32) for n in Ns]
# gamma_n = alpha_n.reshape(1, -1) + Ns.reshape(-1, 1) / K
# gammma_n = gamma_n.astype(np.float32)
# lmbd_n = np.full((K, V), eta_n)
# lmbd_n = lmbd_n.astype(np.float32)

In [16]:
# It tries to estimate also smoothed params, but eta tends to go negative, breaking all the stuff.
# Maybe loss is incorrect

class Positive(tf.keras.constraints.Constraint):
    def __call__(self, w):
        return w * tf.cast(tf.math.greater(w, 0.), w.dtype)   
    
@tf.function
def calc_elbo(alpha, beta, eta, phi, gamma, lmbd, w_indices, D, K, N_max, V):
    digamma = tf.math.digamma(gamma) - tf.math.digamma(tf.math.reduce_sum(gamma, axis=1, keepdims=True))
    ta = tf.math.lgamma(tf.math.reduce_sum(alpha)) - \
                                          tf.math.reduce_sum(tf.math.lgamma(alpha)) + \
                                          tf.math.reduce_sum(tf.expand_dims(alpha - 1, 0)*digamma, axis=1)
    zt = tf.sparse.reduce_sum(tf.expand_dims(digamma, 1)*phi, axis=(1, 2))
    wzb = tf.math.reduce_sum(tf.expand_dims(tf.math.log1p(beta), axis=0) * tf.sparse.reduce_sum(tf.SparseTensor(w_indices, phi.values, dense_shape=(D, N_max, K, V)), axis=(1)), axis=(1, 2))
    bl = tf.math.reduce_sum((eta-1)*tf.math.reduce_sum(tf.math.digamma(lmbd) - tf.math.digamma(tf.math.reduce_sum(lmbd, axis=1, keepdims=True)), axis=1) + \
         tf.math.lgamma(eta*V) - V*tf.math.lgamma(eta))
    qt = -tf.math.lgamma(tf.math.reduce_sum(gamma, axis=1)) + tf.math.reduce_sum(tf.math.lgamma(gamma), axis=1) - tf.math.reduce_sum((gamma-1)*digamma, axis=1)
    phi_log_phi = tf.sparse.SparseTensor(phi.indices, phi.values*tf.math.log1p(phi.values), dense_shape=[D, N_max, K])
    qz = tf.sparse.reduce_sum(phi_log_phi, axis=(1, 2))
    elbo = tf.math.reduce_sum(ta + zt + qt + qz)
    return elbo

@tf.function
def e_step_it(alpha, beta_mod, eta, phi, gamma, lmbd, gamma_indices, w_indices, D, K, N_max, V):
    # phi
    dg = tf.math.exp(tf.math.digamma(gamma) - tf.math.digamma(tf.math.reduce_sum(gamma, axis=1, keepdims=True)))
    phi = tf.sparse.SparseTensor(phi.indices, beta_mod*tf.gather_nd(dg, gamma_indices), dense_shape=[D, N_max, K])
    phi /= tf.sparse.reduce_sum(phi, axis=2, keepdims=True) + 1e-5
    # gamma
    gamma = tf.expand_dims(alpha, 0) + tf.sparse.reduce_sum(phi, axis=1)
    # lambda
    lmbd = eta + tf.sparse.reduce_sum(tf.SparseTensor(w_indices, phi.values, dense_shape=(D, N_max, K, V)), axis=(0, 1))
    
    gamma.set_shape((D, K))
    lmbd.set_shape((K, V))
    return phi, gamma, lmbd

@tf.function
def e_step(alpha, beta, eta, phi, gamma, lmbd, beta_indices, gamma_indices, w_indices, D, K, N_max, V, max_it=1000, rtol=1e-03, atol=1e-03):      
    beta_mod = tf.gather_nd(beta, beta_indices)

    np_isclose = lambda elbo_old, elbo: np.allclose(elbo_old, elbo, rtol=rtol, atol=atol)
    tf_cond = lambda i, elbo_old, elbo, args: tf.logical_and(i < max_it, tf.logical_or(i == 0, tf.logical_not(tf.numpy_function(np_isclose, [elbo_old, elbo], tf.bool))))
    
    @tf.function
    def tf_body(i, elbo_old, elbo, args):
        phi, gamma, lmbd = args
        i = i + 1
        elbo_old = tf.identity(elbo)
        phi, gamma, lmbd = e_step_it(alpha, beta_mod, eta, phi, gamma, lmbd, gamma_indices, w_indices, D, K, N_max, V)
        elbo = calc_elbo(alpha, beta, eta, phi, gamma, lmbd, w_indices, D, K, N_max, V)
        args = (phi, gamma, lmbd)
        return (i, elbo_old, elbo, args)
    
    i = tf.constant(0, name='e_loop_counter')
    elbo = tf.constant(np.inf, dtype=tf.float32)
    elbo_old = tf.identity(elbo)
    args = (phi, gamma, lmbd)
    
    i, _, elbo, args = tf.while_loop(tf_cond, tf_body, [i, elbo_old, elbo, args], parallel_iterations=1)
    phi, gamma, lmbd = args
    return phi, gamma, lmbd

@tf.function
def m_step(alpha, alpha_2, beta, eta, phi, gamma, lmbd, w_indices, D, K, N_max, V, opt, max_it=1000, rtol=1e-03, atol=1e-03):
    # beta
    beta = tf.sparse.reduce_sum(tf.SparseTensor(w_indices, phi.values, dense_shape=(D, N_max, K, V)), axis=(0, 1))
    beta /= tf.math.reduce_sum(beta, axis=1, keepdims=True) + 1e-5
    # alpha
    digamma = tf.math.reduce_sum(tf.math.digamma(gamma) - tf.math.digamma(tf.math.reduce_sum(gamma, axis=1, keepdims=True)), axis=0)
    
    @tf.function
    def alpha_elbo(alpha, digamma):
        val = tf.math.reduce_sum(tf.math.lgamma(tf.math.reduce_sum(alpha)) - \
                                          tf.math.reduce_sum(tf.math.lgamma(alpha)) + \
                                          tf.math.reduce_sum(tf.expand_dims(alpha - 1, 0)*digamma, axis=1))
        return val
    
    alpha_it_cond = lambda i, digamma: i < K
    
    np_isclose = lambda elbo_old, elbo: np.allclose(elbo_old, elbo, rtol=rtol, atol=atol)
    alpha_cond = lambda i, elbo_old, elbo, digamma: tf.logical_and(i < max_it, tf.logical_or(i == 0, tf.logical_not(tf.numpy_function(np_isclose, [elbo_old, elbo], tf.bool))))
    
    @tf.function
    def alpha_it_body(i, digamma):        
        def loss_func():
            ta = tf.TensorArray(alpha.dtype, size=K, dynamic_size=False)
            alpha_copy = ta.unstack(alpha_2)
            alpha_copy = alpha_copy.write(i, tf.math.maximum(alpha[i], 1e-5))
            alpha_copy = alpha_copy.stack()
            alpha.assign(alpha_copy)
            alpha_2.assign(alpha_copy)
            a = tf.math.reduce_sum((alpha - 1)*digamma)
            res = -(D*(tf.math.lgamma(tf.math.reduce_sum(alpha)) - tf.math.reduce_sum(tf.math.lgamma(alpha))) + a)
            return res
        losses = tfp.math.minimize(loss_func, max_it, opt, convergence_criterion=tfp.optimizer.convergence_criteria.LossNotDecreasing(rtol=rtol, atol=atol), 
                                   trainable_variables=[alpha])
        
        ta = tf.TensorArray(alpha.dtype, size=K, dynamic_size=False)
        alpha_copy = ta.unstack(alpha_2)
        alpha_copy = alpha_copy.write(i, tf.math.maximum(alpha[i], 1e-5))
        alpha_copy = alpha_copy.stack()
        alpha.assign(alpha_copy)
        
        return i + 1, digamma
    
    @tf.function
    def alpha_body(i, elbo_old, elbo, digamma):
        i = i + 1
        j = tf.constant(0)
        elbo_old = tf.identity(elbo)
        j, _ = tf.while_loop(alpha_it_cond, alpha_it_body, [j, digamma], parallel_iterations=1)
#         tf.print(alpha, output_stream=sys.stdout)
        elbo = alpha_elbo(alpha, digamma)
        return (i, elbo_old, elbo, digamma)
    
    i = tf.constant(0)
    elbo = tf.constant(np.inf, dtype=tf.float32)
    elbo_old = tf.identity(elbo)
    i, _, elbo, _ = tf.while_loop(alpha_cond, alpha_body, [i, elbo_old, elbo, digamma], parallel_iterations=1)
    
    di_lmbd = tf.math.reduce_sum(tf.math.digamma(lmbd) - tf.math.digamma(tf.math.reduce_sum(lmbd, axis=1, keepdims=True))) 
    # eta
    def loss_func():
        loss_v = -((eta-1)*di_lmbd + K*(tf.math.lgamma(eta*V) - V*tf.math.lgamma(eta)))
        return loss_v
    
    losses = tfp.math.minimize(loss_func, max_it, opt, convergence_criterion=tfp.optimizer.convergence_criteria.LossNotDecreasing(rtol=rtol, atol=atol), trainable_variables=[eta])
    eta.assign(tf.math.maximum(eta, 0.1))
    return beta

def train(data, D, K, V, alpha=None, eta=None, max_it=1000, seed=42, rtol=1e-3, atol=1e-3):
    """
    alpha
        The alpha prior. Must be a single value
    eta
        The eta prior. Must be a single value
    """
    start = time.time()
    if alpha:
        alpha = tf.zeros((K,)) + alpha
    else:
        alpha = tf.random.uniform((K,))
    
    if eta:
        eta = tf.zeros((1,)) + eta
    else:
        eta = tf.random.uniform((1,))
        
    beta = tf.convert_to_tensor(np.random.dirichlet(np.zeros((V,)) + eta, size=K), dtype=tf.float32)

    opt = tf.optimizers.Adam(1e-3)
    
    phi, gamma, lmbd, beta_indices, gamma_indices, w_indices, N_max = create_sparse(data, alpha, D, K, V)
    N_max = N_max.item()
    
    alpha = tf.Variable(alpha, constraint=Positive())
    alpha_2 = tf.Variable(tf.identity(alpha), trainable=False)
    eta = tf.Variable(eta, constraint=Positive())

    np_isclose = lambda elbo_old, elbo: np.allclose(elbo_old, elbo, rtol=1e-3, atol=1e-3)
    train_cond = lambda i, elbo_old, elbo, args: tf.logical_and(i < max_it, tf.logical_or(i == 0, tf.logical_not(tf.numpy_function(np_isclose, [elbo_old, elbo], tf.bool))))
    
    @tf.function
    def train_body(i, elbo_old, elbo, args):
        i = i + 1
        beta, phi, gamma, lmbd = args
        elbo_old = elbo
        tf.print(i, ': E step', output_stream=sys.stdout, end='\t')
        phi, gamma, lmbd = e_step(alpha, beta, eta, phi, gamma, lmbd, beta_indices, gamma_indices, w_indices, D, K, N_max, V, atol=atol, rtol=rtol)
        tf.print('M step', output_stream=sys.stdout, end='\t')
        beta = m_step(alpha, alpha_2, beta, eta, phi, gamma, lmbd, w_indices, D, K, N_max, V, opt, atol=atol, rtol=rtol)
        elbo = calc_elbo(alpha, beta, eta, phi, gamma, lmbd, w_indices, D, K, N_max, V)
        tf.print('ELBO =', elbo, output_stream=sys.stdout)
        args = (beta, phi, gamma, lmbd)
        return i, elbo_old, elbo, args
    
    i = tf.constant(0)
    args = (beta, phi, gamma, lmbd)
    elbo = tf.constant(np.inf, dtype=tf.float32)
    elbo_old = tf.identity(elbo)
    print('Preparing...')
    i, elbo_old, elbo, args = tf.while_loop(train_cond, train_body, [i, elbo_old, elbo, args], parallel_iterations=1)
    beta, phi, gamma, lmbd = args
    tf.print('Converged in', i,  'iterations', output_stream=sys.stdout)
    end = time.time()
    print('Time:', end-start, 's')
    return alpha, beta, eta, phi, gamma, lmbd, elbo

alpha, beta, eta, phi, gamma, lmbd, elbo = train(data_enc, D, K, V)

  0%|          | 0/100 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = -233279.766
2 : E step	M step	ELBO = -236907.844
3 : E step	M step	ELBO = -197734.438
4 : E step	M step	ELBO = -151478.844
5 : E step	M step	ELBO = -109258.789
6 : E step	M step	ELBO = -76375.2578
7 : E step	M step	ELBO = -49924.5977
8 : E step	M step	ELBO = -30471.8262
9 : E step	M step	ELBO = -15996.1758
10 : E step	M step	ELBO = -3363.90479
11 : E step	M step	ELBO = 8155.35498
12 : E step	M step	ELBO = 20044.6816
13 : E step	M step	ELBO = 29865.5039
14 : E step	M step	ELBO = 37720.6133
15 : E step	M step	ELBO = 45035.4297
16 : E step	M step	ELBO = 52159.8203
17 : E step	M step	ELBO = 58456.8672
18 : E step	M step	ELBO = 63638.8086
19 : E step	M step	ELBO = 67640.2344
20 : E step	M step	ELBO = 70649.4375
21 : E step	M step	ELBO = 72807.2891
22 : E step	M step	ELBO = 74468.4688
23 : E step	M step	ELBO = 75988.7109
24 : E step	M step	ELBO = 77478.2109
25 : E step	M step	ELBO = 78874.4531
26 : E step	M step	ELBO = 80053.2344
27 : E step	M step	ELBO 

In [None]:
# # Generate doc
# theta_d = ed.Dirichlet(alpha)
# for i in range(100):
#     z_dn = ed.Categorical(probs=theta_d)
#     w_dn = ed.Categorical(probs=beta[z_dn])
#     print(word_dict[w_dn.numpy()])


In [17]:
# Get topic top words
def get_topics(beta, word_dict, k):
    topics = []
    weights = []
    for h in range(k):
        top_beta = tf.math.top_k(beta[h], k=10)
        topic = [word_dict[i] for i in top_beta[1].numpy()] 
        weight = list(top_beta[0].numpy())
    #     print(([idx_to_word[i] for i in tf.math.top_k(beta[h], k=10)[1].numpy()]))
        topics.append(topic)
        weights.append(weight)

    return (np.asarray(topics), np.asarray(weights))

topics, weights = get_topics(beta, word_dict, K)
topics

array([['algorithm', 'data', 'graph', 'problem', 'learning', 'method',
        'function', 'set', 'matrix', 'model'],
       ['feature', 'game', 'image', 'algorithm', 'set', 'strategy',
        'player', 'action', 'method', 'model'],
       ['function', 'model', 'distribution', 'data', 'learning',
        'algorithm', 'probability', 'sample', 'set', 'problem'],
       ['neuron', 'model', 'neural', 'network', 'input', 'figure',
        'weight', 'cell', 'learning', 'signal'],
       ['network', 'model', 'neural', 'data', 'learning', 'training',
        'parameter', 'number', 'using', 'set']], dtype='<U12')

In [18]:
def get_coherence(topics, text_data, word_dict, coherence_type='c_v'):
    coherence_model_lda = CoherenceModel(topics=topics, texts=text_data, dictionary=word_dict, coherence='c_v')
    return coherence_model_lda.get_coherence()

coherence = get_coherence(topics, data_proc, word_dict, 'c_v')
coherence

0.4213764018817706

Parameter exploration loop

In [None]:
Ns = [25, 50]
Ks = range(1, 15, 3)
etas = list(np.arange(0.01, 1., 0.3)) + [5, 10]
alphas = list(np.arange(0.01, 1., 0.3)) + [5, 10]

params = []
for n in Ns:
    n_params = []
    for k in Ks:
        for a in alphas:
            for e in etas:
                n_params.append((k, a, e))
    params.append((n, n_params))

results = pd.DataFrame(columns=["N", "K", "alpha", "eta", "elbo", "coherence", "topics", "weights"])

i = 0
for n, n_params in tqdm(params):
    data_n = data.sample(n=n, random_state=42)
    data_proc = data_n['paper_text'].progress_apply(func)
    word_dict = Dictionary(data_proc)
    data_enc = data_proc.progress_apply(lambda x: encode2(x, word_dict))
    V = len(word_dict)
    
    for k, a, e in tqdm(n_params):
        print(n, k, a, e)
        _, beta, _, _, _, _, elbo = train(data_enc, n, k, V, alpha=a, eta=e)
        topics, weights = get_topics(beta, word_dict, k=k)
        coherence = get_coherence(topics, data_proc, word_dict, 'c_v')
        str_topics = repr([list(topic) for topic in topics])
        str_weights = repr([list(weight) for weight in weights])
        results.loc[i] = n, k, a, e, elbo.numpy(), coherence, str_topics, str_weights
        i += 1

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/180 [00:00<?, ?it/s]

25 1 0.01 0.01


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 2374.34375
2 : E step	M step	ELBO = 4595.79443
3 : E step	M step	ELBO = 6393.53564
4 : E step	M step	ELBO = 8461.25293
5 : E step	M step	ELBO = 9621.52734
6 : E step	M step	ELBO = 10573.9248
7 : E step	M step	ELBO = 11562.6641
8 : E step	M step	ELBO = 12411.3779
9 : E step	M step	ELBO = 13157.124
10 : E step	M step	ELBO = 13822.8564
11 : E step	M step	ELBO = 14561.5361
12 : E step	M step	ELBO = 15453.2236
13 : E step	M step	ELBO = 16127.4395
14 : E step	M step	ELBO = 16666.7754
15 : E step	M step	ELBO = 17048.916
16 : E step	M step	ELBO = 17351.0273
17 : E step	M step	ELBO = 17585.3516
18 : E step	M step	ELBO = 17793.9785
19 : E step	M step	ELBO = 17947.2051
20 : E step	M step	ELBO = 18078.2852
21 : E step	M step	ELBO = 18192.1328
22 : E step	M step	ELBO = 18266.1367
23 : E step	M step	ELBO = 18333.0215
24 : E step	M step	ELBO = 18387.4297
25 : E step	M step	ELBO = 18431.9941
26 : E step	M step	ELBO = 18475.3145
27 : E step	M step	ELBO = 18515.3145

  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 19258.4512
2 : E step	M step	ELBO = 29008.25
3 : E step	M step	ELBO = 31536.9219
4 : E step	M step	ELBO = 32248.291
5 : E step	M step	ELBO = 32434.5
6 : E step	M step	ELBO = 32540.3945
7 : E step	M step	ELBO = 32616.9805
8 : E step	M step	ELBO = 32645.8047
Converged in 8 iterations
Time: 2.112272024154663 s
25 1 0.01 0.61


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 24415.7383
2 : E step	M step	ELBO = 32267.4453
3 : E step	M step	ELBO = 32681.959
4 : E step	M step	ELBO = 32689.9824
Converged in 4 iterations
Time: 1.587148904800415 s
25 1 0.01 0.9099999999999999


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 26478.0918
2 : E step	M step	ELBO = 32946.6641
3 : E step	M step	ELBO = 32824.293
4 : E step	M step	ELBO = 32751.5859
5 : E step	M step	ELBO = 32723.7695
Converged in 5 iterations
Time: 1.7241103649139404 s
25 1 0.01 5


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 31454.332
2 : E step	M step	ELBO = 33219.1758
3 : E step	M step	ELBO = 32868.1445
4 : E step	M step	ELBO = 32769.5312
5 : E step	M step	ELBO = 32732.6855
6 : E step	M step	ELBO = 32717.0137
Converged in 6 iterations
Time: 2.1839427947998047 s
25 1 0.01 10


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 31934.748
2 : E step	M step	ELBO = 33220.5234
3 : E step	M step	ELBO = 32868.0625
4 : E step	M step	ELBO = 32769.5078
5 : E step	M step	ELBO = 32732.6758
6 : E step	M step	ELBO = 32717.0137
Converged in 6 iterations
Time: 2.220677614212036 s
25 1 0.31 0.01


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 2115.20679
2 : E step	M step	ELBO = 4014.7771
3 : E step	M step	ELBO = 5804.37939
4 : E step	M step	ELBO = 7704.33
5 : E step	M step	ELBO = 9358.15332
6 : E step	M step	ELBO = 10433.0254
7 : E step	M step	ELBO = 11518.1133
8 : E step	M step	ELBO = 12557.1787
9 : E step	M step	ELBO = 13520.8975
10 : E step	M step	ELBO = 14375.5566
11 : E step	M step	ELBO = 15373.1211
12 : E step	M step	ELBO = 16110.3418
13 : E step	M step	ELBO = 16617.4492
14 : E step	M step	ELBO = 16997.4688
15 : E step	M step	ELBO = 17418.7012
16 : E step	M step	ELBO = 17709.0703
17 : E step	M step	ELBO = 17945.1973
18 : E step	M step	ELBO = 18127.0664
19 : E step	M step	ELBO = 18275.293
20 : E step	M step	ELBO = 18412.207
21 : E step	M step	ELBO = 18528.4473
22 : E step	M step	ELBO = 18625.8301
23 : E step	M step	ELBO = 18698.2227
24 : E step	M step	ELBO = 18757.7031
25 : E step	M step	ELBO = 18801.1035
26 : E step	M step	ELBO = 18837.9121
27 : E step	M step	ELBO = 18866.1484
28 

  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 18378.8262
2 : E step	M step	ELBO = 28900.1133
3 : E step	M step	ELBO = 31334.1484
4 : E step	M step	ELBO = 32051.1895
5 : E step	M step	ELBO = 32428.5293
6 : E step	M step	ELBO = 32544.0938
7 : E step	M step	ELBO = 32606.6133
8 : E step	M step	ELBO = 32637.6055
Converged in 8 iterations
Time: 2.1915767192840576 s
25 1 0.31 0.61


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 24666.2148
2 : E step	M step	ELBO = 32166.1191
3 : E step	M step	ELBO = 32668.9609
4 : E step	M step	ELBO = 32706.5742
5 : E step	M step	ELBO = 32700.0234
Converged in 5 iterations
Time: 1.7022109031677246 s
25 1 0.31 0.9099999999999999


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 27286.2656
2 : E step	M step	ELBO = 32880.8594
3 : E step	M step	ELBO = 32814.25
4 : E step	M step	ELBO = 32747.8848
5 : E step	M step	ELBO = 32721.6543
Converged in 5 iterations
Time: 1.7636749744415283 s
25 1 0.31 5


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 31435.7852
2 : E step	M step	ELBO = 33220.6328
3 : E step	M step	ELBO = 32868.1172
4 : E step	M step	ELBO = 32769.5
5 : E step	M step	ELBO = 32732.6719
6 : E step	M step	ELBO = 32717.0059
Converged in 6 iterations
Time: 2.1256308555603027 s
25 1 0.31 10


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 31949.8672
2 : E step	M step	ELBO = 33218.4648
3 : E step	M step	ELBO = 32867.9844
4 : E step	M step	ELBO = 32769.4922
5 : E step	M step	ELBO = 32732.6758
6 : E step	M step	ELBO = 32717.0117
Converged in 6 iterations
Time: 2.2865090370178223 s
25 1 0.61 0.01


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 2344.88354
2 : E step	M step	ELBO = 4486.85742
3 : E step	M step	ELBO = 6026.56738
4 : E step	M step	ELBO = 7761.18652
5 : E step	M step	ELBO = 9390.71777
6 : E step	M step	ELBO = 10797.7686
7 : E step	M step	ELBO = 12120.4746
8 : E step	M step	ELBO = 12948.9102
9 : E step	M step	ELBO = 13825.7324
10 : E step	M step	ELBO = 14653.3418
11 : E step	M step	ELBO = 15461.2715
12 : E step	M step	ELBO = 16142.3252
13 : E step	M step	ELBO = 16583.9844
14 : E step	M step	ELBO = 17188.0156
15 : E step	M step	ELBO = 17678.3027
16 : E step	M step	ELBO = 17966.748
17 : E step	M step	ELBO = 18162.2578
18 : E step	M step	ELBO = 18302.2598
19 : E step	M step	ELBO = 18449.2598
20 : E step	M step	ELBO = 18547.3887
21 : E step	M step	ELBO = 18651.1133
22 : E step	M step	ELBO = 18745.9531
23 : E step	M step	ELBO = 18821.4727
24 : E step	M step	ELBO = 18882.0098
25 : E step	M step	ELBO = 18938.123
26 : E step	M step	ELBO = 18985.0957
27 : E step	M step	ELBO = 19023.4551

  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 18504.5625
2 : E step	M step	ELBO = 28929.2891
3 : E step	M step	ELBO = 31632.7461
4 : E step	M step	ELBO = 32288.2109
5 : E step	M step	ELBO = 32451.7734
6 : E step	M step	ELBO = 32542.4277
7 : E step	M step	ELBO = 32591.2773
8 : E step	M step	ELBO = 32629.3691
9 : E step	M step	ELBO = 32651.8652
Converged in 9 iterations
Time: 2.0258266925811768 s
25 1 0.61 0.61


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 24468.127
2 : E step	M step	ELBO = 32187.4102
3 : E step	M step	ELBO = 32672.8086
4 : E step	M step	ELBO = 32709.1172
5 : E step	M step	ELBO = 32702.2539
Converged in 5 iterations
Time: 1.6338255405426025 s
25 1 0.61 0.9099999999999999


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 26746.873
2 : E step	M step	ELBO = 32821.043
3 : E step	M step	ELBO = 32806.5391
Converged in 3 iterations
Time: 1.5705275535583496 s
25 1 0.61 5


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 31537.748
2 : E step	M step	ELBO = 33214.6602
3 : E step	M step	ELBO = 32867.4805
4 : E step	M step	ELBO = 32769.3242
5 : E step	M step	ELBO = 32732.6035
6 : E step	M step	ELBO = 32716.9766
Converged in 6 iterations
Time: 2.276832342147827 s
25 1 0.61 10


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 32015.6797
2 : E step	M step	ELBO = 33216.1914
3 : E step	M step	ELBO = 32867.5938
4 : E step	M step	ELBO = 32769.3711
5 : E step	M step	ELBO = 32732.627
6 : E step	M step	ELBO = 32716.9902
Converged in 6 iterations
Time: 2.245131731033325 s
25 1 0.9099999999999999 0.01


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 1815.06714
2 : E step	M step	ELBO = 3674.26147
3 : E step	M step	ELBO = 5363.64795
4 : E step	M step	ELBO = 7106.43506
5 : E step	M step	ELBO = 8568.97949
6 : E step	M step	ELBO = 10257.5869
7 : E step	M step	ELBO = 11556.416
8 : E step	M step	ELBO = 12615.5332
9 : E step	M step	ELBO = 13589.9297
10 : E step	M step	ELBO = 14601.6602
11 : E step	M step	ELBO = 15543.0586
12 : E step	M step	ELBO = 16253.9072
13 : E step	M step	ELBO = 16776.0605
14 : E step	M step	ELBO = 17220.9395
15 : E step	M step	ELBO = 17691.7695
16 : E step	M step	ELBO = 17979.3398
17 : E step	M step	ELBO = 18242.9883
18 : E step	M step	ELBO = 18423.4238
19 : E step	M step	ELBO = 18543.5938
20 : E step	M step	ELBO = 18632.2305
21 : E step	M step	ELBO = 18716.748
22 : E step	M step	ELBO = 18805.0293
23 : E step	M step	ELBO = 18884.7754
24 : E step	M step	ELBO = 18940.5195
25 : E step	M step	ELBO = 18980.6641
26 : E step	M step	ELBO = 19023.041
27 : E step	M step	ELBO = 19062.8145


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 19202.9844
2 : E step	M step	ELBO = 28932.0449
3 : E step	M step	ELBO = 31337.127
4 : E step	M step	ELBO = 32158.5957
5 : E step	M step	ELBO = 32395.5078
6 : E step	M step	ELBO = 32498.3789
7 : E step	M step	ELBO = 32569.8984
8 : E step	M step	ELBO = 32616.0684
9 : E step	M step	ELBO = 32644.8184
Converged in 9 iterations
Time: 2.106872320175171 s
25 1 0.9099999999999999 0.61


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 24391.541
2 : E step	M step	ELBO = 32280.6289
3 : E step	M step	ELBO = 32679.8887
4 : E step	M step	ELBO = 32694.0664
Converged in 4 iterations
Time: 1.9586355686187744 s
25 1 0.9099999999999999 0.9099999999999999


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 26902.3398
2 : E step	M step	ELBO = 32817.9531
3 : E step	M step	ELBO = 32809.9609
Converged in 3 iterations
Time: 1.4474713802337646 s
25 1 0.9099999999999999 5


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 31573.1426
2 : E step	M step	ELBO = 33210.9492
3 : E step	M step	ELBO = 32866.9609
4 : E step	M step	ELBO = 32769.2383
5 : E step	M step	ELBO = 32732.5879
6 : E step	M step	ELBO = 32716.9766
Converged in 6 iterations
Time: 2.1465892791748047 s
25 1 0.9099999999999999 10


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 31894.3457
2 : E step	M step	ELBO = 33222.2031
3 : E step	M step	ELBO = 32868.4766
4 : E step	M step	ELBO = 32769.625
5 : E step	M step	ELBO = 32732.7227
6 : E step	M step	ELBO = 32717.0332
Converged in 6 iterations
Time: 2.304630756378174 s
25 1 5 0.01


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 2315.9375
2 : E step	M step	ELBO = 4501.88037
3 : E step	M step	ELBO = 6462.10254
4 : E step	M step	ELBO = 8230.98
5 : E step	M step	ELBO = 9534.39258
6 : E step	M step	ELBO = 10773.374
7 : E step	M step	ELBO = 11569.1631
8 : E step	M step	ELBO = 12657.1416
9 : E step	M step	ELBO = 13377.0703
10 : E step	M step	ELBO = 14178.2295
11 : E step	M step	ELBO = 15070.293
12 : E step	M step	ELBO = 15711.8594
13 : E step	M step	ELBO = 16277.2988
14 : E step	M step	ELBO = 16719.7324
15 : E step	M step	ELBO = 17054.6016
16 : E step	M step	ELBO = 17355.6074
17 : E step	M step	ELBO = 17568.3066
18 : E step	M step	ELBO = 17749.5527
19 : E step	M step	ELBO = 17903.5703
20 : E step	M step	ELBO = 18027.6133
21 : E step	M step	ELBO = 18128.2637
22 : E step	M step	ELBO = 18207.832
23 : E step	M step	ELBO = 18295.5664
24 : E step	M step	ELBO = 18364.5273
25 : E step	M step	ELBO = 18422.1387
26 : E step	M step	ELBO = 18481.502
27 : E step	M step	ELBO = 18524.1113
28 : 

  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 19331.1602
2 : E step	M step	ELBO = 29450.2012
3 : E step	M step	ELBO = 31590.5254
4 : E step	M step	ELBO = 32204.7246
5 : E step	M step	ELBO = 32472.6562
6 : E step	M step	ELBO = 32561.8809
7 : E step	M step	ELBO = 32608.6504
8 : E step	M step	ELBO = 32636.1504
Converged in 8 iterations
Time: 1.7989225387573242 s
25 1 5 0.61


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 25071.8379
2 : E step	M step	ELBO = 32098.123
3 : E step	M step	ELBO = 32642.623
4 : E step	M step	ELBO = 32686.0254
5 : E step	M step	ELBO = 32697.4863
Converged in 5 iterations
Time: 1.5744950771331787 s
25 1 5 0.9099999999999999


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 26857.1641
2 : E step	M step	ELBO = 32796.9844
3 : E step	M step	ELBO = 32804.6445
Converged in 3 iterations
Time: 1.4096856117248535 s
25 1 5 5


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 31475.4
2 : E step	M step	ELBO = 33217.8828
3 : E step	M step	ELBO = 32867.8906
4 : E step	M step	ELBO = 32769.457
5 : E step	M step	ELBO = 32732.6641
6 : E step	M step	ELBO = 32717.0059
Converged in 6 iterations
Time: 1.9693005084991455 s
25 1 5 10


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 31890.4727
2 : E step	M step	ELBO = 33221.7617
3 : E step	M step	ELBO = 32868.3516
4 : E step	M step	ELBO = 32769.5938
5 : E step	M step	ELBO = 32732.7129
6 : E step	M step	ELBO = 32717.0273
Converged in 6 iterations
Time: 2.044567823410034 s
25 1 10 0.01


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 1588.24768
2 : E step	M step	ELBO = 3762.00269
3 : E step	M step	ELBO = 5687.99561
4 : E step	M step	ELBO = 7625.22705
5 : E step	M step	ELBO = 8916.12207
6 : E step	M step	ELBO = 10299.4521
7 : E step	M step	ELBO = 11417.8926
8 : E step	M step	ELBO = 12565.3047
9 : E step	M step	ELBO = 14323.4814
10 : E step	M step	ELBO = 15267.8125
11 : E step	M step	ELBO = 15986.0469
12 : E step	M step	ELBO = 16603.6211
13 : E step	M step	ELBO = 17237.3184
14 : E step	M step	ELBO = 17693.6914
15 : E step	M step	ELBO = 18058.0625
16 : E step	M step	ELBO = 18317.6602
17 : E step	M step	ELBO = 18551.4316
18 : E step	M step	ELBO = 18743.8887
19 : E step	M step	ELBO = 18864.9141
20 : E step	M step	ELBO = 18992.5742
21 : E step	M step	ELBO = 19092.9414
22 : E step	M step	ELBO = 19183.002
23 : E step	M step	ELBO = 19257.582
24 : E step	M step	ELBO = 19317.2402
25 : E step	M step	ELBO = 19375.1719
26 : E step	M step	ELBO = 19424.8633
27 : E step	M step	ELBO = 19465.1562

  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 20108.0918
2 : E step	M step	ELBO = 29879.1094
3 : E step	M step	ELBO = 31557.8691
4 : E step	M step	ELBO = 32137.0098
5 : E step	M step	ELBO = 32408.0371
6 : E step	M step	ELBO = 32527.0469
7 : E step	M step	ELBO = 32581.9492
8 : E step	M step	ELBO = 32624.2598
9 : E step	M step	ELBO = 32650.373
Converged in 9 iterations
Time: 1.9357264041900635 s
25 1 10 0.61


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 25018.9746
2 : E step	M step	ELBO = 32158.6973
3 : E step	M step	ELBO = 32628.7637
4 : E step	M step	ELBO = 32693.5371
5 : E step	M step	ELBO = 32696.9336
Converged in 5 iterations
Time: 2.1892507076263428 s
25 1 10 0.9099999999999999


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 26620.5215
2 : E step	M step	ELBO = 32851.0078
3 : E step	M step	ELBO = 32805.3711
4 : E step	M step	ELBO = 32749.0625
5 : E step	M step	ELBO = 32724.502
Converged in 5 iterations
Time: 1.6629748344421387 s
25 1 10 5


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 31584.4648
2 : E step	M step	ELBO = 33213.293
3 : E step	M step	ELBO = 32866.8594
4 : E step	M step	ELBO = 32769.0859
5 : E step	M step	ELBO = 32732.502
6 : E step	M step	ELBO = 32716.9297
Converged in 6 iterations
Time: 1.997713327407837 s
25 1 10 10


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = 31994.0176
2 : E step	M step	ELBO = 33217.3672
3 : E step	M step	ELBO = 32867.7578
4 : E step	M step	ELBO = 32769.4531
5 : E step	M step	ELBO = 32732.666
6 : E step	M step	ELBO = 32717.0078
Converged in 6 iterations
Time: 2.4234671592712402 s
25 4 0.01 0.01


  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = -7407.19238
2 : E step	M step	ELBO = -13131.4414
3 : E step	M step	ELBO = -16784.1934
4 : E step	M step	ELBO = -20101.418
5 : E step	M step	ELBO = -22259.7129
6 : E step	M step	ELBO = -24007.6016
7 : E step	M step	ELBO = -25318.0879
8 : E step	M step	ELBO = -26879.4199
9 : E step	M step	ELBO = -28083.0137
10 : E step	M step	ELBO = -28943.9668
11 : E step	M step	ELBO = -29519.1387
12 : E step	M step	ELBO = -30109.6699
13 : E step	M step	ELBO = -30822.0312
14 : E step	M step	ELBO = -31233.7969
15 : E step	M step	ELBO = -31567.4102
16 : E step	M step	ELBO = -31891.2852
17 : E step	M step	ELBO = -32142.5781
18 : E step	M step	ELBO = -32308.791
19 : E step	M step	ELBO = -32467.6777
20 : E step	M step	ELBO = -32629.2871
21 : E step	M step	ELBO = -32736.0527
22 : E step	M step	ELBO = -32822.2734
23 : E step	M step	ELBO = -32904.3594
24 : E step	M step	ELBO = -32983.2656
25 : E step	M step	ELBO = -33042.0391
26 : E step	M step	ELBO = -33085.8164
27 : E ste

  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	ELBO = -43157.7734
2 : E step	M step	ELBO = -45859.0117
3 : E step	M step	ELBO = -43997.0898
4 : E step	M step	ELBO = -41315.8633
5 : E step	M step	ELBO = -37775.875
6 : E step	M step	ELBO = -33570.3555
7 : E step	M step	ELBO = -28916.6387
8 : E step	M step	ELBO = -23927.5273
9 : E step	M step	ELBO = -18956.9785
10 : E step	M step	ELBO = -13753.8281
11 : E step	M step	ELBO = -8461.61719
12 : E step	M step	ELBO = -3447.50635
13 : E step	M step	ELBO = 1081.01208
14 : E step	M step	ELBO = 4816.12
15 : E step	M step	ELBO = 8283.4
16 : E step	M step	ELBO = 11779.9033
17 : E step	M step	ELBO = 14969.0957
18 : E step	M step	ELBO = 17383.2656
19 : E step	M step	ELBO = 19384.8438
20 : E step	M step	ELBO = 21124.332
21 : E step	M step	ELBO = 22495.8027
22 : E step	M step	ELBO = 23729.1152
23 : E step	M step	ELBO = 24684.7578
24 : E step	M step	ELBO = 25558.918
25 : E step	M step	ELBO = 26305.2793
26 : E step	M step	ELBO = 26855.7207
27 : E step	M step	ELBO = 27372.

  0%|          | 0/25 [00:00<?, ?it/s]

Preparing...
1 : E step	M step	

In [None]:
results

In [None]:
results.to_csv("results/resultset_1.csv", index=False)

In [None]:
# MAX_IT = 10
# EPS = 0.001
# tf.executing_eagerly()
# optim = tf.keras.optimizers.Adam(1e-3)
# # B = ed.Dirichlet(concentration=tf.fill([K, V], 0.1), name="topics")
# # Z = ed.DirichletMultinomial(tf.convert_to_tensor(Ns), concentration=tf.fill([D, K], 0.1))
# alpha = np.copy(alpha_n).astype(np.float32)
# eta = np.copy(eta_n).astype(np.float32)

# beta = np.copy(beta_n).astype(np.float32)
# phi = [np.full((n, K), 1/K).astype(np.float32) for n in Ns]
# gamma = np.copy(gamma_n).astype(np.float32)
# lmbd = np.copy(lmbd_n).astype(np.float32)

# bb = None
# gg = None
# ww = None

# class Positive(tf.keras.constraints.Constraint):
#     def __call__(self, w):
#         return w * tf.cast(tf.math.greater(w, 0.), w.dtype)

# bb = []
# for it in trange(MAX_IT):
#     bb2 = []
#     gg2 = []
#     ww2 = []
#     print('before', gamma)
#     for d in range(D):
#         for n in range(Ns[d]):
#             for i in range(K):
#                 phi[d][n, i] = beta[i, data_enc.iloc[d][n]] * np.exp(digamma(gamma[d, i]) - digamma(np.sum(gamma[d])))
#         phi[d] /= np.sum(phi[d], axis=-1, keepdims=True) + 1e-5
        
#         for i in range(K):
#             gamma[d, i] = alpha[i] + np.sum(phi[d][:, i])
#     print('after', gamma)

#     lmbd = np.full((K, V), eta)
#     for i in range(K):
#         for j in range(V):
#             for d in range(D):
#                 mask = (data_enc.iloc[d] == j)
#                 lmbd[i, j] += np.sum(phi[d][:, i]*mask)
    
     
# #     if bb is None:
# #         bb = bb2
# #         gg = gg2
# #         ww = ww2
# #         break
                
#     alpha_t = tf.Variable(alpha, trainable=True, constraint=Positive())
#     gamma_t = tf.convert_to_tensor(gamma, dtype=tf.float32)
    
    
#     def f_x():
#         g_term = tf.math.reduce_sum(tf.expand_dims((alpha_t - 1), 0)*(tf.math.digamma(gamma_t) - 
#                                                    tf.math.digamma(tf.math.reduce_sum(gamma_t, axis=1, keepdims=True))), axis=1)
#         loss = -tf.math.reduce_sum(tf.math.lgamma(tf.math.reduce_sum(alpha_t)) - tf.math.reduce_sum(tf.math.lgamma(alpha_t)) + g_term)
#         return loss
    
#     for itt in range(10):
#         for i in range(K):
#             for itt1 in range(50):
#                 #with tf.GradientTape() as tape:
#                 optim.minimize(f_x, [alpha_t])
# #                 grads = tape.gradient(loss, opt_a)
# #                 optim.apply_gradients([(grads, opt_a)])
#                 alpha[i] = alpha_t.numpy()[i]
#                 np.nan_to_num(alpha, copy=False, nan=1e-5)
#                 alpha_t.assign(alpha)
#         print(alpha_t)
#     beta = (lmbd - eta) / (np.sum(lmbd - eta, axis=-1, keepdims=True) + 1e-5)
#     break
    
#     eta_t = tf.Variable(eta, trainable=True, constraint=Positive())
    
#     @tf.function
#     def f_eta():
#         loss = K*((eta_t-1)*(tf.math.digamma(eta_t) - tf.math.digamma(eta_t*V)) + tf.math.lgamma(eta_t*V) - V*tf.math.lgamma(eta))
#         return loss
    
#     for itt1 in range(50):
#         optim.minimize(f_eta, [eta_t])
#     eta = eta_t.numpy()

    