# Model fitting and experiment listing

This notebook shows how we can train models and list previously trained ones (along with useful information about them).

In [2]:
# Auto-reload imported modules before each cell executes, so edits to the
# project source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import os

import numpy as np

from neural_clustering.model import dpmm, gmm, util
from neural_clustering.criticize import summarize_experiments
from neural_clustering import config

  return f(*args, **kwds)


In [4]:
import logging
# Emit INFO-level records so the "saved in ..." messages logged by the model
# modules during fitting show up in the cell output.
logging.basicConfig(level=logging.INFO)

In [5]:
# Project configuration read from the YAML file one directory up; its 'root'
# entry is used further down to locate the training data.
cfg = config.load('../config.yaml')

# Model training

## Load training data (generated in notebook 2)

In [6]:
# Read the training matrix stored under the configured root directory
training_path = os.path.join(cfg['root'], 'training.npy')
x_train = np.load(training_path)
print(f'x_train shape: {x_train.shape}')

x_train shape: (1876, 21)


## ...or generate toy dataset

In [10]:
# Alternative data source: uncomment the two lines below to overwrite x_train
# with a toy dataset instead of the data loaded from disk.
# NOTE(review): 500 is presumably the number of generated points — confirm
# against util.build_toy_dataset.
# x_train = util.build_toy_dataset(500)
# print(f'x_train shape: {x_train.shape}')

## (truncated) Dirichlet Process Mixture Model

$$p(x_n | \pi, \mu, \sigma) = \sum_{k=1}^{K} \pi_k \mathrm{Normal}(x_n |\; \mu_k, \sigma_k)$$

$$ \beta_k \sim \mathrm{Beta}(1,\alpha) $$

$$ \pi_k = \beta_k \prod_{j=1}^{k-1}(1-\beta_j) $$

$$\mu_k \sim \mathrm{Normal} (\mu_k |\; \mathbf{0}, \mathbf{I}) $$

$$\sigma_k^2 \sim \mathrm{Gamma}(\sigma^2_k |\; a, b) $$

In [14]:
# Quick test run: only 500 inference iterations to keep the fit short
dpmm.fit(
    x_train,
    truncation_level=5,
    cfg=cfg,
    inference_params={'n_iter': 500},
)

500/500 [100%] ██████████████████████████████ Elapsed: 6s | Loss: 512528.469


INFO:neural_clustering.model.dpmm:Session saved in /Users/Edu/data/neural-clustering/sessions/30-Nov-2017@15-18-08-DPMM/session.ckpt
INFO:neural_clustering.model.dpmm:Training data saved in /Users/Edu/data/neural-clustering/sessions/30-Nov-2017@15-18-08-DPMM/training.npy
INFO:neural_clustering.model.dpmm:Params saved in /Users/Edu/data/neural-clustering/sessions/30-Nov-2017@15-18-08-DPMM/params.yaml


## Gaussian Mixture Model

$$p(x_n | \pi, \mu, \sigma) = \sum_{k=1}^{K} \pi_k \mathrm{Normal}(x_n |\; \mu_k, \sigma_k)$$

$$\pi \sim \mathrm{Dirichlet}(\pi |\; \alpha \mathbf{1}_K) $$

$$\mu_k \sim \mathrm{Normal} (\mu_k |\; \mathbf{0}, \mathbf{I}) $$

$$\sigma_k^2 \sim \mathrm{Gamma}(\sigma^2_k |\; a, b) $$

In [15]:
# Small number of samples for testing (this fit is sized through `samples`,
# not an iteration count).
gmm.fit(x_train, k=2, cfg=cfg, samples=10)

INFO:neural_clustering.model.gmm:Session saved in /Users/Edu/data/neural-clustering/sessions/30-Nov-2017@15-18-31-GMM/session.ckpt
INFO:neural_clustering.model.gmm:Training data saved in /Users/Edu/data/neural-clustering/sessions/30-Nov-2017@15-18-31-GMM/training.npy
INFO:neural_clustering.model.gmm:Params saved in /Users/Edu/data/neural-clustering/sessions/30-Nov-2017@15-18-31-GMM/params.yaml


# List experiments

List all previously trained models along with some useful information.

In [16]:
# Tabulate previously saved sessions; the output lists each run's name, model
# type, git hash, inference algorithm and hyperparameters.
# NOTE(review): sessions are presumably located through cfg — confirm.
summarize_experiments(cfg)

inference_params,name,model_type,git_hash,inference_algoritm,truncation_level,timestamp,samples,k
{'n_iter': 500},30-Nov-2017@15-11-16-DPMM,DPMM,d7ee49a adds script for checking installation,KLqp,4.0,2017-11-30T15:11:16.899501,,
{'n_iter': 500},30-Nov-2017@15-11-49-DPMM,DPMM,d7ee49a adds script for checking installation,KLqp,5.0,2017-11-30T15:11:49.588493,,
,30-Nov-2017@15-18-31-GMM,GMM,d7ee49a adds script for checking installation,Gibbs,,2017-11-30T15:18:31.565896,10.0,2.0
{'n_iter': 500},30-Nov-2017@15-18-08-DPMM,DPMM,d7ee49a adds script for checking installation,KLqp,5.0,2017-11-30T15:18:08.805416,,
{'n_iter': 500},30-Nov-2017@14-59-38-DPMM,DPMM,d7ee49a adds script for checking installation,KLqp,2.0,2017-11-30T14:59:38.406453,,
,30-Nov-2017@14-59-55-GMM,GMM,d7ee49a adds script for checking installation,Gibbs,,2017-11-30T14:59:55.720688,1000.0,2.0
{'n_iter': 500},30-Nov-2017@15-12-12-DPMM,DPMM,d7ee49a adds script for checking installation,KLqp,6.0,2017-11-30T15:12:12.058732,,
{'n_iter': 500},30-Nov-2017@15-12-28-DPMM,DPMM,d7ee49a adds script for checking installation,KLqp,10.0,2017-11-30T15:12:28.923101,,
