# Model fitting and experiment listing

This notebook shows how we can train models and list previously trained ones (along with useful information about them).

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os

import numpy as np

from neural_clustering.model import dpmm, gmm, util
from neural_clustering.criticize import summarize_experiments
from neural_clustering import config

  return f(*args, **kwds)


In [3]:
# Configure the root logger to emit INFO-level records; the neural_clustering
# model modules log at this level (e.g. where each fitted session is saved).
import logging
logging.basicConfig(level=logging.INFO)

In [4]:
# Load the project configuration. cfg['root'] is used below as the directory
# that holds training.npy.
# NOTE(review): the path is relative, presumably resolved against the
# notebook's working directory -- confirm before running from elsewhere.
cfg = config.load('../server_config.yaml')

# Model training

## Load training data (generated in notebook 2)

In [5]:
# Locate training.npy under the configured root directory, then load it.
training_path = os.path.join(cfg['root'], 'training.npy')
x_train = np.load(training_path)

# Quick sanity check on the loaded array.
print(f'x_train shape: {x_train.shape}')

x_train shape: (1876, 21)


## ...or generate a toy dataset

In [11]:
# Alternative to loading training.npy: build a synthetic dataset instead.
# Running this cell replaces any x_train loaded earlier in the notebook.
n_toy_points = 500  # number of rows in the generated array
x_train = util.build_toy_dataset(n_toy_points)

print(f'x_train shape: {x_train.shape}')

INFO:neural_clustering.model.util:Generated data from two bi-variate normals std=1 (diagonal), mu1 = [5, 5], mu2=[-5, -5]


x_train shape: (500, 2)


## (truncated) Dirichlet Process Mixture Model

$$p(x_n | \pi, \mu, \sigma) = \sum_{k=1}^{K} \pi_k \mathrm{Normal}(x_n |\; \mu_k, \sigma_k)$$

$$ \beta_k \sim \mathrm{Beta}(1,\alpha) $$

$$ \pi_k = \beta_k \prod_{j=1}^{k-1}(1-\beta_j) $$

$$\mu_k \sim \mathrm{Normal} (\mu_k |\; \mathbf{0}, \mathbf{I}) $$

$$\sigma_k^2 \sim \mathrm{Gamma}(\sigma^2_k |\; a, b) $$

In [9]:
# Fit the truncated DPMM on x_train. The log output shows that the session,
# training data and params are saved to disk by the fit call.
# truncation_level is presumably the K of the truncated mixture -- confirm
# against dpmm.fit.
# NOTE(review): the stored output shows a 1000/1000 progress bar, which does
# not match n_iter=500; it looks like it comes from an earlier run -- re-run
# this cell to refresh it.
dpmm.fit(
    x_train,
    truncation_level=2,
    cfg=cfg,
    inference_params={'n_iter': 500},
)

1000/1000 [100%] ██████████████████████████████ Elapsed: 3s | Loss: 16050.037


INFO:neural_clustering.model.dpmm:Session saved in /Users/Edu/data/neural-clustering-toy/sessions/28-Nov-2017@23-22-39-DPMM/session.ckpt
INFO:neural_clustering.model.dpmm:Training data saved in /Users/Edu/data/neural-clustering-toy/sessions/28-Nov-2017@23-22-39-DPMM/training.npy
INFO:neural_clustering.model.dpmm:Params saved in /Users/Edu/data/neural-clustering-toy/sessions/28-Nov-2017@23-22-39-DPMM/params.yaml


## Gaussian Mixture Model

$$p(x_n | \pi, \mu, \sigma) = \sum_{k=1}^{K} \pi_k \mathrm{Normal}(x_n |\; \mu_k, \sigma_k)$$

$$\pi \sim \mathrm{Dirichlet}(\pi |\; \alpha \mathbf{1}_K) $$

$$\mu_k \sim \mathrm{Normal} (\mu_k |\; \mathbf{0}, \mathbf{I}) $$

$$\sigma_k^2 \sim \mathrm{Gamma}(\sigma^2_k |\; a, b) $$

In [6]:
# Fit a Gaussian mixture with k=2 components on x_train. The log output shows
# that the session, training data and params are saved to disk by the fit call.
# `samples` presumably sets how many posterior samples are drawn -- confirm
# against gmm.fit.
gmm.fit(
    x_train,
    k=2,
    cfg=cfg,
    samples=1000,
)

1000/1000 [100%] ██████████████████████████████ Elapsed: 9s | Acceptance Rate: 1.000


INFO:neural_clustering.model.gmm:Session saved in /Users/Edu/data/neural-clustering-toy/sessions/28-Nov-2017@17-32-31-GMM/session.ckpt
INFO:neural_clustering.model.gmm:Training data saved in /Users/Edu/data/neural-clustering-toy/sessions/28-Nov-2017@17-32-31-GMM/training.npy
INFO:neural_clustering.model.gmm:Params saved in /Users/Edu/data/neural-clustering-toy/sessions/28-Nov-2017@17-32-31-GMM/params.yaml


# List experiments

List all previously trained models along with some useful information.

In [6]:
# Show a summary table of previously trained models: one row per experiment,
# with columns such as model_type, k, samples, timestamp and git_hash.
# Kept as the last expression so the notebook renders the returned table.
summarize_experiments(cfg)

inference_algoritm,inference_params,k,name,timestamp,git_hash,model_type,samples
Gibbs,,20,29-Nov-2017@12-02-58-GMM,2017-11-29T12:02:58.317289,ec6c239 automatically create tmp dir,GMM,10000
