# Maximum Likelihood Estimation with Normal Distribution

## Import modules

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import time
import glob

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from IPython import display

import tensorflow as tf
# Turn on eager execution: later cells call `.numpy()` on tensors and iterate
# over the `tf.data` dataset directly in a Python `for` loop.
tf.enable_eager_execution()

# Expose only GPU 0 to CUDA.
# NOTE(review): this is set after TensorFlow has been imported -- confirm it
# still takes effect in the target environment.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

## Setting hyperparameters

In [None]:
# Hyperparameters shared by the input pipeline and the optimizer.
batch_size = 128     # number of samples per gradient step
learning_rate = 0.1  # step size handed to the optimizer

## Make a toy dataset (normal distribution)

In [None]:
# Ground-truth parameters that the estimator is expected to recover.
true_mu = -3.0
true_std = 3.5
N = 10000

# Draw N samples from N(true_mu, true_std^2), cast them to float32 and add a
# trailing axis so the array has shape (N, 1).
train_data = (
    np.random.normal(true_mu, true_std, N)
    .astype(np.float32)[:, np.newaxis]
)

## Set up dataset with `tf.data`

### Create the input pipeline with `tf.data.Dataset`

In [None]:
# Training input pipeline: slice the sample array into individual examples,
# shuffle with a buffer as large as the dataset, and group into batches of
# `batch_size`, discarding the final partial batch.
N = len(train_data)
train_dataset = (
    tf.data.Dataset.from_tensor_slices(train_data)
    .shuffle(buffer_size=N)
    .batch(batch_size=batch_size, drop_remainder=True)
)
print(train_dataset)

## Create the parameters to learn

**Normal distribution**

$$ \mathcal{N}(x; \mu, \sigma^{2}) = \frac{1}{\sqrt{2\pi \sigma^{2}}} \exp \left( -\frac{(x-\mu)^{2}}{2\sigma^{2}} \right) $$

**Log-density of the normal distribution**

$$ \log \mathcal{N}(x; \mu, \sigma^{2}) = -\frac{1}{2} \log(2 \pi \sigma^{2}) - \frac{(x-\mu)^{2}}{2\sigma^{2}} $$

$$ = -\frac{1}{2} \left[ \log(2 \pi) + \log(\sigma^{2}) + \frac{(x-\mu)^{2}}{\sigma^{2}} \right] $$

$$ = -\frac{1}{2} \left[ \log(2 \pi) + \log(\sigma^{2}) + (x-\mu)^{2} \exp(-\log(\sigma^{2})) \right] $$

**Variables**

* `mu`: $\mu$
* `logvar`: $\log (\sigma^{2})$

In [None]:
# Trainable parameters: the mean, and the log of the variance.
mu, logvar = tf.Variable(0.0), tf.Variable(1.0)

In [None]:
def log_normal_pdf(sample, mean, logvar, raxis=1):
  """Return the normal log-density of `sample`, summed along `raxis`.

  Evaluates, element-wise,
    -0.5 * [(sample - mean)^2 * exp(-logvar) + logvar + log(2*pi)]
  and sums the result over `raxis`.

  Args:
    sample: Values at which the density is evaluated.
    mean: Mean of the distribution.
    logvar: Log of the variance, log(sigma^2).
    raxis: Axis handed to `tf.reduce_sum`.

  Returns:
    Tensor holding the summed log-density.
  """
  log2pi = tf.log(2. * np.pi)
  squared_dev = (sample - mean) ** 2.
  inverse_var = tf.exp(-logvar)
  per_element = squared_dev * inverse_var + logvar + log2pi
  return -.5 * tf.reduce_sum(per_element, axis=raxis)

## Define the loss functions and the optimizer

In [None]:
# Plain (stochastic) gradient descent with the step size configured above.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

## Training

In [None]:
# Fit `mu` and `logvar` by minimising the mean negative log-likelihood of the
# training samples with mini-batch gradient descent.
print('Start Training.')
global_step = tf.train.get_or_create_global_step()
# Number of full batches per epoch (integer division, no float round-trip).
num_batches_per_epoch = N // batch_size
loss_history = []

for epoch in range(10):
  for step, samples in enumerate(train_dataset):

    with tf.GradientTape() as tape:
      # NOTE: despite its name, this is the *negative* mean log-likelihood,
      # i.e. the loss being minimised.
      log_likelihood = -tf.reduce_mean(log_normal_pdf(samples, mu, logvar))
    # Bookkeeping does not need to be recorded on the tape.
    loss_history.append(log_likelihood)

    gradients = tape.gradient(log_likelihood, [mu, logvar])
    # `apply_gradients` also increments `global_step` by one.
    optimizer.apply_gradients(zip(gradients, [mu, logvar]), global_step=global_step)

    # Fractional epoch count, used only for progress reporting.
    epochs = epoch + step / float(num_batches_per_epoch)
    if global_step.numpy() % 50 == 0:
      #display.clear_output(wait=True)
      # std is recovered from the learned log-variance: sqrt(exp(logvar)).
      # All three fitted quantities are shown with 3 significant digits.
      print("Epochs: {:.2f} global_step: {} loss: {:.3g}  mu: {:.3g}  std: {:.3g}".format(
                epochs, global_step.numpy(), log_likelihood.numpy(), mu.numpy(),
                np.sqrt(np.exp(logvar.numpy()))))

print('Training Done.')

## Print the results

In [None]:
# Compare the fitted parameters with the ones used to generate the data.
estimated_mean = mu.numpy()
# `logvar` stores log(sigma^2), so sigma = sqrt(exp(logvar)).
estimated_std = np.sqrt(np.exp(logvar.numpy()))

print("Results")
print("mean: {:.3g}".format(estimated_mean))
print("standard deviation: {:.3g}".format(estimated_std))
print("true mean: {:.3g}".format(true_mu))
print("true standard deviation: {:.3g}".format(true_std))