In [1]:
import matplotlib.pyplot as plt
import time
import torch
from torch import logsumexp
from torch.distributions.normal import Normal
import numpy as np
from utils import *
from plots import *
from objectives import *

In [2]:
# ---- Optimisation settings ----
iterations = 10000           # passed to every estimator routine as its iteration count
num_samples = 20             # passed to every estimator; also the ESS normaliser when plotting
LEARNING_RATE = 5 * 1e-3     # step size handed to torch.optim.SGD

# ---- Model parameters ----
p_mu = 0.0
p_sigma2 = 1.0
# log(sqrt(2*pi)); presumably the log normalising constant of the unit-variance
# Gaussian target -- TODO confirm against the objectives module.
log_Z = np.log(np.sqrt(2 * np.pi))

# ---- Estimators to compare, and the filename prefix for saved figures ----
ests = ['IWAE', 'IWAE-DReG', 'RWS', 'RWS-DReG', 'STL']
PATH = 'IS-'

In [3]:
# Per-estimator results, appended in the same order as `ests`.
ELBOs, ESSs, SNRs = [], [], []

for est in ests:
    # Every estimator starts from the same freshly created variational
    # parameters and its own SGD optimizer.
    q_mu = torch.tensor([6.0], requires_grad=True)
    q_sigma = torch.tensor([2.0], requires_grad=True)
    optimizer = torch.optim.SGD([q_mu, q_sigma], lr=LEARNING_RATE)
    print('======= start training by %s ========\n' % est)

    if est == 'IWAE':
        # vi() returns no EUBO trace, so the ELBO is passed in both slots of
        # exp_average and the first smoothed result is discarded.
        ELBO, Mu, Sigma, Grad_mu, Grad_sigma, ESS = vi(
            num_samples, q_mu, q_sigma, p_mu, p_sigma2, iterations, optimizer)
        ELBOsmo, _ = exp_average(ELBO, ELBO, iterations, beta1=0.9, beta2=0.999)
    else:
        if est == 'RWS':
            run_outputs = rws(
                num_samples, q_mu, q_sigma, p_mu, p_sigma2, iterations, optimizer)
        else:
            # The remaining estimators all go through dreg() and differ only
            # in alpha; any name not listed here (i.e. 'STL') uses 0.5.
            dreg_alpha = {'IWAE-DReG': 0, 'RWS-DReG': 1}.get(est, 0.5)
            run_outputs = dreg(
                num_samples, q_mu, q_sigma, p_mu, p_sigma2, iterations, optimizer,
                alpha=dreg_alpha)
        EUBO, ELBO, Mu, Sigma, Grad_mu, Grad_sigma, ESS = run_outputs
        EUBOsmo, ELBOsmo = exp_average(EUBO, ELBO, iterations, beta1=0.9, beta2=0.999)

    # Gradient statistics for each variational parameter; only the SNR
    # (last element of each result) is stored below.
    eg_mu, eg2_mu, var_mu, mu_snr = SNR(
        np.array(Grad_mu), iterations, beta1=0.99, beta2=0.99)
    eg_sigma, eg2_sigma, var_sigma, sigma_snr = SNR(
        np.array(Grad_sigma), iterations, beta1=0.99, beta2=0.99)

    ELBOs.append(ELBOsmo)
    ESSs.append(ESS)
    SNRs.append((mu_snr + sigma_snr) / 2)   # mean of the two parameters' SNRs
    print('======= end training by %s ========\n' % est)


iteration:0, ELBO:3.508, ESS:1.357 (0s)
iteration:1000, ELBO:0.998, ESS:4.150 (0s)
iteration:2000, ELBO:1.020, ESS:4.099 (1s)
iteration:3000, ELBO:1.040, ESS:4.511 (1s)
iteration:4000, ELBO:1.192, ESS:5.593 (1s)
iteration:5000, ELBO:1.090, ESS:6.247 (1s)
iteration:6000, ELBO:1.020, ESS:5.832 (0s)
iteration:7000, ELBO:1.027, ESS:5.582 (0s)
iteration:8000, ELBO:1.093, ESS:6.227 (1s)
iteration:9000, ELBO:0.456, ESS:5.021 (1s)


iteration:0, EUBO:-0.790, ELBO:-3.689, ESS:1.035 (0s)
iteration:1000, EUBO:2.485, ELBO:1.045, ESS:4.047 (0s)
iteration:2000, EUBO:2.431, ELBO:0.951, ESS:4.036 (0s)
iteration:3000, EUBO:2.466, ELBO:0.904, ESS:3.734 (0s)
iteration:4000, EUBO:2.520, ELBO:0.322, ESS:2.084 (0s)
iteration:5000, EUBO:1.831, ELBO:0.898, ESS:6.204 (0s)
iteration:6000, EUBO:2.208, ELBO:0.832, ESS:4.495 (0s)
iteration:7000, EUBO:1.972, ELBO:0.575, ESS:4.141 (0s)
iteration:8000, EUBO:2.139, ELBO:0.790, ESS:4.699 (0s)
iteration:9000, EUBO:2.287, ELBO:1.628, ESS:9.711 (0s)


iteration:0, EUBO:-

In [None]:
def plot_results_multiple(ELBOs, ESSs, SNRs, num_samples, ests):
    """Plot ELBO, gradient SNR and ESS ratio for several estimators and save as SVG.

    The figure is a 3x3 grid. Rows are, top to bottom: ELBO, SNR (log-scaled
    y axis) and ESS divided by ``num_samples``. Columns group the estimators:
    column 0 holds 'IWAE' and 'IWAE-DReG', column 1 holds 'RWS' and
    'RWS-DReG', and column 2 holds every other name.

    Args:
        ELBOs: one ELBO series per estimator, indexed like ``ests``.
        ESSs: one ESS series per estimator, indexed like ``ests``. Each series
            is averaged in consecutive blocks of 10, so its length must be a
            multiple of 10.
        SNRs: one SNR series per estimator, indexed like ``ests``.
        num_samples: divisor turning ESS into a ratio; also used in the
            output filename.
        ests: estimator names; they select the column and label the curves.

    Returns:
        None. The figure is written to
        ``PATH + 'training_results_%dsamples.svg' % num_samples``, where
        ``PATH`` is read from the enclosing (notebook) namespace.
    """
    colors = ['blue', 'red', 'orange', 'black', 'green']
    ess_block = 10  # number of consecutive ESS values averaged into one marker

    fig = plt.figure(figsize=(40, 20))
    axes = fig.subplots(3, 3, sharex=True)
    for col in range(3):
        axes[1, col].set_yscale('log')

    for i, est in enumerate(ests):
        if est in ('IWAE', 'IWAE-DReG'):
            col = 0
        elif est in ('RWS', 'RWS-DReG'):
            col = 1
        else:
            col = 2
        # Cycle through the palette so more than len(colors) estimators
        # do not raise an IndexError.
        color = colors[i % len(colors)]

        axes[0, col].plot(ELBOs[i], c=color, label="ELBO " + est)
        axes[1, col].plot(SNRs[i], c=color, label='SNR ' + est)

        ess_ratio = np.array(ESSs[i]) / num_samples
        ave_ess = np.reshape(ess_ratio, (-1, ess_block)).mean(-1)
        N = ave_ess.shape[0]
        # Place each block average at the index of the block's first value.
        axes[2, col].plot(np.arange(N) * ess_block, ave_ess, '-o', c=color,
                          label='ESS ' + est)

    for col in range(3):
        axes[0, col].set_ylim([-10, 2])
        axes[1, col].set_ylim([1e-4, 1e3])
        # The ESS ratio cannot exceed 1; use the same range in every column
        # so the three panels are directly comparable.
        axes[2, col].set_ylim([0, 1])
        for row in range(3):
            axes[row, col].legend()

    # tight_layout only has an effect once the axes exist, so it is applied
    # after plotting and right before saving.
    fig.tight_layout()
    fig.savefig(PATH + 'training_results_%dsamples.svg' % num_samples)

In [None]:
# Draw and save the comparison figure for all trained estimators.
plot_results_multiple(
    ELBOs=ELBOs,
    ESSs=ESSs,
    SNRs=SNRs,
    num_samples=num_samples,
    ests=ests,
)

In [None]:
# Create a small 3x3 grid of axes sharing the x axis. Nothing is drawn on it
# in this cell (looks like a layout scratch cell -- TODO confirm it is needed).
fig, axs = plt.subplots(3, 3, sharex=True, figsize=(4, 2))

In [None]:
# Smooth each estimator's SNR trace with the weight matrix from init_tril.
snr_beta1 = 0.9
snr_beta2 = 0.999

# init_tril is called with constant arguments, so it is built once and reused
# for every estimator (assumes init_tril is deterministic -- TODO confirm).
# Only its first return value is needed here.
ltr1, _unused_ltr2 = init_tril(iterations, snr_beta1, snr_beta2)
snr_norm = (1 - snr_beta1) / (1 - ltr1[:, 0] * snr_beta1)

# Iterate over whatever was trained instead of assuming exactly five estimators.
SNRssmo = []
for snr in SNRs:
    snrsmo = (ltr1 * snr).sum(-1) * snr_norm
    SNRssmo.append(snrsmo)