In [None]:
# Standard imports
import numpy as np
import matplotlib.pyplot as plt
import wandb

In [None]:
# Parameters
prog_set = 'homework_3'

# Sampling mode
sampling_method = 'desugar'
#sampling_method = 'graph'

# Inference method
inference_method = 'None'
#inference_method = 'IS'
#inference_method = 'MH'
#inference_method = 'MHG'
#inference_method = 'HMC'

# Weights & biases
use_wandb = False
%env WANDB_NOTEBOOK_NAME='homework_3.ipynb'

In [None]:
# Weights & Biases: start a run, but only when use_wandb is enabled in the parameters cell
if use_wandb: 
    wandb.init(project='test_homework3', entity='cs532-2022')

In [None]:
# Definitions
def triangle_plots(dicts_of_samples, params, labels,
    truths = None,
    fig_size = 3.,
    hist_bins = 'auto',
    hist_density = True,
    hist_alpha = 0.7,
    scatter_alpha = 0.1,
    scatter_size = 5.,
    use_wandb = False,
    wandb_name = None,
    ):
    '''
    Makes a triangle plot: histograms on the diagonal, pairwise scatter plots
    below the diagonal; panels above the diagonal are not created.
    params:
    dicts_of_samples: List of dictionaries of samples (e.g., dict['x'] = [1., 1.1, 1.3, ...]);
        values may be lists or numpy arrays
    params: List of names of parameters to plot (dictionary keys)
    labels: List of axis labels corresponding to parameters
    truths: Optional list of true values of the parameters (None: no truth markers)
    fig_size: Size of each panel; the figure is (n*fig_size) x (n*fig_size)
    hist_bins, hist_density, hist_alpha: Passed to the diagonal histograms
    scatter_alpha, scatter_size: Passed to the off-diagonal scatter plots
    use_wandb: If True, log the figure to wandb as an image
    wandb_name: Key under which the figure is logged when use_wandb is True

    Mean and +/- one standard deviation lines are drawn for every sample set.
    With a single set they are black; with several sets each uses the colour
    'C<i>' of its position in dicts_of_samples.
    '''
    n = len(params)
    # plt.figure (not plt.subplots): subplots() would add a full-figure Axes that
    # newer matplotlib versions no longer remove when the grid panels overlap it
    fig = plt.figure(figsize=(n*fig_size, n*fig_size))
    samples = len(dicts_of_samples[0][params[0]]) # Count shown in the title (first set only)
    single_set = len(dicts_of_samples) == 1
    for ir, (param_r, label_r) in enumerate(zip(params, labels)):
        for ic, (param_c, label_c) in enumerate(zip(params, labels)):
            if ir < ic:
                continue # Upper triangle is left empty
            ax = fig.add_subplot(n, n, ir*n+ic+1)
            if ir == ic:
                # Diagonal: 1D histogram(s) with summary statistics
                if truths is not None:
                    ax.axvline(truths[ir], color='black', ls='--', alpha=0.7, label='Truth')
                for iset, dict_of_samples in enumerate(dicts_of_samples):
                    values = np.asarray(dict_of_samples[param_r]) # Lists have no .mean()/.std()
                    ax.hist(values,
                        bins=hist_bins, density=hist_density, alpha=hist_alpha,
                    )
                    mean = values.mean(); std = values.std()
                    line_color = 'k' if single_set else 'C%d'%iset
                    ax.axvline(mean, color=line_color, ls='--', label='Mean: %1.2f'%mean)
                    ax.axvline(mean-std, color=line_color, ls=':', label='Std: %1.2f'%std)
                    ax.axvline(mean+std, color=line_color, ls=':')
                # Only the bottom-right diagonal panel keeps its x label
                if ic == n-1:
                    ax.set_xlabel(label_r)
                else:
                    ax.set_xticklabels([])
                ax.set_yticks([])
                ax.legend()
            else:
                # Below the diagonal: scatter of column parameter (x) against row parameter (y)
                if truths is not None:
                    ax.plot([truths[ic]], [truths[ir]], color='black', marker='x', alpha=0.7, label='Truth')
                for dict_of_samples in dicts_of_samples:
                    ax.scatter(dict_of_samples[param_c], dict_of_samples[param_r],
                        alpha=scatter_alpha, s=scatter_size,
                    )
                # Axis labels only on the bottom row and the left-most column
                if ir == n-1:
                    ax.set_xlabel(label_c)
                else:
                    ax.set_xticklabels([])
                if ic == 0:
                    ax.set_ylabel(label_r)
                else:
                    ax.set_yticklabels([])
    fig.suptitle('Samples: {:,}'.format(samples))
    fig.tight_layout()
    plt.show()
    if use_wandb: wandb.log({wandb_name: wandb.Image(fig)})


def plot_traces(data, nr, nc, names=None, panel_size=5., verbose=False, use_wandb=False, wandb_name=None):
    '''
    Plots one trace panel per column of data, with mean and +/- one standard deviation lines
    params:
    data: Array of samples; rows are samples and columns are variables.
        A 1D input is treated as a single variable
    nr, nc: Number of rows and columns of the panel grid
    names: Optional list of y-axis labels, one per column of data
    panel_size: Size of each panel; the figure is (nc*panel_size) x (nr*panel_size)
    verbose: If True, print the mean and standard deviation of each column
    use_wandb: If True, log the figure to wandb as an image
    wandb_name: Key under which the figure is logged when use_wandb is True
    '''
    data = np.asarray(data)
    if data.ndim == 1:
        data = data[:, np.newaxis] # Single variable: make it an (N, 1) column
    samples = data.shape[0]
    n = data.shape[1]
    # plt.figure (not plt.subplots): subplots() would add a full-figure Axes that
    # newer matplotlib versions no longer remove when the grid panels overlap it
    fig = plt.figure(figsize=(nc*panel_size, nr*panel_size))
    steps = np.arange(samples) # Sample index used as the x coordinate
    for i in range(n):
        ax = fig.add_subplot(nr, nc, 1+i)
        trace = data[:, i]
        mean = trace.mean()
        std = trace.std()
        if verbose:
            print('Mean:', mean)
            print('Std:', std)
        ax.plot(trace, color='C%d'%i, alpha=0.3)
        ax.scatter(steps, trace, color='C%d'%i, marker='.', alpha=0.1)
        ax.axhline(mean, color='black', ls='--', label='Mean: %1.2f'%mean)
        ax.axhline(mean-std, color='black', ls=':', label='Std: %1.2f'%std)
        ax.axhline(mean+std, color='black', ls=':')
        if names is not None: ax.set_ylabel(names[i])
        ax.legend()
        ax.set_xlabel('samples')
        ax.set_xlim(left=0.)
    fig.suptitle('Samples: {:,}'.format(samples))
    fig.tight_layout()
    plt.show()
    if use_wandb: wandb.log({wandb_name: wandb.Image(fig)})

In [None]:
# Program 1: load the samples of the single variable (labelled mu) and plot them
variables = [r'$\mu$']
file = 'data/homework_3/1_%s_%s.dat'%(sampling_method, inference_method)
print('File:', file)

# A 1D result from loadtxt becomes an (N, 1) column, so column indexing works below
data = np.atleast_2d(np.loadtxt(file)).T
print('Data shape:', np.squeeze(data.shape))

samples_dict = [{'mu': data[:, 0]}]
triangle_plots(
    samples_dict,
    params=['mu'],
    labels=variables,
    fig_size=5.,
    use_wandb=use_wandb,
    wandb_name='Program: 1',
)
plot_traces(
    data,
    nr=1,
    nc=data.shape[1],
    names=variables,
    use_wandb=use_wandb,
    wandb_name='Samples: 1',
)

In [None]:
# Program 2: load the two-column samples (slope, bias) and plot them
variables = ['slope', 'bias']
file = 'data/homework_3/2_%s_%s.dat'%(sampling_method, inference_method)
print('File:', file)
data = np.loadtxt(file)

# Column 0 is plotted as the slope, column 1 as the bias
samples_dict = [{
    'slope': data[:, 0],
    'bias': data[:, 1],
}]
triangle_plots(
    samples_dict,
    params=variables,
    labels=variables,
    fig_size=4.,
    use_wandb=use_wandb,
    wandb_name='Program: 2',
)
plot_traces(
    data,
    nr=1,
    nc=data.shape[1],
    names=variables,
    use_wandb=use_wandb,
    wandb_name='Samples: 2',
)

# rowvar=False: each column of data is a variable, each row an observation
# TODO: Does not look correct
print('Covariance matrix:\n', np.cov(data, rowvar=False, bias=False))

In [None]:
# Program 3 (the whole cell is skipped when the inference method is 'HMC')
if inference_method != 'HMC':
    variables = ['Are the points from the same cluster?']
    file = 'data/homework_3/3_%s_%s.dat'%(sampling_method, inference_method)
    print('File:', file)

    # A 1D result from loadtxt becomes an (N, 1) column
    data = np.atleast_2d(np.loadtxt(file)).T
    samples_dict = [{'x': data[:, 0]}]
    triangle_plots(
        samples_dict,
        params=['x'],
        labels=variables,
        fig_size=5.,
        use_wandb=use_wandb,
        wandb_name='Program: 3',
    )

    # The mean over all loaded values is reported as the probability
    print('Probability:', variables[0], data.mean())

In [None]:
# Program 4 (the whole cell is skipped when the inference method is 'HMC')
if inference_method != 'HMC':
    variables = ['Is it raining?']
    file = 'data/homework_3/4_%s_%s.dat'%(sampling_method, inference_method)
    print('File:', file)

    # A 1D result from loadtxt becomes an (N, 1) column
    data = np.atleast_2d(np.loadtxt(file)).T
    samples_dict = [{'x': data[:, 0]}]
    triangle_plots(
        samples_dict,
        params=['x'],
        labels=variables,
        fig_size=5.,
        use_wandb=use_wandb,
        wandb_name='Program: 4',
    )

    # The mean over all loaded values is reported as the probability
    print('Probability:', variables[0], data.mean())

In [None]:
# Program 5: load the two-column samples (x, y) and plot them
variables = ['x', 'y']
file = 'data/homework_3/5_%s_%s.dat'%(sampling_method, inference_method)
print('File:', file)
data = np.loadtxt(file)

# Column 0 is plotted as x, column 1 as y
samples_dict = [{
    'x': data[:, 0],
    'y': data[:, 1],
}]
triangle_plots(
    samples_dict,
    params=['x', 'y'],
    labels=[r'$x$', r'$y$'],
    fig_size=4.,
    use_wandb=use_wandb,
    wandb_name='Program: 5',
)
plot_traces(
    data,
    nr=1,
    nc=data.shape[1],
    names=variables,
    use_wandb=use_wandb,
    wandb_name='Samples: 5',
)

# rowvar=False: each column of data is a variable, each row an observation
print('Covariance matrix:\n', np.cov(data, rowvar=False, bias=False))

In [None]:
# Finish W&B: end the run, but only when use_wandb is enabled (mirrors the guarded wandb.init cell)
if use_wandb:
    wandb.finish()