# Latent dimension comparisons

## Goal - Compare the `average per sample mse loss` and `average per sample kl loss` across different numbers of latent dimensions to determine how many latent dimensions to use for the VAE

### Imports

In [4]:
import numpy as np
import math

### Check the validity of the dumped arrays by ensuring that the test-validation dump of every latent dimension configuration contains exactly the same set of sample indices

In [2]:
# Latent dimension sizes and the dump directory names they are paired with
# (paired positionally by the zip() below).
latent_dims = [16, 32, 64, 128, 256]
dumps = ["20190829_010238", "20190829_010252", "20190829_010339", "20190829_010405", "20190829_010431"]

# Check that every dump holds the same set of sample indices, so that the
# metrics of the different runs are computed over the same samples.

# Sorted index array of the first dump; every later dump is compared to it.
ldump_idx_arr = None

# Iterate over the dumps and check the indices
for latent_dim, dump in zip(latent_dims, dumps):
    print("----------------------------------------------------")
    print("Reading metrics from VAE with {0} latent dimensions :".format(latent_dim))
    print("----------------------------------------------------")

    dump_npz_path = "/home/akajal/WatChMaL/VAE/dumps/{0}/test_valid_iteration_metrics.npz".format(dump)

    # np.load on an .npz returns a lazy NpzFile that keeps the archive open.
    # Use it as a context manager so the file handle is released as soon as
    # the indices have been read into memory.
    with np.load(dump_npz_path) as dump_npz_arr:
        # Sort so the comparison does not depend on the order of the samples
        dump_indices = np.sort(dump_npz_arr["indices"])

    if ldump_idx_arr is not None:
        if not np.array_equal(dump_indices, ldump_idx_arr):
            print("Index array for latent dims {0} not equal to all the other.".format(latent_dim))
        else:
            print("Index array equal to the first index array")
    else:
        # First dump: nothing to compare against yet, keep it as the reference
        ldump_idx_arr = dump_indices

----------------------------------------------------
Reading metrics from VAE with 16 latent dimensions :
----------------------------------------------------
----------------------------------------------------
Reading metrics from VAE with 32 latent dimensions :
----------------------------------------------------
Index array equal to the first index array
----------------------------------------------------
Reading metrics from VAE with 64 latent dimensions :
----------------------------------------------------
Index array equal to the first index array
----------------------------------------------------
Reading metrics from VAE with 128 latent dimensions :
----------------------------------------------------
Index array equal to the first index array
----------------------------------------------------
Reading metrics from VAE with 256 latent dimensions :
----------------------------------------------------
Index array equal to the first index array


### For each configuration of the latent dimensions, print the `average per sample mse loss` with its `standard deviation` and `standard error` and print the `average per sample kl loss` with its `standard deviation` and `standard error`

In [5]:
# Collect the metrics for plotting as well (one entry per latent dimension,
# in the same order as `latent_dims`)
recon_loss_values, kl_loss_values = [], []
recon_std_values, kl_std_values = [], []
recon_stderr_values, kl_stderr_values = [], []


def _summarize_loss(loss_arr):
    """Return the mean, standard deviation and standard error of a loss array.

    Args:
        loss_arr: numpy array of loss values; its first axis length is used
            as the sample count for the standard error.

    Returns:
        Tuple (mean, std, stderr) where std is np.std with its default
        ddof=0 and stderr is std / sqrt(loss_arr.shape[0]).
    """
    mean_loss, std_loss = np.mean(loss_arr), np.std(loss_arr)
    stderr_loss = std_loss/math.sqrt(loss_arr.shape[0])
    return mean_loss, std_loss, stderr_loss


# Iterate over the dumps and compute the loss statistics of each one
for latent_dim, dump in zip(latent_dims, dumps):
    print("----------------------------------------------------")
    print("Printing metrics for VAE with {0} latent dimensions :".format(latent_dim))
    print("----------------------------------------------------")

    dump_npz_path = "/home/akajal/WatChMaL/VAE/dumps/{0}/test_valid_iteration_metrics.npz".format(dump)

    # np.load on an .npz returns a lazy NpzFile that keeps the archive open.
    # Read both arrays inside the context manager so the file handle is
    # closed before moving on to the next dump.
    with np.load(dump_npz_path) as npz_arr:
        dump_recon_loss, dump_kl_loss = npz_arr["recon_loss"], npz_arr["kl_loss"]

    mean_recon_loss, std_recon_loss, stderr_recon_loss = _summarize_loss(dump_recon_loss)

    recon_loss_values.append(mean_recon_loss)
    recon_std_values.append(std_recon_loss)
    recon_stderr_values.append(stderr_recon_loss)

    mean_kl_loss, std_kl_loss, stderr_kl_loss = _summarize_loss(dump_kl_loss)

    kl_loss_values.append(mean_kl_loss)
    kl_std_values.append(std_kl_loss)
    kl_stderr_values.append(stderr_kl_loss)

    print("Recon Loss metrics")
    print("Mean Recon loss : {0}".format(mean_recon_loss))
    print("Std Recon loss : {0}".format(std_recon_loss))
    print("Stderr Recon loss : {0}\n".format(stderr_recon_loss))

    print("KL Loss metrics")
    print("Mean KL loss : {0}".format(mean_kl_loss))
    print("Std KL loss : {0}".format(std_kl_loss))
    print("Stderr KL loss : {0}".format(stderr_kl_loss))

----------------------------------------------------
Printing metrics for VAE with 16 latent dimensions :
----------------------------------------------------
Recon Loss metrics
Mean Recon loss : 8958.9755859375
Std Recon loss : 5998.86669921875
Stderr Recon loss : 6.327784681298076

KL Loss metrics
Mean KL loss : 54.29353332519531
Std KL loss : 25.35870933532715
Stderr KL loss : 0.026749127879516207
----------------------------------------------------
Printing metrics for VAE with 32 latent dimensions :
----------------------------------------------------
Recon Loss metrics
Mean Recon loss : 8853.7060546875
Std Recon loss : 6004.27734375
Stderr Recon loss : 6.333491991578074

KL Loss metrics
Mean KL loss : 54.090919494628906
Std KL loss : 22.656230926513672
Stderr KL loss : 0.02389847252505471
----------------------------------------------------
Printing metrics for VAE with 64 latent dimensions :
----------------------------------------------------
Recon Loss metrics
Mean Recon loss 