In [None]:
import h5py
import pylab as plt
from matplotlib.colors import LogNorm
import matplotlib.ticker as mtick
import numpy as np
import os
from scipy.stats import linregress
import seaborn as sns
import tensorflow as tf
import glob
from spacepy import pycdf
import warnings
import moms_fast
from mpl_toolkits.axes_grid1 import make_axes_locatable
import pandas as pd

import nnet_evaluate
import utils

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

sns.set_style('darkgrid')
%matplotlib inline

# Table 1

In [None]:
# Print, for every top-level group (mission phase) in the training file, the
# length of its 'E' dataset along the first axis (presumably one row per
# training sample -- confirm against the file writer).
# Opened read-only and through a context manager so the handle is released
# even if a group is missing the 'E' dataset.
with h5py.File('/home/ubuntu/data/samples_train_n=50000_nosw.hdf', 'r') as hdf:
    phases = list(hdf.keys())

    for phase in phases:
        print(phase.ljust(20), hdf[phase]['E'].shape[0])

# Figure 1

In [None]:
# One moments_stats.hdf per mission phase, all living under the same
# hidden_layer_exp directory. The combined 'all.rfr001' model is
# intentionally not part of this list.
_model_dir = '/mnt/efs/dasilva/compression-cfha/data/nnet_models/hidden_layer_exp'
files = [
    f'{_model_dir}/{phase_name}.rfr001/moments_stats.hdf'
    for phase_name in ('4A_dusk_flank', '4B_dayside', '4C_dawn_flank', '4D_tail')
]

In [None]:
# Report the best density (n) r^2 found in each per-phase stats file.
moments = ['n']
for file_name in files:
    # Context manager so the handle is closed on every iteration
    with h5py.File(file_name, 'r') as hdf:
        r2 = {m: hdf[m]['r2'][:] for m in moments}

    # Every file is named moments_stats.hdf, so label the row with the model
    # directory (e.g. '4B_dayside.rfr001') to tell the phases apart.
    print(os.path.basename(os.path.dirname(file_name)), r2['n'].max())


In [None]:
# r^2 of the reconstructed bulk velocity components versus reduction fraction,
# one curve per mission phase, one panel per component.
moments = ['vx', 'vy', 'vz']

fig, axes = plt.subplots(nrows=1, ncols=3, sharex='all', sharey='all', figsize=(20, 6))

for file_name in files:
    # Read everything needed into memory, then release the file
    hdf = h5py.File(file_name, 'r')
    sizes = hdf['sizes'][:]
    r2, points_true, points_recon = {}, {}, {}
    for m in moments:
        r2[m] = hdf[m]['r2'][:]
        points_true[m] = hdf[m]['points_true'][:]
        points_recon[m] = hdf[m]['points_recon'][:]
    hdf.close()

    # '<...>/4B_dayside.rfr001/moments_stats.hdf' -> 'Phase 4B Dayside'
    model_dir = os.path.basename(os.path.dirname(file_name))
    title_words = ('Phase ' + model_dir.replace("_", " ").split(".")[0]).split(' ')
    region_title = ' '.join(w[0].upper() + w[1:] for w in title_words)

    reduction_fraction = sizes/(32*16*2)
    for i, m in enumerate(moments):
        panel = axes[i]
        panel.plot(reduction_fraction, r2[m], 'o-', label=region_title)
        panel.set_title(m.capitalize())
        panel.set_ylabel('Correlation Coefficient ($r^2$)', fontsize=12)
        panel.set_xlabel('Dimensionality Reduction (Fraction)', fontsize=12)

    # The y axis is shared, so setting it on the last panel applies to all
    axes[i].set_ylim(0.75, 1.05)

for ax in axes:
    ax.legend(ncol=2)
    # Reference line at perfect correlation
    ax.axhline(1, color='black', linestyle='dashed')

fig.suptitle(
    'Neural Network Bulk Velocity Reconstruction Correlation For Each Mission Phase',
    fontweight='bold',
    fontsize=20,
)
None

In [None]:
# r^2 of reconstructed flow velocity and density versus reduction fraction
# for a single phase (files[1]).
moments = ['vx', 'vy', 'vz', 'n']

fig, axes = plt.subplots(1, 4, sharex='all', sharey='all', figsize=(14, 4), dpi=300)

for file_name in [files[1]]:
    # Context manager closes the file even if a dataset lookup fails
    with h5py.File(file_name, 'r') as hdf:
        sizes = hdf['sizes'][:]
        r2 = {m: hdf[m]['r2'][:] for m in moments}
        points_true = {m: hdf[m]['points_true'][:] for m in moments}
        points_recon = {m: hdf[m]['points_recon'][:] for m in moments}

    # '<...>/4B_dayside.rfr001/moments_stats.hdf' -> 'Phase 4B Dayside'
    region_title = 'Phase ' + os.path.basename(os.path.dirname(file_name)).replace("_", " ").split(".")[0]
    region_title = ' '.join([word[0].upper() + word[1:] for word in region_title.split(' ')])

    for i, m in enumerate(r2):
        # Label the curve: without a label ax.legend() below has no entries
        # and matplotlib warns that no labelled artists were found.
        axes[i].plot(sizes/(32*16*2), r2[m], 'o-', label=region_title)

        if m[0] == 'v':
            axes[i].set_title('Flow Velocity ' + m[1].upper())
        else:
            axes[i].set_title('Density')
        axes[i].set_ylabel('Correlation Coefficient ($r^2$)', fontsize=12)
        axes[i].set_xlabel('Dimensionality Reduction Fraction', fontsize=12)

    # The y axis is shared, so setting it on the last panel applies to all
    axes[i].set_ylim(0.75, 1.01)

for ax in axes:
    ax.legend(ncol=2)
    # Reference line at perfect correlation
    ax.axhline(1, color='black', linestyle='dashed')

fig.suptitle('Neural Network Fluid Parameter Reconstruction Quality', fontweight='bold', fontsize=18)
fig.tight_layout()
None

In [None]:
# r^2 of the reconstructed txx/tyy/tzz moments versus reduction fraction for
# a single phase (files[1]).
moments = ['txx', 'tyy', 'tzz']

fig, axes = plt.subplots(1, 3, sharex='all', sharey='all', figsize=(15, 4), dpi=300)

for file_name in [files[1]]:
    # Context manager closes the file even if a dataset lookup fails
    with h5py.File(file_name, 'r') as hdf:
        sizes = hdf['sizes'][:]
        r2 = {m: hdf[m]['r2'][:] for m in moments}
        points_true = {m: hdf[m]['points_true'][:] for m in moments}
        points_recon = {m: hdf[m]['points_recon'][:] for m in moments}

    # '<...>/4B_dayside.rfr001/moments_stats.hdf' -> 'Phase 4B Dayside'
    region_title = 'Phase ' + os.path.basename(os.path.dirname(file_name)).replace("_", " ").split(".")[0]
    region_title = ' '.join([word[0].upper() + word[1:] for word in region_title.split(' ')])

    for i, m in enumerate(r2):
        # Label the curve: without a label ax.legend() below has no entries
        # and matplotlib warns that no labelled artists were found.
        axes[i].plot(sizes/(32*16*2), r2[m], 'o-', label=region_title)
        # 'txx' -> '$T_{xx}$'
        axes[i].set_title('$'+m[0].capitalize() + '_{' + m[1:]+'}$')
        axes[i].set_ylabel('Correlation Coefficient ($r^2$)', fontsize=12)
        axes[i].set_xlabel('Dimensionality Reduction Fraction', fontsize=12)

    # The y axis is shared, so setting it on the last panel applies to all
    axes[i].set_ylim(0.75, 1.01)

for ax in axes:
    ax.legend(ncol=2)
    # Reference line at perfect correlation
    ax.axhline(1, color='black', linestyle='dashed')

fig.tight_layout()
None

In [None]:
# r^2 of the reconstructed density versus reduction fraction for a single
# phase (files[1]). Three panels are created but only the first is drawn; the
# other two are switched off further down.
moments = ['n']

fig, axes = plt.subplots(1, 3, sharex='all', sharey='all', figsize=(15, 4), dpi=300)

for file_name in [files[1]]:
    # Context manager closes the file even if a dataset lookup fails
    with h5py.File(file_name, 'r') as hdf:
        sizes = hdf['sizes'][:]
        r2 = {m: hdf[m]['r2'][:] for m in moments}
        points_true = {m: hdf[m]['points_true'][:] for m in moments}
        points_recon = {m: hdf[m]['points_recon'][:] for m in moments}

    # '<...>/4B_dayside.rfr001/moments_stats.hdf' -> 'Phase 4B Dayside'
    region_title = 'Phase ' + os.path.basename(os.path.dirname(file_name)).replace("_", " ").split(".")[0]
    region_title = ' '.join([word[0].upper() + word[1:] for word in region_title.split(' ')])

    for i, m in enumerate(r2):
        # Label the curve: without a label ax.legend() below has no entries
        # and matplotlib warns that no labelled artists were found.
        axes[i].plot(sizes/(32*16*2), r2[m], 'o-', label=region_title)
        axes[i].set_title(r'$\rho$')
        axes[i].set_ylabel('Correlation Coefficient ($r^2$)', fontsize=12)
        axes[i].set_xlabel('Dimensionality Reduction Fraction', fontsize=12)

    # i is 0 here (single moment); the y axis is shared across panels
    axes[i].set_ylim(0.75, 1.01)

for ax in axes[:1]:
    ax.legend(ncol=2)
    # Reference line at perfect correlation
    ax.axhline(1, color='black', linestyle='dashed')

# Hide the two unused panels
axes[1].axis('off')
axes[2].axis('off')
fig.tight_layout()
None

# Figure 1 - Appendix

In [None]:
def plot_summary(file_name, xlim=None, redline=100, ylim=None, facecolor=None):
    """Plot r^2 against dimensionality-reduction fraction for seven moments.

    One panel is drawn for each of n, vx, vy, vz, txx, tyy and tzz, using the
    'sizes' dataset and the per-moment 'r2' datasets of the given file.

    Parameters
    ----------
    file_name : str
        Path to a moments_stats.hdf file. The name of its parent directory
        (e.g. '4B_dayside.rfr001') is turned into the phase shown in the
        figure title.
    xlim, ylim : tuple, optional
        If given, unpacked into set_xlim / set_ylim on every panel.
    redline : optional
        Not used by the function; kept so existing callers keep working.
    facecolor : optional
        Passed through to plt.subplots as the figure face colour.
    """
    moments = ['n', 'vx', 'vy', 'vz', 'txx', 'tyy', 'tzz']

    # Load data ------------------------------------------
    # Read the arrays into memory inside the context manager so nothing holds
    # on to the file after this block.
    with h5py.File(file_name, 'r') as hdf:
        sizes = hdf['sizes'][:]
        r2 = {m: hdf[m]['r2'][:] for m in moments}

    # Build the title from this file's own directory instead of relying on a
    # `region_title` global left behind by another cell:
    # '<...>/4B_dayside.rfr001/moments_stats.hdf' -> 'Phase 4B Dayside'
    model_dir = os.path.basename(os.path.dirname(file_name))
    region_title = 'Phase ' + model_dir.replace("_", " ").split(".")[0]
    region_title = ' '.join(word[0].upper() + word[1:] for word in region_title.split(' '))

    # Make plot -----------------------------------------
    fig, axes = plt.subplots(1, len(moments), sharex='all', sharey='all', figsize=(20, 3), facecolor=facecolor)
    axes = axes.flatten()
    for i, m in enumerate(moments):
        axes[i].plot(sizes/(32*16*2), r2[m], 'o-')

        # 'txx' -> 'Txx'; the other moment names are shown unchanged
        if m[0] == 't':
            axes[i].set_title(m[0].upper() + m[1:], fontsize=15)
        else:
            axes[i].set_title(m, fontsize=15)

        if xlim:
            axes[i].set_xlim(*xlim)
        if ylim:
            axes[i].set_ylim(*ylim)
        axes[i].set_xlabel('Dimensionality Reduction\n(Fraction)', fontsize=12)

    # The y axis is shared, so only the left-most panel carries the label
    axes[0].set_ylabel('$r^2$', fontsize=15)

    fig.suptitle(f'Moments Reconstruction vs Dimensionality Reduction Fraction (MMS Mission {region_title})', fontsize=20, fontweight='bold')
    fig.tight_layout()

In [None]:
# Combined (all phases) model, drawn on a tinted figure background
plot_summary(
    '/mnt/efs/dasilva/compression-cfha/data/nnet_models/hidden_layer_exp/all.rfr001/moments_stats.hdf',
    ylim=(0.75, 1.05),
    facecolor='#fffee0',
)

In [None]:
# Phase 4A (dusk flank) model
plot_summary(
    '/mnt/efs/dasilva/compression-cfha/data/nnet_models/hidden_layer_exp/4A_dusk_flank.rfr001/moments_stats.hdf',
    ylim=(0.75, 1.05),
)

In [None]:
# Phase 4B (dayside) model
plot_summary(
    '/mnt/efs/dasilva/compression-cfha/data/nnet_models/hidden_layer_exp/4B_dayside.rfr001/moments_stats.hdf',
    ylim=(0.75, 1.05),
)

In [None]:
# Phase 4C (dawn flank) model
plot_summary(
    '/mnt/efs/dasilva/compression-cfha/data/nnet_models/hidden_layer_exp/4C_dawn_flank.rfr001/moments_stats.hdf',
    ylim=(0.75, 1.05),
)

In [None]:
# Phase 4D (tail) model
plot_summary(
    '/mnt/efs/dasilva/compression-cfha/data/nnet_models/hidden_layer_exp/4D_tail.rfr001/moments_stats.hdf',
    ylim=(0.75, 1.05),
)

# Figure 2

In [None]:
def get_data(cdf_filename):
    """Read a CDF file and convert its distributions to integer-valued counts.

    For every time step the counts are computed as
    ``rint((dist / dist_err) ** 2)``, with NaN results (e.g. 0/0) set to zero.

    Parameters
    ----------
    cdf_filename : str
        Path to a CDF file containing the variables 'mms1_dis_dist_brst',
        'mms1_dis_disterr_brst' and 'Epoch'.

    Returns
    -------
    epoch : array
        Contents of the 'Epoch' variable.
    counts : numpy.ndarray
        Float array of shape (ntime, 32, 16, 32).
    ntime : int
        Number of entries in `epoch`.
    """
    N_EN = 32
    N_PHI = 32
    N_THETA = 16

    cdf = pycdf.CDF(cdf_filename)
    try:
        dist = cdf['mms1_dis_dist_brst'][:]
        dist_err = cdf['mms1_dis_disterr_brst'][:]
        epoch = cdf['Epoch'][:]
    finally:
        # Release the file even if one of the variables is missing
        cdf.close()

    ntime = epoch.size
    # The trailing axis is sized by N_EN rather than a second N_PHI. Both are
    # 32, so the shape is unchanged, but the axis is the one this notebook
    # later plots against the 32-entry energy table.
    # Presumably the layout is (time, phi, theta, energy) -- confirm.
    counts = np.zeros((ntime, N_PHI, N_THETA, N_EN))

    for i in range(ntime):
        # Division by a zero error estimate is expected; silence the
        # resulting numpy warnings and clean up the NaNs afterwards.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            tmp_counts = np.square(dist[i] / dist_err[i])
        tmp_counts[np.isnan(tmp_counts)] = 0
        counts[i] = np.rint(tmp_counts)

    return epoch, counts, ntime

In [None]:
# Load the 4B_dayside test data and derive f1ct from it; a later cell
# multiplies the counts by f1ct before calling moms_fast.fast_moments.
phase_key = '4B_dayside'
test_data = nnet_evaluate.load_test_data(phase_key)
f1ct = utils.get_f1ct({phase_key: test_data}, [phase_key])


In [None]:
# 32-entry energy table, used as the y coordinate of the spectrograms later
# in this notebook (the axis label there gives the unit as eV).
E = np.array(
    [
        2.16, 3.91, 7.07, 10.93,
        14.24, 18.54, 24.14, 31.44,
        40.94, 53.32, 69.44, 90.43,
        117.77, 153.36, 199.72, 260.10,
        338.72, 441.11, 574.45, 748.10,
        974.23, 1268.72, 1652.24, 2151.68,
        2802.10, 3649.12, 4752.19, 6188.69,
        8059.43, 10495.65, 13668.31, 17800.00,
    ],
    dtype=np.float32,
)

In [None]:
# Figure 2: energy spectrograms of the original and the reconstructed counts
# for one file, with the achieved compression ratio in the title.
N_EN = 32
N_EN_SHELLS = 2
N_PHI = 32
N_THETA = 16
# NOTE(review): the ratio below originally used a bare 16 as its last factor;
# presumably the number of bits per raw count -- confirm.
BITS_PER_COUNT = 16

# glob returns paths in arbitrary, filesystem-dependent order; sort so that
# index 95 selects the same file on every machine and every run.
hdf_filename = sorted(glob.glob('/home/ubuntu/data/recons/4B_dayside-100/*.hdf5'))[95]
print(hdf_filename)
cdf_filename = '/mnt/efs/dasilva/compression-cfha/data/mms_data/4B_dayside/' + os.path.basename(hdf_filename).replace('.hdf5', '.cdf')
gpc_filename = hdf_filename.replace('.hdf5', '.gpc')

epoch, counts, ntime = get_data(cdf_filename)

# Context manager closes the file even if the 'counts' dataset is missing
with h5py.File(hdf_filename, 'r') as hdf:
    counts_recon = hdf['counts'][:]

moms_true = [moms_fast.fast_moments(f1ct * c) for c in counts]
moms_recon = [moms_fast.fast_moments(f1ct * c) for c in counts_recon]

# Compression ratio = uncompressed size in bits / size of the .gpc file in bits
gpc_bytes = os.path.getsize(gpc_filename)
raw_bits = N_PHI * N_THETA * N_EN * BITS_PER_COUNT * epoch.size
cmpr_ratio = raw_bits / (gpc_bytes * 8)
file_size = gpc_bytes / epoch.size  # compressed bytes per time step

# Energy table repeated along the time axis: shape (32, ntime)
Escale = np.zeros((32, counts.shape[0]))
Escale[:] = E[:, np.newaxis]

# One (data, y-label) entry per panel; replaces a name-keyed if/elif chain
# that was driven by a list called `vars`, which shadowed the builtin.
panels = [
    (counts, '$\\bf{Original~Ion~Data}$\nEnergy [eV]'),
    (counts_recon, '$\\bf{Compressed~Ion~Data}$\nEnergy [eV]'),
]
fig, axes = plt.subplots(len(panels), 1, figsize=(15, 4*len(panels)))

# Same colour scale on both panels so they can be compared directly
vmin = .01
vmax = 15

for ax, (panel_counts, ylabel) in zip(axes, panels):
    # Average over axes 1 and 2, leaving (time, energy); transpose for pcolor
    spectrogram = panel_counts.mean(axis=(1, 2)).T
    im = ax.pcolor(epoch, E, spectrogram, norm=LogNorm(vmin=vmin, vmax=vmax), cmap='jet')
    ax.set_ylabel(ylabel, fontsize=18)
    ax.set_yscale('log')

    # Attach a colour bar of matching height to the right of the panel
    divider = make_axes_locatable(ax)
    cax = divider.append_axes('right', size='5%', pad=0.05)
    fig.colorbar(im, cax=cax, orientation='vertical').set_label('Average Counts\nper Energy Channel', fontsize=16)

axes[0].set_title(f'Demonstration of Compression (Compression Ratio: {cmpr_ratio:.1f}X)', fontweight='bold', fontsize=20)
fig.tight_layout()