In [None]:
import h5py
import numpy as np
import os
import pandas as pd
import pickle
import scipy
import scipy.interpolate
import unyt

In [None]:
import verdict
import trove

In [None]:
import matplotlib
import matplotlib.pyplot as plt
# # Currently need to call this to get matplotlib selected style to load...
plt.plot()
matplotlib.style.use( '/Users/zhafen/repos/clean-bold/clean-bold-mnras.mplstyle' )
import matplotlib.gridspec
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.patheffects as path_effects
import palettable
import voronoi_scatter

In [None]:
import helpers

# Parameters

In [None]:
sample_i = 0

In [None]:
agreement_cmap = palettable.colorbrewer.diverging.PuOr_5.mpl_colormap

In [None]:
# Output locations: directory for saved figures, and the HDF5 file into which the
# summary compiled at the end of this notebook is written.
# NOTE(review): figure_dir is an absolute, machine-specific path and names 'sample1'
# while sample_i = 0 — confirm the naming is intentional.
figure_dir = '/Users/zhafen/drafts/cgm_modeling_challenge_paper/figures/sample1'
summary_data_fp = './data/polished_data/summary.h5'
# Create the figure directory up front so later savefig calls have somewhere to write.
os.makedirs( figure_dir, exist_ok=True )

In [None]:
prop_keys = [ 'l', 'T', 'nH', 'Z', ]

## Plotting

In [None]:
# Per-property flag: when True, values of that property are log10-transformed
# before being plotted or stored in the summary.
logscale = dict.fromkeys( [ 'l', 'T', 'nH', 'Z' ], True )

In [None]:
labels = {
    'l': r'$\ell$ [kpc]',
    'T': r'T [K]',
    'nH': r'$n_{\rm H}$ [cm$^{-3}$]',
    'Z': r'$Z$ [$Z_{\odot}$]',
}

In [None]:
lims = {
    'T': [ 1e4, 1e6 ],
}

In [None]:
panel_length = 4.

In [None]:
blinded_color = helpers.blinded_color
revised_color = helpers.revised_color

# Analysis

## Load Data

### Actual Data

In [None]:
actual = verdict.Dict.from_hdf5( './data/synthetic_data/sample{}/theorists_file.h5'.format( sample_i ) )

In [None]:
observers_data = verdict.Dict.from_hdf5( './data/synthetic_data/sample{}/observers_file.h5'.format( sample_i ) )

In [None]:
# Provided column densities; per-ion 'logN' and 'elogN' entries are read from this further down.
# The sample number follows sample_i, like the other loaders, instead of being hard-coded.
provided = verdict.Dict.from_hdf5( './data/synthetic_data_samples/sample{}.h5'.format( sample_i ) )

### Modeled

#### Sameer & Charlton

In [None]:
modeling_group = 'sameer_charlton'

##### Original Results

In [None]:
modeled_raw = verdict.Dict.from_hdf5( './data/modeling_results/{}/sample{}/sample{}.hdf5'.format( modeling_group, sample_i, sample_i ) )

In [None]:
# Tab-separated table of derived quantities; its first four value columns are used
# below as length, H column, temperature and H density.
# The group directory follows modeling_group, like the neighboring loaders.
derived = pd.read_csv( './data/modeling_results/{}/sample{}/derived.txt'.format( modeling_group, sample_i ), sep='\t', index_col=0 )

In [None]:
# Assemble the blinded results. Metallicity and its error are elements 0 and 1 of the
# HDF5 entries keyed '1'..'N' (N = number of actual sightlines); the other properties
# are the first four value columns of the derived table.
sightline_keys = [ str( i ) for i in range( 1, actual['metallicity'].size + 1 ) ]
derived_values = derived.values
modeled = {
    'metallicity': np.array([ modeled_raw[sl_key][0] for sl_key in sightline_keys ]),
    'emetallicity': np.array([ modeled_raw[sl_key][1] for sl_key in sightline_keys ]),
    'length': derived_values[:,0],
    'H_column': derived_values[:,1],
    'temperature': derived_values[:,2],
    'H_density': derived_values[:,3],
}

In [None]:
# NOTE(review): this file handle is never closed and `f` is not referenced by any other
# visible cell; the path is also an absolute, machine-specific location.
# Confirm whether this cell is still needed.
f = h5py.File( '/Users/zhafen/Downloads/sample0.hdf5', 'r' )

In [None]:
fp = './data/modeling_results/{}/sample{}/firstiteration.pickle'.format( modeling_group, sample_i )
columns = dict( pd.read_pickle( fp ) )

In [None]:
# For every entry of the first-iteration pickle, store column 0 under the entry's key
# and column 1 under the same key prefixed with 'e' (presumably the uncertainty — confirm).
for ion_key, ion_entries in columns.items():
    ion_arr = np.array( ion_entries )
    for col_i, key_template in enumerate([ '{}', 'e{}' ]):
        modeled[key_template.format( ion_key )] = ion_arr[:,col_i]

In [None]:
modeled = pd.DataFrame( modeled )
modeled = modeled.set_index( np.arange( modeled.shape[0] ) + 1 )

In [None]:
modeled

##### Revised

In [None]:
revised = pd.read_csv( './data/modeling_results/{}/sample{}/revised_params.csv'.format( modeling_group, sample_i ) )
revised = revised.set_index( np.arange( revised.shape[0] ) + 1 )

In [None]:
fp = './data/modeling_results/{}/sample{}/seconditeration.pickle'.format( modeling_group, sample_i )
columns = dict( pd.read_pickle( fp ) )

In [None]:
# Add the second-iteration entries to the revised table: column 0 goes under the
# entry's key and column 1 under the 'e'-prefixed key.
for ion_key, ion_entries in columns.items():
    ion_arr = np.array( ion_entries )
    ion_values, ion_errors = ion_arr[:,0], ion_arr[:,1]
    revised['{}'.format( ion_key )] = ion_values
    revised['e{}'.format( ion_key )] = ion_errors

In [None]:
# Additional revised data
# The path is built from modeling_group and sample_i, matching the other loaders,
# rather than hard-coding the group and sample.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
fp = './data/modeling_results/{}/sample{}/sample{}_Z_T_nH.pkl'.format( modeling_group, sample_i, sample_i )
revised_add = dict( pd.read_pickle( fp ) )

In [None]:
# Flatten the per-sightline entries of revised_add into one list per column,
# then attach those lists to the revised table as new columns.
revised_add_formatted = verdict.Dict({})
for j in revised.index:
    for prop_key, vals in revised_add[j].items():
        # vals[0] goes under the property key; vals[1] and vals[2] go under the
        # 'low'- and 'high'-prefixed keys respectively.
        for prefix, val in zip( ( '', 'low', 'high' ), ( vals[0], vals[1], vals[2] ) ):
            col_key = prefix + prop_key
            try:
                revised_add_formatted[col_key].append( val )
            except KeyError:
                # First sightline for this column: start its list
                revised_add_formatted[col_key] = [ val, ]
for col_key, col_values in revised_add_formatted.items():
    revised[col_key] = col_values

In [None]:
revised

#### Mohapatra

In [None]:
# Metallicity values and their errors for the two sightlines that have them
# (array positions 7 and 9); every other sightline stays NaN.
n_sightlines = actual['metallicity'].size
met_mohapatra = np.full( n_sightlines, np.nan )
emet_mohapatra = np.full( n_sightlines, np.nan )
mohapatra_results = {
    7: ( -0.52, 0.41 ),
    9: ( 0.48, 0.39 ),
}
for sightline_ind, ( met, emet ) in mohapatra_results.items():
    met_mohapatra[sightline_ind] = met
    emet_mohapatra[sightline_ind] = emet

## Data for Interpretation

In [None]:
phase_diagram = verdict.Dict.from_hdf5( './data/m12i_phase_diagram_z0.25.h5' )

### Address Possible Scrambling of Sightline Inds

#### Best case: match closest metallicities

In [None]:
model_sort_inds = np.argsort( modeled['metallicity'] )
actual_sort_inds = np.argsort( actual['metallicity'] )

#### Use Sameer's columns to assess mixup

In [None]:
# Tab-separated table of modeled columns, indexed by its first column
# (the 'HI' row is inspected in the next cell).
# The group directory follows modeling_group rather than a hard-coded name.
columns = pd.read_csv( './data/modeling_results/{}/sample{}/columns.txt'.format( modeling_group, sample_i ), sep='\t', index_col=0 )

In [None]:
columns.loc['HI']

In [None]:
provided['H I']['logN']

In [None]:
np.log10( actual['H I'] )

It doesn't look like there was a mixup...

## Formatting

In [None]:
found = verdict.Dict({
    'l': modeled['length'].values * unyt.kpc,
    'Z': 10.**modeled['metallicity'].values * unyt.Zsun,
    'T': 10.**modeled['temperature'].values * unyt.K,
    'nH': 10.**modeled['H_density'].values * unyt.cm**-3,
})

In [None]:
# Revised estimates: each stored column is exponentiated (10**x) and tagged with unyt units.
# There is no 'l' entry, so code that looks up found_revised['l'] gets a KeyError.
# NOTE(review): 'nH' is built from the 'ne' column of `revised`, whereas the blinded
# found['nH'] is built from 'H_density' — confirm this is the intended column.
found_revised = verdict.Dict({
    'Z': 10.**revised['Z'].values * unyt.Zsun,
    'T': 10.**revised['T'].values * unyt.K,
    'nH': 10.**revised['ne'].values * unyt.cm**-3,
})

In [None]:
# Add short-key, unit-tagged aliases of the source properties (same short keys as
# `found`), then re-wrap the result as a verdict.Dict.
unit_aliases = {
    'l': ( 'length', unyt.kpc ),
    'Z': ( 'metallicity', unyt.Zsun ),
    'T': ( 'temperature', unyt.K ),
    'nH': ( 'H_density', unyt.cm**-3 ),
}
for short_key, ( long_key, unit ) in unit_aliases.items():
    actual[short_key] = actual[long_key] * unit
actual = verdict.Dict( actual )

In [None]:
xs = modeled.index

# Results


## Sightlines in Context of Priors

### Photoionization Equilibrium Data

In [None]:
pie_data = pd.read_csv( './data/PIEdata.csv', header=None )

In [None]:
lognH_pie = pie_data[0].values
logT_pie = pie_data[1].values

### Interpolate to Get PDF Values

In [None]:
dlogT = phase_diagram['logT_edges'][1] - phase_diagram['logT_edges'][0]
dlogHDen = phase_diagram['logH_density_edges'][1] - phase_diagram['logH_density_edges'][0]
pdf = phase_diagram['hist'] / ( dlogT * dlogHDen )

In [None]:
logHDen_centers = phase_diagram['logH_density_edges'][:-1] + 0.5 * dlogHDen
logT_centers = phase_diagram['logT_edges'][:-1] + 0.5 * dlogT

In [None]:
interp_fn = scipy.interpolate.RectBivariateSpline(
    logHDen_centers,
    logT_centers,
    pdf,
)

In [None]:
pdf_values = np.array([ interp_fn( np.log10( actual['H_density'][i] ), np.log10( actual['temperature'][i] ) )[0][0] for i in range( actual['metallicity'].size ) ])

In [None]:
# The spline interpolation can undershoot; floor every non-positive value at the smallest
# non-zero PDF value. Exact zeros are included because these values are later colored
# with a logarithmic norm, which cannot represent zero either.
pdf_values[pdf_values<=0] = pdf[np.nonzero(pdf)].min()

In [None]:
fig = plt.figure( facecolor='w' )
ax = plt.gca()

ax.set_xscale( 'log' )
ax.set_yscale( 'log' )

img = ax.pcolormesh(
    10.**phase_diagram['logH_density_edges'],
    10.**phase_diagram['logT_edges'],
    pdf.transpose(),
#     extent = [ logHDen_centers[0], logHDen_centers[-1], logT_centers[0], logT_centers[-1] ],
    cmap = palettable.cubehelix.classic_16_r.mpl_colormap,
    norm = matplotlib.colors.LogNorm(),
)

c = found['Z'] / actual['Z']
logc = np.log10( c )
logvlim = np.nanmax( np.abs( logc ) )
vmin = 10.**-logvlim
vmax = 10.**logvlim
scatter = ax.scatter(
    actual['H_density'],
    actual['temperature'],
    edgecolor = 'k',
    s = 50,
#     c = pdf_values,
    c = c,
    cmap = agreement_cmap,
    norm = matplotlib.colors.LogNorm( vmin, vmax ),
    linewidth = 2,
)

# Labels
voronoi_scatter.scatter(
    actual['H_density'],
    actual['temperature'],
    labels = np.arange( actual['H_density'].size ) + 1,
    ax = ax,
    plot_scatter = False,
    xlim = [ 1e-6, 100 ],
    ylim = [ 20, 1e7 ],
)
    
# Add line for PIE
pie_line = ax.plot(
    10.**lognH_pie,
    10.**logT_pie,
    color = 'k',
    linewidth = 3,
)
pie_line[0].set_path_effects([
    path_effects.Stroke(linewidth=5, foreground='w'),
    path_effects.Normal()
])
text = ax.annotate(
    'PIE',
    10.**np.array([ lognH_pie[-1], logT_pie[-1] ]),
    xycoords = 'data',
    xytext = ( -5, -5 ),
    textcoords = 'offset points',
    color = 'k',
    ha = 'right',
    va = 'top',
)
text.set_path_effects([
    path_effects.Stroke(linewidth=3, foreground='w'),
    path_effects.Normal()
])

# Colorbars
# Create divider for existing axes instance
divider = make_axes_locatable(ax)
# For each mappable, append an axes to the right of ax (5% of its width), draw the
# colorbar in it, and put the label just above the colorbar's top-right corner.
colorbar_specs = [
    ( scatter, 0.05, r'$Z_{\rm modeled}$ / $Z_{\rm actual}$' ),
    ( img, 0.41, 'PDF' ),
]
for mappable, cax_pad, cax_label in colorbar_specs:
    cax = divider.append_axes("right", pad=cax_pad, size='5%')
    cb = plt.colorbar( mappable, cax=cax )
    cax.annotate(
        text = cax_label,
        xy = ( 1, 1 ),
        xytext = ( 0, 5 ),
        xycoords = 'axes fraction',
        textcoords = 'offset points',
        ha = 'right',
    )


# ax.tick_params( length=15, width=3, labelsize=24 )
# ax.tick_params( which='minor', length=15 / 2, width=3./2, labelsize=24 )

ax.set_xlabel( r'$n_{\rm H}$ [cm$^{-3}$]' )
ax.set_ylabel( r'$T$ [K]', )

# ax.set_ylim( logT_centers[0], 7 )
# ax.set_xlim( -6, 2 )

# ax.set_aspect( 'equal' )

savefile = os.path.join( figure_dir, 'phase_space.png' )
print( 'Saving at {}'.format( savefile ) )
plt.savefig( savefile, bbox_inches='tight' )

## Comparison Including Expected Frequency

In [None]:
y_labels = {
    'metallicity': r'$\log_{10} ( Z_{\rm modeled}/Z_{\rm actual} )$',
    'H_density': r'$\log_{10} ( n_{\rm H, modeled}/n_{\rm H, actual} )$',
    'temperature': r'$\log_{10} ( T_{\rm modeled}/T_{\rm actual} )$',
    'length': r'$\log_{10} (\ell_{\rm modeled} / \ell_{\rm actual} )$',
}

### Ratios on Y-Axis

In [None]:
# Color limits for the PDF colormap: the extremes of the non-zero PDF values
nonzero_pdf = pdf[np.nonzero(pdf)]
vmin = np.nanmin( nonzero_pdf )
vmax = np.nanmax( nonzero_pdf )

In [None]:
for key in [ 'metallicity', 'H_density', 'temperature', 'length' ]:

    fig = plt.figure( figsize=(8,8), facecolor='w' )
    ax = plt.gca()

    if key != 'length':
        ys = modeled[key] - np.log10( actual[key] )
    else:
        ys = np.log10( modeled[key] / actual[key] )
        
    ax.scatter(
        np.arange( actual[key].size ) + 1,
        ys,
        edgecolor = 'k',
        s = 100,
        c = pdf_values,
        cmap = palettable.cubehelix.classic_16_r.mpl_colormap,
        norm = matplotlib.colors.LogNorm( vmin, vmax ),
    )

    if key == 'metallicity':
        
        ax.errorbar(
            np.arange( actual[key].size ) + 1,
            modeled[key] - np.log10( actual[key] ),
            yerr = modeled['emetallicity'],
            linestyle = 'none',
            color = 'k',
            zorder = -10,
        )
        
        ax.scatter(
            np.arange( actual[key].size ) + 1,
            revised['Z'] - np.log10( actual[key] ),
            s = 100,
            color = revised_color,
            zorder = -9,
        )
        ax.errorbar(
            np.arange( actual[key].size ) + 1,
            revised['Z'] - np.log10( actual[key] ),
            yerr = revised['errZ'],
            linestyle = 'none',
            color = revised_color,
            zorder = -10,
        )
        
#         # Mohapatra results
#         ax.errorbar(
#             np.arange( actual[key].size ) + 1,
#             met_mohapatra,
#             yerr = emet_mohapatra,
#             marker = 's',
#             markersize = 10,
#             color = colormap[1],
#             linestyle = 'none',
#             label = 'Mohapatra',
#             zorder = -10
#         )
#         ax.plot(
#             [ 9, ]*2,
#             [ 0.54, 1000 ],
#             color = colormap[1],
#         )

    ax.axhline(
        0,
        color = '0.5',
        linestyle = '--',
        linewidth = 3,
        zorder = -10,
    )

    plt.xticks( np.arange( actual['metallicity'].size ) + 1, )
    ax.tick_params( length=10, width=1.5, labelsize=18 )

    ax.set_xlabel( r'Sightline ID', fontsize=22 )
    ax.set_ylabel( y_labels[key], fontsize=22 )
    
    # Per-property y-limits; properties without their own entry use ( -4, 1.5 )
    ylim_by_key = {
        'length': ( -1.5, 4 ),
        'metallicity': ( -1.5, 1.5 ),
    }
    ax.set_ylim( *ylim_by_key.get( key, ( -4, 1.5 ) ) )
        
#     plt.savefig( './figures/sample0/comparison_{}_inc_freq.pdf'.format( key ), bbox_inches='tight' )

### Properties on Y-axis

In [None]:
clean_mosaic = [ [ 'Z', ], [ 'T', ], [ 'nH' ], ]

In [None]:
y_labels = {
    'Z': r'$Z_{\rm modeled}$ / $Z_{\rm actual}$',
    'nH': r'$n_{\rm H, modeled}$ / $n_{\rm H, actual}$',
    'T': r'$T_{\rm modeled}$ / $T_{\rm actual}$',
    'l': r'$\ell_{\rm modeled}$  / $\ell_{\rm actual}$',
}

In [None]:

# Setup Figure
n_rows_clean = len( clean_mosaic )
n_cols_clean = len( clean_mosaic[0] )
panel_length = plt.rcParams['figure.figsize'][0]
s_default = plt.rcParams['lines.markersize']
fig = plt.figure( figsize=(n_cols_clean*panel_length, n_rows_clean*panel_length/2.), facecolor='w' )
ax_dict = fig.subplot_mosaic(
    clean_mosaic,
)

ratio = found['Z'] / actual['Z']
ordered_inds = np.argsort( np.abs( np.log10( ratio ) ) )

for key in ax_dict.keys():

    ax = ax_dict[key]

    ratio = found[key] / actual[key]

    ratio_loglim = np.nanmax( np.abs( np.log10( actual[key] / found[key] ) ) )
    ratio_min = 10.**-ratio_loglim
    ratio_max = 10.**ratio_loglim

#     # Shade regions
#     norm = matplotlib.colors.LogNorm( vmin=ratio_min, vmax=ratio_max )
#     for i, c_value in enumerate( ratio[ordered_inds] ):

#         c = agreement_cmap( norm( c_value ) )

#         ax.fill_between(
#             [ xs[i] - 0.5, xs[i] + 0.5 ],
#             [ 0, 0 ],
#             [ 1, 1 ],
#             transform = matplotlib.transforms.blended_transform_factory( ax.transData, ax.transAxes ),
#             color = c,
#         )

    actual_plotted = actual[key][ordered_inds]
    if logscale[key]:
        actual_plotted = np.log10( actual_plotted )

    # Actual
    ax.scatter(
        xs,
        actual_plotted,
        s = s_default * 5,
        color = 'k',
#         edgecolor = 'k',
#         c = pdf_values[ordered_inds],
#         cmap = palettable.cubehelix.classic_16_r.mpl_colormap,
#         norm = matplotlib.colors.LogNorm( vmin, vmax ),
        zorder = -100,
    )

    # Original
    blinded = found[key][ordered_inds]
    if logscale[key]:
        blinded = np.log10( blinded )
    ax.scatter(
        xs,
        blinded,
        color = blinded_color,
        s = s_default * 2,
        zorder = 10,
    )
    if key == 'Z':
        ax.errorbar(
            xs,
            blinded,
            yerr = modeled['emetallicity'].values[ordered_inds],
            color = blinded_color,
            linewidth = 0.0,
            elinewidth = 2,
            zorder = 9,
        )

    # Revised
    # Check for the needed columns explicitly instead of wrapping the plotting calls in
    # try/except KeyError, so a KeyError raised for any other reason is not silently dropped.
    # As before: the points are drawn whenever the property column exists, and the error
    # bars only when both the 'low' and 'high' columns exist as well.
    if key in revised.columns:
        revised_plotted = revised[key].values[ordered_inds]
        ax.scatter(
            xs,
            revised_plotted,
            color = revised_color,
            s = s_default * 4,
            zorder = 5,
        )
        if ( 'low'+key in revised.columns ) and ( 'high'+key in revised.columns ):
            ax.errorbar(
                xs,
                revised_plotted,
                yerr = [ revised['low'+key].values[ordered_inds], revised['high'+key].values[ordered_inds] ],
                color = revised_color,
                linewidth = 0.0,
                elinewidth = 2,
                zorder = 4,
            )

    # if logscale[key]:
    #     ax.set_yscale( 'log' )

    # X ticks
    ax.set_xticks( xs )
    ax.set_xticklabels( ordered_inds + 1 )

    ax.set_xlim( xs[0] - 0.5, xs[-1] + 0.5 )
    if key in lims:
        ylim = lims[key]
        if logscale[key]:
            ylim = np.log10( ylim )
        ax.set_ylim( ylim )


    # # Colorbars
    # # Create divider for existing axes instance
    # divider = make_axes_locatable( ax )
    # # Append axes to the right of ax, with 5% width of ax
    # cax = divider.append_axes("right", pad=0.05, size='5%')
    # cb = matplotlib.colorbar.ColorbarBase( cax, cmap=agreement_cmap, norm=norm )
    # cax.annotate(
    #     text = y_labels[key],
    #     xy = ( 1, 1 ),
    #     xytext = ( 0, 5 ),
    #     xycoords = 'axes fraction',
    #     textcoords = 'offset points',
    #     ha = 'right',        
    # )

# Cleanup
for x_key, ax in ax_dict.items():

    if x_key == 'legend':
        continue

    subplotspec = ax.get_subplotspec()

    ax.set_ylabel( labels[x_key], )
    if subplotspec.is_last_row():
        ax.set_xlabel( 'sightline ID', )

savefile = os.path.join( figure_dir, 'comparison.pdf' )
print( 'Saving at {}'.format( savefile ) )
plt.savefig( savefile, bbox_inches='tight' )

## Corner Plot Comparison

### Plot

In [None]:
mosaic = [
    [ 'l', 'legend', '.', '.' ],
    [ 'T_l', 'T', '.', '.' ],
    [ 'nH_l', 'nH_T', 'nH', '.' ],
    [ 'Z_l', 'Z_T', 'Z_nH', 'Z', ],
]

In [None]:
# Setup Figure
n_cols = len( prop_keys )
fig = plt.figure( figsize=( panel_length*n_cols, panel_length*n_cols ), facecolor='w' )
ax_dict = fig.subplot_mosaic(
    mosaic,
)

# Loop through all properties
for j, x_key in enumerate( prop_keys ):
    for k, y_key in enumerate( prop_keys ):

        # Avoid duplicates
        if k < j:
            continue 
            
        # Single property comparison
        if j == k:
            ax = ax_dict[x_key]
            subplotspec = ax.get_subplotspec()
            
            x_label = labels[x_key]
            y_label = 'sightline ID'
                        
        # 2D comparisons
        else:
            # Panels are named after both properties; accept either ordering of the pair
            panel_key = '{}_{}'.format( x_key, y_key )
            if panel_key not in ax_dict:
                panel_key = '{}_{}'.format( y_key, x_key )
            ax = ax_dict[panel_key]
            subplotspec = ax.get_subplotspec()
            
            # Actual values
            ax.scatter(
                actual[x_key],
                actual[y_key],
                color = 'k',
            )
            
            # Modeled values
            ax.scatter(
                found[x_key],
                found[y_key],
                color = blinded_color,
            )
            
            # Revised modeled values
            try:
                ax.scatter(
                    found_revised[x_key],
                    found_revised[y_key],
                    color = revised_color,
                )
            except KeyError:
                pass
            
            if logscale[x_key]:
                ax.set_xscale( 'log' )
            if logscale[y_key]:
                ax.set_yscale( 'log' )
                
            x_label = labels[x_key]
            y_label = labels[y_key]

        if subplotspec.is_last_row():
            ax.set_xlabel( x_label, fontsize=16 )
        if subplotspec.is_first_col():
            ax.set_ylabel( y_label, fontsize=16 )

## How Much Column Density Agreement is Enough?

### Overview

In [None]:
inds = np.arange( ordered_inds.size )

In [None]:
ions = [ 'H I', 'Si II', 'Si III', 'Si IV', 'N II', 'N III', 'N V', 'C II', 'C III', 'O I', 'O VI', ]

In [None]:
n_rows = 3
n_cols = 4
fig = plt.figure( figsize=(n_cols*5,n_rows*4), facecolor = 'w' )
ax = plt.gca()

gs = matplotlib.gridspec.GridSpec( n_rows, n_cols )

gs.update( hspace=0.001, wspace=0.001 )

i = 0
j = 0
for k, ion in enumerate( ions ):
    
    ax_ij = fig.add_subplot( gs[j,i] )
    
    def convert_to_linear( ylog_, yerr_log_ ):
        '''Convert log10 values with symmetric log10 errors into linear values
        with asymmetric errors.

        Args:
            ylog_: log10 value(s).
            yerr_log_: symmetric error(s) on ylog_, in log10 space.

        Returns:
            y_: 10**ylog_.
            yerr_: array whose first entry is the distance from y_ down to
                10**(ylog_ - yerr_log_) and whose second entry is the distance
                from y_ up to 10**(ylog_ + yerr_log_).
        '''
        y_ = 10.**ylog_
        lower_ = 10.**( ylog_ - yerr_log_ )
        upper_ = 10.**( ylog_ + yerr_log_ )
        return y_, np.array([ y_ - lower_, upper_ - y_ ])
    
    ylog = modeled[ion.replace( ' ', '' )][inds+1] - provided[ion]['logN'][inds]
    yerr_log = provided[ion]['elogN'][inds]
    y, yerr = convert_to_linear( ylog, yerr_log )
    ax_ij.scatter(
        np.arange( inds.size ),
        y,
        color = blinded_color,
        s = 200,
        label = 'original',
        zorder = -1,
    )
    
    ax_ij.errorbar(
        np.arange( inds.size ),
        y,
        yerr = yerr,
        linestyle = 'none',
        color = blinded_color,
        zorder = -5,
        linewidth = 4,
    )
    
    ylog = revised[ion.replace( ' ', '' )][inds+1] - provided[ion]['logN'][inds]
    yerr_log = provided[ion]['elogN'][inds]
    y, yerr = convert_to_linear( ylog, yerr_log )
    ax_ij.scatter(
        np.arange( inds.size ),
        y,
        s = 200,
        color = revised_color,
        zorder = 2,
        label = 'revised',
    )
    ax_ij.errorbar(
        np.arange( inds.size ),
        y,
        yerr = yerr,
        linestyle = 'none',
        color = revised_color,
        zorder = 1,
        linewidth = 4,
    )
    
#     ax_ij.scatter(
#         inds + 1,
#         np.log10( actual[ion][inds] ),
#         color = 'k',
#     )

    ax_ij.axhline(
        1,
        color = '0.5',
        linestyle = '--',
        linewidth = 3,
        zorder = -10,
    )
    
    ax_ij.annotate(
        text = ion,
        xy = (0, 1),
        xycoords = 'axes fraction',
        xytext = ( 5, -5 ),
        textcoords = 'offset points',
        va = 'top',
        ha = 'left',
        fontsize = 22,
    )
    
    ax_ij.set_yscale( 'log' )
    ax_ij.set_ylim( 10.**np.array([-1.5, 2.5]) )
    
    # Adjust ticks
    plt.xticks( np.arange( inds.size ), inds + 1 )
    ax_ij.tick_params( which='major', labelsize=15, size=10, width=2 )
    if i != 0:
        ax_ij.tick_params( left=False, labelleft=False )
    
    # Move to next axis in line, wrapping to the start of the next row after the
    # last column. The wrap test must use the number of columns: comparing against
    # n_rows only worked because n_cols happens to equal n_rows + 1.
    if i >= n_cols - 1:
        j += 1
        i = 0
    else:
        i += 1
        
        
handles = [
    matplotlib.lines.Line2D([0], [0], marker='o', color=blinded_color, label='original', markersize=15),
    matplotlib.lines.Line2D([0], [0], marker='o', color=revised_color, label='revised', markersize=15),
]

ax.legend(
    handles = handles,
    loc = 'lower right',
    prop = { 'size': 22 },
)

# Removing spines and tick marks
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.tick_params( bottom=False, left=False, labelleft=False, labelbottom=False )

ax.set_xlabel( 'Sightline ID', fontsize=26, labelpad=30 )
ax.set_ylabel( r'$N_{\rm modeled}$ / $N_{\rm actual}$', fontsize=26, labelpad=40 )

savefile = os.path.join( figure_dir, 'column_den.pdf' )
print( 'Saving at {}'.format( savefile ) )
plt.savefig( savefile, bbox_inches='tight' )

### Vs Average Agreement

In [None]:
# Calculate weighted error
# For each result set (blinded `modeled`, then `revised`) compute, per sightline, the
# weighted mean absolute difference in log10 column density across all provided ions,
# measured against (a) the provided logN values and (b) log10 of the actual values.
avg_errors = []
avg_actual_errors = []
for obs_data in [ modeled, revised ]:
    # Per-sightline accumulators, one entry per row of the results table
    sum_error = np.zeros( obs_data.shape[0] )
    sum_actual_error = np.zeros( obs_data.shape[0] )
    sum_weights = np.zeros( obs_data.shape[0] )
    for ion in provided.keys():
        # Result columns are named like the ion with the space removed
        modeled_values = obs_data[ion.replace( ' ', '' )].values
        error = provided[ion]['logN'] - modeled_values
        actual_error = np.log10( actual[ion] ) - modeled_values
        # Weight each ion by the inverse of its provided log10 uncertainty
        weight = 1. / provided[ion]['elogN']

        # Sightlines whose provided logN is not finite contribute nothing for this ion
        # NOTE(review): only the provided logN is checked; a non-finite log10( actual[ion] )
        # or a zero/NaN elogN would still propagate into the sums — confirm that cannot occur.
        invalid = np.invert(np.isfinite( provided[ion]['logN'] ) )
        error[invalid] = 0.
        actual_error[invalid] = 0.
        weight[invalid] = 0.

        sum_error += np.abs( error ) * weight 
        sum_actual_error += np.abs( actual_error ) * weight
        sum_weights += weight
        
    # Normalize by the summed weights to get the weighted mean per sightline
    avg_errors.append( sum_error / sum_weights )
    avg_actual_errors.append( sum_actual_error / sum_weights )
# Unpack in the same order as the loop above: blinded first, revised second
avg_error_modeled, avg_error_revised = avg_errors
avg_actual_error_modeled, avg_actual_error_revised = avg_actual_errors

In [None]:
# Setup Figure
n_rows_clean = 2
n_cols_clean = 2
aspect_ratio = 1.5
fig = plt.figure( figsize=(n_cols_clean*panel_length*aspect_ratio, n_rows_clean*panel_length), facecolor='w' )
# Build a dedicated mosaic with one panel per entry of prop_keys, filled row by row.
# The shared clean_mosaic only has 'Z', 'T' and 'nH' panels, so the loop over prop_keys
# below would raise KeyError when it looks up the 'l' panel.
error_mosaic = [
    prop_keys[row_i*n_cols_clean:( row_i + 1 )*n_cols_clean]
    for row_i in range( n_rows_clean )
]
ax_dict = fig.subplot_mosaic(
    error_mosaic,
#     gridspec_kw = { 'hspace': 0.5 },
)

for key in prop_keys:
        
    ax = ax_dict[key]
    
    ratio = np.abs( np.log10( found[key] / actual[key] ) )
    ax.scatter(
        avg_actual_error_modeled,
        ratio,
        color = blinded_color,
    )
    
    if key in found_revised:
        ratio_revised = np.abs( np.log10( found_revised[key] / actual[key] ) )
        ax.scatter(
            avg_actual_error_revised,
            ratio_revised,
            color = revised_color,
        )
        
    ax.set_xlim( 0, avg_actual_error_modeled.max() * 1.05 )
    ax.set_ylim( 0, ratio.max() * 1.05 )
    
    if ax.get_subplotspec().is_last_row():
        ax.set_xlabel( r'mean $\vert \log_{10} ( N_{X,\,{\rm modeled}}$ / $N_{X,\,{\rm actual}} ) \vert$' )
    ax.set_ylabel( r'$\vert \log_{10}($ ' + y_labels[key] + r' $) \vert$' )
    
#     ax.set_yscale( 'log' )

savefile = os.path.join( figure_dir, 'error_vs_error.pdf' )
print( 'Saving at {}'.format( savefile ) )
plt.savefig( savefile, bbox_inches='tight' )

## Noising of Data

In [None]:
ions = list( provided.keys() )

In [None]:

for ion in ions:
    fig = plt.figure( figsize=(8,8), facecolor='w' )
    ax = plt.gca()

    # Use dedicated names rather than xs / ys: `xs` also holds the sightline IDs used by
    # the comparison figure, and overwriting it here would break that cell if it is re-run.
    logN_actual = np.log10( actual[ion] )
    logN_provided = provided[ion]['logN']
    elogN_provided = provided[ion]['elogN']

    ax.errorbar(
        logN_actual,
        logN_provided,
        yerr = elogN_provided,
        marker = 'o',
        markersize = 10,
        color = 'k',
        linestyle = 'none',
    )

    # Shared axis bounds: the finite range covered by either axis, padded by 0.5
    finite_actual = logN_actual[np.isfinite( logN_actual )]
    finite_provided = logN_provided[np.isfinite( logN_provided )]
    bounds = [
        min( np.nanmin( finite_actual ), np.nanmin( finite_provided ) ) - 0.5,
        max( np.nanmax( finite_actual ), np.nanmax( finite_provided ) ) + 0.5,
    ]
    ax.plot(
        bounds,
        bounds,
        color = '0.5',
        linestyle = '--',
        linewidth = 3,
        zorder = -10,
    )
    
    ax.annotate(
        text = ion,
        xy = ( 0, 1 ),
        xycoords = 'axes fraction',
        xytext = ( 5, -5 ),
        textcoords = 'offset points',
        fontsize = 22,
        ha = 'left',
        va = 'top',
    )
    
    ax.tick_params( length=10, width=1.5, labelsize=18 )

    ax.set_xlabel( r'$\log_{10}( N_{\rm ion,\,actual} )$', fontsize=22, )
    ax.set_ylabel( r'$\log_{10}( N_{\rm ion,\,provided} )$', fontsize=22, )

    ax.set_xlim( bounds )
    ax.set_ylim( bounds )

    ax.set_aspect( 'equal' )

# Compile Summary Data

In [None]:
summary = verdict.Dict()

In [None]:
# Averages
for prop_key in prop_keys:
    
    summary_keys_path = [
        [ 'estimated', 'blinded', 'mle', prop_key ],
        [ 'estimated', 'revised', 'mle', prop_key ],
        [ 'source', prop_key ],
    ]
    for j, data in enumerate([ found, found_revised, actual ]):
            
        try:
            values = data[prop_key]
        except KeyError:
            continue
    
        if logscale[prop_key]:
            values = np.log10( values )

        summary.setitem_via_list( summary_keys_path[j], values )

In [None]:
# Errors
found_err = verdict.Dict({
    'Z': modeled['emetallicity'],
})

found_revised_err = verdict.Dict({
    'Z': revised['errZ'],
    'T': revised['errT'],
})

In [None]:
for prop_key in prop_keys:
    
    summary_keys_path = [
        [ 'estimated', 'blinded', 'error', 'original provided', prop_key ],
        [ 'estimated', 'revised', 'error', 'original provided', prop_key ],
    ]
    for j, data in enumerate([ found_err, found_revised_err ]):
            
        try:
            values = data[prop_key].values
        except KeyError:
            continue

        summary.setitem_via_list( summary_keys_path[j], values )

In [None]:
for prop_key in prop_keys:
    
    try:
        summary.setitem_via_list( [ 'estimated', 'revised', 'error', '1 sigma low', prop_key ], revised['low'+prop_key].values,  )
        summary.setitem_via_list( [ 'estimated', 'revised', 'error', '1 sigma high', prop_key ], revised['high'+prop_key].values,  )
    except KeyError:
        continue

In [None]:
# Load the shared summary file (created if it does not exist yet), store this
# sample's summary under its own key, and write the file back.
# The key follows sample_i instead of a hard-coded 'sample0'.
total_summary = verdict.Dict.from_hdf5( summary_data_fp, create_nonexistent=True )
total_summary['sample{}'.format( sample_i )] = summary
total_summary.to_hdf5( summary_data_fp, )