In [2]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import astropy.cosmology
import scipy.stats
import time
from useful_funcs import PAUSflux_to_ABmag
import h5py
import matplotlib.colors
import os
from SAVEFIG import main as custom_SAVEFIG

# Text width in points (pt); presumably the \textwidth of the target LaTeX
# document, used for sizing figures — TODO confirm against the figure code.
latex_textwidth = 455.24

def fixed_aspect_ratio(ratio, ax=None):
    '''
    Set a fixed aspect ratio on matplotlib plots
    regardless of axis units.

    The aspect handed to matplotlib is ``ratio * (x-span / y-span)``, so the
    drawn axes box ends up with height / width equal to ``ratio`` whatever
    the data limits are (for linearly scaled axes).

    Parameters
    ----------
    ratio : float
        Desired height / width of the axes box.
    ax : matplotlib Axes, optional
        Axes to adjust. Defaults to the current axes (``plt.gca()``).
    '''
    if ax is None:
        ax = plt.gca()

    x_lo, x_hi = ax.get_xlim()
    y_lo, y_hi = ax.get_ylim()

    # Use absolute spans: an inverted axis (e.g. a magnitude axis) has
    # hi < lo, which would otherwise produce a negative aspect, and
    # set_aspect only accepts positive values.
    x_span = abs(x_hi - x_lo)
    y_span = abs(y_hi - y_lo)

    ax.set_aspect(ratio * (x_span / y_span), adjustable='box')

#matplotlib.rcParams['agg.path.chunksize'] = 10000

In [4]:
from astropy.cosmology import Planck15 as cosmo

# Rebind `cosmo` to a clone of Planck15 with H0 = 100 km/s/Mpc (i.e. h = 1).
cosmo = cosmo.clone(name='Planck15 H0 = 100', H0=100)


In [5]:
# NOTE(security): unpickling can execute arbitrary code; only load this file
# if it comes from a trusted source.
data = pd.read_pickle("../../Data/Object_zs_zp_181203_130401.pkl.gz")


lcCoreFilename = "Gonzalez13.PAU.MillGas.field1.core.0.hdf5"
lcPhotomFilename = "Gonzalez13.PAU.MillGas.field1.photometry.0.hdf5"

# The light-cone directory is chosen from the NAME environment variable.
# .get() is used so that an unset NAME falls through to the explicit
# "not configured" error below instead of surfacing as a bare KeyError.
device_name = os.environ.get('NAME')
if device_name == 'DESKTOP-RB5C8OC':
    file_path = '/mnt/e/L4ProjectLocal/LightConeData/'
elif device_name == 'LaNaranjaDos':
    file_path = '/mnt/c/Users/Andrew/L4ProjectLocal/Light Cone Data/'
else:
    raise Exception('Device not configured for hdf5 data load')

# Both HDF5 files are opened read-only and kept open as module-level handles.
LC_DATA_CORE = h5py.File(os.path.join(file_path, lcCoreFilename), "r")
LC_DATA_PHOTOM = h5py.File(os.path.join(file_path, lcPhotomFilename), "r")

In [6]:
# RGB of the colormap's low end, given as 8-bit values and rescaled to the
# 0-1 range used in the segment data below.
r1, g1, b1 = 255, 235, 247
r1, g1, b1 = r1/255, g1/255, b1/255

# Segment data: each channel runs linearly from (r1, g1, b1) at 0.0 to
# (0.75, 0.04, 0.4) at 1.0. Alpha is 0 at exactly 0.0 and 0.5 from 1e-20
# upwards, so only the very bottom of the range is transparent.
cdict1 = {'red':   ((0.0, r1, r1),
                   (1.0, 0.75, 0.75)),

         'green': ((0.0, g1, g1),
                   (1.0, 0.04, 0.04)),

         'blue':  ((0.0, b1, b1),
                   (1.0, 0.4, 0.4)),

         'alpha': ((0.0, 0.0, 0.0),
                   (1e-20, 0.5, 0.5),
                    (1.0, 0.5, 0.5))
        }

clear_purple = matplotlib.colors.LinearSegmentedColormap('ClearPurple', cdict1)

# plt.register_cmap was deprecated in Matplotlib 3.7 and removed in 3.9.
# Prefer the colormap registry when it exists; force=True lets this cell be
# re-run without a "already registered" error, matching the old behaviour.
if hasattr(matplotlib, 'colormaps'):
    matplotlib.colormaps.register(clear_purple, force=True)
else:
    plt.register_cmap(cmap=clear_purple)

In [4]:
def ub68(x):
    """Return the (50 + 34.1)th percentile of ``x``.

    This is the upper bound of the central 68.2% interval of the sample.
    """
    upper_percentile = 50 + 34.1
    return np.percentile(x, upper_percentile)

def lb68(x):
    """Return the (50 - 34.1)th percentile of ``x``.

    This is the lower bound of the central 68.2% interval of the sample.
    """
    lower_percentile = 50 - 34.1
    return np.percentile(x, lower_percentile)

def normed_error(zannz,zpaus):
    """Return the redshift residual ``zannz - zpaus`` scaled by ``1 + zpaus``.

    Works element-wise on anything supporting arithmetic (scalars, arrays).
    """
    residual = zannz - zpaus
    scale = 1 + zpaus
    return residual / scale

def _compute_error_stats(DFrame, reference_column):
    """Shared implementation for the ``compute_error_stats_*`` functions.

    Compares the ``'F:ANNZ_best'`` column of ``DFrame`` against
    ``reference_column`` using the normalised error
    ``delta = (z_annz - z_ref) / (1 + z_ref)``.

    Parameters
    ----------
    DFrame : pandas.DataFrame
        Must contain ``'F:ANNZ_best'``, ``'F:ANNZ_best_err'`` and
        ``reference_column``; a missing column raises ``KeyError``.
    reference_column : str
        Name of the column holding the reference redshift.

    Returns
    -------
    stats : list
        ``[mean(delta), np.std(delta), (ub68(delta) - lb68(delta)) / 2,
        percentage of rows with |delta| > 0.15]``.
    objects : list
        ``[z_annz values, z_annz error values, reference values]`` as arrays.
    """
    z_annz = DFrame['F:ANNZ_best']
    z_ref = DFrame[reference_column]
    z_annz_errors = DFrame['F:ANNZ_best_err']

    # Named `delta` so the module-level normed_error() function is not shadowed.
    delta = (z_annz - z_ref) / (1 + z_ref)

    # Rows with |delta| > 0.15 are counted as outliers.
    outliers_idx = abs(delta) > 0.15
    out_perc = outliers_idx.sum()/outliers_idx.size *100

    sigma_68 = (ub68(delta) - lb68(delta)) / 2

    stats = [delta.mean(), np.std(delta), sigma_68, out_perc]
    objects = [z_annz.values, z_annz_errors.values, z_ref.values]
    return stats, objects


def compute_error_stats_spec(DFrame):
    """Error statistics of ``'F:ANNZ_best'`` measured against ``'F:zspec'``.

    Returns ``(stats, objects)`` as described in ``_compute_error_stats``.
    """
    return _compute_error_stats(DFrame, 'F:zspec')


def compute_error_stats_paus(DFrame):
    """Error statistics of ``'F:ANNZ_best'`` measured against ``'F:Z'``.

    Returns ``(stats, objects)`` as described in ``_compute_error_stats``.
    """
    return _compute_error_stats(DFrame, 'F:Z')

# SPEC_advanced23 results

In [6]:
df = pd.read_csv("../../ML runs/SPEC_advanced23/ANNZ_randomReg_0000.csv",header=0)

# Keep only rows whose ANNZ best estimate satisfies |z| < 2.
df_cut = df.loc[abs(df['F:ANNZ_best']) < 2]

print("Pre-cut size = {}\n\nPost-cut size = {}".format(df.shape[0],df_cut.shape[0]))

stats, objs = compute_error_stats_spec(df_cut)

column_labels = [r"$\langle \delta \rangle $",r"$\sigma_\delta$",r"$\sigma_{68}$",r"$\xi$"]
# One-row table of the four statistics, in the same order as column_labels.
results = pd.DataFrame(np.array(stats)[None,:],columns=column_labels)

# Raw strings: "\$" and "\%" are not valid escape sequences in a normal string
# literal and trigger a DeprecationWarning/SyntaxWarning. The runtime text is
# unchanged (backslash kept), so the output can still be pasted into a LaTeX row.
results.style.format({
    column_labels[0]: r"\${:.2E}\$ &",
    column_labels[1]: r'\${:.3f}\$ &',
    column_labels[2]: r'\${:.3f}\$ &',
    column_labels[3]: r'\${:.2f}\%\$',
})


Pre-cut size = 2232

Post-cut size = 2230


Unnamed: 0,$\langle \delta \rangle $,$\sigma_\delta$,$\sigma_{68}$,$\xi$
0,\$2.26E-03\$ &,\$0.081\$ &,\$0.040\$ &,\$3.77\%\$
