# Weak-lensing galaxy shape catalogue validation

## Global metacalibration

Contents.
- Metacalibration (global)
- Additive bias, ellipticity, magnitude distributions

> **_NOTE:_** Before running this notebook, set kernel to `main_set.ipynb`.

In [None]:
import os
from uncertainties import ufloat

from lenspack.geometry.projections.gnom import radec2xy

from cs_util.plots import plot_histograms

In [None]:
from sp_validation.survey import *
from sp_validation.basic import *
from sp_validation.util import *
from sp_validation.basic import *
from sp_validation.plots import *
from sp_validation.plot_style import *
from sp_validation.calibration import *
from sp_validation import cat as spv_cat

## Metacalibration for galaxies

In [None]:
# Run metacalibration on the galaxy sample of every shape-measurement method
# and log the object counts stored on the resulting metacal object.
gal_metacal = {}

for sh in shapes:
    print(f'{sh}:')

    mcal = metacal(
        dd,
        m_gal[sh],
        prefix=sh.upper(),
        snr_min=gal_snr_min,
        snr_max=gal_snr_max,
        rel_size_min=gal_rel_size_min,
        rel_size_max=gal_rel_size_max,
        size_corr_ell=gal_size_corr_ell,
        sigma_eps=sigma_eps_prior,
        verbose=verbose,
    )
    gal_metacal[sh] = mcal

    # NOTE(review): "metal" in the first message looks like a typo for
    # "metacal"; the text is kept unchanged because it is written to the
    # stats file. Confirm nothing parses it before correcting.
    count_report = (
        ("Number of objects on metal input", "_n_input"),
        ("Number of objects after galaxy selection masking", "_n_after_gal_mask"),
    )
    for description, attr_name in count_report:
        print_stats(
            f"{description} = {getattr(mcal, attr_name)}",
            stats_file,
            verbose=verbose,
        )


#### Extract quantities after calibration and cuts (in metacal)

In [None]:
# Containers keyed by shape-measurement method
g_corr, g_uncorr, w, mask = {}, {}, {}, {}
ra, ra_wrap, dec = {}, {}, {}
ra_mean, dec_mean = {}, {}
mag, snr, tile_ID = {}, {}, {}

for sh in shapes:

    # Calibrated and uncalibrated shear, weights, and the metacal mask
    calibrated = get_calibrated_quantities(gal_metacal[sh])
    g_corr[sh], g_uncorr[sh], w[sh], mask[sh] = calibrated

    n_before_cuts = len(gal_metacal[sh].ns['g1'])
    n_after_cuts = len(w[sh])
    for stage, n_obj in (('before', n_before_cuts), ('after', n_after_cuts)):
        print_stats(
            f"Number of objects {stage} cuts = {n_obj}",
            stats_file,
            verbose=verbose
        )

    # Coordinates, restricted to the galaxy selection and the metacal mask
    ra[sh] = spv_cat.get_col(dd, col_name_ra, m_gal[sh], mask[sh])

    # Shift R.A. for plots when the area contains R.A. = 0
    if wrap_ra == 0:
        ra_wrap[sh] = ra[sh]
    else:
        ra_wrap[sh] = (ra[sh] + wrap_ra) % 360 - wrap_ra + 360
    dec[sh] = spv_cat.get_col(dd, col_name_dec, m_gal[sh], mask[sh])

    # Mean position, computed from the wrapped R.A.
    ra_mean[sh] = np.mean(ra_wrap[sh])
    dec_mean[sh] = np.mean(dec[sh])

    print_stats(
        f'{sh}: Mean coordinates (ra, dec) ='
        f' ({ra_mean[sh]:.3f}, {dec_mean[sh]:.3f}) deg,'
        f' wrap_ra={wrap_ra} deg',
        stats_file,
        verbose=verbose
    )

    # Magnitude, from SExtractor
    mag[sh] = spv_cat.get_col(dd, "MAG_AUTO", m_gal[sh], mask[sh])

    # The tile ID is only kept when an external mask is configured
    if mask_external_path:
        tile_ID[sh] = spv_cat.get_col(dd, "TILE_ID", m_gal[sh], mask[sh])

    snr[sh] = spv_cat.get_snr(sh, dd, m_gal[sh], mask[sh])

In [None]:
# Extra catalogue columns, cut with the galaxy selection and the metacal mask.
# Methods get `None` when no additional columns are requested.

add_cols_data = {}

for sh in shapes:
    if not add_cols:
        add_cols_data[sh] = None
        continue

    add_cols_data[sh] = {
        key: dd[key][m_gal[sh]][mask[sh]] for key in add_cols
    }

In [None]:
#### Compute coordinates for projection and spatial binning (needed later)

In [None]:
x, y = {}, {}

for sh in shapes:
    # Project all objects from spherical to Cartesian coordinates, using the
    # mean position of the sample as first two arguments
    projected = radec2xy(ra_mean[sh], dec_mean[sh], ra_wrap[sh], dec[sh])
    x[sh], y[sh] = projected

#### Compute field size

In [None]:
min_x, max_x = {}, {}
min_y, max_y = {}, {}
size_x_deg, size_y_deg = {}, {}

for sh in shapes:
    # Extent of the projected coordinates along each axis
    min_x[sh], max_x[sh] = np.min(x[sh]), np.max(x[sh])
    min_y[sh], max_y[sh] = np.min(y[sh]), np.max(y[sh])

    # Field size, converted with rad2deg
    size_x_deg[sh] = np.rad2deg(max_x[sh] - min_x[sh])
    size_y_deg[sh] = np.rad2deg(max_y[sh] - min_y[sh])

    print_stats(
        f'{sh}: Field size in projected coordinates is (x, y) '
        f'= ({size_x_deg[sh]:.2f}, {size_y_deg[sh]:.2f}) deg',
        stats_file,
        verbose=verbose
    )

In [None]:
# Coordinates of all objects, stored under the key 'all'
ra["all"] = spv_cat.get_col(dd, col_name_ra)
ra_wrap['all'] = (
    (ra['all'] + wrap_ra) % 360 - wrap_ra + 360 if wrap_ra else ra['all']
)
dec["all"] = spv_cat.get_col(dd, col_name_dec)

# Every shape-measurement method, followed by the 'all' sample
shapes_all = [*shapes, 'all']

In [None]:
# Galaxy counts after metacal, compared to the galaxy selection, and the
# resulting number density
n_gal = {}
n_gal_sm = {}

for sh in shapes:
    n_gal[sh] = len(w[sh])
    # Number of non-zero entries of the galaxy selection
    n_gal_sm[sh] = np.count_nonzero(m_gal[sh])

    kept_percent = n_gal[sh] / n_gal_sm[sh] * 100
    gal_density = n_gal[sh] / area_amin2

    print_stats(sh, stats_file, verbose=verbose)
    print_stats(
        f'Number of galaxies after metacal = {n_gal[sh]}/{n_gal_sm[sh]} '
        f'= {kept_percent:.1f}%',
        stats_file,
        verbose=verbose
    )
    print_stats(
        f'Galaxy density (ignoring masks+overlaps)= {gal_density:.2f} gal/arcmin2',
        stats_file,
        verbose=verbose
    )

In [None]:
# For each method, write the per-tile object counts to a text file
for sh in shapes:
    # Tile IDs at three stages: all detections, after the galaxy selection,
    # and after the additional metacal mask
    detection_IDs = dd['TILE_ID']
    galaxy_IDs = detection_IDs[m_gal[sh]]
    shape_IDs = galaxy_IDs[mask[sh]]

    fname = f'{output_dir}/tile_id_gal_counts_{sh}.txt'
    write_tile_id_gal_counts(detection_IDs, galaxy_IDs, shape_IDs, fname)

In [None]:
# Total weight per method (for combining weighted averages of subpatches)

w_tot = {}

for sh in shapes:
    w_tot[sh] = np.sum(w[sh])

    print_stats(sh, stats_file, verbose=verbose)
    print_stats(
        f'Sum of weights = {w_tot[sh]:.1f}',
        stats_file,
        verbose=verbose,
    )

In [None]:
# Effective sample size ESS = 1/sum(w_n^2), i.e. the inverse of the sum over
# the squared normalised weights. Range [1; N].

for sh in shapes:

    # Weights normalised by their sum
    wn = w[sh] / w_tot[sh]
    s = np.sum(wn**2)
    ess = 1 / s

    print_stats(sh, stats_file, verbose=verbose)
    print_stats(
        f'effective sample size, ESS/N = {ess:.1f}/{n_gal[sh]} = {ess/n_gal[sh]:.3g}',
        stats_file,
        verbose=verbose,
    )

#### Plot spatial distribution of objects

In [None]:
# Axis and colour-bar labels shared by the spatial density plots
x_label, y_label = 'R.A. [deg]', 'DEC [deg]'
cbar_label_base = 'Density [$A_{\\rm pix}^{-1}$]'

In [None]:
# Galaxies
# Pixel area used for the number-density maps
Apix = 1 # [arcmin^2]
cbar_label = '{}, $A_{{\\rm pix}} \\approx {:.1g}$ arcmin$^2$'.format(cbar_label_base, Apix)

# Number of pixels per side of the grid. The total pixel count is the field
# area divided by the pixel area, so the pixel area has to be inside the
# square root; dividing the side length by an area is only correct for
# Apix = 1.
n_grid = int(np.sqrt(area_amin2 / Apix))
if verbose:
    print('Number of pixels = {}^2'.format(n_grid))

# One map per shape-measurement method, plus one for all objects
for sh in shapes_all:
    title = f'Galaxies ({sh})'
    out_path = f'{plot_dir}/galaxy_number_count_{sh}'
    plot_spatial_density(
        ra_wrap[sh],
        dec[sh],
        title,
        x_label,
        y_label,
        cbar_label,
        out_path,
        n_grid=n_grid,
        verbose=verbose
    )

# All objects without overlap, useful for position-only
# setting (no shapes)
sh = 'all'
title = f'Galaxies ({sh}, no overlap)'
out_path = f'{plot_dir}/galaxy_number_count_{sh}_nooverlap'
plot_spatial_density(
    ra_wrap[sh][cut_overlap],
    dec[sh][cut_overlap],
    title,
    x_label,
    y_label,
    cbar_label,
    out_path,
    n_grid=n_grid,
    verbose=verbose
)

#### Plot galaxy signal-to-noise distribution

In [None]:
# Histogram settings for the galaxy signal-to-noise distributions
x_label = 'SNR'
y_label = 'Frequency'
density = True
x_range = (0, 200)
n_bin = 500
x_cut = gal_snr_min

for sh in shapes:
    print_stats(sh, stats_file, verbose=verbose)

    # Only the galaxy selection `m_gal` is applied, not the metacal mask,
    # so the histograms contain all selected galaxies.
    snr_sextractor = dd['SNR_WIN'][m_gal[sh]]
    label_sextractor = f'{sh} SExtractor SNR'

    if sh == 'ngmix':
        # Flux over flux error from ngmix, next to the SExtractor value
        snr_flux = (
            dd['NGMIX_FLUX_NOSHEAR'][m_gal[sh]]
            / dd['NGMIX_FLUX_ERR_NOSHEAR'][m_gal[sh]]
        )
        xs = [snr_flux, snr_sextractor]
        # Each label is a plain string, one per entry of `xs`
        labels = [f'{sh} $F/\\sigma(F)$', label_sextractor]

    elif sh == 'galsim':
        xs = [snr_sextractor]
        labels = [label_sextractor]

    else:
        # Without this guard `xs` from a previous iteration would be
        # plotted under the wrong method name.
        raise ValueError(f'No SNR histogram defined for shape method {sh!r}')

    title = f'Galaxies ({sh})'

    out_name = f'hist_SNR_{sh}.pdf'
    out_path = os.path.join(plot_dir, out_name)

    plot_histograms(
        xs,
        labels,
        title,
        x_label,
        y_label,
        x_range,
        n_bin,
        out_path,
        vline_x=[x_cut],
        vline_lab=[f'SNR = {x_cut}']
    )

### Plot galaxy relative size distribution

In [None]:
# Histogram settings for the galaxy-to-PSF relative size distributions
x_label = r'$R^2_{\rm gal} / R^2_{\rm PSF}$'
y_label = 'Frequency'
density = True
x_range = (0, 1.5)
n_bin = 500
x_cut = gal_rel_size_min

for sh in shapes:
    print_stats(sh, stats_file, verbose=verbose)

    # Only the galaxy selection `m_gal` is applied, not the metacal mask,
    # so the histograms contain all selected galaxies.
    if sh == 'ngmix':
        xs = [
            dd['NGMIX_T_NOSHEAR'][m_gal[sh]] / dd['NGMIX_Tpsf_NOSHEAR'][m_gal[sh]]
        ]

    elif sh == 'galsim':
        # NOTE(review): this is a ratio of sigma columns while the axis label
        # reads R^2_gal / R^2_PSF -- confirm whether it should be squared.
        xs = [
            dd['GALSIM_GAL_SIGMA_NOSHEAR'][m_gal[sh]] / dd['GALSIM_PSF_SIGMA_NOSHEAR'][m_gal[sh]]
        ]

    else:
        # Without this guard `xs` from a previous iteration would be
        # plotted under the wrong method name.
        raise ValueError(f'No relative-size histogram defined for shape method {sh!r}')

    # One label per entry of `xs`, for every method
    labels = ['size ratio']

    title = f'Galaxies ({sh})'

    out_name = f'hist_rel_size_{sh}.pdf'
    out_path = os.path.join(plot_dir, out_name)

    plot_histograms(
        xs,
        labels,
        title,
        x_label,
        y_label,
        x_range,
        n_bin,
        out_path,
        vline_x=[x_cut],
        vline_lab=[f'rel_size = {x_cut}']
    )

## Metacalibration for stars

In [None]:
# Metacalibration of the star sample for each method, using the 'star'
# masking type
star_metacal = {
    sh: metacal(
        dd[ind_star],
        m_star[sh],
        masking_type='star',
        verbose=verbose,
    )
    for sh in shapes
}

#### Number density

In [None]:
# Star counts and density, using the mask for the 'no shear' images
mask_ns_stars = {}
n_star = {}

for sh in shapes:
    star_mcal = star_metacal[sh]
    mask_ns_stars[sh] = star_mcal.mask_dict['ns']
    n_star[sh] = len(star_mcal.ns['g1'][mask_ns_stars[sh]])

    star_density = n_star[sh] / area_deg2

    print_stats(f'{sh}:', stats_file, verbose=verbose)
    print_stats(f'Number of stars = {n_star[sh]}', stats_file, verbose=verbose)
    print_stats(
        f'Star density = {star_density:.2f} stars/deg2',
        stats_file,
        verbose=verbose,
    )

## Additive bias
Use raw, uncorrected ellipticities.

In [None]:
# Section header for the additive-bias entries that follow in the stats file
print_stats('additive bias', stats_file, verbose=verbose)

In [None]:
# Additive bias per ellipticity component from the uncalibrated
# ellipticities: unweighted and weighted mean, their standard deviations,
# and the standard deviations divided by sqrt(N).

c, c_err = {}, {}
cw, cw_err = {}, {}

for sh in shapes:
    print_stats(f'{sh}:', stats_file, verbose=verbose)

    c[sh], c_err[sh] = np.zeros(2), np.zeros(2)
    cw[sh], cw_err[sh] = np.zeros(2), np.zeros(2)

    for comp in (0, 1):
        ell = g_uncorr[sh][comp]

        # Unweighted mean and standard deviation
        c[sh][comp] = np.average(ell)
        c_err[sh][comp] = np.std(ell)

        # Weighted mean and square root of the weighted variance
        cw[sh][comp] = np.average(ell, weights=w[sh])
        variance = np.average((ell - cw[sh][comp])**2, weights=w[sh])
        cw_err[sh][comp] = np.sqrt(variance)

    # Means
    for comp in (0, 1):
        print_stats(f'c_{comp+1} = {c[sh][comp]:.3g}', stats_file, verbose=verbose)
        print_stats(f'cw_{comp+1} = {cw[sh][comp]:.3g}', stats_file, verbose=verbose)

    # Standard deviations
    for comp in (0, 1):
        print_stats(f'dc_{comp+1} = {c_err[sh][comp]:.3g}', stats_file, verbose=verbose)
        print_stats(f'dcw_{comp+1} = {cw_err[sh][comp]:.3g}', stats_file, verbose=verbose)

    # Error of mean: divide by sqrt(N) (TBC whether this is correct)
    for comp in (0, 1):
        print_stats(
            f'dmc_{comp+1} = {c_err[sh][comp]/np.sqrt(n_gal[sh]):.3e}',
            stats_file,
            verbose=verbose
        )
        print_stats(
            f'dmcw_{comp+1} = {cw_err[sh][comp]/np.sqrt(n_gal[sh]):.3e}',
            stats_file,
            verbose=verbose
        )

In [None]:
# Jackknife mean and error of the weighted additive bias

# Passed to the jackknife helper as `remove_size`
remove_size = 0.05

cjk = {}
cjk_err = {}

for sh in shapes:
    print_stats(f'{sh}:', stats_file, verbose=verbose)

    # NOTE(review): `np.zeros(2) * -1` evaluates to -0.0, not -1. If -1 was
    # meant as a "not computed" marker, note that these values are passed to
    # `ufloat` below even when `n_jack` is not positive -- confirm intent
    # before changing.
    cjk[sh] = np.zeros(2) * -1
    cjk_err[sh] = np.zeros(2) * -1

    for comp in range(2):
        # The jackknife is only run when realisations are requested;
        # otherwise the initial values are reported.
        if n_jack > 0:
            jk_mean, jk_err = jackknif_weighted_average(
                g_uncorr[sh][comp],
                w[sh],
                remove_size=remove_size,
                n_realization=n_jack,
            )
            cjk[sh][comp] = jk_mean
            cjk_err[sh][comp] = jk_err

        cjk_dc = ufloat(cjk[sh][comp], cjk_err[sh][comp])
        print_stats(f'cjk_{comp+1} = {cjk_dc:.3eP}', stats_file, verbose=verbose)

## Get quantities calibrated for both multiplicative and additive bias

In [None]:
# Shear estimates with the additive bias subtracted, on top of the
# calibration already contained in `g_corr`
g_corr_mc = {}
c_corr = {}

for sh in shapes:
    # Unweighted additive bias multiplied by the inverse response matrix
    inv_response = np.linalg.inv(gal_metacal[sh].R)
    c_corr[sh] = inv_response.dot(c[sh])

    g_corr_mc[sh] = np.zeros_like(g_corr[sh])
    for comp, bias in enumerate(c_corr[sh][:2]):
        g_corr_mc[sh][comp] = g_corr[sh][comp] - bias

## Response matrix

### Mean

In [None]:
# Report the total, mean shear and selection response matrices for galaxies
R_shear = {}

for sh in shapes:
    gal_mcal = gal_metacal[sh]

    print_stats(f'{sh} galaxies:', stats_file, verbose=verbose)

    print_stats('total response matrix:', stats_file, verbose=verbose)
    print_stats(np.array2string(gal_mcal.R), stats_file, verbose=verbose)

    print_stats('shear response matrix:', stats_file, verbose=verbose)
    # Average the shear response over its third axis
    R_shear[sh] = np.mean(gal_mcal.R_shear, axis=2)
    print_stats(np.array2string(R_shear[sh]), stats_file, verbose=verbose)

    print_stats('selection response matrix:', stats_file, verbose=verbose)
    print_stats(
        np.array2string(gal_mcal.R_selection),
        stats_file,
        verbose=verbose,
    )

In [None]:
# Report the total, mean shear and selection response matrices for stars
R_shear_stars = {}

for sh in shapes:
    star_mcal = star_metacal[sh]

    print_stats(f'{sh} stars:', stats_file, verbose=verbose)

    print_stats('total response matrix:', stats_file, verbose=verbose)
    print_stats(np.array2string(star_mcal.R), stats_file, verbose=verbose)

    print_stats('shear response matrix:', stats_file, verbose=verbose)
    # Average the shear response over its third axis
    R_shear_stars[sh] = np.mean(star_mcal.R_shear, axis=2)
    print_stats(np.array2string(R_shear_stars[sh]), stats_file, verbose=verbose)

    print_stats('selection response matrix:', stats_file, verbose=verbose)
    print_stats(
        np.array2string(star_mcal.R_selection),
        stats_file,
        verbose=verbose,
    )

### Plot distribution of response matrix elements

In [None]:
# Histogram settings for the response matrix element distributions
x_label, y_label = 'response matrix element', 'Frequency'
x_range, n_bin = (-3, 3), 500

In [None]:
# Same two colours for galaxies and stars; the line style differs between
# the first pair of curves and the second pair
colors = ['blue', 'red'] * 2
linestyles = ['-'] * 2 + [':'] * 2

In [None]:
# Diagonal shear-response elements, galaxies first, then stars
labels = [
    '$R_{11}$ galaxies',
    '$R_{22}$ galaxies',
    '$R_{11}$ stars',
    '$R_{22}$ stars'
]

for sh in shapes:

    # Order matches `labels`: (0,0) and (1,1) for galaxies, then for stars
    xs = [
        sample[sh].R_shear[idx, idx]
        for sample in (gal_metacal, star_metacal)
        for idx in (0, 1)
    ]
    title = sh

    out_name = f'R_{sh}_diag.pdf'
    out_path = os.path.join(plot_dir, out_name)

    plot_histograms(
        xs,
        labels,
        title,
        x_label,
        y_label,
        x_range,
        n_bin,
        out_path,
        colors=colors,
        linestyles=linestyles
    )

In [None]:
# Off-diagonal shear-response elements, galaxies first, then stars
labels = [
    '$R_{12}$ galaxies',
    '$R_{21}$ galaxies',
    '$R_{12}$ stars',
    '$R_{21}$ stars'
]

for sh in shapes:

    # Order matches `labels`: (0,1) and (1,0) for galaxies, then for stars
    xs = [
        sample[sh].R_shear[row, col]
        for sample in (gal_metacal, star_metacal)
        for row, col in ((0, 1), (1, 0))
    ]
    title = sh
    out_name = f'R_{sh}_offdiag.pdf'
    out_path = os.path.join(plot_dir, out_name)

    plot_histograms(
        xs,
        labels,
        title,
        x_label,
        y_label,
        x_range,
        n_bin,
        out_path,
        colors=colors,
        linestyles=linestyles
    )

## Ellipticities

In [None]:
# Histogram settings for the ellipticity distributions
x_label, y_label = 'ellipticity', 'Frequency'
x_range, n_bin = (-1, 1), 500

# One curve per ellipticity component
labels = ['$e_1$', '$e_2$']
colors = ['blue', 'red']
linestyles = ['-'] * 2

In [None]:
# Weighted histograms of both calibrated galaxy ellipticity components
for sh in shapes:

    xs = [g_corr[sh][comp] for comp in (0, 1)]
    # Both components share the same weights
    weights = [w[sh], w[sh]]

    title = f'{sh} galaxies'
    out_path = os.path.join(plot_dir, f'ell_gal_{sh}.pdf')

    plot_histograms(
        xs,
        labels,
        title,
        x_label,
        y_label,
        x_range,
        n_bin,
        out_path,
        weights=weights,
        colors=colors,
        linestyles=linestyles,
    )

In [None]:
# Weighted histograms of both star ellipticity components, taken from the
# 'no shear' entries of the star metacal object
for sh in shapes:

    star_ns = star_metacal[sh].ns
    selection = mask_ns_stars[sh]

    xs = [star_ns[comp_key][selection] for comp_key in ('g1', 'g2')]
    # Both components share the same weights
    star_weights = star_ns['w'][selection]
    weights = [star_weights, star_weights]

    title = f'{sh} stars'
    out_path = os.path.join(plot_dir, f'ell_stars_{sh}.pdf')

    plot_histograms(
        xs,
        labels,
        title,
        x_label,
        y_label,
        x_range,
        n_bin,
        out_path,
        weights=weights,
        colors=colors,
        linestyles=linestyles,
    )

In [None]:
# Narrower range and fewer bins for the PSF ellipticity histograms
x_range, n_bin = (-0.15, 0.15), 250

In [None]:
# Histograms of both PSF ellipticity components
for sh in shapes:

    key = key_PSF_ell[sh]
    psf_ell = dd[key]

    # NOTE(review): `mask_ns_stars` comes from the star metacal object, which
    # was built from `dd[ind_star]`, but is applied here to the full `dd` --
    # confirm that the two index the same rows.
    xs = [psf_ell[:, comp][mask_ns_stars[sh]] for comp in (0, 1)]

    title = f'{sh} PSF'
    out_path = os.path.join(plot_dir, f'ell_PSF_{sh}.pdf')

    plot_histograms(
        xs,
        labels,
        title,
        x_label,
        y_label,
        x_range,
        n_bin,
        out_path,
        colors=colors,
        linestyles=linestyles,
    )

## Magnitudes

In [None]:
# Histogram settings for the galaxy magnitude distribution
x_label, y_label = '$r$-band magnitude', 'Frequency'
# Plot range derived from the bright and faint magnitude limits
x_range = (gal_mag_bright + 1, gal_mag_faint - 1)
n_bin = 500

colors = ['blue', 'red']
linestyles = ['-'] * 2

title = 'galaxies'
out_path = os.path.join(plot_dir, 'mag_gal.pdf')

In [None]:
# One magnitude histogram per method, all drawn in the same figure
labels = [sh for sh in shapes]
# SExtractor magnitudes after the galaxy selection and the metacal mask
xs = [dd['MAG_AUTO'][m_gal[sh]][mask[sh]] for sh in shapes]

# Nothing to plot when there is no method
if xs:
    plot_histograms(
        xs,
        labels,
        title,
        x_label,
        y_label,
        x_range,
        n_bin,
        out_path,
        colors=colors,
        linestyles=linestyles,
    )

## Ellipticity dispersion

In [None]:
# Ellipticity dispersion from the (unweighted) variances of both calibrated
# ellipticity components
for sh in shapes:
    print_stats(f'{sh}', stats_file, verbose=verbose)

    var_e1 = np.var(g_corr[sh][0])
    var_e2 = np.var(g_corr[sh][1])
    sig_eps = np.sqrt(var_e1 + var_e2)

    print_stats(
        f'Dispersion of complex ellipticity = {sig_eps:.3f}',
        stats_file,
        verbose=verbose,
    )
    print_stats(
        'Dispersion of (average) single-component ellipticity ='
        f' {sig_eps / np.sqrt(2):.3f} = {sig_eps:.3f} / sqrt(2)',
        stats_file,
        verbose=verbose,
    )