In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Validating kappa signal in the graphs

__Author:__ Ji Won Park (@jiwoncpark)

__Created:__ 4/22/2021

__Last run:__ 4/22/2021

__Goals:__
We visualize the correlation between graph features and the output kappa.

__Before running:__
Generate the labels with explicit kappa sampling (moving the constituent halos around in each sightline field of view (FOV) multiple times and evaluating kappa at the center), e.g.
```python
kappa_sampler = CosmoDC2Raytracer(in_dir=IN_DIR,
                                  out_dir='../kappa_sampling',
                                  fov=0.85,
                                  healpix=10450,
                                  n_sightlines=1000,  # keep this small
                                  mass_cut=11.0,
                                  n_kappa_samples=1000)
kappa_sampler.parallel_raytrace()
kappa_sampler.apply_calibration()
```

In [None]:
from n2j.trainval_data.raytracers.cosmodc2_raytracer import CosmoDC2Raytracer

# --- Configuration for label generation ---
IN_DIR = '../n2j/data'  # directory containing the raw data
TRAIN_HP = [10327]  # healpix IDs to raytrace, one raytracer run per entry
N_TRAIN = 1000  # passed to the raytracer as `n_sightlines`
BATCH_SIZE = min(N_TRAIN//5, 25)  # defined here; not referenced by the cells shown below

# Compute labels for each training healpix. Explicit kappa sampling is
# switched off (`n_kappa_samples=0`); the previously generated output in
# `kappa_sampling_dir` is used to infer the mean kappa contribution of these
# new sightlines.
for hp in TRAIN_HP:
    train_Y_generator = CosmoDC2Raytracer(
        in_dir=IN_DIR,
        out_dir=f'../demo_Y_{hp}',
        kappa_sampling_dir='../kappa_sampling',
        fov=0.85,
        healpix=hp,
        n_sightlines=N_TRAIN,
        mass_cut=11.0,
        n_kappa_samples=0,
    )
    train_Y_generator.parallel_raytrace()
    train_Y_generator.apply_calibration()

Now we build graphs for 100 of the sightlines we just computed labels for.

In [None]:
from n2j.trainval_data.graphs.cosmodc2_graph import CosmoDC2Graph
# Features to compile. The order is significant: cells further down pick out
# node columns by their position in this list.
features = [
    # Sky position, identifier, redshift
    'ra', 'dec', 'galaxy_id', 'redshift',
    # "True" counterparts of position and redshift
    'ra_true', 'dec_true', 'redshift_true',
    # Ellipticity components (total, then bulge/disk split)
    'ellipticity_1_true', 'ellipticity_2_true',
    'bulge_to_total_ratio_i',
    'ellipticity_1_bulge_true', 'ellipticity_1_disk_true',
    'ellipticity_2_bulge_true', 'ellipticity_2_disk_true',
    # Lensing quantities
    'shear1', 'shear2', 'convergence',
    # Sizes
    'size_bulge_true', 'size_disk_true', 'size_true',
]
# One magnitude column per band, appended in u, g, r, i, z, Y order
features.extend('mag_{:s}_lsst'.format(band) for band in 'ugrizY')

# Build the graph dataset for the training healpixes. Each healpix is paired
# with the raytracing output directory that was written for it above
# (same '../demo_Y_<healpix>' pattern).
train_XY = CosmoDC2Graph(
    in_dir=IN_DIR,
    healpixes=TRAIN_HP,
    raytracing_out_dirs=[f'../demo_Y_{hp}' for hp in TRAIN_HP],
    aperture_size=1.0,
    n_data=[100],  # one entry for the single healpix in TRAIN_HP
    features=features,
    stop_mean_std_early=False,
)

In [None]:
# Quick look at the dataset: node-feature shape of the first graph, shape of
# one feature column and of the label for the second graph, and the dataset's
# cumulative sizes.
(
    train_XY[0].x.shape,
    train_XY[1].x[:, -3].shape,
    train_XY[1].y.shape,
    train_XY.cumulative_sizes,
)

In [None]:
# Per-sightline summary statistics, to be scattered against kappa below.

# Total number of graphs. `cumulative_sizes` is a running total (assuming the
# torch ConcatDataset convention -- TODO confirm), so the dataset size is its
# last entry; summing the entries overcounts once there is more than one
# healpix.
n_data = train_XY.cumulative_sizes[-1]
n_nodes = np.empty(n_data)
dist_weighted_sum = np.empty(n_data)
dist_3d_weighted_sum = np.empty(n_data)
sum_flux = np.empty(n_data)
dist_weighted_sum_flux = np.empty(n_data)
dist_3d_weighted_sum_flux = np.empty(n_data)
kappa = np.empty(n_data)
for i in range(n_data):
    x = train_XY[i].x.numpy()[1:, :]  # drop the first node of each graph
    y = train_XY[i].y.numpy()
    n_nodes[i] = x.shape[0]
    # -3 is the index in the features list (the i-band magnitude column).
    # NOTE(review): for a raw magnitude m, flux scales as 10**(-0.4*m); the
    # positive exponent here gives the inverse. Left unchanged because the
    # scaling of this column is not visible here -- confirm and fix the sign.
    flux_nodes = 10**(0.4*x[:, -3])
    # 4, 5 are ra_true, dec_true; scaled by 60 to arcmin (per the original note)
    ra_diff, dec_diff = x[:, 4]*60.0, x[:, 5]*60.0
    dist_nodes = (ra_diff**2.0 + dec_diff**2.0)**0.5
    # 6 is redshift_true, added in quadrature to the angular offsets
    dist_3d_nodes = (ra_diff**2.0 + dec_diff**2.0 + x[:, 6]**2.0)**0.5
    sum_flux[i] = np.sum(flux_nodes)
    # Inverse-distance-weighted number counts (2D and 3D use the same form)
    dist_weighted_sum[i] = np.sum(1/dist_nodes)
    dist_3d_weighted_sum[i] = np.sum(1/dist_3d_nodes)
    # Inverse-distance-weighted flux sums; each goes to its own array
    dist_weighted_sum_flux[i] = np.sum(flux_nodes/dist_nodes)
    dist_3d_weighted_sum_flux[i] = np.sum(flux_nodes/dist_3d_nodes)
    kappa[i] = y[0, 0]

In [None]:
# Kappa against the number of nodes counted for each sightline
fig, ax = plt.subplots()
ax.scatter(n_nodes, kappa)
ax.set_xlabel('Number of nodes')
ax.set_ylabel('Kappa')

In [None]:
# Kappa against the unweighted flux sum. Plot `sum_flux` so the data matches
# the axis label (the distance-weighted version has its own cell).
fig, ax = plt.subplots()
ax.scatter(sum_flux, kappa)
ax.set_xlabel('Sum of fluxes per sightline')
ax.set_ylabel('Kappa')

In [None]:
# Kappa against the distance-weighted flux sum
fig, ax = plt.subplots()
ax.scatter(dist_weighted_sum_flux, kappa)
ax.set_xlabel('Dist-weighted sum of fluxes per sightline')
ax.set_ylabel('Kappa')

In [None]:
# Kappa against the 2D distance-weighted number count
fig, ax = plt.subplots()
ax.scatter(dist_weighted_sum, kappa)
ax.set_xlabel('2D Dist-weighted number count per sightline')
ax.set_ylabel('Kappa')

In [None]:
# Kappa against the 3D distance-weighted number count
# (axis label typo "ist-weighted" corrected to "Dist-weighted")
fig, ax = plt.subplots()
ax.scatter(dist_3d_weighted_sum, kappa)
ax.set_xlabel('3D Dist-weighted number count per sightline')
ax.set_ylabel('Kappa')

In [None]:
# Kappa against the 3D distance-weighted flux sum. The label now names the
# plotted quantity (a flux sum, not a number count).
fig, ax = plt.subplots()
ax.scatter(dist_3d_weighted_sum_flux, kappa)
ax.set_xlabel('3D Dist-weighted sum of fluxes per sightline')
ax.set_ylabel('Kappa')