# Comparison of various properties between SDSS and OM10 objects

Authors: Jenny Kim (jennykim1016), Ji Won Park (jiwoncpark)

In this notebook, we draw cornerplots of various features, such as size and ellipticities, for both the lenses (OM10 objects) and non-lenses (SDSS objects). This will allow us to compare the distribution of the features and ensure that the lens vs. non-lens classification task is not too easy.

In [None]:
%%capture 
# ^ To hide a long warning passage from lenspop
from __future__ import print_function
#from astropy.table import Table, hstack
import numpy as np
import pandas as pd
#np.set_printoptions(threshold=np.nan)
import corner
import matplotlib.pyplot as plt
import os, sys
realizer_path = os.path.join(os.environ['SLREALIZERDIR'], 'slrealizer')
sys.path.insert(0, realizer_path)
#from realize_om10 import OM10Realizer
#import astropy.io.fits as pyfits
#import desc.slrealizer
#import warnings
#warnings.filterwarnings("ignore")
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# Locate the object tables under $SLREALIZERDIR/data.
data_path = os.path.join(os.environ['SLREALIZERDIR'], 'data')

lens_object_f = os.path.join(data_path, 'lens_object_table.csv')
nonlens_object_f = os.path.join(data_path, 'nonlens_object_table.csv')

lens_obj = pd.read_csv(lens_object_f)
num_data = len(lens_obj)
print(num_data)
# Draw exactly as many non-lenses as there are lenses. The fixed seed keeps
# the subsample (and therefore the plots) reproducible between runs.
nonlens_obj = (pd.read_csv(nonlens_object_f)
               .sample(num_data, random_state=123)
               .reset_index(drop=True))
# query('(mRrCc_u < 4.55) & (mRrCc_ < 4.10) & (r_trace < 3.9) & (i_trace < 3.8) & (z_trace < 3.4)')
assert len(lens_obj) == len(nonlens_obj)
# NOTE: the non-lens table is deliberately NOT re-read here. Re-reading it
# would discard the size-matched sample checked by the assert above.

In this dictionary, we will save all the figures of cornerplots we generate.

In [None]:
# Registry of the corner-plot figures drawn in this notebook, keyed by
# '<lens|nonlens>_<feature>'.
fig_dict = dict()

In [None]:
def draw_cornerplot(attribute_list, df, units='arcsec', fig=None, color='black'):
    """Draw a corner plot of selected columns of a DataFrame.

    Parameters
    ----------
    attribute_list : list of str
        Names of the columns of `df` to plot, one corner-plot axis per column.
    df : pandas.DataFrame
        Table containing every column named in `attribute_list`.
    units : str
        Unit string appended to each axis label as '<column> / <units>'.
        If empty, the bare column name is used as the label.
    fig : optional
        Existing figure forwarded to `corner.corner`, used to overlay this
        plot on a previous one. `None` is forwarded unchanged.
    color : str
        Color forwarded to `corner.corner`.

    Returns
    -------
    The object returned by `corner.corner`.
    """
    num_attributes = len(attribute_list)
    # Avoid a dangling ' / ' separator when no unit is given.
    labels = [a + ' / ' + units if units else a for a in attribute_list]
    data = df[attribute_list].values.reshape(-1, num_attributes)
    return corner.corner(data,
                         color=color,
                         smooth=1.0,
                         #show_titles=True,
                         labels=labels,
                         fill_contours=True,
                         bins=50,
                         fig=fig,
                         # One fraction per axis, each set to 0.999.
                         range=[0.999]*num_attributes,
                         # Normalize the 1D histograms so samples of different
                         # sizes can be overlaid. `density` replaces the
                         # `normed` keyword, which matplotlib has removed.
                         hist_kwargs=dict(density=True))

### 1. Size

With our initial sample of lenses and non-lenses, the lenses seemed to be smaller than the non-lenses, and the trace of the lenses showed a much smaller spread than that of the non-lenses. For the lens vs. non-lens classification, we therefore had to query non-lenses of similar size to the lenses, as we did not want size to be the distinguishing feature.

In [None]:
# Display the column names available in the lens object table.
lens_obj.columns

In [None]:
for df in [lens_obj, nonlens_obj]:
    for b in 'gz':
        mag_col = b + '_apMag'
        # Replace non-finite aperture magnitudes with the sentinel 100.0.
        # .loc writes into df itself; the chained form df[col][mask] = ...
        # may assign to a temporary copy and leave df unchanged.
        df.loc[~np.isfinite(df[mag_col]), mag_col] = 100.0
        #df[b + '_mag-std'] = from_flux_to_mag(df[b + '_apFluxErr'], from_unit='nMgy')
        #df[b + '_posmod'] = np.power(np.power(df[b + '_x'], 2.0) + np.power(df[b + '_y'], 2.0), 0.5)
        #df[b + '_omega'] = (df[b + '_e1']*df[b + '_x'] + df[b + '_e2']*df[b + '_y'])/(df[b + '_e']*df[b + '_posmod'])
    # Distance between the g-band and z-band (x, y) positions.
    df['delta_pos'] = np.hypot(df['g_x'] - df['z_x'], df['g_y'] - df['z_y'])

In [None]:
# (new column, g-band column, z-band column): each new column is the g-band
# value minus the z-band value.
std_differences = [
    ('delta_apMag-std', 'g_apMag-std', 'z_apMag-std'),
    ('delta_size-std', 'g_trace-std', 'z_trace-std'),
    ('delta_e-std', 'g_e-std', 'z_e-std'),
    ('delta_phi-std', 'g_phi-std', 'z_phi-std'),
    # Only the x component enters the position term.
    ('delta_pos-std', 'g_x-std', 'z_x-std'),
]
for df in (lens_obj, nonlens_obj):
    for new_col, g_col, z_col in std_differences:
        df[new_col] = df[g_col] - df[z_col]

In [None]:
# Names of the 'delta_*' columns to show in the corner plot.
alist = ['delta' + suffix
         for suffix in ('_size-std', '_apMag-std', '_e-std', '_phi-std', '_pos-std')]

In [None]:
# Overlay the columns listed in alist for lenses (blue) and non-lenses (red).
a = alist
lens_key, nonlens_key = 'lens_object', 'nonlens_object'
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units='', color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units='', color='red',
                                        fig=fig_dict[lens_key])

In [None]:
# Redraw the overlay of the alist columns: lenses in blue, non-lenses in red.
a = alist
lens_key, nonlens_key = 'lens_object', 'nonlens_object'
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units='', color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units='', color='red',
                                        fig=fig_dict[lens_key])

In [None]:
# Redraw the overlay of the alist columns: lenses in blue, non-lenses in red.
a = alist
lens_key, nonlens_key = 'lens_object', 'nonlens_object'
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units='', color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units='', color='red',
                                        fig=fig_dict[lens_key])

In [None]:
# Per-band column names for the g and z bands, g-band entries first.
alist = [band + suffix
         for band in 'gz'
         for suffix in ('_trace-std', '_apMag-std', '_x', '_y', '_e', '_phi')]

In [None]:
# Overlay the per-band trace of lenses (blue) and non-lenses (red).
feature = 'trace'
units = 'arcsec^2'
a = ['%s_%s' % (b, feature) for b in 'ugriz']
lens_key, nonlens_key = 'lens_' + feature, 'nonlens_' + feature
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units=units, color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units=units, color='red',
                                        fig=fig_dict[lens_key])

We've queried non-lenses to have u-band trace below the maximum lens u-band trace.

In [None]:
# Print, per band, the largest trace among lenses followed by the largest
# trace among non-lenses.
for b in 'ugriz':
    trace_col = '%s_trace' % b
    print(lens_obj[trace_col].values.max(), nonlens_obj[trace_col].values.max())
    # Swap .max() for .min() or .mean() to compare the minima or means instead.


### 2. First moment

Technically, it wouldn't be right to refer to the quantities 'x' and 'y' as the first moments. They are offsets given as separations (in arcseconds) of the image in each band from the centroid of the r-band image. The r-band "first moments" are all zero, so it wouldn't make sense to include the r band in the cornerplot! 

### 2.1 First moment along the x axis

In [None]:
# Overlay the per-band x offsets of lenses (blue) and non-lenses (red).
# The r band is left out of the band list.
feature = 'x'
units = 'arcsec'
a = ['%s_%s' % (b, feature) for b in 'ugiz']
lens_key, nonlens_key = 'lens_' + feature, 'nonlens_' + feature
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units=units, color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units=units, color='red',
                                        fig=fig_dict[lens_key])

### 2.2 First moment along the y axis

In [None]:
# Overlay the per-band y offsets of lenses (blue) and non-lenses (red).
# The r band is left out of the band list.
feature = 'y'
units = 'arcsec'
a = ['%s_%s' % (b, feature) for b in 'ugiz']
lens_key, nonlens_key = 'lens_' + feature, 'nonlens_' + feature
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units=units, color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units=units, color='red',
                                        fig=fig_dict[lens_key])

### 3. Ellipticities

The ellipticities $e_1$ and $e_2$ are defined as follows:

$e_1 = (Q_{xx}-Q_{yy})/(Q_{xx}+Q_{yy})$

$e_2 = 2Q_{xy}/(Q_{xx}+Q_{yy})$

where $Q$ is the matrix of second moments.

In [None]:
# Overlay the per-band e1 of lenses (blue) and non-lenses (red).
feature = 'e1'
units = 'no unit'
a = ['%s_%s' % (b, feature) for b in 'ugiz']
lens_key, nonlens_key = 'lens_' + feature, 'nonlens_' + feature
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units=units, color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units=units, color='red',
                                        fig=fig_dict[lens_key])

In [None]:
# Overlay the per-band e2 of lenses (blue) and non-lenses (red).
feature = 'e2'
units = 'no unit'
a = ['%s_%s' % (b, feature) for b in 'ugiz']
lens_key, nonlens_key = 'lens_' + feature, 'nonlens_' + feature
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units=units, color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units=units, color='red',
                                        fig=fig_dict[lens_key])

### 4. Flux

The distributions had different patterns, but the peaks overlapped with each other. Still, there were significant numbers of OM10 lensed systems that were separated from the SDSS samples.

In [None]:
# Overlay the per-band aperture flux of lenses (blue) and non-lenses (red).
feature = 'apFlux'
units = 'nMgy'
a = ['%s_%s' % (b, feature) for b in 'ugiz']
lens_key, nonlens_key = 'lens_' + feature, 'nonlens_' + feature
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units=units, color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units=units, color='red',
                                        fig=fig_dict[lens_key])

### 5. Magnitude

This cornerplot plots different magnitudes in different bands. In effect, this is the same as plotting the flux in log scale.

We first compute the magnitudes from the fluxes.

In [None]:
from utils.utils import *

# Add a '<band>_mag' column to both tables, converted from the aperture flux
# (given in nMgy) with from_flux_to_mag.
for df in (lens_obj, nonlens_obj):
    for b in 'ugriz':
        df[b + '_mag'] = from_flux_to_mag(df[b + '_apFlux'], from_unit='nMgy')

In [None]:
%%capture
# Checking for numerical overflow values...
# For each band, report the min/max and the number of non-finite magnitudes
# in both tables. Non-finite lens magnitudes are replaced by the sentinel
# 100.0; non-lens magnitudes are only reported.
for b in 'ugriz':
    mag_col = b + '_mag'

    vals = lens_obj[mag_col]
    bad = ~np.isfinite(vals)
    print("lens in ", b)
    print("min, max: ", np.min(vals), np.max(vals))
    print("NaNs", bad.sum())
    if bad.any():
        # .loc writes into lens_obj itself; chained indexing may write to a
        # temporary copy instead.
        lens_obj.loc[bad, mag_col] = 100.0

    vals = nonlens_obj[mag_col]
    print("nonlens in ", b)
    print("min, max: ", np.min(vals), np.max(vals))
    print("NaNs", (~np.isfinite(vals)).sum())

In [None]:
# Overlay the per-band magnitudes of lenses (blue) and non-lenses (red).
feature = 'mag'
units = 'mag'
a = ['%s_%s' % (b, feature) for b in 'ugriz']
lens_key, nonlens_key = 'lens_' + feature, 'nonlens_' + feature
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units=units, color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units=units, color='red',
                                        fig=fig_dict[lens_key])

### 6. Hand-engineered features

There are a few features worth looking at, such as color, that are not in the object table but can be computed from values in the object table.

### 6.1 Magnitude difference from z (u-z, g-z, r-z, i-z)

In [None]:
# Magnitude of each of u, g, r, i minus the z-band magnitude, for both tables.
for df in (lens_obj, nonlens_obj):
    z_mag = df['z_mag']
    for b in 'ugri':
        df[b + '_minus_z'] = df[b + '_mag'] - z_mag

In [None]:
# Overlay the magnitude differences from z for lenses (blue) and non-lenses (red).
feature = 'minus_z'
units = 'mag'
a = ['%s_%s' % (b, feature) for b in 'ugri']
lens_key, nonlens_key = 'lens_' + feature, 'nonlens_' + feature
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units=units, color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units=units, color='red',
                                        fig=fig_dict[lens_key])

The color seems to be the feature that differentiates between lenses and non-lenses the most! This makes sense because quasars tend to be blue (brighter at shorter wavelengths). In the next few cells, we see that the magnitude difference from the r-band will work just as well as that from the z-band.

### 6.2 Magnitude difference from r (u-r, g-r, i-r, z-r)

In [None]:
# Magnitude of each of u, g, i, z minus the r-band magnitude, for both tables.
for df in (lens_obj, nonlens_obj):
    r_mag = df['r_mag']
    for b in 'ugiz':
        df[b + '_minus_r'] = df[b + '_mag'] - r_mag

In [None]:
# Overlay the magnitude differences from r for lenses (blue) and non-lenses (red).
# (A stray leading '2' on the first statement made this cell a SyntaxError.)
feature, units = 'minus_r', 'mag'
a = [b + '_' + feature for b in 'ugiz']
fig_dict['lens_' + feature] = draw_cornerplot(a, lens_obj, units=units, color='blue')
fig_dict['nonlens_' + feature] = draw_cornerplot(a, nonlens_obj, fig=fig_dict['lens_' + feature], units=units, color='red')

### 6.3 Ellipticity measures $|e|$ and $\phi$

The $e_1$ and $e_2$ features in the object table are not very informative of the total "degree of shear", so we investigate the magnitude of the complex shear $|e|$ and the ellipticity angle $\phi$. 

Quasar images are brighter at shorter wavelengths, effectively stretching the image of the lens system, so depending on the images' relative positions, we expect the ellipticity to be smaller at shorter wavelengths.

We first perform the conversion from $e_1$ and $e_2$.

In [None]:
# Convert (e1, e2) into the modulus |e| and the angle phi, for every band of
# both tables.
for df in (lens_obj, nonlens_obj):
    for b in 'ugriz':
        e1, e2 = df[b + '_e1'], df[b + '_e2']
        # |e| = sqrt(e1^2 + e2^2)
        df[b + '_e'] = np.hypot(e1, e2)
        # arctan2 keeps the quadrant of (e1, e2) and returns 0 for
        # e1 == e2 == 0, where arctan(e2/e1) would evaluate 0/0 and give NaN.
        # NOTE(review): this is the phase of e1 + i*e2, in (-pi, pi]. If a
        # position angle is wanted, it is presumably half of this; confirm
        # against the convention used for the '<band>_phi-std' columns.
        df[b + '_phi'] = np.arctan2(e2, e1)

In [None]:
# Overlay the per-band |e| of lenses (blue) and non-lenses (red).
feature = 'e'
units = 'no units'
a = ['%s_%s' % (b, feature) for b in 'ugriz']
lens_key, nonlens_key = 'lens_' + feature, 'nonlens_' + feature
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units=units, color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units=units, color='red',
                                        fig=fig_dict[lens_key])

The ellipticity angle distribution seems to be quite uniform.

In [None]:
# Overlay the per-band phi of lenses (blue) and non-lenses (red).
feature = 'phi'
units = 'radians'
a = ['%s_%s' % (b, feature) for b in 'ugriz']
lens_key, nonlens_key = 'lens_' + feature, 'nonlens_' + feature
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units=units, color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units=units, color='red',
                                        fig=fig_dict[lens_key])

### 6.4 Ellipticity difference from z (u-z, g-z, r-z, i-z)

Taking the difference of ellipticity between bands will allow us to see the effect of quasar images' being blue.

In [None]:
# |e| of each of u, g, r, i minus the z-band |e|, for both tables.
for df in (lens_obj, nonlens_obj):
    z_e = df['z_e']
    for b in 'ugri':
        df[b + '_minus_z_e'] = df[b + '_e'] - z_e

In [None]:
# Overlay the |e| differences from z for lenses (blue) and non-lenses (red).
feature = 'minus_z_e'
units = 'no units'
a = ['%s_%s' % (b, feature) for b in 'ugri']
lens_key, nonlens_key = 'lens_' + feature, 'nonlens_' + feature
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units=units, color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units=units, color='red',
                                        fig=fig_dict[lens_key])

### 6.5 Ellipticity difference from r (u-r, g-r, i-r, z-r)

In [None]:
# |e| of each of u, g, i, z minus the r-band |e|, for both tables.
for df in (lens_obj, nonlens_obj):
    r_e = df['r_e']
    for b in 'ugiz':
        df[b + '_minus_r_e'] = df[b + '_e'] - r_e

In [None]:
# Overlay the |e| differences from r for lenses (blue) and non-lenses (red).
feature = 'minus_r_e'
units = 'no units'
a = ['%s_%s' % (b, feature) for b in 'ugiz']
lens_key, nonlens_key = 'lens_' + feature, 'nonlens_' + feature
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units=units, color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units=units, color='red',
                                        fig=fig_dict[lens_key])

### 6.6 Size difference from z (u-z, g-z, r-z, i-z)

In [None]:
# Trace of each of u, g, r, i minus the z-band trace, for both tables.
for df in (lens_obj, nonlens_obj):
    z_trace = df['z_trace']
    for b in 'ugri':
        df[b + '_minus_z_trace'] = df[b + '_trace'] - z_trace

In [None]:
# Overlay the trace differences from z for lenses (blue) and non-lenses (red).
feature = 'minus_z_trace'
units = 'arcsec^2'
a = ['%s_%s' % (b, feature) for b in 'ugri']
lens_key, nonlens_key = 'lens_' + feature, 'nonlens_' + feature
fig_dict[lens_key] = draw_cornerplot(a, lens_obj, units=units, color='blue')
fig_dict[nonlens_key] = draw_cornerplot(a, nonlens_obj, units=units, color='red',
                                        fig=fig_dict[lens_key])