# QA training set - DP02

_Authors: Andreia Dourado, Bruno Moraes_ \
_Spatial distribution plot extracted from: https://github.com/linea-it/OLD-pz-lsst-inkind/blob/main/doc/notebooks/DP02_QA_notebook_input.ipynb_

__Description: exploratory plots for characterizing the training set data.__

## Imports

In [None]:
# General
import os
import sys
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Bokeh
import bokeh
from bokeh.io import output_notebook

# Holoviews
import holoviews as hv
from holoviews import opts

# Geoviews
import geoviews as gv
import geoviews.feature as gf
from geoviews.operation import project
import cartopy.crs as ccrs

In [None]:
# Select Bokeh as the plotting backend for both HoloViews and GeoViews.
hv.extension('bokeh')
gv.extension('bokeh')
# Have Bokeh render its output inside the notebook.
output_notebook()
# IPython magic: show matplotlib figures inline in the notebook.
%matplotlib inline

## 1. Reading the data

In [None]:
# Load the training set from its Parquet file into a DataFrame.
training_set_path = '../dados_tcc/training_sets/training_set.parquet'
data = pd.read_parquet(training_set_path)

In [None]:
# Display the (rows, columns) dimensions of the training set.
data.shape

In [None]:
# List the column names as read from the file (before the renaming below).
data.columns

In [None]:
# Map the original column names to the short names used in the rest of the
# notebook. The existing 'ra'/'dec' columns are moved to '*_truth' so that the
# 'coord_*' columns can take over the plain 'ra'/'dec' names.
columns_map = {
    'ra': 'ra_truth',
    'dec': 'dec_truth',
    'coord_radp02_object': 'ra',
    'coord_decdp02_object': 'dec',
    'redshiftdp02_truth': "redshift",
}
# Magnitudes and magnitude errors follow the same naming pattern in every band.
columns_map.update({
    f'{quantity}_{band}dp02_object': f'{quantity}_{band}'
    for quantity in ('mag', 'magerr')
    for band in 'ugrizy'
})
data = data.rename(columns=columns_map)

## 2. Spatial distribution


In [None]:
### Bin edges of the 2d histogram: R.A. from 48 in steps of 0.056 (up to 76)
### and DEC from -46 in steps of 0.042 (up to -25).
ra_bin_edges = np.arange(48, 76, 0.056)
dec_bin_edges = np.arange(-46, -25, 0.042)
bins_ra_dec = (ra_bin_edges, dec_bin_edges)

### Object coordinates as plain NumPy arrays.
ra = data['ra'].to_numpy()
dec = data['dec'].to_numpy()

### Counting the objects in each (R.A., DEC) cell.
histogram_ra_dec, bins_ra, bins_dec = np.histogram2d(x=ra, y=dec, bins=bins_ra_dec)

In [None]:
### Empty cells (0 counts) become NaN on a float copy of the histogram.
histogram_ra_dec_NaN = histogram_ra_dec.astype(float)
empty_cells = histogram_ra_dec_NaN == 0
histogram_ra_dec_NaN[empty_cells] = np.nan

### Bin centers: midpoint of each pair of consecutive edges.
bins_ra_centers = (bins_ra[:-1] + bins_ra[1:]) / 2
bins_dec_centers = (bins_dec[:-1] + bins_dec[1:]) / 2

### Plate Carrée projection - R.A. centers wrapped to the range [-180,180),
### with the histogram rows reordered to follow the sorted centers.
ra_centers_wrapped = np.where(bins_ra_centers >= 180, bins_ra_centers - 360, bins_ra_centers)
sorted_indices_180_range = np.argsort(ra_centers_wrapped)
bins_ra_centers_180_range = ra_centers_wrapped[sorted_indices_180_range]
histogram_ra_dec_180_range = histogram_ra_dec_NaN[sorted_indices_180_range, :]

### Mollweide projection - R.A. centers inverted (360 - value) wherever the
### center is <= 360, with the histogram rows reordered in the same way.
ra_centers_flipped = np.where(bins_ra_centers <= 360, 360 - bins_ra_centers, bins_ra_centers)
sorted_indices_inverted = np.argsort(ra_centers_flipped)
bins_ra_centers_inverted = ra_centers_flipped[sorted_indices_inverted]
histogram_ra_dec_inverted = histogram_ra_dec_NaN[sorted_indices_inverted, :]

### Transposed copies of the histograms, as used by the image plots.
histogram_ra_dec_180_range_transpose = np.transpose(histogram_ra_dec_180_range)
histogram_ra_dec_inverted_transpose = np.transpose(histogram_ra_dec_inverted)

### Mollweide projection

In [None]:
### Generating the R.A. and DEC ticks: longitudes every 30 from 30 to 330,
### latitudes every 15 from -75 to 75.
longitudes = np.arange(30, 360, 30)
latitudes = np.arange(-75, 76, 15)

### Tick texts, e.g. "30°".
lon_labels = [f"{lon}°" for lon in longitudes]
lat_labels = [f"{lat}°" for lat in latitudes]

### Label positions: longitude labels sit on lat=0 at the flipped longitude
### values (so the "30°" text is placed at lon=330, matching the inverted R.A.
### axis of the image); latitude labels sit at lon=-180.
labels_data = {
    "lon": list(np.flip(longitudes)) + [-180] * len(latitudes),
    "lat": [0] * len(longitudes) + list(latitudes),
    "label": lon_labels + lat_labels,
}

df_labels = pd.DataFrame(labels_data)

### Text labels drawn in the Mollweide projection.
labels_plot = gv.Labels(df_labels, kdims=["lon", "lat"], vdims=["label"]).opts(
    text_font_size="12pt",
    text_color="black",
    text_align='right',
    text_baseline='bottom',
    projection=ccrs.Mollweide()
)

### Creating the image using geoviews, from the inverted R.A. centers, the DEC
### centers and the transposed (NaN-masked) counts.
gv_image_ra_dec = gv.Image((bins_ra_centers_inverted, bins_dec_centers, histogram_ra_dec_inverted_transpose), [f'R.A.', f'DEC'], f'Counts')

### Doing the Mollweide projection.
gv_image_ra_dec_projected = gv.operation.project(gv_image_ra_dec, projection=ccrs.Mollweide())

### Generating the grid lines.
grid = gf.grid().opts(
    opts.Feature(projection=ccrs.Mollweide(), scale='110m', color='black')
)

### Adjusting the image options. The color limits go from 10 up to the largest
### (non-NaN) count in the histogram.
gv_image_ra_dec_projected = gv_image_ra_dec_projected.opts(cmap='viridis', cnorm='linear', colorbar=True, width=1000, height=500, 
                                                           clim=(10, np.nanmax(histogram_ra_dec_inverted_transpose)), 
                                                           title='Spatial Distribution of Objects - Mollweide projection', 
                                                           projection=ccrs.Mollweide(),  global_extent=True)

### Showing the plot: image, grid lines and tick labels overlaid.
combined_plot = gv_image_ra_dec_projected * grid * labels_plot
combined_plot

## 4. Magnitude distributions

In [None]:
def mag_histogram(catalog, title='', sigma=0, pop='', save=0):
    """Plot one log-scaled magnitude histogram per band (u, g, r, i, z, y).

    The six panels are arranged on a 2x3 grid. The figure is always written
    to '../dados_tcc/QA_training_set/mag_hist.pdf' and then shown.

    Args:
        catalog: Table indexable by 'mag_<band>' for each of the six bands
            (called here with the training-set DataFrame).
        title: Figure-level title passed to ``plt.suptitle``.
        sigma: Unused; kept so existing callers keep working.
        pop: Unused; kept so existing callers keep working.
        save: Unused; the figure is saved regardless of this value.
    """
    bands = ['u', 'g', 'r', 'i', 'z', 'y']
    colors = ['purple', 'dodgerblue', 'darkgreen', 'deeppink', 'orange', 'red']
    plt.figure(figsize=(13, 9))
    # Subplot positions are 1-based, hence start=1.
    for panel, (band, color) in enumerate(zip(bands, colors), start=1):
        plt.subplot(2, 3, panel)
        plt.hist(catalog[f'mag_{band}'], bins=20, label=f'{band} band',
                 color=color, edgecolor='white', alpha=0.5)
        plt.xlim(16, 36)
        plt.yscale('log')
        plt.xlabel('mag', fontsize=20)
        plt.ylabel('counts', fontsize=20)
        plt.tick_params(axis='both', labelsize=15)
        plt.legend(loc='upper right', fontsize=15)
    plt.suptitle(title)
    plt.tight_layout()
    plt.savefig('../dados_tcc/QA_training_set/mag_hist.pdf', format='pdf', bbox_inches='tight', dpi=300)
    plt.show()

In [None]:
# Draw the per-band magnitude histograms for the training set.
mag_histogram(data)

## 5. Magnitude errors

In [None]:
def mag_err(catalog):
    """Plot magnitude error against magnitude for each band as hexbin maps.

    One panel per band (u, g, r, i, z, y) on a 3x2 grid. In each panel only
    the rows with ``magerr_<band> < 2.1`` are drawn, with log-scaled bin
    counts. The figure is written to
    '../dados_tcc/QA_training_set/mag_err.pdf' and then shown.

    Args:
        catalog: DataFrame with 'mag_<band>' and 'magerr_<band>' columns
            (it must support ``.query``).
    """
    plt.figure(figsize=(20, 30))

    for panel, band in enumerate(['u', 'g', 'r', 'i', 'z', 'y'], start=1):
        plt.subplot(3, 2, panel)

        # Drop the objects whose error in this band is 2.1 or larger.
        selected = catalog.query(f'magerr_{band} < 2.1')
        magnitudes = np.array(selected[f'mag_{band}'])
        errors = np.array(selected[f'magerr_{band}'])

        hexes = plt.hexbin(magnitudes, errors, gridsize=80, cmap='GnBu', bins='log', mincnt=1)

        plt.xlabel(f"mag {band}", fontsize=25)
        # Only the first panel carries the y-axis label.
        if panel == 1:
            plt.ylabel("error", fontsize=25)
        plt.xlim(14, 30)
        plt.ylim(0, 2.1)
        plt.grid(True, linestyle='--', alpha=0.6)

        colorbar = plt.colorbar(hexes, label='log(N)')
        colorbar.set_label('log(N)', fontsize=20)
        colorbar.ax.tick_params(labelsize=20)
        plt.tick_params(axis='both', labelsize=20)

    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.savefig('../dados_tcc/QA_training_set/mag_err.pdf', format='pdf', bbox_inches='tight', dpi=300)
    plt.show()

In [None]:
# Draw the magnitude-error versus magnitude maps for the training set.
mag_err(data)

## 6. Magnitude x Color

In [None]:
def mag_color(catalog, band = 'i', color1 = 'g', color2 = 'r'):
    """Plot magnitude against a color index as a log-scaled hexbin map.

    The x axis is ``mag_<color1> - mag_<color2>`` and the y axis is
    ``mag_<band>``. The figure is written to
    '../dados_tcc/QA_training_set/mag_color.pdf' and then shown.

    Args:
        catalog: Table indexable by 'mag_<band>' columns.
        band: Band used for the magnitude axis.
        color1: First band of the color index.
        color2: Second band of the color index (subtracted from the first).
    """
    color_index = catalog[f'mag_{color1}'] - catalog[f'mag_{color2}']
    magnitude = catalog[f'mag_{band}']

    plt.hexbin(color_index, magnitude, cmap='BuPu', bins='log', mincnt=1, gridsize=[400, 200])
    plt.ylabel("mag " + band, fontsize=20)
    plt.xlabel(f"{color1}-{color2}", fontsize=20)

    plt.xlim(-5, 5)
    plt.grid(True)

    colorbar = plt.colorbar()
    plt.tick_params(axis='both', labelsize=12)
    colorbar.set_label('counts', fontsize=15)
    colorbar.ax.tick_params(labelsize=15)

    plt.savefig('../dados_tcc/QA_training_set/mag_color.pdf', format='pdf', bbox_inches='tight', dpi=300)
    plt.show()

In [None]:
# Draw the magnitude versus color map with the default bands (i against g-r).
mag_color(data)

## 7. Color x color

In [None]:
def color_color(catalog, sigma=0,pop='',save=0):
    """Plot color-color hexbin maps for consecutive band triples.

    For every three consecutive bands (a, b, c) of u, g, r, i, z, y one
    panel shows ``b - c`` on the x axis against ``a - b`` on the y axis,
    giving four panels on a 3x2 grid. The figure is written to
    '../dados_tcc/QA_training_set/color_color.pdf' and then shown.

    Args:
        catalog: Table indexable by 'mag_<band>' columns.
        sigma: Not used by this function.
        pop: Not used by this function.
        save: Not used by this function; the figure is always saved.
    """
    bands = ['u', 'g', 'r', 'i', 'z', 'y']
    plt.figure(figsize=(12, 12))

    triples = zip(bands, bands[1:], bands[2:])
    for panel, (first, middle, last) in enumerate(triples, start=1):
        plt.subplot(3, 2, panel)

        mag_middle = catalog[f'mag_{middle}']
        mag_last = catalog[f'mag_{last}']
        mag_first = catalog[f'mag_{first}']

        plt.hexbin(mag_middle - mag_last, mag_first - mag_middle, None,
                   mincnt=1, cmap='RdPu', gridsize=[400, 200], bins='log')
        plt.xlabel(f'{middle}-{last}', fontsize=20)
        plt.ylabel(f'{first}-{middle}', fontsize=20)
        plt.xlim(-5, 5)
        plt.ylim(-5, 5)

        colorbar = plt.colorbar()
        colorbar.set_label('counts', fontsize=15)
        colorbar.ax.tick_params(labelsize=15)
        plt.tick_params(axis='both', labelsize=15)

    plt.tight_layout()
    plt.savefig('../dados_tcc/QA_training_set/color_color.pdf', format='pdf', bbox_inches='tight', dpi=300)
    plt.show()

In [None]:
# Draw the color-color maps for the training set.
color_color(data)

## 8. Redshift distribution

In [None]:
def z_hist(catalog, sigma=0, pop='', save=0):
    """Plot the normalized redshift distribution of the catalog.

    Draws a 100-bin histogram of the 'redshift' column with
    ``density=True``, writes it to
    '../dados_tcc/QA_training_set/z_hist.pdf' and then shows it.

    Args:
        catalog: Table indexable by 'redshift'.
        sigma: Unused; kept so existing callers keep working.
        pop: Unused; kept so existing callers keep working.
        save: Unused; the figure is saved regardless of this value.
    """
    plt.hist(catalog['redshift'], bins=100, density=True, color='azure', edgecolor='blue', alpha=0.7)
    # density=True normalizes the histogram, so the y axis shows a
    # probability density rather than raw counts; label it accordingly.
    plt.ylabel('density', fontsize=20)
    plt.xlabel('z', fontsize=20)
    plt.tick_params(axis='both', labelsize=15)

    plt.savefig('../dados_tcc/QA_training_set/z_hist.pdf', format='pdf', bbox_inches='tight', dpi=300)
    plt.show()

In [None]:
# Draw the redshift histogram for the training set.
z_hist(data)

## 9. Magnitude x redshift

In [None]:
def mag_z(catalog, band = 'i'):
    """Plot magnitude against redshift as a log-scaled hexbin map.

    The x axis is the 'redshift' column (limited to 0-3) and the y axis is
    ``mag_<band>``. The figure is written to
    '../dados_tcc/QA_training_set/mag_z.pdf' and then shown.

    Args:
        catalog: Table indexable by 'redshift' and 'mag_<band>'.
        band: Band used for the magnitude axis.
    """
    redshift = catalog['redshift']
    magnitude = catalog[f'mag_{band}']

    plt.hexbin(redshift, magnitude, cmap='viridis', bins='log', mincnt=1, gridsize=[400, 200])
    plt.ylabel("mag " + band, fontsize=20)
    plt.xlabel("z", fontsize=20)

    plt.xlim(0, 3)
    plt.grid(True)

    colorbar = plt.colorbar()
    colorbar.set_label('counts', fontsize=15)
    colorbar.ax.tick_params(labelsize=15)
    plt.tick_params(axis='both', labelsize=15)

    plt.savefig('../dados_tcc/QA_training_set/mag_z.pdf', format='pdf', bbox_inches='tight', dpi=300)
    plt.show()

In [None]:
# Draw the magnitude versus redshift map with the default band (i).
mag_z(data)

In [None]:
# Export this notebook to HTML through the jupyter nbconvert command line.
nbconvert_command = 'jupyter nbconvert --to html 01_QA_training_set.ipynb'
os.system(nbconvert_command)