# Cross-matching HSC spec-z with Mizuki photo-z

Zooey Nguyen

2021-07-09

2021-07-13: Updated with HSC_v3 data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from scipy.stats import gaussian_kde

In [2]:
# Load the HSC_v3 photo-z/spec-z matched catalogue (full, unfiltered) from CSV.
hsc_v3_matched_csv = '/data/HSC/HSC_v3/matched_photozdata_with_spectrozdata_full_unfiltered_readable.csv'
hsc_df = pd.read_csv(hsc_v3_matched_csv)

In [3]:
# Preview the loaded HSC catalogue; the bare expression lets the notebook
# render pandas' truncated view (first/last rows, middle columns elided).
hsc_df

Unnamed: 0,# object_id,ra,dec,coord,skymap_id,g_cmodel_mag,r_cmodel_mag,i_cmodel_mag,z_cmodel_mag,y_cmodel_mag,...,user.specz_redshift,user.specz_redshift_isnull,user.specz_mag_i,user.specz_mag_i_isnull,user.specz_name,user.specz_name_isnull,user.specz_ra,user.specz_ra_isnull,user.specz_dec,user.specz_dec_isnull
0,4.230538e+16,222.172947,0.703217,"(-152855.796875, -138469.71875, 2531.517822265...",96190404,,,,,,...,0.000050,False,14.994000,False,DEEP3-201701-11033292,False,222.172910,False,0.703446,False
1,4.203723e+16,131.679885,0.664920,"(-137150.296875, 154042.96875, 2393.658203125)",95580504,22.204403,21.622118,21.416664,21.313837,21.267633,...,0.000250,False,21.292999,False,DEEP3-201701-11033153,False,131.679880,False,0.664911,False
2,4.219529e+16,185.419027,0.669425,"(-205328.921875, -19478.095703125, 2409.874023...",95940304,23.674694,22.077913,20.804096,20.299055,20.116999,...,0.613110,False,19.547001,False,DEEP3-201701-12020269,False,185.419060,False,0.669384,False
3,4.108313e+16,169.722852,-0.928743,"(-202928.890625, 36794.82421875, -3343.3278808...",93410703,21.377253,21.038179,,,20.825388,...,0.000410,False,20.643999,False,DEEP3-201701-11007805,False,169.722870,False,-0.928725,False
4,4.230538e+16,222.172947,0.703217,"(-152855.796875, -138469.71875, 2531.517822265...",96190404,,,,,,...,0.000050,False,14.994000,False,DEEP3-201701-11033292,False,222.172910,False,0.703446,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
587708,7.464861e+16,215.134836,52.990838,"(-101537.71875, -71454.171875, 164710.546875)",169730405,25.217243,25.169708,25.242388,25.437571,25.803164,...,0.501636,False,25.084095,False,DEEP3-201701-13025217,False,215.134901,False,52.990877,False
587709,7.464861e+16,215.085963,52.973551,"(-101639.296875, -71396.1015625, 164673.078125)",169730405,24.161261,23.943720,23.892523,23.683310,23.023567,...,0.574514,False,23.742451,False,DEEP3-201701-13025925,False,215.086020,False,52.973526,False
587710,7.464875e+16,214.756253,52.964975,"(-102068.7109375, -70824.0859375, 164654.484375)",169730505,25.315399,23.766863,22.617428,22.156572,21.976051,...,0.658518,False,22.518562,False,DEEP3-201701-13028127,False,214.756265,False,52.964976,False
587711,7.464861e+16,215.085963,52.973551,"(-101639.296875, -71396.1015625, 164673.078125)",169730405,24.161261,23.943720,23.892523,23.683310,23.023567,...,0.574514,False,23.742451,False,DEEP3-201701-13025925,False,215.086020,False,52.973526,False


In [4]:
# Load the Mizuki photo-z point estimates (mean/mode/median/best/mc) from CSV.
# NOTE(review): this path is still under HSC_v2 while the spec-z catalogue is
# read from HSC_v3 -- confirm that mixing the two releases is intended.
mizuki_photoz_csv = '/data/HSC_v2/HSC_mizuki_spectra.csv'
mizuki_df = pd.read_csv(mizuki_photoz_csv)

In [5]:
# Preview the Mizuki photo-z table; the bare expression lets the notebook
# render pandas' truncated view instead of printing every row.
mizuki_df

Unnamed: 0,# object_id,photoz_mean,photoz_mode,photoz_median,photoz_best,photoz_mc
0,36407046198788496,0.909663,0.98,0.937792,0.93,0.968890
1,36407046198788565,0.036461,0.00,0.000050,0.00,-0.003718
2,36407046198788572,0.021710,0.00,0.000042,0.00,0.003706
3,36407046198788777,0.709375,0.83,0.721828,0.71,0.886515
4,36407046198788807,0.710609,0.69,0.709178,0.71,0.617680
...,...,...,...,...,...,...
131195972,44166286871123655,0.502116,0.23,0.605081,0.70,0.363912
131195973,44166286871124076,1.989349,1.99,1.984982,1.96,1.606327
131195974,44166286871124590,1.440667,1.01,1.178715,1.05,2.922913
131195975,44166286871124714,0.915373,0.92,0.916785,0.91,0.867984


In [None]:
# Inner-join the HSC spec-z catalogue with the Mizuki photo-z table on the
# shared '# object_id' key; only objects present in both frames are kept.
# Both inputs carry an 'Unnamed: 0' column, so the result holds them as
# 'Unnamed: 0_x' / 'Unnamed: 0_y' (pandas' default suffixes).
# NOTE(review): the hsc_df preview shows '# object_id' in float notation
# (e.g. 4.230538e+16) while mizuki_df shows integers. 17-digit ids exceed
# float64's exact-integer range (2**53), so distinct ids may collide or fail
# to match -- confirm the key dtype when loading the HSC CSV.
crossmatched_df = pd.merge(hsc_df, mizuki_df, how='inner', on='# object_id')

In [None]:
# Preview the merged frame (HSC columns followed by the Mizuki photo-z columns).
crossmatched_df

In [None]:
# Compare row/column counts before and after the cross-match to see how many
# spec-z objects found a Mizuki photo-z counterpart.
print(hsc_df.shape)
print(crossmatched_df.shape)
# The spectroscopic redshift column is named 'user.specz_redshift' in the
# HSC_v3 catalogue (see the hsc_df preview); plain 'specz_redshift' is not
# among the displayed columns and would raise KeyError.
crossmatched_df['user.specz_redshift'].describe()

In [None]:
# Keep only objects with 0.01 < spec-z < 4 (both bounds exclusive). Rows with
# a missing spec-z are dropped too, because NaN comparisons evaluate to False.
# Presumably this removes the near-zero redshifts visible in the preview
# (e.g. 0.00005) and implausibly high values -- confirm the intended cuts.
# The column is 'user.specz_redshift' in the HSC_v3 catalogue; plain
# 'specz_redshift' is not among the displayed columns.
specz_all = crossmatched_df['user.specz_redshift']
crossmatched_clean_df = crossmatched_df[(specz_all < 4) & (specz_all > 0.01)]

In [None]:
# Photo-z vs. spec-z for each Mizuki point estimate on a 2x2 grid:
# a 2-D histogram with KDE contours overlaid on the same axes.
# The spec-z column is 'user.specz_redshift' in the HSC_v3 catalogue; plain
# 'specz_redshift' is not among the displayed columns.
specz_column = 'user.specz_redshift'
# Order matches the row-major panel layout: [0,0], [0,1], [1,0], [1,1].
photoz_columns = ['photoz_mean', 'photoz_median', 'photoz_mode', 'photoz_best']

fig, axes = plt.subplots(2, 2, figsize=(15, 15))
for ax, photoz_column in zip(axes.flat, photoz_columns):
    specz_values = crossmatched_clean_df[specz_column]
    photoz_values = crossmatched_clean_df[photoz_column]
    sns.histplot(x=specz_values, y=photoz_values, ax=ax)
    sns.kdeplot(x=specz_values, y=photoz_values, ax=ax)
    # Label every panel so the figure is readable on its own.
    ax.set_title(f'{photoz_column} vs. spec-z')
    ax.set_xlabel('spec-z')
    ax.set_ylabel(photoz_column)

In [None]:
# Set up the inputs for the binned statistics: spec-z, the Mizuki 'best'
# photo-z, and two residual measures derived from them.
from scipy.stats import binned_statistic
# The spec-z column is 'user.specz_redshift' in the HSC_v3 catalogue; plain
# 'specz_redshift' is not among the displayed columns.
spec_z = crossmatched_clean_df['user.specz_redshift']
mizuki_z = crossmatched_clean_df['photoz_best']
# Signed residual scaled by (1 + z_spec).
delz = (mizuki_z - spec_z) / (1 + spec_z)
# |dz| - 0.15 * (1 + z_spec): positive when the absolute error exceeds
# 0.15 * (1 + z_spec). Presumably the outlier criterion -- confirm the threshold.
adel_z = abs(mizuki_z - spec_z) - 0.15 * (1 + spec_z)

In [None]:
# Number of objects in each of 20 equal-width spec-z bins over [0, 4].
# `statistic` must be the string 'count'; the bare name `count` is undefined
# and raised NameError. With 'count' the values argument is not used in the
# result, but it still has to match the shape of spec_z.
# The result is a (statistic, bin_edges, binnumber) tuple-like object.
bin_counts = binned_statistic(spec_z, mizuki_z, statistic='count', bins=20, range=(0, 4))