## Imports

In [None]:
import os
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt

## Read data file

In [None]:
# Directory that holds the input CSV; the figures saved further down in this
# notebook are written into the same directory.
base = "SDSSxGaia/StatBatches/Ascom"
# Alternative directory, currently disabled:
# base = "SDSS-Stripe82/Ascom/StatBatch"
datacsv = "pids_data.csv"
csvpath = os.path.join(base, datacsv)
# dtype=object turns off numeric type inference on load: the cells below
# compare against the string '-9999' and convert values with float() themselves.
df = pd.read_csv(csvpath, dtype=object)

## Data Org Functions

In [None]:
## Not needed when plotting both peaks in the same plot, but the functions
## are pretty neat!! :) Let them remain!


def df_to_np(filt, s):
    """Return column `s` of the rows of `df` selected by `filt` as a float array.

    Reads the notebook-level DataFrame `df`.

    Parameters
    ----------
    filt : row selector applied as ``df[filt]`` (the notebook uses boolean masks).
    s : name of the column to extract.

    Returns
    -------
    numpy.ndarray with every selected value passed through ``float()``.

    Raises
    ------
    ValueError / TypeError if a selected value cannot be converted by ``float()``.
    """
    return np.array([float(x) for x in df[filt][s]])


def colsN(cols, n):
    """Return the names in `cols` with `n` appended to each (e.g. 'u' -> 'u1')."""
    return [c + str(n) for c in cols]


def get_data(filt, cols, peaks=(1, 2)):
    """Gather the per-peak columns of the selected rows into a nested dict.

    Parameters
    ----------
    filt : row selector forwarded to `df_to_np`.
    cols : base column names without the peak suffix (e.g. 'photoZ', 'u').
    peaks : suffixes to collect; defaults to the two peaks (1, 2) that were
        previously hard-coded.

    Returns
    -------
    dict mapping each peak to ``{base column name: float array}``, where the
    array is read from the column named ``<base name><peak>``.
    """
    return {
        n: {c: df_to_np(filt, cn) for c, cn in zip(cols, colsN(cols, n))}
        for n in peaks
    }

# **Valid PhotoZ**

These are objects for which both peaks have a valid photoZ (i.e. not -9999).

## Data dictionary of valid photoZ

In [None]:
# Keep only rows where neither peak carries the '-9999' photoZ marker
# (compared as a string).
pz = (df['photoZ1'] != '-9999') & (df['photoZ2'] != '-9999')
pz_cols = ['photoZ', 'photoZ_err', 'absMagR', 'u', 'g', 'r', 'i', 'z']

# One accumulator per quantity, keyed by the column name without its peak suffix.
collected = {name: [] for name in ('photoZ', 'u', 'g', 'r', 'i', 'z', 'absMagR')}
same, diff = 0, 0

for i, row in df[pz].iterrows():
    # When both peaks point at the same object, take its values once (peak 1);
    # otherwise take peak 1 followed by peak 2.
    if row['pid1'] == row['pid2']:
        same += 1
        peaks = ('1',)
    else:
        diff += 1
        peaks = ('1', '2')

    for name, values in collected.items():
        values.extend([float(row[name + peak]) for peak in peaks])

photo_zs = np.array(collected['photoZ'])
us, gs, rs, iis, zs = (np.array(collected[band]) for band in 'ugriz')
absMagRs = np.array(collected['absMagR'])

for label, count in (("Valid photoZ = {}", len(df[pz])),
                     ("Same = {}", same),
                     ("Different = {}", diff)):
    print(label.format(count))

## Distribution of photoZ

In [None]:
# 50-bin histogram of the collected photometric redshifts on a 5x5 inch figure.
fig, ax = plt.subplots(1, 1, figsize=(5, 5))

ret = ax.hist(photo_zs, bins=50)
ax.set_ylabel('Counts', fontsize=15)
ax.set_xlabel('Photometric redshift', fontsize=15)

# Saving is switched off for this figure:
# fig.savefig(os.path.join(base, 'photoZ_redshift_hist.png'))
pass

## Goodness of Fit

In [None]:
import scipy.stats as st
from scipy.stats import kstest
from sklearn.preprocessing import StandardScaler

############################# Normalize Data ###############################

# StandardScaler expects a 2-D (n_samples, n_features) array.  The column view
# is passed in directly instead of rebinding `photo_zs`, so the shared array
# keeps its 1-D shape for every other cell.
sc = StandardScaler()
z_std = sc.fit_transform(photo_zs.reshape(-1, 1)).flatten()

# disttypes = ['norm', 'rayleigh', 'cauchy', 'lognorm', 'beta', 'gamma']
disttypes = ['norm']

number_of_bins = 50

for disttype in disttypes:
    dist = getattr(st, disttype)

    ############################# KS-Test ###############################
    # NOTE: the distribution parameters are fitted to the same sample that is
    # being tested, so the reported p-value is optimistic; treat it as a
    # relative goodness-of-fit score rather than an exact significance level.
    ks = kstest(z_std, disttype, args=dist.fit(z_std))

    ######################### Plot Fitted Data #########################

    fig, ax = plt.subplots(1)
    fig.set_figheight(5)
    fig.set_figwidth(5)

    # `number_of_bins` bins need `number_of_bins + 1` edges.  The upper edge is
    # the 99th percentile, so the largest 1% of values are left out of the bars.
    bin_cutoffs = np.linspace(np.percentile(photo_zs, 0),
                              np.percentile(photo_zs, 99),
                              number_of_bins + 1)
    h = ax.hist(photo_zs, bins=bin_cutoffs, color='0.75')
    param = dist.fit(photo_zs)

    lin = np.linspace(0, np.max(photo_zs), 1000)
    # scipy's fit() returns (shape parameters..., loc, scale).
    pdf_fitted = dist.pdf(lin, *param[:-2], loc=param[-2], scale=param[-1])

    # Rescale the unit-area pdf so it can be drawn over a histogram of counts:
    # area of the bars (count * bin width) divided by the area under the pdf
    # on `lin` (trapezoid rule written out, avoiding the deprecated np.trapz).
    hist_area = np.sum(h[0] * np.diff(h[1]))
    pdf_area = np.sum(0.5 * (pdf_fitted[1:] + pdf_fitted[:-1]) * np.diff(lin))
    scale_pdf = hist_area / pdf_area
    pdf_fitted *= scale_pdf

    # Add the line to the plot
    ax.plot(lin, pdf_fitted)
    ax.set_xlabel('photo-Z', fontsize=15)
    ax.set_ylabel('Counts', fontsize=15)
    # plt.close()

    # fig.savefig(os.path.join(base, 'photoZ_redshift_distfit.png'))
    print("param = {}".format(param))
    print("{} --> {}".format(disttype, ks))

## Color Diagram

In [None]:
# Colour-magnitude diagram: g - r against absMagR for the valid-photoZ sample.
fig, ax = plt.subplots(1, 1, figsize=(5, 5))

color_gr = gs - rs
ax.plot(absMagRs, color_gr, 'o', markersize=3)
ax.set_ylabel('g - r', fontsize=15)
ax.set_xlabel('Absolute Magnitude', fontsize=15)

out_png = os.path.join(base, 'photoZ_color_diagram.png')
fig.savefig(out_png)
pass

## Photometric Distribution

In [None]:
# One 50-bin histogram per band, each saved to its own PNG under `base`.
band_values = {'u': us, 'g': gs, 'r': rs, 'i': iis, 'z': zs}

for band, lst in band_values.items():
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))

    ax.hist(lst, bins=50)
    ax.set_ylabel('Counts', fontsize=10)
    ax.set_xlabel("{}-Mag".format(band), fontsize=10)

    out_png = os.path.join(base, 'photoZ_{}_hist.png'.format(band))
    fig.savefig(out_png)

pass

# **Valid SchlegelZ**

These are objects for which both peaks have a valid photoZ and a valid Schlegel redshift (taken from galSpecInfo).

## galSpecInfo list

In [None]:
# Rows where both peaks have a photoZ other than the '-9999' marker and a
# non-null schlegelZ.
has_photo_z = (df['photoZ1'] != '-9999') & (df['photoZ2'] != '-9999')
has_schlegel_z = df['schlegelZ1'].notnull() & df['schlegelZ2'].notnull()
gz = has_photo_z & has_schlegel_z
gz_cols = ['schlegelZ', 'schlegelZ_err', 'absMagR', 'u', 'g', 'r', 'i', 'z']

# One accumulator per quantity, keyed by the column name without its peak suffix.
collected = {name: [] for name in ('schlegelZ', 'u', 'g', 'r', 'i', 'z', 'absMagR')}

for i, row in df[gz].iterrows():
    # Same object under both peaks -> take peak 1 only; otherwise peak 1 then peak 2.
    peaks = ('1',) if row['pid1'] == row['pid2'] else ('1', '2')

    for name, values in collected.items():
        values.extend([float(row[name + peak]) for peak in peaks])

print("Valid schlegelZ = {}".format(len(df[gz])))
schlegel_zs = np.array(collected['schlegelZ'])
us, gs, rs, iis, zs = (np.array(collected[band]) for band in 'ugriz')
absMagRs = np.array(collected['absMagR'])

## Distribution of schlegelZ

In [None]:
# 20-bin histogram of the collected Schlegel redshifts, saved under `base`.
fig, ax = plt.subplots(1, 1, figsize=(5, 5))

ax.hist(schlegel_zs, bins=20)
ax.set_ylabel('Counts', fontsize=15)
ax.set_xlabel('Schlegel redshift', fontsize=15)

out_png = os.path.join(base, 'schlegelZ_redshift_hist.png')
fig.savefig(out_png)
pass

## Color Diagram

In [None]:
# Colour-magnitude diagram: g - r against absMagR for the valid-schlegelZ sample.
fig, ax = plt.subplots(1, 1, figsize=(5, 5))

color_gr = gs - rs
ax.plot(absMagRs, color_gr, 'o', markersize=3)
ax.set_ylabel('g - r', fontsize=15)
ax.set_xlabel('Absolute Magnitude', fontsize=15)

out_png = os.path.join(base, 'schlegelZ_color_diagram.png')
fig.savefig(out_png)

pass

## Photometric Distribution

In [None]:
# One 25-bin histogram per band, each saved to its own PNG under `base`.
band_values = {'u': us, 'g': gs, 'r': rs, 'i': iis, 'z': zs}

for band, lst in band_values.items():
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))

    ax.hist(lst, bins=25)
    ax.set_ylabel('Counts')
    ax.set_xlabel(band)

    out_png = os.path.join(base, 'schlegelZ_{}_hist.png'.format(band))
    fig.savefig(out_png)


pass