# Subsample the SpecObjPhot per redshift and validate the data
notebook by _Kara Ponder (UC Berkeley)_, (add your name here)

In [None]:
from astropy.io import fits
import corner
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sncosmo

%matplotlib inline

In [None]:
# Load the photometry catalogue that every later cell works from.
# The '_unred' variant of the file is kept here for reference only.
#df_unred = pd.read_csv('SpecObjPhot_unred.csv')
catalog_file = 'SpecObjPhot.csv'
df = pd.read_csv(catalog_file)

In [None]:
# Central redshift of each of the 19 bins, in increasing order.
z_SLICS = np.array([
    0.042, 0.080, 0.130, 0.221, 0.317,
    0.418, 0.525, 0.640, 0.764, 0.897,
    1.041, 1.199, 1.372, 1.562, 1.772,
    2.007, 2.269, 2.565, 2.899,
])

In [None]:
# Bin edges: the midpoint between every pair of neighbouring central redshifts.
# Walking over adjacent pairs keeps this in step with z_SLICS instead of
# hard-coding the number of bins.
endpoints = [(z_lo + z_hi) / 2.0
             for z_lo, z_hi in zip(z_SLICS[:-1], z_SLICS[1:])]
# Upper edge of the last bin (hand-chosen cutoff, explained in the text below).
endpoints.append(3.066)
print(endpoints)

## Create subsamples by taking the endpoints to be the average number between the redshifts

Add a lower cutoff at 0.023.
- This mirrors the distance between 0.042 and its upper endpoint (0.061 - 0.042 = 0.019, so 0.042 - 0.019 = 0.023).

An upper cutoff of 3.066 is added in the same way, so that the last bin covers equal redshift space on either side of its central redshift.

In [None]:
# Names of the photometry columns used in this notebook:
# five 'lsst' bands (g, r, i, z, y) followed by three 'sdss' bands (r, i, z).
bandpasses = ['lsst' + band for band in 'grizy'] + ['sdss' + band for band in 'riz']

In [None]:
# Survey name per catalogue row: the text between the first pair of single
# quotes in each SURVEY entry, with surrounding whitespace removed.
survey_column = df['SURVEY'].values
survey_info = np.array([raw.split("'")[1].strip() for raw in survey_column])

In [None]:
# Build one subsample per redshift bin, and record how many rows each holds.

# The quality, photometry and survey requirements do not depend on the bin,
# so they are evaluated once and combined with each bin's redshift window.
good_redshift = df['NQ'] > 2
# NOTE(review): 'lssti' is not part of this check -- confirm that is intended.
has_photometry = ((df['lsstg'] > 0) |
                  (df['lsstr'] > 0) |
                  (df['lsstz'] > 0) |
                  (df['lssty'] > 0))
wanted_survey = ((survey_info == 'GAMA') |
                 (survey_info == 'SDSS') |
                 (survey_info == 'VVDS'))
keep_row = good_redshift & has_photometry & wanted_survey

# 0.023 is the lower edge of the first bin; the remaining edges are `endpoints`.
bin_edges = [0.023] + list(endpoints)

subsamples, lens = [], []
for z_lo, z_hi in zip(bin_edges[:-1], bin_edges[1:]):
    in_bin = (df['Z'] >= z_lo) & (df['Z'] < z_hi)
    subsamples.append(df.loc[in_bin & keep_row])
    lens.append(len(subsamples[-1]))


In [None]:
#lens

In [None]:
# original numbers before throwing out anything that failed all photometry
lens_w0 = [7256,
 12853,
 37697,
 44233,
 31554,
 10777,
 2766,
 637,
 119,
 21,
 11,
 13,
 8,
 17,
 8,
 10,
 11,
 2,
 5]



In [None]:
# Percentage of rows removed from each bin relative to the recorded counts.
counts_before = np.array(lens_w0)
counts_after = np.array(lens)
(counts_before - counts_after) / counts_before * 100  # g

In [None]:
# Write every subsample to its own CSV, named after the bin's central redshift.
os.makedirs('SpecObjPhot', exist_ok=True)
for z_bin, bin_df in zip(z_SLICS, subsamples):
    out_name = './SpecObjPhot/SpecObjPhot_%5.3f.csv' % z_bin
    bin_df.to_csv(out_name)

### The limits on the lowest and highest bins above are not quite the same as for the other bins.

There is a redshift evolution to the lower and upper limits per bin. The lowest and highest redshift bins are instead equal on both sides. 
- The lowest redshift bin lower limit should be at a larger wavelength, making the bin smaller. 
- The highest redshift bin upper limit should also be at a larger wavelength, making the bin LARGER.

This doesn't matter for the highest one as it is an empty bin.

The code below plots how the difference between consecutive endpoints evolves as a function of redshift. 

In [None]:
# Half of the (signed) difference between each pair of consecutive bin edges.
ch = [(edge - next_edge) / 2.
      for edge, next_edge in zip(endpoints[:-1], endpoints[1:])]

In [None]:
# `ch` is computed as (edge - next edge) / 2; plot its magnitude per bin.
half_widths = np.abs(ch)
plt.plot(half_widths, 'o')

## Validation

In [None]:
plt.figure()
plt.hist(df.loc[(df['lsstg'] > 0 ) & (df['NQ'] > 2)]['lsstg'], bins=np.arange(14, 25, 0.5), label='lsstg', alpha=0.5)
plt.hist(df.loc[(df['lsstr'] > 0) & (df['NQ'] > 2)]['lsstr'], bins=np.arange(14, 25, 0.5), label='lsstr', alpha=0.5)
plt.hist(df.loc[(df['lssti'] > 0)& (df['NQ'] > 2)]['lssti'], bins=np.arange(14, 25, 0.5), label='lssti', alpha=0.5)
plt.hist(df.loc[(df['lsstz'] > 0)& (df['NQ'] > 2)]['lsstz'], bins=np.arange(14, 25, 0.5), label='lsstz', alpha=0.5)
plt.hist(df.loc[(df['lssty'] > 0)& (df['NQ'] > 2)]['lssty'], bins=np.arange(14, 25, 0.5), label='lssty', alpha=0.5)
plt.legend()

plt.figure()
plt.hist(df.loc[(df['sdssr'] > 0)& (df['NQ'] > 2)]['sdssr'], bins=np.arange(14, 25, 0.5), label='sdssr', alpha=0.5)
plt.hist(df.loc[(df['sdssi'] > 0)& (df['NQ'] > 2)]['sdssi'], bins=np.arange(14, 25, 0.5), label='sdssi', alpha=0.5)
plt.hist(df.loc[(df['sdssz'] > 0)& (df['NQ'] > 2)]['sdssz'], bins=np.arange(14, 25, 0.5), label='sdssz', alpha=0.5)
plt.legend()
plt.xlabel('Mag')

In [None]:
# Quick look at the shape and scale of the redshift column.
z_max = max(df['Z'])
print(np.shape(df['Z'] / z_max), len(df), df.iloc[0]['Z'] / z_max)

# lsstg against redshift for rows with Z > 0, coloured by log10(Z).
plt.figure()
positive_z = df['Z'] > 0.
z_values = df['Z'][positive_z]
plt.scatter(z_values, df['lsstg'][positive_z], c=np.log10(z_values),
            alpha=0.1, marker='.')
plt.title('lsstg')
plt.ylim(10, 32)
plt.xlim(-0.02, 0.8)

In [None]:
import matplotlib.colors as colors
import matplotlib.cm as cmx


In [None]:
# One figure per band: magnitude (with error bars) against redshift for a
# single redshift bin.

# `plt.cm.get_cmap` has been removed from recent Matplotlib releases;
# `plt.get_cmap` returns the same colormap and exists in old and new versions.
# The variable keeps its original name even though the colormap is viridis.
jet = plt.get_cmap('viridis')
cNorm = colors.Normalize(vmin=0, vmax=len(bandpasses)-4)
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)

sub = 2
print(z_SLICS[sub])

# (column name, colour index) in plotting order. The sdss bands reuse the
# colour index of the lsst band with the same letter.
band_colours = [
    ('lsstg', 0), ('lsstr', 1), ('lssti', 2), ('lsstz', 3), ('lssty', 4),
    ('sdssr', 1), ('sdssi', 2), ('sdssz', 3),
]

bin_df = subsamples[sub]
for band, colour_index in band_colours:
    plt.figure()
    plt.errorbar(bin_df['Z'], bin_df[band], yerr=bin_df[band + '_err'],
                 fmt='o', color=scalarMap.to_rgba(colour_index), alpha=0.5)
    plt.title(band)
    plt.ylim(10, 30)

In [None]:
# File names of the rows whose lsstg value is exactly 0, for bins 0 and 8.
# Each FILENAME entry is reduced to the text between its first pair of single
# quotes, with surrounding whitespace removed.
no_g_files = {}
for bin_index in (0, 8):
    bin_df = subsamples[bin_index]
    zero_g_rows = np.where(bin_df['lsstg'] == 0)[0]
    raw_names = bin_df.iloc[zero_g_rows]['FILENAME'].values
    no_g_files[bin_index] = np.array([raw.split("'")[1].strip() for raw in raw_names])

nog_0 = no_g_files[0]
nog_8 = no_g_files[8]

In [None]:
def plot_bandpass_set(setname):
    """Draw the transmission curves of every bandpass in one filter set.

    A new 15x4 figure is created on every call, so anything plotted with
    ``plt`` afterwards lands on this figure's axes.

    Parameters
    ----------
    setname : str
        Value compared against the ``'filterset'`` entry of each bandpass's
        loader metadata (e.g. ``'lsst'`` or ``'sdss'``).
    """
    # NOTE(review): this reads sncosmo's underscore-prefixed bandpass registry,
    # which is not part of its public interface.
    loader_meta = sncosmo.bandpasses._BANDPASSES.get_loaders_metadata()
    band_names = [meta['name'] for meta in loader_meta
                  if meta['filterset'] == setname]

    plt.figure(figsize=(15, 4))
    ax = plt.axes()

    for band_name in band_names:
        band = sncosmo.get_bandpass(band_name)

        # Close each curve with a zero-transmission point at both ends:
        # repeat the first/last wavelength and pad the transmission with 0.
        wave = np.pad(np.asarray(band.wave, dtype=float), 1, mode='edge')
        trans = np.pad(np.asarray(band.trans, dtype=float), 1, mode='constant')

        ax.plot(wave, trans, label=band_name)

    nbands = len(band_names)

    ax.set_xlabel("Wavelength ($\\AA$)", size=12)
    ax.set_ylabel("Transmission", size=12)

    # Nine legend entries per column.
    ncol = 1 + (nbands-1) // 9
    ax.legend(loc='upper right', frameon=False, fontsize='x-large',
              ncol=ncol)

    # Each legend column takes roughly 0.125 of the figure width, so widen the
    # x-range on the right by that much per column to keep curves uncovered.
    xmin, xmax = ax.get_xlim()
    ax.set_xlim(xmin, xmax + ncol * 0.125 * (xmax - xmin))
    plt.tight_layout()

In [None]:
# Open the spectrum file of the second bin-8 row that has no lsstg value.
# The catalogue file name is appended to the local data directory as-is.
data_root = '/Volumes/Backup/galaxy_data5'
f = data_root + nog_8[1]
print(f)
gama_hdu = fits.open(f)
# Only the first HDU is used by the cells that follow.
gama_fits = gama_hdu[0]

In [None]:
# Linear wavelength axis built from the header: WMIN at pixel 0, then a step
# of CD1_1 per pixel.
n_pixels = len(gama_fits.data[0])
x_gama = np.arange(0, n_pixels)
wv_gama = gama_fits.header['CD1_1']*x_gama + gama_fits.header['WMIN']

first_wave, last_wave = wv_gama[0], wv_gama[-1]
print(first_wave, last_wave)

In [None]:
## Quick plot of the spectrum over the LSST bandpasses.
# The commented-out lines below are optional overlays (error, non-calibrated
# spectrum, sky); uncomment them to see what the other rows of the file hold.

# NOTE(review): plot_bandpass_set() opens its own figure, so this figure is
# left empty and the spectrum is drawn on the bandpass figure instead.
plt.figure(figsize=(15, 6))
plot_bandpass_set('lsst')
# The spectrum is divided by 500 before plotting, so the y-axis values are
# not in the flux units named in the y-label.
plt.plot(wv_gama, gama_fits.data[0]/500, label='Spectrum')
# plt.plot(wv_gama, gama_fits.data[1], label=r'$1\sigma$ Error')
#plt.plot(wv_gama, gama_fits.data[0], linewidth=0.5, color='k')
#sigma = np.sqrt(gama_fits.data[1])
#plt.fill_between(wv_gama, gama_fits.data[0] - sigma, gama_fits.data[0] + sigma, alpha=0.5, color='r')
#plt.plot(wv_gama, gama_fits.data[2], label='Non-calibrated Spectrum')
#plt.plot(wv_gama, gama_fits.data[3], label=r'$1\sigma$ Error on Non-calibrated Spectrum')
#plt.plot(wv_gama, gama_fits.data[4], label='Sky Spectrum') 

# Zero-flux reference line.
plt.axhline(0, color='k')

#plt.ylim(-10, 30)
# Restrict the x-range to the wavelength limits stored in the header.
plt.xlim(gama_fits.header['WMIN'], gama_fits.header['WMAX'])

plt.title('GAMA Spectrum')
plt.xlabel(r'Wavelength [$\AA$]', size=13)
plt.ylabel(r'Flux  [$10^{-17}$ erg/s/$cm^2/\AA$]', size=13)


In [None]:
#for i in range(0, 19):
#    print(i, len(subsamples[i].iloc[np.where(subsamples[i]['lsstg'] == 0)[0]])/lens[i] * 100 , z_SLICS[i], lens[i])

In [None]:
# Show the transmission curves of both filter sets, one call per set.
plt.figure()
for filter_set in ('lsst', 'sdss'):
    plot_bandpass_set(filter_set)

In [None]:
#len(df.loc[(df['NQ'] > 2)]['lsstg']), len(df.loc[(df['NQ'] > 2)]['sdssr'])


#154809

In [None]:
#df.loc[(df['NQ'] > 2)]

In [None]:
#df.iloc[2]

In [None]:
# Name of the error column that goes with each band column.
band_err = ['%s_err' % band for band in bandpasses]

In [None]:
## Open one SDSS spectrum file.
# This spectrum used to produce a NaN for the error; that has since been fixed.

sdss_spectrum_path = '/Volumes/Backup/galaxy_data5/GAMA/dr3/data/spectra/sdss/spec-4022-55352-0880.fit'
f = sdss_spectrum_path
print(f)
sdss_hdu = fits.open(f)
# Only the first HDU is used by the cells that follow.
sdss_fits = sdss_hdu[0]

In [None]:
# The header describes a linear axis in log10(wavelength): CRVAL1 at pixel 0
# and a step of CDELT1 per pixel. Raise 10 to that power to get wavelengths.
n_pixels = len(sdss_fits.data[0])
x_sdss = np.arange(0, n_pixels)
wv_sdss_log = sdss_fits.header['CRVAL1'] + sdss_fits.header['CDELT1']* x_sdss
wv_sdss = 10**(wv_sdss_log)

first_wave, last_wave = wv_sdss[0], wv_sdss[-1]
print(first_wave, last_wave)

In [None]:
# Plot the SDSS spectrum over the LSST bandpasses.
# NOTE(review): plot_bandpass_set() opens its own figure, so this figure is
# left empty and the spectrum is drawn on the bandpass figure instead.
plt.figure(figsize=(15, 6))
plot_bandpass_set('lsst')
# The spectrum is divided by 400 before plotting, so the y-axis values are
# not in the flux units named in the y-label.
plt.plot(wv_sdss, sdss_fits.data[0]/400, label='Spectrum')
# Optional overlays of the other rows of the file; uncomment to show them.
# plt.plot(wv_sdss, sdss_fits.data[1], label='Variance')
#plt.plot(wv_sdss, sdss_fits.data[0], color='k', linewidth=0.5)
#sigma = sdss_fits.data[1]
#plt.fill_between(wv_sdss, sdss_fits.data[0] - sigma, sdss_fits.data[0] + sigma, alpha=0.5, color='r')
#plt.plot(wv_sdss, sdss_fits.data[2], label='Sky Spectrum')
#plt.plot(wv_sdss, sdss_fits.data[3], label='Best fit model')
#plt.plot(wv_sdss, sdss_fits.data[5], label='Wavelength dispersion')

# Zero-flux reference line.
plt.axhline(0, color='k')

# plt.legend()

# Restrict the x-range to the wavelength limits stored in the header.
plt.xlim(sdss_fits.header['WMIN'], sdss_fits.header['WMAX'])

plt.title('SDSS Spectrum')
plt.xlabel(r'Wavelength [$\AA$]', size=13)
plt.ylabel(r'Flux  [$10^{-17}$ erg/s/$cm^2/\AA$]', size=13)

In [None]:
# Display every column of the first row in the lowest-redshift subsample.
subsamples[0].iloc[0]

In [None]:
# First and last wavelength of the sdssr bandpass definition.
sdssr_wave = sncosmo.get_bandpass('sdssr').wave
sdssr_wave[0], sdssr_wave[-1]

In [None]:
# Normalised lsstg distributions in the lowest-redshift bin, one histogram per
# redshift-quality value (NQ = 3, 4, 5).
# `normed=` was removed from Matplotlib's hist(); `density=` is its replacement.
first_bin = subsamples[0]
g_bins = np.arange(15, 25, 0.5)

for nq in (3, 4, 5):
    plt.hist(first_bin[first_bin['NQ'].values == nq]['lsstg'],
             bins=g_bins, label='lsstg: %d' % nq,
             density=True, alpha=0.5)
plt.legend()

### Note to selves: If you're seeing weird things, check out the NQs

The cell below plots the LSST r/i bands versus the SDSS r/i bands. 

We expect these to give approximately the same result because they cover similar wavelength ranges. 
Most objects that do not agree in SDSS/LSST have large error bars. 
As you move to higher redshift bins, the errors become larger and the disagreements more prevalent. 

In the future, we can look at the effects of using different levels of data quality, i.e. do the analysis with only NQ = 5 (the best-quality redshifts) and compare to the full sample or to lower qualities. 

### Also note that you should consider a signal to noise cut and maybe a magnitude cut.

In [None]:
# LSST magnitude against SDSS magnitude for the r and i bands in one redshift
# bin, split by redshift quality (NQ), with error bars on both axes.
sub = 3
print(z_SLICS[sub])

bin_df = subsamples[sub]
# (NQ value, marker opacity): the higher-NQ series are drawn fainter.
nq_styles = [(3, 0.4), (4, 0.3), (5, 0.1)]

for band in ('r', 'i'):
    lsst_col = 'lsst' + band
    sdss_col = 'sdss' + band

    plt.figure(figsize=(8,8))
    for nq, opacity in nq_styles:
        rows = bin_df[bin_df['NQ'].values == nq]
        plt.errorbar(rows[lsst_col],
                     rows[sdss_col],
                     xerr=rows[lsst_col + '_err'],
                     yerr=rows[sdss_col + '_err'],
                     fmt='o',
                     alpha=opacity,
                     # The legend names the band actually plotted; the i-band
                     # figure previously reused the 'r: N' labels.
                     label='%s: %d' % (band, nq))

    plt.legend()
    plt.xlim(14, 30)
    plt.ylim(14, 30)
    plt.xlabel('lsst %s' % band, size='x-large')
    plt.ylabel('sdss %s' % band, size='x-large')

### Note: SDSS spans into redder wavelengths, leading to possible BIAS in LOW redshifts

SDSS probably fills in the dimmer objects and so there may be a bias in the lower redshift bins for redder wavelengths.  

The code below plots photometry versus redshift separated by SDSS and GAMA. 

SDSS has a tighter distribution with a dimmer mean. They probably pulled SDSS to fill out their completeness for fainter objects. 

For lower redshift bins, there are photometry points in z/y for SDSS but not for GAMA. In the other bands, SDSS trends dim, so we can assume that these bias dim. How does that affect color estimates? It may target different TYPES of galaxies. 

In [None]:
# Magnitude against redshift in one bin, GAMA and SDSS rows overlaid,
# with one figure per lsst band.
sub = 2
bin_df = subsamples[sub]
# Survey name for each row of this bin, parsed like the full-catalogue version.
survey_info_sub = np.array([raw.split("'")[1].strip() for raw in bin_df['SURVEY'].values])

for band_letter in 'grizy':
    mag_col = 'lsst' + band_letter

    plt.figure(figsize=(8, 6))
    for survey in ('GAMA', 'SDSS'):
        rows = bin_df.loc[survey_info_sub == survey]
        plt.errorbar(rows['Z'],
                     rows[mag_col],
                     yerr=rows[mag_col + '_err'],
                     fmt='o', alpha=0.3,
                     label=survey)

    plt.ylim(10,30)
    plt.legend()
    plt.title('z_bin = %s' % z_SLICS[sub])
    plt.xlabel('z')
    plt.ylabel('lsst %s Mag' % band_letter)

### Is there a correlation with the number of spectra and the quality?

Low quality have 5 or less spectra. However, high quality data can have any number of spectra.

I don't really see anything. 

In [None]:
# Redshift quality against number of spectra for the whole catalogue.
n_spec_all, nq_all = df['N_SPEC'], df['NQ']
plt.plot(n_spec_all, nq_all, 'o')

# Reference line just below NQ = 3.
plt.axhline(2.9, color='r')

Do they preserve the N_SPEC from SDSS? It looks like it. 

In [None]:
# Same plot as for the whole catalogue, restricted to the SDSS rows.
sdss_rows = df.loc[survey_info == 'SDSS']
plt.plot(sdss_rows['N_SPEC'], sdss_rows['NQ'], 'o')

# Reference line just below NQ = 3.
plt.axhline(2.9, color='r')

In [None]:
# lsstr against sdssr in the current bin, for rows with one or two spectra.
plt.figure(figsize=(8,8))

bin_df = subsamples[sub]
for n_spec in (1, 2):
    rows = bin_df[bin_df['N_SPEC'].values == n_spec]
    plt.errorbar(rows['lsstr'],
                 rows['sdssr'],
                 xerr=rows['lsstr_err'],
                 yerr=rows['sdssr_err'],
                 fmt='o',
                 alpha=0.1,
                 label='NSPEC %d' % n_spec)

plt.ylim(10,30)
plt.xlim(10,30)
plt.legend()
plt.title('z_bin = %s' % z_SLICS[sub])
plt.xlabel('lsstr')
plt.ylabel('sdssr')

In [None]:
# lsstr against redshift in the current bin, one series per N_SPEC value 1..8.
plt.figure(figsize=(8,8))

bin_df = subsamples[sub]
n_spec_values = bin_df['N_SPEC'].values
for i in range(1, 9):
    rows = bin_df[n_spec_values == i]
    plt.errorbar(rows['Z'],
                 rows['lsstr'],
                 yerr=rows['lsstr_err'],
                 fmt='o',
                 alpha=0.1,
                 label='NSPEC %s' % i)

plt.ylim(10,30)
#plt.xlim(10,30)
plt.legend()
plt.title('z_bin = %s' % z_SLICS[sub])
plt.ylabel('lsstr')
plt.xlabel('z')

In [None]:
# Normalised lsstr distribution in the current bin for each N_SPEC value 1..8.
plt.figure(figsize=(8,8))

bin_df = subsamples[sub]
r_bins = np.arange(15, 24, 0.25)
for i in range(1, 9):
    # `normed=` was removed from Matplotlib's hist(); `density=` replaces it.
    plt.hist(bin_df[bin_df['N_SPEC'].values == i]['lsstr'],
             bins=r_bins,
             density=True,
             alpha=0.3,
             label='NSPEC %s' % i)

#plt.ylim(0, 2)
#plt.xlim(10,30)
plt.legend()
plt.title('z_bin = %s' % z_SLICS[sub])
plt.xlabel('lsstr')

In [None]:
# Normalised distribution of the lsstr error in the current bin for each
# N_SPEC value 1..8. Non-finite errors are excluded before binning.
plt.figure(figsize=(8,8))

bin_df = subsamples[sub]
finite_err = np.isfinite(bin_df['lsstr_err'].values)
err_bins = np.arange(0, 0.2, 0.005)
for i in range(1, 9):
    # `normed=` was removed from Matplotlib's hist(); `density=` replaces it.
    plt.hist(bin_df[(bin_df['N_SPEC'].values == i) & finite_err]['lsstr_err'],
             bins=err_bins,
             density=True,
             alpha=0.3,
             label='NSPEC %s' % i)

plt.xlim(-0.001, 0.075)
plt.legend()
plt.title('z_bin = %s' % z_SLICS[sub])
# The plotted quantity is the error column, so the axis is labelled to match.
plt.xlabel('lsstr_err')