# Exploring the __GAMA__ spectroscopic catalog
notebook by _Alex Malz (GCCL@RUB)_, (add your name here)

In [None]:
from astropy.io import fits
import corner
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import urllib.request as ur

%matplotlib inline

We want to get the spectra of galaxies matching conditions found [here](http://www.gama-survey.org/dr3/schema/table.php?id=31), specifically sorting them by redshift and redshift quality as well as field, since we probably only want to get environment statistics for subsets by location.

In [None]:
# Read the first extension of SpecObj.fits into a pandas DataFrame indexed by CATAID.
with fits.open('SpecObj.fits') as hdul:
    hdul.info()
#     print(hdul[1].header)
    table = np.array(hdul[1].data)
    # Swap the bytes and flip the dtype's byte-order flag so the values are
    # unchanged but stored in the opposite byte order before handing them to pandas.
    # ndarray.newbyteorder() was removed in NumPy 2.0; viewing with
    # dtype.newbyteorder() is the equivalent that works on old and new NumPy.
    table = table.byteswap().view(table.dtype.newbyteorder())
    df = pd.DataFrame(table)
    print(df.columns)
    # Keep CATAID as a column as well as the index.
    df.index = df['CATAID']

In [None]:
# Histogram of all catalog redshifts on hand-picked, non-uniform bin edges,
# drawn with a logarithmic count axis.
z_bin_edges = [
    0., 0.042, 0.080, 0.130, 0.221, 0.317, 0.418, 0.525, 0.640, 0.764,
    0.897, 1.041, 1.199, 1.372, 1.562, 1.772, 2.007, 2.269, 2.565, 2.899, 3.,
]
fig, ax = plt.subplots()
ax.hist(df['Z'], bins=z_bin_edges)
ax.set_yscale('log')
ax.set_xlabel('redshift')

In [None]:
# Overlay one translucent redshift histogram per quality flag value NQ = 1..5.
fig, ax = plt.subplots()
for nq in range(1, 6):
    z_at_nq = df.loc[df['NQ'] == nq, 'Z']
    ax.hist(z_at_nq, alpha=0.5, label=str(nq))
ax.legend(loc='upper right')
ax.set_yscale('log')
ax.set_xlim(0., 3.5)
ax.set_xlabel('Z')
ax.set_title('redshift distributions by quality flag "NQ"')

In [None]:
# Corner plot of the sky positions: one row per object, columns (RA, DEC).
corner.corner(df[['RA', 'DEC']].to_numpy(), labels=['RA', 'DEC'], show_titles=True)

## select and download spectra based on redshift and angular coordinates

GAMA DR3 recommends using only redshifts with quality flag NQ > 2 for science, and environment has to be measured locally within each field on the sky.

In [None]:
# Split the science-quality (NQ > 2) objects into half-open RA intervals
# [lo, hi) and record how many objects land in each one.
RA_bin_ends = [0., 80., 160., 200., 360.]
keep_cols = ['RA', 'DEC', 'Z', 'NQ', 'URL', 'FILENAME']
science_quality = df['NQ'] > 2
subsamples = [
    df.loc[(df['RA'] >= lo) & (df['RA'] < hi) & science_quality, keep_cols]
    for lo, hi in zip(RA_bin_ends[:-1], RA_bin_ends[1:])
]
lens = [len(sub) for sub in subsamples]

In [None]:
# Work with the least-populated RA subsample and draw one object from it.
# The fixed random_state makes the draw repeatable; drop it to get a
# different object on each run.
subset = np.argmin(lens)
smallest_field = subsamples[subset]
chosen_ind = smallest_field.sample(1, random_state=42).index[0]
# The URL column holds bytes, so decode it for display.
smallest_field.loc[chosen_ind, 'URL'].decode('ascii')

In [None]:
# Download the chosen object's spectrum to a local path derived from the catalog.
just_url = subsamples[subset].URL[chosen_ind].decode('ascii')
# [1:] drops the first character of the catalog FILENAME
# (presumably a leading '/', which would make the path relative — TODO confirm).
just_fn = subsamples[subset].FILENAME[chosen_ind].decode('ascii')[1:]
# os.makedirs('') raises FileNotFoundError, so only create the parent
# directory when the filename actually has a directory component.
target_dir = os.path.dirname(just_fn)
if target_dir:
    os.makedirs(target_dir, exist_ok=True)
# urlretrieve returns a (local filename, response headers) tuple.
spectrum = ur.urlretrieve(just_url, just_fn)

In [None]:
# Read the primary HDU of the downloaded spectrum: data array plus header.
with fits.open(just_fn) as hdul:
    arr = np.array(hdul[0].data)
    # Same values, opposite byte order. ndarray.newbyteorder() was removed in
    # NumPy 2.0, so flip the dtype's byte-order flag through a view instead.
    arr = arr.byteswap().view(arr.dtype.newbyteorder())
    metadata = hdul[0].header

### How do we find the wavelengths of the pixels in these spectra?

Just combine the info in the header with the [WCS conventions](http://iraf.noao.edu/projects/fitswcs/spec3d.html).

~~### [GAMA DR3 file server says](http://www.gama-survey.org/dr3/data/)~~

~~These fits files contain the fully reduced and flux-calibrated spectrum, the reduced spectrum without the flux calibration, the corresponding error arrays, and the mean sky spectrum of the field from which this spectrum was taken.~~

~~### [The original GAMA paper says](https://academic.oup.com/mnras/article/430/3/2047/980679)~~

~~AAOmega possesses a dual beam system which allows coverage of the wavelength range from 3750  to 8850 Å with the 5700 Å dichroic used by GAMA, in a single observation. 
Each arm of the AAOmega system is equipped with a 2k × 4k E2V CCD detector and an AAO2 CCD controller. 
The blue arm CCD is thinned for improved blue response. 
The red arm CCD is a low fringing type. 
The grating used in the blue arm is the 580V, centred at 4800 Å, which has a dispersion of 1 Å pixel$^{-1}$ and gives a coverage of 2100 Å. 
The 385R grating is used in the red arm, centred at 7250 Å. 
This grating has a dispersion of 1.6 Å pixel$^{-1}$ and gives a coverage of 3200 Å. 
This leads to spectra with a resolution that varies as a function of wavelength, from R ≈ 1000 at the blue end up to R ≈ 1600 at the red end.~~

In [None]:
# Display the primary-HDU header of the downloaded spectrum; the wavelength
# conversion in the following cells reads its CRVAL1, CD1_1, CRPIX1, NAXIS1,
# WMIN and WMAX keywords.
metadata

The function below, which implements the WCS conversion, works on all the GAMA spectra obtained with the AAOmega instrument, but it fails on spectra from other sources (SDSS, I'm lookin' at you) because they're not using the same WCS conventions.
This is an incomplete attempt to get it working for all data sources.

In [None]:
def linfunc(pix, metadata):
    """Convert pixel coordinates to wavelengths using the header's WCS keywords.

    Parameters
    ----------
    pix : scalar or numpy.ndarray
        Pixel coordinate(s), in the same convention as ``CRPIX1``.
    metadata : mapping
        Header-like object providing ``CRVAL1`` (value at the reference
        pixel), ``CD1_1`` (step per pixel) and ``CRPIX1`` (reference pixel).

    Returns
    -------
    wav : scalar or numpy.ndarray
        Wavelength(s) with the same shape as ``pix``.

    Raises
    ------
    KeyError
        If any of the three keywords is missing from ``metadata``.

    Notes
    -----
    There is no explicit check for a log-sampling flag; ``CRVAL1 < 10`` is
    used as a heuristic that the header stores log10(wavelength). In that
    case the solution is linear in log10 space, so the exponent is applied
    to the whole linear expression rather than to ``CRVAL1`` and ``CD1_1``
    separately (which would yield a linear, not logarithmic, grid).
    """
    offset = pix - metadata['CRPIX1']
    if metadata['CRVAL1'] < 10.:
        # Log-linear sampling: log10(wav) = CRVAL1 + CD1_1 * (pix - CRPIX1).
        wav = 10. ** (metadata['CRVAL1'] + metadata['CD1_1'] * offset)
    else:
        # Linear sampling: wav = CRVAL1 + CD1_1 * (pix - CRPIX1).
        wav = metadata['CRVAL1'] + metadata['CD1_1'] * offset
    return wav

In [None]:
# Build the 1-indexed pixel grid and convert it to wavelengths.
pixels = np.arange(1, metadata['NAXIS1'] + 1)
wavelengths = linfunc(pixels, metadata)
# Sanity check: the first and last wavelengths should match the header's
# WMIN and WMAX. This is an explicit condition rather than assert inside a
# bare except, which would swallow unrelated errors (even KeyboardInterrupt)
# and vanish entirely under `python -O`.
ends_match = (
    np.isclose(wavelengths[0], metadata['WMIN'])
    and np.isclose(wavelengths[-1], metadata['WMAX'])
)
if not ends_match:
    print('Sometimes the sanity check fails for spectra from other surveys. . .')
    print(((wavelengths[0], metadata['WMIN']), (wavelengths[-1], metadata['WMAX'])))

In [None]:
# Plot the first row of the spectrum array against wavelength, using the
# second row as the per-pixel error bar. (For a plain line without error bars,
# plot arr[0] alone.)
fig, ax = plt.subplots()
ax.errorbar(wavelengths, arr[0], yerr=arr[1], linewidth=0.2, alpha=0.5)
# Axis units are hard-coded here; the original cell's notes point at the
# CUNIT1 and ARRAY1 header keywords as their source.
ax.set_xlabel(r'wavelength ($\AA$)')
ax.set_ylabel(r'flux ($10^{-17} erg/s/cm^{2}/\AA$)')

# Next steps


## download and summarize more spectra!

## choose and implement a measure of "environment"

## multiply spectra by photometric filter transmission curves to get colors

## construct redshift-environment-SED/color relationship