In [6]:
import astroquery
from astroquery.sdss import SDSS
from astropy.table import Table
from astropy.io import fits

import os


In [7]:
from dotenv import load_dotenv, find_dotenv

# Load variables from the .env file located by find_dotenv(); the return
# value of load_dotenv is intentionally discarded.
_ = load_dotenv(dotenv_path=find_dotenv())

# Index os.environ directly (no .get) so that a missing credential fails
# right here with a KeyError.
AWS_ACCESS_KEY_ID = os.environ["AWS_ACCESS_KEY_ID"]
AWS_SECRET_ACCESS_KEY = os.environ["AWS_SECRET_ACCESS_KEY"]


In [8]:
# Location of the DR16Q catalogue file, relative to the working directory.
dr16q_filename = '../data/DR16Q_v4.fits'

# NOTE(review): this handle is never closed in the visible cells, and the
# following cells read from `hdul` after this one has finished. Confirm
# whether it should be closed once the table has been extracted.
hdul = fits.open(name=dr16q_filename)

In [9]:
# Header comes from the first HDU; the catalogue records (indexed by column
# name in the following cells) come from the second HDU.
hdr, dr16_data = hdul[0].header, hdul[1].data

In [10]:
# Row filter: keep entries whose BAL_PROB is exactly 0 AND whose zWarning
# flag is 0.
# NOTE: despite its name, `bal_qso_mask` is True for the rows that are KEPT,
# i.e. the BAL_PROB == 0 rows.
bal_qso_mask = (
    (dr16_data['BAL_PROB'] == 0)
    & (dr16_data['zWarning'] == 0)
)

# NOTE: the `_hdul` suffix is historical; this is the filtered record array
# obtained by boolean-indexing `dr16_data`, not an HDUList.
nonbal_qso_hdul = dr16_data[bal_qso_mask]

In [12]:
# Display the filtered records as the cell output (bare last expression).
nonbal_qso_hdul

FITS_rec([('000000.36+070350.8', 1.53536894e-03,  7.06412931, 11279, 58449, 978, 'QSO', 'QSO', 1, 1.56554668, 0, -1., -1, -1, -1, 175096522, -1.   , 0, 0, -1.   , 0, -1., -1, -1., -1., 1, 1.57422745, 'PIPE', 1.57422745, 0, '1237669517441827491', 1.57483247, 0,   395.17112357, -1., 7682, 0., -1., 7682, 0.,  1.57139463,    0,  51.26856656, 1.57383165, 0,  28.66540576, 1.58936037, 0,  121.69651782, -1.        , 7682,    0.        ,  1.5748324, [-1.        , -1.        , -1.        , -1.        , -1.        ], [-1.        , -1.        , -1.        , -1.        , -1.        ], [-1., -1., -1., -1., -1.], 0., 0., 0., 0., 0., 0., 0., 0., 0.,             0, 0, 1024, 0, 0, 0, 0, 0, 0, [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -

In [7]:
# Columns carried over into the pandas frame.
col_l = [
    'SDSS_NAME', 'RA', 'DEC',
    'PLATE', 'MJD', 'FIBERID',
    'AUTOCLASS_PQN', 'Z',
    'BAL_PROB', 'BI_CIV', 'AI_CIV',
]

# The column subset is taken on the astropy Table *before* calling
# to_pandas(). Presumably this is required because the full records contain
# array-valued fields (visible in the record repr) -- TODO confirm.
nonbal_qso_df = (
    Table(nonbal_qso_hdul)[col_l]
    .to_pandas()
)

In [8]:
# Draw a fixed-size random subset. The explicit random_state makes the draw
# repeatable; the index is then renumbered from 0 and the old row labels are
# dropped.
nonbal_qso_samp_df = (
    nonbal_qso_df
    .sample(n=8000, random_state=1234)
    .reset_index(drop=True)
)

In [9]:
# Display the sampled frame as the cell output (bare last expression).
nonbal_qso_samp_df

Unnamed: 0,SDSS_NAME,RA,DEC,PLATE,MJD,FIBERID,AUTOCLASS_PQN,Z,BAL_PROB,BI_CIV,AI_CIV
0,111438.82+360548.1,168.661785,36.096720,4622,55629,228,QSO,2.468000,0.0,0.0,0.0
1,152538.86+100329.5,231.411935,10.058197,5493,56009,344,QSO,2.241000,0.0,0.0,0.0
2,012017.97+214854.4,20.074892,21.815122,5134,55868,866,QSO,2.774000,0.0,0.0,0.0
3,232436.84+175513.8,351.153518,17.920526,6131,56211,476,QSO,2.275000,0.0,0.0,0.0
4,103619.33+105232.4,159.080553,10.875686,5346,55955,784,QSO,3.092000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
7995,112048.90+143625.4,170.203752,14.607079,5367,55986,248,QSO,2.699000,0.0,0.0,0.0
7996,222607.53+230246.5,336.531390,23.046273,7583,56958,988,QSO,1.712627,0.0,0.0,0.0
7997,013012.41+302637.8,22.551726,30.443851,7730,58107,644,QSO,1.747934,0.0,0.0,0.0
7998,105849.70+001040.4,164.707087,0.177899,3836,55302,653,QSO,2.539000,0.0,0.0,0.0


In [10]:
# Persist the sample as CSV. index=True is the to_csv default, spelled out
# here because it means the 0..N-1 row index is written as an extra leading
# column with an empty header; readers of this file must account for it.
nonbal_qso_samp_df.to_csv(
    path_or_buf='../data/nonbal_qso_samp.csv',
    index=True,
)