Extract the RA and Dec coordinates from the downloaded Gaia and SDSS catalogue files and save them as CSV.

In [1]:
from astropy.io.votable import parse_single_table
import numpy as np
import pandas as pd

## Gaia coordinates

In [2]:
# Root folder holding the downloaded catalogues (relative path).
ctlg_dir = '../../../data/catalogues'

# The Gaia input VOTable and the output CSV both live in the gaia/ sub-folder.
gaia_dir = f'{ctlg_dir}/gaia'
read_path = f'{gaia_dir}/gaia-variable-sources.vot'
save_path = f'{gaia_dir}/gaia-coords.csv'

In [3]:
# Columns pulled out of the Gaia VOTable; the same list is used both for
# parsing and for selecting from the resulting table.
gaia_keys = ['source_id', 'ra', 'dec']

# Parse the VOTable with the column filter, convert it to an astropy Table,
# then go through a NumPy array to get a pandas DataFrame.
votable = parse_single_table(read_path, columns=gaia_keys)
table = votable.to_table()
coords = pd.DataFrame(np.array(table[gaia_keys]))

# Write the selected columns to CSV without the DataFrame index.
coords.to_csv(save_path, index=False)



## SDSS coordinates

In [4]:
from astropy.io import fits
from astropy.table import Table
import numpy as np
import pandas as pd
from time import time

In [5]:
# The SDSS input FITS file and the output CSV both live in the sdss/ sub-folder.
sdss_dir = f'{ctlg_dir}/sdss'
read_path = f'{sdss_dir}/specObj-dr14-primary.fits'
save_path = f'{sdss_dir}/sdss-coords.csv'

# Columns extracted from the FITS table for the later cross-match.
# PLATE, FIBERID and MJD are also the columns add_ids() combines into an ID.
search_keys = [
    'PLUG_RA', 'PLUG_DEC',
    'PLATE', 'FIBERID', 'MJD',
    'SPECPRIMARY', 'SURVEY',
]

def add_ids(coords):
    """Attach an ``ID`` column of the form ``PLATE-MJD-FIBERID`` to ``coords``.

    ``PLATE`` and ``FIBERID`` are converted to strings and left-padded with
    zeros to at least four characters (longer values are kept unchanged);
    ``MJD`` is converted to a string without padding.

    Parameters
    ----------
    coords : pandas.DataFrame
        Frame containing ``PLATE``, ``FIBERID`` and ``MJD`` columns.

    Returns
    -------
    pandas.DataFrame
        The same frame that was passed in; the ``ID`` column is added in place.
    """
    def _zero_padded(column):
        # String form of every value, zero-filled to a minimum width of 4.
        return [text.zfill(4) for text in column.astype(str)]

    plate_strs = _zero_padded(coords.PLATE)
    fiber_strs = _zero_padded(coords.FIBERID)
    mjd_strs = coords.MJD.astype(str)

    # Note the order: plate, then MJD, then fiber.
    coords['ID'] = [
        f'{plate}-{mjd}-{fiber}'
        for plate, mjd, fiber in zip(plate_strs, mjd_strs, fiber_strs)
    ]
    return coords

In [6]:
# Time the whole read -> select -> write pipeline.
t0 = time()

# Open the FITS file in a context manager so the HDU list is closed even if
# one of the conversion steps raises (the original never closed it).
with fits.open(read_path, memmap=True) as hdul:
    # The catalogue table is read from the first extension HDU.
    data = hdul[1].data
    table = Table(data)
    # Keep only the columns needed for the cross-match. np.array() builds a
    # new array, so the DataFrame is constructed before the file is closed.
    coords = pd.DataFrame(np.array(table[search_keys]))

# Add the PLATE-MJD-FIBERID identifier column and write everything to CSV.
coords = add_ids(coords)
coords.to_csv(save_path, index=False)
t1 = time()

msg = f'Total time: {t1 - t0:.2f} s.'
print(msg)

Total time: 156.33 s.
