# Create HDF5 archive of NASA IRTF template spectra

In [30]:
import matplotlib.pyplot as plt
import numpy as np
from astropy.io import fits
import h5py
from astropy.utils.data import download_file
from tarfile import TarFile
import os
from glob import glob

# Tarballs of FITS template spectra, one archive per spectral class (G, K, M).
url_g = 'http://irtfweb.ifa.hawaii.edu/~spex/IRTF_Spectral_Library/Data/G_fits_091201.tar'
url_k = 'http://irtfweb.ifa.hawaii.edu/~spex/IRTF_Spectral_Library/Data/K_fits_091201.tar'
url_m = 'http://irtfweb.ifa.hawaii.edu/~spex/IRTF_Spectral_Library/Data/M_fits_091201.tar'

# Every archive that gets downloaded and unpacked, in processing order.
urls = [url_g, url_k, url_m]

# Destination of the consolidated HDF5 archive built from the FITS files.
hdf5_archive_path = 'data/irtf_templates.hdf5'

Download and extract FITS archives of G/K/M template stars: 

In [14]:
# Fetch each tarball to a local file and unpack it below the data/ directory.
for archive_url in urls:
    local_tar = download_file(archive_url)
    # NOTE(review): extractall() writes whatever member paths the archive
    # declares; these tarballs come over plain HTTP, so confirm the source is
    # trusted before running this cell.
    with TarFile(local_tar, 'r') as archive:
        archive.extractall('data/.')

Downloading http://irtfweb.ifa.hawaii.edu/~spex/IRTF_Spectral_Library/Data/G_fits_091201.tar [Done]
Downloading http://irtfweb.ifa.hawaii.edu/~spex/IRTF_Spectral_Library/Data/K_fits_091201.tar [Done]
Downloading http://irtfweb.ifa.hawaii.edu/~spex/IRTF_Spectral_Library/Data/M_fits_091201.tar [Done]


In [26]:
# All FITS files sitting one directory level below data/.
fits_paths = glob('data/*/*.fits')
fits_names = list(map(os.path.basename, fits_paths))

# The spectral type is taken as the file-name text before the first underscore.
sptypes = [name.partition('_')[0] for name in fits_names]

# Flag three-character types ending in 'V' (presumably main-sequence stars
# such as 'G2V' -- confirm against the library's naming scheme).
sptypes_ms = [spt.endswith('V') and len(spt) == 3 for spt in sptypes]

# Paths of the flagged files only, in the same order as fits_paths.
ms_paths = [path for path, is_ms in zip(fits_paths, sptypes_ms) if is_ms]

Create an HDF5 archive of the selected templates (one dataset per spectral type):

In [96]:
# Build the HDF5 archive only when it does not exist yet; an existing file is
# left untouched.
if not os.path.exists(hdf5_archive_path):

    with h5py.File(hdf5_archive_path, 'w') as f:
        # Every template is stored as a dataset under /templates, named by its
        # spectral type with spaces removed.
        templates = f.create_group('templates')

        for p in ms_paths:
            # Read the header first so that a file whose spectral type is
            # already archived can be skipped without loading its data.
            header = fits.getheader(p)
            target_name = header['OBJECT']  # not used further in this cell
            sptype = header['SPTYPE'].replace(' ', '')

            # Only the first file encountered for a spectral type is kept.
            # Membership is tested on the group directly instead of rebuilding
            # the full list of dataset names on every iteration.
            if sptype in templates:
                continue

            data = fits.getdata(p)

            # Stack the first three rows of the FITS array and transpose, so
            # each row of `data` is one sample with three values (presumably
            # wavelength, flux, uncertainty -- confirm against the library
            # documentation).
            data = np.vstack([data[0, :], data[1, :], data[2, :]]).T

            # Keep only the samples in which none of the three values is NaN.
            not_nans = np.logical_not(np.any(np.isnan(data), axis=1))

            dset = templates.create_dataset(sptype, data=data[not_nans, :],
                                            compression='gzip')

            # Copy every FITS header card onto the dataset as an attribute.
            for key, val in header.items():
                dset.attrs[key] = val

Then delete the directories of FITS templates:

In [99]:
import shutil

# Remove the extracted FITS directories (entries under data/ whose name
# contains "fits").
for directory in glob('data/*fits*'):
    # The pattern can also match plain files, which shutil.rmtree refuses to
    # delete (it raises); only directories are removed here.
    if os.path.isdir(directory):
        shutil.rmtree(directory)