# Download alerts and related info

In [23]:
from bs4 import BeautifulSoup
from astropy.utils.data import download_file
from astropy.table import Table

Download the TESS data alerts index page (HTML), which links to the individual light curve files

In [3]:
# Fetch the TESS data alerts page. `alerts_path` is the local path of the
# downloaded copy, which a later cell opens and parses as HTML.
alerts_url = 'https://archive.stsci.edu/prepds/tess-data-alerts/'
alerts_path = download_file(alerts_url)

Downloading https://archive.stsci.edu/prepds/tess-data-alerts/ [Done]


In [26]:
# Alert summary table (sectors 1 and 2): download the CSV to a local file,
# then parse that file into an astropy Table.
csv_url = 'https://archive.stsci.edu/hlsps/tess-data-alerts/hlsp_tess-data-alerts_tess_phot_alert-summary-s01+s02_tess_v5_spoc.csv'
csv_path = download_file(csv_url)
csv_table = Table.read(csv_path, format='csv')

Downloading https://archive.stsci.edu/hlsps/tess-data-alerts/hlsp_tess-data-alerts_tess_phot_alert-summary-s01+s02_tess_v5_spoc.csv [Done]


In [56]:
# Index the summary table on the '#tic_id' column so that rows can be looked
# up by TIC id with `csv_table.loc[...]` further down.
csv_table.add_index('#tic_id')

In [4]:
# Parse the downloaded alerts page. The file is opened in a `with` block so
# the handle is closed as soon as BeautifulSoup has consumed it, instead of
# being left open for the lifetime of the kernel.
with open(alerts_path, 'r', encoding="utf-8") as alerts_file:
    bs = BeautifulSoup(alerts_file, 'html.parser')

In [92]:
import h5py

# Open the local HDF5 archive in append mode ('a'): read/write if the file
# already exists, create it otherwise. Read-only mode ('r') cannot be used
# here because this cell creates groups and later cells create datasets and
# write attributes.
archive = h5py.File('archive.hdf5', 'a')

# Make sure there is one top-level group per sector.
for group in ['1', '2']:
    if group not in archive:
        archive.create_group(group)

<HDF5 group "/2" (0 members)>

In [93]:
from toolkit import LightCurve, concatenate_light_curves

In [98]:
%matplotlib inline
import matplotlib.pyplot as plt
from astropy.io import fits
import numpy as np

from urllib.parse import urljoin

base_url = 'https://archive.stsci.edu/'

# Walk the rows of the alerts page and store each light curve that is not yet
# in the archive as a dataset at '<sector>/<tic_id>'.
# NOTE(review): the [13:-7] slice skips the first 13 and last 7 <tr> elements,
# presumably non-data rows of the page -- confirm against the current layout.
for table_row in bs.find_all('tr')[13:-7]:
    cells = table_row.find_all('td')
    tic_id = cells[0].text
    sector_id = cells[2].text

    # Keep the last link in the row that points at a light-curve FITS file.
    # `href=True` skips anchors without an href attribute, and `urljoin`
    # avoids the doubled slash produced by plain string concatenation.
    lc_link = None
    for link in table_row.find_all('a', href=True):
        href = link['href']
        if href.endswith('lc.fits'):
            lc_link = urljoin(base_url, href)

    if lc_link is None:
        continue

    # Skip targets already stored, so re-running the cell only downloads
    # what is missing.
    rel_path = '{0}/{1}'.format(sector_id, tic_id)
    if rel_path in archive:
        continue

    path = download_file(lc_link)

    f = fits.getdata(path)
    header = fits.getheader(path)

    # Keep only cadences with a non-NaN time and a non-NaN, non-zero flux.
    good = (~np.isnan(f['TIME'])
            & ~np.isnan(f['PDCSAP_FLUX'])
            & (f['PDCSAP_FLUX'] != 0))

    lc = LightCurve(times=f['TIME'][good],
                    fluxes=f['PDCSAP_FLUX'][good],
                    name=header['OBJECT'])

    # Stored array has two rows: times (as JD) and fluxes.
    data = np.vstack([lc.times.jd, lc.fluxes])
    dset = archive.create_dataset(rel_path, data=data, compression='lzf')

Downloading https://archive.stsci.edu//hlsps/tess-data-alerts/hlsp_tess-data-alerts_tess_phot_00032090583-s01_tess_v1_lc.fits [Done]
Downloading https://archive.stsci.edu//hlsps/tess-data-alerts/hlsp_tess-data-alerts_tess_phot_00032090583-s02_tess_v1_lc.fits [Done]
Downloading https://archive.stsci.edu//hlsps/tess-data-alerts/hlsp_tess-data-alerts_tess_phot_00038846515-s01_tess_v1_lc.fits [Done]
Downloading https://archive.stsci.edu//hlsps/tess-data-alerts/hlsp_tess-data-alerts_tess_phot_00038846515-s02_tess_v1_lc.fits [Done]
Downloading https://archive.stsci.edu//hlsps/tess-data-alerts/hlsp_tess-data-alerts_tess_phot_00051912829-s01_tess_v1_lc.fits [Done]
Downloading https://archive.stsci.edu//hlsps/tess-data-alerts/hlsp_tess-data-alerts_tess_phot_00051912829-s02_tess_v1_lc.fits [Done]
Downloading https://archive.stsci.edu//hlsps/tess-data-alerts/hlsp_tess-data-alerts_tess_phot_00052204645-s01_tess_v1_lc.fits [Done]
Downloading https://archive.stsci.edu//hlsps/tess-data-alerts/hlsp_te

In [125]:
# Copy the alert-summary columns for every archived target onto its dataset
# as HDF5 attributes. The 'Disposition' column is left out, and any value
# that is a `str` is skipped.
for sector in archive:
    sector_group = archive[sector]
    for target in sector_group:
        # Dataset names are TIC ids; look the row up through the table index.
        entry = csv_table.loc[int(target)]

        attr_names = list(entry.colnames)
        attr_names.remove('Disposition')

        target_attrs = sector_group[target].attrs
        for name in attr_names:
            value = entry[name]
            if isinstance(value, str):
                continue
            target_attrs[name] = value

In [129]:
# Close the HDF5 archive file opened earlier in the notebook.
archive.close()