In [2]:
import numpy as np
import pandas as pd
import glob
import os
import sys
import datetime
from collections import Counter

## Visualizing the STIS Data Archive

### Storing metadata in a pandas DataFrame
Currently, we can grab higher-level metadata from MAST. The code below was written by Sean Lockwood and constructs a pandas DataFrame of STIS metadata out of all STIS science observations. This doesn't yet take advantage of astroquery's MAST API, but that is on the to-do list. Once lower-level metadata comes to astroquery (hopefully Fall 2019?), we will wrap that into this project.

In [24]:
def download_mast_metadata(datatype='S', instrument='STIS', output_csv=False):
    '''
    Downloads HST/{STIS,COS} observation metadata from MAST into a DataFrame.
    
    The archive is queried one calendar year at a time (1997 through the
    current year) and the yearly tables are concatenated into one table with
    a fresh 0..N-1 index.
    
    'datatype' (case-insensitive) is one of:
        'S'         -- science observations (default)
        'C'         -- calibration observations
        '%' | 'ALL' -- both science and calibration observations
    
    'instrument' is sent as the 'sci_instrume' search field ('STIS' or 'COS').
    
    'output_csv', when true, also writes the table to "stis_archive.csv" in
    the current working directory (index column included).
    
    Returns the combined DataFrame with three adjustments:
        'Start Time'        -- parsed into datetimes
        'obstype'           -- new column: 'Imaging', 'Spectroscopic' or
                               'Coronagraphic'
        'Instrument Config' -- surrounding whitespace stripped
    
    Raises AssertionError for an invalid 'datatype' and ValueError when no
    year returned any rows.  Network errors from urllib propagate unchanged.
    
    Mast documentation:
        MAST GET Requests:      https://archive.stsci.edu/vo/mast_services.html#GET
        HST-specific keywords:  https://archive.stsci.edu/search_fields.php?mission=hst
    '''
    import io
    import urllib.parse
    import urllib.request
    
    # Determine if we want 'science', 'calibration', or 'all' datasets:
    datatype = datatype.upper()
    assert datatype in ['S', 'C', '%', 'ALL'], "'datatype' is not a valid selection."
    if datatype == 'ALL':
        datatype = '%'
    
    url = 'https://archive.stsci.edu/hst/search.php'
    
    # Output columns
    selectedColumnsCsv = ','.join([
        'sci_data_set_name',
        'sci_obset_id',
        'sci_targname',
        'sci_start_time',
        'sci_stop_time',
        'sci_actual_duration',
        'sci_instrume',
        'sci_instrument_config',
        'sci_operating_mode',
        'sci_aper_1234',
        'sci_spec_1234',
        'sci_central_wavelength',
        'sci_fgslock',
        'sci_mtflag',
        'sci_pep_id',
        'sci_aec',
        'sci_obs_type',
        'scp_scan_type',
    ])
    
    # Loop year-by-year to avoid data limits:
    # NOTE(review): presumably a year with more than 'max_records' matches is
    # truncated by the server -- confirm against the MAST documentation.
    all_years = []
    for year in range(1997, datetime.datetime.now().year + 1):
        print ('Working on {}...'.format(year))
        data = [
            ('sci_instrume',       instrument),
            ('sci_aec',            datatype),
            ('sci_start_time',     'Jan 1 {} .. Jan 1 {}'.format(year, year + 1)),
            ('max_records',        '25000'),
            ('ordercolumn1',       'sci_start_time'),
            ('outputformat',       'JSON'),
            ('selectedColumnsCsv', selectedColumnsCsv),
            ('nonull',             'on'),
            ('action',             'Search'),
        ]
        
        full_url = url + '?' + urllib.parse.urlencode(data)
        with urllib.request.urlopen(full_url) as response:
            json_text = response.read().decode()
        
        # Only the parse is allowed to fail quietly: a response that is not
        # valid JSON is treated as a year with no data.  Wrapping the text in
        # StringIO avoids pandas' deprecated "literal JSON string" input path.
        try:
            year_table = pd.read_json(io.StringIO(json_text))
        except ValueError:
            continue  # Sad years with no data
        if not year_table.empty:
            all_years.append(year_table)
    
    if not all_years:
        raise ValueError('MAST returned no rows for instrument {!r}.'.format(instrument))
    
    # Concatenate individual years together; every yearly table starts its
    # index at 0, so renumber to keep row labels unique.
    mast = pd.concat(all_years, ignore_index=True)
    
    # Modify/add some columns:
    mast['Start Time'] = pd.to_datetime(mast['Start Time'], format='%Y-%m-%d %H:%M:%S')
    is_imaging = mast['Filters/Gratings'].str.contains('MIR', regex=False, na=False)
    mast['obstype'] = np.where(is_imaging.to_numpy(dtype=bool), 'Imaging', 'Spectroscopic')
    mast.loc[mast['Apertures'] == '50CORON', 'obstype'] = 'Coronagraphic'
    mast['Instrument Config'] = mast['Instrument Config'].str.strip()
    
    if output_csv:
        mast.to_csv("stis_archive.csv")
    return mast

# Set to True to re-download the metadata from MAST (and rewrite the CSV);
# otherwise the table is loaded from the previously saved "stis_archive.csv".
generate = False
if generate:
    mast = download_mast_metadata(instrument='STIS', output_csv=True)  # Or analyze 'COS'
else:
    # low_memory=False makes pandas infer each column's dtype from the whole
    # file instead of chunk by chunk, which avoids mixed-type columns (and the
    # DtypeWarning that goes with them).
    mast = pd.read_csv("stis_archive.csv", low_memory=False)
    # The first CSV column is the index written by to_csv(); drop it.
    mast = mast.iloc[:, 1:]
print ('Number rows:  {}'.format(len(mast)))

Number rows:  108231


  interactivity=interactivity, compiler=compiler, result=result)


In [25]:
# Preview the first rows only rather than rendering the whole table.
mast.head(10)

Unnamed: 0,AEC,Apertures,Central Wavelength,Dataset,Exp Time,FGS Lock,Filters/Gratings,Instrument,Instrument Config,MT Flag,Obs Type,Obset ID,Operating Mode,Proposal ID,Scan Type,Start Time,Stop Time,Target Name,obstype
0,S,50CCD,5735.862,O3SX01040,12.0,FINE,MIRVIS,STIS,STIS/CCD,,IMAGE,01,ACCUM,7065,,1997-03-03 11:10:58,1997-03-03 11:11:53,NGC-188-95,Imaging
1,S,50CCD,5733.716,O3SX01AMQ,6.0,FINE,MIRVIS,STIS,STIS/CCD,,IMAGE,01,ACCUM,7065,,1997-03-03 11:13:14,1997-03-03 11:13:20,NGC-188-95,Imaging
2,S,50CCD,5733.716,O3SX01ANQ,6.0,FINE,MIRVIS,STIS,STIS/CCD,,IMAGE,01,ACCUM,7065,,1997-03-03 11:14:04,1997-03-03 11:14:10,NGC-188-95,Imaging
3,S,50CCD,5733.716,O3SX01AOQ,6.0,FINE,MIRVIS,STIS,STIS/CCD,,IMAGE,01,ACCUM,7065,,1997-03-03 11:15:33,1997-03-03 11:15:39,NGC-188-95,Imaging
4,S,50CCD,5733.716,O3SX01APQ,6.0,FINE,MIRVIS,STIS,STIS/CCD,,IMAGE,01,ACCUM,7065,,1997-03-03 11:16:23,1997-03-03 11:16:29,NGC-188-95,Imaging
5,S,50CCD,5735.862,O3SX01010,12.0,FINE,MIRVIS,STIS,STIS/CCD,,IMAGE,01,ACCUM,7065,,1997-03-03 11:43:21,1997-03-03 11:44:17,NGC-188-95,Imaging
6,S,F28X50OII,3737.553,O3TT01010,0.2,FINE/GYRO,MIRVIS,STIS,STIS/CCD,,IMAGE,01,ACCUM,7063,,1997-03-03 17:49:02,1997-03-03 17:49:47,G191B2B,Imaging
7,S,6X6,2375.000,O3TT01020,144.0,FINE/GYRO,G230LB,STIS,STIS/CCD,,SPECTRUM,01,ACCUM,7063,,1997-03-03 17:54:27,1997-03-03 17:57:35,G191B2B,Spectroscopic
8,S,6X6,4300.000,O3TT01030,144.0,FINE/GYRO,G430L,STIS,STIS/CCD,,SPECTRUM,01,ACCUM,7063,,1997-03-03 18:02:52,1997-03-03 18:08:59,G191B2B,Spectroscopic
9,S,6X6,7751.000,O3TT01040,792.0,FINE/GYRO,G750L,STIS,STIS/CCD,,SPECTRUM,01,ACCUM,7063,,1997-03-03 18:14:16,1997-03-03 18:28:12,G191B2B,Spectroscopic


In [47]:
def filter_data(mast, years=1, now=None):
    '''
    Select recent, non-acquisition science rows from a MAST metadata table.
    
    A row is kept when all of the following hold:
        * 'Start Time' is on or after the cutoff, `now` minus 365*`years` days
        * 'Operating Mode' does not contain 'ACQ'
        * 'AEC' equals 'S'
        * 'Target Name' is none of ANY, DARK, BIAS, FLAT, LAMP, CCDFLAT, NONE
    
    'mast'  -- DataFrame with the four columns named above; 'Start Time' may
               hold strings or datetimes.
    'years' -- length of the look-back window, in 365-day years.
    'now'   -- reference datetime for the window; defaults to the current
               local time.  Pass a fixed value for reproducible results.
    
    The cutoff is printed.  Returns the filtered DataFrame (original row
    labels preserved).
    '''
    if now is None:
        now = datetime.datetime.now()
    past_cutoff = now - datetime.timedelta(days=365 * years)
    print (past_cutoff)
    
    # Target names excluded from the selection.
    excluded_targets = ['ANY', 'DARK', 'BIAS', 'FLAT', 'LAMP', 'CCDFLAT', 'NONE']
    
    # Vectorised parsing/matching: much faster than a per-row Python loop and
    # tolerant of missing values (a missing operating mode is not an ACQ).
    start_times = pd.to_datetime(mast['Start Time'])
    is_acquisition = mast['Operating Mode'].str.contains('ACQ', regex=False, na=False)
    is_excluded_target = mast['Target Name'].isin(excluded_targets)
    
    # Filter on date and remove ACQs:
    keep = (
        (start_times >= past_cutoff)
        & ~is_acquisition
        & (mast['AEC'] == 'S')
        & ~is_excluded_target
    )
    mast_filtered = mast[keep]
    
    return mast_filtered

def stats(mast_metadata, years=1, details='obstype'):
    '''
    Print an exposure-time breakdown of the rows selected by filter_data().
    
    Rows are grouped by 'Instrument Config', then 'Operating Mode', then the
    `details` column.  Each printed line gives the number of archive entries,
    the summed 'Exp Time', and that sum as a percentage of the detector's
    total 'Exp Time'.  Repeated detector / operating-mode labels are shown
    as a ditto mark (").
    
    'mast_metadata' -- DataFrame passed on to filter_data().
    'years'         -- look-back window passed on to filter_data().
    'details'       -- innermost grouping column: 'obstype' (default) or
                       'Filters/Gratings'.
    
    Returns None; raises AssertionError for an unsupported `details`.
    '''
    assert details in ['obstype', 'Filters/Gratings'], 'Detail view not supported!'
    
    df = filter_data(mast_metadata, years)
    
    print ('Past {} year(s) in MAST (non-calibration):'.format(years))
    print ('Detector, Operating Mode, {}, # Archive Entries, Summed Exptime, % Exptime on Detector\n'.format(details))
    
    row_format = '{:10}  {:10}  {:15}  {:5}   {:12.2f}   {:5.1f}%'
    
    # Group by scalar column labels: grouping by a one-element list makes
    # pandas >= 2.0 yield 1-tuples as group keys, which breaks the string
    # handling and formatting below.
    for config, detector_rows in df.groupby('Instrument Config'):
        # Keep only the part after the last '/', e.g. 'STIS/CCD' -> 'CCD'.
        detector_label = str(config).rsplit('/', 1)[-1]
        detector_exptime = detector_rows['Exp Time'].sum()
        for op_mode, op_mode_rows in detector_rows.groupby('Operating Mode'):
            op_mode_label = op_mode
            for detail, detail_rows in op_mode_rows.groupby(details):
                exptime = detail_rows['Exp Time'].sum()
                if detector_exptime:
                    percent = exptime / detector_exptime * 100.
                else:
                    percent = float('nan')  # no exposure time on this detector
                print (row_format.format(
                    detector_label, op_mode_label, detail, len(detail_rows),
                    exptime, percent))
                # After the first line of a group, repeat labels as dittos.
                op_mode_label = '"'
                detector_label = '"'
        print ()
    print ('-'*80 + '\n')
    
# Summarise the most recent year, then the past 25 years.
for n_years in (1, 25):
    stats(mast, n_years)

2018-05-29 11:36:23.442856
Past 1 year(s) in MAST (non-calibration):
Detector, Operating Mode, obstype, # Archive Entries, Summed Exptime, % Exptime on Detector

CCD         ACCUM       Coronagraphic      336      125913.27    40.6%
"           "           Imaging             31         838.77     0.3%
"           "           Spectroscopic      433      183079.25    59.1%

FUV-MAMA    ACCUM       Imaging              9        3600.00     0.4%
"           "           Spectroscopic      216      261620.68    27.1%
"           TIME-TAG    Imaging             52      125253.55    13.0%
"           "           Spectroscopic      293      573692.93    59.5%

NUV-MAMA    ACCUM       Imaging             38       31592.00     5.0%
"           "           Spectroscopic      306      491912.47    77.3%
"           TIME-TAG    Spectroscopic       64      112938.11    17.7%

--------------------------------------------------------------------------------

1994-06-04 11:36:24.854797
Past 25 year(s) 