In [24]:
import numpy as np
import pandas as pd
from astropy.io import fits
import glob
import os, re
from bs4 import BeautifulSoup 

In [9]:
## Define paths
## Raw data, SOF files and recipe outputs each sit in a subfolder of `basepath`;
## `calpath` is the directory the static calibration files are read from.
basepath = '/Volumes/Seagate/MUSE'
datapath, sofpath, prepath, postpath = (
    f'{basepath}/{subdir}'
    for subdir in ('raw_data', 'sof', 'scibasic_out', 'scipost_out')
)
calpath = '/Users/isabelkain/Desktop/MUSE/share/esopipes/datastatic/muse-2.8.5'


## SCIBASIC SOF

In [5]:
## Grab XML filetrees -- this tells where all the data required
## for preprocessing lives.
## glob returns matches in arbitrary order, and the loop further down numbers each
## XML file by its position in this list, so sort to keep that numbering stable
## between runs. (`recursive=True` is dropped: it only affects patterns with `**`.)
xml_files = sorted(glob.glob(f'{datapath}/*.xml*'))
xml_files

['/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-21T06_23_20.820.xml',
 '/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-21T06_32_13.819.xml',
 '/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-21T07_07_42.823.xml',
 '/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-21T07_15_08.822.xml',
 '/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-21T07_22_34.820.xml',
 '/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-21T08_09_06.822.xml',
 '/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-22T06_27_43.821.xml',
 '/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-22T06_30_27.819.xml',
 '/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-22T06_37_55.822.xml',
 '/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-22T06_45_23.822.xml',
 '/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-22T06_52_51.821.xml',
 '/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-22T06_59_25.822.xml',
 '/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-22T07_32_10.822.xml',
 '/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-22T07_42_58.820.xml',
 '/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-22

In [44]:
## Empty lookup table pairing each XML file's base name ('Filename') with a numeric 'Tag'
fname_key = pd.DataFrame(columns=['Filename', 'Tag'])

In [48]:
## Preview the name stored in the filename key: the XML file's base name with the
## '.xml' suffix stripped. Index `xml_files` directly rather than relying on the
## loop variable `xml`, which only exists after a later cell has been run.
os.path.basename(xml_files[-1]).split('.xml')[0]

'MUSE.2018-06-22T09_05_57.128'

In [50]:
## Iterate through XML files. Read pointers in each XML file.
## From each XML file, build one SOF table containing paths and filetypes for each pointed file.

## Rows for the filename key are collected in a list and turned into `fname_key`
## once, after the loop. DataFrame.append was removed in pandas 2.0, and appending
## to a frame created in another cell added duplicate rows on every re-run.
key_records = []

for i, xml in enumerate(xml_files):

    ## Open, parse XML file (the `with` block closes the file on exit)

    with open(xml, 'r') as f:
        data = f.read()

    ## Save name of XML file, number tag (tags start at 1)
    key_records.append({'Filename': xml.split('/')[-1].split('.xml')[0], 'Tag': i+1})

    bs_data = BeautifulSoup(data, 'xml')

    ## Pull filenames and types from XML tree, save to pandas DF

    filenames = []
    filetypes = []

    filedivs = bs_data.find_all('file')

    for line in filedivs:

        ## Pull filename and type from XML tree
        catg = line['category']
        name = line['name'].replace(':', '_')

        ## Use XML filename to search for actual filename in data directory
        ## (there are some weird formatting differences)
        find_name = glob.glob(f'{datapath}/*{name}*.fits*')

        ## Skip entries that do not resolve to exactly one file on disk: this
        ## covers missing files (0 matches) as well as ambiguous ones (>1).
        if len(find_name) != 1:
            print(f'ERROR: expected exactly 1 filename match for {name}, found {len(find_name)}.', find_name)
            continue

        filenames.append(find_name[0])
        filetypes.append(catg)

    sofDF = pd.DataFrame({'Filenames': filenames, 'Filetypes': filetypes})


    ## Drop multiple frames of the same type, e.g. ILLUM (max allowed: 1 per image).
    ## Only the first file listed for each filetype is kept.
    sofDF = sofDF.drop_duplicates(subset='Filetypes', ignore_index=True)

    ## Show the table built from the second XML file as a spot check
    if i==1:
        display(sofDF)
    
    
#     ## Check if all files the XML tree points to can be found

#     check = []

#     for pth in sofDF['Filenames']:
#         check.append(os.path.exists(pth))

#     print('All pointer files found in', xml_files[i], np.all(check))

#     ## Write .sof file if passes check
#     ## (test np.all(check): a bare `if check:` is True for any non-empty list)

#     if np.all(check):
#         sofDF.to_csv(f'{sofpath}/scibasic_{i+1}.sof', header=None, index=None, sep=' ')

## Filename key: one row per XML file, in the order the files were processed
fname_key = pd.DataFrame(key_records, columns=['Filename', 'Tag'])

Unnamed: 0,Filenames,Filetypes
0,/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-21...,OBJECT
1,/Volumes/Seagate/MUSE/raw_data/M.MUSE.2019-09-...,ASTROMETRY_REFERENCE
2,/Volumes/Seagate/MUSE/raw_data/M.MUSE.2019-03-...,ASTROMETRY_WCS
3,/Volumes/Seagate/MUSE/raw_data/M.MUSE.2014-06-...,EXTINCT_TABLE
4,/Volumes/Seagate/MUSE/raw_data/M.MUSE.2014-06-...,FILTER_LIST
5,/Volumes/Seagate/MUSE/raw_data/M.MUSE.2017-05-...,GEOMETRY_TABLE
6,/Volumes/Seagate/MUSE/raw_data/MUSE.2018-04-21...,ILLUM
7,/Volumes/Seagate/MUSE/raw_data/M.MUSE.2019-03-...,LSF_PROFILE
8,/Volumes/Seagate/MUSE/raw_data/M.MUSE.2018-04-...,MASTER_BIAS
9,/Volumes/Seagate/MUSE/raw_data/M.MUSE.2018-04-...,MASTER_DARK


## SCIPOST SOF

In [111]:
## List the entries under the scibasic output path that match '*/', and stop early
## with an explanatory error if there are none.
folders = glob.glob(f'{prepath}/*/', recursive=False)

if not folders:
    raise ValueError('Preprocessing output not found, or not in expected file structure. Please organize output files or run muse_scibasic.')

folders

['/Volumes/Seagate/MUSE/scibasic_out/sof1/']

In [108]:
## Build SOF files for muse_scipost
##
## For every scibasic output folder: start from the matching scibasic_N.sof, add the
## pixel tables found in that folder, fill in any calibration type that is still
## missing from the static calibration directory, and write scipost_N.sof.

## Fallback file (relative to `calpath`) for each calibration type, used only when
## scibasic_N.sof does not already list a frame of that type. Dict order is the
## order in which the fallback rows are written.
## TODO: LSF_PROFILE -- need to dynamically create, or ok to use static calib?
static_calibs = {
    'LSF_PROFILE': 'lsf_profile_slow_nfm-ao-n.fits',
    'ASTROMETRY_WCS': 'astrometry_wcs_nfm.fits',
    'FILTER_LIST': 'filter_list.fits',
    'SKY_LINES': 'sky_lines.fits',
    'EXTINCT_TABLE': 'extinct_table.fits',
    'STD_RESPONSE': 'std_response_nfm-ao-n.fits',
}

for fld in folders:

    ## Determine file number from the folder name (first run of digits)

    fstr = fld.split('/')[-2]
    digits = re.findall(r'\d+', fstr)

    if len(digits)==0:
        raise ValueError(f'Cannot determine SOF number from folder name: {fstr}')

    num = int(digits[0])
    print(fstr, num)


    ## Copy contents of scibasic_N.sof

    df = pd.read_csv(f'{sofpath}/scibasic_{num}.sof', delimiter=' ', names=['Filename', 'Type'])


    ## Add PIXELTABLES (output from muse_scibasic).
    ## Search the folder being processed (`fld`), not always the first one, and
    ## sort so the rows are written in a stable order.

    pxtables = sorted(glob.glob(f'{fld}*PIXTABLE*.fits'))

    if len(pxtables)==0:
        raise ValueError('Preprocessing output not found. Please run muse_scibasic.')


    ## Add static calibrations for every type not already present.
    ## The rows use the same 'Filename'/'Type' columns as `df`, so no extra column
    ## ends up in the written file.

    present = set(df['Type'])
    missing = [ftype for ftype in static_calibs if ftype not in present]

    newrows = pd.DataFrame({
        'Filename': pxtables + [f'{calpath}/{static_calibs[ftype]}' for ftype in missing],
        'Type': ['PIXTABLE_OBJECT']*len(pxtables) + missing,
    })

    ## pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    df = pd.concat([df, newrows], ignore_index=True)


    ## Save as scipost_N.sof
    df.to_csv(f'{sofpath}/scipost_{num}.sof', header=None, index=None, sep=' ')

sof1 1


In [None]:
## add LSF_PROFILE files
## NOTE(review): unfinished stub -- each pass only rebinds `df` to an empty
## 'Path'/'Type' table; nothing is added or written. TODO confirm whether this
## cell is still needed.

for fld in folders:
    
    df = pd.DataFrame(columns=['Path', 'Type'])

In [65]:
## Read the space-delimited filename key file and look up the name tagged 1.
## `.iloc[0]` takes the first matching row by position; a plain `[0]` is a label
## lookup that only works when the matching row happens to carry index label 0.
keys = pd.read_csv(f'{basepath}/filenames_key.txt', delimiter=' ')
keys.loc[keys['Tag']==1, 'Filename'].iloc[0]

'MUSE.2018-04-21T06_23_20.820'

In [85]:
## Load scibasic_1.sof (space-separated, no header row) and show the path(s)
## listed with type STD_TELLURIC.
scibasic_sof = pd.read_csv(
    f'{sofpath}/scibasic_{1}.sof',
    sep=' ',
    header=None,
    names=['Filename', 'Type'],
)
scibasic_sof['Filename'][scibasic_sof['Type'].eq('STD_TELLURIC')]

14    /Volumes/Seagate/MUSE/raw_data/M.MUSE.2018-04-...
Name: Filename, dtype: object

In [None]:
## Plain-text SOF listing ("<path> <type>" per line), kept in a string so that
## running this cell does not raise a SyntaxError.
## NOTE(review): presumably a reference layout for a muse_scipost SOF -- confirm.
example_scipost_sof = """\
PIXTABLE_OBJECT_0001-01.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-02.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-03.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-04.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-05.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-06.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-07.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-08.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-09.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-10.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-11.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-12.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-13.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-14.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-15.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-16.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-17.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-18.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-19.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-20.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-21.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-22.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-23.fits PIXTABLE_OBJECT
PIXTABLE_OBJECT_0001-24.fits PIXTABLE_OBJECT
cal/LSF_PROFILE-01.fits LSF_PROFILE
cal/LSF_PROFILE-02.fits LSF_PROFILE
cal/LSF_PROFILE-03.fits LSF_PROFILE
cal/LSF_PROFILE-04.fits LSF_PROFILE
cal/LSF_PROFILE-05.fits LSF_PROFILE
cal/LSF_PROFILE-06.fits LSF_PROFILE
cal/LSF_PROFILE-07.fits LSF_PROFILE
cal/LSF_PROFILE-08.fits LSF_PROFILE
cal/LSF_PROFILE-09.fits LSF_PROFILE
cal/LSF_PROFILE-10.fits LSF_PROFILE
cal/LSF_PROFILE-11.fits LSF_PROFILE
cal/LSF_PROFILE-12.fits LSF_PROFILE
cal/LSF_PROFILE-13.fits LSF_PROFILE
cal/LSF_PROFILE-14.fits LSF_PROFILE
cal/LSF_PROFILE-15.fits LSF_PROFILE
cal/LSF_PROFILE-16.fits LSF_PROFILE
cal/LSF_PROFILE-17.fits LSF_PROFILE
cal/LSF_PROFILE-18.fits LSF_PROFILE
cal/LSF_PROFILE-19.fits LSF_PROFILE
cal/LSF_PROFILE-20.fits LSF_PROFILE
cal/LSF_PROFILE-21.fits LSF_PROFILE
cal/LSF_PROFILE-22.fits LSF_PROFILE
cal/LSF_PROFILE-23.fits LSF_PROFILE
cal/LSF_PROFILE-24.fits LSF_PROFILE
std/STD_TELLURIC_moffat.fits STD_TELLURIC
cal/astrometry_wcs.fits ASTROMETRY_WCS
cal/filters.fits FILTER_LIST
cal/sky_lines.fits SKY_LINES
cal/extinction_paranal.fits EXTINCT_TABLE
std/STD_RESPONSE_moffat.fits STD_RESPONSE
"""

In [38]:
## NOTE(review): scratch cell -- it raises unconditionally, so a top-to-bottom run
## of the notebook stops here. Presumably a trial of which exception type to use
## for missing input files; confirm and remove if no longer needed.
raise ImportError('These files do not exist.')

ImportError: These files do not exist

In [39]:
## NOTE(review): `UserError` is not defined or imported in any cell visible here,
## so this line fails with NameError instead of raising the intended exception.
## Presumably another exception-type trial; confirm and remove if no longer needed.
raise UserError

NameError: name 'UserError' is not defined