<img style="float: center;" src='https://github.com/STScI-MIRI/MRS-ExampleNB/raw/main/assets/banner1.png' alt="stsci_logo" width="900px"/> 

<a id="title_ID"></a>
# Store JWST data #

**Goal:** Store JWST data into folders based on a logical architecture

**Author:** Boris Trahin, Staff Scientist II, MIRI team

**Last updated:** August 1st, 2024

<div class="alert alert-block alert-info">
How to use:

- Change input_folder and output_folder below.

- Code will find all FITS and ECSV files and store them in a corresponding folder based on the following structure:<br>
&emsp;→ Output_dir <br>
&emsp;&emsp;↳ Program ID <br>
&emsp;&emsp;&emsp;↳ Source name <br>
&emsp;&emsp;&emsp;&emsp;↳ Instrument (MIRI, NIRCam, NIRSpec, NIRISS, FGS) <br>
&emsp;&emsp;&emsp;&emsp;&emsp;↳ Mode (Imaging, MRS, etc.) <br>
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;↳ Observation number <br>
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;↳ Observation type (Background, Science or Imprint) <br>
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;↳ Array or subarray (FULL, BRIGHTSKY, etc.) <br>
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;↳ Readout pattern <br>
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;↳ (OPTIONAL: Filter, Channel, Band) <br>
&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;↳ Pipeline stage (stage0 to stage3) <br>

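- By default, data for all filters, channels and bands are stored together in the same stage folders. To separate them into their own sub-folders, set the corresponding `filter_folder`, `channel_folder` or `band_folder` parameter below to `True`.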

- Any remaining files (e.g. JSON) are moved to an `Other_files` folder inside the output folder.
</div>

***
## Import packages

In [6]:
import glob
import os
import shutil
from astropy.io import fits

***
## Get input and output folders

In [7]:
# Folder containing the downloaded data; it is searched recursively for the
# files to sort.
input_folder = '/Users/btrahin/Desktop/MAST_2024-08-06T13_56_50.582Z'
# Root folder of the sorted tree. If set to '', the FITS cell below uses the
# parent of input_folder instead.
output_folder = '/Users/btrahin/Data/'

# Optional extra folder levels: when True, FITS files are also separated by the
# value of their FILTER / CHANNEL / BAND header keyword (when it is present).
filter_folder = False
channel_folder = False
band_folder = False

***
## Store FITS files

In [8]:
# do fits files first

# Substrings that flag a target name as a background pointing. They are applied
# longest first so that e.g. '-BKG' is removed as a whole instead of '-BK'
# being removed and leaving a stray 'G' in the folder name.
_BKG_MARKERS = sorted(
    ['-BK', ' BK', 'BK', '-BG', ' BG', 'BG', '-OFFSET', ' OFFSET', 'OFFSET',
     '-BKG', ' BKG', 'BKG'],
    key=len, reverse=True)

# File-name suffixes used to recognise the pipeline stage of a product.
_STAGE0_EXT = ['_uncal']
_STAGE1_EXT = ['_rate', '_rateints', '_ramp', '_trapsfilled']
_STAGE2_EXT = ['_cal', '_calints', '_x1d', '_x1dints', '_s3d', '_i2d', '_s2d',
               '_bsub', '_bsubints']
_STAGE3_EXT = ['_crf', '_crfints', '_whtlt', '_psfstack', '_phot', '_psfalign',
               '_psfsub', '_cat', '_segm', '_x1d', '_s3d', '_i2d', '_s2d',
               '_c1d', '_ami', '_aminorm']


def _pipeline_stage(path):
    """Return the stage folder name ('stage0'..'stage3') for a file, or None.

    Suffixes and the '_t0' tag are looked up in the file name only, so that an
    unrelated directory name (e.g. '.../jwst_cal/') cannot change the result.
    The 'Level3' marker is still searched in the whole path.
    """
    name = os.path.basename(path)

    def has_suffix(suffixes):
        return any(ext in name for ext in suffixes)

    if has_suffix(_STAGE0_EXT):
        return 'stage0'
    if has_suffix(_STAGE1_EXT):
        return 'stage1'
    if 'Level3' in path:
        return 'stage3'
    # Suffixes such as _x1d/_s3d/_i2d/_s2d exist at both stage 2 and stage 3;
    # a '_t0' tag in the name sends the file to stage 3.
    if '_t0' not in name and has_suffix(_STAGE2_EXT):
        return 'stage2'
    if has_suffix(_STAGE3_EXT):
        return 'stage3'
    return None


def _target_folder_name(header):
    """Return the source folder name from TARGNAME, falling back to TARGPROP.

    Spaces are replaced by underscores. 'Unknown_target' is returned when
    neither keyword holds a usable name, so the name is never left undefined
    or inherited from the previously processed file.
    """
    for key in ('TARGNAME', 'TARGPROP'):
        value = header.get(key)
        if isinstance(value, str) and value.strip(' '):
            return value.strip(' ').replace(' ', '_')
    return 'Unknown_target'


def _observation_type(source, header):
    """Return (source, data_type) for one file.

    data_type is 'Background' when BKGDTARG is True or when the target name
    contains a background marker (the marker is then removed from the source
    name), otherwise 'Science'. '_Imprint' is appended when IS_IMPRT is True.
    """
    if header['BKGDTARG'] is True:
        data_type = 'Background'
    elif any(marker in source for marker in _BKG_MARKERS):
        cleaned = source
        for marker in _BKG_MARKERS:
            cleaned = cleaned.replace(marker, '')
        # Spaces were already turned into '_', so drop the separator left
        # dangling where the marker was; keep the full name if nothing remains.
        source = cleaned.strip('_- ') or source
        data_type = 'Background'
    else:
        data_type = 'Science'

    if 'IS_IMPRT' in header and header['IS_IMPRT'] is True:
        data_type += '_Imprint'
    return source, data_type


fits_files = glob.glob(os.path.join(input_folder, '**', '*.fits'), recursive=True)

# Root of the sorted tree: output_folder, or the parent of input_folder when
# output_folder is empty.
if output_folder != '':
    output_root = output_folder
else:
    output_root = os.path.join(input_folder, '..')

for f in fits_files:
    stage = _pipeline_stage(f)
    if stage is None:
        # Unrecognised product: leave it where it is and do not create
        # empty folders for it.
        continue

    with fits.open(f) as hdu:
        header = hdu[0].header

        # store data in corresponding PID folder
        program = header['PROGRAM']

        # store data in corresponding source name folder
        source = _target_folder_name(header)

        # store data in corresponding instrument, mode and subarray folder
        instru = header['INSTRUME']
        mode = header['EXP_TYPE']
        subarray = header['SUBARRAY']
        ppsaperture = header['PPS_APER']

        # Check if simultaneous observations
        if any(i in mode for i in ['IMG', 'IMAGE']) and 'MIRIFU' in ppsaperture:
            mode += '_PARALLEL_MRS'

        # store data in corresponding instrument folder
        if 'MIR_' in mode:
            instru = 'MIRI'
        elif 'NRC' in mode:
            instru = 'NIRCam'
        elif 'NRS' in mode:
            instru = 'NIRSpec'
        elif 'NIS_' in mode:
            instru = 'NIRISS'

        # Check if parallel observations
        if header['EXPRIPAR'] == 'PARALLEL_COORDINATED':
            mode += f"_PARALLEL_{header['TEMPLATE'].split(' ')[0]}"

        # store data in observation number folder
        obs_number = header['OBSERVTN']

        # store data in corresponding readout pattern folder
        readout = header['READPATT']

        # store data in corresponding Background, Science or Imprint folder
        source, data_type = _observation_type(source, header)

        folder_path = [program, source, instru, mode, obs_number, data_type, subarray, readout]

        # OPTIONAL: store data in corresponding filter, channel and/or band folder
        if filter_folder and 'FILTER' in header:
            folder_path.append(header['FILTER'])
        if channel_folder and 'CHANNEL' in header:
            folder_path.append(header['CHANNEL'])
        if band_folder and 'BAND' in header:
            folder_path.append(header['BAND'])

    # The move happens after the `with` block so the file is closed before it
    # is relocated.
    stage_dir = os.path.join(output_root, *folder_path, stage)
    os.makedirs(stage_dir, exist_ok=True)
    shutil.move(f, os.path.join(stage_dir, os.path.basename(f)))

***
## Store ECSV files

In [9]:
# do ecsv (_cat, _whtlt, _phot) files
# Each ECSV file is moved next to the first already-sorted FITS file whose
# name contains the ECSV name without its last '_suffix'. ECSV files with no
# matching FITS file are left in place.

# Look for sorted FITS files under the same root the FITS cell writes to:
# output_folder, or the parent of input_folder when output_folder is empty
# (an empty output_folder would otherwise make the search start at '/').
if output_folder != '':
    sorted_root = output_folder
else:
    sorted_root = os.path.join(input_folder, '..')

ecsv_files = glob.glob(os.path.join(input_folder, '**', '*.ecsv'), recursive=True)
fits_files_sort = glob.glob(os.path.join(sorted_root, '**', '*.fits'), recursive=True)
for e in ecsv_files:
    ecsv_name = os.path.basename(e)
    e_name = ecsv_name.rsplit('_', 1)[0]
    for f in fits_files_sort:
        # Compare against the FITS file name only, not its directories.
        if e_name in os.path.basename(f):
            shutil.move(e, os.path.join(os.path.dirname(f), ecsv_name))
            break

***
## Store other files

In [10]:
# put other files into Other_files folder
# Every regular file still present under input_folder at this point is moved
# into a single flat Other_files folder.
other_files = [f for f in glob.glob(os.path.join(input_folder, '**', '*'), recursive=True)
               if os.path.isfile(f)]
if other_files:
    # Use the same root as the FITS cell: output_folder, or the parent of
    # input_folder when output_folder is empty.
    if output_folder != '':
        other_root = output_folder
    else:
        other_root = os.path.join(input_folder, '..')
    folder = os.path.join(other_root, 'Other_files')
    os.makedirs(folder, exist_ok=True)
    for o in other_files:
        shutil.move(o, os.path.join(folder, os.path.basename(o)))

***
<img style="float: center;" src="https://www.stsci.edu/~dlaw/stsci_logo.png" alt="stsci_logo" width="200px"/> 