### Retroactively seeds metadata for MDI or HMI. Using the DRMS protocol='fits' is time consuming since a data export request is issued to JSOC 

In [None]:
import drms
import h5py
import numpy as np
import csv
from tqdm import tqdm
import warnings
from pandas.core.common import SettingWithCopyWarning
from Mission_utility.product_time_sync import csv_times_reader
import json
from astropy.io.fits import Header

In [None]:
# Silence the warning categories that would otherwise clutter the notebook output.
for ignored_category in (FutureWarning, SettingWithCopyWarning):
    warnings.simplefilter(action="ignore", category=ignored_category)

In [None]:
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to built-in types."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        NumPy arrays become (nested) lists, NumPy integers become ``int`` and
        NumPy floats become ``float``. Anything else is delegated to the base
        encoder.
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        return super().default(obj)

In [None]:
def downsample_header_local(mission, image_size_output, query, mag_keys):
    """Rescale the pixel-dependent keywords of ``query`` for a downsampled image.

    Parameters
    ----------
    mission : str
        'SOHO' (original image size taken as 1024) or 'SDO' (4096).
    image_size_output : int
        Edge length, in pixels, of the downsampled image.
    query : mapping-like (e.g. a pandas Series holding one record's keywords)
        Keyword values; updated in place and also returned.
    mag_keys : iterable of str
        Keyword names scanned for CDELT1/CDELT2 and CRPIX1/CRPIX2.

    Returns
    -------
    The same ``query`` object, with the rescaled values.

    Raises
    ------
    ValueError
        If ``mission`` is neither 'SOHO' nor 'SDO'.
    KeyError
        If a CDELT/CRPIX keyword listed in ``mag_keys`` is absent from ``query``.
    """
    original_sizes = {'SOHO': 1024, 'SDO': 4096}
    try:
        orig_img_size = original_sizes[mission]
    except KeyError:
        raise ValueError(
            f"unknown mission {mission!r}; expected one of {sorted(original_sizes)}"
        ) from None

    rescale_factor = int(orig_img_size / image_size_output)

    for key in mag_keys:
        if key in ('CDELT1', 'CDELT2'):
            query[key] = query[key] * rescale_factor  # this updates the original data frame
        elif key in ('CRPIX1', 'CRPIX2'):
            query[key] = query[key] / rescale_factor

    # Optional keywords: each one is scaled exactly once and independently of
    # the others. Doing this inside the loop above would divide them once per
    # entry of mag_keys, and a single shared try block would skip every
    # keyword after the first missing one.
    for key in ('RSUN_OBS', 'R_SUN', 'X0', 'Y0', 'CROP_RAD', 'SOLAR_R'):
        try:
            value = query[key]
        except KeyError:
            continue
        query[key] = value / rescale_factor

    return query

In [None]:
### Update as appropriate ###
# These settings are read by the cells below: they build the CSV search
# pattern, the HDF5 dataset names and the header rescaling from them.

base = 'MDI_96m' # product name: 'HMI_720s' or 'MDI_96m'
mission = 'SOHO' # 'SDO' or 'SOHO'
image_size_output = 128 # image size used in the dataset name and for rescaling the header keywords

In [None]:
# Query the series once with key=drms.const.all and an empty record filter;
# the per-timestamp lookups in the seeding loop are then done locally on the
# returned table instead of issuing one remote query per time stamp.
client = drms.Client()
query_mag = 'mdi.fd_M_96m_lev182[]' ### use 'hmi.M_720s[]' for SDO HMI instead of SOHO MDI
mag_keys = client.query(query_mag, key=drms.const.all) 
print('len(mag_keys):', len(mag_keys))

In [None]:
# List the keyword names reported for the series (printed for inspection;
# mag_keys_list is not used by the other cells shown here).
mag_keys_list = list(client.keys('mdi.fd_M_96m_lev182')) # use 'hmi.M_720s' for SDO HMI instead of SOHO MDI
print('mag_keys_list:', mag_keys_list)
print('len(mag_keys_list):', len(mag_keys_list))

In [None]:
### Path to the MDI or HMI data cube that metadata is added to retroactively ###
### Be sure to include '/' at the end of every path variable

path_to_mag_cube = '/home/carl/Documents/synced_1_3_7_experiments_calibrated/synced_1_3_7_experiments_calibrated_1_product_MDI_only/' 
mag_cube_name = '1999-02-02-16:00:00_to_2011-01-01-00:00:00_MDI_96m_subsample_6_LASCOlev1-N_SOHO_128.h5'

# Read the image stack inside a context manager so the HDF5 handle is released
# as soon as the data is in memory, even if reading fails.
with h5py.File(f'{path_to_mag_cube}{mag_cube_name}', 'r') as cube_orig:
    print(list(cube_orig.keys()))
    # Take the first dataset in the file rather than hard-coding its name;
    # with the most recent nomenclature that name is
    # f'{base}_{mission}_{image_size_output}'.
    cube_orig_data = cube_orig[list(cube_orig.keys())[0]][:]
print('np.shape(cube_orig_data):', np.shape(cube_orig_data))

# NOTE(review): if csv_times_reader uses glob-style matching, '[!sync]' is a
# character class (one character that is not s, y, n or c), not "does not end
# in 'sync'" -- confirm this selects the intended CSV file.
times_list = csv_times_reader(path_to_mag_cube, pattern = f'*{base}*{mission}*[!sync].csv')

In [None]:
# Sanity check of the time stamps returned by csv_times_reader: first ten,
# last ten, and the overall shape.
print('times_list[0:10]:', times_list[0:10])
print('times_list[-10:]:', times_list[-10:])
print('np.shape(times_list):', np.shape(times_list))

###### Expected output:
'MDI_96m_SOHO_128'
np.shape(cube_orig_data): (15456, 128, 128)
name from csv_times_reader: 1999-01-01_to_2010-12-31_MDI_96m_times_subsample_6_LASCOlev1-N_SOHO_128.csv
len(csv_uniq_times): 15456


In [None]:
### Create a copy of the cube with the data from the original cube; the metadata is added to this writable copy ###
import os

full_mag_cube_name = f'{path_to_mag_cube}{mag_cube_name}'
# splitext removes only the final extension, so a dot elsewhere in the
# directory or file name cannot truncate the output path.
mag_cube_name_new = os.path.splitext(full_mag_cube_name)[0] + '_retroactive_metadata.h5'
print(mag_cube_name_new)

In [None]:
### tqdm generates nice progress bar ###

dataset_name = f'{base}_{mission}_{image_size_output}'
meta_data_dict = {}

# The context manager closes the new cube even when an iteration raises, so a
# failed run does not leave the output file open.
with h5py.File(mag_cube_name_new, 'w') as data_cube_new:
    data_cube_new.create_dataset(dataset_name, data=cube_orig_data, compression="gzip")

    for counter, t_pre in enumerate(tqdm(times_list[0:2])):  ### first two slices only [0:2] as demonstration
        # Turn the CSV time stamp into the string compared against T_REC:
        # date with '.' separators, '_', the time part, then '_TAI'.
        t_drms_split = str(drms.to_datetime(t_pre)).split(' ')
        t_tai = '_'.join((t_drms_split[0].replace('-', '.'), t_drms_split[1])) + '_TAI'
        print(t_tai)

        # The record is looked up in the locally held keyword table; issuing
        # one client.query per time stamp was the old method and became
        # extremely slow when run against JSOC.
        query_pre = mag_keys.loc[mag_keys['T_REC'] == t_tai]
        if query_pre.empty:
            raise IndexError(f'no record with T_REC == {t_tai!r} found in mag_keys')
        query = mag_keys.loc[query_pre.index[0]]

        query_metadata_update = downsample_header_local(mission, image_size_output, query, mag_keys)
        print(query_metadata_update['T_REC'])

        for key in mag_keys:
            # COMMENT and HISTORY carry the slice index twice (e.g. 'COMMENT0_0');
            # for COMMENT this keeps the original value from being overwritten
            # by the 'COMMENT_<index>' note stored below.
            key_stem = f'{key}{counter}' if key in ('COMMENT', 'HISTORY') else key
            meta_data_dict[f'{key_stem}_{counter}'] = query_metadata_update[key]

        meta_data_dict[f'COMMENT_{counter}'] = f'Zeros outside solar disk for {base}'

    # All metadata is stored as a single JSON string in its own dataset.
    data_cube_new.create_dataset(f'{dataset_name}_metadata', data=json.dumps(meta_data_dict, cls=NpEncoder))
    data_cube_new.attrs['NOTE'] = 'JSON serialization'

In [None]:
data_cube_new.close() ### releases the HDF5 handle if the cell above failed part-way and left the cube open

######  For the full data cubes, this retroactive metadata seeding can take several days. This is still currently faster than using the FITS protocol to fetch FITS files with metadata from JSOC

In [None]:
# Re-open the new cube read-only, decode the JSON metadata dataset and print
# the file attributes followed by every metadata entry.
metadata_dataset_name = f'{base}_{mission}_{image_size_output}_metadata'
with h5py.File(mag_cube_name_new, 'r') as hfile:
    metadata = json.loads(hfile[metadata_dataset_name][()])
    print('list(hfile.attrs.items()):', list(hfile.attrs.items()))
for k, value in metadata.items():
    print(f'{k} => {value}')

In [None]:
# Display the names of all stored metadata entries.
list(metadata.keys())

In [None]:
# Display the values of all stored metadata entries.
list(metadata.values())