### How to use CloudCatalog for accessing AIA data from HelioCloud
draft, Dec 2024

In [1]:
print("How to use CloudCatalog for accessing AIA data from HelioCloud")
# CloudCatalog package page and documentation: https://pypi.org/project/cloudcatalog/
# Prerequisites: run 'pip install cloudcatalog' and 'pip install s3fs' before executing this notebook.
import cloudcatalog
import matplotlib.pyplot as plt
import astropy.io.fits
import math
import s3fs

How to use CloudCatalog for accessing AIA data from HelioCloud


In [2]:
# Anonymous (credential-free) S3 filesystem handle; required for reading files from the cloud bucket.
fs = s3fs.S3FileSystem(anon=True)

print("sample Searches")
search = cloudcatalog.EntireCatalogSearch()

# 1) Search by ID fragment and report only how many catalog entries came back.
print(f"Searching for any 'aia', there are {len(search.search_by_id('aia'))} datasets")

# 2) Search by several keywords at once, keeping only the 'id' field of each hit.
keyword_hits = search.search_by_keywords(['193', '211', '304'])
ids = [hit['id'] for hit in keyword_hits]
print("\nSearching for any datasets with '193', '211' or '304' in them, just the dataIDs", ids)

# 3) Search by ID fragment again, this time printing the full metadata records.
hits_211 = search.search_by_id('211')
print("\nSearch for just '211', all metadata:", hits_211)

sample Searches
Searching for any 'aia', there are 9 datasets

Searching for any datasets with '193', '211' or '304' in them, just the dataIDs ['aia_0193', 'aia_0211', 'aia_0304', 'euvml_304', 'euvml_soho_304', 'euvml_stereoa_304', 'euvml_stereob_304', 'THG_L2_MAG_DED']

Search for just '211', all metadata: [{'id': 'aia_0211', 'index': 's3://gov-nasa-hdrl-data1/sdo/aia/registries/', 'title': 'AIA 0211 FITS data', 'start': '2015-06-01T00:00:00Z', 'stop': '2021-12-31T23:59:59Z', 'modification': '2023-05-04:00:00:00Z', 'indextype': 'csv', 'filetype': 'fits'}]


In [3]:
print("\nGetting the actual list of files from a search query")

# Query parameters: the dataset ID and the time window to request.
myid = 'aia_0193'
start = '2020-02-01T00:00:00Z'
stop = '2020-02-02T00:00:00Z'

# Open the catalog rooted at the bucket and request the file index for that ID and window.
fr = cloudcatalog.CloudCatalog("s3://gov-nasa-hdrl-data1/")
mycat = fr.request_cloud_catalog(myid, start_date=start, stop_date=stop)
print(f"{len(mycat)} files found")

print("\nLooking at the filelist query result")
# The 'datakey' column is pulled out as a plain Python list of file locations.
filelist = mycat['datakey'].tolist()
print(f"For dataID {myid} over times {start}-{stop}, the first 3 files found are:\n{filelist[0:3]}\n")


Getting the actual list of files from a search query
351 files found

Looking at the filelist query result
For dataID aia_0193 over times 2020-02-01T00:00:00Z-2020-02-02T00:00:00Z, the first 3 files found are:
['s3://gov-nasa-hdrl-data1/sdo/aia/20200201/0193/sdo_aia_h2_20200201T000000_0193_v1.fits', 's3://gov-nasa-hdrl-data1/sdo/aia/20200201/0193/sdo_aia_h2_20200201T000400_0193_v1.fits', 's3://gov-nasa-hdrl-data1/sdo/aia/20200201/0193/sdo_aia_h2_20200201T000800_0193_v1.fits']



In [4]:
# Compute a quick statistic (the mean of the data array) for the first few catalog files.
print("Running simple calc on 1st 3 files")
# Cap the loop at the catalog length so a query that returns fewer than
# 3 files does not raise IndexError from .iloc.
for i in range(min(3, len(mycat))):
    myitem = mycat.iloc[i]
    datastart = myitem['start']
    filename = myitem['datakey']
    # Context managers close both the remote S3 file handle and the FITS HDU list
    # on every iteration; the original left them open until garbage collection.
    with fs.open(filename) as s3file, astropy.io.fits.open(s3file) as hdul:
        # The data is read from HDU index 1 and must be accessed while the file is
        # still open, so the mean is computed inside the with-block.
        print(datastart,", mean value=",hdul[1].data.mean())

Running simple calc on 1st 3 files
2020-02-01 00:00:00 , mean value= 191.15523592
2020-02-01 00:04:00 , mean value= 191.21884312
2020-02-01 00:08:00 , mean value= 191.32419744
