In [1]:
import datetime
import pandas as pd
import pyaurorax

aurorax = pyaurorax.PyAuroraX()

# Search for data product records

Just like ephemeris searches, you can also search the AuroraX database for data product records.

In [2]:
# set search parameters
start = datetime.datetime(2020, 2, 1, 0, 0, 0)
end = datetime.datetime(2020, 2, 5, 23, 59, 59)
programs = ["trex"]
instrument_types = ["RGB ASI"]

# perform search
s = aurorax.search.data_products.search(start, end, programs=programs, verbose=True)

[2024-12-13 17:13:41.429519] Search object created
[2024-12-13 17:13:46.367433] Request submitted
[2024-12-13 17:13:46.367672] Request ID: 9f5b2274-eebf-4d96-b344-3fc3a1504c7b
[2024-12-13 17:13:46.367705] Request details available at: https://api.aurorax.space/api/v1/data_products/requests/9f5b2274-eebf-4d96-b344-3fc3a1504c7b
[2024-12-13 17:13:46.367723] Waiting for data ...
[2024-12-13 17:13:59.441256] Data is now available
[2024-12-13 17:13:59.441492] Retrieving data ...
[2024-12-13 17:14:12.686997] Retrieved 3.4 MB of data containing 1400 records


In [8]:
# show the first 10 data product records
#
# NOTE: while here we format the results into a Pandas dataframe, this
# is not required. We actually don't include Pandas as a dependency since
# it's used simply as a nice add-on to view data. If you're good with slicing
# and dicing lists and dictionaries, you'll be fine without it.
data_products = [d.__dict__ for d in s.data]
df = pd.DataFrame(data_products)
df.sort_values("start")[0:10]

Unnamed: 0,data_source,data_product_type,start,end,url,metadata
0,"DataSource(identifier=104, program='trex', pla...",montage,2020-02-01T00:00:00,2020-02-01T00:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'montage_type': 'hourly', 'imaging_end_time':..."
26,"DataSource(identifier=103, program='trex', pla...",montage,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'montage_type': 'daily', 'imaging_end_time': ..."
27,"DataSource(identifier=101, program='trex', pla...",montage,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'montage_type': 'daily', 'imaging_end_time': ..."
28,"DataSource(identifier=101, program='trex', pla...",keogram,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_moviederived', 'imagin..."
29,"DataSource(identifier=102, program='trex', pla...",movie,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'movie_type': 'real-time daily', 'imaging_end..."
30,"DataSource(identifier=104, program='trex', pla...",keogram,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily', 'imaging_end_time': ..."
31,"DataSource(identifier=104, program='trex', pla...",keogram,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_moviederived', 'imagin..."
32,"DataSource(identifier=104, program='trex', pla...",montage,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'montage_type': 'daily', 'imaging_end_time': ..."
33,"DataSource(identifier=96, program='trex', plat...",montage,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'montage_type': 'daily', 'imaging_end_time': ..."
34,"DataSource(identifier=103, program='trex', pla...",movie,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'movie_type': 'real-time daily', 'imaging_end..."


# Do the search step-by-step

Under the hood, the AuroraX API performs a data products search asynchronously. Note that this does not mean that it can be done using a Python async method; it means that PyAuroraX does more than just a single HTTP request when doing a search. With the API operating this way, it adds some more complexity within PyAuroraX but also opens the search up to some very important capabilities. 

The main capability enabled by this architecture is being able to perform queries for large timeframes, and/or between a large number of data sources. Queries like this can sometimes take several minutes, and cause browsers and programmatic HTTP requests to timeout.

Instead of using the `aurorax.search.data_products.search()` method, you can also perform a data product search step-by-step if you want more control over the process. One use case for this is if you want to start a series of data product searches, and then go through each getting the results back as they finish, as opposed to doing one search at a time.

In [2]:
# set up the search parameters
start = datetime.datetime(2020, 2, 1, 0, 0, 0)
end = datetime.datetime(2020, 2, 5, 23, 59, 59)
programs = ["trex"]
instrument_types = ["RGB ASI"]

# create the Search object
s = pyaurorax.search.DataProductSearch(aurorax, start, end, programs=programs, instrument_types=instrument_types)
s.pretty_print()

DataProductSearch:
  executed     : False
  completed    : False
  request_id   : 
  request      : None
  request_url  : 
  data_url     : 
  query        : {'data_sources': {'programs': ['trex'], 'platforms': [], 'instrument_types': ['R...
  status       : {}
  data         : 
  logs         : 


In [3]:
# submit the search to begin
s.execute()
s.pretty_print()

DataProductSearch:
  executed     : True
  completed    : False
  request_id   : 343e2384-c9e7-4ba0-8bc9-5e2e4ded9c4e
  request      : AuroraXAPIResponse [202] (Accepted)
  request_url  : https://api.aurorax.space/api/v1/data_products/requests/343e2384-c9e7-4ba0-8bc9-5e2e4ded9c4e
  data_url     : 
  query        : {'data_sources': {'programs': ['trex'], 'platforms': [], 'instrument_types': ['R...
  status       : {}
  data         : [0 data product results]
  logs         : [0 log messages]


In [4]:
# update the search request status
s.update_status()
s.pretty_print()

DataProductSearch:
  executed     : True
  completed    : True
  request_id   : 343e2384-c9e7-4ba0-8bc9-5e2e4ded9c4e
  request      : AuroraXAPIResponse [202] (Accepted)
  request_url  : https://api.aurorax.space/api/v1/data_products/requests/343e2384-c9e7-4ba0-8bc9-5e2e4ded9c4e
  data_url     : https://api.aurorax.space/api/v1/data_products/requests/343e2384-c9e7-4ba0-8bc9-5e2e4ded9c4e/data
  query        : {'data_sources': {'programs': ['trex'], 'platforms': [], 'instrument_types': ['R...
  status       : {'search_request': {'request_id': '343e2384-c9e7-4ba0-8bc9-5e2e4ded9c4e', 'query...
  data         : [0 data product results]
  logs         : [7 log messages]


In [5]:
# wait for the data to be available
s.wait()
s.update_status()
s.pretty_print()

DataProductSearch:
  executed     : True
  completed    : True
  request_id   : 343e2384-c9e7-4ba0-8bc9-5e2e4ded9c4e
  request      : AuroraXAPIResponse [202] (Accepted)
  request_url  : https://api.aurorax.space/api/v1/data_products/requests/343e2384-c9e7-4ba0-8bc9-5e2e4ded9c4e
  data_url     : https://api.aurorax.space/api/v1/data_products/requests/343e2384-c9e7-4ba0-8bc9-5e2e4ded9c4e/data
  query        : {'data_sources': {'programs': ['trex'], 'platforms': [], 'instrument_types': ['R...
  status       : {'search_request': {'request_id': '343e2384-c9e7-4ba0-8bc9-5e2e4ded9c4e', 'query...
  data         : [0 data product results]
  logs         : [7 log messages]


In [6]:
# now that we know the request is complete, let's retrieve the data
s.get_data()
s.pretty_print()

DataProductSearch:
  executed     : True
  completed    : True
  request_id   : 343e2384-c9e7-4ba0-8bc9-5e2e4ded9c4e
  request      : AuroraXAPIResponse [202] (Accepted)
  request_url  : https://api.aurorax.space/api/v1/data_products/requests/343e2384-c9e7-4ba0-8bc9-5e2e4ded9c4e
  data_url     : https://api.aurorax.space/api/v1/data_products/requests/343e2384-c9e7-4ba0-8bc9-5e2e4ded9c4e/data
  query        : {'data_sources': {'programs': ['trex'], 'platforms': [], 'instrument_types': ['R...
  status       : {'search_request': {'request_id': '343e2384-c9e7-4ba0-8bc9-5e2e4ded9c4e', 'query...
  data         : [820 data product results]
  logs         : [7 log messages]


In [7]:
# show the first 10 conjunction results 
data_products = [d.__dict__ for d in s.data]
df = pd.DataFrame(data_products)
df.sort_values("start")[0:10]

Unnamed: 0,data_source,data_product_type,start,end,url,metadata
0,"DataSource(identifier=102, program='trex', pla...",keogram,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_moviederived', 'imagin..."
20,"DataSource(identifier=101, program='trex', pla...",keogram,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_hires', 'imaging_end_t..."
21,"DataSource(identifier=101, program='trex', pla...",keogram,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_moviederived', 'imagin..."
22,"DataSource(identifier=103, program='trex', pla...",montage,2020-02-01T00:00:00,2020-02-01T00:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'montage_type': 'hourly', 'imaging_end_time':..."
23,"DataSource(identifier=103, program='trex', pla...",keogram,2020-02-01T00:00:00,2020-02-01T00:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly', 'imaging_end_time':..."
24,"DataSource(identifier=103, program='trex', pla...",keogram,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_moviederived', 'imagin..."
26,"DataSource(identifier=103, program='trex', pla...",keogram,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_hires_200px', 'imaging..."
27,"DataSource(identifier=103, program='trex', pla...",montage,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'montage_type': 'daily', 'imaging_end_time': ..."
28,"DataSource(identifier=103, program='trex', pla...",keogram,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily', 'imaging_end_time': ..."
29,"DataSource(identifier=103, program='trex', pla...",movie,2020-02-01T00:00:00,2020-02-01T23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'movie_type': 'real-time daily', 'imaging_end..."
