In [1]:
import pyaurorax
import pprint

# create the PyAuroraX object; the cells below reach the data source
# functions through its `search.sources` attribute
aurorax = pyaurorax.PyAuroraX()

# Search Engine Data Sources

The AuroraX search engine is built around organizing metadata from instruments into 'data sources'. Each data source is an identity defined by a unique combination of three attributes: program, platform, and instrument type.

A full dive into the AuroraX search engine data can be found [here](https://docs.aurorax.space/about_the_data/overview/).

In [2]:
# fetch every data source, with no filters applied
datasets = aurorax.search.sources.list()

# show only the first five entries to keep the output short
pprint.pprint(datasets[:5])

[DataSource(identifier=3, program='swarm', platform='swarma', instrument_type='footprint', source_type='leo', display_name='Swarm A', ...),
 DataSource(identifier=29, program='swarm', platform='swarmb', instrument_type='footprint', source_type='leo', display_name='Swarm B', ...),
 DataSource(identifier=30, program='swarm', platform='swarmc', instrument_type='footprint', source_type='leo', display_name='Swarm C', ...),
 DataSource(identifier=32, program='epop', platform='epop', instrument_type='footprint', source_type='leo', display_name='ePOP', ...),
 DataSource(identifier=33, program='themis', platform='themisa', instrument_type='footprint', source_type='heo', display_name='THEMIS-A', ...)]


In [3]:
# each DataSource object also has a pretty_print() method, which prints
# its attributes one per line
datasets[0].pretty_print()

DataSource:
  identifier                    : 3
  program                       : swarm
  platform                      : swarma
  instrument_type               : footprint
  source_type                   : leo
  display_name                  : Swarm A
  metadata                      : {}
  owner                         : dchaddoc@ucalgary.ca
  maintainers                   : []
  ephemeris_metadata_schema     : [{'field_name': 'nbtrace_region', 'description': "Region based on where the magn...
  data_product_metadata_schema  : []
  stats                         : []
  format                        : full_record


In [4]:
# sort the returned data sources by their 'program' attribute
sources = aurorax.search.sources.list(order="program")

# show the first five entries of the sorted list
pprint.pprint(sources[:5])

[DataSource(identifier=492, program='amisr', platform='poker flat', instrument_type='ISR', source_type='ground', display_name='PFISR', ...),
 DataSource(identifier=494, program='amisr', platform='resolute bay', instrument_type='ISR', source_type='ground', display_name='RISR-N', ...),
 DataSource(identifier=38, program='arase', platform='arase', instrument_type='footprint', source_type='heo', display_name='Arase', ...),
 DataSource(identifier=175, program='auroramax', platform='yellowknife', instrument_type='DSLR', source_type='ground', display_name='AuroraMAX', ...),
 DataSource(identifier=406, program='campaigns', platform='lake minnewanka', instrument_type='observer', source_type='ground', display_name='Observer at Lake Minnewanka', ...)]


In [5]:
# data sources can alternatively be printed in a table format
#
# this works identically to the list() function, except that it prints
# the table instead of returning the DataSource objects.
aurorax.search.sources.list_in_table(
    program="trex",
    order="platform",
)

Identifier   Program   Platform      Instrument Type     Source Type   Display Name          
339          trex      athabasca     RGB ASI             ground        TREx RGB ATHA         
341          trex      athabasca     near-infrared ASI   ground        TREx NIR ATHA         
344          trex      athabasca     blueline ASI        ground        TREx Blue ATHA        
96           trex      fort smith    RGB ASI             ground        TREx RGB FSMI         
92           trex      gillam        near-infrared ASI   ground        TREx NIR GILL         
93           trex      gillam        blueline ASI        ground        TREx Blue GILL        
103          trex      gillam        RGB ASI             ground        TREx RGB GILL         
95           trex      lucky lake    spectrograph        ground        TREx Spectrograph LUCK
101          trex      lucky lake    RGB ASI             ground        TREx RGB LUCK         
374          trex      lucky lake    blueline ASI        gro

# Filtering data sources

Using the filtering parameters of the `list()` and `list_in_table()` functions, we can retrieve data sources that match certain criteria.

In [6]:
# list all THEMIS ASI data sources
#
# filter on the 'program' attribute
aurorax.search.sources.list_in_table(program="themis-asi")

Identifier   Program      Platform        Instrument Type    Source Type   Display Name   
44           themis-asi   athabasca       panchromatic ASI   ground        THEMIS-ASI ATHA
45           themis-asi   fort simpson    panchromatic ASI   ground        THEMIS-ASI FSIM
46           themis-asi   gillam          panchromatic ASI   ground        THEMIS-ASI GILL
47           themis-asi   inuvik          panchromatic ASI   ground        THEMIS-ASI INUV
48           themis-asi   kapuskasing     panchromatic ASI   ground        THEMIS-ASI KAPU
49           themis-asi   kuujjuaq        panchromatic ASI   ground        THEMIS-ASI KUUJ
50           themis-asi   pinawa          panchromatic ASI   ground        THEMIS-ASI PINA
51           themis-asi   rankin inlet    panchromatic ASI   ground        THEMIS-ASI RANK
52           themis-asi   sanikiluaq      panchromatic ASI   ground        THEMIS-ASI SNKQ
53           themis-asi   taloyoak        panchromatic ASI   ground        THEMIS-ASI TALO

In [7]:
# list all RGB ASIs
#
# filter on the 'instrument_type' attribute
aurorax.search.sources.list_in_table(instrument_type="RGB ASI")

Identifier   Program    Platform       Instrument Type   Source Type   Display Name     
86           rainbow    fort simpson   RGB ASI           ground        Rainbow FSIM     
87           rainbow    gillam         RGB ASI           ground        Rainbow GILL     
88           rainbow    fort smith     RGB ASI           ground        Rainbow FSMI     
89           rainbow    rabbit lake    RGB ASI           ground        Rainbow RABB     
96           trex       fort smith     RGB ASI           ground        TREx RGB FSMI    
98           rainbow    pinawa         RGB ASI           ground        Rainbow PINA     
101          trex       lucky lake     RGB ASI           ground        TREx RGB LUCK    
102          trex       pinawa         RGB ASI           ground        TREx RGB PINA    
103          trex       gillam         RGB ASI           ground        TREx RGB GILL    
104          trex       rabbit lake    RGB ASI           ground        TREx RGB RABB    
339          trex    

In [8]:
# list all Swarm spacecraft data sources
#
# filter on the 'program' attribute
aurorax.search.sources.list_in_table(program="swarm")

Identifier   Program   Platform   Instrument Type   Source Type   Display Name
3            swarm     swarma     footprint         leo           Swarm A     
29           swarm     swarmb     footprint         leo           Swarm B     
30           swarm     swarmc     footprint         leo           Swarm C     


In [9]:
# we can also filter on the 'source_type' attribute. Valid values
# include ground, leo, heo, and event_list, among others. See the
# documentation for a full list.
#
# list all HEO (highly-elliptical orbit) data sources
aurorax.search.sources.list_in_table(source_type="heo")

Identifier   Program   Platform   Instrument Type   Source Type   Display Name
33           themis    themisa    footprint         heo           THEMIS-A    
34           themis    themisb    footprint         heo           THEMIS-B    
35           themis    themisc    footprint         heo           THEMIS-C    
36           themis    themisd    footprint         heo           THEMIS-D    
37           themis    themise    footprint         heo           THEMIS-E    
38           arase     arase      footprint         heo           Arase       
39           cluster   cluster1   footprint         heo           Cluster-1   
67           mms       mms1       footprint         heo           MMS 1       
68           rbsp      rbspa      footprint         heo           RBSP-A      
69           rbsp      rbspb      footprint         heo           RBSP-B      
653          goes      goes17     footprint         heo           GOES-17     
654          goes      goes8      footprint         

# Working with identifiers

We may want to perform some tasks where the data source identifier is required as an input (e.g., updating a data source or adding ephemeris records). Let's have a look at a few ways to get the identifier.

In [10]:
# retrieve the data source matching a specific program, platform, and
# instrument type, then extract its identifier
data_sources = aurorax.search.sources.list(program="swarm", platform="swarma", instrument_type="footprint")
identifier = data_sources[0].identifier
print("Identifier: %d" % (identifier))
print()

# get the same data source again, this time using the identifier we just
# extracted (rather than a hard-coded value)
data_source = aurorax.search.sources.get_using_identifier(identifier)
data_source.pretty_print()

Identifier: 3

DataSource:
  identifier                    : 3
  program                       : swarm
  platform                      : swarma
  instrument_type               : footprint
  source_type                   : leo
  display_name                  : Swarm A
  metadata                      : {}
  owner                         : dchaddoc@ucalgary.ca
  maintainers                   : []
  ephemeris_metadata_schema     : [{'field_name': 'nbtrace_region', 'description': "Region based on where the magn...
  data_product_metadata_schema  : []
  stats                         : []
  format                        : full_record


# Get data source information with different levels of information

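You can retrieve varying amounts of information about data sources using the `format` parameter. The default, `pyaurorax.search.FORMAT_FULL_RECORD`, returns the complete record (as seen in the outputs above), while other format types such as `pyaurorax.search.FORMAT_BASIC_INFO` and `pyaurorax.search.FORMAT_IDENTIFIER_ONLY` provide smaller amounts of information.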

The `DataSource` objects will have all attributes, but depending on the format some fields will be set to `None`.

In [11]:
# basic information only
data_source = aurorax.search.sources.get_using_identifier(
    3,
    format=pyaurorax.search.FORMAT_BASIC_INFO,
)
print(data_source)

DataSource(identifier=3, program='swarm', platform='swarma', instrument_type='footprint', source_type='leo', display_name='Swarm A', ...)


In [12]:
# as little information as possible (identifier only)
data_source = aurorax.search.sources.get(
    "swarm",
    "swarma",
    "footprint",
    format=pyaurorax.search.FORMAT_IDENTIFIER_ONLY,
)
print(data_source)

DataSource(identifier=3, program='None', platform='None', instrument_type='None', source_type='None', display_name='None', ...)


In [13]:
# all information (the full record)
data_source = aurorax.search.sources.get(
    "swarm",
    "swarma",
    "footprint",
    format=pyaurorax.search.FORMAT_FULL_RECORD,
)
data_source.pretty_print()

DataSource:
  identifier                    : 3
  program                       : swarm
  platform                      : swarma
  instrument_type               : footprint
  source_type                   : leo
  display_name                  : Swarm A
  metadata                      : {}
  owner                         : dchaddoc@ucalgary.ca
  maintainers                   : []
  ephemeris_metadata_schema     : [{'field_name': 'nbtrace_region', 'description': "Region based on where the magn...
  data_product_metadata_schema  : []
  stats                         : []
  format                        : full_record


# Get statistics for a data source

Each data source has some additional statistics associated with it. You can include this information by setting the `include_stats` parameter to `True`.

In [14]:
# request the data source together with its statistics
data_source = aurorax.search.sources.get(
    "themis",
    "themise",
    "footprint",
    include_stats=True,
)

# print the data source, a blank separator line, then its statistics
data_source.pretty_print()
print()
data_source.stats.pretty_print()

DataSource:
  identifier                    : 37
  program                       : themis
  platform                      : themise
  instrument_type               : footprint
  source_type                   : heo
  display_name                  : THEMIS-E
  metadata                      : {}
  owner                         : dchaddoc@ucalgary.ca
  maintainers                   : []
  ephemeris_metadata_schema     : [{'field_name': 'nbtrace_region', 'description': "Region based on where the magn...
  data_product_metadata_schema  : []
  stats                         : []
  format                        : full_record

DataSourceStatistics:
  ephemeris_count               : 11712947
  data_product_count            : 0
  earliest_ephemeris_loaded     : 2007-02-18 00:14:00
  latest_ephemeris_loaded       : 2029-05-27 00:00:00
  earliest_data_product_loaded  : None
  latest_data_product_loaded    : None
