# Imports

In [None]:
import aurorax
import datetime
import pandas as pd
import pprint

# Search

In [15]:
# search parameters reused by the search cells below
start_dt = datetime.datetime(year=2020, month=2, day=1)
end_dt = datetime.datetime(year=2020, month=2, day=5, hour=23, minute=59, second=59)
programs = ["trex"]
instrument_types = ["RGB ASI"]

In [16]:
# perform search (verbose=True prints timestamped progress messages)
# instrument_types is passed as well so this query uses the same filters as
# the step-by-step Search later in the notebook; it was defined above but
# previously left unused here
s = aurorax.data_products.search(start_dt,
                                end_dt,
                                programs=programs,
                                instrument_types=instrument_types,
                                verbose=True)

[2021-06-04 14:19:00.812953] Search object created
[2021-06-04 14:19:00.938218] Request submitted
[2021-06-04 14:19:00.938218] Request ID: b817d060-8588-43be-a05b-d6d2baf02e11
[2021-06-04 14:19:00.938218] Request details available at: https://api.staging.aurorax.space/api/v1/data_products/requests/b817d060-8588-43be-a05b-d6d2baf02e11
[2021-06-04 14:19:00.938218] Waiting for data ...
[2021-06-04 14:19:02.041763] Checking for data ...
[2021-06-04 14:19:02.085149] Data is now available
[2021-06-04 14:19:02.085149] Retrieving data ...
[2021-06-04 14:19:02.301863] Retrieved 986.6 kB of data containing 410 records


In [17]:
# turn each returned record into a plain dict of its attributes, load the
# dicts into a pandas dataframe, and display it ordered by start time
data_products = list(map(vars, s.data))
df = pd.DataFrame(data_products)
df.sort_values(by="start")

Unnamed: 0,data_source,data_product_type,start,end,url,metadata
0,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-01 00:00:00,2020-02-01 00:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly', 'imaging_end_time':..."
17,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_moviederived', 'imagin..."
16,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily', 'imaging_end_time': ..."
15,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_moviederived', 'imagin..."
14,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily', 'imaging_end_time': ..."
...,...,...,...,...,...,...
402,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-05 12:00:00,2020-02-05 12:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly', 'imaging_end_time':..."
407,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-05 13:00:00,2020-02-05 13:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly', 'imaging_end_time':..."
408,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-05 13:00:00,2020-02-05 13:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly', 'imaging_end_time':..."
406,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-05 13:00:00,2020-02-05 13:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly', 'imaging_end_time':..."


# Do the search asynchronously

Under the hood, a data products search is asynchronous. Driving it step by step adds some complexity for the user, but it also opens up additional possibilities. The main advantage is being able to perform queries over a large timeframe. Such queries take longer: they can run for several minutes and return more than a gigabyte of data. Separating the request and the retrieval of the resulting data into two different API calls allows flexibility in both the duration and the size of the overall request.

In [18]:
# build the search request object; printing it shows its initial state
# (not yet executed, no request ID, no data)
s = aurorax.data_products.Search(
    start_dt,
    end_dt,
    programs=programs,
    instrument_types=instrument_types,
)
print(s)


{'completed': False,
 'data': [],
 'data_url': '',
 'end': datetime.datetime(2020, 2, 5, 23, 59, 59),
 'executed': False,
 'instrument_types': ['RGB ASI'],
 'logs': [],
 'metadata_filters': None,
 'platforms': None,
 'programs': ['trex'],
 'query': {},
 'request': None,
 'request_id': '',
 'request_url': '',
 'start': datetime.datetime(2020, 2, 1, 0, 0),
 'status': {}}


In [19]:
# execute the search: submit the request, then print the search object again
# (the output below now shows 'executed': True plus a request ID and URL)
s.execute()
print(s)

{'completed': False,
 'data': [],
 'data_url': '',
 'end': datetime.datetime(2020, 2, 5, 23, 59, 59),
 'executed': True,
 'instrument_types': ['RGB ASI'],
 'logs': [],
 'metadata_filters': None,
 'platforms': None,
 'programs': ['trex'],
 'query': {'data_sources': {'data_product_metadata_filters': [],
                            'instrument_types': ['RGB ASI'],
                            'platforms': [],
                            'programs': ['trex']},
           'end': '2020-02-05T23:59:59',
           'start': '2020-02-01T00:00:00'},
 'request': {'data': None, 'request': <Response [202]>, 'status_code': 202},
 'request_id': '684fa023-c3b1-4da3-b660-3bae87992ce4',
 'request_url': 'https://api.staging.aurorax.space/api/v1/data_products/requests/684fa023-c3b1-4da3-b660-3bae87992ce4',
 'start': datetime.datetime(2020, 2, 1, 0, 0),
 'status': {}}


In [20]:
# get request status: refresh the status of the request, then pretty-print it
s.update_status()
pprint.pprint(s.status)

{'logs': [{'level': 'debug',
           'summary': 'Search request arrived',
           'timestamp': '2021-06-04T20:19:42.281158Z'},
          {'level': 'info',
           'summary': 'Starting query for 684fa023-c3b1-4da3-b660-3bae87992ce4',
           'timestamp': '2021-06-04T20:19:42.287291Z'},
          {'level': 'info',
           'summary': 'Finished query in 0.01 seconds.',
           'timestamp': '2021-06-04T20:19:42.297021Z'},
          {'level': 'debug',
           'summary': 'Starting to write data to file',
           'timestamp': '2021-06-04T20:19:42.300442Z'},
          {'level': 'info',
           'summary': 'Finished writing data, found 410 records in 0.05 '
                      'seconds.',
           'timestamp': '2021-06-04T20:19:42.353315Z'},
          {'level': 'debug',
           'summary': 'File size is 986570 bytes.',
           'timestamp': '2021-06-04T20:19:42.355981Z'},
          {'level': 'info',
           'summary': 'Finished search in 0.07 seconds and foun

In [21]:
# view just the logs for the request
# (the status must be updated first; the previous cell already called s.update_status())
pprint.pprint(s.logs)

[{'level': 'debug',
  'summary': 'Search request arrived',
  'timestamp': '2021-06-04T20:19:42.281158Z'},
 {'level': 'info',
  'summary': 'Starting query for 684fa023-c3b1-4da3-b660-3bae87992ce4',
  'timestamp': '2021-06-04T20:19:42.287291Z'},
 {'level': 'info',
  'summary': 'Finished query in 0.01 seconds.',
  'timestamp': '2021-06-04T20:19:42.297021Z'},
 {'level': 'debug',
  'summary': 'Starting to write data to file',
  'timestamp': '2021-06-04T20:19:42.300442Z'},
 {'level': 'info',
  'summary': 'Finished writing data, found 410 records in 0.05 seconds.',
  'timestamp': '2021-06-04T20:19:42.353315Z'},
 {'level': 'debug',
  'summary': 'File size is 986570 bytes.',
  'timestamp': '2021-06-04T20:19:42.355981Z'},
 {'level': 'info',
  'summary': 'Finished search in 0.07 seconds and found 410 records.',
  'timestamp': '2021-06-04T20:19:42.360213Z'}]


In [25]:
# wait for the data, then refresh the request status once more
s.wait()
s.update_status()

In [28]:
# get data
s.get_data()

# show data as pandas dataframe
# the new frame is bound to `df` so the table displayed is built from this
# search's results (it was previously assigned to `fd`, which left the
# displayed `df` pointing at the frame from the earlier synchronous search)
data_products = [d.__dict__ for d in s.data]
df = pd.DataFrame(data_products)
df.sort_values("start")

Unnamed: 0,data_source,data_product_type,start,end,url,metadata
0,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-01 00:00:00,2020-02-01 00:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly', 'imaging_end_time':..."
17,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_moviederived', 'imagin..."
16,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily', 'imaging_end_time': ..."
15,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_moviederived', 'imagin..."
14,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily', 'imaging_end_time': ..."
...,...,...,...,...,...,...
402,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-05 12:00:00,2020-02-05 12:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly', 'imaging_end_time':..."
407,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-05 13:00:00,2020-02-05 13:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly', 'imaging_end_time':..."
408,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-05 13:00:00,2020-02-05 13:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly', 'imaging_end_time':..."
406,"{'data_product_metadata_schema': [],\n 'displa...",keogram,2020-02-05 13:00:00,2020-02-05 13:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly', 'imaging_end_time':..."
