# Imports

In [2]:
import datetime
import pprint
import pyaurorax
# create the PyAuroraX object; the searches below are all issued through it
aurorax = pyaurorax.PyAuroraX()

# Search for ephemeris records and filter on the Calgary APA model

UCalgary has developed a machine learning model for identifying Amorphous Pulsating Aurora (APA) in the THEMIS all-sky imagers. This is a binary classification performed on a 10-minute basis, and included in all THEMIS ASI AuroraX search engine 'ephemeris' records as a metadata field.

Metadata fields in AuroraX can be searched upon, allowing users to filter results based on them. Below, we're going to show an example of finding all 1-minute ephemeris records for any THEMIS ASI instrument where this particular model thinks there is APA in the field-of-view for that camera, over a 1 month period.

More information about this model can be found [here](https://docs.aurorax.space/ml/models/ucalgary_apa/).

In [3]:
# search window (all of January 2008) and the program to search within
start = datetime.datetime(2008, 1, 1, 0, 0, 0)
end = datetime.datetime(2008, 1, 31, 23, 59, 59)
programs = ["themis-asi"]

# To filter based on the ML model data, the ephemeris search is performed
# with metadata filters.
#
# The two filters below are combined with a logical AND, so a record is
# only requested when BOTH conditions hold:
#   1. the 'calgary_apa_ml_v1' field says 'classified as APA'
#   2. the 'calgary_apa_ml_v1_confidence' field is greater than or
#      equal to 95%
metadata_filters_logical_operator = "AND"
metadata_filters = [
    # only records that were classified as APA ...
    {"key": "calgary_apa_ml_v1", "operator": "in", "values": ["classified as APA"]},
    # ... with a confidence of at least 95%
    {"key": "calgary_apa_ml_v1_confidence", "operator": ">=", "values": ["95"]},
]

# perform the search (verbose=True so progress messages are shown)
s = aurorax.search.ephemeris.search(
    start=start, end=end,
    programs=programs,
    metadata_filters=metadata_filters,
    metadata_filters_logical_operator=metadata_filters_logical_operator,
    verbose=True,
)

[2024-11-27 21:37:09.770337] Search object created
[2024-11-27 21:37:09.806133] Request submitted
[2024-11-27 21:37:09.806205] Request ID: cd9bcaf3-ab02-4cbe-a6f1-9b84121bd0f6
[2024-11-27 21:37:09.806232] Request details available at: https://api.aurorax.space/api/v1/ephemeris/requests/cd9bcaf3-ab02-4cbe-a6f1-9b84121bd0f6
[2024-11-27 21:37:09.806255] Waiting for data ...
[2024-11-27 21:37:11.267888] Checking for data ...
[2024-11-27 21:37:11.692583] Data is now available
[2024-11-27 21:37:11.692789] Retrieving data ...
[2024-11-27 21:37:13.679613] Retrieved 18.7 MB of data containing 5407 records


In [None]:
# Pretty-print the first 10 records returned by the search
first_ten = s.data[:10]
pprint.pprint(first_ten)

[EphemerisData(epoch=datetime.datetime(2008, 1, 1, 11, 30), location_geo=Location(lat=61.755798, lon=-121.227005), location_gsm=Location(lat=None, lon=None), nbtrace=Location(lat=61.755798, lon=-121.227005), sbtrace=Location(lat=-64.7499007670771, lon=-172.0643415195183), metadata={'clausen_ml_oath': ...}, data_source=DataSource(...)),
 EphemerisData(epoch=datetime.datetime(2008, 1, 1, 11, 31), location_geo=Location(lat=61.755798, lon=-121.227005), location_gsm=Location(lat=None, lon=None), nbtrace=Location(lat=61.755798, lon=-121.227005), sbtrace=Location(lat=-64.74990082859775, lon=-172.06434121158588), metadata={'clausen_ml_oath': ...}, data_source=DataSource(...)),
 EphemerisData(epoch=datetime.datetime(2008, 1, 1, 11, 32), location_geo=Location(lat=61.755798, lon=-121.227005), location_gsm=Location(lat=None, lon=None), nbtrace=Location(lat=61.755798, lon=-121.227005), sbtrace=Location(lat=-64.74990089011834, lon=-172.06434090365335), metadata={'clausen_ml_oath': ...}, data_source=

In [22]:
# Let's print the results of the first 10 records as a table
print(f"{'Timestamp':<25}"
      f"{'calgary_apa_ml_v1':<25} "
      f"{'confidence':<18}")
print("====================================================================")

# Slice instead of indexing with range(10): if the search returned fewer
# than 10 records we print what is there rather than raising IndexError.
for record in s.data[:10]:
    timestamp_str = record.epoch.strftime('%Y-%m-%d %H:%M:%S')
    classification = record.metadata['calgary_apa_ml_v1']
    confidence = record.metadata['calgary_apa_ml_v1_confidence']
    print(f"{timestamp_str:<25}"
          f"{classification:<23} "
          f"  {confidence:<18} ")

Timestamp                calgary_apa_ml_v1         confidence        
2008-01-01 01:11:00      classified as not APA     100.0              
2008-01-01 01:19:00      classified as not APA     100.0              
2008-01-01 01:20:00      classified as not APA     100.0              
2008-01-01 01:22:00      classified as not APA     100.0              
2008-01-01 01:25:00      classified as not APA     100.0              
2008-01-01 01:26:00      classified as not APA     100.0              
2008-01-01 01:27:00      classified as not APA     100.0              
2008-01-01 01:28:00      classified as not APA     100.0              
2008-01-01 01:29:00      classified as not APA     100.0              
2008-01-01 01:30:00      classified as not APA     100.0              


# Search for ephemeris records and filter on the Calgary cloud model

UCalgary has developed a machine learning model for identifying cloud in the THEMIS all-sky imagers. This is a binary classification performed on a 10-minute basis, and included in all THEMIS ASI AuroraX search engine 'ephemeris' records as a metadata field.

Below, we're going to show an example of finding all 1-minute ephemeris records for any THEMIS ASI instrument where this particular model thinks there is no cloud in the field-of-view at Gillam, over a 7 day period.

More information about this model can be found [here](https://docs.aurorax.space/ml/models/ucalgary_cloud/).

In [None]:
# set up search parameters: first 7 days of January 2008, restricted to
# the THEMIS ASI program and the Gillam platform
start = datetime.datetime(2008, 1, 1, 0, 0, 0)
end = datetime.datetime(2008, 1, 7, 23, 59, 59)
programs = ["themis-asi"]
platforms = ["gillam"]

# we're going to filter to only retrieve ephemeris data for when
# the 'calgary_cloud_ml_v1' field says 'classified as not cloudy',
# and has a confidence score greater than or equal to 75%.
#
# both filters must match, hence the "AND" logical operator
metadata_filters_logical_operator = "AND"
metadata_filters = [
          {
            "key": "calgary_cloud_ml_v1",
            "operator": "in",
            "values": [
              "classified as not cloudy"
            ]
          },
          {
            "key": "calgary_cloud_ml_v1_confidence",
            "operator": ">=",     # inclusive: a confidence of exactly 75 is kept
            "values": [
              "75"
            ]
          }
        ]

# Perform the search (verbose=True so progress messages are shown)
s = aurorax.search.ephemeris.search(start=start,
                                    end=end,
                                    programs=programs,
                                    platforms=platforms,
                                    metadata_filters_logical_operator=metadata_filters_logical_operator,
                                    metadata_filters=metadata_filters,
                                    verbose=True)

[2024-11-27 22:15:44.030167] Search object created
[2024-11-27 22:15:44.059359] Request submitted
[2024-11-27 22:15:44.059426] Request ID: 417610af-5ebf-40cb-a3f9-bdbeecfe96c2
[2024-11-27 22:15:44.059453] Request details available at: https://api.aurorax.space/api/v1/ephemeris/requests/417610af-5ebf-40cb-a3f9-bdbeecfe96c2
[2024-11-27 22:15:44.059476] Waiting for data ...
[2024-11-27 22:15:45.530094] Checking for data ...
[2024-11-27 22:15:45.953475] Data is now available
[2024-11-27 22:15:45.953686] Retrieving data ...
[2024-11-27 22:15:46.246634] Retrieved 2.1 MB of data containing 597 records


In [17]:
# Again, print some results: up to the first 10 records as a table
print(f"{'Timestamp':<25}"
      f"{'Classification':<30} "
      f"{'Confidence':<18}")
print("====================================================================")

# Slice instead of indexing with range(10): if the search returned fewer
# than 10 records we print what is there rather than raising IndexError.
for record in s.data[:10]:
    timestamp_str = record.epoch.strftime('%Y-%m-%d %H:%M:%S')
    classification = record.metadata['calgary_cloud_ml_v1']
    confidence = record.metadata['calgary_cloud_ml_v1_confidence']
    print(f"{timestamp_str:<25}"
          f"{classification:<28} "
          f"  {confidence:<18} ")

Timestamp                Classification                 Confidence        
2008-01-01 01:11:00      classified as cloudy           79.45              
2008-01-01 01:19:00      classified as cloudy           79.45              
2008-01-01 01:20:00      classified as not cloudy       73.82              
2008-01-01 01:22:00      classified as not cloudy       73.82              
2008-01-01 01:25:00      classified as not cloudy       73.82              
2008-01-01 01:26:00      classified as not cloudy       73.82              
2008-01-01 01:27:00      classified as not cloudy       73.82              
2008-01-01 01:28:00      classified as not cloudy       73.82              
2008-01-01 01:29:00      classified as not cloudy       73.82              
2008-01-01 01:30:00      classified as not cloudy       87.03              


# Search for ephemeris records and filter on the OATH model

The OATH model was developed by [Clausen & Nickisch](https://doi.org/10.1029/2018JA025274), and has been run by the AuroraX team for all THEMIS ASI data in the AuroraX search engine.

- Paper: https://doi.org/10.1029/2018JA025274
- More information: http://tid.uio.no/plasma/oath

This paper has two versions of the model, one that is a binary classifier (aurora/no aurora), and one that is a 6-class classifier. AuroraX currently only contains metadata derived from the 6-class model.

The OATH model is a 6-class classification performed on a 10-minute basis, and included in all THEMIS ASI AuroraX search engine 'ephemeris' records as a metadata field.

Below, we're going to show an example of finding all 1-minute ephemeris records for any THEMIS ASI instrument where the OATH model has classified the minute as diffuse or discrete aurora, over a 1 day period.

More information about the AuroraX utilization of this model can be found [here](https://docs.aurorax.space/ml/models/clausen_oath/).

In [11]:
# search window (the single day of 2008-01-01) and the program to search
start = datetime.datetime(2008, 1, 1, 0, 0, 0)
end = datetime.datetime(2008, 1, 1, 23, 59, 59)
programs = ["themis-asi"]

# Only retrieve ephemeris data where the 'clausen_ml_oath' field says
# either 'classified as diffuse' or 'classified as discrete'. Both labels
# go in the value list of a single "in" filter.
metadata_filters_logical_operator = "AND"
metadata_filters = [
    {
        "key": "clausen_ml_oath",
        "operator": "in",
        "values": ["classified as diffuse", "classified as discrete"],
    },
]

# Perform the search (verbose=True so progress messages are shown)
s = aurorax.search.ephemeris.search(
    start=start, end=end,
    programs=programs,
    metadata_filters=metadata_filters,
    metadata_filters_logical_operator=metadata_filters_logical_operator,
    verbose=True,
)

[2024-11-27 22:30:29.991861] Search object created
[2024-11-27 22:30:30.016479] Request submitted
[2024-11-27 22:30:30.016520] Request ID: 5c66820d-32c4-49e1-860c-4853cb799188
[2024-11-27 22:30:30.016532] Request details available at: https://api.aurorax.space/api/v1/ephemeris/requests/5c66820d-32c4-49e1-860c-4853cb799188
[2024-11-27 22:30:30.016566] Waiting for data ...
[2024-11-27 22:30:31.444297] Checking for data ...
[2024-11-27 22:30:31.863775] Data is now available
[2024-11-27 22:30:31.863949] Retrieving data ...
[2024-11-27 22:30:32.447506] Retrieved 5.3 MB of data containing 1532 records


In [14]:
# Again, print some results: up to the first 10 records as a table
print(f"{'Timestamp':<25}"
      f"{'Classification':<18}")
print("=================================================")

# Slice instead of indexing with range(10): if the search returned fewer
# than 10 records we print what is there rather than raising IndexError.
for record in s.data[:10]:
    timestamp_str = record.epoch.strftime('%Y-%m-%d %H:%M:%S')
    classification = record.metadata['clausen_ml_oath']
    print(f"{timestamp_str:<23}"
          f"  {classification:<18} ")

Timestamp                Classification    
2008-01-01 01:11:00      classified as discrete 
2008-01-01 01:19:00      classified as diffuse 
2008-01-01 01:20:00      classified as diffuse 
2008-01-01 01:22:00      classified as discrete 
2008-01-01 01:25:00      classified as diffuse 
2008-01-01 01:26:00      classified as diffuse 
2008-01-01 01:27:00      classified as diffuse 
2008-01-01 01:28:00      classified as diffuse 
2008-01-01 01:29:00      classified as diffuse 
2008-01-01 01:30:00      classified as diffuse 
