In [1]:
import datetime
import straxen

In [None]:
# Create an arbitrary datetime object to demonstrate time-based queries.
# NOTE(review): this datetime is naive (no tzinfo is passed) — confirm which
# timezone the datasources assume for `time` lookups.
dtime = datetime.datetime(2019, 2, 16, 13, 10, 4, 781502)

# Introduction
Processing XENON data requires a large number of detector parameters, corrections and other metadata that are not produced in the plugin dependency chain. The goal of CMT2.0 is to offer a way to define a schema and a common interface for accessing this data from multiple data sources such as MongoDB, pandas dataframes and API servers. The schema definition enforces consistent and uniform data, and the common interface prevents hard-coding of data access details in the plugins, notebooks and scripts that use the data. This allows analysts to easily switch out the data source, e.g. for testing, for development, or when a database connection is not available.

## Versioned documents
The scope of CMT has been generalized to include all versioned documents. A collection of versioned documents has at least one index field named `version` and a common schema for all documents. The set of all index field values must be unique for each document of a given schema. By default collections are insert-only, meaning you cannot change the values stored under an index that has already been set.
All versioned documents in straxen should inherit from `straxen.BaseCorrectionSchema` or one of its subclasses.

## Bodega
Bodega (detector parameters) is a perfect example of a collection of versioned documents which share a common schema. The first step in migrating Bodega to CMT2.0 is defining the schema; this code can be found in `straxen/corrections/bodega.py`:

```python

import strax
import rframe
import datetime

from .base_corrections import BaseCorrectionSchema

export, __all__ = strax.exporter()


@export
class Bodega(BaseCorrectionSchema):
    '''Detector parameters.

    A collection of non-time dependent detector values. Documents are
    indexed by `field` in addition to whatever index fields the
    `BaseCorrectionSchema` base class declares.
    '''
    # presumably the name this schema is registered/stored under — confirm
    # against BaseCorrectionSchema
    _NAME = 'bodega'
    
    # Index: name of the detector parameter, e.g. 'drift_velocity'
    field: str = rframe.Index()

    # Non-index (data) fields of each document.
    # NOTE(review): meanings below are inferred from the field names only —
    # presumably the parameter value, its uncertainty, a human-readable
    # definition, a literature/source reference and an associated date.
    value: float
    uncertainty: float
    definition: str
    reference: str
    date: datetime.datetime

```

Notice that we inherit from the `BaseCorrectionSchema` class, so the `version` field is already defined for us. We add an additional index called `field` which will store the field name of the document. The rest of the schema is simply copied from the structure of the bodega json collection. Standard python type hints can be used to enforce the field types. All `pydantic` fields are supported by the framework but a given data storage backend may have some constraints.


### Query interface
Querying a specific datasource can be done using the query API. `Schema.find(datasource, version='v1', field=...)` returns a list of all matching documents, and `Schema.find_one(datasource, version='v1', field=...)` returns only the first match. Each document is an instance of the schema class. If you do not pass a datasource to the query methods, the default datasource is queried.

In [None]:
# find() returns a list with every document matching the given labels.
# No datasource is passed, so the default datasource is queried.
drift_velocities = straxen.Bodega.find(field='drift_velocity')
# Keep the first match (assumes at least one document was found).
drift_velocity = drift_velocities[0]

In [None]:
# find_one() returns the first matching document directly; here the query
# is additionally pinned to version 'v1'.
drift_velocity = straxen.Bodega.find_one(field='drift_velocity', version='v1')
# Display the document (an instance of the schema class).
drift_velocity

In [None]:
# Document fields can be accessed as attributes, e.g. the `value` field
# declared in the Bodega schema.
drift_velocity.value

In [None]:
# Convert the document to a Python dictionary.
drift_velocity.dict()

# Convert the document to a JSON string.
drift_velocity.json()

## Fax Configs
The WFSim configuration has also been migrated to the CMT2.0 framework in `straxen/corrections/fax.py`. The schema definition is as follows:

```python
import strax
import rframe

from typing import Literal, Union

from .base_corrections import BaseCorrectionSchema

export, __all__ = strax.exporter()



@export
class FaxConfig(BaseCorrectionSchema):
    '''fax configuration values for WFSim.

    Documents are indexed by field name, experiment, detector,
    science run and version.
    '''
    _NAME = 'fax_configs'
    class Config:
        # pydantic model option — presumably makes Union fields (see `value`)
        # keep the best-matching member type instead of coercing to the
        # first one; confirm against the pydantic documentation.
        smart_union = True
        
    # Index: name of the configuration entry, e.g. 's2_secondary_sc_gain'
    field: str = rframe.Index()
    # Index: restricted to the listed experiment names, 'nt' by default
    experiment: Literal['1t','nt','nt_design'] = rframe.Index(default='nt')
    # Index: restricted to the listed detector names, 'tpc' by default
    detector: Literal['tpc', 'muon_veto', 'neutron_veto'] = rframe.Index(default='tpc')
    # Index: science run label (free-form string, no default)
    science_run: str = rframe.Index()
    # Index: document version.
    # NOTE(review): the default 'nt' is identical to the `experiment`
    # default and does not look like a version label — confirm this is
    # intended and not a copy-paste slip.
    version: str = rframe.Index(default='nt')

    # The configuration value itself; may be any of the listed types
    value: Union[int,float,bool,str,list,dict]
    # presumably the name of an associated external resource — TODO confirm
    resource: str


```

In this case the documents are also indexed by experiment, detector, and science run.

In [None]:
# Look up a single fax config document by field name and version; the
# remaining labels (experiment, detector, science_run) are left unspecified.
s2_secondary_sc_gain = straxen.FaxConfig.find_one(field='s2_secondary_sc_gain', version='v0')
# `value` holds the configuration value stored in the document.
s2_secondary_sc_gain.value

In [None]:
# presumably lists the non-index (column) fields of the schema — confirm
# against the rframe documentation
straxen.FaxConfig.get_column_fields()

In [None]:
# Query on a subset of the labels: every document for experiment 'nt'
# with version 'v0'.
fax_configs = straxen.FaxConfig.find(experiment='nt', version='v0')
# Show only the first two matches.
fax_configs[:2]

### Resource References
Documents which reference an external resource should inherit from `straxen.ResourceReference` and can define a `fmt` attribute, which will be used to fetch the resource when the `.load()` method is called.

Example: Position reconstruction models

```python

import strax
import rframe
import datetime
from typing import Literal
from .base_references import ResourceReference

export, __all__ = strax.exporter()


@export
class PosRecModel(ResourceReference):
    '''Reference to a position reconstruction model resource.

    Documents are indexed by model kind and by a validity time interval,
    in addition to whatever index fields `ResourceReference` declares.
    '''
    _NAME = "posrec_models"
    # Resource format; presumably used by `ResourceReference.load()` when
    # fetching the resource — confirm against the base class
    fmt = 'json'

    # Index: model type, restricted to one of the three listed kinds
    kind: Literal['cnn','gcn','mlp'] = rframe.Index()
    # Interval index: validity interval of the model; declared as an
    # IntervalIndex rather than a plain Index
    time: rframe.Interval[datetime.datetime] = rframe.IntervalIndex()

    # presumably the name of the referenced resource — TODO confirm
    value: str


```

Here we use the `typing.Literal` generic to define a field that can only be one of three model types and set it to be an index field.
We also introduce an additional time field which is of type Interval and references the validity interval of the model. Setting it to be an IntervalIndex means that queries will be matched by overlap instead of exact value matching.

In [None]:
# Find a 'v1' model document for the time `dtime`. `time` is an interval
# index, so presumably the match is by validity interval rather than by
# exact value — confirm. No `kind` is given, so the first match across
# model kinds is returned.
model_name = straxen.PosRecModel.find_one(version='v1', time=dtime)

# The value field contains the resource name
model_name.value


In [None]:
# Calling the load method on the reference document loads the actual
# model using straxen.get_resource().
model = model_name.load()


# RemoteFrame: pandas/xarray interface
For convenience, additional query APIs are implemented, inspired by the pandas and xarray packages. Most of these methods return a pandas dataframe with the requested data selection.

In [None]:
# The straxen.cframes namespace holds a collection
# of remote frames for all defined corrections;
# here we pick the remote frame for the PMT gains.

gains_rf = straxen.cframes.pmt_gains

In [None]:
# List the names of the index fields of the remote frame
gains_rf.index.names

### xarray api

In [None]:
# Calling the .sel() method with index labels as keyword
# arguments returns a pandas dataframe with the selection result

df = gains_rf.sel(detector='tpc', version='v1', time=dtime)
df

### pandas api

In [None]:
# pandas style multi-indexing also returns a pandas
# dataframe with the selection result.
# Labels are positional; presumably in the order
# (version, time, detector, pmt), with `:` selecting every pmt — confirm
# against gains_rf.index.names

df = gains_rf.loc['v1',dtime,'tpc',:]
df

#### Scalar lookup

In [None]:
# pandas api: .at takes a full index tuple plus a column name
# and looks up a single entry

gains_rf.at[('v1',dtime,'tpc',1), 'value']

In [None]:
# simple callable: column name first, index labels as keyword arguments

gains_rf('value', detector='tpc', version='v1', time=dtime, pmt=1)

# Global Versions
The concept of a global version in CMT2.0 is generalized to a collection of versioned documents where each document references one or more documents in another versioned collection. The schema is implemented in `straxen/corrections/global_versions.py` and is as follows:

```python


@export
class GlobalVersion(TimeIntervalCorrection):
    '''A GlobalVersion document references one or
    more corrections by storing the name and labels required
    to locate the correction in a datasource.
    '''
    _NAME = 'global_versions'

    # arbitrary alias for this reference,
    # this should match the straxen config name
    alias: str = rframe.Index()

    # the global version
    version: str = rframe.Index()

    # validity interval of the document
    time: rframe.Interval[datetime.datetime] = rframe.IntervalIndex()

    # Name of the correction being referenced
    correction: str

    # The attribute in the correction being referenced e.g `value`
    attribute: str

    # The index labels being referenced, eg pmt=[1,2,3], version='v3' etc.
    labels: dict

    def load(self, datasource=None, **overrides):
        '''Load the referenced documents.

        :param datasource: passed through to the referenced schema's
            `find` method.
        :param overrides: labels that replace or extend the stored
            `labels` for this call only.
        :returns: whatever `find` on the referenced schema returns.
        :raises KeyError: if `correction` is not a key of
            `BaseCorrectionSchema._SCHEMAS`.
        '''
        # Overrides win over the labels stored in the document.
        labels = dict(self.labels, **overrides)
        if self.correction not in BaseCorrectionSchema._SCHEMAS:
            raise KeyError(f'Reference to undefined schema name {self.correction}')
        schema = BaseCorrectionSchema._SCHEMAS[self.correction]
        return schema.find(datasource, **labels)

    @property
    def url_config(self):
        '''Convert reference to a URLConfig URL of the form
        `<correction>://<attribute>` with the stored labels added
        as URL keyword arguments.
        '''
        url = f'{self.correction}://{self.attribute}'
        url = straxen.URLConfig.format_url_kwargs(url, **self.labels)
        return url

    @property
    def config_dict(self):
        '''Single-entry config mapping `{alias: url_config}`.'''
        # The schema declares `alias` (not `name`) as the config name;
        # this keeps the key consistent with get_global_config below.
        return {self.alias: self.url_config}

    @classmethod
    def get_global_config(cls, version, datasource=None, names=None, extra_labels=None):
        '''Build a context config from the given global version.

        :param version: the global version to look up.
        :param datasource: passed through to `find`.
        :param names: passed to `find` as the `alias` label.
        :param extra_labels: extra URL keyword arguments added to every
            URL; defaults to `run_id='plugin.run_id'` when None.
        :returns: dict mapping each reference alias to its URL.
        '''
        if extra_labels is None:
            extra_labels = dict(run_id='plugin.run_id')
        refs = cls.find(datasource, version=version, alias=names)
        config = {}
        for ref in refs:
            # extra_labels is never None here, so it is always applied.
            config[ref.alias] = straxen.URLConfig.format_url_kwargs(
                ref.url_config, **extra_labels)
        return config

```

In [None]:
import straxen

st = straxen.contexts.xenonnt()
# Build a {alias: URL} config mapping from the references stored
# under global version 'v1'.
config = straxen.GlobalVersion.get_global_config('v1')

# Apply the resulting options to the context.
st.set_config(config)