# Play with Mapping
This notebook lets us play with the mapping that we create to guide the `MappedHD5Ingestor` class into producing a document stream (ingesting) from an HDF5 file.

1. Build a projection
A [projection](https://blueskyproject.io/event-model/data-model.html#projections) maps from metadata and fields in a docstream to a data structure whose keys belong to a known ontology. An ontology could be a structure like the fields needed to recreate a NeXus file, or the fields needed to display data in an application, like Splash.

2. Import projections file
For now, we have a projection stored in [projections.py](./projections.py). We're using python instead of json because we could add comments. But as long as you use double quotes instead of single quotes for strings and keys, the format is almost the same. This projection will be added to the start doc when we ingest the file.

3. Build a file
First, let's create a sample hdf5 file. We can modify the fields here and watch them update down below when we generate the docstream.

4. Build a mapping
Let's work on the mapping. You create a file (it could be Python or JSON) and provide it to the `ingestor`. The exact mechanism for doing that has not quite been designed yet.

For now, we have a mapping stored in [mapping.py](./mapping.py). We're using python instead of json because we could add comments. But as long as you use double quotes instead of single quotes for strings and keys, the format is almost the same.

5. Import the mapping

In [18]:
import datetime
from importlib import reload
import json
import os
import pytz
import sys
sys.path.append("../..") 
import tempfile
from IPython.utils.tempdir import TemporaryWorkingDirectory
from IPython.display import FileLink
import h5py
import numpy as np
from docstream import MappedHD5Ingestor, MappingNotFoundError
from docstream.model import Mapping

def build_file():
    """Create a small sample HDF5 file and yield it opened for reading.

    This is a generator. The first ``next()`` writes ``test.hdf5`` into the
    directory provided by ``TemporaryWorkingDirectory``, reopens it in read
    mode and yields the open ``h5py.File``. Advancing the generator a second
    time closes that file and finishes the generator (``StopIteration`` is
    raised unless a default is passed to ``next``).

    Yields:
        h5py.File: the sample file, opened with mode ``'r'``.
    """
    temp_dir = TemporaryWorkingDirectory()
    file_path = os.path.join(temp_dir.name, 'test.hdf5')
    local = pytz.timezone("America/Los_Angeles")
    num_frames = 3
    date_data_type = h5py.string_dtype(encoding='ascii')
    image_date = np.empty((num_frames), dtype=object)
    scan_date = np.empty((num_frames), dtype=object)
    data = np.empty((num_frames, 16, 9))
    data_dark = np.empty((1, 16, 9))
    data_white = np.empty((1, 16, 9))

    # one pair of timestamps and one random 16x9 image per frame
    for x in range(0, num_frames):
        image_date[x] = (str(local.localize(datetime.datetime.now(), is_dst=None)))
        scan_date[x] = (str(local.localize(datetime.datetime.now(), is_dst=None)))
        data[x, :, :] = np.random.random_sample((16, 9))
    # one dark, one white
    data_dark[0, :, :] = np.random.random_sample((16, 9))
    data_white[0, :, :] = np.random.random_sample((16, 9))
    # NOTE(review): data_white is filled but never written to the file below;
    # confirm whether a white-field dataset was meant to be created as well.

    # The context manager closes the write handle even if a dataset fails to
    # be created, so the read-only reopen below never races an open writer.
    with h5py.File(file_path, 'w') as file:
        file.create_dataset('/measurement/sample/name', data='nifty sample')
        file.create_dataset('/measurement/instrument/name', data='my station')
        file.create_dataset('/measurement/instrument/source/beamline', data='my beam')
        file.create_dataset('/process/acquisition/image_date', data=image_date, dtype=date_data_type)
        file.create_dataset('/process/acquisition/scan_date', data=scan_date, dtype=date_data_type)
        file.create_dataset('/exchange/data', data=data)
        file.create_dataset('/exchange/dark', data=data_dark)
        file.create_dataset('/process/acquisition/sample_position_x', data=[1.0, 2.0])

    # reopen for reading
    file = h5py.File(file_path, 'r')
    yield file
    # Only reached when the caller advances the generator a second time.
    file.close()

def build_projections():
    """Load the projections defined in the local ``projections`` module.

    The module is reloaded on every call so that edits made to
    ``projections.py`` are picked up without restarting the kernel.

    Returns:
        The ``projections`` attribute of the ``projections`` module.
    """
    # Kept as a function-local import: the notebook later assigns the name
    # `projections` to this function's result, which would shadow a
    # module-level import of the same name.
    import projections

    reload(projections)
    return projections.projections

def build_mapping():
    """Load the mapping from the local ``mapping`` module and validate it.

    The module is reloaded on every call so that edits made to ``mapping.py``
    are picked up without restarting the kernel. A JSON file could be used as
    the source instead; the dictionary only needs to be passed to ``Mapping``.

    Returns:
        Mapping: a ``Mapping`` built from ``mapping.mapping_dict``.
    """
    # Kept as a function-local import: the notebook later assigns the name
    # `mapping` to this function's result, which would shadow a module-level
    # import of the same name.
    import mapping

    reload(mapping)
    # construct a mapping object from dict to validate that we typed it correctly
    return Mapping(**mapping.mapping_dict)

## Ingest!
Now we construct an instance of the `MappedHD5Ingestor` and ask it to generate a docstream that reads the mapping and provides fields from the file.

The root directory variable ('test_root') can help us find the file based on a configurable root dir. It will be written directly into the resource document.


In [19]:
# Set to True to print every document in full instead of just its name.
detailed_output = False

# Keep a single generator: the one that opened the file must also be the one
# that is resumed afterwards, otherwise the file is never closed and a second
# sample file gets built by the "cleanup" call.
file_gen = build_file()
file = next(file_gen)
mapping = build_mapping()
projections = build_projections()

ingestor = MappedHD5Ingestor(mapping, file, 'find_me_in_the_resource_document', projections=projections)

start_doc = {}
stop_doc = {}

# fill up a dictionary to later run a projection from
from databroker.core import BlueskyRun, SingleRunCache
run_cache = SingleRunCache()
try:
    for name, doc in ingestor.generate_docstream():
        run_cache.callback(name, doc)
        if name == "start":
            start_doc = doc
        elif name == "stop":
            stop_doc = doc
        if detailed_output:
            print("\n\n===============")
            print("Document:  " + name)
            if name == 'event':
                # event documents are shown with repr rather than JSON-encoded
                print(repr(doc))
            else:
                print (json.dumps(doc, indent=1))
        else:
            if name == 'start' or name == 'stop':
                doc_str = json.dumps(doc, indent=1)
                print (f"{name}: {doc_str}")
            else:
                print(name)

except MappingNotFoundError as e:
    print('Indigestion! ' + repr(e))
finally:
    # cleanup: resuming the generator runs its post-yield file.close(); the
    # default argument absorbs the StopIteration raised when it finishes.
    next(file_gen, None)

run = run_cache.retrieve()


start: {
 "uid": "915efd31-d44e-49e2-96cc-f4408b19c619",
 "time": 1601481587.0551686,
 "measurement:sample:name": "nifty sample",
 "measurement:instrument:name": "my station",
 "measurement:instrument:source:beamline": "my beam",
 "projections": [
  {
   "name": "app_display",
   "version": "42.0.0",
   "configuration": {},
   "projection": {
    "/entry/instrument/detector/data": {
     "type": "linked",
     "location": "event",
     "stream": "primary",
     "field": "entry/instrument/detector/data"
    }
   }
  }
 ]
}
resource
descriptor
datum
event
datum
event
datum
event
descriptor
datum
event
stop: {
 "uid": "d72d9c0d-30f3-417e-8ac7-f31b5d0589b7",
 "time": 1601481587.0609992,
 "run_start": "915efd31-d44e-49e2-96cc-f4408b19c619",
 "exit_status": "success",
 "reason": "",
 "num_events": {
  "primary": 3,
  "darks": 1
 }
}


In [20]:

# Project the run retrieved from the run cache in the previous cell.
# NOTE(review): presumably this uses the projection embedded in the start
# document ("app_display"); the recorded output of this cell is a
# ProjectionError for the field /entry/instrument/detector/data -- confirm
# that the projection's field name matches a field in the "primary" stream.
from databroker.projector import project_xarray

xarray = project_xarray(run)

ProjectionError: error projecting field: /entry/instrument/detector/data