In [1]:
from pymongo import MongoClient
from collections import deque
from metadataclient.api import (insert_run_start, insert_descriptor,
                                insert_run_stop, insert_event, bulk_insert_events)
from metadataclient.api import (find_run_starts, find_descriptors, find_events,
                               find_run_stops, find_last)
from metadataclient.conf import connection_config
from metadataclient.api import (insert_run_start, insert_descriptor, insert_event,
                               bulk_insert_events, insert_run_stop)

In [2]:
class MdsMigrationException(RuntimeError):
    """Signals that the migration hit a condition it cannot recover from.

    The oid lookup helpers in this notebook raise it when a document that
    another document refers to is missing from the source database.
    """

# Before this script is run, make sure you create a database called metadataservice

# TODO: Switch to deque from OrderedDict if too slow

**_Ensure global connection management is functional by providing fake host information_**

In [None]:
# Deliberately aim the client at fake host information, then attempt one
# request through it. Whatever goes wrong is ignored on purpose: the cell only
# exercises the global connection management, and the real host is configured
# in a later cell.
MONGO_HOST = 'kusadasi'
try:
    connection_config['host'] = MONGO_HOST
    res = next(find_last())
except Exception:
    pass

**Connection Management**

In [3]:
# --- Source: the legacy MongoDB, read directly through pymongo -------------
MONGO_HOST = 'localhost'
MONGO_PORT = 27017
MIGRATION_DB = 'datastore'

# --- Destination: the service that metadataclient talks to ------------------
# Assumes both mongo and the tornado server run on the same machine, which is
# why the mongo host is reused as the service host.
MDS_HOST = MONGO_HOST
MDS_PORT = 7770
MDS_DATABASE = 'metadataservice'

connection_config['host'] = MDS_HOST
connection_config['port'] = MDS_PORT
connection_config['database'] = MDS_DATABASE

# Handle on the database whose documents are going to be migrated.
pymongo_client = MongoClient(MONGO_HOST, MONGO_PORT)
database = pymongo_client[MIGRATION_DB]

# Optional connectivity check, left disabled by the original author.
# NOTE(review): as written it would also fail on an empty metadataservice,
# which is presumably why it is switched off for a first migration -- confirm.
# try:
#     res = (next(find_last()))
# except Exception:
#     raise MdsMigrationException('Seems like metadataservice is not accessible or empty')

**Get all run_starts**

In [4]:
# Visual sanity check: show where documents are read from and where they are
# going to be written before anything is inserted.
print('Database to be migrated', database)
destination = (connection_config['host'], connection_config['database'])
print('Database data being saved ', *destination)

Database to be migrated Database(MongoClient('localhost', 27017), 'datastore')
Database data being saved  localhost metadataservice


In [5]:
def rstart_oid_lookup(database, oid):
    """Fetch the run_start document whose ``_id`` equals ``oid``.

    Parameters
    ----------
    database
        Object exposing a ``run_start`` collection with a ``find`` method.
    oid
        Value matched against the ``_id`` field.

    Returns
    -------
    The first document yielded by the query.

    Raises
    ------
    MdsMigrationException
        If the query yields no document.
    """
    not_found = object()  # sentinel, so any yielded value counts as a hit
    matches = database.run_start.find({'_id': oid})
    document = next(matches, not_found)
    if document is not_found:
        raise MdsMigrationException('Seems like run_start with this oid does not exist')
    return document

In [6]:
def descriptor_oid_lookup(database, oid):
    """Fetch the event_descriptor document whose ``_id`` equals ``oid``.

    Parameters
    ----------
    database
        Object exposing an ``event_descriptor`` collection with a ``find``
        method.
    oid
        Value matched against the ``_id`` field.

    Returns
    -------
    The first document yielded by the query.

    Raises
    ------
    MdsMigrationException
        If the query yields no document.
    """
    not_found = object()  # sentinel, so any yielded value counts as a hit
    matches = database.event_descriptor.find({'_id': oid})
    document = next(matches, not_found)
    if document is not_found:
        raise MdsMigrationException('Seems like descriptor with this oid does not exist')
    return document

**Save all run_starts into a deque()**

**Go over all beamline configurations, delete their run_start reference fields, add them as embedded documents to the run_start, and update the entry in the run_starts deque()**

In [7]:
# Migrate every run_start: strip the Mongo-specific and legacy fields, embed
# the beamline configurations that point at it, and insert the result through
# metadataclient. The migrated documents are also kept in `run_starts`.
rstart_crsr = database.run_start.find()
run_starts = deque()
print(connection_config)
for rstart in rstart_crsr:
    rs_id = rstart.pop('_id')  # no leakage of the ObjectId from the older version!

    # Embed the beamline configurations that reference this run_start, keyed
    # by their own uid, without their oid and back-reference fields.
    # NOTE(review): run_start documents also carry 'beamline_config_id';
    # confirm that beamline_config documents really hold a 'run_start' field.
    configs = {}
    for b in database.beamline_config.find({'run_start': rs_id}):
        del b['_id'], b['run_start']
        configs[b['uid']] = b

    # Legacy fields that must not be forwarded to the new schema.
    rstart.pop('time_as_datetime', None)
    rstart.pop('beamline_config_id', None)

    # The named fields are popped first; whatever is left in `rstart` is
    # forwarded untouched through **rstart.
    params = dict(time=rstart.pop('time'), scan_id=rstart.pop('scan_id'),
                  beamline_id=rstart.pop('beamline_id'), uid=rstart.pop('uid'),
                  configs=configs, migrated=True,
                  **rstart)

    # Keep the complete migrated document. By this point `rstart` has lost its
    # uid/time/scan_id/beamline_id, so storing it would cache a record that
    # can no longer be identified.
    run_starts.append(params)
    insert_run_start(**params)

{'database': 'metadataservice', 'port': 7770, 'timezone': 'US/Eastern', 'protocol': 'http', 'host': 'localhost'}


#### **Get all descriptors and insert them using metadataclient. Iterate over descriptors, replace run_start oid references with run_start uid foreign keys, and save them to a deque and a dict that is keyed on uids (for caching)**

In [8]:
# TODO: Test this with an older schema
descriptors = deque()    # every descriptor, in cursor order, with '_id' put back
desc_dict = {}           # the same documents, keyed by descriptor uid
invalid_descs = deque()  # descriptors whose insert raised ValueError
for desc in database.event_descriptor.find():
    # Resolving the parent run_start also proves that it exists: the lookup
    # raises MdsMigrationException when it does not.
    parent_start = rstart_oid_lookup(database, desc.pop('run_start_id'))
    source_oid = desc.pop('_id')  # must not be sent along with the insert
    desc.pop('time_as_datetime', None)
    # Swap the oid reference for a uid foreign key and tag the document.
    desc.update(run_start=parent_start['uid'], migrated=True)
    try:
        insert_descriptor(**desc)
    except ValueError:
        invalid_descs.append(desc)
    # Restore the oid afterwards: the event cell uses it to find the events
    # that belong to this descriptor.
    desc['_id'] = source_oid
    desc_dict[desc['uid']] = desc
    descriptors.append(desc)

In [9]:
# CSX specific: an earlier migration left some data keys with a None dtype,
# and according to the original author those keys are consistently numbers.
print(len(invalid_descs))
# Patch the None dtypes of the rejected descriptors in place. The author
# counted only 7 such documents, so fixing them by hand is acceptable.
# NOTE(review): this only edits the documents in memory; nothing in this cell
# submits them to insert_descriptor again -- confirm whether a retry is needed.
for d in invalid_descs:
    for key_spec in d['data_keys'].values():
        if key_spec['dtype'] is None:
            key_spec['dtype'] = 'number'

0


**All run_starts and descriptors have been inserted so far; next, bulk-insert the events that belong to each descriptor**

In [None]:
# For every descriptor collected earlier: look up all events that reference
# its oid, rewrite them to reference the descriptor uid instead, and bulk
# insert them in one call per descriptor.
# Known issue (original author): something is clearly wrong with the way the
# events are bulk inserted. Tests pass, but something below is wrong; this was
# still being worked on.
# NOTE(review): d.pop('_id') removes the oid from the descriptor in place, so
# re-running this cell without rebuilding `descriptors` raises KeyError.
for d in descriptors:
    # The oid is consumed here, so `d` no longer contains '_id' when it is
    # handed to bulk_insert_events below.
    event_crsr = database.event.find({'descriptor_id': d.pop('_id')})
    events = deque()
    for e in event_crsr:
        e['descriptor'] = d['uid']  # uid foreign key replaces the oid reference
        e['migrated'] = True
        del(e['_id'], e['descriptor_id'])  # drop the Mongo oid fields
        events.append(e)
#         print(e)
    # Called even when `events` is empty; validation is switched off.
    # NOTE(review): the whole descriptor dict is passed as event_descriptor --
    # confirm whether the API expects the document or only its uid.
    bulk_insert_events(event_descriptor=d, events=events, validate=False)
        

In [10]:
# Migrate every run_stop: drop its oid, replace the run_start oid reference
# with the run_start uid, tag it as migrated and insert it.
rstops = deque()
for rstop in database.run_stop.find():
    del rstop['_id']
    # Raises MdsMigrationException when the referenced run_start is missing;
    # the lookup sits outside the try below, so that error is not swallowed.
    parent_start = rstart_oid_lookup(database, rstop.pop('run_start_id'))
    rstop.update(run_start=parent_start['uid'], migrated=True)
    try:
        insert_run_stop(**rstop)
    except RuntimeError:
        # Every RuntimeError from the insert is reported as a duplicate;
        # presumably that is the only case the client raises it for.
        print('I caught duplicate run_stops! More than one run_stop per start!')
    