In [1]:
from pymongo import MongoClient
from collections import deque
from metadataclient.api import (insert_run_start, insert_descriptor,
                                insert_run_stop, insert_event, bulk_insert_events)
from metadataclient.api import (find_run_starts, find_descriptors, find_events,
                               find_run_stops, find_last)
from metadataclient.conf import connection_config
from urllib3.exceptions import ConnectionError
from metadataclient.api import (insert_run_start, insert_descriptor, insert_event,
                               bulk_insert_events, insert_run_stop)

In [2]:
class MdsMigrationException(RuntimeError):
    """Goodbye cruel world!"""
    pass

# Before this script is run, make sure you create a database called metadataservice

# TODO: Switch to deque from OrderedDict if too slow

**_Ensure global connection management is functional by providing fake host information_**

In [3]:
try:
    MONGO_HOST = 'kusadasi'
    connection_config['host'] = MONGO_HOST 
    res = next(find_last())
except Exception:
    pass

**Connection Management**

In [4]:
MONGO_HOST = 'localhost'
MONGO_PORT = 27017
MIGRATION_DB = 'datastore2'
# assuming both mongo and tornado server on the same machine
connection_config['host'] = MONGO_HOST 
connection_config['port'] = 7770
connection_config['database'] = 'metadataservice'
pymongo_client = MongoClient(MONGO_HOST, MONGO_PORT)
database = pymongo_client[MIGRATION_DB]
# try:
#     res = (next(find_last()))
# except Exception:
#     raise MdsMigrationException('Seems like metadataservice is not accessible or empty')

**Get all run_starts**

In [5]:
print('Database to be migrated', database) # visual sanity check
print('Database data being saved ', connection_config['host'], connection_config['database'])

Database to be migrated Database(MongoClient('localhost', 27017), 'datastore2')
Database data being saved  localhost metadataservice


In [6]:
def rstart_oid_lookup(database, oid):
    try:
        res = next(database.run_start.find({'_id':oid}))
    except StopIteration:
        raise MdsMigrationException('Seems like run_start with this oid does not exist')
    return res

In [7]:
def descriptor_oid_lookup(database, oid):
    try:
        res = next(database.event_descriptor.find({'_id':oid}))
    except StopIteration:
        raise MdsMigrationException('Seems like descriptor with this oid does not exist')
    return res

**Save all run_starts into an OrderedDict()**

**Go over all beamline configurations, delete run_start reference fields, add as a embedded document to run_start, and update entry in run_starts deque()**

In [8]:
rstart_crsr = database.run_start.find()
run_starts = deque()
print(connection_config)
for rstart in rstart_crsr:
    rs_id = rstart.pop('_id') # no leakeage from older version!
    bcfg = database.beamline_config.find({'run_start': rs_id})
    configs = {}
    for b in bcfg:
        del(b['_id'], b['run_start'])
        configs[b['uid']] = b
    tr1 = rstart.pop('time_as_datetime', None)
    tr2 = rstart.pop('beamline_config_id', None)
    params = dict(time=rstart.pop('time'), scan_id=rstart.pop('scan_id'),
                 beamline_id=rstart.pop('beamline_id'), uid=rstart.pop('uid'), 
                 configs= (configs if configs else {}), migrated=True,
                 **rstart)
    run_starts.append(rstart)
    insert_run_start(**params)

{'port': 7770, 'timezone': 'US/Eastern', 'protocol': 'http', 'database': 'metadataservice', 'host': 'localhost'}


#### **Get all descriptors and insert them using metadataclient.Iterate over descriptors, replace run_start oid references with run_start uid foreign keys, and save them to a deque and a dict that is keyed on uids(for caching)**

In [9]:
# TODO: Test this with an older schema
desc_crsr = database.event_descriptor.find()
descriptors = deque()
desc_dict = {}
invalid_descs = deque()
for desc in desc_crsr:
    rstart = rstart_oid_lookup(database, desc.pop('run_start_id'))
    # make sure run_start exists
    descr_id = desc.pop('_id') # clear the leaked oid field 
    desc['run_start'] = rstart['uid'] # overwrite the old foreign key
    desc['migrated'] = True
    tr3 = desc.pop('time_as_datetime', None)
    try:
        insert_descriptor(**desc)
    except ValueError:
        invalid_descs.append(desc)
    desc['_id'] = descr_id
    desc_dict[desc['uid']] = desc
    descriptors.append(desc)

In [10]:
# CSX Specific-- Migration created some None dtype that are consistently numbers
print(len(invalid_descs))
# Fix the None dtype fields in these invalid documents
# Only 7 so it is not that bad
for d in invalid_descs:
    tb_inspec = d['data_keys']
    for k, v in tb_inspec.items():
        if v['dtype'] is None:
            v['dtype'] = 'number' # 

7


**Insert all run_starts and descriptors so far**

In [11]:
# pop descriptor from deque, get all corresponding events
# bulk insert events at hand
# Something is clearly wrong with the way I insert bulk_events. Tests pass but smth below is wrong
# working on it
for d in descriptors:
    event_crsr = database.event.find({'descriptor_id': d.pop('_id')})
    events = deque()
    for e in event_crsr:
        e['descriptor'] = d['uid']
        e['migrated'] = True
        del(e['_id'], e['descriptor_id'])
        events.append(e)
#         print(e)
    print(len(events))
    bulk_insert_events(event_descriptor=d, events=events, validate=False)
        

21
1
1
25
11
31
1
1
1
1
4
1
1354
4
1
1
81
30
2552
4
1
1
1
1
1
61
26
26
2552
4
1
1
11
20
1
11
2250
10
1
1
1
1
1
1
20
1
1051
1
1
26
251
1
26
11
1
26
1
1
51
61
1
81
1
1
535
1
1
1
50
1000
1
1
1
21
20
1
1
1051
1
1
26
11
11
1
2051
1
1
1
1
1
500
1
1
2249
10
1
41
1
1
1
51
26
6683
29
3096
1
28
1
1
75
1051
1
1
61
1
4
1
26
1
1
61
11
11
2
5
1
1
11
11
6
1
1
1
12
1
1
1
5
694
500
1
1
2517
10
1
31
1
1
1
51
26
4
1320
1
1
41
12
41
1
1
1
1250
5
1
1
31
1
1
14
1
30
1
26
20
1
21
2
1086
1
877
3
1
20
1
21
21
251
81
5
21
140
360
1
1
1
1
41
3
1000
1
1
41
1
26
6
21
4
10
1
651
5
1
1
61
26
2550
4
1
31
606
1
1
21
1
600
1
251
21
1
1
201
1
1
1
1
1051
1
1
1
1
1
51
3847
1
4
1
1
51
1
1
53
31
1
1
41
21
10
1
11
1
31
4
1
1
31
60
1
1
20
1
1
4
1
10
1
6105
1200
1
2553
4
1
49
11
4
1
5
1
1
51
449
1
1
11
2552
4
1
1
1
1
4
51
2
1
26
1
25
26
21
851
3
1
1
1
1200
1
4
1051
1
1
4
450
1
1
4
1
1
5078
1000
1
1
11
61
1050
1
1
1
1
2249
10
1
853
3
1
450
1
1
21
26
26
1
20
51
1
1
650
5
1
200
1
19
1
181
1
1
11
1
11
1
16
11
539
1
1
21
1
1
21
6
1

ERROR: Internal Python error in the inspect module.
Below is the traceback from this internal error.


KeyboardInterrupt


In [12]:
rstop_crsr = database.run_stop.find()
rstops = deque()
for rstop in rstop_crsr:
    del(rstop['_id'])
    rstart = rstart_oid_lookup(database, rstop.pop('run_start_id'))
    rstop['run_start'] = rstart['uid']
    rstop['migrated'] = True
    try:
        insert_run_stop(migrated=True, **rstop)
    except RuntimeError:
        print('I caught duplicate run_stops! More than one run_stop per start!')
    

TypeError: insert_run_stop() got multiple values for keyword argument 'migrated'