In [None]:
# basics

In [2]:
for i in [1,2,3]:
    print i

1
2
3


In [7]:
for i in [{'a': 'aa', 'b': 'bb'}, {'c': 'cc'}]:
    for item in i.iteritems():
        print item

('a', 'aa')
('b', 'bb')
('c', 'cc')


In [None]:
# samples from RoV

In [8]:
arr = [ {  "register": "1", "folio": "1v", "vtn": "1.002", "image": "heb90021.0002" },
 {  "register": "1", "folio": "2v", "vtn": "1.004", "image": "heb90021.0004" },
 {  "register": "1", "folio": "3v", "vtn": "1.006", "image": "heb90021.0006" }]

In [9]:
arr

[{'folio': '1v', 'image': 'heb90021.0002', 'register': '1', 'vtn': '1.002'},
 {'folio': '2v', 'image': 'heb90021.0004', 'register': '1', 'vtn': '1.004'},
 {'folio': '3v', 'image': 'heb90021.0006', 'register': '1', 'vtn': '1.006'}]

In [27]:
arr[0]

{'folio': '1v', 'image': 'heb90021.0002', 'register': '1', 'vtn': '1.002'}

In [28]:
arr[0]['folio']

'1v'

In [29]:
'1v' in arr[0]['folio']

True

In [None]:
# new object literal

In [37]:
transcripts = {'registers': {}}

In [40]:
transcripts

{'registers': {}}

In [39]:
'1' in transcripts['registers']

False

In [None]:
# proof-of-concept

In [42]:
for i in arr:
    # Init a register
    if i['register'] not in transcripts['registers']:
        transcripts['registers'][i['register']] = {'folios': {}}
    # Init a folio 
    if i['folio'] not in transcripts['registers'][i['register']]['folios']:
        transcripts['registers'][i['register']]['folios'][i['folio']] = {'vtns': {}}
    # Populate folio w/ corresponding transcript info (vtn, image)
    transcripts['registers'][i['register']]['folios'][i['folio']]['vtns'][i['vtn']] = {'image': i['image']}           

In [45]:
transcripts

{'registers': {'1': {'folios': {'1v': {'vtns': {'1.002': {'image': 'heb90021.0002'}}},
    '2v': {'vtns': {'1.004': {'image': 'heb90021.0004'}}},
    '3v': {'vtns': {'1.006': {'image': 'heb90021.0006'}}}}}}}

In [47]:
import pprint
pp = pprint.PrettyPrinter()
pp.pprint(transcripts)

{'registers': {'1': {'folios': {'1v': {'vtns': {'1.002': {'image': 'heb90021.0002'}}},
                                '2v': {'vtns': {'1.004': {'image': 'heb90021.0004'}}},
                                '3v': {'vtns': {'1.006': {'image': 'heb90021.0006'}}}}}}}


In [48]:
# real deal

In [92]:
def rovEntryToJson(entry, transcripts={'registers': {}}):
    
    # Init a register
    if entry['register'] not in transcripts['registers']:
        transcripts['registers'][entry['register']] = {'folios': {}}
    
    # Init a folio 
    if entry['folio'] not in transcripts['registers'][entry['register']]['folios']:
        transcripts['registers'][entry['register']]['folios'][entry['folio']] = {'vtns': {}}
    
    # Populate folio w/ corresponding transcript info (vtn, image)
    transcripts['registers'][entry['register']]['folios'][entry['folio']]['vtns'][entry['vtn']] = {'image': entry['image']}
    
    return transcripts

In [54]:
import ast

In [93]:
transcripts = {'registers': {}}

In [94]:
transcripts

{'registers': {}}

In [95]:
with open('rovTranscripts.txt') as inputfile:
    for line in inputfile.readlines():
        try:
            entry = ast.literal_eval(line[1:-2])
            rovEntryToJson(entry, transcripts)
        except SyntaxError:
            print "Data has been processed."
    inputfile.close()

Data has been processed.


In [None]:
# If there gets to be more, we can throw it into a MongoDB.

In [97]:
pp.pprint(transcripts)

{'registers': {'1': {'folios': {'10v': {'vtns': {'1.02': {'image': 'heb90021.0020'}}},
                                '11v': {'vtns': {'1.022': {'image': 'heb90021.0022'}}},
                                '12v': {'vtns': {'1.024': {'image': 'heb90021.0024'}}},
                                '13v': {'vtns': {'1.026': {'image': 'heb90021.0026'}}},
                                '14v': {'vtns': {'1.028': {'image': 'heb90021.0028'}}},
                                '15v': {'vtns': {'1.03': {'image': ''}}},
                                '16v': {'vtns': {'1.032': {'image': 'heb90021.0031'}}},
                                '17v': {'vtns': {'1.034': {'image': ''}}},
                                '18v': {'vtns': {'1.036': {'image': ''}}},
                                '19v': {'vtns': {'1.038': {'image': ''}}},
                                '1v': {'vtns': {'1.002': {'image': 'heb90021.0002'},
                                                '2.004': {'image': 'heb90021.0114'}}},
  

In [98]:
type(transcripts)

dict

In [None]:
# export as JSON

In [99]:
import json

In [104]:
transcriptsJSON = json.dumps(transcripts, sort_keys=True)

In [114]:
type(transcriptsJSON)

str

In [119]:
with open('rovTranscripts.json', 'w') as outfile:
    json.dump(transcriptsJSON, outfile)

In [None]:
#

In [106]:
transcriptsJSON

'{"registers": {"1": {"folios": {"10v": {"vtns": {"1.02": {"image": "heb90021.0020"}}}, "11v": {"vtns": {"1.022": {"image": "heb90021.0022"}}}, "12v": {"vtns": {"1.024": {"image": "heb90021.0024"}}}, "13v": {"vtns": {"1.026": {"image": "heb90021.0026"}}}, "14v": {"vtns": {"1.028": {"image": "heb90021.0028"}}}, "15v": {"vtns": {"1.03": {"image": ""}}}, "16v": {"vtns": {"1.032": {"image": "heb90021.0031"}}}, "17v": {"vtns": {"1.034": {"image": ""}}}, "18v": {"vtns": {"1.036": {"image": ""}}}, "19v": {"vtns": {"1.038": {"image": ""}}}, "1v": {"vtns": {"1.002": {"image": "heb90021.0002"}, "2.004": {"image": "heb90021.0114"}}}, "20v": {"vtns": {"1.04": {"image": "heb90021.0035"}}}, "21v": {"vtns": {"1.042": {"image": "heb90021.0037"}}}, "22v": {"vtns": {"1.044": {"image": ""}}}, "23v": {"vtns": {"1.046": {"image": ""}}}, "24v": {"vtns": {"1.048": {"image": "heb90021.0039"}}}, "25v": {"vtns": {"1.05": {"image": "heb90021.0041"}}}, "26v": {"vtns": {"1.052": {"image": "heb90021.0043"}}}, "27v"

In [103]:
print json.dumps(transcripts, sort_keys=True, indent=4, separators=(',', ': '))

{
    "registers": {
        "1": {
            "folios": {
                "10v": {
                    "vtns": {
                        "1.02": {
                            "image": "heb90021.0020"
                        }
                    }
                },
                "11v": {
                    "vtns": {
                        "1.022": {
                            "image": "heb90021.0022"
                        }
                    }
                },
                "12v": {
                    "vtns": {
                        "1.024": {
                            "image": "heb90021.0024"
                        }
                    }
                },
                "13v": {
                    "vtns": {
                        "1.026": {
                            "image": "heb90021.0026"
                        }
                    }
                },
                "14v": {
                    "vtns": {
                        "1.028": {
           

In [None]:
# test - reading in JSON file

In [120]:
with open('rovTranscripts.json') as data_file:
    data_loaded = json.load(data_file)

In [121]:
transcriptsJSON == data_loaded

True

In [124]:
type(data_loaded)

unicode