Browse files

initial public commit

  • Loading branch information...
0 parents commit e6aa674eb9f9b06ec400454a6c51fe4a93fc91ad straup committed Aug 10, 2012
Showing with 428 additions and 0 deletions.
  1. +8 −0 README.md
  2. +75 −0 bin/csv-to-json-files.py
  3. +111 −0 bin/generate-glossary.py
  4. +119 −0 bin/generate-meta.py
  5. +70 −0 bin/publish-glossary.py
  6. +45 −0 bin/utils.py
8 README.md
@@ -0,0 +1,8 @@
+collection-tools
+==
+
+Tools for working with museum collections.
+
+They are derived from tools originally written for the [Cooper-Hewitt metadata collections data](https://github.com/cooperhewitt/collection) but should be general purpose enough to be applied in other contexts.
+
+They are still a work-in-progress. In particular, they currently lack documentation.
75 bin/csv-to-json-files.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+
+# I think there might still be some weirdness in the UTF-8 wrangling
+# (20120225/straup)
+
+"""
+Export the contents of objects.csv in to individual JSON files.
+"""
+
+import sys
+import json
+import csv
+import utils
+import os
+import os.path
+
class UnicodeCsvReader(object):
    """Iterator wrapper around csv.reader that decodes each cell to unicode.

    Python 2's csv module yields byte strings; this wrapper decodes every
    cell with *encoding* (default UTF-8) as rows are read.
    """

    def __init__(self, f, encoding="utf-8", **kwargs):
        self.csv_reader = csv.reader(f, **kwargs)
        self.encoding = encoding

    def __iter__(self):
        return self

    def next(self):
        # read and split the csv row into fields
        row = self.csv_reader.next()

        # decode each cell; a row that fails to decode is returned as an
        # empty list rather than aborting the whole export. Previously this
        # caught bare Exception, which also hid programming errors.
        try:
            return [unicode(cell, self.encoding) for cell in row]
        except UnicodeDecodeError:
            return []

    @property
    def line_num(self):
        # line number in the underlying file, as tracked by csv.reader
        return self.csv_reader.line_num
+
class UnicodeDictReader(csv.DictReader):
    """csv.DictReader whose row values are decoded to unicode.

    Works by replacing the plain reader that DictReader installs with a
    UnicodeCsvReader over the same file object; DictReader then pulls rows
    through the decoding wrapper. NOTE(review): this relies on DictReader
    reading rows lazily via ``self.reader`` (true on Python 2.x) — confirm
    if the runtime ever changes.
    """

    def __init__(self, f, encoding="utf-8", fieldnames=None, **kwds):
        csv.DictReader.__init__(self, f, fieldnames=fieldnames, **kwds)
        # swap in the decoding reader after the parent has set itself up
        self.reader = UnicodeCsvReader(f, encoding=encoding, **kwds)
+
+
if __name__ == '__main__':

    # layout: <collection>/bin/csv-to-json-files.py, <collection>/objects.csv,
    # output under <collection>/objects/...
    whoami = os.path.abspath(sys.argv[0])
    bindir = os.path.dirname(whoami)
    collection = os.path.dirname(bindir)

    objects = os.path.join(collection, 'objects')

    obj_csv = os.path.join(collection, 'objects.csv')
    obj_fh = open(obj_csv, 'r')

    reader = UnicodeDictReader(obj_fh)

    for row in reader:

        fname = "%s.json" % row['id']

        # e.g. id 123456 -> objects/123/456/123456.json
        root = utils.id2path(row['id'])
        root = os.path.join(objects, root)

        out = os.path.join(root, fname)

        # NOTE: a leftover debug `print out / continue` previously made
        # everything below unreachable, so no JSON was ever written.
        if not os.path.exists(root):
            os.makedirs(root)

        out_fh = open(out, 'w')
        json.dump(row, out_fh, indent=2)
        out_fh.close()

        print(out)

    obj_fh.close()
111 bin/generate-glossary.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import os.path
+import json
+import types
+import utils
+
def crawl(root):
    """Walk *root* and fold every .json file found into a single glossary dict.

    Returns a dict mapping dotted key paths (see munge) to glossary entry
    stubs.
    """

    glossary = {}

    # don't shadow the *root* parameter with os.walk's loop variable
    for dirname, dirs, files in os.walk(root):

        for f in files:

            if not f.endswith(".json"):
                continue

            path = os.path.abspath(os.path.join(dirname, f))

            # close each handle as we go; the original leaked one per file
            fh = open(path, 'r')

            try:
                data = json.load(fh)
            finally:
                fh.close()

            munge(glossary, data)

    return glossary
+
def munge(glossary, thing, prefix=None):
    """Recursively register every key path in *thing* into *glossary*.

    Nested dict keys become dotted labels ("a.b.c"); list elements are
    recursed into under the current prefix. Scalar values just register
    their label. Anything else is ignored.
    """

    # exact-type checks (`type(x) is dict`) preserve the original
    # `type(x) == types.DictType` semantics and, unlike types.DictType /
    # types.ListType, also work on Python 3.
    if type(thing) is dict:

        for k, v in thing.items():

            label = k

            if prefix:
                label = "%s.%s" % (prefix, label)

            # every key registers itself; containers also recurse
            add_key(glossary, label)

            if type(v) is dict or type(v) is list:
                munge(glossary, v, label)

    elif type(thing) is list:

        for stuff in thing:
            munge(glossary, stuff, prefix)

    else:
        # scalar at the top level: nothing to record
        pass
+
def add_key(glossary, key):
    """Ensure *key* has an (empty) entry stub in *glossary*.

    Existing entries are left untouched so hand-edited descriptions and
    notes survive repeated runs.
    """

    # .get() without a default is falsy for both "missing" and "empty",
    # matching the original glossary.get(key, False) check
    if glossary.get(key):
        return

    glossary[key] = {
        "description": "",
        "notes": [],
        "sameas": []
    }
+
if __name__ == '__main__':

    import optparse

    parser = optparse.OptionParser(usage="python generate-glossary.py --options")

    parser.add_option('--objects', dest='objects',
                      help='The path to your collection objects',
                      action='store')

    parser.add_option('--glossary', dest='glossary',
                      help='The path where your new glossary file should be written',
                      action='store')

    options, args = parser.parse_args()

    # load any existing glossary so hand-edited entries can be preserved

    old_glossary = None

    if os.path.exists(options.glossary):
        fh = open(options.glossary, 'r')
        old_glossary = json.load(fh)
        fh.close()

    #

    new_glossary = crawl(options.objects)

    # merge with old entries winning, so human-written descriptions/notes
    # survive a re-run. The original `dict(new.items() + old.items())`
    # breaks on Python 3, where dict views can't be concatenated.
    if old_glossary:
        merged = dict(new_glossary)
        merged.update(old_glossary)
        new_glossary = merged

    #

    fh = open(options.glossary, 'w')
    json.dump(new_glossary, fh, indent=2)
    fh.close()
119 bin/generate-meta.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import os.path
+import json
+import utils
+
+import logging
+
def update_bucket(options, bucket, object_path):
    """Append *object_path* (relative to the objects root) to a bucket file.

    The bucket file lives in options.meta and is named after the cleaned
    bucket label. Currently unused by generate_meta (see the commented-out
    call there).
    """

    # sudo put me in utils.py

    bucket = utils.clean_meta_name(bucket)

    # rewrite the absolute object path as one relative to the objects root
    objects_root = os.path.abspath(options.objects)
    rel_path = os.path.abspath(object_path).replace(objects_root + "/", "")

    bucket_path = os.path.join(options.meta, "%s.txt" % bucket)

    logging.debug("%s %s" % (bucket, rel_path))

    fh = open(bucket_path, "a")
    fh.write(rel_path + "\n")
    fh.close()
+
def generate_meta(options):
    """Index objects by their meta categories (culture, period, style, ...).

    Walks options.objects, reads each object's JSON, and for every category
    present records a {category: {subject: bucket_path}} mapping. If
    options.index is set, the mapping is written there as JSON.
    """

    categories = (
        'culture',
        'dynasty',
        'movement',
        'period',
        'region',
        'school',
        'style'
    )

    index = {}

    for category in categories:
        index[category] = {}

    for root, dirs, files in os.walk(options.objects):

        for f in files:

            if not f.endswith(".json"):
                continue

            path = os.path.abspath(os.path.join(root, f))

            logging.debug("generate meta for %s" % path)

            # close each handle as we go; the original leaked one per file
            fh = open(path, 'r')

            try:
                data = json.load(fh)
            finally:
                fh.close()

            for category in categories:

                # falsy values (missing, empty string) are skipped
                if data.get(category, False):

                    subject = data[category]
                    bucket = "%s.%s" % (category, subject)

                    # update_bucket(options, bucket, path)

                    bucket_name = utils.clean_meta_name(bucket) + ".txt"
                    bucket_path = os.path.join(options.meta, bucket_name)

                    index[category][subject] = bucket_path

    if options.index:

        fh = open(options.index, 'w')
        json.dump(index, fh, indent=2)
        fh.close()

        logging.info("created meta data index at %s" % options.index)
+
+
if __name__ == '__main__':

    import optparse

    parser = optparse.OptionParser(usage="python generate-meta.py --options")

    parser.add_option('--objects', dest='objects',
                      help='The path to your collection objects (folder)',
                      action='store')

    parser.add_option('--meta', dest='meta',
                      help='The path to your meta data (folder)',
                      action='store', default=None)

    parser.add_option('--index', dest='index',
                      help='The path to create an index of the meta files',
                      action='store', default=None)

    parser.add_option('--debug', dest='debug',
                      help='Enable debug logging',
                      action='store_true', default=False)

    options, args = parser.parse_args()

    # --debug turns on verbose logging; otherwise INFO and up
    level = logging.DEBUG if options.debug else logging.INFO
    logging.basicConfig(level=level)

    generate_meta(options)
    logging.info("done")

    sys.exit()
70 bin/publish-glossary.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+
+import sys
+import json
+
if __name__ == '__main__':

    import optparse

    # usage and help strings were copy-pasted from generate-glossary.py;
    # this script READS a glossary and renders it as markdown
    parser = optparse.OptionParser(usage="python publish-glossary.py --options")

    parser.add_option('--glossary', dest='glossary',
                      help='The path to the glossary (JSON) file to publish',
                      action='store')

    parser.add_option('--markdown', dest='markdown',
                      help='The path where the markdown should be written (default is STDOUT)',
                      action='store', default=None)

    options, args = parser.parse_args()

    fh = open(options.glossary, 'r')
    glossary = json.load(fh)
    fh.close()

    # sorted() works on Python 2 and 3; dict.keys().sort() fails on 3
    # where keys() is a view, not a list
    keys = sorted(glossary)

    if options.markdown:
        fh = open(options.markdown, 'w')
    else:
        fh = sys.stdout

    fh.write("_This file was generated programmatically using the `%s` document._\n" % options.glossary)
    fh.write("\n")

    for k in keys:

        details = glossary[k]

        # one H1-style section per glossary key
        fh.write("%s\n" % k)
        fh.write("==\n")
        fh.write("\n")

        if details['description'] != '':
            fh.write("_%s_\n" % details['description'])
            fh.write("\n")

        if len(details['notes']):

            fh.write("notes\n")
            fh.write("--\n")

            for n in details['notes']:
                fh.write("* %s\n" % n)
            fh.write("\n")

        if len(details['sameas']):

            fh.write("same as\n")
            fh.write("--\n")

            for other in details['sameas']:
                fh.write("* %s\n" % other)
            fh.write("\n")

    # only close a handle we opened; never close sys.stdout
    if options.markdown:
        fh.close()

    sys.exit()
45 bin/utils.py
@@ -0,0 +1,45 @@
+import pprint
+import os.path
+import string
+import unicodedata
+
def dumper(data):
    """Pretty-print *data* to stdout (debugging helper)."""
    # parenthesized print behaves identically on Python 2 (the parens just
    # group the expression) and is required syntax on Python 3
    print(pprint.pformat(data))
+
def id2path(id):
    """Map an object id to a nested path: 123456 -> 123/456, 1234 -> 123/4."""

    s = str(id)

    # split into 3-character chunks from the front; any remainder
    # (1-3 chars) becomes the final component
    parts = [s[i:i + 3] for i in range(0, len(s), 3)]

    return os.path.join(*parts)
+
def clean_meta_name(name, allow_punctuation=None):
    """Normalize *name* into a safe filename fragment.

    Lowercases, strips accents, removes punctuation (except characters in
    *allow_punctuation*), and turns runs of spaces into single dashes.
    """

    # avoid a mutable default argument; None behaves like the old []
    if allow_punctuation is None:
        allow_punctuation = []

    name = name.strip()
    name = name.lower()

    name = remove_accents(name)

    for c in string.punctuation:

        if c in allow_punctuation:
            continue

        name = name.replace(c, "")

    name = name.replace(" ", "-")

    # a single replace("--", "-") left residual dashes for runs of 3+
    # separators (e.g. "---" became "--"); loop until fully collapsed
    while "--" in name:
        name = name.replace("--", "-")

    return name
+
def remove_accents(input_str):
    """Strip accents by NFKD-decomposing and dropping non-ASCII code points.

    Python 2 only: relies on the `unicode` builtin; returns a byte string.
    """
    decomposed = unicodedata.normalize('NFKD', unicode(input_str))
    return decomposed.encode('ASCII', 'ignore')

0 comments on commit e6aa674

Please sign in to comment.