Skip to content
Browse files

add directory source/destination to migrator

Change-Id: Iab9715e706fb75ab12950ea4b45459bdc1dab606
  • Loading branch information...
1 parent be93099 commit 85ece85a093ac5077549cc67e7602558517e9648 @kbatten kbatten committed
Showing with 116 additions and 0 deletions.
  1. +7 −0 couchbase/migrator/__init__.py
  2. +109 −0 couchbase/migrator/migrator_dir.py
View
7 couchbase/migrator/__init__.py
@@ -6,6 +6,7 @@
from migrator_json import JSONReader, JSONWriter
from migrator_couchdb import CouchdbReader, CouchdbWriter
from migrator_couchbase import CouchbaseReader, CouchbaseWriter
+from migrator_dir import DirReader, DirWriter
sources = []
destinations = []
@@ -14,11 +15,13 @@
sources.extend(migrator_csv.sources)
sources.extend(migrator_json.sources)
sources.extend(migrator_couchdb.sources)
+sources.extend(migrator_dir.sources)
destinations.extend(migrator_couchbase.destinations)
destinations.extend(migrator_csv.destinations)
destinations.extend(migrator_json.destinations)
destinations.extend(migrator_couchdb.destinations)
+destinations.extend(migrator_dir.destinations)
def reader(loc):
kind, fp = loc.split(':', 1)
@@ -30,6 +33,8 @@ def reader(loc):
return CouchdbReader(fp)
elif kind.lower() == 'couchbase':
return CouchbaseReader(fp)
+ elif kind.lower() == 'dir':
+ return DirReader(fp)
def writer(loc):
kind, fp = loc.split(':', 1)
@@ -41,3 +46,5 @@ def writer(loc):
return CouchdbWriter(fp)
elif kind.lower() == 'couchbase':
return CouchbaseWriter(fp)
+ elif kind.lower() == 'dir':
+ return DirWriter(fp)
View
109 couchbase/migrator/migrator_dir.py
@@ -0,0 +1,109 @@
+# migrator source and destination for a directory of *.json files
+
+# source:
+# recurses through subdirectories and reads .json files
+# if the .json has an _id field it will use that, otherwise it will use the filename
+# if the directory contains an _id file, the directory itself is treated as a document: the _id file holds its id and every other file (except *.json) is read as a string field, with subdirectories becoming nested objects
+
+# destination:
+# writes out items that have _id: _design/* to <dir>/design_docs
+# writes out all other items to <dir>/docs
+
+sources=[{'type':'dir','class':'DirReader','example':'dir:<directory>'}]  # reader registry entry; the package __init__ extends its sources list with this
+destinations=[{'type':'dir','class':'DirWriter','example':'dir:<directory>'}]  # writer registry entry; the package __init__ extends its destinations list with this
+
+import os
+import json
+
+import migrator
+
+class DirReader(migrator.Reader):
+ def __init__(self, source):
+ self.dir = os.path.expanduser(source)
+ self.files = self._get_filenames()
+
+ def __iter__(self):
+ return self
+
+ def _get_filenames(self):
+ filenames = []
+ for root, subFolders, files in os.walk(self.dir):
+ for filename in files:
+ if filename.endswith(".json") or filename == "_id":
+ filepath = os.path.join(root, filename)
+ filenames.append(filepath)
+ return filenames
+
+ def _get_nonjson_filenames(self, base):
+ filenames = []
+ for root, subFolders, files in os.walk(base):
+ for filename in files:
+ if not filename.endswith(".json") and not filename == "_id":
+ filepath = os.path.join(root, filename)
+ filenames.append(filepath)
+ return filenames
+
+ def next(self):
+ try:
+ filename = self.files.pop(0)
+ path = os.path.join(self.dir, filename)
+ except IndexError:
+ raise StopIteration()
+
+ if os.path.basename(filename) == "_id":
+ # entire directory (except *.json) is a document
+ # <dir>/views is converted to <id>/_views
+ doc_basepath = os.path.dirname(filename)
+ with open(filename, 'r') as f:
+ id = f.read().strip('\n\r,')
+ json_data = {}
+ for f in self._get_nonjson_filenames(os.path.dirname(filename)):
+ f_relative_path = f[len(doc_basepath)+1:]
+ if os.path.isfile(os.path.join(doc_basepath,f_relative_path.split(os.path.sep)[0])):
+ # its a plain file
+ with open (f) as item:
+ json_data[f_relative_path.split(os.path.sep)[0]] = item.read().strip('\n\r,')
+ else:
+ # its a subdirectory
+ json_cur = json_data
+ for index in f_relative_path.split(os.path.sep)[:-1]:
+ if not index in json_cur:
+ json_cur[index] = {}
+ json_cur = json_cur[index]
+
+ index = os.path.splitext(f_relative_path.split(os.path.sep)[-1])[0]
+ with open(f) as item:
+ json_cur[index] = item.read().strip('\n\r,')
+ else:
+ with open(filename, 'r') as f:
+ json_data = json.loads(f.read().strip('\n\r,'))
+ if "_id" in json_data:
+ id = json_data["_id"]
+ else:
+ id = os.path.splitext(os.path.basename(filename))[0]
+
+ record = {'id':id}
+ record['value'] = dict((k,v) for (k,v) in json_data.iteritems() if not k.startswith('_'))
+ return record
+
+
+class DirWriter(migrator.Writer):
+ def __init__(self, destination):
+ self.dir = os.path.expanduser(destination)
+ try:
+ os.makedirs(os.path.join(destination,"docs"))
+ os.makedirs(os.path.join(destination,"design_docs"))
+ except OSError as e:
+ pass
+
+ def write(self, record):
+ if os.path.sep in record["id"]:
+ record["value"]["_id"] = record["id"]
+ if record["id"].startswith("_design/"):
+ filename = record["id"][len("_design/"):].replace(os.path.sep, "_")
+ path = os.path.join(self.dir, "design_docs", filename) + ".json"
+ else:
+ filename = record["id"].replace(os.path.sep, "_")
+ path = os.path.join(self.dir, "docs", filename) + ".json"
+ f = open(path, "wb")
+ f.write(json.dumps(record["value"]) + '\n')

0 comments on commit 85ece85

Please sign in to comment.
Something went wrong with that request. Please try again.