Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Configuration now knows about templates, and command line about a reformat path option, --reformat_path= or -R.
  • Loading branch information...
commit 1e7a16b869e579a6ce349969da3103d42b7f51dc 1 parent 3a8ac26
dim authored
View
8 debian/changelog
@@ -1,3 +1,11 @@
+pgloader (2.2.5-dev) unstable; urgency=low
+
+ * Configuration now supports templates
+ * Command line option for setting --reformat_path, -R
+ *
+
+ -- Dimitri Fontaine <dim@tapoueh.org> Mon, 26 Nov 2007 21:53:11 +0100
+
pgloader (2.2.4) unstable; urgency=low
* Reformat modules to change input on-the-fly
View
56 pgloader.py
@@ -101,6 +101,10 @@ def parse_options():
default = None,
help = "input files encoding")
+ parser.add_option("-R", "--reformat_path", dest = "reformat_path",
+ default = None,
+ help = "PATH where to find reformat python modules")
+
(opts, args) = parser.parse_args()
if opts.version:
@@ -129,10 +133,13 @@ def parse_options():
print "Error: Can't be verbose and quiet at the same time!"
sys.exit(1)
+ # if debug, then verbose
+ if opts.debug:
+ opts.verbose = True
+
pgloader.options.DRY_RUN = opts.dryrun
pgloader.options.DEBUG = opts.debug
- # if debug, then verbose
- pgloader.options.VERBOSE = opts.verbose or opts.debug
+ pgloader.options.VERBOSE = opts.verbose
pgloader.options.QUIET = opts.quiet
pgloader.options.SUMMARY = opts.summary
pgloader.options.PEDANTIC = opts.pedantic
@@ -146,6 +153,9 @@ def parse_options():
pgloader.options.INPUT_ENCODING = opts.encoding
+ if opts.reformat_path:
+ pgloader.options.REFORMAT_PATH = opts.reformat_path
+
return opts.config, args
def parse_config(conffile):
@@ -227,19 +237,10 @@ def parse_config(conffile):
config.get(section, 'empty_string'))
if config.has_option(section, 'reformat_path'):
- import os.path
- reformat_path = []
- tmp_rpath = config.get(section, 'reformat_path')
-
- for p in tmp_rpath.split(':'):
- if os.path.exists(p):
- reformat_path.append(p)
- else:
- print 'Error: reformat_path %s does not exists, ignored'%p
-
- pgloader.options.REFORMAT_PATH = reformat_path
- else:
- pgloader.reformat_path = None
+ # the command line value is preferred over the configuration file one
+ if not pgloader.options.REFORMAT_PATH:
+ rpath = config.get(section, 'reformat_path')
+ pgloader.options.REFORMAT_PATH = rpath
except Exception, error:
print "Error: Could not initialize PostgreSQL connection:"
@@ -361,6 +362,25 @@ def load_data():
# now init db connection
config, dbconn = parse_config(conffile)
+ from pgloader.tools import read_path, check_path
+ from pgloader.options import VERBOSE
+ import pgloader.options
+ rpath = read_path(pgloader.options.REFORMAT_PATH, check = False)
+ crpath = check_path(rpath, VERBOSE)
+
+ if not crpath:
+ # don't check same path entries twice
+ default_rpath = set(crpath) \
+ - set(pgloader.options.DEFAULT_REFORMAT_PATH)
+
+ pgloader.options.REFORMAT_PATH = check_path(default_rpath, VERBOSE)
+ else:
+ pgloader.options.REFORMAT_PATH = rpath
+
+ if VERBOSE:
+ print 'Notice: Reformat path is', pgloader.options.REFORMAT_PATH
+ print
+
# load some pgloader package modules
from pgloader.options import VERBOSE, DEBUG, QUIET, SUMMARY
from pgloader.options import DRY_RUN, PEDANTIC, VACUUM
@@ -392,11 +412,17 @@ def load_data():
sections.sort()
for s in sections:
try:
+ if VERBOSE:
+ print
+
pgloader = PGLoader(s, config, dbconn)
if not pgloader.template:
pgloader.run()
summary[s] = (pgloader.table,) + pgloader.summary()
+ else:
+ if VERBOSE:
+ print "Skipping section %s, which is a template" % s
except PGLoader_Error, e:
if e == '':
View
3  pgloader/__init__.py
@@ -0,0 +1,3 @@
+"""
+pgloader package, offering modules to implement pgloader.
+"""
View
5 pgloader/options.py
@@ -2,7 +2,7 @@
#
# Some common options, for each module to get them
-PGLOADER_VERSION = '2.2.4'
+PGLOADER_VERSION = '2.2.5-devel'
INPUT_ENCODING = None
PG_CLIENT_ENCODING = 'latin9'
@@ -32,4 +32,5 @@
UDC_PREFIX = 'udc_'
-REFORMAT_PATH = ['/usr/share/pgloader/reformat']
+REFORMAT_PATH = None
+DEFAULT_REFORMAT_PATH = ['/usr/share/pgloader/reformat']
View
81 pgloader/pgloader.py
@@ -54,11 +54,9 @@ def __init__(self, name, config, db):
# just skip it here
if VERBOSE:
- print
- print "[%s] skip template configuration" % self.name
+ print "[%s] is a template" % self.name
if not self.template and VERBOSE:
- print
print "[%s] parse configuration" % self.name
if not self.template:
@@ -83,11 +81,17 @@ def __init__(self, name, config, db):
# now load specific configuration
if VERBOSE:
- print
print "Reading configuration from section [%s]" % name
self.__read_conf__(name, config, db)
+ # force reinit of self.reader, which depends on template and
+ # specific options
+ if 'reader' in self.__dict__:
+ self.reader.__init__(self.db, self.reject,
+ self.filename, self.input_encoding,
+ self.table, self.columns)
+
if DEBUG:
print '%s init done' % name
print
@@ -181,11 +185,15 @@ def __read_conf__(self, name, config, db):
print 'columns', self.columns
print 'blob_columns', self.blob_cols
- if self.name == name and not self.columns:
- print 'Error: %s has no columns defined' % name
- self.config_errors += 1
+ if self.columns is None:
+ if not self.template:
+ print 'Error: %s has no columns defined' % name
+ self.config_errors += 1
- self.columns = []
+ else:
+ # non critical error, and code thereafter wants to use
+ # self.columns as a list
+ self.columns = []
##
# The config section can also provide user-defined colums
@@ -331,9 +339,7 @@ def __read_conf__(self, name, config, db):
self.columnlist = [n for (n, pos) in self.columns]
if DEBUG:
- #print "columns", self.columns
print "only_cols", self.only_cols
- #print "udcs", self.udcs
print "columnlist", self.columnlist
##
@@ -364,47 +370,28 @@ def __read_conf__(self, name, config, db):
if config.has_option(name, 'format'):
self.format = config.get(name, 'format')
- if 'reader' not in self.__dict__:
- if DEBUG:
- print 'READER INIT'
-
- if self.format.lower() == 'csv':
- from csvreader import CSVReader
- self.reader = CSVReader(self.db, self.reject,
- self.filename,
- self.input_encoding,
- self.table, self.columns)
-
- elif self.format.lower() == 'text':
- from textreader import TextReader
- self.reader = TextReader(self.db, self.reject,
- self.filename,
- self.input_encoding,
- self.table, self.columns,
- self.newline_escapes)
-
- self.reader.readconfig(name, config)
+ if self.format.lower() == 'csv':
+ from csvreader import CSVReader
+ self.reader = CSVReader(self.db, self.reject,
+ self.filename, self.input_encoding,
+ self.table, self.columns)
+
+ elif self.format.lower() == 'text':
+ from textreader import TextReader
+ self.reader = TextReader(self.db, self.reject,
+ self.filename, self.input_encoding,
+ self.table, self.columns,
+ self.newline_escapes)
+
+ if 'reader' in self.__dict__:
+ if DEBUG:
+ print 'reader.readconfig()'
+ self.reader.readconfig(name, config)
if not self.template and self.format is None:
# error only when not loading the Template part
print 'Error: %s: format parameter needed' % name
raise PGLoader_Error
- else:
- if DEBUG:
- print 'MANUAL REINIT OF READER'
- self.reader.reject = self.reject
- self.reader.filename = self.filename
- self.reader.input_encoding = self.input_encoding
- self.reader.newline_escapes = self.newline_escapes
- self.reader.readconfig(name, config)
-
- print 'BLURPS', self.reader.trailing_sep
-
-## ##
-## # parse the reader specific section options
-## if not self.template:
-## self.reader.readconfig(name, config)
-## print 'BLURPS', self.reader.trailing_sep
##
# Some column might need reformating
@@ -444,8 +431,10 @@ def __read_conf__(self, name, config, db):
print 'Error: %s failed to import reformat module "%s"' \
% (name, r_module)
print ' from %s' % str(REFORMAT_PATH)
+ print ' %s' % e
self.config_errors += 1
+
if module:
if r_function in module.__dict__:
self.reformat.append((r_colname,
View
10 pgloader/reader.py
@@ -22,6 +22,9 @@ class DataReader:
def __init__(self, db, reject, filename, input_encoding, table, columns):
""" init internal variables """
+ if DEBUG:
+ print 'reader __init__', filename, table, columns
+
self.db = db
self.filename = filename
self.input_encoding = input_encoding
@@ -32,7 +35,7 @@ def __init__(self, db, reject, filename, input_encoding, table, columns):
if self.input_encoding is None:
if INPUT_ENCODING is not None:
self.input_encoding = INPUT_ENCODING
-
+
def readconfig(self, name, config):
""" read configuration section for common options
@@ -67,8 +70,9 @@ def readconfig(self, name, config):
self.db.copy_sep = self.field_sep
if DEBUG and not DRY_RUN:
- print "null: '%s'" % self.db.null
- print "empty_string: '%s'" % self.db.empty_string
+ print "reader.readconfig null: '%s'" % self.db.null
+ print "reader.readconfig empty_string: '%s'" \
+ % self.db.empty_string
def readlines(self):
""" read data from configured file, and generate (yields) for
View
21 pgloader/textreader.py
@@ -31,28 +31,39 @@ class TextReader(DataReader):
"""
def __init__(self, db, reject, filename, input_encoding,
- table, columns, newline_escapes):
+ table, columns, newline_escapes = None):
""" init textreader with a newline_escapes parameter """
DataReader.__init__(self, db, reject,
filename, input_encoding, table, columns)
- self.newline_escapes = newline_escapes
-
+ if 'newline_escapes' not in self.__dict__:
+ self.newline_escapes = newline_escapes
def readconfig(self, name, config):
""" get this reader module configuration from config file """
DataReader.readconfig(self, name, config)
+ # this will be called twice if templates are in use, so we
+ # have to protect ourselves against removing already read
+ # configurations during the second run.
+
# optionnal number of columns per line
- self.field_count = None
+ if 'field_count' not in self.__dict__:
+ self.field_count = None
+
if config.has_option(name, 'field_count'):
self.field_count = config.getint(name, 'field_count')
# optionnal trailing separator option
- self.trailing_sep = False
+ if 'trailing_sep' not in self.__dict__:
+ self.trailing_sep = False
+
if config.has_option(name, 'trailing_sep'):
self.trailing_sep = config.get(name, 'trailing_sep') == 'True'
+ if DEBUG:
+ print 'reader.readconfig: field_count', self.field_count
+ print 'reader.readconfig: trailing_sep', self.trailing_sep
def readlines(self):
""" read data from configured file, and generate (yields) for
View
31 pgloader/tools.py
@@ -116,3 +116,34 @@ def parse_config_string(str):
def read_path(strpath, verbose = False, path = None, check = True):
    """ Split a colon-separated path string into a list of entries.

    strpath -- string of entries separated by ':'; None or an empty
               string contributes no entries
    verbose -- passed to check_path(), which prints a warning for each
               entry it discards
    path    -- optional list to append the entries to; it is modified in
               place and returned.  A fresh list is created for each
               call when it is omitted.
    check   -- when true, return check_path(path, verbose), that is only
               the entries which are existing directories; when false,
               return the entries unchecked

    Empty entries (as produced by 'a::b' or a trailing ':') are skipped.
    """
    # a default of [] would be shared between calls and keep growing
    if path is None:
        path = []

    # strpath may be None when no path has been configured at all
    if strpath:
        path.extend([p for p in strpath.split(':') if p])

    if check:
        return check_path(path, verbose)

    return path
+
def check_path(path, verbose = False):
    """ Return the entries of path which are existing directories.

    path    -- iterable of filesystem path strings; it is not modified
    verbose -- when true, print a warning for each discarded entry

    Entries are returned in their original order.  A symbolic link is
    kept when it resolves to a directory.
    """
    # imported here so that this function does not rely on an import
    # done in another function or elsewhere in the module
    import os.path

    path_ok = []

    for p in path:
        # isdir() follows symbolic links, so a link pointing to a
        # directory is accepted without any extra islink() test
        if os.path.isdir(p):
            path_ok.append(p)

        elif verbose:
            if os.path.exists(p):
                print("Warning: path entry '%s' " % p +
                      "is not a directory or does not link to a directory")
            else:
                print("Warning: path entry '%s' does not exists, ignored" % p)

    return path_ok
View
5 reformat/mysql.py
@@ -2,7 +2,6 @@
#
# pgloader mysql reformating module
#
-from pgloader.tools import PGLoader_Error
def timestamp(reject, input):
""" Reformat str as a PostgreSQL timestamp
@@ -12,7 +11,7 @@ def timestamp(reject, input):
"""
if len(input) != 14:
e = "MySQL timestamp reformat input too short: %s" % input
- raise PGLoader_Error, e
+ reject.log(e, input)
year = input[0:4]
month = input[4:6]
@@ -21,4 +20,4 @@ def timestamp(reject, input):
minute = input[10:12]
seconds = input[12:14]
- return '%s-%s-%s %s:%s:%s' % (year, month, day, hour, month, seconds)
+ return '%s-%s-%s %s:%s:%s' % (year, month, day, hour, minute, seconds)

0 comments on commit 1e7a16b

Please sign in to comment.
Something went wrong with that request. Please try again.