Skip to content

Commit

Permalink
unicode anywhere for i18n support
Browse files Browse the repository at this point in the history
  • Loading branch information
wjo1212 committed Jan 3, 2019
1 parent 1ae07db commit 35585a8
Show file tree
Hide file tree
Showing 17 changed files with 216 additions and 169 deletions.
23 changes: 22 additions & 1 deletion aliyun/log/etl_core/etl_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import copy
import six


def cached(fn):
@wraps(fn)
def _wrapped(*args, **kwargs):
Expand Down Expand Up @@ -134,4 +135,24 @@ def _wrapped(self, event, *args, **kwargs):
else:
return fn(self, event, *args, **kwargs)

return _wrapped
return _wrapped


def u(d):
    """
    Recursively convert byte strings to unicode (Python 2 only).

    On Python 2, a ``six.binary_type`` value is decoded as UTF-8 (ignoring
    undecodable bytes), and lists, tuples and dicts are walked so their
    contained strings (and dict keys) are converted as well. On Python 3
    the value is returned untouched.

    :param d: a string, a container of strings, or any other value
    :return: the unicode-converted equivalent of ``d``
    """
    if not six.PY2:
        return d

    if isinstance(d, six.binary_type):
        return d.decode("utf8", "ignore")
    if isinstance(d, list):
        return [u(item) for item in d]
    if isinstance(d, tuple):
        return tuple(u(item) for item in d)
    if isinstance(d, dict):
        return dict((u(key), u(value)) for key, value in six.iteritems(d))

    return d

6 changes: 3 additions & 3 deletions aliyun/log/etl_core/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
class SettingError(LogException):
    """Raised when an ETL configuration value is invalid.

    Wraps the offending settings string and an optional underlying
    exception into a ``LogException`` with error code ``InvalidConfig``.
    """

    def __init__(self, ex=None, settings="", msg=""):
        # Unicode format strings keep this i18n-safe on Python 2 when the
        # settings or message contain non-ASCII characters.
        if msg and settings:
            msg += u'\nInvalid Settings "{0}"'.format(settings)
        else:
            msg = msg or u'Invalid Settings "{0}"'.format(settings or 'unknown')

        super(SettingError, self).__init__('InvalidConfig', u'{0}\nDetail: {1}'.format(msg, ex))
        # keep the raw settings around so callers can inspect what failed
        self.settings = settings

12 changes: 7 additions & 5 deletions aliyun/log/etl_core/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@

from .config_parser import ConfigParser
from .exceptions import SettingError
from .etl_util import process_event
from .etl_util import process_event, u

logger = logging.getLogger(__name__)


class Runner(object):
def __init__(self, config_path):
config_path = u(config_path)

if not inspect.ismodule(config_path):
basedir = os.path.dirname(os.path.abspath(config_path))
module_name = os.path.basename(config_path[:-3])
Expand All @@ -21,15 +23,15 @@ def __init__(self, config_path):
try:
md = __import__(module_name)
except ImportError as ex:
logger.error("Cannot import config path: {0}".format(config_path))
raise SettingError(ex, 'Cannot import the config "{0}"'.format(config_path))
logger.error(u"Cannot import config path: {0}".format(config_path))
raise SettingError(ex, u'Cannot import the config "{0}"'.format(config_path))
else:
md = config_path

logger.info("runner: passed module {0} from config file {1}".format(md, config_path))
logger.info(u"runner: passed module {0} from config file {1}".format(md, config_path))

parsed_fn = ConfigParser(md).parse()
logger.info("runner: passed fn list: {0}".format(parsed_fn))
logger.info(u"runner: passed fn list: {0}".format(parsed_fn))

self.fn_list = [fn for no, fn in parsed_fn]

Expand Down
16 changes: 15 additions & 1 deletion aliyun/log/etl_core/trans_comp/trans_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import six

from ..etl_util import u

class trans_comp_base(object):
@property
Expand All @@ -9,6 +9,11 @@ def __name__(self):

@staticmethod
def _n(v):
"""
convert string to utf8 in Py2 or unicode in Py3
:param v:
:return:
"""
if v is None:
return ""

Expand All @@ -23,3 +28,12 @@ def _n(v):
v = v.decode('utf8', "ignore")

return str(v)

    @staticmethod
    def _u(d):
        """
        Recursively convert byte strings inside *d* to unicode.

        Thin wrapper delegating to ``etl_util.u``; accepts a string, a
        container of strings (list/tuple/dict), or any other value and
        returns the unicode-converted equivalent (no-op on Python 3).

        :param d: value to convert
        :return: the converted value
        """
        return u(d)
8 changes: 5 additions & 3 deletions aliyun/log/etl_core/trans_comp/trans_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class trans_comp_csv(trans_comp_base):
DEFAULT_QUOTE = '"'

def __init__(self, config, sep=None, quote=None, lstrip=None, restrict=None):
config = self._u(config)
if isinstance(config, (six.text_type, six.binary_type)):
self.keys = self.p_csv_sep.split(config)
elif isinstance(config, Iterable):
Expand All @@ -25,24 +26,25 @@ def __init__(self, config, sep=None, quote=None, lstrip=None, restrict=None):
raise SettingError(settings=config)

self.sep = sep or self.DEFAULT_SEP
self.lstrip = True if lstrip is None else lstrip
self.quote = quote or self.DEFAULT_QUOTE
self.lstrip = True if lstrip is None else lstrip
self.restrict = False if restrict is None else restrict

def __call__(self, event, inpt):
inpt = self._u(inpt)
if inpt in event:
data = event[inpt].split("\n")
ret = list(csv.reader(data, skipinitialspace=self.lstrip, delimiter=self.sep, quotechar=self.quote))[0]
if self.restrict and len(ret) != len(self.keys):
logger.warning(
"event {0} field {1} contains different count of fields as expected key {2} actual {3}".format(
u"event {0} field {1} contains different count of fields as expected key {2} actual {3}".format(
event, inpt, self.keys, ret))
return event

new_event = dict(zip(self.keys, ret))
event.update(new_event)
else:
logger.warning("field {0} doesn't exist in event {1}, skip it".format(inpt, event))
logger.warning(u"field {0} doesn't exist in event {1}, skip it".format(inpt, event))

return event

Expand Down
92 changes: 39 additions & 53 deletions aliyun/log/etl_core/trans_comp/trans_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,28 @@ def trans_comp_json(*args, **kwargs):


class json_transformer(trans_comp_base):
DEFAULT_SEP = '.'
DEFAULT_SEP = u'.'
DEFAULT_FMT = "simple"
DEFAULT_DEPTH = 100
DEFAULT_INCLUDE_NODE = ''
DEFAULT_EXCLUDE_NODE = ''
DEFAULT_INCLUDE_PATH = ''
DEFAULT_EXCLUDE_PATH = ''
DEFAULT_INCLUDE_NODE = u''
DEFAULT_EXCLUDE_NODE = u''
DEFAULT_INCLUDE_PATH = u''
DEFAULT_EXCLUDE_PATH = u''

DEFAULT_FMT_ARRAY = "{parent_rlist[0]}_{index}" # could also be custom formatting string using up to five placehodler: parent_list, parent_list, current, sep, prefix, suffix
DEFAULT_FMT_ARRAY = u"{parent_rlist[0]}_{index}" # could also be custom formatting string using up to five placehodler: parent_list, parent_list, current, sep, prefix, suffix
FMT_MAP = {
"simple": lambda prefix, current, suffix, *args, **kwargs: "{prefix}{current}{suffix}".format(prefix=prefix,
"simple": lambda prefix, current, suffix, *args, **kwargs: u"{prefix}{current}{suffix}".format(prefix=prefix,
current=current,
suffix=suffix),
"full": lambda parent_list, sep, prefix, current, suffix, *args,
**kwargs: "{parent_list_str}{sep}{prefix}{current}{suffix}".format(
**kwargs: u"{parent_list_str}{sep}{prefix}{current}{suffix}".format(
parent_list_str=sep.join(parent_list), current=current, sep=sep, prefix=prefix, suffix=suffix),
"parent": lambda parent_list, sep, prefix, current, suffix, *args,
**kwargs: "{parent}{sep}{prefix}{current}{suffix}".format(parent=parent_list[-1],
**kwargs: u"{parent}{sep}{prefix}{current}{suffix}".format(parent=parent_list[-1],
current=current, sep=sep,
prefix=prefix, suffix=suffix),
"root": lambda parent_list, sep, prefix, current, suffix, *args,
**kwargs: "{parent_list[0]}{sep}{prefix}{current}{suffix}".format(parent_list=parent_list,
**kwargs: u"{parent_list[0]}{sep}{prefix}{current}{suffix}".format(parent_list=parent_list,
current=current, sep=sep,
prefix=prefix, suffix=suffix)
# could also be custom formatting string using up to five placehodler: parent_list, parent_list, current, sep, prefix, suffix
Expand Down Expand Up @@ -85,11 +85,11 @@ def __init__(self, jmes=None, jmes_ignore_none=None, output=None,
self.expand = not jmes or not output

# self.level = level or 1
self.jmes = jmes or ""
self.prefix = "" if prefix is None else prefix
self.suffix = "" if suffix is None else suffix
self.sep = self.DEFAULT_SEP if sep is None else sep
self.output = output or ""
self.jmes = self._u(jmes or "")
self.prefix = self._u("" if prefix is None else prefix)
self.suffix = self._u("" if suffix is None else suffix)
self.sep = self._u(self.DEFAULT_SEP if sep is None else sep)
self.output = self._u(output or "")
self.jmes_filter = None
self.jmes_ignore_none = True if jmes_ignore_none is None else jmes_ignore_none
if jmes:
Expand All @@ -98,15 +98,15 @@ def __init__(self, jmes=None, jmes_ignore_none=None, output=None,
except jmespath.exceptions.ParseError as ex:
raise SettingError(ex=ex, msg="Invalid JMES filter setting", settings=jmes)
elif self.output:
logger.warning("json_transformer: parameter output '{0}' will be ignored as there's no filter is selected."
logger.warning(u"json_transformer: parameter output '{0}' will be ignored as there's no filter is selected."
.format(output))

self.depth = min((depth or self.DEFAULT_DEPTH), self.DEFAULT_DEPTH)
self.include_node = include_node or self.DEFAULT_INCLUDE_NODE
self.exclude_node = exclude_node or self.DEFAULT_EXCLUDE_NODE
self.include_path = include_path or self.DEFAULT_INCLUDE_PATH
self.exclude_path = exclude_path or self.DEFAULT_EXCLUDE_PATH
self.fmt = fmt or self.DEFAULT_FMT
self.include_node = self._u(include_node or self.DEFAULT_INCLUDE_NODE)
self.exclude_node = self._u(exclude_node or self.DEFAULT_EXCLUDE_NODE)
self.include_path = self._u(include_path or self.DEFAULT_INCLUDE_PATH)
self.exclude_path = self._u(exclude_path or self.DEFAULT_EXCLUDE_PATH)
self.fmt = self._u(fmt or self.DEFAULT_FMT)

try:
self.include_node_match = get_re_full_match(self.include_node)
Expand All @@ -117,12 +117,12 @@ def __init__(self, jmes=None, jmes_ignore_none=None, output=None,
raise SettingError(ex=ex, msg="Invalid regex string for include/exclude")

self.expand_array = True if expand_array is None else expand_array
self.format_array = fmt_array or self.DEFAULT_FMT_ARRAY
self.format_array = self._u(fmt_array or self.DEFAULT_FMT_ARRAY)

def _skip_keys(self, key, parent_list):
if (self.include_node and not self.include_node_match(key)) or (
self.exclude_node and self.exclude_node_match(key)):
logger.info("json_transformer: 'key' {0} is not in include keys '{1}' or in exclude keys '{2}', skip it."
logger.info(u"json_transformer: 'key' {0} is not in include keys '{1}' or in exclude keys '{2}', skip it."
.format(key, self.include_node, self.exclude_node))
return True

Expand All @@ -131,15 +131,15 @@ def _skip_keys(self, key, parent_list):
if (self.include_path and not self.include_path_match(path)) or (
self.exclude_path and self.exclude_path_match(path)):
logger.info(
"json_transformer: path '{0}' is not in include path '{1}' or in exclude path '{2}', skip it."
u"json_transformer: path '{0}' is not in include path '{1}' or in exclude path '{2}', skip it."
.format(path, self.include_path, self.exclude_path))
return True

return False

def format_add_kv(self, event, fmt, current, value, parent_list, parent_rlist, sep, prefix, suffix):
if self._skip_keys(current, parent_list):
logger.info("json_transformer: 'key' {0} is not in include keys '{1}' or in exclude keys '{2}', skip it."
logger.info(u"json_transformer: 'key' {0} is not in include keys '{1}' or in exclude keys '{2}', skip it."
.format(current, self.include_node, self.exclude_node))
return

Expand All @@ -157,21 +157,21 @@ def format_add_kv(self, event, fmt, current, value, parent_list, parent_rlist, s
prefix=prefix, suffix=suffix), \
json_transformer._n(value)
except Exception as ex:
logger.info("json_transformer: fail to format with settings: '{0}'".format((fmt, current, value,
logger.info(u"json_transformer: fail to format with settings: '{0}'".format((fmt, current, value,
parent_list, sep, prefix,
suffix)))
elif inspect.isfunction(fmt):
try:
ret = fmt(parent_list, current, value)
except Exception as ex:
logger.info("json_transformer: fail to call formatting string: {0} wuth parameters: {1}"
logger.info(u"json_transformer: fail to call formatting string: {0} wuth parameters: {1}"
.format(fmt, (parent_list, current, value)))

if ret and len(ret) == 2:
k, v = ret
event[json_transformer._n(k)] = json_transformer._n(v)
else:
logger.info("json_transformer: unexpected format result: {0}, fmt: '{1}', k: '{2}', v: '{3}', skip it"
logger.info(u"json_transformer: unexpected format result: {0}, fmt: '{1}', k: '{2}', v: '{3}', skip it"
.format(ret, fmt, current, value))

def _expand_json(self, event, key, value, parent_list, parent_rlist, depth, sep, prefix, suffix):
Expand All @@ -180,7 +180,7 @@ def _expand_json(self, event, key, value, parent_list, parent_rlist, depth, sep,
or (not isinstance(value, (list, tuple, dict))) \
or (isinstance(value, (list, tuple)) and not self.expand_array):
# 1. depth hit, 2. basic type, 3. array but not expand
logger.info("json_transformer: hit stop parsing, key: '{0}', value: '{1}', parent: '{2}', depth: '{3}'"
logger.info(u"json_transformer: hit stop parsing, key: '{0}', value: '{1}', parent: '{2}', depth: '{3}'"
.format(key, value, parent_list, depth))
self.format_add_kv(event, self.fmt, self._n(key), self._n(value), parent_list, parent_rlist, sep, prefix,
suffix)
Expand All @@ -202,7 +202,7 @@ def _expand_json(self, event, key, value, parent_list, parent_rlist, depth, sep,
suffix)

else:
logger.info("json_transformer: skip unsupported message '{0}' of type '{1}' when expanding"
logger.info(u"json_transformer: skip unsupported message '{0}' of type '{1}' when expanding"
.format(value, type(value)))

def _process_message(self, key, value):
Expand All @@ -211,18 +211,18 @@ def _process_message(self, key, value):
try:
value = json.loads(value)
except Exception as ex:
logger.info("json_transformer: fail to load event into json object: {0}, error: {1}".format(value, ex))
logger.info(u"json_transformer: fail to load event into json object: {0}, error: {1}".format(value, ex))
return None

if self.jmes_filter:
try:
value = self.jmes_filter.search(value)
if value is None and self.jmes_ignore_none:
logger.info("split_event_transformer: value {0} get null from jmes settings {1}, skip it".
logger.info(u"split_event_transformer: value {0} get null from jmes settings {1}, skip it".
format(value, self.jmes))
return None
except Exception as ex:
logger.info("split_event_transformer: value {0} with invalid jmes settings {1}, skip it".
logger.info(u"split_event_transformer: value {0} with invalid jmes settings {1}, skip it".
format(value, self.jmes))
return None

Expand All @@ -235,35 +235,21 @@ def _process_message(self, key, value):

return new_event

def extract_json(self, message):
new_event = {}
if isinstance(message, (six.binary_type, six.text_type)):
try:
message = json.loads(message)
except Exception as ex:
logger.info(
"json_transformer: fail to load event into json object: {0}, error: {1}".format(message, ex))
return message

if isinstance(message, dict):
for k, v in six.iteritems(message):
new_event["{0}{1}{2}".format(self.prefix, self._n(k), self.suffix)] = self._n(v)

return new_event

def __call__(self, event, inpt):
inpt = self._u(inpt)

# simple dict mode
if isinstance(inpt, (six.binary_type, six.text_type)):
inpt = [inpt]

if isinstance(inpt, Iterable):
for i in inpt:
if not isinstance(i, (six.binary_type, six.text_type)):
logger.error('trans_comp_lookup: type of input field "{0}" is unknown'.format(i))
logger.error(u'trans_comp_lookup: type of input field "{0}" is unknown'.format(i))
continue

if i not in event:
logger.info('trans_comp_lookup: event "{0}" does not contain field "{1}"'.format(event, i))
logger.info(u'trans_comp_lookup: event "{0}" does not contain field "{1}"'.format(event, i))
continue

# get input value
Expand All @@ -272,8 +258,8 @@ def __call__(self, event, inpt):
event.update(new_event)
else:
logger.info(
'trans_comp_lookup: event "{0}" does not extract value from field "{1}"'.format(event, i))
u'trans_comp_lookup: event "{0}" does not extract value from field "{1}"'.format(event, i))
else:
logger.error("trans_comp_lookup: unknown type of input field {0}".format(inpt))
logger.error(u"trans_comp_lookup: unknown type of input field {0}".format(inpt))

return event

0 comments on commit 35585a8

Please sign in to comment.