Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions examples/config files - basic/3 connector-ldap.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,14 @@ all_users_filter: "(&(objectClass=user)(objectCategory=person)(!(userAccountCont
# or this one for OpenLDAP: "(&(|(objectClass=groupOfNames)(objectClass=posixGroup))(cn={group}))"
group_filter_format: "(&(|(objectCategory=group)(objectClass=groupOfNames)(objectClass=posixGroup))(cn={group}))"

# (optional) string_encoding (default value given below)
# string_encoding specifies the character encoding used by the directory.
# All values retrieved from the directory are converted to Unicode before being
# sent to or compared with values on the Adobe side, to avoid encoding issues.
# The value must be a Python codec name or alias, such as 'latin1' or 'utf-8'.
# See https://docs.python.org/2/library/codecs.html#standard-encodings for details.
#string_encoding: utf-8

# (optional) user_identity_type_format (no default)
# user_identity_type_format specifies how to construct a user's desired identity
# type on the Adobe side by combining constant strings with attribute values.
Expand All @@ -86,6 +94,8 @@ group_filter_format: "(&(|(objectCategory=group)(objectClass=groupOfNames)(objec
# The default value used here is simple, and suitable for OpenLDAP systems. If you
# are using a non-email-aware AD system, which holds the username separately
# from the domain name, you may want: "{sAMAccountName}@mydomain.com"
# NOTE: for this and every format setting, the constant strings must be in
# the encoding specified by the string_encoding setting, above.
user_email_format: "{mail}"

# (optional) user_domain_format (no default value)
Expand Down
8 changes: 8 additions & 0 deletions examples/config files - basic/4 connector-csv.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@
# To set it to a specific value, uncomment this setting:
#delimiter: ","

# (optional) string_encoding (default value given below)
# string_encoding specifies the character encoding used in the CSV file.
# All values retrieved from the file are converted to Unicode before being
# sent to or compared with values on the Adobe side, to avoid encoding issues.
# The value must be a Python codec name or alias, such as 'latin1' or 'utf-8'.
# See https://docs.python.org/2/library/codecs.html#standard-encodings for details.
#string_encoding: utf-8

# (optional) email_column_name (default "email")
# The column name that contains the user's email address.
# Values in this column must be valid, unquoted email addresses.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
'pycrypto',
'python-ldap==2.4.25',
'PyYAML',
'umapi-client>=2.3',
'umapi-client>=2.4.1',
'psutil',
'keyring'
],
Expand Down
11 changes: 9 additions & 2 deletions user_sync/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@
import user_sync.config
import user_sync.connector.directory
import user_sync.connector.umapi
from user_sync.error import AssertionException
import user_sync.lockfile
import user_sync.rules
from user_sync.error import AssertionException
from user_sync.version import __version__ as APP_VERSION

LOG_STRING_FORMAT = '%(asctime)s %(process)d %(levelname)s %(name)s - %(message)s'
Expand Down Expand Up @@ -85,6 +85,12 @@ def process_args():
"When using this option, you must also specify what you want done with Adobe-only "
"users by also including --adobe-only-user-action and one of its arguments",
metavar='input_path', dest='stray_list_input_path')
parser.add_argument('--config-file-encoding',
help="config files are expected to contain only ASCII characters; if you "
"use an extended character set (e.g., to specify group names), then "
"specify the encoding of your configuration files with this argument. "
"All encoding names understood by Python are allowed.",
dest='encoding_name', default='ascii')
return parser.parse_args()


Expand Down Expand Up @@ -137,7 +143,7 @@ def init_log(logging_config):
fileHandler.setLevel(file_log_level)
fileHandler.setFormatter(logging.Formatter(LOG_STRING_FORMAT, LOG_DATE_FORMAT))
logging.getLogger().addHandler(fileHandler)
if (unknown_file_log_level == True):
if unknown_file_log_level:
logger.log(logging.WARNING, 'Unknown file log level: %s setting to info' % options['file_log_level'])


Expand Down Expand Up @@ -200,6 +206,7 @@ def begin_work(config_loader):
def create_config_loader(args):
    '''
    Build the configuration loader from the parsed command-line arguments.
    Only the main config filename and the config file encoding are passed
    along as bootstrap options.
    :param args: parsed arguments; config_filename and encoding_name are read
    :return: the user_sync.config.ConfigLoader created from those options
    '''
    bootstrap_options = dict(
        main_config_filename=args.config_filename,
        config_file_encoding=args.encoding_name,
    )
    return user_sync.config.ConfigLoader(bootstrap_options)
Expand Down
40 changes: 26 additions & 14 deletions user_sync/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import codecs
import logging
import os
import re
Expand All @@ -41,6 +42,7 @@ def __init__(self, caller_options):
self.options = options = {
# these are in alphabetical order! Always add new ones that way!
'delete_strays': False,
'config_file_encoding': 'ascii',
'directory_connector_module_name': None,
'directory_connector_overridden_options': None,
'directory_group_filter': None,
Expand All @@ -56,14 +58,15 @@ def __init__(self, caller_options):
'update_user_info': True,
'username_filter_regex': None,
}
options.update(caller_options)

options.update(caller_options)
main_config_filename = options.get('main_config_filename')
config_encoding = options['config_file_encoding']
try:
codecs.lookup(config_encoding)
except LookupError:
raise AssertionException("Unknown encoding '%s' specified with --config-file-encoding" % config_encoding)
ConfigFileLoader.config_encoding = config_encoding
main_config_content = ConfigFileLoader.load_root_config(main_config_filename)

if (not os.path.isfile(main_config_filename)):
raise AssertionException('Config file does not exist: %s' % (main_config_filename))

self.logger = logger = logging.getLogger('config')
logger.info("Using main config file: %s", main_config_filename)
self.main_config = DictConfig("<%s>" % main_config_filename, main_config_content)
Expand Down Expand Up @@ -606,6 +609,10 @@ class ConfigFileLoader:
'''
Loads config files and does pathname expansion on settings that refer to files or directories
'''
# config files can contain Unicode characters, so an encoding for them
# can be specified as a command line argument. This defaults to ascii.
config_encoding = 'ascii'

# key_paths in the root configuration file that should have filename values
# mapped to their value options. See load_from_yaml for the option meanings.
ROOT_CONFIG_PATH_KEYS = {'/adobe_users/connectors/umapi': (True, True, None),
Expand Down Expand Up @@ -680,9 +687,11 @@ def load_from_yaml(cls, filename, path_keys):
cmd = filename[3:-1]
try:
bytes = subprocess.check_output(cmd, cwd=dir, shell=True)
yml = yaml.load(bytes)
yml = yaml.load(bytes.decode(cls.config_encoding, 'strict'))
except subprocess.CalledProcessError as e:
raise AssertionException("Error executing process '%s' in dir '%s': %s" % (cmd, dir, e))
except UnicodeDecodeError as e:
raise AssertionException('Encoding error in process output: %s' % e)
except yaml.error.MarkedYAMLError as e:
raise AssertionException('Error parsing process YAML data: %s' % e)
else:
Expand All @@ -693,17 +702,20 @@ def load_from_yaml(cls, filename, path_keys):
cls.filename = os.path.split(cls.filepath)[1]
cls.dirpath = os.path.dirname(cls.filepath)
try:
with open(filename, 'r', 1) as input_file:
yml = yaml.load(input_file)
with open(filename, 'rb', 1) as input_file:
bytes = input_file.read()
yml = yaml.load(bytes.decode(cls.config_encoding, 'strict'))
except IOError as e:
# if a file operation error occurred while loading the
# configuration file, swallow up the exception and re-raise this
# configuration file, swallow up the exception and re-raise it
# as a configuration loader exception.
raise AssertionException('Error reading configuration file: %s' % e)
raise AssertionException("Error reading configuration file '%s': %s" % (cls.filepath, e))
except UnicodeDecodeError as e:
# as above, but in case of encoding errors
raise AssertionException("Encoding error in configuration file '%s: %s" % (cls.filepath, e))
except yaml.error.MarkedYAMLError as e:
# same as above, but indicate this problem has to do with
# parsing the configuration file.
raise AssertionException('Error parsing configuration file: %s' % e)
# as above, but in case of parse errors
raise AssertionException("Error parsing configuration file '%s': %s" % (cls.filepath, e))

# process the content of the dict
for path_key, options in path_keys.iteritems():
Expand Down
8 changes: 4 additions & 4 deletions user_sync/connector/directory_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def __init__(self, caller_options):
caller_config = user_sync.config.DictConfig('%s configuration' % self.name, caller_options)
builder = user_sync.config.OptionsBuilder(caller_config)
builder.set_string_value('delimiter', None)
builder.set_string_value('string_encoding', 'utf-8')
builder.set_string_value('first_name_column_name', 'firstname')
builder.set_string_value('last_name_column_name', 'lastname')
builder.set_string_value('email_column_name', 'email')
Expand All @@ -73,6 +74,8 @@ def __init__(self, caller_options):
logger.debug('%s initialized with options: %s', self.name, options)
caller_config.report_unused_values(logger)

# encoding of column values
self.encoding = options['string_encoding']
# identity type for new users if not specified in column
self.user_identity_type = user_sync.identity_type.parse_identity_type(options['user_identity_type'])

Expand Down Expand Up @@ -190,7 +193,4 @@ def get_column_value(self, row, column_name):
:type column_name: str
'''
value = row.get(column_name)
if (value == ''):
value = None
return value

return value.decode(self.encoding) if value else None
20 changes: 11 additions & 9 deletions user_sync/connector/directory_ldap.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

import string

import keyring
import ldap.controls.libldap

import user_sync.config
Expand Down Expand Up @@ -64,6 +63,7 @@ def __init__(self, caller_options):
builder.set_string_value('group_filter_format', '(&(|(objectCategory=group)(objectClass=groupOfNames)(objectClass=posixGroup))(cn={group}))')
builder.set_string_value('all_users_filter', '(&(objectClass=user)(objectCategory=person)(!(userAccountControl:1.2.840.113556.1.4.803:=2)))')
builder.set_bool_value('require_tls_cert', False)
builder.set_string_value('string_encoding', 'utf-8')
builder.set_string_value('user_identity_type_format', None)
builder.set_string_value('user_email_format', '{mail}')
builder.set_string_value('user_username_format', None)
Expand All @@ -79,6 +79,7 @@ def __init__(self, caller_options):
self.logger = logger = user_sync.connector.helper.create_logger(options)
logger.debug('%s initialized with options: %s', self.name, options)

LDAPValueFormatter.encoding = options['string_encoding']
self.user_identity_type = user_sync.identity_type.parse_identity_type(options['user_identity_type'])
self.user_identity_type_formatter = LDAPValueFormatter(options['user_identity_type_format'])
self.user_email_formatter = LDAPValueFormatter(options['user_email_format'])
Expand Down Expand Up @@ -367,19 +368,20 @@ def iter_search_result(self, base_dn, scope, filter_string, attributes):
raise

class LDAPValueFormatter(object):
encoding = 'utf-8'

def __init__(self, string_format):
'''
:type string_format: str
'''
if (string_format == None):
'''
if (string_format == None):
attribute_names = []
else:
formatter = string.Formatter()
attribute_names = [item[1] for item in formatter.parse(string_format) if item[1]]

self.string_format = string_format
self.attribute_names = attribute_names

def get_attribute_names(self):
'''
:rtype list(str)
Expand All @@ -402,17 +404,17 @@ def generate_value(self, record):
break
values[attribute_name] = value
if values is not None:
result = self.string_format.format(**values)
result = self.string_format.format(**values).decode(self.encoding)
return (result, attribute_name)

@staticmethod
def get_attribute_value(attributes, attribute_name):
@classmethod
def get_attribute_value(cls, attributes, attribute_name):
'''
:type attributes: dict
:type attribute_name: str
'''
if attribute_name in attributes:
attribute_value = attributes[attribute_name]
if (len(attribute_value) > 0):
return attribute_value[0]
return attribute_value[0].decode(cls.encoding)
return None
2 changes: 1 addition & 1 deletion user_sync/connector/umapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def __init__(self, name, caller_options):
options['enterprise'] = enterprise_options = enterprise_builder.get_options()
self.options = options
self.logger = logger = helper.create_logger(options)
server_config.report_unused_values(logger)
if server_config: server_config.report_unused_values(logger)
logger.debug('UMAPI initialized with options: %s', options)

# set up the auth dict for umapi-client
Expand Down
6 changes: 4 additions & 2 deletions user_sync/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,11 @@ def open_file(name, mode, buffering = -1):

def normalize_string(string_value):
    '''
    Normalize a unicode or regular string by stripping surrounding
    whitespace and converting it to lower case.
    :param string_value: either a unicode or regular string or None
    :return: the normalized value, of the same type that came in;
             None when None was passed in
    '''
    # identity check: None is a singleton, and "is" avoids invoking any
    # custom __eq__/__ne__ on the incoming value
    if string_value is None:
        return None
    return string_value.strip().lower()

def guess_delimiter_from_filename(filename):
'''
Expand Down
6 changes: 4 additions & 2 deletions user_sync/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def __init__(self, caller_options):
# in the secondary umapis (and exclude all that don't match). Finally,
# we keep track of user keys (in any umapi) that we have updated, so
# we can correctly report their count.
self.adobe_user_count = 0
self.included_user_keys = set()
self.excluded_user_count = 0
self.updated_user_keys = set()
Expand Down Expand Up @@ -172,7 +173,7 @@ def log_action_summary(self, umapi_connectors):
self.action_summary['directory_users_read'] = len(self.directory_user_by_user_key)
self.action_summary['directory_users_selected'] = len(self.filtered_directory_user_by_user_key)
# find the total number of adobe users and excluded users
self.action_summary['adobe_users_read'] = len(self.included_user_keys) + self.excluded_user_count
self.action_summary['adobe_users_read'] = self.adobe_user_count
self.action_summary['adobe_users_excluded'] = self.excluded_user_count
self.action_summary['adobe_users_updated'] = len(self.updated_user_keys)
# find out the number of users that have no changes; this depends on whether
Expand Down Expand Up @@ -752,6 +753,7 @@ def update_umapi_users_for_connector(self, umapi_info, umapi_connector):

def is_umapi_user_excluded(self, in_primary_org, user_key, current_groups):
if in_primary_org:
self.adobe_user_count += 1
# in the primary umapi, we actually check the exclusion conditions
identity_type, username, domain = self.parse_user_key(user_key)
if identity_type in self.exclude_identity_types:
Expand Down Expand Up @@ -886,7 +888,7 @@ def get_user_key(self, id_type, username, domain, email=None):
domain = ""
elif not domain:
return None
return id_type + ',' + username + ',' + domain
return unicode(id_type) + u',' + unicode(username) + u',' + unicode(domain)

def parse_user_key(self, user_key):
'''Returns the identity_type, username, and domain for the user.
Expand Down