Skip to content

Commit

Permalink
Merge branch 'email-validation-glab' of 'https://github.com/sduenas/s…
Browse files Browse the repository at this point in the history
…ortinghat'

Merges #111
Closes #111
Fixes #107
  • Loading branch information
sduenas committed Dec 20, 2017
2 parents bdfbc49 + 561ed42 commit bdad309
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 8 deletions.
12 changes: 9 additions & 3 deletions misc/grimoirelab2sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@ def main():
args = parse_args()

try:
email_validation = not args.no_email_validation
parser = parse_grimoirelab_file(args.identities, args.organizations,
args.source)
args.source, email_validation)
except (IOError, UnicodeDecodeError, InvalidFormatError) as e:
raise RuntimeError(str(e))

Expand Down Expand Up @@ -71,6 +72,9 @@ def parse_args():
parser.add_argument('-o', '--outfile', nargs='?', type=argparse.FileType('w'),
default=sys.stdout,
help='Sorting Hat JSON output filename')
parser.add_argument('--no-email-validation', dest='no_email_validation',
action='store_true',
help="do not email addresses validation")

args = parser.parse_args()

Expand All @@ -80,14 +84,16 @@ def parse_args():
return args


def parse_grimoirelab_file(identities, organizations, source):
def parse_grimoirelab_file(identities, organizations, source, email_validation):
"""Parse GrimoireLab JSON file"""

content_id = read_file(identities) if identities else None
content_org = read_file(organizations) if organizations else None

try:
parser = GrimoireLabParser(content_id, content_org, source=source)
parser = GrimoireLabParser(content_id, content_org,
source=source,
email_validation=email_validation)
except ValueError:
s = "Error: Empty input file(s)\n"
sys.stdout.write(s)
Expand Down
14 changes: 9 additions & 5 deletions sortinghat/parsing/grimoirelab.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,26 +47,30 @@ class GrimoireLabParser(object):
The unique identities are stored in an object named 'uidentities'.
The keys of this object are the UUID of the unique identities.
Each unique identity object stores a list of identities and
enrollments.
enrollments. Email addresses will not be validated when `email_validation`
is set to `False`.
Organizations are stored in 'organizations' object. Its keys
are the name of the organizations and each organization object is
related to a list of domains.
:param stream: stream to parse
:param identities: stream of identities to parse
:param organizations: stream of organizations to parse
:param source: source of the data
:param email_validation: validate email addresses; set to True by default
:raises InvalidFormatError: raised when the format of the stream is
not valid.
"""

EMAIL_ADDRESS_REGEX = r"^(?P<email>[^\s@]+@[^\s@.]+\.[^\s@]+)$"
GRIMOIRELAB_INVALID_FORMAT = "invalid GrimoireLab yaml format. %(error)s"

def __init__(self, identities=None, organizations=None,
source='grimoirelab'):
source='grimoirelab', email_validation=True):
self._identities = {}
self._organizations = {}
self.source = source
self.email_validation = email_validation

if not (identities or organizations):
raise ValueError('Null identities and organization streams')
Expand Down Expand Up @@ -155,7 +159,7 @@ def __create_sh_identities(name, emails, yaml_entry):

emails = yid.get('email', None)

if emails:
if emails and self.email_validation:
self.__validate_email(emails[0])

enrollments = yid.get('enrollments', None)
Expand Down
31 changes: 31 additions & 0 deletions tests/test_parser_grimoirelab.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,37 @@ def test_enrollments_parser(self):
# Unknown organization is ignored during the parsing process
self.assertEqual(len(uid.enrollments), 0)

def test_email_validation(self):
"""Check whether it raises an error on invalid email addresses"""

stream_ids = self.read_file('data/grimoirelab_invalid_email.yml')

# Validation is requested explicitly; the parser is expected to reject
# the malformed address and name it in the error message
with self.assertRaisesRegexp(InvalidFormatError, '^.+Invalid email address: lcanas__at__bitergia.com$'):
GrimoireLabParser(stream_ids, email_validation=True)

def test_supress_email_validation(self):
"""Check whether it ignores invalid email addresses"""

# Same fixture as the validation test, parsed with validation disabled
stream_ids = self.read_file('data/grimoirelab_invalid_email.yml')
parser = GrimoireLabParser(stream_ids, email_validation=False)

# Parsing must complete without raising and yield every unique identity
uids = parser.identities
self.assertEqual(len(uids), 3)

uid = uids[1]
self.assertIsInstance(uid, UniqueIdentity)
self.assertFalse(uid.profile.is_bot)
self.assertEqual(uid.profile.name, 'Luis Cañas-Díaz')

# This identity has an invalid email address; it must be stored
# exactly as it appears in the input
id1 = uid.identities[1]
self.assertIsInstance(id1, Identity)
self.assertEqual(id1.name, None)
self.assertEqual(id1.email, 'lcanas__at__bitergia.com')
self.assertEqual(id1.username, None)
self.assertEqual(id1.source, 'grimoirelab')
self.assertEqual(id1.uuid, None)

def test_not_valid_organizations_stream(self):
"""Check whether it parses invalid organizations files"""

Expand Down

0 comments on commit bdad309

Please sign in to comment.