Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add an extractor for pulling user information from BambooHR #369

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
23 changes: 23 additions & 0 deletions README.md
Expand Up @@ -852,6 +852,29 @@ job = DefaultJob(conf=job_config,
job.launch()
```

### [BamboohrUserExtractor](./databuilder/extractor/user/bamboohr/bamboohr_user_extractor.py)

The included `BamboohrUserExtractor` provides support for extracting basic user metadata from [BambooHR](https://www.bamboohr.com/). For companies and organizations that use BambooHR to store employee information such as email addresses, first names, last names, titles, and departments, use the `BamboohrUserExtractor` to populate Amundsen user data.

A sample job config is shown below.

```python
extractor = BamboohrUserExtractor()
task = DefaultTask(extractor=extractor, loader=FsNeo4jCSVLoader())

job_config = ConfigFactory.from_dict({
'extractor.bamboohr_user.api_key': api_key,
'extractor.bamboohr_user.subdomain': subdomain,
})

job = DefaultJob(conf=job_config,
task=task,
publisher=Neo4jCsvPublisher())
job.launch()
```



## List of transformers
#### [ChainedTransformer](https://github.com/amundsen-io/amundsendatabuilder/blob/master/databuilder/transformer/base_transformer.py#L41 "ChainedTransformer")
A chained transformer that can take multiple transformers.
Expand Down
2 changes: 2 additions & 0 deletions databuilder/extractor/user/__init__.py
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
2 changes: 2 additions & 0 deletions databuilder/extractor/user/bamboohr/__init__.py
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
65 changes: 65 additions & 0 deletions databuilder/extractor/user/bamboohr/bamboohr_user_extractor.py
@@ -0,0 +1,65 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0


from pyhocon import ConfigTree
import requests
from requests.auth import HTTPBasicAuth
from typing import Iterator, Optional
from xml.etree import ElementTree

from databuilder.extractor.base_extractor import Extractor
from databuilder.models.user import User


class BamboohrUserExtractor(Extractor):
    """Extract users from the BambooHR employee directory API.

    ``init`` reads two settings from the config it is given:

    * ``api_key``   -- BambooHR API key, sent as the HTTP basic-auth username.
    * ``subdomain`` -- BambooHR subdomain, used to build the directory URL.
    """

    API_KEY = 'api_key'
    SUBDOMAIN = 'subdomain'

    def init(self, conf: ConfigTree) -> None:
        """Read the API key and subdomain from ``conf`` and reset the iterator.

        :param conf: config holding the ``api_key`` and ``subdomain`` settings.
        """
        # Created lazily on the first extract() call so that init() itself
        # performs no network I/O.
        self._extract_iter: Optional[Iterator[User]] = None

        self._api_key = conf.get_string(BamboohrUserExtractor.API_KEY)
        self._subdomain = conf.get_string(BamboohrUserExtractor.SUBDOMAIN)

    def extract(self) -> Optional[User]:
        """Return the next user, or ``None`` once the directory is exhausted."""
        if self._extract_iter is None:
            self._extract_iter = self._get_extract_iter()
        try:
            return next(self._extract_iter)
        except StopIteration:
            return None

    def _employee_directory_uri(self) -> str:
        """Return the employee directory endpoint for the configured subdomain."""
        return 'https://api.bamboohr.com/api/gateway.php/{subdomain}/v1/employees/directory'.format(
            subdomain=self._subdomain
        )

    @staticmethod
    def _get_field(employee: ElementTree.Element, name: str) -> str:
        """Return the text of the ``<field id=name>`` child of ``employee``.

        A missing field and a field with no text both yield ``''``.
        """
        field = employee.find('./field[@id=\'{name}\']'.format(name=name))
        if field is not None and field.text is not None:
            return field.text
        return ''

    def _get_extract_iter(self) -> Iterator[User]:
        """Fetch the employee directory and yield one ``User`` per employee.

        :raises requests.HTTPError: if the API answers with a 4xx/5xx status.
        """
        # The API key is the basic-auth username; the password is a placeholder.
        response = requests.get(
            self._employee_directory_uri(), auth=HTTPBasicAuth(self._api_key, 'x')
        )
        # Fail loudly on auth/subdomain errors instead of handing an error
        # page to the XML parser.
        response.raise_for_status()

        root = ElementTree.fromstring(response.content)
        get_field = self._get_field

        for employee in root.findall('./employees/employee'):
            yield User(
                email=get_field(employee, 'workEmail'),
                first_name=get_field(employee, 'firstName'),
                last_name=get_field(employee, 'lastName'),
                name=get_field(employee, 'displayName'),
                team_name=get_field(employee, 'department'),
                role_name=get_field(employee, 'jobTitle'),
            )

    def get_scope(self) -> str:
        """Return the config scope for this extractor."""
        return 'extractor.bamboohr_user'
1 change: 1 addition & 0 deletions requirements.txt
Expand Up @@ -59,3 +59,4 @@ httplib2>=0.18.0
unidecode

requests==2.23.0,<3.0
responses==0.10.6
2 changes: 2 additions & 0 deletions tests/unit/extractor/user/__init__.py
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
2 changes: 2 additions & 0 deletions tests/unit/extractor/user/bamboohr/__init__.py
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,46 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import io
import unittest

import os

import responses
from pyhocon import ConfigFactory

from databuilder.models.user import User
from databuilder.extractor.user.bamboohr.bamboohr_user_extractor import BamboohrUserExtractor


class TestBamboohrUserExtractor(unittest.TestCase):
    """Checks BamboohrUserExtractor against a canned directory response."""

    @responses.activate
    def test_parse_testdata(self) -> None:
        """The single employee in testdata.xml is parsed into the expected User."""
        extractor = BamboohrUserExtractor()
        config = ConfigFactory.from_dict({'api_key': 'api_key', 'subdomain': 'amundsen'})
        extractor.init(config)

        # Locate the XML fixture relative to this test module.
        here = os.path.dirname(os.path.realpath(__file__))
        fixture_path = os.path.join(
            here, '../../../resources/extractor/user/bamboohr/testdata.xml'
        )

        # Serve the fixture as the body of the mocked directory endpoint.
        with io.open(fixture_path) as fixture:
            responses.add(
                responses.GET,
                extractor._employee_directory_uri(),
                body=fixture.read(),
            )

        want = User(
            email='roald@amundsen.io',
            first_name='Roald',
            last_name='Amundsen',
            name='Roald Amundsen',
            team_name='508 Corporate Marketing',
            role_name='Antarctic Explorer',
        )

        extracted = list(extractor._get_extract_iter())

        self.assertEqual(1, len(extracted))
        # Compared via repr, as the expected and actual User are distinct objects.
        self.assertEqual(repr(want), repr(extracted[0]))


# Run the tests via unittest when this module is executed directly.
if __name__ == '__main__':
unittest.main()
39 changes: 39 additions & 0 deletions tests/unit/resources/extractor/user/bamboohr/testdata.xml
@@ -0,0 +1,39 @@
<?xml version="1.0"?>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

interesting, haven't seen xml for a while :)

<directory>
<fieldset>
<field id="displayName">Display name</field>
<field id="firstName">First name</field>
<field id="lastName">Last name</field>
<field id="preferredName">Preferred name</field>
<field id="gender">Gender</field>
<field id="jobTitle">Job title</field>
<field id="workPhone">Work Phone</field>
<field id="mobilePhone">Mobile Phone</field>
<field id="workEmail">Work Email</field>
<field id="department">Department</field>
<field id="location">Location</field>
<field id="workPhoneExtension">Work Ext.</field>
<field id="photoUploaded">Employee photo</field>
<field id="photoUrl">Photo URL</field>
<field id="canUploadPhoto">Can Upload Photo</field>
</fieldset>
<employees>
<employee id="1082">
<field id="displayName">Roald Amundsen</field>
<field id="firstName">Roald</field>
<field id="lastName">Amundsen</field>
<field id="preferredName"></field>
<field id="gender">Male</field>
<field id="jobTitle">Antarctic Explorer</field>
<field id="workPhone"></field>
<field id="mobilePhone"></field>
<field id="workEmail">roald@amundsen.io</field>
<field id="department">508 Corporate Marketing</field>
<field id="location">Norway</field>
<field id="workPhoneExtension"></field>
<field id="photoUploaded">true</field>
<field id="photoUrl">https://upload.wikimedia.org/wikipedia/commons/thumb/6/6f/Amundsen_in_fur_skins.jpg/440px-Amundsen_in_fur_skins.jpg</field>
<field id="canUploadPhoto">no</field>
</employee>
</employees>
</directory>