Skip to content
This repository has been archived by the owner on Mar 1, 2018. It is now read-only.

Commit

Permalink
Ensure combining datasets return the same number of rows as reimbursements
Browse files: browse the repository at this point in the history
  • Loading branch information
Irio committed Dec 23, 2016
1 parent 0729392 commit 02c2053
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 1 deletion.
3 changes: 2 additions & 1 deletion rosie/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def get(self):
reimbursements = self.get_reimbursements()
companies = self.get_companies()
return pd.merge(reimbursements, companies,
how='left',
left_on='cnpj_cpf',
right_on='cnpj')

Expand Down Expand Up @@ -45,7 +46,7 @@ def get_companies(self):
is_in_brazil = ('(-73.992222 < longitude < -34.7916667) & '
'(-33.742222 < latitude < 5.2722222)')
dataset = pd.read_csv(os.path.join(self.path, self.COMPANIES_DATASET),
dtype={'cnpj_cpf': np.str},
dtype={'cnpj': np.str},
low_memory=False)
dataset = dataset.query(is_in_brazil)
dataset['cnpj'] = dataset['cnpj'].str.replace(r'\D', '')
Expand Down
Binary file added tests/fixtures/companies.xz
Binary file not shown.
Binary file added tests/fixtures/reimbursements.xz
Binary file not shown.
24 changes: 24 additions & 0 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import os.path
from tempfile import mkdtemp
from unittest import TestCase
from unittest.mock import patch
from shutil import copy2

from rosie import Dataset


class TestDataset(TestCase):
    """Tests for merging reimbursements with companies in ``Dataset.get``."""

    def setUp(self):
        """Build a ``Dataset`` on top of a temporary copy of the fixtures.

        The fixture files are copied into a fresh temporary directory, which
        is removed again when the test finishes so repeated runs do not leave
        stray directories behind.
        """
        # Imported locally so this class only relies on module-level imports
        # the file already has.
        from shutil import rmtree

        temp_path = mkdtemp()
        # Register the cleanup before copying anything: cleanups run even
        # when a later step of setUp raises.
        self.addCleanup(rmtree, temp_path, ignore_errors=True)

        # The companies fixture is stored under the file name that Dataset
        # exposes as COMPANIES_DATASET; the reimbursements fixture keeps its
        # own file name.
        copy2('tests/fixtures/companies.xz',
              os.path.join(temp_path, Dataset.COMPANIES_DATASET))
        copy2('tests/fixtures/reimbursements.xz', temp_path)
        self.subject = Dataset(temp_path)

    # Stacked ``patch`` decorators are applied bottom-up, so the mock for
    # ``rosie.dataset.fetch`` is injected first and the mock for
    # ``rosie.dataset.CEAPDataset`` second. Neither mock is inspected; they
    # only replace those two names while the test runs.
    @patch('rosie.dataset.CEAPDataset')
    @patch('rosie.dataset.fetch')
    def test_get_performs_a_left_merge_between_reimbursements_and_companies(
            self, _fetch, _ceap_dataset):
        dataset = self.subject.get()
        # The merged result is expected to have 5 rows, exactly one of which
        # has no value in the 'legal_entity' column.
        self.assertEqual(5, len(dataset))
        self.assertEqual(1, dataset['legal_entity'].isnull().sum())

0 comments on commit 02c2053

Please sign in to comment.