Skip to content
This repository has been archived by the owner on Mar 1, 2018. It is now read-only.

Commit

Permalink
Restructuring rosie based on Python packages
Browse files Browse the repository at this point in the history
First steps towards making rosie more generic, following
[this gist](https://gist.github.com/c98a91a9447b9404abd3b449daba50ec) and [this part](https://docs.python.org/3/tutorial/modules.html#packages) of the Python documentation.
Thanks to @jtemporal for the time and the help to get it started.
  • Loading branch information
anaschwendler committed Apr 12, 2017
1 parent 36a8751 commit 36fc8c8
Show file tree
Hide file tree
Showing 29 changed files with 40 additions and 40 deletions.
File renamed without changes.
18 changes: 9 additions & 9 deletions rosie/__init__.py → ceap/classifiers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
import numpy as np
from sklearn.externals import joblib

from rosie.dataset import Dataset
from rosie.election_expenses_classifier import ElectionExpensesClassifier
from rosie.invalid_cnpj_cpf_classifier import InvalidCnpjCpfClassifier
from rosie.meal_price_outlier_classifier import MealPriceOutlierClassifier
from rosie.monthly_subquota_limit_classifier import MonthlySubquotaLimitClassifier
from rosie.traveled_speeds_classifier import TraveledSpeedsClassifier
from rosie.irregular_companies_classifier import IrregularCompaniesClassifier
from ceap.classifiers.dataset import Dataset
from ceap.classifiers.election_expenses_classifier import ElectionExpensesClassifier
from ceap.classifiers.invalid_cnpj_cpf_classifier import InvalidCnpjCpfClassifier
from ceap.classifiers.meal_price_outlier_classifier import MealPriceOutlierClassifier
from ceap.classifiers.monthly_subquota_limit_classifier import MonthlySubquotaLimitClassifier
from ceap.classifiers.traveled_speeds_classifier import TraveledSpeedsClassifier
from ceap.classifiers.irregular_companies_classifier import IrregularCompaniesClassifier


class Rosie:
class Ceap:
CLASSIFIERS = {
MealPriceOutlierClassifier: 'meal_price_outlier',
MonthlySubquotaLimitClassifier: 'over_monthly_subquota_limit',
Expand Down Expand Up @@ -66,4 +66,4 @@ def predict(self, model, irregularity):

def main(target_directory='/tmp/serenata-data'):
dataset = Dataset(target_directory).get()
Rosie(dataset, target_directory).run_classifiers()
Ceap(dataset, target_directory).run_classifiers()
4 changes: 4 additions & 0 deletions ceap/classifiers/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Entry point executed by ``python -m ceap.classifiers``.
# ``main`` is defined in ``ceap/classifiers/__init__.py``, so it has to be
# imported from that package; the top-level ``ceap`` package does not define it.
from ceap.classifiers import main


main()
File renamed without changes.
Empty file added ceap/tests/__init__.py
Empty file.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cnpj_cpf,situation,situation_date,issue_date
02989654001197,ABERTA,2013-01-03,2013-01-30
02989654001197,ABERTA,201 3-01-03,2013-01-30
02989654001197,BAIXADA,2013-01-03,2013-01-30
02989654001197,NULA,2013-01-03,2013-01-30
02989654001197,INAPTA,2013-01-03,2013-01-30
Expand Down
File renamed without changes.
10 changes: 5 additions & 5 deletions tests/test_rosie.py → ceap/tests/test_ceap.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@

import pandas as pd

from rosie import Rosie
from ceap.classifiers import Ceap


class TestRosie(TestCase):
class TestCeap(TestCase):

def setUp(self):
row = pd.Series({'applicant_id': 444,
Expand All @@ -20,14 +20,14 @@ def setUp(self):
'total_net_value': 178,
'year': 2016})
self.dataset = pd.DataFrame().append(row, ignore_index=True)
self.subject = Rosie(self.dataset, mkdtemp())
self.subject = Ceap(self.dataset, mkdtemp())

@patch('rosie.joblib')
@patch('ceap.classifiers.joblib')
def test_load_trained_model_trains_model_when_not_persisted(self, _):
model = self.subject.load_trained_model(MagicMock)
model.fit.assert_called_once_with(self.dataset)

@patch('rosie.joblib')
@patch('ceap.classifiers.joblib')
def test_load_trained_model_doesnt_train_model_when_already_persisted(self, _):
Path(os.path.join(self.subject.data_path, 'magicmock.pkl')).touch()
model = self.subject.load_trained_model(MagicMock)
Expand Down
10 changes: 5 additions & 5 deletions tests/test_dataset.py → ceap/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@
from unittest.mock import patch
from shutil import copy2

from rosie import Dataset
from ceap.classifiers import Dataset


class TestDataset(TestCase):

def setUp(self):
temp_path = mkdtemp()
copy2('tests/fixtures/companies.xz',
copy2('ceap/tests/fixtures/companies.xz',
os.path.join(temp_path, Dataset.COMPANIES_DATASET))
copy2('tests/fixtures/reimbursements.xz', temp_path)
copy2('ceap/tests/fixtures/reimbursements.xz', temp_path)
self.subject = Dataset(temp_path)

@patch('rosie.dataset.CEAPDataset')
@patch('rosie.dataset.fetch')
@patch('ceap.classifiers.dataset.CEAPDataset')
@patch('ceap.classifiers.dataset.fetch')
def test_get_performs_a_left_merge_between_reimbursements_and_companies(self, _ceap_dataset, _fetch):
dataset = self.subject.get()
self.assertEqual(5, len(dataset))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
import numpy as np
import pandas as pd

from rosie.election_expenses_classifier import ElectionExpensesClassifier
from ceap.classifiers.election_expenses_classifier import ElectionExpensesClassifier


class TestElectionExpensesClassifier(TestCase):

def setUp(self):
self.dataset = pd.read_csv('tests/fixtures/election_expenses_classifier.csv',
self.dataset = pd.read_csv('ceap/tests/fixtures/election_expenses_classifier.csv',
dtype={'name': np.str, 'legal_entity': np.str})
self.subject = ElectionExpensesClassifier()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
import numpy as np
import pandas as pd

from rosie.invalid_cnpj_cpf_classifier import InvalidCnpjCpfClassifier
from ceap.classifiers.invalid_cnpj_cpf_classifier import InvalidCnpjCpfClassifier


class TestInvalidCnpjCpfClassifier(TestCase):

def setUp(self):
self.dataset = pd.read_csv('tests/fixtures/invalid_cnpj_cpf_classifier.csv',
self.dataset = pd.read_csv('ceap/tests/fixtures/invalid_cnpj_cpf_classifier.csv',
dtype={'cnpj_cpf': np.str})
self.subject = InvalidCnpjCpfClassifier()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
import numpy as np
import pandas as pd

from rosie.irregular_companies_classifier import IrregularCompaniesClassifier
from ceap.classifiers.irregular_companies_classifier import IrregularCompaniesClassifier


class TestIrregularCompaniesClassifier(TestCase):

def setUp(self):
self.dataset = pd.read_csv('tests/fixtures/irregular_companies_classifier.csv',
self.dataset = pd.read_csv('ceap/tests/fixtures/irregular_companies_classifier.csv',
dtype={'cnpj': np.str})
self.subject = IrregularCompaniesClassifier()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
import pandas as pd
from numpy.testing import assert_array_equal

from rosie.meal_price_outlier_classifier import MealPriceOutlierClassifier
from ceap.classifiers.meal_price_outlier_classifier import MealPriceOutlierClassifier


class TestMealPriceOutlierClassifier(TestCase):

def setUp(self):
self.dataset = pd.read_csv('tests/fixtures/meal_price_outlier_classifier.csv',
self.dataset = pd.read_csv('ceap/tests/fixtures/meal_price_outlier_classifier.csv',
dtype={'cnpj_cpf': np.str})
self.subject = MealPriceOutlierClassifier()
self.subject.fit(self.dataset)

@patch('rosie.meal_price_outlier_classifier.KMeans')
@patch('ceap.classifiers.meal_price_outlier_classifier.KMeans')
def test_predict_returns_a_prediction_for_each_observation(self, kmeans_mock):
kmeans_mock.return_value.predict.return_value = np.ones(3)
self.subject.fit(self.dataset)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
import pandas as pd
from numpy.testing import assert_array_equal

from rosie.monthly_subquota_limit_classifier import MonthlySubquotaLimitClassifier
from ceap.classifiers.monthly_subquota_limit_classifier import MonthlySubquotaLimitClassifier


class TestMonthlySubquotaLimitClassifier(TestCase):

def setUp(self):
self.dataset = pd.read_csv('tests/fixtures/monthly_subquota_limit_classifier.csv',
self.dataset = pd.read_csv('ceap/tests/fixtures/monthly_subquota_limit_classifier.csv',
dtype={'subquota_number': np.str})
self.subject = MonthlySubquotaLimitClassifier()
self.subject.fit_transform(self.dataset)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
import sklearn
from numpy.testing import assert_array_equal

from rosie.traveled_speeds_classifier import TraveledSpeedsClassifier
from ceap.classifiers.traveled_speeds_classifier import TraveledSpeedsClassifier


class TestTraveledSpeedsClassifier(TestCase):

def setUp(self):
self.dataset = pd.read_csv('tests/fixtures/traveled_speeds_classifier.csv',
self.dataset = pd.read_csv('ceap/tests/fixtures/traveled_speeds_classifier.csv',
dtype={'cnpj_cpf': np.str})
self.subject = TraveledSpeedsClassifier()
self.subject.fit(self.dataset)
Expand Down
6 changes: 3 additions & 3 deletions rosie.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ def help():


def run():
import rosie
from ceap import classifiers
target_directory = argv[2] if len(argv) >= 3 else '/tmp/serenata-data/'
rosie.main(target_directory)
classifiers.main(target_directory)


def test():
import unittest
loader = unittest.TestLoader()
tests = loader.discover('tests')
tests = loader.discover('ceap/tests')
testRunner = unittest.runner.TextTestRunner()
testRunner.run(tests)

Expand Down
4 changes: 0 additions & 4 deletions rosie/__main__.py

This file was deleted.

0 comments on commit 36fc8c8

Please sign in to comment.