Skip to content
This repository has been archived by the owner on Mar 1, 2018. It is now read-only.

Commit

Permalink
Document existing classifiers
Browse files Browse the repository at this point in the history
  • Loading branch information
Irio committed May 1, 2017
1 parent ed5d850 commit 7a513cc
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,17 @@


class ElectionExpensesClassifier(TransformerMixin):
"""
Election Expenses classifier.
Check a `legal_entity` field for the presence of the political candidacy
category in the Brazilian Federal Revenue.
Dataset
-------
legal_entity : string column
Brazilian Federal Revenue category of companies, preceded by its code.
"""

def fit(self, X):
return self
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,24 @@


class IrregularCompaniesClassifier(TransformerMixin):
"""
Irregular Companies classifier.
Check the official state of the company in the
Brazilian Federal Revenue and report rows with companies unauthorized
to sell products or services.
Dataset
-------
issue_date : datetime column
Date when the expense was made.
situation : string column
Situation of the company according to the Brazilian Federal Revenue.
situation_date : datetime column
Date when the situation was last updated.
"""

def fit(self, X):
return self
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,20 @@


class MealPriceOutlierClassifier(TransformerMixin):
"""
Meal Price Outlier classifier.
Dataset
-------
applicant_id : string column
A personal identifier code for every person making expenses.
net_value : float column
The value of the expense.
recipient_id : string column
A CNPJ (Brazilian company ID) or CPF (Brazilian personal tax ID).
"""

HOTEL_REGEX = r'hote(?:(?:ls?)|is)'
CLUSTER_KEYS = ['mean', 'std']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@


class MonthlySubquotaLimitClassifier(TransformerMixin):
"""
Monthly Subquota Limit classifier.
Dataset
-------
issue_date : datetime column
Date when the expense was made.
net_value : float column
The value of the expense.
"""

KEYS = ['applicant_id', 'month', 'year']

def fit(self, X):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,31 @@


class TraveledSpeedsClassifier(TransformerMixin):
"""
Traveled Speeds classifier.
Dataset
-------
applicant_id : category column
A personal identifier code for every person making expenses.
category : category column
Category of the expense. The model will be applied only to rows where
the value is equal to "Meal".
is_party_expense : bool column
Whether the row corresponds to a party expense. The model will be
applied only to rows where the value is equal to `False`.
issue_date : datetime column
Date when the expense was made.
latitude : float column
Latitude of the place where the expense was made.
longitude : float column
Longitude of the place where the expense was made.
"""

AGG_KEYS = ['applicant_id', 'issue_date']

Expand Down
17 changes: 16 additions & 1 deletion rosie/core/classifiers/invalid_cnpj_cpf_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,20 @@


class InvalidCnpjCpfClassifier(TransformerMixin):
"""
Invalid CNPJ/CPF classifier.
Validate a `recipient_id` field by calculating its expected check digits
and verifying the authenticity of the provided ones.
Dataset
-------
document_type : category column
Validate rows with values 'bill_of_sale' or 'simple_receipt'.
recipient_id : string column
A CNPJ (Brazilian company ID) or CPF (Brazilian personal tax ID).
"""

def fit(self, X):
return self
Expand All @@ -17,4 +31,5 @@ def predict(self, X):
return np.r_[X.apply(self.__is_invalid, axis=1)]

def __is_invalid(self, row):
return (row['document_type'] in ['bill_of_sale', 'simple_receipt']) & (not cpfcnpj.validate(str(row['recipient_id'])))
return (row['document_type'] in ['bill_of_sale', 'simple_receipt']) \
& (not cpfcnpj.validate(str(row['recipient_id'])))

0 comments on commit 7a513cc

Please sign in to comment.