Skip to content
This repository has been archived by the owner on Mar 1, 2018. It is now read-only.

Commit

Permalink
Merge branch 'master' into irio-integration-test-on-ci
Browse the repository at this point in the history
  • Loading branch information
jtemporal committed Jun 19, 2017
2 parents 2096a03 + d10d702 commit ec5f40a
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ class MealPriceOutlierClassifier(TransformerMixin):

HOTEL_REGEX = r'hote(?:(?:ls?)|is)'
CLUSTER_KEYS = ['mean', 'std']
COLS = ['applicant_id',
'category',
'net_value',
'recipient',
'recipient_id']

def fit(self, X):
_X = X[self.__applicable_rows(X)]
Expand All @@ -49,7 +54,7 @@ def transform(self, X=None):
pass

def predict(self, X):
- _X = X.copy()
+ _X = X[self.COLS].copy()
companies = _X[self.__applicable_rows(_X)] \
.groupby('recipient_id').apply(self.__company_stats) \
.reset_index()
Expand All @@ -68,7 +73,7 @@ def predict(self, X):
_X = pd.merge(_X, known_thresholds, how='left')
if 'cnpj_threshold' in _X.columns:
_X.loc[_X['cnpj_threshold'].notnull(),
- 'threshold'] = _X['cnpj_threshold']
+ 'threshold'] = _X['cnpj_threshold']
_X['y'] = 1
is_outlier = self.__applicable_rows(_X) & \
_X['threshold'].notnull() & \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,30 @@ class MonthlySubquotaLimitClassifier(TransformerMixin):
issue_date : datetime column
Date when the expense was made.
month : int column
The quota month matching the expense request.
net_value : float column
The value of the expense.
subquota_number : category column
A number to classify a category of expenses.
year : int column
The quota year matching the expense request.
"""

KEYS = ['applicant_id', 'month', 'year']
COLS = ['applicant_id',
'issue_date',
'month',
'net_value',
'subquota_number',
'year']

def fit(self, X):
self.X = X
- self._X = self.X.copy()
+ self._X = self.X[self.COLS].copy()
self.__create_columns()
return self

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ class TraveledSpeedsClassifier(TransformerMixin):
"""

AGG_KEYS = ['applicant_id', 'issue_date']
COLS = ['applicant_id',
'category',
'is_party_expense',
'issue_date',
'latitude',
'longitude']

def __init__(self, contamination=.001):
if contamination in [0, 1]:
Expand All @@ -57,7 +63,7 @@ def transform(self, X=None):
def predict(self, X):
check_is_fitted(self, ['polynomial', '_polynomial_fn'])

- _X = X.copy()
+ _X = X[self.COLS].copy()
_X = self.__aggregate_dataset(_X)
_X = self.__classify_dataset(_X)
_X = pd.merge(X, _X, how='left', left_on=self.AGG_KEYS, right_on=self.AGG_KEYS)
Expand Down

0 comments on commit ec5f40a

Please sign in to comment.