Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cross Validation Added #407

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,4 @@ docs/_build

# License copied to conda build_dir
pkg/conda/LICENSE
env/*
18 changes: 15 additions & 3 deletions gramex/handlers/mlhandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from tornado.gen import coroutine
from tornado.web import HTTPError
from sklearn.metrics import get_scorer
from sklearn.model_selection import cross_val_predict, cross_val_score

op = os.path
MLCLASS_MODULES = [
Expand All @@ -40,7 +41,8 @@
'pipeline': True,
'nums': [],
'cats': [],
'target_col': None
'target_col': None,
'cv': True,
}
ACTIONS = ['predict', 'score', 'append', 'train', 'retrain']
DEFAULT_TEMPLATE = op.join(op.dirname(__file__), '..', 'apps', 'mlhandler', 'template.html')
Expand Down Expand Up @@ -112,14 +114,23 @@ def setup(cls, data=None, model={}, config_dir='', **kwargs):
data = cls._filtercols(data)
data = cls._filterrows(data)
cls.model = cls._assemble_pipeline(data, mclass=mclass, params=params)

# train the model
target = data[target_col]
train = data[[c for c in data if c != target_col]]
# cross validation
cls.cross_validation(train,target)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not required here.

gramex.service.threadpool.submit(
_fit, cls.model, train, target, cls.model_path, cls.name)
cls.config_store.flush()


@classmethod
def cross_validation(cls, train, target):
    """Cross-validate the handler's model and report the mean fold score.

    The ``'cv'`` option (read through ``cls.get_opt``) controls the run:

    * a falsy value disables cross validation entirely;
    * ``True`` (the default) requests scikit-learn's default splitter;
    * anything else (fold count, splitter object, iterable of splits) is
      forwarded to ``cross_val_score`` unchanged.

    Parameters
    ----------
    train :
        Feature data, passed to ``cross_val_score`` as ``X``.
    target :
        Target values, passed to ``cross_val_score`` as ``y``.

    Returns
    -------
    float or None
        Mean of the per-fold scores, or ``None`` when cross validation is
        disabled.
    """
    cv = cls.get_opt('cv', True)
    if not cv:
        return None
    # A bare ``True`` is not a usable fold specification (it would be read as
    # a single fold, which scikit-learn rejects); ``None`` asks for the
    # library's default splitter instead.
    if cv is True:
        cv = None
    # Score the complete model rather than only its last step, so whatever
    # preprocessing the model carries is re-fitted inside every fold and the
    # estimator sees the same inputs it gets when the model is fitted on
    # ``train`` directly. cross_val_score clones the estimator, so cls.model
    # itself is left unfitted/unchanged by this call.
    fold_scores = cross_val_score(cls.model, X=train, y=target, cv=cv)
    mean_score = sum(fold_scores) / len(fold_scores)
    print('Cross Validation Score : ', mean_score)
    return mean_score
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CV should take place within the train method only.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if cv:
    cvscore = cross_val_score(mod, X=train, y=target, cv=cv)
else:
   # Do the usual .fit


@classmethod
def load_data(cls, default=pd.DataFrame()):
try:
Expand Down Expand Up @@ -351,6 +362,7 @@ def _train(self, data=None):
target = data[target_col]
train = data[[c for c in data if c != target_col]]
self.model = self._assemble_pipeline(data, force=True)
self.cross_validation(train,target)
_fit(self.model, train, target, self.model_path)
return {'score': self.model.score(train, target)}

Expand Down