Skip to content

Commit

Permalink
Added parameters folder
Browse files Browse the repository at this point in the history
  • Loading branch information
crawles committed Sep 8, 2017
1 parent d717d91 commit 95cf8d8
Show file tree
Hide file tree
Showing 9 changed files with 106 additions and 510 deletions.
11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,21 +55,24 @@ Train a pipeline:
train_url = 'http://0.0.0.0:8080/train_pipeline'
train_files = {'raw_data': open('data/data_train.json', 'rb'),
'labels' : open('data/label_train.json', 'rb'),
'params' : open('train_parameters.yml', 'rb')}
'params' : open('parameters/train_parameters.yml', 'rb')}

# post request to train pipeline
r_train = requests.post(train_url, files=train_files)
result_df = json.loads(r_train.json())
```

```python
{'featureEngParams': {'default_fc_parameters': "['median', 'minimum', 'standard_deviation', 'sum_values', 'variance', 'maximum', 'length', 'mean']",
{'featureEngParams': {'default_fc_parameters': "['median', 'minimum', 'standard_deviation',
'sum_values', 'variance', 'maximum',
'length', 'mean']",
'impute_function': 'impute',
...},
'mean_cv_accuracy': 0.865,
'mean_cv_roc_auc': 0.932,
'modelId': 1,
'modelType': "RandomForestClassifier(...)",
'modelType': "Pipeline(steps=[('stackingestimator', StackingEstimator(estimator=LinearSVC(...))),
('logisticregression', LogisticRegressionClassifier(solver='liblinear',...))])",
'trainShape': [1647, 8],
'trainTime': 1.953}
```
Expand All @@ -78,7 +81,7 @@ Serve pipeline predictions:
```python
serve_url = 'http://0.0.0.0:8080/serve_prediction'
test_files = {'raw_data': open('data/data_test.json', 'rb'),
'params' : open('test_parameters_model2.yml', 'rb')}
'params' : open('parameters/test_parameters_model2.yml', 'rb')}

# post request to serve predictions from trained pipeline
r_test = requests.post(serve_url, files=test_files)
Expand Down
31 changes: 0 additions & 31 deletions automl_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,37 +36,6 @@
api.add_resource(resources.Models, '/models',
resource_class_kwargs={'model_factory': model_factory})



#@app.route('/train_pipeline', methods=['POST'])
#def train_api():
#
# df = read_file(request, 'raw_data')
# params = read_params(request, 'params')
# X_train = build_features(df, params)
# y_train = read_file(request, 'labels')
# y_train = y_train.set_index('example_id')
# y_train = y_train.loc[X_train.index]
#
# cl = train_model(X_train, y_train.label, params)
# classifier.cl = cl
# print sklearn.metrics.roc_auc_score(y_train.label,
# cl.predict_proba(X_train)[:,1])
# return str(cl)


# POST handler registered on `app` at '/serve_pred': scores the uploaded data
# with the classifier held in `classifier.cl` and returns the scores as JSON.
# NOTE(review): body indentation is missing in this rendering, and this block
# sits in a hunk reported as "0 additions & 31 deletions", so it is presumably
# code removed by the commit -- confirm before relying on it.
@app.route('/serve_pred', methods=['POST'])
def serve_api():
# Both inputs come from the incoming request: the 'raw_data' upload and the
# 'params' upload, read through the read_file / read_params helpers.
df = read_file(request, 'raw_data')
params = read_params(request, 'params')
X = build_features(df, params)
# Column 1 of predict_proba is used as the score
# (presumably the positive-class probability -- confirm class order).
scores = classifier.cl.predict_proba(X)[:,1]
# Single 'score' column, indexed the same way as the feature matrix X.
result = pd.DataFrame(scores,
columns=['score'],
index=X.index)
return result.to_json()


# note: not used if using gunicorn
if __name__ == "__main__":
if os.environ.get('VCAP_SERVICES') is None: # running locally
Expand Down
543 changes: 92 additions & 451 deletions modelling_and_usage.ipynb

Large diffs are not rendered by default.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
pipeline_id: 2
model_training:
model_args:
generations: 20
population_size: 10
generations: 300
population_size: 50
verbosity: 2
max_time_mins: 5
max_time_mins: 80
model: tpot.TPOTClassifier
extract_features:
column_id: example_id
Expand Down
25 changes: 4 additions & 21 deletions test_api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
import pprint

import numpy as np
import pandas as pd
Expand All @@ -8,36 +7,29 @@

from sklearn.metrics import roc_auc_score

#if os.environ.get('VCAP_SERVICES') is None: # running locally
# host = "http://0.0.0.0:8080"
#else: # running on CF
# host = "TBD"

# Uploads the training data, labels and a parameter file to the
# 'train_pipeline' endpoint joined onto `host`, then pretty-prints the JSON
# body of the response. No assertion is made on the response.
def test_train_model_1(host):
'''Train sklearn model'''
url = os.path.join(host, 'train_pipeline')
# NOTE(review): two 'params' lines follow. They look like the before/after
# sides of the diff (repository root vs. the new parameters/ folder) shown
# without +/- markers -- confirm which path is current.
# The files opened here are never explicitly closed in this block.
train_files = {'raw_data': open('data/data_train.json', 'rb'),
'labels' : open('data/label_train.json', 'rb'),
'params' : open('train_parameters.yml', 'rb')}
'params' : open('parameters/train_parameters.yml', 'rb')}
# Multipart upload: each dict entry is sent as one named file part.
r = requests.post(url,
files=train_files)
pprint.pprint(r.json())

# Same flow as the first training test but with the "model2" parameter file:
# uploads data, labels and parameters to the 'train_pipeline' endpoint joined
# onto `host`, then pretty-prints the JSON body of the response.
def test_train_model_2(host):
'''Train TPOT model'''
url = os.path.join(host, 'train_pipeline')
# NOTE(review): two 'params' lines follow. They look like the before/after
# sides of the diff (repository root vs. the new parameters/ folder) shown
# without +/- markers -- confirm which path is current.
# The files opened here are never explicitly closed in this block.
train_files = {'raw_data': open('data/data_train.json', 'rb'),
'labels' : open('data/label_train.json', 'rb'),
'params' : open('train_parameters_model2.yml', 'rb')}
'params' : open('parameters/train_parameters_model2.yml', 'rb')}
# Multipart upload: each dict entry is sent as one named file part.
r = requests.post(url,
files=train_files)
pprint.pprint(r.json())


def test_serve_model(host):
serve_url = os.path.join(host, 'serve_prediction')
test_files = {'raw_data': open('data/data_test.json', 'rb'),
'params' : open('test_parameters.yml', 'rb')}
'params' : open('parameters/test_parameters.yml', 'rb')}
r = requests.post(serve_url, files=test_files)

# parse result
Expand All @@ -52,7 +44,7 @@ def test_serve_model(host):
def test_serve_model_2(host):
serve_url = os.path.join(host, 'serve_prediction')
test_files = {'raw_data': open('data/data_test.json', 'rb'),
'params' : open('test_parameters_model2.yml', 'rb')}
'params' : open('parameters/test_parameters_model2.yml', 'rb')}
r = requests.post(serve_url, files=test_files)

# parse result
Expand All @@ -64,18 +56,9 @@ def test_serve_model_2(host):
print "Test AUC: {}".format(auc)
assert (auc > 0.9)

##TODO: change everything to pipeline
#def test_get_a_model():
# '''Get a model'''
# url = os.path.join(host, 'model')
# r = requests.get(url, data={'pipeline_id': 1})
# pprint.pprint(r.json())
# assert r.status_code == 200

# Issues a GET against the 'models' endpoint joined onto `host` and requires
# an HTTP 200 response before pretty-printing the JSON body.
def test_get_models(host):
'''Show all available models'''
url = os.path.join(host, 'models')
r = requests.get(url)
assert r.status_code == 200
# NOTE(review): the hunk summary for this file reports 21 deletions and the
# `import pprint` line appears among this hunk's lines, so this call may be a
# removed line shown without a '-' marker -- confirm.
pprint.pprint(r.json())

0 comments on commit 95cf8d8

Please sign in to comment.