In [2]:
import numpy as np
import sklearn
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing
from sklearn import cross_validation
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import pandas as pd
import pylab as plb
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

In [3]:
df = pd.read_csv('https://s3.amazonaws.com/aml-sample-data/banking.csv', header=0)

In [4]:
df.rename(columns={'y':'willRespond'},inplace=True)

In [5]:
df.drop(['duration','contact','month','day_of_week','default','pdays',],axis=1,inplace=True) #Removing unnecessary columns

In [6]:
le=preprocessing.LabelEncoder()

In [7]:
le.fit(df.poutcome)

LabelEncoder()

In [8]:
list(le.classes_)

['failure', 'nonexistent', 'success']

In [9]:
df.poutcome=le.transform(df.poutcome)

In [10]:
df.housing=le.fit_transform(df.housing)

In [11]:
df.loan=le.fit_transform(df.loan)

In [12]:
df.job=le.fit_transform(df.job)

In [13]:
df.marital=le.fit_transform(df.marital)

In [14]:
df.education=le.fit_transform(df.education)

In [15]:
list(le.classes_)

['basic.4y',
 'basic.6y',
 'basic.9y',
 'high.school',
 'illiterate',
 'professional.course',
 'university.degree',
 'unknown']

# Splitting and Creating a Logistic Regression Model

In [16]:
X = df.drop('willRespond',axis=1)
y = df.willRespond

In [17]:
smallX = X.loc[:,['age','job','marital','education','housing','loan','campaign']]
X = smallX

In [18]:
x_train, x_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.3, random_state=0,stratify=y) #70/30

In [19]:
x_train.loc[:,'target'] = y_train

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [23]:
pos = x_train[x_train.target == 1]
neg = x_train[x_train.target == 0]


In [24]:
smp_size = int(pos.shape[0] * 0.9)

In [25]:
#model training
models = []
for i in range(30):
    pos_smp = pos.sample(n=smp_size)
    neg_smp = neg.sample(n=smp_size)
    full_smp = pos_smp.append(neg_smp)
    x_smp = full_smp.drop('target',axis=1)
    y_smp = full_smp.target
    model=LogisticRegression(penalty='l2', max_iter=5)
    model.fit(x_smp,y_smp)
    models.append(model)

In [26]:
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import make_pipeline, Pipeline

In [27]:
def iterateModels(data):
    scores = []
    for mod in models:
        scores.append(mod.predict_proba(data)[0][1])
    return np.mean(scores)

In [28]:
single = x_test.iloc[0,:].values

In [29]:
single.shape

(7,)

In [30]:
s = single.reshape(1,7)

In [31]:
models[0].predict_proba(s)[0][1]

0.47062322959973835

In [32]:
s

array([[33,  0,  1,  6,  0,  0,  3]])

In [33]:
iterateModels(s)

0.46878736144926364

In [34]:
scores = np.zeros([x_test.shape[0],30])

In [35]:
for i, model in enumerate(models):
    scores[:,i] = model.predict_proba(x_test)[:,1]

In [36]:
final_scores = scores.mean(axis=1)

In [37]:
x_test.iloc[0,:].values

array([33,  0,  1,  6,  0,  0,  3])

In [38]:
final_scores[0]

0.46878736144926364

In [39]:
y_pred = pd.Series(final_scores).map(lambda x: 1 if x > 0.6 else 0)

In [40]:
np.mean(y_pred==y_test)

0.84891154811038283

In [43]:
from sklearn.metrics import classification_report, confusion_matrix

In [45]:
#print("Evaluation report: \n\n%s" % classification_report(y_test, prediction))

In [46]:
from sklearn.pipeline import make_pipeline

In [47]:
transFunc = FunctionTransformer(iterateModels)
myPipe = make_pipeline(transFunc)

In [48]:
myPipe.transform(s)

0.46878736144926364

In [49]:
type(myPipe)

sklearn.pipeline.Pipeline

### Deploy!


In [50]:
#!pip install git+https://gfilla:563dec96604582b22c75d1cb69e4a46a8084a783@github.ibm.com/spark-service/project-lib.git --ignore-installed

In [51]:
from repository.mlrepositoryclient import MLRepositoryClient
from repository.mlrepositoryartifact import MLRepositoryArtifact
from repository.mlrepository import MetaProps, MetaNames
from project_lib import Project

In [52]:
# The code was removed by DSX for sharing.

In [54]:
proj = Project(sc, 'be994310-57f5-4351-9017-4dd6e0ce1817', 'p-1088294dc79b4eb3e43a160707efc5ead06433af')

In [59]:
#inserted by clicking project token 
proj_meta = proj.get_metadata()
creds = [x for x in proj_meta['entity']['compute'] if x['type'] == 'machine_learning'][0]['credentials']
creds 

{'access_key': 'xs3p16vPvAcbadF6UWWlE7bZodFQYEWEwWS4VRDEgp32o2/bYs4qekCD1NQv9b6yHxGxQ3pIogjgEOjN0TGDTcL0h32gVzPkwMbmHXNpi+FQYUqQmv73SQJrb1WXWeZv',
 'instance_id': '2b565dc7-179e-432c-99b9-14516f136a52',
 'password': '624a3f40-66dc-461d-89a2-a7e24753346b',
 'url': 'https://ibm-watson-ml.mybluemix.net',
 'username': '4d32f62e-04b6-46e7-b929-557c751cf234'}

In [60]:
service_path = 'https://ibm-watson-ml.mybluemix.net'
username = creds['username']
password = creds['password']
instance_id = creds['instance_id']


In [62]:
ml_repository_client = MLRepositoryClient(service_path)
ml_repository_client.authorize(username, password)
props = MetaProps({MetaNames.AUTHOR_NAME:"Greg Filla", MetaNames.AUTHOR_EMAIL:"gfilla@us.ibm.com"})

In [63]:
model_artifact = MLRepositoryArtifact(myPipe, name="bank_lr_ensemble", meta_props=props)

In [64]:
saved_model = ml_repository_client.models.save(model_artifact)

In [65]:
saved_model.meta.available_props()

dict_keys(['lastUpdated', 'version', 'authorEmail', 'trainingDataRef', 'runtime', 'pipelineVersionHref', 'modelVersionHref', 'creationTime', 'modelType', 'authorName'])

In [66]:
print("modelType: " + saved_model.meta.prop("modelType"))
print("runtime: " + saved_model.meta.prop("runtime"))
print("creationTime: " + str(saved_model.meta.prop("creationTime")))
print("modelVersionHref: " + saved_model.meta.prop("modelVersionHref"))

modelType: scikit-model-0.17
runtime: python-3.5
creationTime: 2017-09-23 16:40:16.965000+00:00
modelVersionHref: https://ibm-watson-ml.mybluemix.net/v2/artifacts/models/a3b87b66-097c-4a6b-ae95-d0d90d829344/versions/f404ba36-128d-4bfb-a5d8-3e725cd3750c


In [67]:
loadedModelArtifact = ml_repository_client.models.get(saved_model.uid)

In [68]:
print(loadedModelArtifact.name)
print(saved_model.uid)

bank_lr_ensemble
a3b87b66-097c-4a6b-ae95-d0d90d829344


In [69]:
s

array([[33,  0,  1,  6,  0,  0,  3]])

In [72]:
predictions = loadedModelArtifact.model_instance().transform(s)

In [73]:
predictions

0.46878736144926364

In [74]:
import urllib3, requests, json

headers = urllib3.util.make_headers(basic_auth='{}:{}'.format(username, password))
url = '{}/v3/identity/token'.format(service_path)
response = requests.get(url, headers=headers)
mltoken = json.loads(response.text).get('token')

In [75]:
endpoint_instance = service_path + "/v3/wml_instances/" + instance_id
header = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + mltoken} 

response_get_instance = requests.get(endpoint_instance, headers=header)
print(response_get_instance)
print(response_get_instance.text)

<Response [200]>
{"metadata":{"guid":"2b565dc7-179e-432c-99b9-14516f136a52","url":"https://ibm-watson-ml.mybluemix.net/v3/wml_instances/2b565dc7-179e-432c-99b9-14516f136a52","created_at":"2017-07-31T21:37:18.193Z","modified_at":"2017-09-23T16:40:17.082Z"},"entity":{"source":"Bluemix","published_models":{"url":"https://ibm-watson-ml.mybluemix.net/v3/wml_instances/2b565dc7-179e-432c-99b9-14516f136a52/published_models"},"usage":{"expiration_date":"2017-10-01T00:00:00.000Z","computation_time":{"limit":18000,"current":0},"model_count":{"limit":200,"current":11},"prediction_count":{"limit":5000,"current":0},"deployment_count":{"limit":5,"current":5}},"plan_id":"3f6acf43-ede8-413a-ac69-f8af3bb0cbfe","status":"Active","organization_guid":"59db2e81-0ab2-4abe-b2a1-f7da1b9ad1dc","region":"us-south","account":{"id":"501209268","name":"Greg Filla's Account","type":"TRIAL"},"owner":{"ibm_id":"310000V6NF","email":"gfilla@us.ibm.com","user_id":"c1eabbcb-3ff0-4f2a-b6b0-ed5c19df4f2f","country_code":"USA

In [76]:
endpoint_published_models = json.loads(response_get_instance.text).get('entity').get('published_models').get('url')

print(endpoint_published_models)

https://ibm-watson-ml.mybluemix.net/v3/wml_instances/2b565dc7-179e-432c-99b9-14516f136a52/published_models


In [77]:
header = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + mltoken}
response_get = requests.get(endpoint_published_models, headers=header)

print(response_get)
print(response_get.text)

<Response [500]>
{"trace":"-i2iodqmb7bxl","errors":[{"code":"underlying_api_error","message":"WMLRY0002E Unhandled exception of type DeserializationException with message: Unsupported value for type ModelType scikit-model-0.18.1"}]}


In [79]:
sklearn.__version__

'0.17.1'

In [80]:
myPipe._

'sklearn.pipeline'