# Stroke Prediction using RandomForestClassifier with ibm-watson-machine-learning

### Section 2: Scoring a model with IBM Watson Machine Learning using the batch deployment method and publishing the result to IBM Cloud Object Storage

## 1. Data Loading (clean and transform if needed)

In [1]:
import pandas as pd

# Load the scoring input from a CSV file given by a relative path.
df = pd.read_csv('input_data.csv')
# Bare expression: display the loaded frame as the cell output.
df

Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status
0,1,1,33,0,1,1,1,0,229,39,1
1,2,1,23,1,0,1,2,0,239,30,2
2,3,1,43,0,1,1,3,0,249,20,3
3,4,1,53,1,0,1,3,0,219,50,1
4,5,1,63,0,1,1,2,0,209,15,2
5,6,0,30,1,0,1,1,0,200,10,3
6,7,0,33,0,1,1,1,0,229,39,1
7,8,0,23,1,0,1,2,0,239,30,2
8,9,0,43,0,1,1,3,0,249,20,3
9,10,0,53,1,0,1,3,0,219,50,1


## 2. Connection to IBM Watson Machine Learning

In [2]:
# Placeholders: replace both values with your own before running the notebook.
# `location` becomes part of the service URL ('https://<location>.ml.cloud.ibm.com').
api_key = 'YOUR-API-KEY'
location = 'YOUR-LOCATION'

In [3]:
# Credentials dictionary handed to APIClient: the API key plus the
# endpoint URL whose host is derived from `location`.
wml_url = 'https://{}.ml.cloud.ibm.com'.format(location)
wml_credentials = {"apikey": api_key, "url": wml_url}

In [None]:
!pip install -U ibm-watson-machine-learning

In [4]:
from ibm_watson_machine_learning import APIClient

# Create the API client from the credentials dictionary (API key + service URL).
client = APIClient(wml_credentials)
# Print the client version; the saved output of this cell shows 1.0.99.
print(client.version)

1.0.99


In [5]:
# Placeholder: replace with the ID of the space that is set as the default space below.
space_id = 'YOUR-SPACE-ID'

In [None]:
# Optional helper: list spaces (called with limit=10) to look up the value for `space_id`.
client.spaces.list(limit=10)

In [6]:
# Set `space_id` as the client's default space; the saved output shows 'SUCCESS'.
client.set.default_space(space_id)

'SUCCESS'

In [None]:
# Optional check: fetch the repository details of the published model.
# Replace the placeholder with the UID of your model.
client.repository.get_details('YOUR-PUBLISHED-MODEL-UID')

In [None]:
# Optional check: fetch the details of the deployment.
# Replace the placeholder with the UID of your deployment.
client.deployments.get_details('YOUR-DEPLYMENT-UID')

## 3. Model Scoring

In [7]:
# Scoring input: every column of `df` except `id`.
# `drop` returns a new frame, so `df` itself is left unchanged.
test_df = df.drop(columns=['id'])
test_df

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status
0,1,33,0,1,1,1,0,229,39,1
1,1,23,1,0,1,2,0,239,30,2
2,1,43,0,1,1,3,0,249,20,3
3,1,53,1,0,1,3,0,219,50,1
4,1,63,0,1,1,2,0,209,15,2
5,0,30,1,0,1,1,0,200,10,3
6,0,33,0,1,1,1,0,229,39,1
7,0,23,1,0,1,2,0,239,30,2
8,0,43,0,1,1,3,0,249,20,3
9,0,53,1,0,1,3,0,219,50,1


In [8]:
# Save the id-free scoring input as CSV, with a header row and without the index column.
test_df.to_csv('input_data_noid.csv', header=True, index=False)

In [11]:
# Inline scoring input: column names go in "fields", row values in "values".
scoring_input = {
    "fields": list(test_df.columns),
    "values": test_df.values.tolist(),
}

# Job metadata: a display name plus the inline input data defined above.
job_payload_ref = {
    client.deployments.ScoringMetaNames.NAME: "Job - Stroke Prediction Scoring",
    client.deployments.ScoringMetaNames.INPUT_DATA: [scoring_input],
}

In [12]:
# ID of the deployment that runs the scoring job; replace with your own deployment ID.
deployment_id = '28be5c19-eb1a-4935-860d-031b48e2dc1a'
job = client.deployments.create_job(deployment_id, meta_props=job_payload_ref)

In [14]:
# Extract the job's ID from the object returned by create_job; it is used to query the job below.
job_id = client.deployments.get_job_uid(job)

In [15]:
# Display the job details. In the saved output they echo the submitted
# input data and include the 'predictions' (fields: prediction, probability).
client.deployments.get_job_details(job_id)

{'entity': {'deployment': {'id': '28be5c19-eb1a-4935-860d-031b48e2dc1a'},
  'platform_job': {'job_id': '53017703-318a-4a00-8b21-09c7a93fadd8',
   'run_id': '30afaebc-534a-4dbd-887f-20ea28f7cf83'},
  'scoring': {'input_data': [{'fields': ['gender',
      'age',
      'hypertension',
      'heart_disease',
      'ever_married',
      'work_type',
      'Residence_type',
      'avg_glucose_level',
      'bmi',
      'smoking_status'],
     'values': [[1, 33, 0, 1, 1, 1, 0, 229, 39, 1],
      [1, 23, 1, 0, 1, 2, 0, 239, 30, 2],
      [1, 43, 0, 1, 1, 3, 0, 249, 20, 3],
      [1, 53, 1, 0, 1, 3, 0, 219, 50, 1],
      [1, 63, 0, 1, 1, 2, 0, 209, 15, 2],
      [0, 30, 1, 0, 1, 1, 0, 200, 10, 3],
      [0, 33, 0, 1, 1, 1, 0, 229, 39, 1],
      [0, 23, 1, 0, 1, 2, 0, 239, 30, 2],
      [0, 43, 0, 1, 1, 3, 0, 249, 20, 3],
      [0, 53, 1, 0, 1, 3, 0, 219, 50, 1]]}],
   'predictions': [{'fields': ['prediction', 'probability'],
     'values': [[0, [0.95, 0.05]],
      [0, [0.96, 0.04]],
      [0, 

In [17]:
import time

# The scoring job runs asynchronously, so a single immediate call to
# get_job_details may return before 'predictions' exists, and the next cell
# would then fail with a KeyError. Poll until the predictions are present,
# the job reports a terminal state, or the timeout expires.
TERMINAL_JOB_STATES = {'completed', 'failed', 'canceled'}
POLL_INTERVAL_SECONDS = 5
MAX_WAIT_SECONDS = 600

poll_deadline = time.monotonic() + MAX_WAIT_SECONDS
while True:
    job_details_do = client.deployments.get_job_details(job_id)
    scoring_details = job_details_do['entity']['scoring']
    # NOTE(review): the state is expected under scoring['status']['state'];
    # .get() keeps the loop safe if that key is absent.
    job_state = scoring_details.get('status', {}).get('state')
    if 'predictions' in scoring_details or job_state in TERMINAL_JOB_STATES:
        break
    if time.monotonic() >= poll_deadline:
        break
    time.sleep(POLL_INTERVAL_SECONDS)

# Fail here with a clear message rather than with a KeyError in a later cell.
if 'predictions' not in scoring_details:
    raise RuntimeError(
        "Scoring job {} returned no predictions (last state: {})".format(job_id, job_state)
    )

print(job_details_do)

{'entity': {'deployment': {'id': '28be5c19-eb1a-4935-860d-031b48e2dc1a'}, 'platform_job': {'job_id': '53017703-318a-4a00-8b21-09c7a93fadd8', 'run_id': '30afaebc-534a-4dbd-887f-20ea28f7cf83'}, 'scoring': {'input_data': [{'fields': ['gender', 'age', 'hypertension', 'heart_disease', 'ever_married', 'work_type', 'Residence_type', 'avg_glucose_level', 'bmi', 'smoking_status'], 'values': [[1, 33, 0, 1, 1, 1, 0, 229, 39, 1], [1, 23, 1, 0, 1, 2, 0, 239, 30, 2], [1, 43, 0, 1, 1, 3, 0, 249, 20, 3], [1, 53, 1, 0, 1, 3, 0, 219, 50, 1], [1, 63, 0, 1, 1, 2, 0, 209, 15, 2], [0, 30, 1, 0, 1, 1, 0, 200, 10, 3], [0, 33, 0, 1, 1, 1, 0, 229, 39, 1], [0, 23, 1, 0, 1, 2, 0, 239, 30, 2], [0, 43, 0, 1, 1, 3, 0, 249, 20, 3], [0, 53, 1, 0, 1, 3, 0, 219, 50, 1]]}], 'predictions': [{'fields': ['prediction', 'probability'], 'values': [[0, [0.95, 0.05]], [0, [0.96, 0.04]], [0, [0.94, 0.06]], [0, [0.87, 0.13]], [0, [0.66, 0.34]], [0, [0.9, 0.1]], [0, [0.83, 0.17]], [0, [0.93, 0.07]], [0, [0.95, 0.05]], [0, [0.69, 0.

In [22]:
# Each entry of 'values' is [prediction, probability]; keep only the prediction.
scored_rows = job_details_do['entity']['scoring']['predictions'][0]['values']
Prediction = [row[0] for row in scored_rows]
print(Prediction)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [23]:
# Attach the predictions to the original frame (which still has the `id` column).
# The assignment relies on `Prediction` having one entry per row of `df`.
df["Prediction"] = Prediction
# Bare expression: display the frame with the new column.
df

Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,Prediction
0,1,1,33,0,1,1,1,0,229,39,1,0
1,2,1,23,1,0,1,2,0,239,30,2,0
2,3,1,43,0,1,1,3,0,249,20,3,0
3,4,1,53,1,0,1,3,0,219,50,1,0
4,5,1,63,0,1,1,2,0,209,15,2,0
5,6,0,30,1,0,1,1,0,200,10,3,0
6,7,0,33,0,1,1,1,0,229,39,1,0
7,8,0,23,1,0,1,2,0,239,30,2,0
8,9,0,43,0,1,1,3,0,249,20,3,0
9,10,0,53,1,0,1,3,0,219,50,1,0


In [24]:
# Write the frame (inputs plus the Prediction column) to CSV, with a header row and without the index.
df.to_csv('result_dataframe.csv', header=True, index=False)

## 4. Upload result to IBM Cloud Object Storage

In [9]:
# @hidden_cell
# The following code contains the credentials for a bucket in your IBM Cloud Object Storage.
# You might want to remove those credentials before you share the notebook.
# All values below are placeholders; replace them with your own.
# In this notebook only 'BUCKET', 'URL', 'API_KEY' and 'RESOURCE_INSTANCE_ID' are read.
credentials = {
    'BUCKET': 'YOUR-BUCKET-NAME',
    'URL': 'YOUR-ENDPOINT-URL',
    'SECRET_KEY': 'YOUR-SECRET-KEY',
    'API_KEY': 'YOUR-COS-API-KEY',
    'RESOURCE_INSTANCE_ID': 'YOUR-RESOURCE-INSTANCE-ID',
    'ACCESS_KEY': 'YOUR-ACCESS-KEY'
}

In [10]:
import ibm_boto3
from ibm_botocore.client import Config, ClientError

# Constants for IBM COS values, read from the `credentials` dictionary.
# Endpoint URL; the current list is available at
# https://control.cloud-object-storage.cloud.ibm.com/v2/endpoints
COS_ENDPOINT = credentials['URL']
# API key, e.g. "W00YixxxxxxxxxxMB-odB-2ySfTrFBIQQWanc--P3byk"
COS_API_KEY_ID = credentials['API_KEY']
# Resource instance CRN, e.g.
# "crn:v1:bluemix:public:cloud-object-storage:global:a/3bf0d9003xxxxxxxxxx1c3e97696b71c:d6f04d83-6c4f-4a62-a165-696756d63903::"
COS_INSTANCE_CRN = credentials['RESOURCE_INSTANCE_ID']

# Create the "s3" client for the endpoint, configured with the API key,
# the service instance ID and the "oauth" signature version.
cos = ibm_boto3.client(
    "s3",
    endpoint_url=COS_ENDPOINT,
    ibm_api_key_id=COS_API_KEY_ID,
    ibm_service_instance_id=COS_INSTANCE_CRN,
    config=Config(signature_version="oauth"),
)

In [28]:
# Upload the local result file to the configured bucket, using the file name as the object key.
cos.upload_file(
    Filename='result_dataframe.csv',
    Bucket=credentials['BUCKET'],
    Key='result_dataframe.csv',
)