In [11]:
import tarfile

import boto3
import pandas as pd
import os
from sagemaker.estimator import Estimator
from sagemaker.local import LocalSession
from sagemaker.predictor import csv_serializer

In [5]:
from __future__ import print_function

import json
import os
import pickle
import sys
import traceback

import pandas as pd
from causalnex.discretiser import Discretiser
import warnings
from causalnex.structure import StructureModel
from sklearn.model_selection import train_test_split
from causalnex.network import BayesianNetwork
from causalnex.evaluation import classification_report
from causalnex.evaluation import roc_auc

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
sagemaker_session = LocalSession()
sagemaker_session.config = {'local': {'local_code': True}}

role = 'arn:aws:iam::948014026119:role/service-role/AmazonSageMaker-ExecutionRole-20220620T163773'
session = boto3.session.Session(profile_name='mlops-prod')
s3 = session.client('s3')

In [16]:
data_location = "./data/heart_failure_clinical_records_dataset.csv"

this is a comment

In [17]:
from causalnex.discretiser import Discretiser
import pandas as pd

initial_df = pd.read_csv(data_location)

initial_df["age"] = Discretiser(method="fixed", numeric_split_points=[60]).transform(
    initial_df["age"].values
)
initial_df["serum_sodium"] = Discretiser(method="fixed", numeric_split_points=[136]).transform(
    initial_df["serum_sodium"].values
)
initial_df["serum_creatinine"] = Discretiser(
    method="fixed", numeric_split_points=[1.1, 1.4]
).transform(initial_df["serum_sodium"].values)

initial_df["ejection_fraction"] = Discretiser(
    method="fixed", numeric_split_points=[30, 38, 42]
).transform(initial_df["ejection_fraction"].values)

initial_df["creatinine_phosphokinase"] = Discretiser(
    method="fixed", numeric_split_points=[120, 540, 670]
).transform(initial_df["creatinine_phosphokinase"].values)

initial_df["platelets"] = Discretiser(method="fixed", numeric_split_points=[263358]).transform(
    initial_df["platelets"].values
)
initial_df.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,1,0,2,0,0,1,1,0,0,1,0,4,1
1,0,0,3,0,2,0,1,0,1,1,0,6,1
2,1,0,1,0,0,0,0,0,0,1,1,7,1
3,0,1,0,0,0,0,0,0,1,1,0,7,1
4,1,1,1,1,0,0,1,0,0,0,0,8,1


In [None]:
sm = StructureModel()
sm.add_edges_from([
    ('ejection_fraction', 'DEATH_EVENT'),
    ('creatinine_phosphokinase', 'DEATH_EVENT'),
    ('age','DEATH_EVENT'),
    ('smoking','high_blood_pressure'),
    ('age','high_blood_pressure'),            
    ('serum_sodium','DEATH_EVENT'),
    ('high_blood_pressure','DEATH_EVENT'),
    ('anaemia','DEATH_EVENT'),
    ('creatinine_phosphokinase','DEATH_EVENT'),
    ('smoking','DEATH_EVENT')
])

train, test = train_test_split(initial_df, train_size=0.8, test_size=0.2, random_state=42)
        
bn = BayesianNetwork(sm)
bn = bn.fit_node_states(initial_df)
bn = bn.fit_cpds(train, method="BayesianEstimator", bayes_prior="K2")

roc, auc = roc_auc(bn, test, "DEATH_EVENT")
print("Model AUC: " + str(auc))

print(classification_report(bn, test, "DEATH_EVENT"))

# save the model
model_path = "models"
with open(os.path.join(model_path, 'causal_model.pkl'), 'wb') as out:
    pickle.dump(bn, out)

Now we want to use this code to create a docker image that can be pushed to ECR and then use that image with Sagemaker

In [28]:
image = 'sagemaker-causalnex-local'

env={
    "MODEL_SERVER_WORKERS":"2"
    }

local_regressor = Estimator(
    image,
    role,
    instance_count=1,
    instance_type="local")

train_location = 'file://'+data_location

local_regressor.fit(train_location, logs=True)

Creating scmjhsri78-algo-1-16lri ... 
Creating scmjhsri78-algo-1-16lri ... done
Attaching to scmjhsri78-algo-1-16lri
[36mscmjhsri78-algo-1-16lri |[0m Starting the training.
[36mscmjhsri78-algo-1-16lri |[0m Model AUC: 0.7368055555555555
[36mscmjhsri78-algo-1-16lri |[0m {'DEATH_EVENT_0': {'precision': 0.6122448979591837, 'recall': 0.8571428571428571, 'f1-score': 0.7142857142857143, 'support': 35}, 'DEATH_EVENT_1': {'precision': 0.5454545454545454, 'recall': 0.24, 'f1-score': 0.3333333333333333, 'support': 25}, 'accuracy': 0.6, 'macro avg': {'precision': 0.5788497217068646, 'recall': 0.5485714285714285, 'f1-score': 0.5238095238095238, 'support': 60}, 'weighted avg': {'precision': 0.5844155844155844, 'recall': 0.6, 'f1-score': 0.5555555555555555, 'support': 60}}
[36mscmjhsri78-algo-1-16lri |[0m Training complete.
[36mscmjhsri78-algo-1-16lri exited with code 0
[0mAborting on container exit...
===== Job Complete =====


In [74]:
predictor = local_regressor.deploy(1, 'local', env=env)



Attaching to uf74mk7e8y-algo-1-a0tr0
[36muf74mk7e8y-algo-1-a0tr0 |[0m Starting the inference server with 2 workers.
[36muf74mk7e8y-algo-1-a0tr0 |[0m [2023-04-20 06:30:50 +0000] [10] [INFO] Starting gunicorn 20.1.0
[36muf74mk7e8y-algo-1-a0tr0 |[0m [2023-04-20 06:30:50 +0000] [10] [INFO] Listening at: unix:/tmp/gunicorn.sock (10)
[36muf74mk7e8y-algo-1-a0tr0 |[0m [2023-04-20 06:30:50 +0000] [10] [INFO] Using worker: sync
[36muf74mk7e8y-algo-1-a0tr0 |[0m [2023-04-20 06:30:50 +0000] [12] [INFO] Booting worker with pid: 12
[36muf74mk7e8y-algo-1-a0tr0 |[0m [2023-04-20 06:30:50 +0000] [13] [INFO] Booting worker with pid: 13
[36muf74mk7e8y-algo-1-a0tr0 |[0m 172.21.0.1 - - [20/Apr/2023:06:30:54 +0000] "GET /ping HTTP/1.1" 200 1 "-" "python-urllib3/1.26.15"
!

In [85]:
test_data = open('payload.json')

In [86]:
predicted = predictor.predict(test_data).decode('utf-8')

[36muf74mk7e8y-algo-1-a0tr0 |[0m 172.21.0.1 - - [20/Apr/2023:06:35:39 +0000] "POST /invocations HTTP/1.1" 415 38 "-" "python-urllib3/1.26.15"


In [87]:
print(predicted)

This predictor only supports JSON data


In [88]:
predictor.delete_endpoint()

Gracefully stopping... (press Ctrl+C again to force)
