# Heat Attack Dataset
This dataset was downloaded from https://www.kaggle.com/rashikrahmanpritom/heart-attack-analysis-prediction-dataset.<br>
The notebook was created taking as baseline the tutorials from https://microsoftlearning.github.io/mslearn-dp100/.
## Connect to a workspace

In [None]:
from azureml.core import Workspace, Dataset
ws = Workspace.from_config()
print(ws.name, "loaded")

## Prepare Compute
Check available compute resources. Mostly use CI (local compute) for this stage.

In [None]:
print("Compute Resources:")
for compute_name in ws.compute_targets:
    compute = ws.compute_targets[compute_name]
    print("\t", compute.name, ':', compute.type)

## Specify an Azure ML compute cluster (we will need it for autoML)

In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "CLAI915002"

try:
    # Get existing compute target
    training_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print("Found cluster!")
except ComputeTargetException:
    # Create one if it does not exist
    try:
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2', max_nodes=2)
        training_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        training_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        print(ex)



Found cluster!


## Load and register dataset
**Data Description**<br>

*age*: Age of the person<br>
*sex*: Gender of the person<br>
*cp*: chest pain type<br>
*trtbps*: resting blood pressure (mm Hg)<br>
*chol*: cholesterol (mg/dl)<br>
*fbs*: fasting blood sugar > 120 mg/dl<br>
*restecg*: resting electrocardiographic results<br>
*thalachh*: maximum heart rate achieved<br>
*exng*: exercise induced angina (1 = yes, 0 = no)<br>
*oldpeak*: previous peak<br>
*slp*: slope<br>
*caa*: number of major vessels (0-3)<br>
*thall*: Thal rate <br>
*output*: had heart attack (target)



In [4]:
# Load default datastore
default_ds = ws.get_default_datastore()

# Upload datasets to the datastore
default_ds.upload_files(['./data/heart.csv'],
                        target_path='heart-data/',
                        overwrite=True,
                        show_progress=True)

Uploading an estimated of 1 files
Uploading ./data/heart.csv
Uploaded ./data/heart.csv, 1 files out of an estimated total of 1
Uploaded 1 files


$AZUREML_DATAREFERENCE_1e2a2111e2144499a05e1ca1a641f13c

In [5]:
# Create tabular dataset with heart data
heart_tab = Dataset.Tabular.from_delimited_files(path=(default_ds, 'heart-data/heart.csv'))
heart_tab.to_pandas_dataframe()


Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0


In [6]:
# Register heart dataset
heart_tab = heart_tab.register(workspace=ws,
                            name='heart',
                            description='heart attack data',
                            tags={'format':'CSV'},
                            create_new_version=True)


In [7]:
print("Datasets:")
for dataset_name in list(ws.datasets.keys()):
    dataset = Dataset.get_by_name(ws, dataset_name)
    print("\t", dataset.name, 'version', dataset.version)

Datasets:
	 o2 version 1
	 heart version 2


## Check data

In [8]:
# Check for Null values
heart_tab.to_pandas_dataframe().isnull().sum()

age         0
sex         0
cp          0
trtbps      0
chol        0
fbs         0
restecg     0
thalachh    0
exng        0
oldpeak     0
slp         0
caa         0
thall       0
output      0
dtype: int64

In [9]:
# Look inside
heart_tab.to_pandas_dataframe().describe()

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0
mean,54.366337,0.683168,0.966997,131.623762,246.264026,0.148515,0.528053,149.646865,0.326733,1.039604,1.39934,0.729373,2.313531,0.544554
std,9.082101,0.466011,1.032052,17.538143,51.830751,0.356198,0.52586,22.905161,0.469794,1.161075,0.616226,1.022606,0.612277,0.498835
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,47.5,0.0,0.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,2.0,0.0
50%,55.0,1.0,1.0,130.0,240.0,0.0,1.0,153.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,274.5,0.0,1.0,166.0,1.0,1.6,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


## Prepare data

In [10]:
heart_ds = ws.datasets.get("heart")
train_valid_ds, test_ds = heart_ds.random_split(percentage=0.75, seed=0)
train_ds, valid_ds = train_valid_ds.random_split(percentage=0.8, seed=0)

print(valid_ds.to_pandas_dataframe())

    age  sex  cp  trtbps  chol  fbs  restecg  thalachh  exng  oldpeak  slp  \
0    44    1   1     120   263    0        1       173     0      0.0    2   
1    52    1   2     172   199    1        1       162     0      0.5    2   
2    50    0   2     120   219    0        1       158     0      1.6    1   
3    58    0   2     120   340    0        1       172     0      0.0    2   
4    66    0   3     150   226    0        1       114     0      2.6    0   
5    43    1   0     150   247    0        1       171     0      1.5    2   
6    71    0   1     160   302    0        1       162     0      0.4    2   
7    48    1   1     130   245    0        0       180     0      0.2    1   
8    53    0   0     138   234    0        0       160     0      0.0    2   
9    48    1   0     122   222    0        0       186     0      0.0    2   
10   35    0   0     138   183    0        1       182     0      1.4    2   
11   44    1   1     120   220    0        1       170     0    

## Configure automated machine learning

Set config class with the performance metric that the model will be evaluated.

In [13]:
from azureml.train.automl import AutoMLConfig

automl_config = AutoMLConfig(name = 'Auto ML Experiment',
                             task='classification',
                             compute_target=cluster_name,
                             training_data=train_ds,
                             validation_data=valid_ds,
                             label_column_name='output',
                             iterations=10,
                             primary_metric='accuracy',
                             max_concurrent_iterations=2,
                             featurization='auto')

## Run auto ML experiment

In [None]:
from azureml.core.experiment import Experiment
from azureml.widgets import RunDetails

automl_experiment = Experiment(ws, 'heart-automl')
automl_run = automl_experiment.submit(automl_config)
RunDetails(automl_run).show()
automl_run.wait_for_completion(show_output=True)

## View child run details

In [15]:
for run in automl_run.get_children():
    print('Run ID', run.id)
    for metric in run.get_metrics():
        print('\t', run.get_metrics(metric))    

Run ID AutoML_2618e68f-c7bd-41f2-95e4-3a92cd6aa189_8
	 {'AUC_weighted': 0.8802308802308803}
	 {'accuracy': 0.8148148148148148}
	 {'average_precision_score_micro': 0.8759922160072741}
	 {'recall_score_micro': 0.8148148148148148}
	 {'f1_score_macro': 0.7969924812030074}
	 {'f1_score_weighted': 0.810359231411863}
	 {'precision_score_macro': 0.8171701112877583}
	 {'average_precision_score_weighted': 0.8796285634708554}
	 {'precision_score_weighted': 0.8157569334039922}
	 {'balanced_accuracy': 0.7878787878787878}
	 {'recall_score_weighted': 0.8148148148148148}
	 {'weighted_accuracy': 0.8392156862745097}
	 {'log_loss': 0.44919812524569247}
	 {'recall_score_macro': 0.7878787878787878}
	 {'matthews_correlation': 0.6043394648337973}
	 {'average_precision_score_macro': 0.8698354740454199}
	 {'norm_macro_recall': 0.5757575757575757}
	 {'AUC_micro': 0.8751714677640604}
	 {'precision_score_micro': 0.8148148148148148}
	 {'f1_score_micro': 0.8148148148148148}
	 {'AUC_macro': 0.8802308802308803}
	 {'a

## Get best run

In [16]:
best_run, fitted_model = automl_run.get_output()
print(best_run)
print('\nBest Model Definition:')
print(fitted_model)
print('\nBest Run Transformations:')
for step in fitted_model.named_steps:
    print(step)
print('\nBest Run Metrics:')
best_run_metrics = best_run.get_metrics()
for metric_name in best_run_metrics:
    metric = best_run_metrics[metric_name]
    print(metric_name, metric)


Package:azureml-automl-runtime, training version:1.35.1, current version:1.34.0
Package:azureml-core, training version:1.35.0.post1, current version:1.34.0
Package:azureml-dataprep, training version:2.23.2, current version:2.22.2
Package:azureml-dataprep-rslex, training version:1.21.2, current version:1.20.1
Package:azureml-dataset-runtime, training version:1.35.0, current version:1.34.0
Package:azureml-defaults, training version:1.35.0, current version:1.34.0
Package:azureml-interpret, training version:1.35.0, current version:1.34.0
Package:azureml-mlflow, training version:1.35.0, current version:1.34.0
Package:azureml-pipeline-core, training version:1.35.0, current version:1.34.0
Package:azureml-responsibleai, training version:1.35.0, current version:1.34.0
Package:azureml-telemetry, training version:1.35.0, current version:1.34.0
Package:azureml-train-automl-client, training version:1.35.0, current version:1.34.0
Package:azureml-train-automl-runtime, training version:1.35.1, current

Run(Experiment: heart-automl,
Id: AutoML_2618e68f-c7bd-41f2-95e4-3a92cd6aa189_2,
Type: azureml.scriptrun,
Status: Completed)

Best Model Definition:
Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=False, enable_feature_sweeping=True, feature_sweeping_config={}, feature_sweeping_timeout=86400, featurization_config=None, force_text_dnn=False, is_cross_validation=False, is_onnx_compatible=False, observer=None, task='classification', working_dir='/mnt/batch/tasks/shared/LS_root/moun...
                                      class_weight='balanced', criterion='gini',
                                      max_depth=None, max_features='sqrt',
                                      max_leaf_nodes=None, max_samples=None,
                                      min_impurity_decrease=0.0,
                                      min_impurity_split=None,
                                      min_samples_leaf=0.01,
                                     

## Register the model

In [None]:
from azureml.core import Model

# Register model
best_run.register_model(model_path='outputs/model.pkl',
                        model_name='heart_model',
                        tags={'Training context':'Auto ML'},
                        properties={'Accuracy': best_run_metrics['accuracy']})
                        

In [None]:
# Check models registered in the workspace
for model in Model.list(ws):
    print(model.name, 'version:', model.version)