In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import tensorflow as tf

from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.layers import Dense
from tensorflow.keras import Model
import pickle

In [2]:
df = pd.read_csv('processed-cleveland-heart-disease.csv')

In [3]:
df.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

In [4]:
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0


In [5]:
df.tail()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
298,45.0,1.0,1.0,110.0,264.0,0.0,0.0,132.0,0.0,1.2,2.0,0.0,7.0,1
299,68.0,1.0,4.0,144.0,193.0,1.0,0.0,141.0,0.0,3.4,2.0,2.0,7.0,2
300,57.0,1.0,4.0,130.0,131.0,0.0,0.0,115.0,1.0,1.2,2.0,1.0,7.0,3
301,57.0,0.0,2.0,130.0,236.0,0.0,2.0,174.0,0.0,0.0,2.0,1.0,3.0,1
302,38.0,1.0,3.0,138.0,175.0,0.0,0.0,173.0,0.0,0.0,1.0,0.0,3.0,0


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    float64
 1   sex       303 non-null    float64
 2   cp        303 non-null    float64
 3   trestbps  303 non-null    float64
 4   chol      303 non-null    float64
 5   fbs       303 non-null    float64
 6   restecg   303 non-null    float64
 7   thalach   303 non-null    float64
 8   exang     303 non-null    float64
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    float64
 11  ca        303 non-null    float64
 12  thal      303 non-null    float64
 13  target    303 non-null    int64  
dtypes: float64(13), int64(1)
memory usage: 33.3 KB


In [7]:
df['target'].where(df['target'] <= 1, 1, inplace=True)

In [8]:
X = df.drop("target", axis=1).values
y = df["target"].values

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

In [10]:
# from sklearn.preprocessing import MinMaxScaler
# sc = MinMaxScaler((-1, 1))
# X_train = sc.fit_transform(X_train)
# X_test = sc.transform(X_test)

### Sklearn Random Forest Classifier

In [11]:
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)

RandomForestClassifier()

In [12]:
rfc.score(X_test, y_test)

0.6043956043956044

In [13]:
y_pred_test = rfc.predict(X_test)

In [14]:
accuracy_score(y_pred_test, y_test)

0.6043956043956044

In [15]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType, Int64TensorType

In [16]:
initial_type = [('input', FloatTensorType([None, 21]))]
final_type = [('label', Int64TensorType([None, 3])), ('output', FloatTensorType([None, 1]))]

#sklonnx = convert_sklearn(rfc, initial_types=initial_type)
sklonnx = convert_sklearn(rfc, initial_types=initial_type, final_types=final_type, options={'zipmap':False})
with open("cleveland_heart_disease_skl.onnx", "wb") as f:
    f.write(sklonnx.SerializeToString())

In [17]:
import onnxruntime as rt
sess = rt.InferenceSession("cleveland_heart_disease_skl.onnx")
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
pred_onx = sess.run([label_name], {input_name: X_test.astype(np.float32)})[0]

In [18]:
accuracy_score(pred_onx, y_test)

0.9498432601880877

### Pytorch Neural Network Classifier

In [19]:
X_train_t = torch.FloatTensor(X_train) 
X_test_t = torch.FloatTensor(X_test)
y_train_t = torch.tensor(y_train).long()
y_test_t = torch.tensor(y_test).long()

In [20]:
class PModel(nn.Module):
    def __init__(self, input_features=21, hidden_layer_1=100, hidden_layer_2=100, output_features=4):
        super().__init__()
        self.fc1 = nn.Linear(input_features, hidden_layer_1)
        self.fc2 = nn.Linear(hidden_layer_1, hidden_layer_2)
        self.out = nn.Linear(hidden_layer_2, output_features)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.out(x)
        return x

In [21]:
model = PModel()
model

PModel(
  (fc1): Linear(in_features=21, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (out): Linear(in_features=100, out_features=4, bias=True)
)

In [22]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

In [23]:
epochs = 201
losses = []

for i in range(epochs):
    y_pred = model(X_train_t)
    loss = criterion(y_pred, y_train_t)
    losses.append(loss)
    print(f'epoch: {i:2} loss: {loss.item():10.4f}')
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

epoch:  0 loss:     0.9877
epoch:  1 loss:     3.0609
epoch:  2 loss:     0.7220
epoch:  3 loss:     0.9681
epoch:  4 loss:     1.1465
epoch:  5 loss:     1.2489
epoch:  6 loss:     1.2932
epoch:  7 loss:     1.2887
epoch:  8 loss:     1.2412
epoch:  9 loss:     1.1644
epoch: 10 loss:     1.0558
epoch: 11 loss:     0.9431
epoch: 12 loss:     0.8137
epoch: 13 loss:     0.6892
epoch: 14 loss:     0.5736
epoch: 15 loss:     0.4956
epoch: 16 loss:     0.5076
epoch: 17 loss:     0.6044
epoch: 18 loss:     0.6401
epoch: 19 loss:     0.5478
epoch: 20 loss:     0.4543
epoch: 21 loss:     0.4306
epoch: 22 loss:     0.4429
epoch: 23 loss:     0.4577
epoch: 24 loss:     0.4630
epoch: 25 loss:     0.4531
epoch: 26 loss:     0.4295
epoch: 27 loss:     0.4047
epoch: 28 loss:     0.3898
epoch: 29 loss:     0.3933
epoch: 30 loss:     0.4133
epoch: 31 loss:     0.4223
epoch: 32 loss:     0.4151
epoch: 33 loss:     0.3971
epoch: 34 loss:     0.3815
epoch: 35 loss:     0.3779
epoch: 36 loss:     0.3792
e

In [24]:
pred_nn_test = model(X_test_t)
_, preds_nn_y = torch.max(pred_nn_test, 1)

In [25]:
preds_nn_y

tensor([1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 2,
        1, 3, 1, 1, 3, 1, 3, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 3, 3, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1,
        1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 3, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 3, 2, 1, 3,
        2, 1, 2, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 2, 2, 1, 1,
        1, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 2,
        2, 1, 1, 3, 1, 3, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 3, 1, 3, 1, 2, 1, 1, 1,
        1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 2, 1, 1, 1,
        3, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3,
        1, 2, 1, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, 1, 3, 1, 1,
        1, 2, 1, 3, 1, 1, 1, 1, 3, 1, 1,

In [26]:
accuracy_score(y_test_t, preds_nn_y)

0.8714733542319749

In [27]:
model.eval()

PModel(
  (fc1): Linear(in_features=21, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (out): Linear(in_features=100, out_features=4, bias=True)
)

In [28]:
dummy_input = torch.rand(1, 21, requires_grad=False)

In [29]:
torch.onnx.export(
    model, 
    dummy_input,
    "fetal_health_pyt.onnx",
    export_params=True,
    input_names=["input"],
    output_names=["output"],
    opset_version=9,
    dynamic_axes={'input' : {0 : 'batch_size'}, 
                  'output' : {0 : 'batch_size'}})

In [30]:
import onnx

onnx_model = onnx.load("cleveland_heart_disease_pyt.onnx")
onnx.checker.check_model(onnx_model)

In [31]:
sess_nn = rt.InferenceSession("cleveland_heart_disease_pyt.onnx")
def to_numpy(tensor):
    return tensor.detach().cpu().numpy()

input_name_nn = sess_nn.get_inputs()[0].name
label_name_nn = sess_nn.get_outputs()[0].name
pred_onx_nn = sess_nn.run([label_name_nn], {input_name_nn: to_numpy(X_test_t)})

In [32]:
preds_nn = np.argmax(pred_onx_nn, axis=-1).squeeze()

In [33]:
accuracy_score(y_test_t, preds_nn)

0.8714733542319749

### Tensorflow Neural Network Classifier

In [None]:
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(len(X_train)).batch(32)

test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(32)

In [None]:
class TModel(Model):
    def __init__(self):
        super(TModel, self).__init__()
        self.d1 = Dense(units=21, activation='relu')
        self.d2 = Dense(100, activation='relu')
        self.d3 = Dense(100, activation='relu')
        self.d4 = Dense(4)
        
    def call(self, x):
        x = self.d1(x)
        x = self.d2(x)
        x = self.d3(x)
        return self.d4(x)
    
tmodel = TModel()

In [None]:
t_loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
t_optimizer = tf.keras.optimizers.Adam()

In [None]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

In [None]:
@tf.function
def train_step(data, labels):
    with tf.GradientTape() as tape:
        t_predictions = tmodel(data, training=True)
        t_loss = t_loss_object(labels, t_predictions)
    t_gradients = tape.gradient(t_loss, tmodel.trainable_variables)
    t_optimizer.apply_gradients(zip(t_gradients, tmodel.trainable_variables))
    
    train_loss(t_loss)
    train_accuracy(labels, t_predictions)
        

In [None]:
@tf.function
def test_step(data, labels):
    t_predictions = tmodel(data, training=False)
    tt_loss = t_loss_object(labels, t_predictions)

    test_loss(tt_loss)
    test_accuracy(labels, t_predictions)

In [None]:
EPOCHS = 201

for epoch in range(EPOCHS):
    # Reset the metrics at the start of the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    for data, labels in train_ds:
        train_step(data, labels)

    for test_data, test_labels in test_ds:
        test_step(test_data, test_labels)

    print(
    f'Epoch {epoch + 1}, '
    f'Loss: {train_loss.result()}, '
    f'Accuracy: {train_accuracy.result() * 100}, '
    f'Test Loss: {test_loss.result()}, '
    f'Test Accuracy: {test_accuracy.result() * 100}'
    )

In [None]:
#pip install -U tf2onnx

In [None]:
tmodel.summary()

In [None]:
import tf2onnx

In [None]:
input_spec = (tf.TensorSpec((None,13), tf.float64, name="input"),)
output_path = "cleveland_heart_disease_tf.onnx"

model_proto, _ = tf2onnx.convert.from_keras(tmodel, input_signature=input_spec, opset=9, output_path=output_path)
output_names = [n.name for n in model_proto.graph.output]

In [None]:
sess_tf = rt.InferenceSession("cleveland_heart_disease_tf.onnx")
onnx_pred_tf = sess_tf.run(output_names, {"input": X_test})

In [None]:
preds_tf = np.argmax(onnx_pred_tf, axis=-1).squeeze()

In [None]:
accuracy_score(y_test, preds_tf)