In [1]:
import pandas as pd
from tensorflow.keras.utils import get_file

# Load the heart dataset. header=None tells pandas the file has no header row,
# so the columns start out numbered; they are given names in the next cell.
df = pd.read_csv("/content/heart.csv", header=None)


# Quick sanity check on how many records were loaded.
print("Read {} rows.".format(len(df)))

Read 270 rows.


In [2]:
# Name the 14 columns by position (the CSV was read with header=None).
# The last column, "outcome", is the label used for classification later on.
# NOTE(review): the short names are abbreviations chosen by the author;
# confirm what each one stands for against the dataset description.
df.columns = [
    "age", "sex", "cpt", "rbp", "sci", "fbs", "rer",
    "mhra", "eia", "oldp", "tsotpe", "nomv", "thal",
    "outcome",
]


In [3]:
# Cap the rendered preview at 5 columns and 5 rows so the frame display stays
# compact. These options apply to every later DataFrame display in the session.
pd.set_option('display.max_columns', 5)
pd.set_option('display.max_rows', 5)
# Bare last expression: renders the (truncated) frame as the cell output.
df

Unnamed: 0,age,sex,...,thal,outcome
0,70.0,1.0,...,3.0,2
1,67.0,0.0,...,7.0,1
...,...,...,...,...,...
268,57.0,1.0,...,6.0,1
269,67.0,1.0,...,3.0,2


In [4]:
import pandas as pd
import os
import numpy as np
from sklearn import metrics
from scipy.stats import zscore

def expand_categories(values):
    """Describe how often each distinct value occurs, as a single string.

    Args:
        values: pandas Series whose values are to be summarized.

    Returns:
        A string of the form ``"[v1:p1%,v2:p2%,...]"``. Entries appear in the
        order produced by ``values.value_counts()``; each percentage is the
        value's count divided by ``len(values)``, times 100, rounded to two
        decimals.
    """
    counts = values.value_counts()
    size = float(len(values))
    parts = [
        "{}:{}%".format(label, round(100*(counts[label]/size),2))
        for label in counts.index
    ]
    return "[{}]".format(",".join(parts))
        
def analyze(df):
    """Print a per-column summary of the values in ``df``.

    The report starts with a blank line and the row count, followed by one
    line per column:

    * a column with more than 100 distinct values shows the distinct count
      and that count as a whole-number percentage of the row count;
    * any other column shows the value breakdown built by
      ``expand_categories``.

    Args:
        df: pandas DataFrame to summarize.

    Returns:
        None. The report is written to standard output.
    """
    print()
    total = float(len(df))

    print("{} rows".format(int(total)))
    for col in df.columns.values:
        unique_count = len(df[col].unique())
        if unique_count>100:
            # Too many distinct values to list: report only how many there are.
            print("** {}:{} ({}%)".format(col,unique_count,int(((unique_count)/total)*100)))
        else:
            # The breakdown is computed exactly once per column; a second,
            # discarded call would only repeat the value counting.
            print("** {}:{}".format(col,expand_categories(df[col])))

In [5]:
# Print the row count and the per-column value summary for the loaded frame.
analyze(df)


270 rows
** age:[54.0:5.93%,58.0:5.56%,51.0:4.44%,59.0:4.44%,60.0:4.44%,57.0:4.44%,52.0:4.07%,62.0:4.07%,44.0:3.7%,56.0:3.33%,64.0:3.33%,41.0:3.33%,65.0:2.96%,42.0:2.96%,67.0:2.96%,48.0:2.59%,43.0:2.59%,53.0:2.59%,61.0:2.59%,63.0:2.59%,50.0:2.59%,46.0:2.59%,45.0:2.59%,66.0:2.22%,55.0:2.22%,49.0:1.85%,70.0:1.48%,47.0:1.48%,71.0:1.11%,40.0:1.11%,39.0:1.11%,35.0:1.11%,69.0:1.11%,68.0:1.11%,37.0:0.74%,34.0:0.74%,77.0:0.37%,76.0:0.37%,74.0:0.37%,38.0:0.37%,29.0:0.37%]
** sex:[1.0:67.78%,0.0:32.22%]
** cpt:[4.0:47.78%,3.0:29.26%,2.0:15.56%,1.0:7.41%]
** rbp:[120.0:12.59%,130.0:11.48%,140.0:11.11%,110.0:6.3%,150.0:6.3%,160.0:4.07%,125.0:3.7%,128.0:3.33%,112.0:3.33%,138.0:3.33%,118.0:2.59%,135.0:2.22%,108.0:2.22%,132.0:2.22%,124.0:1.85%,145.0:1.85%,100.0:1.48%,134.0:1.48%,152.0:1.48%,136.0:1.11%,142.0:1.11%,115.0:1.11%,122.0:1.11%,126.0:1.11%,105.0:1.11%,180.0:1.11%,178.0:0.74%,102.0:0.74%,170.0:0.74%,94.0:0.74%,165.0:0.37%,129.0:0.37%,123.0:0.37%,192.0:0.37%,117.0:0.37%,200.0:0.37%,158.0:0.3

In [6]:
def encode_numeric_zscore(df, name, mean=None, sd=None):
    """Replace column ``name`` of ``df`` with its z-score, in place.

    Args:
        df: pandas DataFrame that holds the column; it is modified in place.
        name: label of the column to standardize.
        mean: value to subtract. When None, the column's own mean is used.
        sd: value to divide by. When None, the column's own standard
            deviation (``Series.std()``) is used.

    Returns:
        None.
    """
    column = df[name]
    # Compare against None explicitly so that a supplied 0 is honored.
    center = column.mean() if mean is None else mean
    spread = column.std() if sd is None else sd
    df[name] = (column - center) / spread

In [7]:
# Standardize every feature column in place, in the same order as the column
# list; 'outcome' is the class label and is deliberately left as-is.
for feature in ('age', 'sex', 'cpt', 'rbp', 'sci', 'fbs', 'rer',
                'mhra', 'eia', 'oldp', 'tsotpe', 'nomv', 'thal'):
    encode_numeric_zscore(df, feature)

# axis=1 drops whole columns (not rows) that contain any missing value.
# NOTE(review): presumably this removes columns that became NaN after scaling
# (e.g. a constant column divided by a standard deviation of 0) - confirm.
df.dropna(axis=1, inplace=True)
df[0:5]

Unnamed: 0,age,sex,...,thal,outcome
0,1.70892,0.688222,...,-0.874083,2
1,1.379578,-1.447639,...,1.187073,1
2,0.281771,0.688222,...,1.187073,2
3,1.050236,0.688222,...,1.187073,1
4,2.148043,-1.447639,...,-0.874083,1


In [8]:
# Convert to numpy - Classification
# Features: every column except the label.
x_columns = df.columns.drop('outcome')
x = df.loc[:, x_columns].to_numpy()

# Targets: one indicator column per distinct 'outcome' value.
dummies = pd.get_dummies(df['outcome'])
outcomes = dummies.columns
num_classes = dummies.shape[1]
y = dummies.to_numpy()

In [9]:
# Count the rows in each outcome class to see how balanced the labels are.
df.groupby('outcome')['outcome'].count()

outcome
1    150
2    120
Name: outcome, dtype: int64

In [10]:
import pandas as pd
import io
import requests
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn import metrics
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping

# Split into train/test: 25% of the rows are held out. random_state pins the
# shuffle so the same rows land in each split on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42)

# Create neural net
# Only the first layer declares the input width (one input per feature
# column). Later layers take their input size from the layer before them, so
# repeating input_dim=x.shape[1] on them would state a width they never see.
model = Sequential()
model.add(Dense(10, input_dim=x.shape[1], activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(10, activation='relu'))
# NOTE(review): a single linear unit directly before the softmax squeezes the
# whole representation into one number. Kept as-is; confirm it is intended.
model.add(Dense(1, kernel_initializer='normal'))
# Output layer: one softmax unit per one-hot target column.
model.add(Dense(y.shape[1],activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Stop once val_loss has failed to improve by at least 1e-3 for 5 epochs in a
# row, and roll the model back to the best weights seen.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3,
                        patience=5, verbose=1, mode='auto',
                        restore_best_weights=True)
# NOTE(review): the held-out test split doubles as the validation data that
# drives early stopping, so a score computed later on x_test is not a fully
# independent estimate.
# epochs=1000 is only an upper bound; early stopping normally ends training sooner.
model.fit(x_train,y_train,validation_data=(x_test,y_test),
          callbacks=[monitor],verbose=2,epochs=1000)

Epoch 1/1000
7/7 - 1s - loss: 0.6926 - val_loss: 0.6897
Epoch 2/1000
7/7 - 0s - loss: 0.6893 - val_loss: 0.6865
Epoch 3/1000
7/7 - 0s - loss: 0.6857 - val_loss: 0.6820
Epoch 4/1000
7/7 - 0s - loss: 0.6807 - val_loss: 0.6759
Epoch 5/1000
7/7 - 0s - loss: 0.6745 - val_loss: 0.6674
Epoch 6/1000
7/7 - 0s - loss: 0.6658 - val_loss: 0.6559
Epoch 7/1000
7/7 - 0s - loss: 0.6534 - val_loss: 0.6403
Epoch 8/1000
7/7 - 0s - loss: 0.6372 - val_loss: 0.6195
Epoch 9/1000
7/7 - 0s - loss: 0.6155 - val_loss: 0.5938
Epoch 10/1000
7/7 - 0s - loss: 0.5896 - val_loss: 0.5640
Epoch 11/1000
7/7 - 0s - loss: 0.5601 - val_loss: 0.5325
Epoch 12/1000
7/7 - 0s - loss: 0.5304 - val_loss: 0.5013
Epoch 13/1000
7/7 - 0s - loss: 0.5015 - val_loss: 0.4719
Epoch 14/1000
7/7 - 0s - loss: 0.4763 - val_loss: 0.4475
Epoch 15/1000
7/7 - 0s - loss: 0.4572 - val_loss: 0.4247
Epoch 16/1000
7/7 - 0s - loss: 0.4373 - val_loss: 0.4064
Epoch 17/1000
7/7 - 0s - loss: 0.4232 - val_loss: 0.3912
Epoch 18/1000
7/7 - 0s - loss: 0.4104 - 

<tensorflow.python.keras.callbacks.History at 0x7f51bd7e7ad0>

In [11]:

# Measure accuracy
# Reduce each row to a class index: the position of the largest predicted
# value, and the position of the 1 in the one-hot target.
pred = model.predict(x_test).argmax(axis=1)
y_eval = y_test.argmax(axis=1)
score = metrics.accuracy_score(y_eval, pred)
# The printed label is Turkish for "Validation score".
# NOTE(review): x_test was also passed as validation_data when fitting, so this
# number is a validation score rather than an untouched-test score.
print("Doğrulama Skoru: {}".format(score))

Doğrulama Skoru: 0.9117647058823529
