In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.model_selection import train_test_split

%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv('heart.csv.txt')

data.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
0,160,12.0,5.73,23.11,Present,49,25.3,97.2,52,1
1,144,0.01,4.41,28.61,Absent,55,28.87,2.06,63,1
2,118,0.08,3.48,32.28,Present,52,29.14,3.81,46,0
3,170,7.5,6.41,38.03,Present,51,31.99,24.26,58,1
4,134,13.6,3.5,27.78,Present,60,25.99,57.34,49,1


In [4]:
data.shape

(462, 10)

In [5]:
one_hot_famhist = pd.get_dummies(data['famhist'], prefix='famhist')
one_hot_famhist.head()

Unnamed: 0,famhist_Absent,famhist_Present
0,0,1
1,1,0
2,0,1
3,0,1
4,0,1


In [7]:
data = pd.concat([data, one_hot_famhist], axis=1)
data.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd,famhist_Absent,famhist_Present
0,160,12.0,5.73,23.11,Present,49,25.3,97.2,52,1,0,1
1,144,0.01,4.41,28.61,Absent,55,28.87,2.06,63,1,1,0
2,118,0.08,3.48,32.28,Present,52,29.14,3.81,46,0,0,1
3,170,7.5,6.41,38.03,Present,51,31.99,24.26,58,1,0,1
4,134,13.6,3.5,27.78,Present,60,25.99,57.34,49,1,0,1


In [8]:
data = data.drop(['famhist'], axis=1)
data.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,typea,obesity,alcohol,age,chd,famhist_Absent,famhist_Present
0,160,12.0,5.73,23.11,49,25.3,97.2,52,1,0,1
1,144,0.01,4.41,28.61,55,28.87,2.06,63,1,1,0
2,118,0.08,3.48,32.28,52,29.14,3.81,46,0,0,1
3,170,7.5,6.41,38.03,51,31.99,24.26,58,1,0,1
4,134,13.6,3.5,27.78,60,25.99,57.34,49,1,0,1


In [9]:
features = data.drop(['chd'], axis=1)
features.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,typea,obesity,alcohol,age,famhist_Absent,famhist_Present
0,160,12.0,5.73,23.11,49,25.3,97.2,52,0,1
1,144,0.01,4.41,28.61,55,28.87,2.06,63,1,0
2,118,0.08,3.48,32.28,52,29.14,3.81,46,0,1
3,170,7.5,6.41,38.03,51,31.99,24.26,58,0,1
4,134,13.6,3.5,27.78,60,25.99,57.34,49,0,1


In [10]:
labels = data['chd']
labels.head()

0    1
1    1
2    0
3    1
4    1
Name: chd, dtype: int64

In [11]:
inputs_to_normalize = ['sbp', 'tobacco', 'ldl', 'adiposity', 'typea', 'obesity','alcohol', 'age'  ]

In [12]:
for each in inputs_to_normalize:
    features[each] = (features[each] - features[each].min())/features[each].max()
    
print(features.head())

        sbp   tobacco       ldl  adiposity     typea   obesity   alcohol  \
0  0.270642  0.384615  0.309850   0.385267  0.461538  0.227565  0.660371   
1  0.197248  0.000321  0.223744   0.514709  0.538462  0.304208  0.013996   
2  0.077982  0.002564  0.163079   0.601083  0.500000  0.310004  0.025885   
3  0.316514  0.240385  0.354207   0.736409  0.487179  0.371189  0.164821   
4  0.151376  0.435897  0.164384   0.495175  0.602564  0.242379  0.389565   

        age  famhist_Absent  famhist_Present  
0  0.578125               0                1  
1  0.750000               1                0  
2  0.484375               0                1  
3  0.671875               0                1  
4  0.531250               0                1  


In [13]:
train_X, test_X, train_Y, test_Y = train_test_split(features, labels, test_size=0.2)

In [14]:
print(train_X.shape, train_Y.shape)

(369, 10) (369,)


In [15]:
print(test_X.shape, test_Y.shape)

(93, 10) (93,)


In [31]:
n_featurex = 10
n_labels = 2

n_epochs = 1500
learning_rate = 0.1

In [32]:
columns = [tf.feature_column.numeric_column(column_name) for column_name in inputs_to_normalize]

columns

[NumericColumn(key='sbp', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='tobacco', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='ldl', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='adiposity', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='typea', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='obesity', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='alcohol', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='age', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

In [33]:
columns.append(tf.feature_column.categorical_column_with_vocabulary_list('famhist_Absent', [0,1]))
columns.append(tf.feature_column.categorical_column_with_vocabulary_list('famhist_Present',[0,1]))

In [34]:
estimator = tf.estimator.LinearClassifier(feature_columns=columns)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\Admin\\AppData\\Local\\Temp\\tmpq0qiayn9', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [39]:
train_input_func = tf.compat.v1.estimator.inputs.pandas_input_fn(
    x = train_X,
    y = train_Y,
    batch_size = 100,
    num_epochs = n_epochs,
    shuffle = True
)

In [40]:
estimator.train(input_fn = train_input_func, steps = 1000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\Admin\AppData\Local\Temp\tmpq0qiayn9\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 0.6931472, step = 0
INFO:tensorflow:global_step/sec: 200.588
INFO:tensorflow:loss = 0.48015746, step = 100 (0.506 sec)
INFO:tensorflow:global_step/sec: 254.689
INFO:tensorflow:loss = 0.5910774, step = 200 (0.395 sec)
INFO:tensorflow:global_step/sec: 230.1
INFO:tensorflow:loss = 0.55597353, step = 300 (0.433 sec)
INFO:tensorflow:global_step/sec: 274.227
INFO:tensorflow:loss = 0.60796404, step = 400 (0.366 sec)
INFO:tensorflow:global_step/sec: 276.501
INFO:tensorflow:loss = 0.5462824,

<tensorflow_estimator.python.estimator.canned.linear.LinearClassifierV2 at 0x292d1eead68>

In [41]:
test_input_func = tf.compat.v1.estimator.inputs.pandas_input_fn(
    x=test_X,
    y=test_Y,
    batch_size=1,
    shuffle=False
)

In [42]:
results = estimator.evaluate(
    input_fn = test_input_func,
    steps = None
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-12-31T16:33:38Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\Admin\AppData\Local\Temp\tmpq0qiayn9\model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 1.13495s
INFO:tensorflow:Finished evaluation at 2020-12-31-16:33:39
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.7741935, accuracy_baseline = 0.61290324, auc = 0.8447856, auc_precision_recall = 0.72295713, average_loss = 0.4951851, global_step = 1000, label/mean = 0.38709676, loss = 0.4951851, precision = 0.7777778, prediction/mean = 0.3732038, recall = 0.5833333
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1000: C:\Users\Admin\AppData\Local\Temp\tmpq0qiayn9\model.ckpt-1000


In [43]:
results

{'accuracy': 0.7741935,
 'accuracy_baseline': 0.61290324,
 'auc': 0.8447856,
 'auc_precision_recall': 0.72295713,
 'average_loss': 0.4951851,
 'label/mean': 0.38709676,
 'loss': 0.4951851,
 'precision': 0.7777778,
 'prediction/mean': 0.3732038,
 'recall': 0.5833333,
 'global_step': 1000}