In [2]:
!nvidia-smi

Mon Mar  1 17:18:21 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.39       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   65C    P8    11W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
import requests
import random
# from sklearn.model_selection import train_test_split
# import h5py

In [4]:
# example & label split
def example_label_split(docs):
  """Separate every record into its leading values and its trailing label.

  Args:
    docs: iterable of sequences. For each one, the last element is taken as
      the label and everything before it as the example.

  Returns:
    A tuple ``(examples, labels)`` of numpy arrays, in the same order as
    the records in ``docs``.
  """
  # Walk the input once, pairing each example with its label.
  pairs = [(record[:-1], record[-1]) for record in docs]
  features = [example for example, _ in pairs]
  targets = [label for _, label in pairs]
  return np.array(features), np.array(targets)

In [5]:
# train-test split
# test_proportion of 20 means 20% test and 80% train
def train_test_split(docs, test_proportion, do_random=False):
  """Split every group in ``docs`` into train and test items.

  Each group is cut separately with the same percentage, so every group
  contributes its own ``test_proportion`` percent of items to the test set.

  Args:
    docs: iterable of sequences, one per group. The groups themselves are
      never modified.
    test_proportion: percentage of each group that goes to the test set;
      20 means 20% test and 80% train.
    do_random: when True, the items of each group are shuffled with the
      ``random`` module's global generator before the cut.

  Returns:
    A tuple ``(train, test)`` of flat lists holding the items of all groups.
  """
  test = []
  train = []
  for doc in docs:
    # Work on a copy so shuffling never reorders the caller's own lists.
    items = list(doc)
    if do_random:
      random.shuffle(items)
    # Multiply before dividing: (29/100)*100 evaluates to 28.999... and
    # would be truncated to 28 instead of 29.
    split_point = int(len(items) * test_proportion / 100)
    test.extend(items[:split_point])
    train.extend(items[split_point:])

  return train, test

In [6]:
# reads tsv file & separate between disease and non-disease doc.
def read_file(url, index=0, timeout=30):
  """Download a tab-separated file and partition its rows by the last column.

  The first line of the file is treated as a header and skipped. Every other
  non-blank line is split on tabs and converted to a list of ints.

  Args:
    url: address of the TSV file, fetched with ``requests.get``.
    index: unused; kept so existing callers keep working.
    timeout: seconds to wait for the server before giving up.

  Returns:
    A tuple ``(disease_docs, non_disease_docs)``: the rows whose last field
    is not ``'1'``, and the rows whose last field is ``'1'``.

  Raises:
    requests.HTTPError: if the server answers with an error status.
    ValueError: if a field cannot be converted to ``int``.
  """
  disease_docs = []
  non_disease_docs = []

  # read file by link; fail loudly instead of parsing an error page
  response = requests.get(url, timeout=timeout)
  response.raise_for_status()
  data = response.text.strip().split('\n')

  for line in data[1:]:
    stripped = line.strip()
    if not stripped:
      # a blank line inside the file carries no record
      continue
    fields = stripped.split('\t')
    row = list(map(int, fields))
    # NOTE(review): rows whose last field is NOT '1' are filed as disease
    # rows -- confirm this matches the label encoding of the data file.
    if fields[-1] != '1':
      disease_docs.append(row)
    else:
      non_disease_docs.append(row)

  return disease_docs, non_disease_docs

In [7]:
# print predicted output with their example
def print_result(test_ex, results):
  """Print a tab-separated table of examples with their predicted target.

  One row is printed per entry of ``results``: a 1-based serial number, the
  values of the matching entry of ``test_ex``, and the predicted target
  (1 when the result is greater than 0.4, otherwise 0). The column header
  is printed above the rows and again below them.

  Args:
    test_ex: indexable collection of examples; ``test_ex[i]`` belongs to
      ``results[i]``.
    results: sized, indexable collection of prediction scores.
  """
  print('SN\tAGE\tSEX\tCP\tBP\tCHOL\tMAX_HR\tTARGET')
  for row_no in range(len(results)):
    # serial number followed by the example's values, all tab-terminated
    cells = [str(row_no + 1)] + [str(value) for value in test_ex[row_no]]
    print('\t'.join(cells), end='\t')
    print(1 if results[row_no] > 0.4 else 0)
  print('SN\tAGE\tSEX\tCP\tBP\tCHOL\tMAX_HR\tTARGET\n\n')

In [18]:
# Main function (Create model, training, & testing)
def main(train_ex, train_lbl, test_ex, test_lbl):
  """Build, train and evaluate the network, then print its predictions.

  The model is a 1000-unit sigmoid hidden layer followed by a single ReLU
  output unit, trained for 500 epochs with the Adamax optimizer and mean
  absolute error loss. The weights with the highest validation accuracy
  are checkpointed during training and reloaded before testing.

  Args:
    train_ex: training examples; the first layer is declared with
      ``input_dim=6``.
    train_lbl: labels for ``train_ex``.
    test_ex: examples used as validation data during training and for the
      final predict/evaluate calls.
    test_lbl: labels for ``test_ex``.
  """
  # NOTE(review): the same test set drives checkpoint selection and the
  # final score, so the reported accuracy is presumably optimistic --
  # consider a separate validation split.
  # NOTE(review): print_result labels a prediction 1 above 0.4; confirm this
  # agrees with the threshold behind the 'accuracy' metric.

  # create model
  layer_stack = [
      keras.layers.Dense(units=1000, input_dim=6, activation='sigmoid', name='hidden_layer_1'),
      keras.layers.Dense(units=1, activation='relu', name='output_layer'),
  ]
  model = keras.Sequential(layer_stack)

  # compile model
  model.compile(optimizer='Adamax', loss='mean_absolute_error', metrics=['accuracy'])

  # model checkpoint: keep only the weights of the best epoch so far
  checkpoint_path = "model/model.cpkt"
  best_weights_saver = keras.callbacks.ModelCheckpoint(
      filepath=checkpoint_path,
      monitor='val_accuracy',
      mode='max',
      save_best_only=True,
      save_weights_only=True,
      verbose=0)

  # training the NN
  print('\nPlease wait...\nTraining the model...')
  model.fit(train_ex, train_lbl,
            epochs=500,
            verbose=0,
            validation_data=(test_ex, test_lbl),
            callbacks=[best_weights_saver])
  print('Successfully Trained')

  # Loads the weights
  print('\nLoads the best model..')
  model.load_weights(checkpoint_path)

  # test the model
  print('\nTesting the model...\n')
  results = model.predict(test_ex)

  # evaluate the model (the loss value is not used afterwards)
  _, accuracy = model.evaluate(test_ex, test_lbl, verbose=2)
  accuracy_pct = 100 * accuracy

  # display predicted results
  print('\nPredicted results::: Accuracy: {:5.2f}%'.format(accuracy_pct))
  print_result(test_ex, results)

  # print accuracy
  print("Accuracy: {:5.2f}%".format(accuracy_pct))

In [19]:
if __name__ == '__main__':

  # Seed every random generator used below so the shuffled split and the
  # weight initialisation start from the same state on each run.
  # NOTE(review): GPU kernels may still cause small run-to-run differences.
  SEED = 42
  random.seed(SEED)
  np.random.seed(SEED)
  tf.random.set_seed(SEED)

  # The 'heart_data.tsv' file was uploaded to my GitHub repository.
  data_link = 'https://raw.githubusercontent.com/iammeskat/data/main/nnfl_assignment_1/heart_data.tsv'
  
  # read file & split between disease and non-disease
  disease_docs, non_disease_docs = read_file(data_link)

  # test_docs:  20% of disease_docs plus 20% of non_disease_docs.
  # train_docs: the remaining 80% of disease_docs and of non_disease_docs.
  # test_proportion of 20 means 20% test and 80% train.
  # do_random=True: shuffle each group before it is cut.
  train_docs, test_docs = train_test_split(docs=[disease_docs, non_disease_docs],
                                           test_proportion = 20,
                                           do_random = True)
  
  # example and label separation & convert into numpy array
  train_ex, train_lbl = example_label_split(train_docs)
  test_ex, test_lbl = example_label_split(test_docs)

  main(train_ex, train_lbl,
       test_ex, test_lbl)


Please wait...
Training the model...
Successfully Trained

Loads the best model..

Testing the model...

2/2 - 0s - loss: 0.2203 - accuracy: 0.9000

Predicted results::: Accuracy: 90.00%
SN	AGE	SEX	CP	BP	CHOL	MAX_HR	TARGET
1	48	1	1	130	245	180	0
2	37	1	2	130	250	187	0
3	41	0	2	112	268	172	0
4	66	1	0	120	302	151	1
5	42	1	2	120	240	194	0
6	34	1	3	118	182	174	0
7	76	0	2	140	197	116	0
8	51	1	2	125	245	166	0
9	35	0	0	138	183	182	1
10	35	1	1	122	192	174	0
11	74	0	1	120	269	121	1
12	49	1	1	130	266	171	0
13	51	1	2	94	227	154	0
14	43	1	0	115	303	181	0
15	62	1	2	130	231	146	0
16	41	1	1	120	157	182	0
17	54	0	2	135	304	170	0
18	46	0	2	142	177	160	0
19	54	0	2	160	201	163	0
20	57	0	0	128	303	159	1
21	42	0	2	120	209	173	0
22	69	1	3	160	234	131	0
23	41	1	2	112	250	179	0
24	43	1	0	110	211	161	0
25	47	1	2	138	257	156	0
26	54	0	1	132	288	159	0
27	51	0	2	120	295	157	0
28	55	0	1	132	342	166	0
29	42	1	1	120	295	162	0
30	52	1	1	134	201	158	0
31	68	0	2	120	211	115	0
32	42	1	0	140	226	178	0
33	44	1	1	120	220	