In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt

# Reading Dataset

In [2]:
# Peeking inside dataset
!head {'./dataset/breast-cancer-wisconsin.csv'}

1000025,5,1,1,1,2,1,3,1,1,0
1002945,5,4,4,5,7,10,3,2,1,0
1015425,3,1,1,1,2,2,3,1,1,0
1016277,6,8,8,1,3,4,3,7,1,0
1017023,4,1,1,3,2,1,3,1,1,0
1017122,8,10,10,8,7,10,9,7,1,1
1018099,1,1,1,1,2,10,3,1,1,0
1018561,2,1,2,1,2,1,3,1,1,0
1033078,2,1,1,1,2,1,1,1,5,0
1033078,4,2,1,1,2,1,2,1,1,0


In [3]:
# Loading the full (test + train) dataset
LABEL_COLUMN = 'class'
LABELS = [0, 1]
CSV_COLUMNS = ['id', 'clump_thickness', 'uniformity_of_cell_size', 'uniformity_of_cell_shape', 'marginal_adhesion', 'single_epithelial_cell_size', 'bare_nuclei', 'bland_chromatin', 'normal_nucleoli', 'mitoses', 'class']
SELECT_COLUMNS = ['clump_thickness', 'uniformity_of_cell_size', 'uniformity_of_cell_shape', 'marginal_adhesion', 'single_epithelial_cell_size', 'bare_nuclei', 'bland_chromatin', 'normal_nucleoli', 'mitoses', 'class']
DEFAULTS = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

def get_dataset(file_path, **kwargs):
  dataset = tf.data.experimental.make_csv_dataset(
      file_path,
      batch_size=5,
      na_value="?",
      num_epochs=1,
      label_name=LABEL_COLUMN,
      ignore_errors=True, 
      **kwargs
  )
  return dataset

full_dataset = get_dataset(
    './dataset/breast-cancer-wisconsin.csv',
    column_names=CSV_COLUMNS,
    select_columns=SELECT_COLUMNS,
    column_defaults = DEFAULTS
)

In [4]:
# Testing dataset loaded or not
def show_batch(dataset):
  for batch, label in dataset.take(1):
    for key, value in batch.items():
      print("{:20s}: {} : {}".format(key, value.numpy(), label.numpy()))

show_batch(full_dataset)

clump_thickness     : [ 5.  1.  1. 10.  3.] : [0. 0. 0. 1. 1.]
uniformity_of_cell_size: [ 1.  1.  1. 10. 10.] : [0. 0. 0. 1. 1.]
uniformity_of_cell_shape: [1. 1. 1. 6. 7.] : [0. 0. 0. 1. 1.]
marginal_adhesion   : [1. 1. 1. 3. 8.] : [0. 0. 0. 1. 1.]
single_epithelial_cell_size: [2. 1. 1. 3. 5.] : [0. 0. 0. 1. 1.]
bare_nuclei         : [ 1.  1.  1. 10.  8.] : [0. 0. 0. 1. 1.]
bland_chromatin     : [2. 3. 1. 4. 7.] : [0. 0. 0. 1. 1.]
normal_nucleoli     : [1. 1. 3. 3. 4.] : [0. 0. 0. 1. 1.]
mitoses             : [1. 1. 1. 2. 1.] : [0. 0. 0. 1. 1.]


In [5]:
# To check dataformat of the dataset
example_batch, labels_batch = next(iter(full_dataset))
print(example_batch, labels_batch)

OrderedDict([('clump_thickness', <tf.Tensor: shape=(5,), dtype=float32, numpy=array([ 1.,  4.,  1.,  7., 10.], dtype=float32)>), ('uniformity_of_cell_size', <tf.Tensor: shape=(5,), dtype=float32, numpy=array([1., 2., 1., 4., 4.], dtype=float32)>), ('uniformity_of_cell_shape', <tf.Tensor: shape=(5,), dtype=float32, numpy=array([1., 1., 1., 6., 3.], dtype=float32)>), ('marginal_adhesion', <tf.Tensor: shape=(5,), dtype=float32, numpy=array([ 1.,  1.,  1.,  4., 10.], dtype=float32)>), ('single_epithelial_cell_size', <tf.Tensor: shape=(5,), dtype=float32, numpy=array([2., 2., 2., 6., 3.], dtype=float32)>), ('bare_nuclei', <tf.Tensor: shape=(5,), dtype=float32, numpy=array([ 1.,  1.,  1.,  1., 10.], dtype=float32)>), ('bland_chromatin', <tf.Tensor: shape=(5,), dtype=float32, numpy=array([1., 2., 1., 4., 7.], dtype=float32)>), ('normal_nucleoli', <tf.Tensor: shape=(5,), dtype=float32, numpy=array([1., 1., 1., 3., 1.], dtype=float32)>), ('mitoses', <tf.Tensor: shape=(5,), dtype=float32, numpy=

In [6]:
def pack(features, label):
  return tf.stack(list(features.values()), axis=-1), label

full_dataset = full_dataset.map(pack)

# checking conversion
# for features, labels in full_dataset.take(1):
#   print(features.numpy())
#   print()
#   print(labels.numpy())
for x in full_dataset.take(1):
  print(type(x))

<class 'tuple'>


# Splitting Dataset into Train and Test

In [7]:
dataset_size = 699
train_dataset_percentage = 0.7
test_dataset_percentage = 0.3

train_size = tf.dtypes.cast(train_dataset_percentage * dataset_size, tf.int64)
test_size =  tf.dtypes.cast(test_dataset_percentage * dataset_size, tf.int64)

full_dataset = full_dataset.shuffle(dataset_size)
train_dataset = full_dataset.take(train_size)
full_dataset.skip(train_size)
test_dataset = full_dataset.take(test_size)
print(next(iter(train_dataset)))
print(next(iter(test_dataset)))

(<tf.Tensor: shape=(5, 9), dtype=float32, numpy=
array([[ 2.,  1.,  1.,  2.,  2.,  1.,  1.,  1.,  1.],
       [ 4., 10.,  4.,  7.,  3., 10.,  9., 10.,  1.],
       [ 2.,  1.,  1.,  1.,  2.,  1.,  3.,  1.,  1.],
       [ 5.,  3.,  2.,  8.,  5., 10.,  8.,  1.,  2.],
       [ 3.,  1.,  1.,  1.,  2.,  1.,  3.,  1.,  1.]], dtype=float32)>, <tf.Tensor: shape=(5,), dtype=float32, numpy=array([0., 1., 0., 1., 0.], dtype=float32)>)
(<tf.Tensor: shape=(5, 9), dtype=float32, numpy=
array([[ 6., 10., 10.,  2.,  8., 10.,  7.,  3.,  3.],
       [ 4.,  1.,  1.,  1.,  2.,  1.,  3.,  1.,  1.],
       [ 1.,  3.,  1.,  2.,  2.,  2.,  5.,  3.,  2.],
       [10.,  5.,  5.,  3.,  6.,  7.,  7., 10.,  1.],
       [ 7.,  4.,  6.,  4.,  6.,  1.,  4.,  3.,  1.]], dtype=float32)>, <tf.Tensor: shape=(5,), dtype=float32, numpy=array([1., 0., 0., 1., 1.], dtype=float32)>)


# Creating Neural Network Architecture

In [8]:
def PamaNet():
  input = tf.keras.layers.Input(shape=(9))
  x = tf.keras.layers.Dense(64, activation='relu')(input)
  x = tf.keras.layers.Dense(64, activation='relu')(x)
  x = tf.keras.layers.Dense(64, activation='relu')(x)
  x = tf.keras.layers.Dense(1, activation='relu')(x)
  output = x
  model = tf.keras.Model(input, output)
  return model

model = PamaNet()
model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 9)]               0         
_________________________________________________________________
dense (Dense)                (None, 64)                640       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 65        
Total params: 9,025
Trainable params: 9,025
Non-trainable params: 0
_________________________________________________________________


# Training

In [9]:
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy']
)

In [10]:
model.fit(train_dataset, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7fac9a80e780>

# Testing

In [25]:
test_loss, test_accuracy = model.evaluate(test_dataset)

print('\n\nTest Loss {}, Test Accuracy {}'.format(test_loss, test_accuracy))



  numdigits = int(np.log10(self.target)) + 1


OverflowError: ignored

In [11]:
data = next(iter(train_dataset))
x = data[0]
y = data[1]
print(y)
print(model(x))

tf.Tensor([0. 0. 0. 0. 1.], shape=(5,), dtype=float32)
tf.Tensor(
[[0.      ]
 [0.      ]
 [0.      ]
 [0.      ]
 [7.903204]], shape=(5, 1), dtype=float32)


In [23]:
# Custom Evaluation
def evaluate(model, dataset):
  correct_count = 0
  wrong_count = 0
  for x, y_true in dataset:
    y_est = model(x , training=False)
    for est, tru in zip(y_est, y_true):
      if tru == 0 and est <= 0.5:
        correct_count += 1
      elif tru == 1 and est > 0.5:
        correct_count += 1
      else:
        wrong_count += 1
  return correct_count / (correct_count + wrong_count)

In [24]:
print(evaluate(model, test_dataset))

0.9970674486803519
