### Breast Cancer Dataset

In [3]:
from sklearn.datasets import load_breast_cancer
import numpy as np

# Load the scikit-learn breast cancer dataset and display which fields the
# returned object exposes (the cell's last expression is shown as its output).
breast_cancer = load_breast_cancer()
breast_cancer.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])

In [4]:
# (number of samples, number of features) of the feature matrix.
breast_cancer.data.shape

(569, 30)

In [5]:
# Names of the feature columns, in column order.
breast_cancer.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [6]:
# Class names of the target variable.
breast_cancer.target_names

array(['malignant', 'benign'], dtype='<U9')

### Use only the first 3 of the 30 features as input data

In [7]:
# Inputs: the first three feature columns. Targets: the class labels.
data, labels = breast_cancer.data[:, 0:3], breast_cancer.target[:]

# Shuffle samples and labels with one shared permutation so rows stay aligned.
# A dedicated, seeded RandomState makes the shuffle (and therefore the
# train/test split derived from it) repeatable across kernel restarts, without
# touching NumPy's global random state.
num_samples = len(labels)
shuffle_order = np.random.RandomState(0).permutation(num_samples)

data = data[shuffle_order, :]
labels = labels[shuffle_order]

# Scale each feature column, and the labels, by its maximum value, then store
# everything as float32.
# NOTE(review): the maxima are computed over all samples, i.e. before any
# train/test split is made -- confirm this is acceptable for the evaluation.
data = data / np.amax(data, axis = 0)
labels = labels / np.amax(labels, axis = 0)
data = data.astype('float32')
labels = labels.astype('float32')

In [8]:
# Quick look at the prepared arrays: overall shape, the first five feature
# rows, and (as the cell's displayed value) the first five labels.
print(data.shape)
print(data[:5])
labels[:5]

(569, 3)
[[0.4571327  0.54404277 0.43835545]
 [0.38242617 0.38110998 0.36212203]
 [0.42831734 0.71639514 0.4076923 ]
 [0.60512275 0.5157841  0.5819629 ]
 [0.4766987  0.52240324 0.47023872]]


array([1., 1., 1., 0., 0.], dtype=float32)

In [9]:
X, y = data, labels

# The first 60% of the (already shuffled) samples are used for training,
# the remainder for testing.
train_split = 0.6
n_train = int(train_split * num_samples)
x_train, x_test = X[:n_train], X[n_train:]

# Targets become column vectors of shape (n, 1).
y_train = y[:n_train].reshape(-1, 1)
y_test = y[n_train:].reshape(-1, 1)

In [10]:
# Print the shape of each split, one per line: train inputs, train targets,
# test inputs, test targets.
print(*(part.shape for part in (x_train, y_train, x_test, y_test)), sep='\n')

(341, 3)
(341, 1)
(228, 3)
(228, 1)


In [11]:
import tensorflow
import keras
from keras.models import Sequential
from keras.layers import Dense

In [12]:
# Small fully connected network: 3 inputs -> 8 sigmoid units -> 1 linear output.
# NOTE(review): a linear output unit treats the 0/1 label as a regression
# target -- confirm that is intended for this classification dataset.
model = Sequential([
    Dense(8, activation='sigmoid', input_dim=3),
    Dense(1, activation='linear'),
])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 8)                 32        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 9         
Total params: 41
Trainable params: 41
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Configure training: stochastic gradient descent minimising mean squared error.
model.compile(optimizer='sgd', loss='mean_squared_error')

In [None]:
import time

# Train the small model with per-epoch progress output; the test split is
# passed as validation data so its loss is reported after every epoch.
startTime = time.time()
model.fit(x_train,y_train,
         batch_size=4, epochs=200,
         verbose=1,
         validation_data=(x_test,y_test))

# Read the clock exactly once so the printed end time and the printed duration
# refer to the same instant.
endTime = time.time()
print('StartTime %0.3f ~ EndTime %0.3f , Duration %0.3f' %(startTime, endTime, endTime - startTime))

Train on 341 samples, validate on 228 samples
Epoch 1/200


### A larger network with more data, more layers, more neurons and more epochs

In [13]:
# Inputs: every feature column. Targets: the class labels.
data, labels = breast_cancer.data[:, :], breast_cancer.target[:]

# Shuffle samples and labels with one shared permutation so rows stay aligned.
# A dedicated, seeded RandomState makes the shuffle (and therefore the
# train/test split derived from it) repeatable across kernel restarts, without
# touching NumPy's global random state.
num_samples = len(labels)
shuffle_order = np.random.RandomState(0).permutation(num_samples)

data = data[shuffle_order, :]
labels = labels[shuffle_order]

# Scale each feature column, and the labels, by its maximum value, then store
# everything as float32.
# NOTE(review): the maxima are computed over all samples, i.e. before any
# train/test split is made -- confirm this is acceptable for the evaluation.
data = data / np.amax(data, axis = 0)
labels = labels / np.amax(labels, axis = 0)
data = data.astype('float32')
labels = labels.astype('float32')

# Quick look: overall shape, first five feature rows, and (as the cell's
# displayed value) the first five labels.
print(data.shape)
print(data[0:5,:])
labels[0:5]

(569, 30)
[[0.3058698  0.53411406 0.28997347 0.08868453 0.76070994 0.25949624
  0.07029054 0.04601889 0.6013158  0.69345236 0.12467804 0.42313203
  0.11342129 0.03391737 0.3832316  0.23353028 0.07575758 0.17539306
  0.42520583 0.10214477 0.26539955 0.54582155 0.24705414 0.06438646
  0.7362983  0.1604915  0.07189297 0.09546392 0.4477252  0.37166265]
 [0.40021345 0.37627292 0.37867373 0.15593763 0.50832313 0.12906775
  0.0022814  0.0146173  0.5832237  0.62407637 0.07462583 0.20390993
  0.06956324 0.02779417 0.18043688 0.05261448 0.00245884 0.05571131
  0.21532616 0.06802949 0.35405105 0.44529673 0.3267516  0.11582041
  0.52380955 0.09257089 0.00440735 0.05728522 0.42407352 0.35749397]
 [0.37886873 0.642057   0.36079577 0.1387445  0.59100366 0.20943834
  0.05574039 0.08026839 0.6240132  0.6495279  0.08691263 0.3056295
  0.06810737 0.03068978 0.23093478 0.07644018 0.02729798 0.11829892
  0.27333754 0.0877681  0.3399001  0.7103351  0.31042993 0.1071227
  0.67340523 0.1321361  0.08985623 0.2

array([1., 1., 1., 1., 1.], dtype=float32)

In [14]:
X, y = data, labels

# The first 60% of the (already shuffled) samples are used for training,
# the remainder for testing.
train_split = 0.6
n_train = int(train_split * num_samples)
x_train, y_train = X[:n_train], y[:n_train]
x_test, y_test = X[n_train:], y[n_train:]

# Make the targets explicit column vectors of shape (n, 1), as the split for
# the 3-feature experiment does, so they line up with the network's
# single-unit output instead of relying on implicit rank handling.
y_train = y_train.reshape(y_train.shape[0],1)
y_test = y_test.reshape(y_test.shape[0],1)

In [15]:
# Larger network: 30 inputs -> 16 sigmoid units -> 16 sigmoid units -> 1 linear output.
model4 = Sequential([
    Dense(16, activation='sigmoid', input_dim=30),
    Dense(16, activation='sigmoid'),
    Dense(1, activation='linear'),
])
model4.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 16)                496       
_________________________________________________________________
dense_4 (Dense)              (None, 16)                272       
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 17        
Total params: 785
Trainable params: 785
Non-trainable params: 0
_________________________________________________________________


In [16]:
import time

# The timer spans the whole compile -> train -> evaluate sequence.
startTime = time.time()

model4.compile(optimizer='sgd', loss='mean_squared_error')
model4.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=1000,
    batch_size=4,
    verbose=0,  # no per-epoch progress output
)

# Loss on the test split after training.
score4 = model4.evaluate(x_test, y_test)
print(score4)

endTime = time.time()
print('StartTime %0.3f ~ EndTime %0.3f , Duration %0.3f' % (startTime, endTime, endTime - startTime))

0.062496209582477286
StartTime 1531469181.153 ~ EndTime 1531469416.476 , Duration 235.323
