### Breast Cancer Dataset

In [6]:
from sklearn.datasets import load_breast_cancer
import numpy as np

# Load scikit-learn's bundled breast-cancer dataset; the result is a dict-like
# object that is also indexed by key in the cells below.
breast_cancer = load_breast_cancer()
# Last expression of the cell: display the available fields.
breast_cancer.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])

In [7]:
# (n_samples, n_features) of the raw feature matrix.
breast_cancer.data.shape

(569, 30)

In [8]:
# Column names of the feature matrix, in column order.
breast_cancer.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [9]:
# Class names for the target labels.
breast_cancer.target_names

array(['malignant', 'benign'], dtype='<U9')

### Use the first 3 of the 30 features, with 60% of the samples for training

In [10]:
# Build the dataset for the small experiment: the first 3 feature columns and
# the class label of every sample.
data, labels = breast_cancer.data[:, 0:3], breast_cancer.target[:]

num_samples = len(labels)
# Shuffle with a fixed seed so that the row order (and therefore any split
# taken from it) is identical on every Restart & Run All. A dedicated
# RandomState is used so the global NumPy RNG state is left untouched.
shuffle_order = np.random.RandomState(0).permutation(num_samples)

# Apply the same permutation to features and labels so rows stay aligned.
data = data[shuffle_order, :]
labels = labels[shuffle_order]

# Scale every feature column by its maximum over all samples.
# NOTE(review): the maximum is computed over the full array, i.e. before any
# train/test split, so held-out rows influence the scaling. Confirm whether
# that is acceptable for this experiment.
data = data / np.amax(data, axis=0)
# Scale labels by their maximum as well; presumably the labels are 0/1 class
# ids, in which case this only converts them to floats. TODO confirm.
labels = labels / np.amax(labels, axis=0)
data = data.astype('float32')
labels = labels.astype('float32')

In [11]:
# Sanity check of the preprocessed arrays: overall shape, then the first five
# feature rows; the first five labels are shown as the cell's output.
print(data.shape)
print(data[:5])
labels[:5]

(569, 3)
[[0.5332622  0.5628819  0.5174005 ]
 [0.61152613 0.56186354 0.59204245]
 [0.65136963 0.47301427 0.6291777 ]
 [0.5222341  0.51247454 0.5025995 ]
 [0.532195   0.48625255 0.51474804]]


array([1., 0., 0., 0., 1.], dtype=float32)

In [12]:
# Alias the preprocessed arrays as X (features) and y (targets).
X, y = data, labels

# The first 60% of the rows are used for training, the remainder for testing.
train_split = 0.6
n_train = int(train_split * num_samples)

x_train, x_test = X[:n_train], X[n_train:]
# Targets are turned into column vectors of shape (n, 1).
y_train = y[:n_train].reshape(-1, 1)
y_test = y[n_train:].reshape(-1, 1)

In [13]:
# Print the shape of each split: train features, train targets, test features,
# test targets (in that order).
for _split in (x_train, y_train, x_test, y_test):
    print(_split.shape)

(341, 3)
(341, 1)
(228, 3)
(228, 1)


In [14]:
import tensorflow
import keras
from keras.models import Sequential
from keras.layers import Dense

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [15]:
# Small fully connected network:
# 3 inputs -> 8 sigmoid hidden units -> 1 linear output unit.
model = Sequential([
    Dense(8, activation='sigmoid', input_dim=3),
    Dense(1, activation='linear'),
])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 8)                 32        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 9         
Total params: 41
Trainable params: 41
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Train with plain SGD against a mean-squared-error objective.
model.compile(optimizer='sgd', loss='mean_squared_error')

In [17]:
import time

# Wall-clock timing of the training run.
startTime = time.time()
model.fit(x_train, y_train,
          batch_size=4, epochs=200,
          verbose=0,
          validation_data=(x_test, y_test))

# Read the clock exactly once so that the printed end time and the printed
# duration are derived from the same instant (previously time.time() was
# called twice inside the print, giving two slightly different readings).
endTime = time.time()
print('StartTime %0.3f ~ EndTime %0.3f , Duration %0.3f' % (startTime, endTime, endTime - startTime))

InternalError: Blas GEMM launch failed : a.shape=(4, 3), b.shape=(3, 8), m=4, n=8, k=3
	 [[Node: dense_1/MatMul = MatMul[T=DT_FLOAT, _class=["loc:@training/SGD/gradients/dense_1/MatMul_grad/MatMul_1"], transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](_arg_dense_1_input_0_0/_29, dense_1/kernel/read)]]
	 [[Node: loss/mul/_49 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_221_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

### A larger network with all 30 features, more layers, more neurons and more epochs

In [14]:
# Build the dataset for the larger experiment: every feature column and the
# class label of every sample.
data, labels = breast_cancer.data[:, :], breast_cancer.target[:]

num_samples = len(labels)
# Shuffle with a fixed seed so that the row order (and therefore any split
# taken from it) is identical on every Restart & Run All. A dedicated
# RandomState is used so the global NumPy RNG state is left untouched.
shuffle_order = np.random.RandomState(0).permutation(num_samples)

# Apply the same permutation to features and labels so rows stay aligned.
data = data[shuffle_order, :]
labels = labels[shuffle_order]

# Scale every feature column by its maximum over all samples.
# NOTE(review): the maximum is computed over the full array, i.e. before any
# train/test split, so held-out rows influence the scaling. Confirm whether
# that is acceptable for this experiment.
data = data / np.amax(data, axis=0)
# Scale labels by their maximum as well; presumably the labels are 0/1 class
# ids, in which case this only converts them to floats. TODO confirm.
labels = labels / np.amax(labels, axis=0)
data = data.astype('float32')
labels = labels.astype('float32')

# Sanity check: overall shape, the first five feature rows, and (as the cell's
# output) the first five labels.
print(data.shape)
print(data[0:5, :])
labels[0:5]

(569, 30)
[[0.36712915 0.41624236 0.34647214 0.12990804 0.57735616 0.14458598
  0.02371134 0.02731113 0.6200658  0.63639164 0.07323355 0.19795291
  0.06169245 0.02392106 0.22762609 0.0535229  0.02555555 0.10409168
  0.19759342 0.08733244 0.31215316 0.43944287 0.28312102 0.09047955
  0.57726866 0.08357278 0.03501597 0.08182131 0.4038867  0.35657832]
 [0.41657773 0.39332995 0.39803714 0.16805278 0.70379436 0.21079907
  0.0938613  0.16153082 0.66085523 0.66769296 0.11994431 0.15138178
  0.10714286 0.04524161 0.3063283  0.0810192  0.04169192 0.21235083
  0.24737175 0.1038874  0.36237514 0.36657247 0.33503184 0.12139163
  0.655885   0.10538752 0.08682109 0.27024055 0.41654113 0.37619278]
 [0.657773   0.4460285  0.64350134 0.4270292  0.61933905 0.38129705
  0.34934396 0.45641154 0.60263157 0.6872947  0.27577445 0.21392016
  0.22070064 0.17663224 0.25615162 0.23737076 0.11199495 0.2979731
  0.20481317 0.1761059  0.63124305 0.461849   0.58280253 0.3761166
  0.63432163 0.29196596 0.2821885  0.5

array([1., 1., 0., 1., 1.], dtype=float32)

In [15]:
# Alias the preprocessed arrays as X (features) and y (targets).
X, y = data, labels

# The first 60% of the rows are used for training, the remainder for testing.
train_split = 0.6
n_train = int(train_split * num_samples)
x_train, y_train = X[:n_train], y[:n_train]
x_test, y_test = X[n_train:], y[n_train:]

# Reshape the targets to (n, 1) column vectors, exactly as the 3-feature
# experiment earlier in this notebook does, so both pipelines hand the model
# targets of the same rank as its single output unit.
y_train = y_train.reshape(y_train.shape[0], 1)
y_test = y_test.reshape(y_test.shape[0], 1)

In [16]:
# Larger fully connected network:
# 30 inputs -> 16 sigmoid units -> 16 sigmoid units -> 1 linear output unit.
model4 = Sequential([
    Dense(16, activation='sigmoid', input_dim=30),
    Dense(16, activation='sigmoid'),
    Dense(1, activation='linear'),
])
model4.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 16)                496       
_________________________________________________________________
dense_4 (Dense)              (None, 16)                272       
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 17        
Total params: 785
Trainable params: 785
Non-trainable params: 0
_________________________________________________________________


In [27]:
import time

# The timer spans compilation, training and the final evaluation.
startTime = time.time()

# Plain SGD against a mean-squared-error objective.
model4.compile(optimizer='sgd', loss='mean_squared_error')
model4.fit(
    x=x_train,
    y=y_train,
    epochs=1000,
    batch_size=4,
    validation_data=(x_test, y_test),
    verbose=0,
)

# Score the trained network on the held-out split and show the result.
score4 = model4.evaluate(x=x_test, y=y_test)
print(score4)

# Single clock read, reused for both the end time and the duration.
endTime = time.time()
print('StartTime %0.3f ~ EndTime %0.3f , Duration %0.3f' % (startTime, endTime, endTime - startTime))

0.048683479885783115
StartTime 1531467510.858 ~ EndTime 1531467766.272 , Duration 255.414
