In [None]:
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm
from sklearn.datasets import load_breast_cancer

import os
# Select the torch backend through the environment. Keras picks its backend
# when it is first imported, so this assignment has to stay above the
# `import keras` line below.
os.environ['KERAS_BACKEND'] = 'torch'
import torch
import keras
from keras import layers

In [None]:
# Fetch the dataset and hold the final ten samples back as a test set
data = load_breast_cancer()
# Splitting at index -10 yields (everything but the last ten, the last ten)
x, x_test = np.split(data['data'], [-10])
# The target has just two categories, so the integer labels work as they are.
# With more categories you would usually turn labels served as [0,1,2,...]
# into 1-hot rows, ie [1,0,0,...], [0,1,0,...], [0,0,1,...], ...
y, y_test = np.split(data['target'], [-10])
print(x.shape, data['feature_names'])
print(y.shape, data['target_names'])

In [None]:
# Seed the Python, NumPy and backend random generators so that weight
# initialisation, dropout masks and batch shuffling repeat exactly on a fresh
# Restart & Run All.
keras.utils.set_random_seed(42)

# The raw features live on very different scales, which pushes tanh units into
# their flat, saturated region. Standardise every input column to zero mean and
# unit variance inside the model, so predict() can still be fed raw features.
# NOTE(review): the statistics are computed over all of `x`, including any rows
# a later validation split holds out — adapt on the training rows only if that
# small leak matters.
normalizer = layers.Normalization(axis=-1)
normalizer.adapt(x)

# We'll use dense layers with dropout. Dropout helps us prevent overfitting
# when we're training.
model = keras.Sequential([
    # Take the input width from the data rather than hard-coding it
    layers.Input(shape=(x.shape[1],)),
    normalizer,
    layers.Dense(20, activation='tanh'),
    layers.Dropout(rate=0.2),
    layers.Dense(12, activation='tanh'),
    layers.Dropout(rate=0.2),
    layers.Dense(4, activation='tanh'),
    # The last layer has a sigmoid activation function to make sure we're
    # in the range [0,1]
    layers.Dense(1, activation="sigmoid")
])
# Binary cross-entropy is a good loss to select for classification problems
model.compile(loss="binary_crossentropy",
              optimizer=keras.optimizers.Adam(),
              metrics=["binary_accuracy"]
             )
model.summary()

In [None]:
# Keep 10% of the data out of the training loop as a validation set. The score
# on it shows how we do on samples the optimiser never sees directly, so it
# stands in for the testing error while we are still tuning hyper-parameters.
history = model.fit(
    x=x,
    y=y,
    validation_split=0.1,
    epochs=100,
)

In [None]:
# The model predicts a float and we need to convert that to 0 or 1. We'll use a
# threshold of 0.5, but often if you care about false positives or negatives more
# you could pick a more or less conservative value.

threshold = 0.5

# predict() returns one column per output unit; flatten it to a 1-D vector of
# scores (ravel keeps it 1-D even for a single sample).
y_pred = model.predict(x).ravel()
# Vectorised thresholding: 1 where the score exceeds the threshold, else 0
y_class = (y_pred > threshold).astype(int)

# Boolean mask of the samples we get wrong, computed once and reused below
wrong = y_class != y

# Let's look at the whole data, and then just the ones we get wrong. We'll
# visualize with the first two input dimensions.
x_name, y_name = data['feature_names'][0], data['feature_names'][1]
# constrained_layout keeps the stacked panels' labels from overlapping
fig, axs = plt.subplots(2, constrained_layout=True)

# Pin the colour scale to [0, 1] in both panels. Otherwise the second panel
# rescales to the range of the misclassified scores and its colours cannot be
# compared with the true labels in the first panel.
axs[0].scatter(x[:, 0], x[:, 1], c=y,
               edgecolor='k', cmap='cool', vmin=0, vmax=1)
axs[0].set_title('All training samples (colour = true label)')

axs[1].scatter(x[wrong, 0], x[wrong, 1], c=y_pred[wrong],
               edgecolor='k', cmap='cool', vmin=0, vmax=1)
axs[1].set_title('Misclassified samples (colour = predicted score)')
# Reuse the first panel's limits so positions can be compared directly
axs[1].set_xlim(axs[0].get_xlim())
axs[1].set_ylim(axs[0].get_ylim())

for ax in axs:
    ax.set_xlabel(x_name)
    ax.set_ylabel(y_name)
plt.show();

In [None]:
# Last step: score the held-out samples and compare against the true labels
y_pred = np.squeeze(model.predict(x_test))
# 1 where the score exceeds the threshold, 0 everywhere else
y_class = np.where(y_pred > threshold, 1, 0)
print(f'True: {y_test}')
print(f'Pred: {y_class}')