### Import Libraries

In [257]:
import tensorflow as tf
from tensorflow import keras

import numpy as np

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [258]:
print(tf.__version__)

2.1.0


# Dataset

In [259]:
DATA_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"

csv_file = keras.utils.get_file(fname="wdbc.data", origin=DATA_URL)

In [260]:
# Make numpy values easier to read.
np.set_printoptions(precision=6, suppress=True)

In [261]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
df = pd.read_csv(csv_file)

df.columns = ['id', 'diagnosis', 'radius', 'texture', 'perimeter', 'area', 'smoothness','compactness', 'concavity',
              'concave_points', 'symmetry', 'fractal_dimension', 'radius_se','texture_se', 'perimeter_se', 'area_se',
              'smoothness_se', 'compactness_se', 'concavity_se', 'concave_points_se', 'symmetry_se', 'fractal_dimension_se',
              'radius_worst', 'texture_worst', 'perimeter_worst', 'area_worst', 'smoothness_worst', 'compactness_worst',
              'concavity_worst', 'concave_points_worst', 'symmetry_worst', 'fractal_dimension_worst']
df.diagnosis.replace(to_replace=['M', 'B'], value=[1, 0], inplace=True)

df.head()

Unnamed: 0,id,diagnosis,radius,texture,perimeter,area,smoothness,compactness,concavity,concave_points,symmetry,fractal_dimension,radius_se,texture_se,perimeter_se,area_se,smoothness_se,compactness_se,concavity_se,concave_points_se,symmetry_se,fractal_dimension_se,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave_points_worst,symmetry_worst,fractal_dimension_worst
0,842517,1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
1,84300903,1,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.00615,0.04006,0.03832,0.02058,0.0225,0.004571,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
2,84348301,1,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,0.4956,1.156,3.445,27.23,0.00911,0.07458,0.05661,0.01867,0.05963,0.009208,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
3,84358402,1,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678
4,843786,1,12.45,15.7,82.57,477.1,0.1278,0.17,0.1578,0.08089,0.2087,0.07613,0.3345,0.8902,2.217,27.19,0.00751,0.03345,0.03672,0.01137,0.02165,0.005082,15.47,23.75,103.4,741.6,0.1791,0.5249,0.5355,0.1741,0.3985,0.1244


### Data normalization

In [262]:
diagnosis = df.pop('diagnosis')
id_dataset = df.pop('id')

min_max_scaler = MinMaxScaler()
min_max_scaler_m1 = MinMaxScaler(feature_range=(-1,1))

# Create an object to transform the data to fit minmax processor
x_scaled = min_max_scaler.fit_transform(df)
x_scaled_m1 = min_max_scaler_m1.fit_transform(df)

# Run the normalizer on the dataframe
df_normalized = pd.DataFrame(x_scaled)
df_normalized_m1 = pd.DataFrame(x_scaled_m1)

In [263]:
# View the dataframe
df_normalized.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29
0,0.643144,0.272574,0.615783,0.501591,0.28988,0.181768,0.203608,0.348757,0.379798,0.141323,0.156437,0.082589,0.12444,0.12566,0.119387,0.081323,0.04697,0.253836,0.084539,0.09111,0.606901,0.303571,0.539818,0.435214,0.347553,0.154563,0.192971,0.639175,0.23359,0.222878
1,0.601496,0.39026,0.595743,0.449417,0.514309,0.431017,0.462512,0.635686,0.509596,0.211247,0.229622,0.094303,0.18037,0.162922,0.150831,0.283955,0.096768,0.389847,0.20569,0.127006,0.556386,0.360075,0.508442,0.374508,0.48359,0.385375,0.359744,0.835052,0.403706,0.213433
2,0.21009,0.360839,0.233501,0.102906,0.811321,0.811361,0.565604,0.522863,0.776263,1.0,0.139091,0.175875,0.126655,0.038155,0.251453,0.543215,0.142955,0.353665,0.728148,0.287205,0.24831,0.385928,0.241347,0.094008,0.915472,0.814012,0.548642,0.88488,1.0,0.773711
3,0.629893,0.156578,0.630986,0.48929,0.430351,0.347893,0.463918,0.51839,0.378283,0.186816,0.233822,0.093065,0.220563,0.163688,0.332359,0.167918,0.143636,0.357075,0.136179,0.1458,0.519744,0.123934,0.506948,0.341575,0.437364,0.172415,0.319489,0.558419,0.1575,0.142595
4,0.258839,0.20257,0.267984,0.141506,0.678613,0.461996,0.369728,0.402038,0.518687,0.551179,0.080753,0.117132,0.068793,0.03808,0.197063,0.234311,0.092727,0.215382,0.19373,0.14466,0.268232,0.312633,0.263908,0.136748,0.712739,0.482784,0.427716,0.598282,0.477035,0.454939


In [264]:
# View the dataframe in range -1 to 1
df_normalized_m1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29
0,0.286289,-0.454853,0.231567,0.003181,-0.42024,-0.636464,-0.592784,-0.302485,-0.240404,-0.717355,-0.687127,-0.834821,-0.751119,-0.74868,-0.761227,-0.837354,-0.906061,-0.492328,-0.830922,-0.81778,0.213803,-0.392857,0.079635,-0.129571,-0.304893,-0.690873,-0.614058,0.278351,-0.532821,-0.554244
1,0.202991,-0.219479,0.191486,-0.101166,0.028618,-0.137967,-0.074977,0.271372,0.019192,-0.577506,-0.540757,-0.811395,-0.639259,-0.674156,-0.698338,-0.432091,-0.806465,-0.220307,-0.588619,-0.745989,0.112771,-0.279851,0.016883,-0.250983,-0.03282,-0.22925,-0.280511,0.670103,-0.192588,-0.573134
2,-0.579819,-0.278323,-0.532997,-0.794189,0.622642,0.622723,0.131209,0.045726,0.552525,1.0,-0.721818,-0.64825,-0.74669,-0.92369,-0.497094,0.08643,-0.714091,-0.292669,0.456295,-0.42559,-0.50338,-0.228145,-0.517307,-0.811984,0.830945,0.628023,0.097284,0.769759,1.0,0.547422
3,0.259785,-0.686845,0.261972,-0.021421,-0.139298,-0.304214,-0.072165,0.036779,-0.243434,-0.626369,-0.532356,-0.81387,-0.558875,-0.672625,-0.335282,-0.664163,-0.712727,-0.28585,-0.727641,-0.708401,0.039488,-0.752132,0.013895,-0.31685,-0.125272,-0.65517,-0.361022,0.116838,-0.684999,-0.71481
4,-0.482323,-0.59486,-0.464032,-0.716988,0.357227,-0.076008,-0.260544,-0.195924,0.037374,0.102359,-0.838494,-0.765736,-0.862413,-0.92384,-0.605874,-0.531379,-0.814545,-0.569237,-0.61254,-0.710681,-0.463536,-0.374733,-0.472185,-0.726504,0.425477,-0.034433,-0.144569,0.196564,-0.045929,-0.090122


In [265]:
shuffle_df = False
X_train, X_test, y_train, y_test = train_test_split(df_normalized, diagnosis, shuffle=shuffle_df)
X_train_m1, X_test_m1, y_train_m1, y_test_m1 = train_test_split(df_normalized_m1, diagnosis, shuffle=shuffle_df)

In [266]:
dataset_train = tf.data.Dataset.from_tensor_slices((X_train.values, y_train.values))
dataset_train_m1 = tf.data.Dataset.from_tensor_slices((X_train_m1.values, y_train_m1.values))

In [267]:
dataset_test = tf.data.Dataset.from_tensor_slices((X_test.values, y_test.values))
dataset_test_m1 = tf.data.Dataset.from_tensor_slices((X_test_m1.values, y_test.values))

In [268]:
def show_batch(dataset):
    for batch, label in dataset.take(1):
        print ('Features: {}, Diagnosis: {}'.format(batch.numpy(), label))

In [269]:
show_batch(dataset_train)

Features: [0.225709 0.354075 0.223274 0.1172   0.50799  0.290534 0.160333 0.185785
 0.471212 0.306866 0.141336 0.294554 0.149319 0.058906 0.262433 0.163713
 0.105227 0.218223 0.367085 0.143934 0.191747 0.378198 0.191743 0.088159
 0.426798 0.157086 0.15623  0.271787 0.315987 0.162862], Diagnosis: 0


In [270]:
show_batch(dataset_train_m1)

Features: [-0.325193 -0.785594 -0.360929 -0.598727 -0.269477 -0.793387 -0.917291
 -0.72833  -0.683838 -0.703454 -0.89455  -0.6814   -0.924516 -0.936055
 -0.661488 -0.863791 -0.971126 -0.74893  -0.823324 -0.890745 -0.459267
 -0.681237 -0.521988 -0.722424 -0.248101 -0.837762 -0.915224 -0.595189
 -0.789079 -0.792864], Diagnosis: 0
