In [16]:
# Importing necessary libraries for data manipulation, visualization, and deep learning
import numpy as np  # Library for numerical operations
import pandas as pd  # Library for data manipulation and analysis
import matplotlib.pyplot as plt  # Library for data visualization

import tensorflow as tf  # Deep learning library
from tensorflow import keras  # High-level API for building and training neural networks
from tensorflow.keras import layers  # Module for defining layers in neural networks


In [17]:
from sklearn.model_selection import train_test_split  # Library for splitting data into training and testing sets
from sklearn.preprocessing import StandardScaler  # Library for feature scaling
from sklearn.datasets import load_breast_cancer # Library for loading the breast cancer dataset

# Loading the breast cancer dataset
data = load_breast_cancer()

# Printing the description of the dataset
print(data["DESCR"])

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

In [18]:
# Assigning features (X) and target variable (y) from the loaded diabetes dataset
X = data["data"]  # Features
y = data["target"]  # Target variable

# Splitting the dataset into training and testing sets
# test_size=0.2 means 20% of the data will be used for testing, and the rest for training
# random_state=42 ensures reproducibility of the split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the features using StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)  # Fitting the scaler to the training data

# Transforming both training and testing features using the fitted scaler
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Printing the shape of the transformed training features
print(X_train.shape)



(455, 30)


In [19]:
# Creating a sequential model
mdl = keras.Sequential()

# Adding layers to the model
# First hidden layer with 128 neurons, ReLU activation function, and input shape based on the number of features
mdl.add(layers.Dense(128, activation="relu", input_shape=(X_train.shape[1],)))

# Second hidden layer with 64 neurons and ReLU activation function
mdl.add(layers.Dense(64, activation="relu"))

# Output layer with 1 neuron and sigmoid activation function (for binary classification)
mdl.add(layers.Dense(1, activation="sigmoid"))

# Compiling the model
# Using Adam optimizer, binary cross-entropy loss function (for binary classification), and accuracy metric
mdl.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Training the model
# Fitting the model to the training data for 100 epochs
mdl.fit(X_train, y_train, epochs=100)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6774 - loss: 0.5659
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9606 - loss: 0.1962 
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9449 - loss: 0.1332 
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9713 - loss: 0.1034 
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9728 - loss: 0.0924 
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9803 - loss: 0.0701 
Epoch 7/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9822 - loss: 0.0541 
Epoch 8/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 937us/step - accuracy: 0.9809 - loss: 0.0603
Epoch 9/100
[1m15/15[0m [32m━━━━━━━━━

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 1.0000 - loss: 4.8125e-04 
Epoch 69/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 1.0000 - loss: 5.4032e-04 
Epoch 70/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 1.0000 - loss: 5.4308e-04 
Epoch 71/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 1.0000 - loss: 5.1845e-04 
Epoch 72/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 1.0000 - loss: 4.1635e-04 
Epoch 73/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 1.0000 - loss: 5.3775e-04 
Epoch 74/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 1.0000 - loss: 4.3837e-04 
Epoch 75/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 1.0000 - loss: 4.2493e-04 
Epoch 76/100


<keras.src.callbacks.history.History at 0x199f5f9cdd0>

In [20]:
# Evaluate the model on a test dataset.
mdl.evaluate(X_test, y_test)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - accuracy: 0.9738 - loss: 0.1776   


[0.1401720643043518, 0.9736841917037964]

In [21]:
# Making predictions using the trained model on the test data
ypred = mdl.predict(X_test)

# Converting predicted probabilities into binary labels
ypred_label = (ypred > 0.5).astype("int")

# Displaying the binary labels
ypred_label

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step


array([[1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [0],
    