# Breast Cancer Classification with a Neural Network

### Importing Dependencies

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.datasets
from sklearn.model_selection import train_test_split

### Data Collection and Processing

In [2]:
# Load scikit-learn's bundled breast cancer dataset.
breast_cancer_dataset = sklearn.datasets.load_breast_cancer()

In [3]:
# Print the loaded dataset object in full to inspect its contents.
print(breast_cancer_dataset)

In [4]:
# Build a DataFrame from the feature matrix, naming each column after its feature.
data_frame = pd.DataFrame(
    data=breast_cancer_dataset.data,
    columns=breast_cancer_dataset.feature_names,
)

In [5]:
# Preview the first rows of the feature table.
data_frame.head()

In [6]:
# Append the dataset's target values as a "label" column
# (the prediction cells further down report label 0 as malignant and any other label as benign).
data_frame["label"] = breast_cancer_dataset.target

In [7]:
# Preview the last rows, which now include the "label" column.
data_frame.tail()

In [8]:
# (rows, columns) of the table, with the "label" column included.
data_frame.shape

In [9]:
# Print the DataFrame.info() summary of the table.
data_frame.info()

In [10]:
# Count the missing (null) values in each column.
data_frame.isnull().sum()

In [11]:
# Descriptive statistics for the table's columns.
data_frame.describe()

In [12]:
# Class balance: number of rows for each label value.
data_frame["label"].value_counts()

In [13]:
# Mean of every feature, computed separately for each label value.
data_frame.groupby("label").mean()

In [14]:
# Split the table into the feature columns (X) and the target column (Y).
Y = data_frame["label"]
X = data_frame.drop(columns="label")

In [15]:
# Inspect the feature table and the target column.
print(X)
print(Y)

#### Data Standardization

In [16]:
from sklearn.preprocessing import StandardScaler

In [17]:
# Scaler used to standardize the features; it is reused later to transform a single raw sample.
scaler = StandardScaler()

In [18]:
# Fit the scaler on the feature table and standardize it in one step.
# NOTE(review): the scaler is fitted on every row, including the rows that the
# later train/test split assigns to the test set, so test-set statistics leak
# into preprocessing. Consider fitting on the training split only.
standardized_data = scaler.fit_transform(X)

In [19]:
# Inspect the standardized feature values.
print(standardized_data)

In [20]:
# Rebind X to the scaler output; from here on X no longer refers to the
# DataFrame of raw feature values.
X = standardized_data
print(X)

In [21]:
# Y was not touched by the scaling step; print it again for reference.
print(Y)

### Split Train and Test data

In [22]:
# Hold out 20% of the samples for testing; the fixed random_state pins the split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

In [23]:
# Report the shape of the full feature matrix and of each split.
for caption, subset in (
    ("Total data: ", X),
    ("Train data: ", X_train),
    ("Test data: ", X_test),
):
    print(caption, subset.shape)

### Building Neural Network

#### Importing Tensorflow and Keras

In [24]:
import tensorflow as tf
# Seed TensorFlow's random number generation (seed 3) before any model is built.
tf.random.set_seed(3)
from tensorflow import keras

In [25]:
# setting up the layers of Neural Network

# Fully connected classifier: input features -> 20 ReLU units -> 2 class
# probabilities. The input width is taken from the training matrix instead of
# being hard-coded.
model = keras.Sequential([
    # The features are already 1-D per sample, so no Flatten layer is needed;
    # an explicit Input declares the expected shape.
    keras.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(20, activation="relu"),
    # softmax makes the two outputs a probability distribution over the
    # mutually exclusive classes, which is what the
    # sparse_categorical_crossentropy loss used at compile time expects.
    keras.layers.Dense(2, activation="softmax"),
])

In [26]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss
# (integer class labels), and accuracy as the reported metric.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [27]:
# Train for 10 epochs, holding out 10% of the training samples for validation.
# The returned History object feeds the accuracy and loss plots below.
history = model.fit(
    x=X_train,
    y=Y_train,
    epochs=10,
    validation_split=0.1,
)

In [28]:
# Plot per-epoch accuracy: the training curve first, then the validation curve.
for history_key in ("accuracy", "val_accuracy"):
    plt.plot(history.history[history_key])

plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.title("Model Accuracy")

# Legend entries follow the order in which the curves were plotted.
plt.legend(["training", "validation"], loc="lower right")

In [29]:
# visualize train validation loss

# Per-epoch loss: the training curve first, then the validation curve.
plt.plot(history.history["loss"])
plt.plot(history.history["val_loss"])

# The title names the quantity actually plotted on this figure (loss).
plt.title("Model Loss")
plt.ylabel("loss")
plt.xlabel("epoch")

# Legend entries follow the order in which the curves were plotted.
plt.legend(["training", "validation"], loc="upper right")

### Accuracy on test data

In [30]:
# Evaluate the trained model on the held-out test split; the result unpacks
# into the loss and the accuracy metric configured at compile time.
loss, accuracy = model.evaluate(X_test, Y_test)

In [31]:
# Accuracy of the model on the test split.
print(accuracy)

In [32]:
# (rows, columns) of the test feature matrix.
print(X_test.shape)

In [33]:
# First test sample; it is already standardized because scaling was applied before the split.
print(X_test[0])

In [34]:
# Run the model on every test row; the output layer has 2 units, so each
# prediction row holds one score per class.
Y_pred = model.predict(X_test)

In [35]:
# Shape of the prediction array.
print(Y_pred.shape)

In [36]:
# Raw per-class scores for every test row.
print(Y_pred)

In [37]:
# converting the prediction probability to class labels
# argmax along axis 1 picks the highest-scoring class for every row in a single
# vectorized call; tolist() yields plain Python ints so the list prints cleanly.
Y_pred_labels = np.argmax(Y_pred, axis=1).tolist()
print(Y_pred_labels)

### Predictive System

#### Taking a raw sample directly from the dataset, so it must be reshaped and standardized before prediction

In [38]:
# First row of the raw (unscaled) feature table, without the label, as a 1-D array.
sample = data_frame.drop(columns="label").iloc[0].values

In [39]:
# Predict the class of one raw (unscaled) sample.
input_data = sample
print(input_data)

# reshape the array as we are predicting for one instance
input_data_reshaped = input_data.reshape(1, -1)

# The scaler was fitted on a DataFrame with named columns, so give it a
# one-row DataFrame with the same feature names; a bare array makes
# scikit-learn warn that X does not have valid feature names.
input_frame = pd.DataFrame(
    input_data_reshaped,
    columns=breast_cancer_dataset.feature_names,
)

# standardize the input data as model is trained as such
std_data = scaler.transform(input_frame)
print(std_data)

prediction = model.predict(std_data)
print(prediction)


In [40]:
# Wrap the index of the highest-scoring class in a one-element list.
predictin_label = [np.argmax(prediction)]
print(predictin_label)

# Label 0 is reported as malignant, every other label as benign.
verdict = (
    "The tumor is Malignant."
    if predictin_label[0] == 0
    else "The tumor is Benign."
)
print(verdict)

##### The data was standardized before the split, so a row of `X_test` can be used directly after reshaping

In [41]:
# Predict the class of the first test row, which is already standardized.
input_data = X_test[0]
print(input_data)

# reshape the array as we are predicting for one instance
# (reshape(1, -1) turns the 1-D row into a 2-D array holding a single row)
input_data_reshaped = input_data.reshape(1, -1)

print(input_data_reshaped)

# Per-class scores for the single instance.
prediction = model.predict(input_data_reshaped)
print(prediction)

In [42]:
# Wrap the index of the highest-scoring class in a one-element list.
predictin_label = [np.argmax(prediction)]
print(predictin_label)

# Label 0 is reported as malignant, every other label as benign.
verdict = (
    "The tumor is Malignant."
    if predictin_label[0] == 0
    else "The tumor is Benign."
)
print(verdict)