<a href="https://colab.research.google.com/github/chiarabuono/ML-Lab4-Neural-Networks/blob/main/task3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [78]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as im
import seaborn as sns               # For visualization

import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import plot_model

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the WDBC file; it ships without a header row, so columns get integer labels
data = pd.read_csv('breast_cancer/wdbc.data', delimiter=',', header=None)

# The first column is the patient ID, which carries no predictive information
data = data.drop(columns=data.columns[0])

# The next column is the diagnosis: pull it out of the feature table and
# encode it numerically (M = malignant -> 0, B = benign -> 1)
diagnosis_encoding = {'M': 0, 'B': 1}
targets = data.pop(data.columns[0]).map(diagnosis_encoding)

# Hold out 25% of the samples for testing (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    data,
    targets,
    test_size=0.25,
    random_state=101,
)

# Standardise the features: statistics are learned on the training split only
# and then applied unchanged to the test split
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Sanity-check the resulting dimensions
print(f"X_train {X_train.shape} and X_test {X_test.shape}")
print(f"y_train {y_train.shape} and y_test {y_test.shape}")

In [73]:
# Fix the random seeds BEFORE any layer is created. Seeding only NumPy after
# the model has been built does not control the Keras weight initialisers or
# the batch shuffling done during training. set_random_seed seeds Python's
# `random`, NumPy and TensorFlow in one call.
tf.keras.utils.set_random_seed(1)

# Build neural network architecture: three sigmoid hidden layers followed by a
# single sigmoid unit that outputs a value in (0, 1) for the binary target
model = Sequential()
model.add(Input(shape=(data.shape[1],)))  # Input layer specifies the shape (one entry per feature column)
model.add(Dense(512, activation='sigmoid'))
model.add(Dense(64, activation='sigmoid'))
model.add(Dense(16, activation='sigmoid'))
model.add(Dense(1, activation='sigmoid'))  # Output layer

# Compile the model
opt = Adam(learning_rate=0.001)
# Stop once the monitored quantity (Keras default: validation loss) has not
# improved for 10 epochs, then roll back to the best weights seen
es = EarlyStopping(patience=10, restore_best_weights=True)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train the model. 20% of the training data is set aside for validation, which
# is what the early-stopping callback `es` monitors.
history = model.fit(X_train, y_train, validation_split=0.2, epochs=500, batch_size=10, callbacks=[es])

# Print summary data
model.summary()

# Get the outputs corresponding to all the data.
# The network was trained on standardised features, so the raw feature table
# must go through the same fitted scaler before prediction; feeding unscaled
# values would produce meaningless outputs.
outputs = model.predict(scaler.transform(data))

In [None]:
# Locate the epoch with the lowest validation loss. The index is 0-based, i.e.
# it matches the x-axis of a plot of history.history['val_loss'].
val_loss_history = np.asarray(history.history['val_loss'])
validation_stop_epoch = int(val_loss_history.argmin())

# The model is compiled with loss='binary_crossentropy', so the reported value
# is a cross-entropy loss, not an RMS error.
print('Best epoch (0-based): %d' % validation_stop_epoch)
print('Validation loss (binary cross-entropy) at best epoch: %.2E' % val_loss_history[validation_stop_epoch])

In [None]:
# Plot training & validation loss per epoch.
# The model is compiled with binary cross-entropy, so the curves are labelled
# as such rather than as MSE.
plt.plot(history.history['loss'], label='Binary cross-entropy (train)')
plt.plot(history.history['val_loss'], label='Binary cross-entropy (valid)')
plt.title('Training history')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper right')
plt.grid()
plt.show()

# Scatter the network outputs against the true targets.
# Ideal behaviour: every output equals its target, i.e. all points lie on a straight line.
plt.plot(targets, outputs, color='k', marker='o', linestyle='None')
plt.xlabel('targets')
plt.ylabel('outputs')
plt.title('Output scatter plot')
plt.grid()
plt.show()

In [None]:
# model.evaluate returns the loss followed by the compiled metrics (accuracy)
train_score = model.evaluate(X_train, y_train)  # performance on the training split
print("Train score:", train_score)
test_score = model.evaluate(X_test, y_test)  # performance on the held-out test split
print("Test score:", test_score)

# Predicted probabilities for the test set, thresholded at 0.5 into hard 0/1 labels
y_pred_prob = model.predict(X_test)
y_pred = np.where(y_pred_prob > 0.5, 1, 0)


# Compute the confusion matrix.
# Targets are encoded as 0 = malignant, 1 = benign; passing labels=[0, 1] pins
# the row/column order and guarantees a 2x2 matrix even if one class happens to
# be missing from the test labels or the predictions.
class_names = ['Malignant (0)', 'Benign (1)']
cm = confusion_matrix(y_test, np.ravel(y_pred), labels=[0, 1])

# Show the matrix as an annotated heatmap with the class names on both axes,
# so the cells can be read without remembering the numeric encoding
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()