In [22]:
from sklearn import datasets
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, recall_score,f1_score
from sklearn.model_selection import train_test_split, cross_val_score, KFold, GridSearchCV


cancer = datasets.load_breast_cancer()
df = pd.DataFrame(cancer.data, columns=cancer.feature_names)
df.shape

(569, 30)

In [23]:
print( cancer.feature_names,"\n")
print(cancer.target_names)


['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension'] 

['malignant' 'benign']


In [24]:
data = cancer.data
target = cancer.target

#Split the data as in the scikit-learn documentation
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)



In [25]:
#init NN
clf = MLPClassifier(max_iter=300, random_state=42)
#tune grid
param_grid = {
    'hidden_layer_sizes': [(100,), (150,), (100, 100), (150, 100), (100, 100, 100)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate_init': [0.001, 0.01]
}
#find best tune then fit to training data
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search.fit(X_train, y_train)


Fitting 5 folds for each of 80 candidates, totalling 400 fits


In [26]:
print( grid_search.best_params_)


{'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (100,), 'learning_rate_init': 0.001, 'solver': 'adam'}


In [27]:
best_model = grid_search.best_estimator_
y_test_pred = best_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_test_pred)
print("Accuracy:", accuracy)

# Calculate sensitivity
sensitivity = recall_score(y_test, y_test_pred)
print("Sensitivity:", sensitivity)

# Calculate specificity
cm = confusion_matrix(y_test, y_test_pred)
tn, fp, fn, tp = cm.ravel()
specificity = tn / (tn + fp)
print("Specificity:", specificity)

# Calculate F1 score
f1 = f1_score(y_test, y_test_pred)
print("F1 Score:", f1)

Accuracy: 0.9649122807017544
Sensitivity: 0.9859154929577465
Specificity: 0.9302325581395349
F1 Score: 0.9722222222222222


In [28]:
print("Confusion Matrix: ",cm)

Confusion Matrix:  [[40  3]
 [ 1 70]]
