# Bayesian Breast Cancer classifier using MCDropout Bayesian approximation

The input consists of gene expression levels of a patient (vector of numbers), the label consists of the patient breast cancer subtype: LUMINAL A or LUMINAL B.
Implement a simple MLP classifier with MCDropout approximation to get for each test patient the class label and the class probability.

In [359]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import numpy as np
import keras

dataset = pd.read_csv('dataset_LUMINAL_A_B.csv')

X, y = dataset.loc[:, dataset.columns != 'l'], dataset.loc[:, dataset.columns == 'l'].values.reshape(-1,1)

scaler = StandardScaler()
X = scaler.fit_transform(X)

from sklearn.decomposition import PCA

pca = PCA()
#X = pca.fit_transform(X)


encoder = OneHotEncoder(sparse_output=False)
y = encoder.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1)




In [348]:
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten

In [360]:
model = Sequential([
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.6),
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(2, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train,epochs=5, verbose=1)



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x332832ad0>

In [361]:
from sklearn.metrics import accuracy_score

MC_ITERATION = 30
acc = []
mc_predictions = np.zeros((MC_ITERATION, X_test.shape[0], 2))
for i in range(MC_ITERATION):
    y_pred = model.predict(X_test, verbose=False)
    mc_predictions[i] = y_pred
    acc.append(accuracy_score(y_pred.argmax(axis=1), y_test.argmax(axis=1)))

mc_ensemble_pred = np.array(mc_predictions).mean(axis=0)
ensemble_acc = accuracy_score(mc_ensemble_pred.argmax(axis=1), y_test.argmax(axis=1))
print("MC-ensemble accuracy: {:.1%}".format(ensemble_acc))
    

MC-ensemble accuracy: 90.0%
