<a href="https://colab.research.google.com/github/adigew/Breast-Cancer-Detection/blob/main/MobileNetV2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

In [None]:
pip install openpyxl

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from tensorflow import keras
import cv2
from tensorflow.keras.preprocessing.image import load_img ,img_to_array
import matplotlib.pyplot as plt
from tensorflow.keras import Model
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D,AveragePooling2D,BatchNormalization
from tensorflow.keras.layers import Dense,Activation
from tensorflow.keras.layers import Flatten,Dropout,SpatialDropout2D,AveragePooling2D,GlobalAveragePooling2D
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/gdrive;
# the dataset path used below lives under this mount point.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Root folder of the dataset; each entry directly below it is treated as one class folder.
path = "/content/gdrive/My Drive/Breast_Cancer/Ultrasound/Dataset_BUSI_with_GT"

# List the root exactly once so that folder names and folder paths are guaranteed
# to stay paired (the listing is not re-queried for every class).
class_dirs = os.listdir(path)
dir_list = [os.path.join(path, name) for name in class_dirs]

# Number of files found in each class folder, keyed by folder name.
size_dict = {name: len(os.listdir(folder)) for name, folder in zip(class_dirs, dir_list)}
size_dict

{'normal': 266, 'malignant': 421, 'benign': 891}

In [None]:
# Every image is resized to img_size x img_size pixels with img_channel channels.
img_size = 128
img_channel = 3

# Shape of one stored image; prefixed with the per-class row count below.
_frame = (img_size, img_size, img_channel)

# Zero-filled storage per class: X_* for the images, Xm_* for the masks,
# y_* for the label string repeated once per row.
# Row counts are hard-coded: 437 benign, 133 normal, 210 malignant.
X_b, Xm_b = np.zeros((437,) + _frame), np.zeros((437,) + _frame)
y_b = np.full(437, 'benign')

X_n, Xm_n = np.zeros((133,) + _frame), np.zeros((133,) + _frame)
y_n = np.full(133, 'normal')

X_m, Xm_m = np.zeros((210,) + _frame), np.zeros((210,) + _frame)
y_m = np.full(210, 'malignant')

In [None]:
# Load a single file from the 'benign' folder (the second entry of its listing)
# purely to record the array shape that load_img + img_to_array produce.
benign_dir = os.path.join(path, 'benign')
img1_path = os.path.join(benign_dir, os.listdir(benign_dir)[1])

pil_img = load_img(img1_path, target_size=(img_size, img_size), color_mode='rgb')
img = img_to_array(pil_img)

# img_shape is reused later as the input shape of the pretrained network.
img_shape = img.shape
print(img_shape)

def img_num(filename):
    """Return the integer written inside the first pair of parentheses of ``filename``.

    For example ``"benign (12).png"`` and ``"benign (12)_mask_1.png"`` both give 12.

    Parameters
    ----------
    filename : str
        File name to parse.

    Returns
    -------
    int
        The parsed number, or 0 when the name contains no ``(`` or the
        parentheses are empty.

    Raises
    ------
    ValueError
        If the first ``(`` is never closed, or the text between the
        parentheses is not made of digits only.
    """
    start = filename.find('(')
    if start == -1:
        return 0

    end = filename.find(')', start + 1)
    if end == -1:
        # Previously this case walked past the end of the string (IndexError).
        raise ValueError("unterminated '(' in file name: {!r}".format(filename))

    digits = filename[start + 1:end]
    if not digits:
        return 0
    if not digits.isdigit():
        raise ValueError("non-numeric index {!r} in file name: {!r}".format(digits, filename))

    return int(digits)

# Destination arrays selected by the first letter of the file name
# ('b'enign, 'n'ormal, 'm'alignant).
real_by_class = {'b': X_b, 'n': X_n, 'm': X_m}
mask_by_class = {'b': Xm_b, 'n': Xm_n, 'm': Xm_m}

for tumor_path in dir_list :
    for image in os.listdir(tumor_path) :
        p = os.path.join(tumor_path, image)
        pil_img = load_img(p,color_mode = 'rgb',target_size=(img_size,img_size))   # read image as RGB and resize it
        pixels = img_to_array(pil_img)

        # A name whose 5th character from the end is ')' (e.g. "benign (3).png")
        # is a real image; anything else is treated as a mask file.
        is_real = image[-5] == ')'
        target = (real_by_class if is_real else mask_by_class).get(image[0])
        if target is None :
            # File name does not start with b / n / m: nothing to store.
            continue

        row = img_num(image) - 1               # file numbering starts at 1, array rows at 0

        if is_real :
            # Plain assignment (not +=) so re-running this cell cannot double the pixel values.
            target[row] = pixels
        else :
            # The folder counts printed earlier suggest some images have more than one mask file.
            # Merge them with an element-wise maximum: the result stays within the 0-255 pixel
            # range and is unchanged when the cell is run again.
            np.maximum(target[row], pixels, out=target[row])

(128, 128, 3)


In [None]:
# Stack the per-class arrays in the order benign, normal, malignant so that
# row i of X (image), Xm (mask) and y (label) belong together.
X = np.concatenate([X_b, X_n, X_m], axis=0)
Xm = np.concatenate([Xm_b, Xm_n, Xm_m], axis=0)
y = np.concatenate([y_b, y_n, y_m], axis=0)

for stacked in (X, Xm, y):
    print(stacked.shape)

# Divide by 255 in place, writing the result back into the same arrays.
np.divide(X, 255.0, out=X)
np.divide(Xm, 255.0, out=Xm)

# Quick range check of the scaled data.
print(X.max())
print(Xm.min())

(780, 128, 128, 3)
(780, 128, 128, 3)
(780,)
1.0
0.0


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

encoder  = OneHotEncoder()
# fit_transform returns a scipy sparse one-hot matrix; the downstream cells call
# .toarray() on the label partitions, so the sparse form is kept.
y=encoder.fit_transform(y.reshape(y.shape[0],1))

# Split images, masks and labels in ONE stratified call per stage. Previously the
# first image split was not stratified while the mask split was, so the image and
# mask partitions held different samples and different class ratios. Splitting the
# three arrays together guarantees that row i of X_*, Xm_* and y_* is the same sample.
# With the same random_state and stratify target, the mask partitions are the ones
# the separate mask split used to produce.
(X_train, X_test,
 Xm_train, Xm_test,
 y_train, y_test) = train_test_split(X, Xm, y,
                                     test_size = 0.15, shuffle=True, random_state=42,
                                     stratify = y.toarray())

# Carve the validation set out of the remaining training data, again stratified.
(X_train, X_val,
 Xm_train, Xm_val,
 y_train, y_val) = train_test_split(X_train, Xm_train, y_train,
                                    test_size=0.1, random_state=42,
                                    stratify = y_train.toarray())

# Image and mask partitions now share their labels; keep the ym_* names used by later cells.
ym_train, ym_val, ym_test = y_train, y_val, y_test

class_list = encoder.categories_
print(X_train.shape,X_test.shape)
print(y_train.shape,y_test.shape)

In [None]:
# MobileNetV2 with ImageNet weights and without its classification head,
# used as a fixed feature extractor.
# NOTE(review): X and Xm were scaled by 1/255 above, while Keras documents
# mobilenet_v2.preprocess_input as scaling to [-1, 1] — confirm the scaling used here is intended.
base_model = tf.keras.applications.MobileNetV2(
    input_shape=img_shape,
    include_top=False,
    weights="imagenet",
    input_tensor=None,
    pooling=None,
)

# Flatten the last feature map into one feature vector per image.
X_feat_out = base_model.output
X_feat_flatten = Flatten()(X_feat_out)
X_feat_model = Model(inputs=base_model.input, outputs=X_feat_flatten)

# Features of the ultrasound images (train / validation / test).
X_feat_train, X_feat_val, X_feat_test = [
    X_feat_model.predict(split) for split in (X_train, X_val, X_test)
]

# Features of the mask images, extracted with the same network.
Xm_feat_train, Xm_feat_val, Xm_feat_test = [
    X_feat_model.predict(split) for split in (Xm_train, Xm_val, Xm_test)
]

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_128_no_top.h5


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.neural_network import MLPClassifier
from tensorflow.keras.layers import LSTM,Bidirectional,Reshape
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score,roc_auc_score,cohen_kappa_score

In [None]:
# Empty results table; eval() appends one row per scored classifier.
result_columns = [
    'classifier',
    "train_accuracy",
    'val_accuracy',
    "test_accuracy",
    "f1_measure",
    "kappa_score",
    "recall",
    "Precision",
]
df = pd.DataFrame(columns=result_columns)

In [None]:
def eval(classifier_name,y_train,y_train_pred,y_val,y_val_pred,y_true,y_pred):
    """Score one classifier, append the scores to the global ``df`` and print them.

    Note: this function shadows Python's built-in ``eval``; the name is kept
    because other cells call it.

    Parameters
    ----------
    classifier_name : str
        Label stored in the ``classifier`` column.
    y_train, y_val, y_true : one-hot encoded labels
        Ground truth for the train / validation / test partitions. Dense arrays
        and objects exposing ``toarray()`` (e.g. scipy sparse matrices) are accepted.
    y_train_pred, y_val_pred, y_pred : array-like of class indices
        Predicted class index per sample for the matching partition.

    Returns
    -------
    None
        The scores are written to ``df`` (one new row) and printed.
    """
    def _class_ids(one_hot):
        # np.argmax on a scipy sparse matrix does not give a 1-D label vector,
        # so densify first; dense input passes through unchanged.
        if hasattr(one_hot, "toarray"):
            one_hot = one_hot.toarray()
        return np.argmax(np.asarray(one_hot), axis=1)

    y_train = _class_ids(y_train)
    y_val = _class_ids(y_val)
    y_true = _class_ids(y_true)

    # Accuracy on all three partitions; the remaining metrics on the test partition only.
    train_accuracy = round(accuracy_score(y_train,y_train_pred),4)
    val_accuracy = round(accuracy_score(y_val,y_val_pred),4)
    test_accuracy = round(accuracy_score(y_true,y_pred),4)
    f1_measure = round(f1_score(y_true,y_pred,average='weighted'),4)
    kappa_score = round(cohen_kappa_score(y_true,y_pred),4)
    recall = round(recall_score(y_true,y_pred,average='weighted'),4)
    precision = round(precision_score(y_true,y_pred,average='weighted'),4)

    score = {"classifier":classifier_name,"train_accuracy":train_accuracy , "val_accuracy":val_accuracy,"test_accuracy":test_accuracy,"f1_measure":f1_measure,"kappa_score":kappa_score,"recall":recall,"precision":precision}

    # Values are assigned by position (the dict order matches the column order of df);
    # a concrete list is used instead of a dict_values view.
    df.loc[len(df.index)] = list(score.values())
    for e,a in score.items():
        print(e,a)
    print("--"*20)

In [None]:
def classifier_eval(classifier,classifier_name,X_train,y_train,X_val,y_val,X_test,y_test):
    """Fit ``classifier`` on the training features and score it on all three partitions.

    The one-hot ``y_train`` is reduced to class indices for fitting; the
    predictions for train / validation / test are then handed to ``eval``
    together with the one-hot ground truth.
    """
    train_labels = np.argmax(y_train, axis=1)
    classifier.fit(X_train, train_labels)

    # Predict in the order train, validation, test.
    train_pred, val_pred, test_pred = [
        classifier.predict(features) for features in (X_train, X_val, X_test)
    ]

    eval(classifier_name, y_train, train_pred, y_val, val_pred, y_test, test_pred)

In [None]:
# Display names, in the same order as the estimators below.
names = ['SVM',
    'Random Forest',
    'AdaBoost',
    'KNN',
    'XGBoost',
    'Bagging',
    'ANN'
        ]

# Estimators with a stochastic fit get a fixed random_state so the reported
# scores are reproducible from one run to the next.
classifier = [
    SVC(),
    RandomForestClassifier(random_state=42),
    AdaBoostClassifier(random_state=42),
    KNeighborsClassifier(),
    XGBClassifier(random_state=42),
    BaggingClassifier(random_state=42),
    MLPClassifier(max_iter = 400, random_state=42),
]

# Materialise the pairs as lists: a bare zip object is a one-shot iterator, so a
# re-run (or an interrupted run) of the evaluation loop would silently do nothing.
# Both lists refer to the same estimator objects; each evaluation refits them.
cls_list = list(zip(names,classifier))
clsm_list = list(zip(names,classifier))


In [None]:
# Fit and score every classical classifier on the flattened MobileNetV2
# features of the ultrasound images. Labels are densified once up front.
y_train_dense = y_train.toarray()
y_val_dense = y_val.toarray()
y_test_dense = y_test.toarray()

for clf_name, clf in cls_list:
    classifier_eval(
        clf, clf_name,
        X_feat_train, y_train_dense,
        X_feat_val, y_val_dense,
        X_feat_test, y_test_dense,
    )

classifier SVM
train_accuracy 0.9279
val_accuracy 0.791
test_accuracy 0.7949
f1_measure 0.7871
kappa_score 0.6506
recall 0.7949
precision 0.8087
----------------------------------------
classifier Random Forest
train_accuracy 1.0
val_accuracy 0.7463
test_accuracy 0.7436
f1_measure 0.7277
kappa_score 0.5465
recall 0.7436
precision 0.8066
----------------------------------------
classifier AdaBoost
train_accuracy 0.8322
val_accuracy 0.7761
test_accuracy 0.6239
f1_measure 0.5883
kappa_score 0.3257
recall 0.6239
precision 0.6605
----------------------------------------
classifier KNN
train_accuracy 0.8154
val_accuracy 0.6716
test_accuracy 0.7179
f1_measure 0.7145
kappa_score 0.5413
recall 0.7179
precision 0.7155
----------------------------------------


KeyboardInterrupt: ignored

In [None]:
# Mark every layer of the pretrained backbone as non-trainable.
for layer in base_model.layers:
    layer.trainable = False

In [None]:
# LSTM head on top of the backbone: the backbone's output feature map is reshaped
# so that its two middle axes are merged into one sequence axis and the last axis
# becomes the per-step feature vector.
feat_shape = base_model.output.shape
lstm_model = Sequential([
    base_model,
    Reshape((feat_shape[1] * feat_shape[2], feat_shape[3])),
    LSTM(128, dropout=0.5, recurrent_dropout=0.5),
    Dense(3, activation='softmax'),      # three output classes
])

lstm_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [None]:
# Same architecture as the LSTM model, but the recurrent layer is wrapped in
# Bidirectional. It is built on the same base_model object.
feat_shape = base_model.output.shape
bidir_model = Sequential([
    base_model,
    Reshape((feat_shape[1] * feat_shape[2], feat_shape[3])),
    Bidirectional(LSTM(128, dropout=0.5, recurrent_dropout=0.5)),
    Dense(3, activation='softmax'),      # three output classes
])

bidir_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
# Stop training once val_loss has not improved for 4 epochs and roll the model
# back to the weights of its best epoch.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=4,
    restore_best_weights=True,
    verbose=1,
)

In [None]:
# Dense one-hot labels, used for training AND scoring. The sparse matrices were
# previously handed to eval(), unlike the classical-classifier loop which passes dense arrays.
y_train_dense, y_val_dense, y_test_dense = (part.toarray() for part in (y_train, y_val, y_test))

# Train the LSTM head on the ultrasound images with early stopping on the validation loss.
history = lstm_model.fit(X_train,y_train_dense,epochs = 20,validation_data = (X_val,y_val_dense),callbacks = [es])

# Class index with the highest softmax probability for each sample.
lstm_train_predict = np.argmax(lstm_model.predict(X_train),axis=1)
lstm_val_predict = np.argmax(lstm_model.predict(X_val),axis=1)
lstm_test_predict = np.argmax(lstm_model.predict(X_test),axis=1)
eval("LSTM",y_train_dense,lstm_train_predict,y_val_dense,lstm_val_predict,y_test_dense,lstm_test_predict)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 10: early stopping
classifier LSTM
train_accuracy 0.9614
val_accuracy 0.8209
test_accuracy 0.7436
f1_measure 0.7349
kappa_score 0.5652
recall 0.7436
precision 0.7489
----------------------------------------


In [None]:
# Dense one-hot labels, used for training AND scoring. The sparse matrices were
# previously handed to eval(), unlike the classical-classifier loop which passes dense arrays.
y_train_dense, y_val_dense, y_test_dense = (part.toarray() for part in (y_train, y_val, y_test))

# Train the bidirectional LSTM head on the ultrasound images with early stopping.
history = bidir_model.fit(X_train,y_train_dense,epochs = 20,validation_data = (X_val,y_val_dense),callbacks = [es])

# Class index with the highest softmax probability for each sample.
bidir_train_predict = np.argmax(bidir_model.predict(X_train),axis=1)
bidir_val_predict = np.argmax(bidir_model.predict(X_val),axis=1)
bidir_test_predict = np.argmax(bidir_model.predict(X_test),axis=1)
eval("Bi-dir",y_train_dense,bidir_train_predict,y_val_dense,bidir_val_predict,y_test_dense,bidir_test_predict)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: early stopping
classifier Bi-dir
train_accuracy 0.9279
val_accuracy 0.806
test_accuracy 0.7607
f1_measure 0.7575
kappa_score 0.6051
recall 0.7607
precision 0.76
----------------------------------------


In [None]:
# Display the results table filled so far (one row per call to eval()).
df

Unnamed: 0,classifier,train_accuracy,val_accuracy,test_accuracy,f1_measure,kappa_score,recall,Precision
0,SVM,0.9279,0.791,0.7949,0.7871,0.6506,0.7949,0.8087
1,Random Forest,1.0,0.7313,0.7179,0.7007,0.5071,0.7179,0.7503
2,AdaBoost,0.8322,0.7761,0.6239,0.5883,0.3257,0.6239,0.6605
3,KNN,0.8154,0.6716,0.7179,0.7145,0.5413,0.7179,0.7155
4,XGBoost,1.0,0.7463,0.7863,0.7757,0.6311,0.7863,0.8168
5,Bagging,0.9933,0.597,0.7179,0.7032,0.509,0.7179,0.7582
6,ANN,1.0,0.8209,0.8034,0.7963,0.6701,0.8034,0.8045
7,LSTM,0.9614,0.8209,0.7436,0.7349,0.5652,0.7436,0.7489
8,Bi-dir,0.9279,0.806,0.7607,0.7575,0.6051,0.7607,0.76


In [None]:
# Start a fresh, empty results table for the mask experiments
# (rebinding df drops the rows collected so far).
mask_result_columns = [
    'classifier',
    "train_accuracy",
    'val_accuracy',
    "test_accuracy",
    "f1_measure",
    "kappa_score",
    "recall",
    "Precision",
]
df = pd.DataFrame(columns=mask_result_columns)

# Fit and score every classical classifier on the MobileNetV2 features of the masks.
ym_train_dense = ym_train.toarray()
ym_val_dense = ym_val.toarray()
ym_test_dense = ym_test.toarray()

for clf_name, clf in clsm_list:
    classifier_eval(
        clf, clf_name,
        Xm_feat_train, ym_train_dense,
        Xm_feat_val, ym_val_dense,
        Xm_feat_test, ym_test_dense,
    )

classifier SVM
train_accuracy 0.9916
val_accuracy 0.9104
test_accuracy 0.9829
f1_measure 0.9829
kappa_score 0.9706
recall 0.9829
precision 0.9829
----------------------------------------
classifier Random Forest
train_accuracy 1.0
val_accuracy 0.9104
test_accuracy 0.9658
f1_measure 0.9655
kappa_score 0.9408
recall 0.9658
precision 0.966
----------------------------------------
classifier AdaBoost
train_accuracy 0.948
val_accuracy 0.8955
test_accuracy 0.906
f1_measure 0.9076
kappa_score 0.842
recall 0.906
precision 0.9134
----------------------------------------
classifier KNN
train_accuracy 0.9799
val_accuracy 0.9104
test_accuracy 0.9829
f1_measure 0.9827
kappa_score 0.9704
recall 0.9829
precision 0.9834
----------------------------------------
classifier XGBoost
train_accuracy 1.0
val_accuracy 0.9104
test_accuracy 0.9744
f1_measure 0.9745
kappa_score 0.9562
recall 0.9744
precision 0.9748
----------------------------------------


In [None]:
# Dense one-hot labels, used for training AND scoring. The sparse matrices were
# previously handed to eval(), unlike the classical-classifier loop which passes dense arrays.
ym_train_dense, ym_val_dense, ym_test_dense = (part.toarray() for part in (ym_train, ym_val, ym_test))

# NOTE(review): lstm_model and bidir_model are the same objects that were already
# fitted on the ultrasound images in earlier cells, so fit() continues from those
# weights rather than from a fresh head — confirm this is intended.

# --- LSTM head on the mask images ---
history = lstm_model.fit(Xm_train,ym_train_dense,epochs = 20,validation_data = (Xm_val,ym_val_dense),callbacks = [es])
lstm_train_predict = np.argmax(lstm_model.predict(Xm_train),axis=1)
lstm_val_predict = np.argmax(lstm_model.predict(Xm_val),axis=1)
lstm_test_predict = np.argmax(lstm_model.predict(Xm_test),axis=1)
eval("LSTM",ym_train_dense,lstm_train_predict,ym_val_dense,lstm_val_predict,ym_test_dense,lstm_test_predict)

# --- Bidirectional LSTM head on the mask images ---
history = bidir_model.fit(Xm_train,ym_train_dense,epochs = 20,validation_data = (Xm_val,ym_val_dense),callbacks = [es])
bidir_train_predict = np.argmax(bidir_model.predict(Xm_train),axis=1)
bidir_val_predict = np.argmax(bidir_model.predict(Xm_val),axis=1)
bidir_test_predict = np.argmax(bidir_model.predict(Xm_test),axis=1)
eval("Bi-LSTM",ym_train_dense,bidir_train_predict,ym_val_dense,bidir_val_predict,ym_test_dense,bidir_test_predict)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 12: early stopping
classifier LSTM
train_accuracy 0.9899
val_accuracy 1.0
test_accuracy 0.9829
f1_measure 0.983
kappa_score 0.9709
recall 0.9829
precision 0.9839
----------------------------------------
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 9: early stopping
classifier Bi-LSTM
train_accuracy 0.9983
val_accuracy 0.9552
test_accuracy 0.9744
f1_measure 0.9742
kappa_score 0.9558
recall 0.9744
precision 0.9743
----------------------------------------


In [None]:
# Print the mask-experiment results and write them to an Excel workbook
# (without the index column).
results_file = "MobileNetV2_m.xlsx"
print(df)
df.to_excel(results_file, index=False)

      classifier  train_accuracy  val_accuracy  test_accuracy  f1_measure  \
0            SVM          0.9916        0.9104         0.9829      0.9829   
1  Random Forest          1.0000        0.9104         0.9744      0.9742   
2       AdaBoost          0.9480        0.8955         0.9060      0.9076   
3            KNN          0.9799        0.9104         0.9829      0.9827   
4        XGBoost          1.0000        0.9104         0.9744      0.9745   
5        Bagging          1.0000        0.9254         0.9744      0.9745   
6            ANN          1.0000        0.9403         0.9744      0.9742   
7           LSTM          0.9899        1.0000         0.9829      0.9830   
8        Bi-LSTM          0.9983        0.9552         0.9744      0.9742   

   kappa_score  recall  Precision  
0       0.9706  0.9829     0.9829  
1       0.9558  0.9744     0.9743  
2       0.8420  0.9060     0.9134  
3       0.9704  0.9829     0.9834  
4       0.9562  0.9744     0.9748  
5       0.956