In [1]:
import numpy as np 
import pandas as pd 
import os 
import cv2

In [2]:
base_dir = os.path.join('.', "archive", "all-mias")
info = os.path.join('.', "archive", "Info.txt")
mammogram_df = pd.read_csv(info, sep=" ")
mammogram_df.drop("Unnamed: 7", axis=1, inplace=True)

tif_dir = 'tiffs'
# if not os.path.exists(tif_dir):  
os.makedirs(tif_dir, exist_ok=True)
from skimage.io import imread, imsave
def to_path(c_row):
#         image =  imread(os.path.join(base_dir, '%s.pgm' % c_row['REFNUM']))
    out_path = os.path.join(tif_dir, '%s.tif' % c_row['REFNUM'])
#         imsave(out_path, image)
    return out_path
mammogram_df['path'] =  mammogram_df.apply(to_path,1)

In [3]:
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical 
encoder = LabelEncoder()
mammogram_df['CLASS_ID'] = encoder.fit_transform(mammogram_df['CLASS'])
mammogram_df['CLASS_VEC'] = mammogram_df['CLASS_ID'].map(lambda x: to_categorical(x, num_classes=len(encoder.classes_)))
mammogram_df['SEVERITY'].fillna('N', inplace=True)

2022-10-12 21:00:37.088817: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
mammogram_df.head(20)

Unnamed: 0,REFNUM,BG,CLASS,SEVERITY,X,Y,RADIUS,path,CLASS_ID,CLASS_VEC
0,mdb001,G,CIRC,B,535.0,425.0,197.0,tiffs/mdb001.tif,3,"[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]"
1,mdb002,G,CIRC,B,522.0,280.0,69.0,tiffs/mdb002.tif,3,"[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]"
2,mdb003,D,NORM,N,,,,tiffs/mdb003.tif,5,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
3,mdb004,D,NORM,N,,,,tiffs/mdb004.tif,5,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
4,mdb005,F,CIRC,B,477.0,133.0,30.0,tiffs/mdb005.tif,3,"[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]"
5,mdb005,F,CIRC,B,500.0,168.0,26.0,tiffs/mdb005.tif,3,"[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]"
6,mdb006,F,NORM,N,,,,tiffs/mdb006.tif,5,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
7,mdb007,G,NORM,N,,,,tiffs/mdb007.tif,5,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
8,mdb008,G,NORM,N,,,,tiffs/mdb008.tif,5,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
9,mdb009,F,NORM,N,,,,tiffs/mdb009.tif,5,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"


In [5]:
from sklearn.model_selection import train_test_split
raw_train_df, validation_df = train_test_split(mammogram_df, 
                                   test_size = 0.25, 
                                   random_state = 335,
                                   stratify = mammogram_df[['CLASS_ID', 'SEVERITY']])
print('train', raw_train_df.shape[0], 'validation', validation_df.shape[0])

train_df = raw_train_df.groupby(['CLASS', 'SEVERITY']).apply(lambda x: x.sample(100, replace = True)
                                                      ).reset_index(drop = True)
print('New Data Size:', train_df.shape[0], 'Old Size:', raw_train_df.shape[0])

train 247 validation 83
New Data Size: 1300 Old Size: 247


In [6]:
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.imagenet_utils import preprocess_input
IMG_SIZE = (192, 192)
core_datagen = ImageDataGenerator(horizontal_flip = True, 
                              vertical_flip = False, 
                              height_shift_range = 0.15, 
                              width_shift_range = 0.15, 
                              rotation_range = 5, 
                              shear_range = 0.01,
                              fill_mode = 'nearest',
                              zoom_range=0.2,
                              preprocessing_function = preprocess_input)

test_datagen = ImageDataGenerator(preprocessing_function = preprocess_input)

In [7]:
def flow_from_dataframe(img_data_gen, in_df, path_col, y_col, **dflow_args):
    df_gen = img_data_gen.flow_from_dataframe(in_df, x_col=path_col, y_col=y_col, class_mode = 'raw', **dflow_args)
    return df_gen

In [8]:
train_gen = flow_from_dataframe(core_datagen, train_df, 
                             path_col = 'path',
                            y_col = 'CLASS_ID', 
                            target_size = IMG_SIZE,
                            color_mode = 'rgb',
                            batch_size = 32)

valid_gen = flow_from_dataframe(test_datagen, validation_df, path_col = 'path', y_col = 'CLASS_ID', target_size = IMG_SIZE,
                            color_mode = 'rgb',
                            batch_size = 83)

test_X, test_Y = next(flow_from_dataframe(test_datagen, 
                            validation_df, 
                            path_col = 'path',
                            y_col = 'CLASS_ID', 
                            target_size = IMG_SIZE,
                            color_mode = 'rgb',
                            batch_size = 83))

Found 1300 validated image filenames.
Found 83 validated image filenames.
Found 83 validated image filenames.


In [9]:
t_x, t_y = next(train_gen)
t_x.shape
t_y.shape

(32,)

In [10]:
from keras.applications.vgg16 import VGG16
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, Input, Conv2D, multiply, LocallyConnected2D, Lambda
from keras.models import Model
in_lay = Input(t_x.shape[1:])
base_pretrained_model = VGG16(input_shape =  t_x.shape[1:], include_top = False, weights = 'imagenet')
base_pretrained_model.trainable = False
pt_depth = 512
pt_features = base_pretrained_model(in_lay)
from keras.layers import BatchNormalization
bn_features = BatchNormalization()(pt_features)

# here we do an attention mechanism to turn pixels in the GAP on an off

attn_layer = Conv2D(64, kernel_size = (1,1), padding = 'same', activation = 'relu')(bn_features)
attn_layer = Conv2D(16, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
attn_layer = Conv2D(1, 
                                kernel_size = (1,1), 
                                padding = 'valid', 
                                activation = 'sigmoid')(attn_layer)
# fan it out to all of the channels
up_c2_w = np.ones((1, 1, 1, pt_depth))
up_c2 = Conv2D(pt_depth, kernel_size = (1,1), padding = 'same', 
               activation = 'linear', use_bias = False, weights = [up_c2_w])
up_c2.trainable = False
attn_layer = up_c2(attn_layer)

mask_features = multiply([attn_layer, bn_features])
gap_features = GlobalAveragePooling2D()(mask_features)
gap_mask = GlobalAveragePooling2D()(attn_layer)
# to account for missing values from the attention model
gap = Lambda(lambda x: x[0]/x[1], name = 'RescaleGAP')([gap_features, gap_mask])
gap_dr = Dropout(0.5)(gap)
dr_steps = Dropout(0.25)(Dense(128, activation = 'elu')(gap_dr))
out_layer = Dense(len(encoder.classes_), activation = 'softmax')(dr_steps) # linear is what 16bit did
mammo_model = Model(inputs = [in_lay], outputs = [out_layer])

mammo_model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy',
                           metrics = ['accuracy', 'sparse_categorical_accuracy'])

mammo_model.summary()

2022-10-12 21:00:46.245154: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 192, 192, 3  0           []                               
                                )]                                                                
                                                                                                  
 vgg16 (Functional)             (None, 6, 6, 512)    14714688    ['input_1[0][0]']                
                                                                                                  
 batch_normalization (BatchNorm  (None, 6, 6, 512)   2048        ['vgg16[0][0]']                  
 alization)                                                                                       
                                                                                              

In [11]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
weight_path="{}_weights.best.hdf5".format('mammo_result2')

checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1, 
                             save_best_only=True, mode='min', save_weights_only = True)


reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=5, verbose=1, mode='auto', epsilon=0.0001, cooldown=5, min_lr=0.0001)
early = EarlyStopping(monitor="val_loss", mode="min", patience=5) 

callbacks_list = [checkpoint, early, reduceLROnPlat]



In [12]:
mammo_model.fit_generator(train_gen, steps_per_epoch = 35, validation_data = valid_gen, epochs = 10, callbacks = callbacks_list)

  mammo_model.fit_generator(train_gen, steps_per_epoch = 35, validation_data = valid_gen, epochs = 10, callbacks = callbacks_list)


Epoch 1/10
Epoch 1: val_loss improved from inf to 2.92067, saving model to mammo_result2_weights.best.hdf5
Epoch 2/10
Epoch 2: val_loss did not improve from 2.92067
Epoch 3/10
Epoch 3: val_loss did not improve from 2.92067
Epoch 4/10
Epoch 4: val_loss improved from 2.92067 to 2.54661, saving model to mammo_result2_weights.best.hdf5
Epoch 5/10
Epoch 5: val_loss did not improve from 2.54661
Epoch 6/10
Epoch 6: val_loss did not improve from 2.54661
Epoch 7/10
Epoch 7: val_loss did not improve from 2.54661
Epoch 8/10
Epoch 8: val_loss did not improve from 2.54661
Epoch 9/10
Epoch 9: val_loss did not improve from 2.54661

Epoch 9: ReduceLROnPlateau reducing learning rate to 0.000800000037997961.


<keras.callbacks.History at 0x135325cd0>