In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.patches import Circle
import numpy as np
import os
import sys

In [None]:
# Make the local `modules` package under ../src/ importable from this notebook.
# Guarded so that re-running the cell does not append the same entry again.
SRC_DIR = '../src/'
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [None]:
from modules import helpers as hp
from modules import mammoscan as ms

In [None]:
# Use matplotlib's dark theme for all figures below;
# comment out or delete this line to get the default light background.
plt.style.use('dark_background')

In [None]:
# Load the whitespace-separated scan metadata table.
# The delimiter is a regular expression, so it is written as a raw string:
# a plain '\s' is an invalid string escape and raises a warning on recent
# Python versions. The python engine is required for regex separators.
mammo = pd.read_table('../scan_file_data.txt', delimiter=r'\s', engine='python')

In [None]:
# Positional rename of all seven columns. The third column is called
# `ab_class` because `class` is a Python keyword and could not be used
# with attribute access (`mammo.class`).
column_names = ['refnum', 'bg', 'ab_class', 'severity', 'x', 'y', 'radius']
mammo.columns = column_names

In [None]:
# Number of rows per abnormality class. Title and axis labels are set so the
# figure can be read on its own; the trailing ';' hides the Axes repr.
mammo['ab_class'].value_counts().plot(kind='bar',
                                      color='orange',
                                      xlabel='Abnormality Class',
                                      ylabel='SCANS',
                                      title='SCANS BY ABNORMALITY CLASS');

In [None]:
# Number of rows per severity label (rows without a severity are not counted
# by value_counts). Title and axis labels are set so the figure can be read
# on its own; the trailing ';' hides the Axes repr.
mammo['severity'].value_counts().plot(kind='bar',
                                      color='orange',
                                      xlabel='Severity',
                                      ylabel='SCANS',
                                      title='SCANS BY SEVERITY');

In [None]:
# Number of rows per value of the `bg` column.
# NOTE(review): `bg` is presumably the background tissue type - confirm against
# the dataset description. Title and axis labels are set so the figure can be
# read on its own; the trailing ';' hides the Axes repr.
mammo['bg'].value_counts().plot(kind='bar',
                                color='orange',
                                xlabel='Background (bg)',
                                ylabel='SCANS',
                                title='SCANS BY BACKGROUND');

In [None]:
# Column dtypes and non-null counts of the raw metadata table.
mammo.info()

In [None]:
# Count rows for every (abnormality class, severity) pair, pivot the severity
# level into columns and draw one group of bars per abnormality class.
severity_counts = mammo.groupby(['ab_class', 'severity']).size().unstack()
mx = severity_counts.plot(
    kind='bar',
    figsize=(10, 8),
    xlabel='Abnormality Class',
    ylabel='SCANS',
    title='SEVERITY BY CLASS',
)


In [None]:
# Absolute number of rows per (abnormality class, severity) pair.
# Stored under its own name and displayed: the following cell binds `ab_sev`
# to the normalised proportions, so assigning the counts to `ab_sev` here was
# a dead store whose result was never seen.
ab_sev_counts = mammo.groupby(['ab_class', 'severity']).size()
ab_sev_counts

In [None]:
# Relative frequency of each severity label within every abnormality class.
ab_sev = mammo.groupby('ab_class')['severity'].value_counts(normalize=True)
ab_sev

In [None]:
from matplotlib.ticker import PercentFormatter

# Stacked bars: one bar per abnormality class, split by the share of each
# severity label.
ax = ab_sev.unstack().plot(kind='bar',
                           stacked=True,
                           figsize=(10, 8),
                           xlabel='Abnormality Class',
                           ylabel='Percentage',
                           title='SEVERITY BY CLASS')

# Show the y ticks as percentages with two decimals. A formatter is attached
# instead of overwriting the labels with set_yticklabels(): fixed labels on
# automatically located ticks make matplotlib warn and go stale as soon as the
# axis is rescaled. xmax=1 because the plotted values are fractions of 1.
ax.yaxis.set_major_formatter(PercentFormatter(xmax=1, decimals=2));

In [None]:
# Predicate that is true for the 'NORM' label.
def norms(x):
    return x == 'NORM'

# Rows with no severity recorded get the placeholder label 'A'.
# NOTE(review): presumably these are the NORM scans - confirm.
mammo['severity'] = mammo['severity'].fillna('A')

In [None]:
from pathlib import Path
import re

INITIALLY REMOVE ONLY THE SCANS WITH AN INVALID X ENTRY (`*NOTE`)

In [None]:
# Work on a copy so the cleaning steps below leave `mammo` untouched.
test_df = mammo.copy()

In [None]:
# Inspect the working copy before any cleaning is applied.
test_df

In [None]:
# Keep only the first row listed for each scan reference number.
test_df = test_df.drop_duplicates(subset='refnum', keep='first')

In [None]:
# Index the table by scan reference number; `refnum` stops being a column.
test_df = test_df.set_index('refnum', drop=True)

In [None]:
# Evaluate the new index; the trailing ';' suppresses the cell output.
test_df.index;

In [None]:
# Run the project's cleaning helper on the de-duplicated, refnum-indexed table.
# NOTE(review): presumably this removes the rows with an invalid x entry
# mentioned above - confirm in modules/helpers.
clean_df = hp.clean_ds_files(test_df)

In [None]:
# Preview the first rows of the cleaned table.
clean_df.head()

In [None]:
# Persist the cleaned table as CSV in the notebook's working directory.
clean_df.to_csv('test_df.csv')

In [None]:
# Check which type a single-label .loc lookup returns for one scan.
type(clean_df.loc['mdb005'])

In [None]:
# Take an independent copy of the metadata row for scan mdb003, so that
# changing a value in `scan_info` later cannot write through to `clean_df`.
scan_row = clean_df.loc['mdb003']
scan_info = scan_row.copy()
scan_info

In [None]:
from PIL import Image

In [None]:
# Open the PGM image file of scan mdb003.
test_img = Image.open('../all-mias/mdb003.pgm')

In [None]:
# Wrap the image and its metadata row in a MammoScan object.
test = ms.MammoScan(test_img, scan_info)

In [None]:
# Dimensions of the scan's pixel matrix.
test.pixel_matrix.shape

In [None]:
# Draw the scan using MammoScan's own plotting method.
test.plot()

In [None]:
# Transformations produced for this scan. The cells below iterate it as a
# mapping of angle -> {transformation name -> image}.
transformed = test.transformations
transformed

In [None]:
'''

for angle, imgs in transformed.items():
    for transf, img in imgs.items():
        print(f'{angle}, {transf}')
        print(np.asarray(img))
        display(img)
        
'''        
        
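# Observation: 90 degrees + left-right mirror == 270 degrees + top-bottom mirror
# Observation: 0 degrees + left-right mirror == 180 degrees + top-bottom mirror
# TODO: decide whether these duplicate transformations should be dropped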

In [None]:
# Build the lookup of scan files found under ../all-mias/. It is iterated
# below as scan name -> file name pairs.
scans_filenames_dic = hp.create_scan_filenames_dic('../all-mias/')

### Check the quality of crops

In [None]:
# Visual sanity check of the crops: for the first few scans, plot the scan
# itself and then every transformation produced for it, laid out as a grid
# with one column per angle and one row per transformation.
MAX_SCANS_TO_CHECK = 19  # stop once this many scans have been plotted

counter = 0
for scan_name, filename in scans_filenames_dic.items():
    # A scan whose image file is missing cannot be plotted: report it and move
    # on. Without the `continue`, the code below would run with `scan`
    # undefined (first iteration) or still holding the previous scan's image.
    try:
        scan = Image.open(filename)
    except FileNotFoundError as fnf:
        print(fnf)
        continue

    # Scans that have no row in the cleaned table have no metadata to use.
    if scan_name not in clean_df.index:
        print(f'{scan_name}: no entry in clean_df, skipped')
        continue

    # Copy the row so that MammoScan cannot modify clean_df through it.
    scan_info = clean_df.loc[scan_name].copy()
    print(scan_info.name)
    # create the MammoScan object
    m_scan = ms.MammoScan(scan, scan_info)

    m_scan.plot()
    # get the transformations: {angle: {transformation name: image}}
    transf_scans = m_scan.transformations

    n_cols = len(transf_scans)
    n_rows = max((len(transfs) for transfs in transf_scans.values()), default=0)
    if n_cols and n_rows:
        # squeeze=False keeps `axes` two-dimensional even for a single row/column.
        figure, axes = plt.subplots(nrows=n_rows,
                                    ncols=n_cols,
                                    figsize=(3 * n_cols, 3 * n_rows),
                                    squeeze=False)
        for ax in axes.ravel():
            ax.set_xticks([]) # remove x-axis tick marks
            ax.set_yticks([]) # remove y-axis tick marks

        # Each transformation gets its own axes. Drawing all of an angle's
        # transformations onto one axes left only the last one visible.
        for col, (angle, transfs) in enumerate(transf_scans.items()):
            for row, (transf, img) in enumerate(transfs.items()):
                ax = axes[row, col]
                ax.imshow(img, cmap=plt.cm.gray_r)
                ax.set_title(f'{angle}: {transf}')
        figure.tight_layout()

    # Flush this scan's figures now so they appear next to the printed name
    # and do not pile up as open figures until the end of the cell.
    plt.show()

    counter += 1
    if counter == MAX_SCANS_TO_CHECK:
        break


In [None]:
# Run the project's subsample helper over every scan file and the cleaned table.
# NOTE(review): presumably writes the cropped subsamples to disk and returns a
# table describing them - confirm in modules/helpers.
test_sub = hp.save_subsamples(scans_filenames_dic, clean_df)

In [None]:
# Inspect what the subsample helper returned.
test_sub

### Using Image Generator

In [None]:
# TRAIN AND TEST DATA FOR THE CALCIFICATIONS
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Random augmentation applied to generated batches: shifts and zoom of up to
# 10%, rotations of up to 180 degrees and horizontal mirroring. 20% of the
# samples are reserved for the "validation" subset.
augmentation_options = dict(
    width_shift_range=.1,
    height_shift_range=.1,
    rotation_range=180,
    zoom_range=.1,
    horizontal_flip=True,
    validation_split=.2,
)
data_generator = ImageDataGenerator(**augmentation_options)

In [None]:
# NOTE(review): `calcifications` is not defined in any cell visible above. It
# has to be a DataFrame with `subsample_path` and `severity` columns -
# presumably the calcification rows of the subsample table. Define it before
# this cell, otherwise Restart & Run All stops here with a NameError.

# Options shared by both iterators: 48x48 single-channel images with one-hot
# encoded severity labels.
flow_options = dict(
    x_col="subsample_path",
    y_col="severity",
    class_mode="categorical",
    target_size=(48, 48),
    color_mode="grayscale",
    batch_size=32,  # batches are produced by the iterator, so the size is set here (32 is the Keras default)
)

# Validation images must not be randomly shifted, rotated, zoomed or flipped,
# otherwise the validation metrics change from epoch to epoch for reasons
# unrelated to the model. They therefore come from a generator that carries
# only the validation split. Keep this fraction identical to the one passed to
# `data_generator` so both iterators partition the frame at the same row.
validation_generator = ImageDataGenerator(validation_split=.2)

train_data = data_generator.flow_from_dataframe(calcifications,
                                                subset="training",
                                                shuffle=True,
                                                **flow_options)
# Named `test_data` for the cells below, but this is the validation subset.
test_data = validation_generator.flow_from_dataframe(calcifications,
                                                     subset="validation",
                                                     shuffle=False,
                                                     **flow_options)


In [None]:
# create neural network
from tensorflow.keras.models import Sequential

In [None]:
# Empty sequential model; the layers are appended one per cell below.
cnn_calc = Sequential()

In [None]:
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D

In [None]:
# First convolution: 32 filters of 5x5 with ReLU on the 48x48 single-channel
# input (matches target_size / grayscale of the data iterators).
cnn_calc.add(Conv2D(filters=32, kernel_size=(5,5), activation='relu', input_shape=(48, 48, 1)))

In [None]:
# 2x2 max pooling after the first convolution.
cnn_calc.add(MaxPooling2D(pool_size=(2, 2)))

In [None]:
# Second convolution: 64 filters of 5x5 with ReLU.
cnn_calc.add(Conv2D(filters=64, kernel_size=(5,5), activation='relu'))

In [None]:
# 2x2 max pooling after the second convolution.
cnn_calc.add(MaxPooling2D(pool_size=(2,2)))

#### Flattening the results

In [None]:
# Flatten the feature maps into one vector for the dense layers.
cnn_calc.add(Flatten())

#### Add dense layer to reduce the number of features

In [None]:
from tensorflow.keras.layers import Dropout


In [None]:
# Dropout with rate 0.5 on the flattened features, to limit overfitting.
cnn_calc.add(Dropout(0.5))

In [None]:
# Fully connected hidden layer with 1024 ReLU units.
cnn_calc.add(Dense(units=1024, activation='relu'))

#### Dense layer to produce final output

In [None]:
# Softmax output with one unit per severity class (2 units: B / M).
# NOTE(review): assumes the training frame contains only these two severity
# labels; any other label would not match the 2-unit output - confirm.
cnn_calc.add(Dense(units=2, activation='softmax'))

In [None]:
# Print the layer-by-layer summary of the assembled model.
cnn_calc.summary()

#### Visualise the Model's Structure

In [None]:
from tensorflow.keras.utils import plot_model
from IPython.display import Image

In [None]:
# Write the architecture diagram to convnet.png and show it inline.
# NOTE(review): `Image` here is IPython.display.Image (imported in the cell
# just above), which rebinds the name `Image` that earlier cells use for PIL
# (`Image.open`). Re-running those earlier cells after this one fails, so
# consider importing one of the two under an alias.
plot_model(cnn_calc, to_file='convnet.png', show_shapes=True, show_layer_names=True)
Image(filename='convnet.png')

In [None]:
# Configure training: Adam optimiser, categorical cross-entropy (the labels
# are one-hot encoded by the iterators, class_mode="categorical") and accuracy
# as the reported metric.
cnn_calc.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])

### Training and Evaluating the Model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
# Stop training after 5 epochs without improvement and roll the model back to
# its best weights. The monitored quantity is left at the Keras default
# (documented as `val_loss`).
early_stopping = EarlyStopping(patience=5, restore_best_weights=True)

In [None]:
# Train for at most 100 epochs, validating on `test_data` after every epoch;
# early stopping ends the run once validation stops improving.
# `batch_size` is intentionally not passed: `train_data` is an iterator that
# already yields batches, and Keras documents that batch_size must not be
# specified for generator / Sequence inputs (it is rejected or ignored,
# depending on the version). The batch size is the one the iterators were
# created with.
# The returned History object is kept so the learning curves can be inspected.
history = cnn_calc.fit(train_data,
                       validation_data=test_data,
                       epochs=100,
                       callbacks=[early_stopping])