In [None]:
import tensorflow as tf

# Turn on eager execution: later cells iterate a tf.data.Dataset directly and
# call `.numpy()` on the model output, both of which rely on eager tensors.
# NOTE(review): TF 1.x API; presumably has to run before any other TensorFlow op - confirm.
tf.enable_eager_execution()

In [None]:
import csv
import os
from os.path import join

import matplotlib.image as image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import png
import pydicom
import tensorflow.contrib.eager as tfe
from matplotlib import pyplot as plt
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import GlobalAveragePooling2D

### Load csv file

In [None]:
# Location of the case-description CSV, relative to the notebook's working directory.
# Later cells read its 'pathology' column and its DICOM file-path columns.
csv_path = './CSV.csv'

In [None]:
# Load the raw case table; the filtered working copy `df` is derived from it in the next cell.
df_0 = pd.read_csv(csv_path)
# Preview the first 10 rows.
df_0.head(10)

In [None]:
# Keep every record except those labelled 'BENIGN_WITHOUT_CALLBACK', then
# renumber the rows 0..n-1 (the previous index is kept as an 'index' column).

keep_rows = df_0["pathology"].ne('BENIGN_WITHOUT_CALLBACK')
df = df_0.loc[keep_rows].reset_index()
df

In [None]:
# Binary labels in row order: 1 for 'MALIGNANT', 0 for any other pathology value

labels = [1 if pathology == 'MALIGNANT' else 0 for pathology in df['pathology']]

### Convert DICOM to PNG

In [None]:
def convert_png(inputdata, savepath, index_num):
    """Rescale a 2-D pixel array to 8-bit greyscale and write it as ``<index_num>.png``.

    Negative values are clipped to 0 and the result is divided by the array
    maximum so that the brightest pixel maps to 255. An array with no positive
    value is written as an all-black image instead of dividing by zero.

    Args:
        inputdata: 2-D array-like of pixel intensities (e.g. a DICOM ``pixel_array``).
        savepath: Folder the PNG is written into; it is created when missing.
            A trailing path separator is optional.
        index_num: Value used as the file stem (``str(index_num) + '.png'``).

    Raises:
        ValueError: If ``inputdata`` is not 2-dimensional (or is empty).
    """
    image_2d = np.asarray(inputdata, dtype=float)
    if image_2d.ndim != 2:
        raise ValueError(
            'convert_png expects a 2-D greyscale array, got shape {}'.format(image_2d.shape))
    height, width = image_2d.shape

    peak = image_2d.max()
    if peak > 0:
        image_2d_scaled = (np.maximum(image_2d, 0) / peak) * 255.0
    else:
        # Nothing above zero: scaling would be 0/0 (or flip signs), so emit black.
        image_2d_scaled = np.zeros_like(image_2d)
    image_2d_scaled = image_2d_scaled.astype(np.uint8)

    if savepath:
        os.makedirs(savepath, exist_ok=True)

    # join() with two arguments inserts the separator only when savepath lacks one.
    with open(join(savepath, str(index_num) + '.png'), 'wb') as png_file:
        # Save PNG images (pypng takes width first, then height)
        w = png.Writer(width, height, greyscale=True)
        w.write(png_file, image_2d_scaled)

### 1. ROI


Detect records whose ROI crop and mask files are mixed up, then convert the ROI crops to PNG

In [None]:
def checkmask(dicdata):
    """Return the fraction of elements in ``dicdata`` that are exactly zero.

    Args:
        dicdata: Array-like of pixel values of any dimensionality.

    Returns:
        A float in [0, 1]; ``nan`` when ``dicdata`` has no elements.
    """
    pixels = np.asarray(dicdata)
    if pixels.size == 0:
        # The ratio is undefined for an empty image; nan compares False against any threshold.
        return float('nan')
    # Count on the array directly instead of wrapping a full image in a DataFrame.
    sum_zero = np.count_nonzero(pixels == 0)
    return float(sum_zero / pixels.size)

In [None]:
def dcmtopng(input_data, folder_path, save_path, zero_threshold=0.75):
    """Convert the ROI image of every record to ``<save_path>/<row position>.png``.

    For each row the file listed under 'ROI mask file path' is read first. If
    more than ``zero_threshold`` of its pixels are zero it is skipped and the
    file listed under 'cropped image file path' is converted instead;
    otherwise that first file itself is converted.
    NOTE(review): presumably a mostly-zero image is the binary mask and the two
    columns are sometimes swapped in the dataset - confirm.

    Args:
        input_data: pandas DataFrame read from the case CSV; must provide the
            'ROI mask file path' and 'cropped image file path' columns.
        folder_path: Root folder of the DICOM dataset; CSV paths are resolved
            relative to it.
        save_path: Folder the PNG files are written to.
        zero_threshold: Zero-pixel fraction above which the first file is
            skipped in favour of the second (default 0.75).
    """
    mask_column = input_data['ROI mask file path']
    crop_column = input_data['cropped image file path']

    # enumerate() numbers rows by position, so a frame whose index is not
    # exactly 0..n-1 is handled too (series[i] would look up by label).
    for i, (mask_rel, crop_rel) in enumerate(zip(mask_column, crop_column)):
        # strip() removes the trailing newline/whitespace stored in the CSV paths
        mask_path = join(folder_path, mask_rel.strip())
        mass_path = join(folder_path, crop_rel.strip())

        data = pydicom.dcmread(mask_path).pixel_array  # pixel data of the first candidate

        if checkmask(data) > zero_threshold:
            # Mostly zeros: use the other file instead.
            data = pydicom.dcmread(mass_path).pixel_array

        convert_png(data, save_path, i)

In [None]:
# Root folder of the ROI DICOM files; the relative paths from the CSV are appended to it.
folder_path = './Mass/'
# Output folder for the converted ROI PNGs (trailing slash required by the string concatenation in convert_png).
save_path = './PNG/'

In [None]:
# Convert the ROI image of every row in df; files are named by row number (0.png, 1.png, ...).
dcmtopng(df, folder_path, save_path)

### 2. Full mammograms


Convert full mammograms into PNG

In [None]:
def dcmtopng_full(input_data, folder_path, save_path):
    """Convert the full image of every record to ``<save_path>/<row position>.png``.

    Args:
        input_data: pandas DataFrame read from the case CSV; must provide the
            'image file path' column.
        folder_path: Root folder of the DICOM dataset; CSV paths are resolved
            relative to it.
        save_path: Folder the PNG files are written to.
    """
    # enumerate() numbers rows by position, so a frame whose index is not
    # exactly 0..n-1 is handled too (series[i] would look up by label).
    for i, rel_path in enumerate(input_data['image file path']):
        # strip() removes the trailing newline/whitespace stored in the CSV paths
        image_path = join(folder_path, rel_path.strip())
        data = pydicom.dcmread(image_path).pixel_array  # pixel data of the DICOM file
        convert_png(data, save_path, i)

In [None]:
# Root folder of the full-image DICOM files; the relative paths from the CSV are appended to it.
folder_path_2 = './Full/'
# Output folder for the converted full-image PNGs; the dataset cell below reads from the same folder.
save_path_2 = './PNG_Full/'

In [None]:
# Convert the full image of every row in df; files are named by row number (0.png, 1.png, ...).
dcmtopng_full(df, folder_path_2, save_path_2)

### Create dataset

In [None]:
# Number of samples = number of rows in the full-image path column.
# (To work on the ROI crops instead, count df['cropped image file path'].)
sample_size = len(df['image file path'])

In [None]:
# Folder holding the PNGs to featurise (use './PNG/' for the ROI crops instead)
png_folder = './PNG_Full/'
ext = '.png'

# One path per sample, named by row number: 0.png, 1.png, ...
png_path = [join(png_folder, str(sample_idx) + ext) for sample_idx in range(sample_size)]

### Set up Xception model with pooling

In [None]:
def create_dataset_fmr_images(file_paths, normalize=True):
    """Build a ``tf.data.Dataset`` of 299x299x3 float images from PNG file paths.

    Each file is decoded as a single-channel image, replicated to three
    channels, and resized to 299x299.

    Args:
        file_paths: Sequence of PNG file paths (strings).
        normalize: When True (default) pixel values are mapped from [0, 255]
            to [-1, 1], the input range the ImageNet-pretrained Xception
            weights expect. Pass False to get the raw 0..255 values.

    Returns:
        An unbatched dataset yielding one float image tensor per path, in the
        order of ``file_paths``.
    """
    def _parse_function(filename):
        image_string = tf.read_file(filename)
        # channels=1 pins the decode to one grey channel, so the tile below
        # always produces exactly 3 channels whatever the PNG colour type.
        image_decoded = tf.image.decode_png(image_string, channels=1)
        img = tf.tile(image_decoded, [1, 1, 3])
        image_resized = tf.image.resize_images(img, [299, 299])
        if normalize:
            # Same arithmetic as Keras' Xception preprocess_input: x / 127.5 - 1
            image_resized = image_resized / 127.5 - 1.0
        return image_resized

    file_paths = tf.constant(file_paths)

    dataset = tf.data.Dataset.from_tensor_slices((file_paths))
    dataset = dataset.map(_parse_function)

    return dataset

In [None]:
class XceptionBottleneck(tf.keras.Model):
    """Xception convolutional base followed by global average pooling."""

    def __init__(self):
        super().__init__()
        # ImageNet-pretrained Xception without its classification head
        self.xception_layers = Xception(weights='imagenet', include_top=False)
        # Pools each feature map to a single value per channel
        self.pooling_layer = GlobalAveragePooling2D()

    def call(self, inputs):
        """Return the pooled Xception features for a batch of images."""
        feature_maps = self.xception_layers(inputs)
        return self.pooling_layer(feature_maps)

### Compute bottleneck features

In [None]:
def cache_bottleneck_layers(file_paths, batch_size, device=None):
    """Run every image through ``XceptionBottleneck`` and stack the outputs.

    Args:
        file_paths: Sequence of PNG file paths.
        batch_size: Number of images per forward pass.
        device: TensorFlow device string such as "gpu:0" or "cpu:0". When
            None, the first GPU is used if TensorFlow reports one, else the CPU.

    Returns:
        A NumPy array with one row of pooled features per input file, in the
        order of ``file_paths``.
    """
    # Honour the caller's choice; only auto-detect when nothing was requested.
    if device is None:
        device = "gpu:0" if tfe.num_gpus() else "cpu:0"

    dataset = create_dataset_fmr_images(file_paths).batch(batch_size)
    n_samples = len(file_paths)
    # Ceiling division: the last, partially filled batch counts as a batch too.
    n_batches = (n_samples + batch_size - 1) // batch_size

    bottle_necks = []
    with tf.device(device):
        xception_out = XceptionBottleneck()
        for batch_num, image in enumerate(dataset, start=1):
            print('\rComputing bottle neck layers... batch {} of {}'.format(batch_num, n_batches), end="")

            # Invoke the model through __call__ (the Keras entry point) rather
            # than .call, then move the result to NumPy for stacking.
            result = xception_out(image)
            bottle_necks.append(result.numpy())

    return np.vstack(bottle_necks)

### Save bottle necks

In [None]:
# Prefer the first GPU when TensorFlow reports one, otherwise fall back to the CPU.
device = "gpu:0" if tfe.num_gpus() else "cpu:0"
# Run every PNG in png_path through the Xception bottleneck model, 15 images per batch.
bottle_necks = cache_bottleneck_layers(png_path, batch_size=15, device=device)

In [None]:
# Folder and file name of the cached bottleneck features
cache_path = './Cache/'

fname = 'bottle_neck.npz'
# NOTE: this rebinds `save_path`, which earlier named the ROI PNG output folder;
# re-running the ROI conversion cell after this one would write to the wrong place.
save_path = os.path.join(cache_path, fname)

# Create the folder if needed; exist_ok avoids the check-then-create race.
os.makedirs(cache_path, exist_ok=True)

In [None]:
# Write features and labels into one .npz archive; each keyword becomes a named array in the file.
np.savez(save_path, bottle_necks=bottle_necks, labels=labels)