In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from collections import defaultdict
from tensorflow import keras

In [2]:
patient_data = pd.read_excel('data/CMMD_clinicaldata_revision.xlsx',index_col='ID1')
patient_data.index += patient_data.LeftRight
patient_data

Unnamed: 0,LeftRight,Age,number,abnormality,classification,subtype
D1-0001R,R,44,2,calcification,Benign,
D1-0002L,L,40,2,calcification,Benign,
D1-0003L,L,39,2,calcification,Benign,
D1-0004L,L,41,2,calcification,Benign,
D1-0005R,R,42,2,calcification,Benign,
...,...,...,...,...,...,...
D2-0745L,L,43,2,mass,Malignant,triple negative
D2-0746R,R,59,2,mass,Malignant,HER2-enriched
D2-0747R,R,53,2,mass,Malignant,Luminal B
D2-0748R,R,29,2,mass,Malignant,Luminal B


In [3]:
all_data = defaultdict(dict)

for pat_id in patient_data.index:
    if 'D2' in pat_id:
        continue
    all_data[pat_id]['R'] = 1 if patient_data.LeftRight[pat_id] == 'R' else 0
    all_data[pat_id]['age'] = patient_data.Age[pat_id]
    all_data[pat_id]['calc'] = 1 if patient_data.abnormality[pat_id] != 'mass' else 0
    all_data[pat_id]['mass'] = 1 if patient_data.abnormality[pat_id] != 'calcification' else 0
    all_data[pat_id]['classification'] = 1 if patient_data.classification[pat_id] == 'Malignant' else 0

In [4]:
os.chdir('images/final_images_untouched/')

In [5]:
count = 0

for fname in os.listdir(os.getcwd()):
    count += 1
    print(f'\r{count}',end='')
    pat_id = ''.join(fname.split('.')[:2])
    img = np.array(Image.open(fname)).astype(np.float16)/255
    view = 'img_side' if '.S' in fname else 'img_top'
    all_data[pat_id][view] = img

2214

In [8]:
all_data_df = pd.DataFrame(all_data).T.sample(frac=1)

In [13]:
del all_data
del patient_data

In [14]:
all_data_df.age /= np.max(all_data_df.age)
all_data_df

Unnamed: 0,R,age,calc,mass,classification,img_side,img_top
D1-0710R,1,0.25,0,1,0,"[[0.1726, 0.1765, 0.149, 0.2118, 0.1765, 0.207...","[[0.9883, 0.8115, 0.71, 0.6196, 0.702, 0.7764,..."
D1-0252R,1,0.440476,1,1,0,"[[0.643, 0.3293, 0.306, 0.3293, 0.2666, 0.2825...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
D1-0667L,0,0.642857,1,1,1,"[[0.408, 0.204, 0.153, 0.1333, 0.1137, 0.153, ...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
D1-0764R,1,0.416667,0,1,0,"[[0.2196, 0.2393, 0.251, 0.3452, 0.306, 0.306,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
D1-0531R,1,0.595238,1,1,1,"[[0.396, 0.3608, 0.3176, 0.3726, 0.4941, 0.451...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
...,...,...,...,...,...,...,...
D1-0251R,1,0.464286,1,1,0,"[[0.302, 0.2903, 0.306, 0.3333, 0.3687, 0.353,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
D1-1452L,0,0.928571,0,1,1,"[[0.898, 0.7256, 0.5767, 0.5923, 0.639, 0.549,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
D1-0196L,0,0.52381,1,0,1,"[[0.9214, 0.6274, 0.6, 0.5884, 0.6274, 0.6743,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
D1-0078R,1,0.404762,1,0,1,"[[0.1804, 0.2157, 0.2705, 0.2903, 0.2864, 0.25...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."


In [None]:
side_img_tensor = np.stack(all_data_df.img_side)
top_img_tensor = np.stack(all_data_df.img_top)
other_features = np.expand_dims(all_data_df[['R','age']].to_numpy(),axis=1).astype(np.float16)
labels = all_data_df[['calc','mass','classification']].to_numpy().astype(np.float16)

In [11]:
for t in (side_img_tensor,top_img_tensor,other_features,labels):
    print(t.shape)

(1107, 2400, 2000)
(1107, 2400, 2000)
(1107, 2)
(1107, 3)


In [13]:
del all_data_df

In [14]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate, Conv2D, MaxPooling2D

In [15]:
split_point = int(round(1107 * 0.75))
x_train = [side_img_tensor[:split_point],top_img_tensor[:split_point],other_features[:split_point]]
x_val = [side_img_tensor[split_point:],top_img_tensor[split_point:],other_features[split_point:]]
y_train = labels[:split_point]
y_val = labels[:split_point]

In [16]:
for x in [x_train,x_val]:
    for t in x:
        print(t.shape)

for y in [y_train,y_val]:
    print(y.shape)

(830, 2400, 2000)
(830, 2400, 2000)
(830, 2, 1)
(277, 2400, 2000)
(277, 2400, 2000)
(277, 2, 1)
(830, 3)
(830, 3)


In [23]:
for x in [x_train,x_val]:
    for t in x:
        print(t.dtype)

for y in [y_train,y_val]:
    print(y.dtype)

uint8
uint8
object
uint8
uint8
object
object
object


In [27]:
x_train[2].astype(float)

array([[[1.        ],
        [0.46428571]],

       [[0.        ],
        [0.46428571]],

       [[1.        ],
        [0.58333333]],

       ...,

       [[1.        ],
        [0.48809524]],

       [[0.        ],
        [0.48809524]],

       [[0.        ],
        [0.48809524]]])

In [46]:
from keras.models import Model
from keras.layers import Input, Dense, Flatten, Concatenate, Conv2D, MaxPooling2D

# Define input layers
side_view_input = Input(shape=(2400, 2000, 1), name='input_image_side')
top_view_input = Input(shape=(2400, 2000, 1), name='input_image_top')
features_input = Input(shape=(2,), name='input_features')

# First convolutional layers for both images
side_conv_1 = Conv2D(32, kernel_size=(25,25), strides=5, padding='same', activation='relu')(side_view_input)
top_conv_1 = Conv2D(32, kernel_size=(25,25), strides=5, padding='same', activation='relu')(top_view_input)

# Pooling layer receives a tensor that is 480x400, and outputs 240x200
side_pool_1 = MaxPooling2D(pool_size=(2,2),padding='same')(side_conv_1)
top_pool_1 = MaxPooling2D(pool_size=(2,2),padding='same')(top_conv_1)
                    
# Second convolutional layers for both images
side_conv_2 = Conv2D(32, kernel_size=(5,5), strides=2, padding='same', activation='relu')(side_pool_1)
top_conv_2 = Conv2D(32, kernel_size=(5,5), strides=2, padding='same', activation='relu')(top_pool_1)

# Pooling layer receives a tensor that is 120x100, and outputs a layer that is 24x20
side_pool_2 = MaxPooling2D(pool_size=(5,5),padding='same')(side_conv_2)
top_pool_2 = MaxPooling2D(pool_size=(5,5),padding='same')(top_conv_2)

# Flatten both layers
side_flat = Flatten()(side_pool_2)
top_flat = Flatten()(top_pool_2)

# Merge flattened image layers and other features
merged = Concatenate()([side_flat,top_flat,features_input])

# Dense layer to process merged flattened images and auxilliary features
dense = Dense(32, activation='relu')(merged)

# Output layer
output = Dense(3, activation='sigmoid')(dense)

model = Model(inputs=[side_view_input,top_view_input,features_input],outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train the model
model.fit(x=[x_train[0].astype(float), x_train[1].astype(float), x_train[2]].astype(float), y=y_train.astype(float),
          validation_data=([x_val[0].astype(float), x_val[1].astype(float), x_val[2]].astype(float), y_val.as_type(float)),
          epochs=10, batch_size=32)