In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from tqdm import tqdm_notebook
from IPython.display import clear_output
import time
%matplotlib inline

In [None]:
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from keras.models import Sequential

In [None]:
# Metadata columns kept from the CSV; 'Image Index' is promoted to the row
# index at the end of this cell, leaving five metadata columns in front of
# the pathology indicator columns.
metadata_columns = ['Image Index','Finding Labels','Follow-up #','Patient ID','Patient Age','Patient Gender']
pathology_list = ['Cardiomegaly','Emphysema','Effusion','Hernia','Nodule','Pneumothorax','Atelectasis','Pleural_Thickening','Mass','Edema','Consolidation','Infiltration','Fibrosis','Pneumonia']

df = pd.read_csv('Data_Entry_2017.csv')[metadata_columns]

# Add one 0/1 indicator column per pathology: 1 when the pathology name
# occurs as a substring of the row's 'Finding Labels' text, 0 otherwise.
finding_labels = df['Finding Labels']
for pathology in pathology_list:
    df[pathology] = [1 if pathology in labels else 0 for labels in finding_labels]

df = df.set_index('Image Index')


In [None]:
# os.listdir() returns entries in arbitrary order, so sort them to make the
# validation/training split (taken by position further down) reproducible
# across runs and machines. Entries without a row in the label table
# (e.g. stray non-image files) are dropped here, because every later cell
# looks the file name up in `df`.
img_files = sorted(name for name in os.listdir('images') if name in df.index)

In [None]:
# One output unit per pathology indicator column. Taking the count from
# `pathology_list` avoids transposing the whole metadata frame and does not
# depend on one particular image file being present in the index.
num_classes = len(pathology_list)

In [None]:
# Small CNN mapping a 1024x1024 3-channel image to one independent score in
# [0, 1] per pathology (multi-label output).
model = Sequential()
# 'same' padding with stride 2 halves each spatial dimension: 1024 -> 512.
model.add(Conv2D(filters=32, kernel_size=(3,3), padding='same', 
                 activation='relu', input_shape=(1024, 1024, 3), strides=(2,2)))
# A 500x500 window moved with stride 2 over the 512x512 feature map (default
# 'valid' padding) leaves a 7x7 grid per filter, i.e. 7*7*32 = 1568 features.
model.add(MaxPooling2D(pool_size=(500,500), strides=(2,2)))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
# Sigmoid rather than softmax: several indicator columns can be 1 for the
# same image, so each output is treated as its own yes/no decision.
model.add(Dense(num_classes, activation='sigmoid'))
# Binary cross-entropy is the matching loss for independent sigmoid outputs.
# 'binary_accuracy' is requested explicitly: with a multi-unit output and a
# non-crossentropy loss, the 'accuracy' shortcut is resolved by Keras 2.x to
# categorical (argmax) accuracy, which is not meaningful for multi-label
# targets.
model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['binary_accuracy'])
model.summary()

In [None]:
dir_len = len(img_files)
step_size = 10    # images per training batch
val_count = 300   # the first `val_count` files are held out for validation

# Load the validation images once and keep them in memory for every epoch.
val_files = img_files[0:val_count]
X_val = []
for img in tqdm_notebook(val_files):
    pixels = cv2.imread('images/'+img)
    if pixels is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise IOError('Could not read image: images/' + img)
    # Scale to [0, 1]. float32 halves the memory of the default float64
    # result, which matters with hundreds of 1024x1024x3 images resident.
    X_val.append(pixels.astype(np.float32) / 255.0)
X_val = np.array(X_val)

# Fetch all validation label rows with a single indexed lookup instead of
# transposing the whole metadata frame once per image.
Y_val = df.loc[val_files, pathology_list].values.astype(float)

# Per-epoch history, filled in by the training loop.
tr_loss_avg, tr_acc_avg = [], []
val_loss, val_acc = [], []
print('Validate on:', val_count, 'Train on: ', dir_len-val_count)

# Float label matrix indexed by image file name, built once so that batch
# labels are a cheap row lookup (the metadata frame is no longer transposed
# for every single image).
label_table = df[pathology_list].astype(float)

# save_weights() does not create missing directories.
os.makedirs('weights', exist_ok=True)

for epoch in range(0, 100):
    tr_loss, tr_acc = [], []
    # Stream the training files from disk in batches of `step_size`; the
    # first `val_count` files are reserved for validation.
    for i in range(val_count, dir_len, step_size):
        clear_output(wait=True)
        batch_files = img_files[i:i+step_size]
        X = []
        for img in batch_files:
            pixels = cv2.imread('images/'+img)
            if pixels is None:
                # cv2.imread signals an unreadable/missing file by returning None.
                raise IOError('Could not read image: images/' + img)
            X.append(pixels.astype(np.float32) / 255.0)
        X = np.array(X)
        Y = label_table.loc[batch_files].values
        # train_on_batch returns [loss, metric] for the compiled model.
        hist = model.train_on_batch(X, Y)
        tr_loss.append(hist[0])
        tr_acc.append(hist[1])
        print('progress:', format(i*100/dir_len, '.2f'), '%', 
              ', in loss:', format(hist[0], '.2f'), 
              ', in acc:', format(hist[1], '.2f'))
    tr_loss_avg.append(np.average(tr_loss))
    tr_acc_avg.append(np.average(tr_acc))
    val = model.evaluate(X_val, Y_val, verbose=0)
    # Checkpoint only when this epoch beats the best validation loss seen so
    # far (comparing against just the previous epoch would also save weights
    # that are worse than an earlier checkpoint).
    if not val_loss or val[0] < min(val_loss):
        model.save_weights(''.join(['weights/weights_cnn_epoch_', str(epoch), 
                                    '_val_loss_', format(val[0], '.2f'), 
                                    '_val_acc_', format(val[1], '.2f'),
                                    '.hdf5']))
    val_loss.append(val[0])
    val_acc.append(val[1])
    print('epoch:',epoch, 
          ', tr loss:', format(tr_loss_avg[-1], '.2f'), 
          ', tr acc:', format(tr_acc_avg[-1], '.2f'), 
          ', val loss:', format(val[0], '.2f'),
          ', val acc:', format(val[1], '.2f'))
    # Very short pause kept from the original; presumably lets the notebook
    # front end flush the epoch summary before the next clear_output().
    time.sleep(0.001)

In [None]:
# Gather the per-epoch training/validation history under its column names.
out_dict = {
    'tr_loss': tr_loss_avg,
    'tr_acc': tr_acc_avg,
    'val_loss': val_loss,
    'val_acc': val_acc,
}
# One row per recorded epoch; the default DataFrame plot draws one line per
# column on a shared axis.
out_df = pd.DataFrame(out_dict)
out_df.plot()