In [6]:
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import numpy as np
from functools import partial
from tensorflow.keras.applications import EfficientNetB1
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Conv2D, MaxPooling2D, BatchNormalization, LeakyReLU, Flatten
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras import callbacks
import os
import random
import pprint
import pandas as pd
import numpy as np
#tf.config.optimizer.set_jit(True)
# Turn on the "auto_mixed_precision" option of TensorFlow's graph optimizer.
graph_optimizer_options = {"auto_mixed_precision": True}
tf.config.optimizer.set_experimental_options(graph_optimizer_options)

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Read the preprocessed CSV from the Kaggle input directory into a DataFrame.
csv_path = '/kaggle/input/nih-chest-xrays-tfrecords/preprocessed_data.csv'
df = pd.read_csv(csv_path)

In [None]:
# Give the column that was loaded as 'Unnamed: 0' the name 'image',
# then preview the first rows.
column_renames = {'Unnamed: 0': 'image'}
df.rename(columns=column_renames, inplace=True)
df.head()

In [5]:
# Every column after the first is treated as a label column. `columns` is
# also read later, when TFRecord examples are parsed into label vectors.
columns = df.columns[1:]

# Lay the panels out four per row, with as many rows as the labels need
# (15 labels -> the same 4x4 grid as before, but no label is silently dropped).
N_PLOT_COLS = 4
n_plot_rows = max(1, -(-len(columns) // N_PLOT_COLS))  # ceiling division
plt.figure(figsize = (15,25))

for position, column in enumerate(columns, start=1):
    plt.subplot(n_plot_rows, N_PLOT_COLS, position)
    counts = df.value_counts(column, sort = False)
    # One colour per bar, chosen from the bar's own value: positives in red,
    # everything else in green. pandas only accepts a dict for `color` in the
    # form {column name: colour}, which cannot express this per-bar mapping.
    # NOTE(review): assumes label values are 0/1 or False/True - confirm.
    bar_colors = [
        'red' if str(value) in ('1', '1.0', 'True') else 'green'
        for value in counts.index
    ]
    counts.plot.bar(label=column, color=bar_colors)
    plt.legend();

In [6]:
# Directory whose entries are used as the TFRecord input files.
dossier = '/kaggle/input/nih-chest-xrays-tfrecords/data/'
# os.listdir returns entries in arbitrary order. Sort them so that the
# positional train/valid/test split built from `filenames` selects the same
# files on every run (otherwise test files can drift into the training split
# between sessions).
image = sorted(os.listdir(dossier))
img = [dossier+x for x in image]
# Sort the glob result as well: the order it returns is not something this
# notebook should depend on.
filenames = sorted(tf.io.gfile.glob(img))

In [7]:
# Positional split of the file list: first 80% for training, next 10% for
# validation, final 10% for testing.
n_files = len(filenames)
split_ind = int(0.8 * n_files)
split_ind2 = int(0.9 * n_files)

training, valid, test = (
    filenames[:split_ind],
    filenames[split_ind:split_ind2],
    filenames[split_ind2:],
)

for caption, subset in (
    ("Train TFRecord Files:", training),
    ("Validation TFRecord Files:", valid),
    ("Test TFRecord Files:", test),
):
    print(caption, len(subset))
# Sanity check: the three subsets together account for every file.
print(len(training) + len(valid) + len(test) == n_files)

In [8]:
# Parsing spec for the TFRecord examples: every DataFrame column after the
# first is read as a scalar int64 feature ...
feature_map = {
    label_name: tf.io.FixedLenFeature([], tf.int64)
    for label_name in df.columns[1:]
}
# ... and 'image' is read as a scalar string feature.
feature_map['image'] = tf.io.FixedLenFeature([], tf.string)
feature_map

In [9]:
def read_tfrecord(example):
    """Parse one serialized example into an ``(image, label)`` pair.

    The example is parsed with the module-level ``feature_map``. Its
    ``"image"`` feature is decoded as a 3-channel JPEG, resized to 200x200,
    cast to float32 and divided by 255. The label is a list holding the
    parsed value of every name in the module-level ``columns``, in that order.
    """
    parsed = tf.io.parse_single_example(example, feature_map)

    pixels = tf.io.decode_jpeg(parsed["image"], channels=3)
    pixels = tf.image.resize(pixels, (200,200))
    pixels = tf.cast(pixels, tf.float32) / 255.0

    label = [parsed[name] for name in columns]
    return pixels, label

In [10]:
def load_dataset(filenames):
    """Return a dataset of parsed ``(image, label)`` pairs read from ``filenames``.

    The TFRecord dataset is created with ``experimental_deterministic`` set
    to False in its options, and every record is passed through
    ``read_tfrecord``.
    """
    options = tf.data.Options()
    options.experimental_deterministic = False
    return (
        tf.data.TFRecordDataset(filenames)
        .with_options(options)
        .map(read_tfrecord)
    )

In [11]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
def get_dataset(filenames, batch_size=128, shuffle_buffer=2048):
    """Build the shuffled, batched input pipeline for a list of TFRecord files.

    Args:
        filenames: TFRecord file paths, forwarded to ``load_dataset``.
        batch_size: number of examples per batch (default 128).
        shuffle_buffer: size of the shuffle buffer, in examples (default 2048).

    Returns:
        A dataset yielding batches of the ``(image, label)`` pairs produced
        by ``load_dataset``.
    """
    dataset = load_dataset(filenames)
    dataset = dataset.shuffle(shuffle_buffer)
    dataset = dataset.batch(batch_size)
    # Prefetch is the last step so that complete batches (not single examples)
    # are prepared ahead of time while the previous batch is being consumed.
    dataset = dataset.prefetch(buffer_size=AUTOTUNE)

    return dataset

In [12]:
# One input pipeline per split, built in train / validation / test order.
train_dataset, valid_dataset, test_dataset = map(
    get_dataset, (training, valid, test)
)

In [13]:
from tensorflow.keras.applications import DenseNet201, ResNet152V2
from tensorflow.keras.layers import Average
from tensorflow.keras.layers import Dense, Input, Flatten, LeakyReLU, Dropout,GlobalAveragePooling2D
from tensorflow.keras.models import Model

def densenet(model_input, num_classes=15):
    """Build a DenseNet201-based multi-label classifier on ``model_input``.

    The ImageNet-pretrained backbone (without its top) is frozen except for
    layers whose name starts with ``conv5_block2`` or ``conv5_block3``. A small
    head (Flatten -> Dense(128) -> LeakyReLU -> Dropout(0.2) -> Dense) is added
    on top and left trainable.

    Args:
        model_input: Keras input tensor, passed to DenseNet201 as
            ``input_tensor``.
        num_classes: number of output units (default 15).

    Returns:
        A Keras ``Model`` mapping the backbone input to ``num_classes``
        independent probabilities.
    """
    base_model = DenseNet201(weights='imagenet', include_top=False, input_tensor=model_input)

    # Freeze only backbone layers. The previous loop ran over the whole model
    # and therefore also froze the freshly initialised head, and its condition
    # `not A or B` parsed as `(not A) or B`, which froze the conv5_block3*
    # layers it was meant to keep trainable.
    # The match is by prefix, so a name such as 'conv5_block20_...' also
    # counts as 'conv5_block2'.
    trainable_prefixes = ('conv5_block2', 'conv5_block3')
    for layer in base_model.layers:
        layer.trainable = layer.name.startswith(trainable_prefixes)

    x = Flatten()(base_model.output)
    x = Dense(128)(x)
    x = LeakyReLU()(x)
    x = Dropout(0.2)(x)
    # Sigmoid, not softmax: each label is an independent 0/1 target trained
    # with binary cross-entropy, so the outputs must not be forced to sum to 1.
    predictions = Dense(num_classes, activation="sigmoid")(x)

    return Model(inputs=base_model.input, outputs=predictions)

def resnet(model_input, num_classes=15):
    """Build a ResNet152V2-based multi-label classifier on ``model_input``.

    The ImageNet-pretrained backbone (without its top) is frozen except for
    layers whose name starts with ``conv5_block2`` or ``conv5_block3``. A small
    head (Flatten -> Dense(128) -> LeakyReLU -> Dropout(0.2) -> Dense) is added
    on top and left trainable. Finally every layer of the model gets a ``_2``
    name suffix.

    Args:
        model_input: Keras input tensor, passed to ResNet152V2 as
            ``input_tensor``.
        num_classes: number of output units (default 15).

    Returns:
        A Keras ``Model`` mapping the backbone input to ``num_classes``
        independent probabilities.
    """
    base_model = ResNet152V2(weights='imagenet', include_top=False, input_tensor=model_input)

    # Freeze only backbone layers. The previous loop ran over the whole model
    # and therefore also froze the freshly initialised head, and its condition
    # `not A or B` parsed as `(not A) or B`, which froze the conv5_block3*
    # layers it was meant to keep trainable.
    # This must run before the rename below, which changes the layer names.
    trainable_prefixes = ('conv5_block2', 'conv5_block3')
    for layer in base_model.layers:
        layer.trainable = layer.name.startswith(trainable_prefixes)

    x = Flatten()(base_model.output)
    x = Dense(128)(x)
    x = LeakyReLU()(x)
    x = Dropout(0.2)(x)
    # Sigmoid, not softmax: each label is an independent 0/1 target trained
    # with binary cross-entropy, so the outputs must not be forced to sum to 1.
    predictions = Dense(num_classes, activation="sigmoid")(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    # Suffix every layer name with "_2".
    # NOTE(review): presumably this avoids duplicate layer names once this
    # model is combined with another one in a single graph; it writes the
    # private `_name` attribute - confirm it works on the installed Keras.
    for layer in model.layers:
        layer._name = layer.name + str("_2")

    return model

# Both branches are built on one shared 200x200x3 input tensor:
# DenseNet first, then ResNet.
model_input = Input(shape=(200, 200, 3))
ensemble_model = [build_branch(model_input) for build_branch in (densenet, resnet)]
densenet_model, resnet_model = ensemble_model

def ensemble(models, model_input):
    """Combine ``models`` into one model that averages their first outputs.

    Args:
        models: models whose ``outputs[0]`` tensors are averaged.
        model_input: input tensor of the combined model.

    Returns:
        A ``Model`` named ``'ensemble'`` from ``model_input`` to the average.
    """
    branch_outputs = [branch.outputs[0] for branch in models]
    averaged = Average()(branch_outputs)
    return Model(model_input, averaged, name='ensemble')


In [14]:
ensembleNN = ensemble(ensemble_model, model_input=model_input)

# Fine-tune only the tail of the combined network: freeze everything except
# the last N_TRAINABLE_TAIL layers, and make sure those last layers (which
# include the output heads) are trainable. The previous loop did the opposite:
# it froze exactly the last 30 layers, so the classifier heads could not learn.
N_TRAINABLE_TAIL = 30
for layer in ensembleNN.layers[:-N_TRAINABLE_TAIL]:
    layer.trainable = False
for layer in ensembleNN.layers[-N_TRAINABLE_TAIL:]:
    layer.trainable = True

ensembleNN.summary()

In [15]:
# Adam + binary cross-entropy; report AUC (as "auc") and accuracy.
training_metrics = [tf.keras.metrics.AUC(name="auc"), 'accuracy']
ensembleNN.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=training_metrics,
)

In [16]:
# Lower the learning rate when validation AUC stops improving.
# Its patience must be shorter than the early-stopping patience below: with
# equal values the learning-rate cut typically lands on the very epoch that
# training is stopped, so the reduced rate is never actually used.
plateau = callbacks.ReduceLROnPlateau(monitor='val_auc',
                                     patience = 2,
                                     factor = 0.01,
                                     mode = 'max')

# Stop after 5 epochs without a better validation AUC and roll back to the
# best weights seen.
early_stopping = callbacks.EarlyStopping(monitor = 'val_auc',
                                        patience = 5,
                                        restore_best_weights=True,
                                        mode = 'max')

In [None]:
# `train_dataset` is finite (the input pipeline never calls `.repeat()`), so
# no `steps_per_epoch` is given: each epoch is one full pass over the data.
# A fixed step count on a non-repeating dataset keeps consuming the same
# iterator across epochs and ends training as soon as the data runs out.
history = ensembleNN.fit(train_dataset, epochs = 10,
          callbacks=[plateau, early_stopping], validation_data=valid_dataset)

In [None]:
# Write the ensemble's weights to an HDF5 file in the working directory.
weights_path = "./ensemble2.h5"
ensembleNN.save_weights(weights_path)

In [19]:
# Report loss and the compiled metrics on the held-out test split.
ensembleNN.evaluate(x=test_dataset)