In [1]:
# Switch to the project root so the relative `input_path` used below resolves.
%cd ~/ChestXray-14/

/home/jovyan/ChestXray-14


In [2]:
"""
Chonsawat Path: input_path = "/content/drive/MyDrive/KKU /Project/Dataset/ChestXray NIH"
Deepnote Path: input_path = "/datasets/chonsawat-drive/KKU /Project/Dataset/ChestXray NIH"
Elab Path: input_path = "~/ChestXray-14/dataset/ChestXray NIH"
"""
# Dataset root actually used by this notebook. It is a relative path, so it is
# resolved against the current working directory (see the `%cd` cell).
input_path = "dataset/ChestXray NIH"

from sklearn.utils import shuffle
from tqdm.notebook import tqdm
import tensorflow as tf
import pandas as pd
import numpy as np
import os

import warnings
# NOTE(review): this hides *every* Python warning for the rest of the session,
# not just known-noisy ones — consider narrowing it by category/module.
warnings.filterwarnings('ignore')

In [3]:
# Distribution strategy used when building the model and distributing datasets.
# get_strategy() hands back whichever strategy is current at this point.
STRATEGY = tf.distribute.get_strategy()
BATCH_SIZE = 16   # examples per batch produced by get_dataset
IMG_SIZE = 224    # side length used when reshaping decoded images
SEED = 42         # passed as the op-level seed to shuffle / random flips

# Also apply SEED as the global TensorFlow seed. The op-level seeds above only
# cover the ops they are passed to; without a global seed, anything that draws
# random numbers without an explicit seed (e.g. weight initialisation) differs
# from run to run.
tf.random.set_seed(SEED)

print('Using tensorflow %s' % tf.__version__)

Using tensorflow 2.6.2


In [4]:
# Parsing schema for one serialized example: two scalar string features
# ('image', 'image_id') followed by one scalar int64 feature per label.
# Key order is: image, image_id, then the labels in the order listed here.
feature_map = {
    'image': tf.io.FixedLenFeature([], tf.string),
    'image_id': tf.io.FixedLenFeature([], tf.string),
    **{
        label: tf.io.FixedLenFeature([], tf.int64)
        for label in (
            'No Finding',
            'Atelectasis',
            'Consolidation',
            'Infiltration',
            'Pneumothorax',
            'Edema',
            'Emphysema',
            'Fibrosis',
            'Effusion',
            'Pneumonia',
            'Pleural_Thickening',
            'Cardiomegaly',
            'Nodule',
            'Mass',
            'Hernia',
        )
    },
}


def count_data_items(filenames):
    """Return the total number of examples across a list of shard files.

    Each file name is expected to end in ``-<count>.<ext>`` (for example
    ``000-438.tfrec``); the integer after the last ``-`` of the base name is
    taken as that shard's example count.

    Args:
        filenames: iterable of shard paths.

    Returns:
        The sum of the per-file counts as a plain ``int`` (``0`` for an empty
        iterable, so that ``// BATCH_SIZE`` always yields an integer step count).

    Raises:
        ValueError: if a file name does not carry an integer count.
    """
    total = 0
    for path in filenames:
        # Drop directories and the extension (whatever its length) before
        # reading the count, so hyphens or dots elsewhere in the path are harmless.
        stem = os.path.splitext(os.path.basename(path))[0]
        total += int(stem.rsplit('-', 1)[-1])
    return total


def decode_image(image_data):
    """Decode JPEG bytes as a single-channel image and reshape it to
    [IMG_SIZE, IMG_SIZE, 1]."""
    pixels = tf.image.decode_jpeg(image_data, channels=1)
    return tf.reshape(pixels, [IMG_SIZE, IMG_SIZE, 1])


def scale_image(image, target):
    """Cast `image` to float32 and divide by 255; `target` is passed through."""
    as_float = tf.cast(image, tf.float32)
    return as_float / 255., target


def read_tfrecord(example):
    """Parse one serialized example into an (image, target) pair.

    The example is parsed with `feature_map`; the 'image' bytes go through
    `decode_image`, and the target is the list of the 15 label features in the
    fixed order given by `label_keys` below.
    """
    label_keys = (
        'No Finding', 'Atelectasis', 'Consolidation', 'Infiltration',
        'Pneumothorax', 'Edema', 'Emphysema', 'Fibrosis', 'Effusion',
        'Pneumonia', 'Pleural_Thickening', 'Cardiomegaly', 'Nodule',
        'Mass', 'Hernia',
    )
    parsed = tf.io.parse_single_example(example, feature_map)
    pixels = decode_image(parsed['image'])
    labels = [parsed[key] for key in label_keys]
    return pixels, labels


def data_augment(image, target):
    """Apply a random left-right flip followed by a random up-down flip to
    `image` (both seeded with SEED); `target` is returned unchanged."""
    mirrored = tf.image.random_flip_left_right(image, seed=SEED)
    flipped = tf.image.random_flip_up_down(mirrored, seed=SEED)
    return flipped, target


def get_dataset(filenames, shuffled=False, repeated=False,
                cached=False, augmented=False, distributed=True):
    """Build the input pipeline for a list of TFRecord files.

    Stages, in order: read records -> `read_tfrecord` -> optional
    `data_augment` -> `scale_image` -> optional shuffle (buffer 2048, SEED)
    -> optional repeat -> batch(BATCH_SIZE, drop_remainder=True)
    -> optional cache -> prefetch -> optional distribution via STRATEGY.

    Args:
        filenames: TFRecord paths to read.
        shuffled: shuffle individual examples before batching.
        repeated: repeat the stream indefinitely.
        cached: cache the batched stream.
        augmented: apply `data_augment` to each example.
        distributed: wrap the result with
            `STRATEGY.experimental_distribute_dataset`.

    Returns:
        The resulting dataset (distributed when `distributed` is true).
    """
    autotune = tf.data.experimental.AUTOTUNE

    # Per-example transforms, applied one map() at a time in this order.
    per_example = [read_tfrecord]
    if augmented:
        per_example.append(data_augment)
    per_example.append(scale_image)

    ds = tf.data.TFRecordDataset(filenames, num_parallel_reads=autotune)
    for transform in per_example:
        ds = ds.map(transform, num_parallel_calls=autotune)

    if shuffled:
        ds = ds.shuffle(2048, seed=SEED)
    if repeated:
        ds = ds.repeat()

    ds = ds.batch(BATCH_SIZE, drop_remainder=True)

    # cache() sits after batch(), so whole batches are what gets cached,
    # including whatever augmentation/shuffle/repeat happened upstream.
    # NOTE(review): cached=True together with repeated=True would presumably
    # try to cache an endless stream — confirm no caller combines them.
    if cached:
        ds = ds.cache()

    ds = ds.prefetch(autotune)
    return STRATEGY.experimental_distribute_dataset(ds) if distributed else ds


def get_model():
    """Build and compile the classifier.

    An EfficientNetB0 backbone (no top, no pretrained weights, average
    pooling, single-channel input of unspecified height/width) feeds a
    15-unit sigmoid Dense layer. Compiled with Adam, binary cross-entropy
    and a multi-label AUC metric.
    """
    backbone = tf.keras.applications.EfficientNetB0(
        include_top=False,
        input_shape=(None, None, 1),
        weights=None,
        pooling='avg')
    head = tf.keras.layers.Dense(15, activation='sigmoid')

    model = tf.keras.models.Sequential([backbone, head])
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=tf.keras.metrics.AUC(multi_label=True))
    return model

In [18]:
# Scratch check of the input pipeline on individual shard files.
# NOTE(review): this cell's execution count is out of sequence and it globs the
# flat `224x224/` folder, whereas the training cell globs `train/`, `valid/`
# and `test/` sub-folders — confirm which layout is current before re-running.
filenames = tf.io.gfile.glob(f'{input_path}/data/224x224/*.tfrec')

# One shard each; these names are reassigned by the training cell.
train_filenames = [filenames[0]]
val_filenames = [filenames[1]]

# Last expression of the cell: displays the repr of the built dataset.
get_dataset(val_filenames, shuffled=True, repeated=True, augmented=True)

<PrefetchDataset shapes: ((16, 224, 224, 1), (16, 15)), types: (tf.float32, tf.int64)>

In [3]:
# List every `.tfrec` shard directly under the 224x224 folder and display it.
filenames = tf.io.gfile.glob(f'{input_path}/data/224x224/*.tfrec')
filenames

['dataset/ChestXray NIH/data/224x224/000-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/001-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/002-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/003-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/004-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/005-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/006-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/007-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/008-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/009-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/010-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/011-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/012-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/013-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/014-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/015-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/016-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/017-438.tfrec',
 'dataset/ChestXray NIH/data/224x224/018-438.t

In [5]:
# Shard lists for each split, taken from per-split sub-folders.
train_filenames = tf.io.gfile.glob(f'{input_path}/data/224x224/train/*.tfrec')
val_filenames = tf.io.gfile.glob(f'{input_path}/data/224x224/valid/*.tfrec')
# NOTE(review): test_filenames is not used in the cells visible here.
test_filenames = tf.io.gfile.glob(f'{input_path}/data/224x224/test/*.tfrec')

# Whole batches per pass, derived from the example counts encoded in the
# shard file names (see count_data_items).
steps_per_epoch = count_data_items(train_filenames) // BATCH_SIZE
validation_steps = count_data_items(val_filenames) // BATCH_SIZE

# Training stream repeats indefinitely, so steps_per_epoch bounds each epoch.
train_dataset = get_dataset(train_filenames, shuffled=True, repeated=True, augmented=True)
# Validation stream: no shuffle/augment/repeat; batches are cached.
val_dataset = get_dataset(val_filenames, cached=True)

# Create (and compile) the model inside the strategy scope.
with STRATEGY.scope():
    model = get_model()

history = model.fit(
    train_dataset,
    steps_per_epoch=steps_per_epoch,
    epochs=2,
    validation_data=val_dataset,
    validation_steps=validation_steps,
    verbose=1)

2022-03-29 15:49:32.231576: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-03-29 15:49:33.114378: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 8003 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB MIG 2g.10gb, pci bus id: 0000:17:00.0, compute capability: 8.0


Epoch 1/2


2022-03-29 15:49:38.459118: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2022-03-29 15:49:40.993691: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8201
2022-03-29 15:49:41.935629: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-03-29 15:49:41.936703: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-03-29 15:49:41.936753: W tensorflow/stream_executor/gpu/asm_compiler.cc:77] Couldn't get ptxas version string: Internal: Couldn't invoke ptxas --version
2022-03-29 15:49:41.937793: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-03-29 15:49:41.937989: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] Internal: Failed to launch ptxas
Relying on driver to perform ptx co

Epoch 2/2


In [6]:
# Persist the trained model under the dataset root.
# NOTE(review): the `.h5` suffix presumably selects Keras' HDF5 format — confirm
# this is the intended format for later loading.
model.save(f"{input_path}/models/EfficientNetB0.h5")