This notebook is a follw-up of the has_pnemonia_classifier.ipynb which is designed to classify if a person has pnemonia. In this notebook, we try to classify the type of illness 

Load Tensorflow

In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
import pandas as pd
import numpy as np
import random
import os
from glob import glob
from PIL import Image
import pathlib

import tensorflow as tf
print("TensorFlow version:", tf.__version__)

from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import utils
from tensorflow.keras import Model

batch_size = 32

TensorFlow version: 2.6.0


Load helper functions and get the data

In [2]:
from src.datareader import get_metadata
from src.find_images_without_metadata import find_images_without_metadata
from src.labelenums import Label2

metadata: pd.DataFrame = get_metadata()
metadata = metadata.loc[metadata['Label'] == 'Pnemonia']
metadata['Label_2_Virus_category'] = metadata['Label_2_Virus_category'].replace(to_replace='', value='Unknown')

Separate between training and testing

In [3]:
train_sample = metadata.loc[metadata['Dataset_type'] == 'TRAIN']
test_sample = metadata.loc[metadata['Dataset_type'] == 'TEST']
train_path = os.path.join(os.getcwd(), 'dataset', 'TRAIN')
test_path = os.path.join(os.getcwd(), 'dataset', 'TEST')

In [4]:
num_classes_covid = len(np.unique(list(train_sample['Label_2_Virus_category'])))
print('Number of classes = %d' % num_classes_covid)

Number of classes = 5


Get the images and labels. 

In [5]:
new_size = (256, 256)
AUTOTUNE = tf.data.AUTOTUNE
normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)
diagnosis_labeler = Label2()


def get_train_label(file_path):
    # Convert the path to a list of path components
    filename = os.path.basename(file_path)
    copy = train_sample.copy()
    names_ds = list(copy['X_ray_image_name'].copy())
    labels_ds = list(copy['Label_2_Virus_category'].copy())
    label_str = labels_ds[names_ds.index(filename)]
    label_int = diagnosis_labeler[label_str]
    return label_int


def get_test_label(file_path):
    # Convert the path to a list of path components
    filename = os.path.basename(file_path)
    copy = test_sample.copy()
    names_ds = list(copy['X_ray_image_name'].copy())
    labels_ds = list(copy['Label_2_Virus_category'].copy())
    label_str = labels_ds[names_ds.index(filename)]
    label_int = diagnosis_labeler[label_str]
    return label_int


def process_path(file_path, label):
    img1 = tf.io.read_file(file_path)
    img2 = tf.io.decode_png(img1, channels=1)
    img3 = tf.image.resize(img2, new_size, method='lanczos3', antialias=True)
    img4 = normalization_layer(img3)
    return img4, label


In [6]:
train_image_paths = [os.path.join(train_path, x) for x in list(train_sample['X_ray_image_name'])]
test_image_paths = [os.path.join(test_path, x) for x in list(test_sample['X_ray_image_name'])]
train_labels = np.array([get_train_label(x) for x in train_image_paths], dtype=int)
test_labels = np.array([get_test_label(x) for x in test_image_paths], dtype=int)

In [7]:
train_image_paths_tensors = tf.convert_to_tensor(train_image_paths, dtype=tf.string)
train_labels_tensors = tf.convert_to_tensor(train_labels)
train_dataset = tf.data.Dataset.from_tensor_slices((train_image_paths_tensors, train_labels_tensors))
train_dataset = train_dataset.map(process_path, num_parallel_calls=AUTOTUNE)

test_image_paths_tensors = tf.convert_to_tensor(test_image_paths, dtype=tf.string)
test_labels_tensors = tf.convert_to_tensor(test_labels)
test_dataset = tf.data.Dataset.from_tensor_slices((test_image_paths_tensors, test_labels_tensors))
test_dataset = test_dataset.map(process_path, num_parallel_calls=AUTOTUNE)


print(tf.data.experimental.cardinality(train_dataset).numpy())
print(tf.data.experimental.cardinality(test_dataset).numpy())

3944
390


In [8]:
def configure_for_performance(ds):
  ds = ds.cache()
  ds = ds.shuffle(buffer_size=1000)
  ds = ds.batch(batch_size)
  ds = ds.prefetch(buffer_size=AUTOTUNE)
  return ds

train_dataset = configure_for_performance(train_dataset)
test_dataset = configure_for_performance(test_dataset)

In [20]:
metrics = [
    tf.keras.metrics.RootMeanSquaredError(),
    tf.keras.metrics.MeanAbsoluteError()
]

In [21]:
filters = 10
kernel_size = (3, 3)
activation = 'relu'

has_covid_classifier = tf.keras.Sequential([
    tf.keras.layers.Conv2D(filters, 
                           kernel_size=kernel_size, 
                           activation=activation,
                           strides=1,
                           padding='valid',
                           input_shape=(new_size[0], new_size[1], 1)),
    tf.keras.layers.Conv2D(filters,
                           kernel_size=kernel_size,
                           activation=activation),
    tf.keras.layers.Conv2D(filters,
                           kernel_size=kernel_size,
                           activation=activation),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation=activation),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(1)
])

In [22]:
has_covid_classifier.compile(
    optimizer='adam',
    loss='MeanSquaredError',
    metrics=metrics
)

In [23]:
checkpoint_path = "training_has_covid/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

early_stoping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', 
                                                 patience=2, 
                                                 restore_best_weights=True)


In [24]:
epochs = 10
has_covid_classifier.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=epochs,
    callbacks=[cp_callback, early_stoping]
)

Epoch 1/10

Epoch 00001: saving model to training_has_covid\cp.ckpt
Epoch 2/10

Epoch 00002: saving model to training_has_covid\cp.ckpt
Epoch 3/10

Epoch 00003: saving model to training_has_covid\cp.ckpt
Epoch 4/10

Epoch 00004: saving model to training_has_covid\cp.ckpt
Epoch 5/10

Epoch 00005: saving model to training_has_covid\cp.ckpt
Epoch 6/10

Epoch 00006: saving model to training_has_covid\cp.ckpt


<tensorflow.python.keras.callbacks.History at 0x2a530724c70>

In [None]:
has_covid_classifier.evaluate