In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
 #   for filename in filenames:
      #  print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [None]:
# Resolution every image is resized to before entering the network, and the
# number of images per batch. Inputs are square, so both sides share one value.
image_height = image_width = 299
batch_size = 32

In [None]:
# Directories holding the training images and the testing images.
train_path, test_path = (
    '../input/covid19xrays/Dataset/Training',
    '../input/covid19xrays/Dataset/Testing',
)

In [None]:
# Training portion of the images under `train_path`.
# Labels are inferred from the directory layout, images are decoded as
# single-channel (grayscale) and resized to (image_height, image_width).
train_split_options = dict(
    labels='inferred',
    label_mode="binary",
    color_mode="grayscale",
    image_size=(image_height, image_width),
    batch_size=batch_size,
    shuffle=True,
    # The validation loader uses the same seed and split fraction, which is
    # what keeps the two subsets disjoint.
    seed=312,
    validation_split=0.25,
    subset="training",
)
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_path, **train_split_options)


In [None]:
# Validation portion (25%) of the images under `train_path`.
# Every option except `subset` mirrors the training loader so both subsets
# are decoded and resized identically.
val_split_options = dict(
    labels='inferred',
    label_mode="binary",
    color_mode="grayscale",
    image_size=(image_height, image_width),
    batch_size=batch_size,
    shuffle=True,
    # Must match the training loader's seed and split fraction.
    seed=312,
    validation_split=0.25,
    subset="validation",
)
val_ds = tf.keras.utils.image_dataset_from_directory(
    train_path, **val_split_options)

In [None]:
# Class names reported by the dataset loader. They are used later to turn a
# predicted class index back into a label string.
# Read them here, before `train_ds` is re-assigned to its cached/prefetched
# form further down — the wrapped dataset does not expose `class_names`.
classes = train_ds.class_names
print(classes)

In [None]:
# Number of target classes; sizes the final Dense layer of the models below.
classes_num = len(classes)

In [None]:
import matplotlib.pyplot as plt

# Preview the first nine images of one training batch in a 3x3 grid.
# Per-image class titles are not drawn.
preview_fig = plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    for idx in range(9):
        ax = preview_fig.add_subplot(3, 3, idx + 1)
        ax.imshow(images[idx], cmap='binary')
        ax.axis("off")

In [None]:
# Input-pipeline tuning: cache decoded images after the first pass and
# prefetch upcoming batches while the model is busy with the current one.
AUTOTUNE = tf.data.AUTOTUNE

# Training data is additionally reshuffled through a 9000-element buffer.
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(9000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

# Validation data keeps its order; it is only cached and prefetched.
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

## Creating a sequential model


In [None]:


# Baseline CNN: rescale pixels to [0, 1], three Conv2D + MaxPooling stages
# with 16/32/64 filters, then a 128-unit dense layer and one raw-logit output
# per class (no activation on the last layer).
# NOTE(review): every Conv2D uses a kernel size of 1, i.e. a per-pixel
# transform with no spatial neighbourhood — confirm this is intentional
# (3x3 kernels are the usual choice for this kind of stack).
model = Sequential()
model.add(layers.Rescaling(1./255, input_shape=(image_height, image_width, 1)))
for n_filters in (16, 32, 64):
    model.add(layers.Conv2D(n_filters, 1, padding='same', activation='relu'))
    model.add(layers.MaxPooling2D())
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(classes_num))

In [None]:
# Configure training for the baseline model: Adam optimizer, accuracy metric,
# and sparse categorical cross-entropy computed on raw logits (the model's
# last Dense layer applies no activation).
baseline_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=baseline_loss, metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the baseline model.
model.summary()

In [None]:
# Train the baseline model for 10 epochs, evaluating on the validation
# dataset alongside training. The returned History object is kept in
# `hist_data`.
epochs = 10
hist_data = model.fit(train_ds, epochs=epochs, validation_data=val_ds)

In [None]:
# Plot the learning curves of the baseline model trained above.
# The metrics are read from `hist_data`, the History object returned by the
# first `model.fit` call. (`history` is only assigned by the second training
# run further down, so referencing it here fails on a fresh kernel.)
acc = hist_data.history['accuracy']
val_acc = hist_data.history['val_accuracy']

loss = hist_data.history['loss']
val_loss = hist_data.history['val_loss']

# Derive the x-axis from the number of recorded epochs rather than from the
# global `epochs`, which is re-assigned later in the notebook.
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))

# Left panel: accuracy per epoch.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss per epoch.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

## Improvements

In [None]:
# Augmentation pipeline for single-channel images: random horizontal flip
# followed by rotation, zoom and contrast changes (factor 0.05 each).
augmentation_layers = [
    layers.RandomFlip("horizontal",
                      input_shape=(image_height, image_width, 1)),
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.05),
    layers.RandomContrast(0.05),
]
data_augmentation = keras.Sequential(augmentation_layers)

In [None]:
# Preview the augmentation: for each of the nine grid cells, run the whole
# batch through `data_augmentation` again and show the image at that index.
augmented_fig = plt.figure(figsize=(10, 10))
for images, _ in train_ds.take(1):
    for idx in range(9):
        augmented_images = data_augmentation(images)
        ax = augmented_fig.add_subplot(3, 3, idx + 1)
        ax.imshow(augmented_images[idx], cmap='binary')
        ax.axis("off")

In [None]:
# Improved model: same convolutional stack as the baseline, preceded by the
# augmentation pipeline and with 50% dropout applied before flattening.
# This re-assigns `model`, replacing the baseline.
# NOTE(review): every Conv2D uses a kernel size of 1, i.e. a per-pixel
# transform with no spatial neighbourhood — confirm this is intentional.
model = Sequential()
model.add(data_augmentation)
model.add(layers.Rescaling(1./255))
for n_filters in (16, 32, 64):
    model.add(layers.Conv2D(n_filters, 1, padding='same', activation='relu'))
    model.add(layers.MaxPooling2D())
model.add(layers.Dropout(0.5))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(classes_num))

In [None]:
# Same training configuration as the baseline: Adam, accuracy metric, and
# sparse categorical cross-entropy on raw logits.
augmented_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=augmented_loss, metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the augmented model.
model.summary()

In [None]:
# Display the training dataset object (the notebook shows the value of the
# cell's last expression).
train_ds

In [None]:
# Train the augmented model for 15 epochs with validation alongside training.
# The returned History object is kept in `history`.
epochs = 15
history = model.fit(train_ds, epochs=epochs, validation_data=val_ds)

## Predictions

In [None]:
# Directory with the test images. File names are expected to be numeric
# (e.g. "12.png"): they are ordered by that number, not alphabetically.
folder = "../input/covid19xrays/Dataset/Testing"


def _numeric_stem(file_name):
    """Return the file name without its extension, converted to int."""
    stem, _extension = os.path.splitext(file_name)
    return int(stem)


test_files = os.listdir(folder)
test_files.sort(key=_numeric_stem)
len(test_files)

In [None]:
# Load every test image as a single-channel array, in the order of
# `test_files`.
# Each image is resized to (image_height, image_width), the same size the
# training pipeline resizes to. Without this, a test image of another size
# would break stacking into one array or be rejected by the model's fixed
# input shape. Images that already have that size are left untouched.
test_images = []
for _file in test_files:
    image_path = os.path.join(folder, _file)
    # load_img returns a PIL image; bilinear matches the default resize
    # method of image_dataset_from_directory more closely than 'nearest'.
    image = tf.keras.preprocessing.image.load_img(
        image_path,
        color_mode="grayscale",
        target_size=(image_height, image_width),
        interpolation="bilinear",
    )
    # Convert the PIL image to a NumPy array.
    test_images.append(tf.keras.preprocessing.image.img_to_array(image))

In [None]:
# Turn the list of per-image arrays into a single NumPy array so it can be
# passed to the model in one call; the cell then shows how many images it holds.
test_images = np.asarray(test_images)
len(test_images)

In [None]:
# Run the trained model over all test images. The model's final Dense layer
# has no activation, so each returned row holds raw logits (one per class),
# not probabilities.
predictions = model.predict(test_images,batch_size=batch_size)

In [None]:
# Build the submission rows: [numeric file name, upper-cased class label].
#
# The file id is taken from each file's own name instead of a running
# counter, so rows stay correctly labelled even if the file names do not
# form the exact sequence 1..N. For files named 1..N the result is unchanged.
if len(test_files) != len(predictions):
    raise ValueError(
        "Expected one prediction per test file, got {} files and {} predictions."
        .format(len(test_files), len(predictions)))

# Softmax is monotonic, so the most likely class is simply the arg-max of
# the raw logits; no per-row softmax is needed just to pick a label.
predicted_class_indices = np.argmax(predictions, axis=1)

predictions_labeled = [
    [int(os.path.splitext(file_name)[0]), classes[class_index].upper()]
    for file_name, class_index in zip(test_files, predicted_class_indices)
]

In [None]:
# Spot-check one labelled row (index 223, i.e. the 224th entry).
# Raises IndexError if there are fewer than 224 predictions.
predictions_labeled[223]

In [None]:
# Write the labelled predictions to output.csv with a two-column header and
# no index column.
submission_columns = ['Filename', 'Label']
result = pd.DataFrame(data=predictions_labeled, columns=submission_columns)
result.to_csv('output.csv', index=False)