In [None]:
# Load the libraries used in this notebook

from google.colab import drive
from tqdm import tqdm
from PIL import Image
import pandas as pd
import os
import numpy as np

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Dropout, Input

from tensorflow.keras.layers import Conv2D, MaxPool2D, BatchNormalization
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.optimizers import Adam

In [None]:
def load_data(folder_in, size=(200, 200)):
    """Load every PNG image of a Drive sub-folder into one numpy array.

    Mounts Google Drive, reads each ``<id>.png`` file found under
    ``CISC873_Assignments/<folder_in>``, converts it to PIL mode ``'LA'``
    (luminance + alpha, i.e. two channels), resizes it to ``size`` and
    returns the images ordered by their integer id.

    Args:
        folder_in: Name of the sub-folder to read, e.g. ``'train'`` or
            ``'test'``.
        size: ``(width, height)`` handed to ``Image.resize``. Defaults to
            ``(200, 200)``, the value that was previously hard-coded.

    Returns:
        ``np.ndarray`` built from the per-image arrays, sorted by ascending
        file id.

    Raises:
        ValueError: If a ``.png`` file name (without extension) is not an
            integer.
    """
    drive.mount('/content/drive')
    folder = "/content/drive/My Drive/CISC873_Assignments/" + folder_in

    images = []
    for file in os.listdir(folder):
        file_id, ext = os.path.splitext(file)
        # Ignore anything that is not a PNG (hidden files, checkpoints, ...);
        # previously such an entry crashed on int(file_id).
        if ext.lower() != '.png':
            continue
        # The context manager closes the underlying file handle; convert()
        # and resize() return new in-memory images, so `arr` stays valid.
        with Image.open(os.path.join(folder, file)) as image:
            arr = np.array(image.convert('LA').resize(size))
        images.append((int(file_id), arr))

    # Sort on the numeric id so that row i of the result lines up with the
    # label / submission row for id i.
    images.sort(key=lambda item: item[0])
    return np.array([arr for _id, arr in images])



# Training images, ordered by their numeric file id.
x_train = load_data('train')

# Make sure Drive is mounted before reading the labels (the call only
# reports "already mounted" when load_data has mounted it before).
drive.mount('/content/drive')
y_train_csv = "/content/drive/My Drive/CISC873_Assignments/y_train.csv"
# Only the 'infection' column is used as the training target.
y_train = pd.read_csv(y_train_csv)['infection']

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Fully Connected NN

In [None]:
# fully connected neural network 
def build():
    """Build the fully connected baseline classifier (uncompiled).

    Architecture: Flatten -> Dense(32) -> Dropout(0.3) -> Dense(64)
    -> Dense(128) -> Dense(1, sigmoid).

    The hidden Dense layers use ReLU. Without an activation a Dense layer is
    purely linear, so the whole hidden stack would collapse into a single
    linear map in front of the sigmoid.

    Returns:
        A ``tf.keras.Model`` mapping a ``(200, 200, 2)`` image to one
        sigmoid output.
    """
    # Input layer: 200 x 200 pixels with 2 channels, matching the size and
    # 'LA' mode used when the images are loaded.
    img_in = Input(shape=(200, 200, 2))
    # Flatten the image into one long feature vector for the dense layers.
    flattened = Flatten()(img_in)
    # A dense layer feeds every output of the previous layer to all of its
    # neurons.
    fc1 = Dense(32, activation='relu')(flattened)
    # Dropout to reduce overfitting.
    fc1 = Dropout(0.3)(fc1)
    fc2 = Dense(64, activation='relu')(fc1)
    fc3 = Dense(128, activation='relu')(fc2)
    # Single sigmoid unit: output lies between 0 and 1.
    output = Dense(1, activation='sigmoid')(fc3)
    # Model groups the layers into an object with training and inference
    # features.
    model = tf.keras.Model(inputs=img_in, outputs=output)
    return model

# Instantiate the network returned by build().
model = build()

# Configure the model for training.
model.compile(
    # Adam: stochastic gradient descent with adaptive estimates of the
    # first- and second-order moments.
    optimizer=Adam(),
    # Cross-entropy between the true binary labels and the predictions;
    # this is the quantity minimised during training.
    loss='binary_crossentropy',
    # Reported metrics: how often predictions match the binary labels, and
    # the area under the ROC curve.
    metrics=['BinaryAccuracy', 'AUC'],
)

# Print the layer-by-layer overview of the model.
model.summary()

Convolutional NN

In [None]:
# CNN MODEL 1

def build():
    """Build CNN model 1 (uncompiled).

    Architecture: Conv2D(8, 3x3, relu) -> BatchNormalization -> MaxPool2D
    -> Flatten -> Dense(64, relu) -> Dense(1, sigmoid).

    The input shape is (200, 200, 2) so the model accepts the 200 x 200
    two-channel images produced by the loader and the training reshape. The
    previous (256, 256, 2) shape could not be fed with that data.

    Returns:
        A ``tf.keras.Model`` mapping a ``(200, 200, 2)`` image to one
        sigmoid output.
    """
    img_in = Input(shape=(200, 200, 2))
    # First convolution layer.
    conv = Conv2D(filters=8, kernel_size=(3, 3), activation='relu')(img_in)
    # Batch normalization to standardise the activations.
    normed = BatchNormalization()(conv)
    # Max pooling layer to downsample.
    pooled = MaxPool2D(strides=(2, 2))(normed)
    # Flatten the feature maps into one vector.
    flattened = Flatten()(pooled)
    # Dense layer; ReLU keeps it from being a purely linear map in front of
    # the sigmoid output.
    hidden = Dense(64, activation='relu')(flattened)

    output = Dense(1, activation='sigmoid')(hidden)
    model = tf.keras.Model(inputs=img_in, outputs=output)
    return model


# Instantiate CNN model 1 and configure it for training.
model = build()
model.compile(
    optimizer=Adam(),
    loss='binary_crossentropy',
    metrics=['BinaryAccuracy', 'AUC'],
)

# Print the layer-by-layer overview of the model.
model.summary()

In [None]:
# CNN MODEL 2

def build():
    """Build CNN model 2 (uncompiled).

    Architecture: Conv2D(16, 3x3, relu) -> BatchNormalization
    -> Conv2D(8, 3x3, relu) -> MaxPool2D -> Flatten -> Dense(64, relu)
    -> Dense(32, relu) -> Dense(1, sigmoid).

    Changes from the earlier version:
      * The input shape is (200, 200, 2) instead of (256, 256, 2), so the
        model accepts the 200 x 200 two-channel images used for training.
      * Hidden layers use ReLU, as CNN model 1 does for its convolution;
        without activations the conv/dense layers are purely linear.

    Returns:
        A ``tf.keras.Model`` mapping a ``(200, 200, 2)`` image to one
        sigmoid output.
    """
    img_in = Input(shape=(200, 200, 2))
    conv1 = Conv2D(filters=16, kernel_size=(3, 3), activation='relu')(img_in)
    normed = BatchNormalization()(conv1)
    conv2 = Conv2D(filters=8, kernel_size=(3, 3), activation='relu')(normed)
    # Downsample before flattening to keep the dense layer small.
    pooled = MaxPool2D(strides=(2, 2))(conv2)
    flattened = Flatten()(pooled)
    hidden1 = Dense(64, activation='relu')(flattened)
    hidden2 = Dense(32, activation='relu')(hidden1)

    output = Dense(1, activation='sigmoid')(hidden2)
    model = tf.keras.Model(inputs=img_in, outputs=output)
    return model


# Instantiate CNN model 2 and configure it for training.
model = build()
model.compile(
    optimizer=Adam(),
    loss='binary_crossentropy',
    metrics=['BinaryAccuracy', 'AUC'],
)

# Print the layer-by-layer overview of the model.
model.summary()

In [None]:
# CNN MODEL 3 (best performing model)
def build():
    """Build CNN model 3 (uncompiled).

    Architecture: Conv2D(32, 3x3, relu) -> MaxPool2D -> Conv2D(64, 3x3, relu)
    -> MaxPool2D -> Flatten -> Dense(64, relu) -> Dropout(0.5)
    -> Dense(1, sigmoid).

    Changes from the earlier version:
      * Flatten now consumes the output of the second pooling layer.
        Previously it was applied to the second convolution, so the second
        MaxPool2D was created but never part of the model.
      * Hidden layers use ReLU, as CNN model 1 does for its convolution;
        without activations the conv/dense layers are purely linear.

    Returns:
        A ``tf.keras.Model`` mapping a ``(200, 200, 2)`` image to one
        sigmoid output.
    """
    img_in = Input(shape=(200, 200, 2))
    conv1 = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(img_in)
    pool1 = MaxPool2D(strides=(2, 2))(conv1)
    conv2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(pool1)
    pool2 = MaxPool2D(strides=(2, 2))(conv2)

    # Flatten the *pooled* feature maps so the second downsampling step is
    # actually used and the dense layer receives fewer inputs.
    flattened = Flatten()(pool2)
    hidden = Dense(64, activation='relu')(flattened)
    # Dropout to reduce overfitting.
    dropped = Dropout(0.5)(hidden)

    output = Dense(1, activation='sigmoid')(dropped)
    model = tf.keras.Model(inputs=img_in, outputs=output)
    return model


# Instantiate CNN model 3 and configure it for training.
model = build()
model.compile(
    optimizer=Adam(),
    loss='binary_crossentropy',
    metrics=['BinaryAccuracy', 'AUC'],
)

# Print the layer-by-layer overview of the model.
model.summary()

In [None]:
# Fit the most recently built and compiled `model`.
# Number of passes over the training data.
epochs = 40
# Number of samples processed before the model parameters are updated.
batch_size = 70
# Training happens when fit() is called. The leading -1 lets numpy infer the
# number of samples, so the cell no longer depends on the training set
# containing exactly 487 images. The reshape still fails loudly if the data
# is not 200 x 200 x 2 per image.
history = model.fit(x=x_train.reshape(-1, 200, 200, 2),
                    y=y_train,
                    batch_size=batch_size,
                    # Hold out 20% of the training data for validation.
                    validation_split=0.2,
                    epochs=epochs)

In [None]:
# Test images, read from the 'test' sub-folder with the same loader as the
# training set.
x_test = load_data(folder_in='test')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Sigmoid outputs of the model, one value per test image.
y_test = model.predict(x_test)

y_test_df = pd.DataFrame()
# Row i of the predictions is given id i.
y_test_df['id'] = np.arange(len(y_test))
# Turn the probabilities into 0/1 labels with a 0.5 threshold. A plain
# astype(int) truncates every value below 1.0 to 0, so almost every image
# would be labelled 0. ravel() flattens the predictions into a 1-D column.
y_test_df['infection'] = (y_test.ravel() > 0.5).astype(int)
y_test_df.to_csv('insert_new_name.csv', index=False)