In [1]:
# Stretch the classic-notebook container to the full browser width so wide
# outputs are not clipped. `display` and `HTML` come from the public
# IPython.display module; importing `display` from IPython.core.display is
# deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import tensorflow as tf
# Report how many GPU devices TensorFlow can see.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available", len(gpu_devices))

Num GPUs Available 0


In [3]:
from src.preprocessing import train_val_test_split, parse_input_file, transform_df
from src.metrics import one_hot_encode_labels, plot_metrics, compute_score
from src.utility import find_largest_resolution, unzip

In [4]:
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.metrics import TruePositives, FalsePositives, TrueNegatives, FalseNegatives, BinaryAccuracy, Precision, Recall, AUC
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.losses import SparseCategoricalCrossentropy, CategoricalCrossentropy
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Dropout, Activation, SpatialDropout2D, BatchNormalization
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import History, EarlyStopping, ReduceLROnPlateau
from typing import Tuple
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns

  import pandas.util.testing as tm


In [6]:
# --- Input locations (adjust to your local layout) ---
# Annotation file passed to parse_input_file as the dataframe source.
DF_FILEPATH = 'list_category_img.txt'
# CSV passed to parse_input_file as the category-label source.
CAT_LABEL_FILEPATH = 'category_label.csv'
# DTIF = "directory to image folder"; built with os.path.join so the
# separator matches the host platform.
DTIF = os.path.join('data', 'img')

In [7]:
# Parse the annotation files into a dataframe and immediately run the
# project's transform step on it.
df = transform_df(parse_input_file(DF_FILEPATH, DTIF, CAT_LABEL_FILEPATH))
# Debug shortcut: uncomment to work on a slice only (keep disabled for real runs).
# df = df.iloc[1:30000]
# Partition the transformed frame into the three splits used downstream.
train_df, val_df, test_df = train_val_test_split(df)

In [9]:
# Count rows per label in the 'class' column. The recorded output of this cell
# shows three labels (1, 2, 3) with 58963 rows each, i.e. a balanced dataset.
df['class'].value_counts()

2    58963
1    58963
3    58963
Name: class, dtype: int64

In [None]:
# All three generators must apply the SAME pixel scaling. Previously only the
# training generator divided by 255, so the network was fitted on inputs in
# [0, 1] but validated and tested on raw 0-255 pixels, which makes val_loss
# (and the callbacks monitoring it) and the test scores meaningless.
train_datagen = ImageDataGenerator(
    rescale=1./255
)
val_datagen = ImageDataGenerator(
    rescale=1./255
)
test_datagen = ImageDataGenerator(
    rescale=1./255
)

In [None]:
# Keyword arguments common to every split: 150x150 RGB images, batches of 64,
# one-hot ('categorical') targets and no per-sample weights.
shared_flow_kwargs = dict(
    weight_col=None,
    target_size=(150, 150),
    color_mode='rgb',
    batch_size=64,
    class_mode='categorical',
)

# Only the training stream is shuffled.
train_gen = train_datagen.flow_from_dataframe(train_df, shuffle=True, **shared_flow_kwargs)

# Validation and test batches keep the dataframe order (shuffle=False).
val_gen = val_datagen.flow_from_dataframe(val_df, shuffle=False, **shared_flow_kwargs)

test_gen = test_datagen.flow_from_dataframe(test_df, shuffle=False, **shared_flow_kwargs)

In [None]:
# Preview up to nine images from a single training batch in a 3x3 grid.
image_batch, labels_batch = next(iter(train_gen))

# The generator exposes {class name: one-hot column index}; invert it so each
# panel can be titled with a readable class name instead of a raw one-hot row.
index_to_class = {index: name for name, index in train_gen.class_indices.items()}

fig, axes = plt.subplots(3, 3, figsize=(16, 10))
for i, ax in enumerate(axes.flat):
    ax.axis("off")
    if i >= len(image_batch):
        # Short batch (fewer than nine images): leave the remaining panels blank.
        continue
    ax.imshow(image_batch[i])
    # Labels are one-hot rows because class_mode='categorical'; argmax recovers
    # the column index of the active class.
    class_index = int(np.argmax(labels_batch[i]))
    ax.set_title(f"class {index_to_class[class_index]}")

In [None]:
def base_model(num_classes: int = 3,
               input_shape: Tuple[int, int, int] = (150, 150, 3)) -> Sequential:
    """Build the baseline CNN classifier (uncompiled).

    Architecture: one 5x5 convolution block (ReLU, spatial dropout, batch
    normalisation, ReLU), 5x5 max pooling with stride 5, then a 32-unit dense
    layer with dropout and a softmax output.

    Args:
        num_classes: Number of units in the softmax output layer. It must equal
            the width of the one-hot targets fed to the model. The default is 3
            because the class distribution shown in this notebook has three
            labels; the previous hard-coded value of 5 did not match them.
        input_shape: (height, width, channels) of the input images. The default
            matches the 150x150 RGB images produced by the data generators.

    Returns:
        An uncompiled ``Sequential`` model.

    Raises:
        ValueError: If ``num_classes`` is smaller than 2.
    """
    if num_classes < 2:
        raise ValueError(f"num_classes must be at least 2, got {num_classes}")

    model = Sequential()
    model.add(Input(input_shape))

    # Convolution block
    model.add(Conv2D(filters=32, kernel_size=5, padding='same'))
    model.add(Activation('relu'))
    # Drops entire feature maps rather than individual activations.
    model.add(SpatialDropout2D(0.5))

    # Batch normalisation followed by a second non-linearity
    model.add(BatchNormalization())
    model.add(Activation('relu'))

    # Spatial down-sampling by a factor of 5 in each dimension
    model.add(MaxPooling2D(pool_size=(5, 5), strides=5))

    # Classifier head
    model.add(Flatten())
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.4))

    # One softmax unit per class.
    model.add(Dense(num_classes, activation='softmax'))

    return model

In [None]:
# Metrics tracked during training and validation.
# Accuracy uses CategoricalAccuracy (argmax of prediction vs. argmax of the
# one-hot target). BinaryAccuracy would threshold every softmax output at 0.5
# and score element-wise, which inflates the figure for multi-class targets.
# The metric keeps the name 'accuracy' so the history keys are unchanged.
METRICS = [
      tf.keras.metrics.CategoricalAccuracy(name='accuracy'),
      Precision(name='precision'),
      Recall(name='recall'),
      AUC(name='auc'),
      AUC(name='prc', curve='PR')  # area under the precision-recall curve
]

# Build the network, attach optimiser/loss/metrics, and print the layer summary.
model = base_model()
model.compile(optimizer='adam', loss=CategoricalCrossentropy(), metrics=METRICS)
model.summary()

In [None]:
# Training budget; both callbacks below watch the validation loss.
number_epochs = 3

# Multiply the learning rate by 0.2 after 2 epochs without val_loss
# improvement, never going below 1e-4.
variable_learning_rate = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=2,
    min_lr=0.0001,
    verbose=1,
)

# Stop training once val_loss has not improved for 3 epochs.
# NOTE(review): with number_epochs = 3 this patience can never be exhausted;
# it only takes effect if the epoch budget is raised.
early_stoppage = EarlyStopping(monitor='val_loss', patience=3)

history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=number_epochs,
    callbacks=[variable_learning_rate, early_stoppage],
)

In [None]:
# Pass the History object returned by model.fit to the project helper from
# src.metrics (presumably draws per-epoch curves for the tracked metrics — confirm).
plot_metrics(history)

In [None]:
# Score the held-out split. test_gen is created with shuffle=False, so the
# predicted rows are produced in the same order as test_gen.labels.
y_pred = model.predict(test_gen)
y_test = one_hot_encode_labels(test_gen.labels)
compute_score("Test Dataset", y_test, y_pred)