In [14]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2 
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential, load_model, save_model
import sklearn
from sklearn.model_selection import train_test_split, cross_val_score
import seaborn as sns
from tensorflow.keras.applications import *
from efficientnet.tfkeras import EfficientNetB7
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils import resample
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [33]:
# General settings
BATCH_SIZE = 100
IMG_SIZE = 224
auto = tf.data.experimental.AUTOTUNE
n_epochs = 600

In [34]:
df = pd.read_csv("trainLabels_cropped.csv", index_col=False).drop(["Unnamed: 0", "Unnamed: 0.1"], axis=1)
df.head()

Unnamed: 0,image,level
0,10_left,0
1,10_right,0
2,13_left,0
3,13_right,0
4,15_left,1


In [35]:
img_dir = "./resized_train_cropped/resized_train_cropped/"
df["image"] = df["image"].apply(lambda pic: img_dir+pic+".jpeg")
df["level"] = df["level"].apply(lambda num: str(num))
df.head()

Unnamed: 0,image,level
0,./resized_train_cropped/resized_train_cropped/...,0
1,./resized_train_cropped/resized_train_cropped/...,0
2,./resized_train_cropped/resized_train_cropped/...,0
3,./resized_train_cropped/resized_train_cropped/...,0
4,./resized_train_cropped/resized_train_cropped/...,1


In [36]:
paths = df["image"].values
labels = df["level"].values

train_paths, test_paths, train_labels, test_labels = train_test_split(paths, labels, 
                                                                      test_size=0.33, random_state=1996)

In [37]:
t_paths_df = pd.DataFrame(train_paths, columns=["path"])
t_labels_df = pd.DataFrame(train_labels, columns=["label"])
train_df = pd.concat([t_paths_df, t_labels_df], axis=1)
train_df

Unnamed: 0,path,label
0,./resized_train_cropped/resized_train_cropped/...,0
1,./resized_train_cropped/resized_train_cropped/...,1
2,./resized_train_cropped/resized_train_cropped/...,2
3,./resized_train_cropped/resized_train_cropped/...,0
4,./resized_train_cropped/resized_train_cropped/...,2
...,...,...
23517,./resized_train_cropped/resized_train_cropped/...,2
23518,./resized_train_cropped/resized_train_cropped/...,1
23519,./resized_train_cropped/resized_train_cropped/...,0
23520,./resized_train_cropped/resized_train_cropped/...,0


In [42]:
t_paths_df = pd.DataFrame(test_paths, columns=["path"])
t_labels_df = pd.DataFrame(test_labels, columns=["label"])
test_df = pd.concat([t_paths_df, t_labels_df], axis=1)
#test_df["label"] = test_df["label"].astype(int)
test_df

Unnamed: 0,path,label
0,./resized_train_cropped/resized_train_cropped/...,0
1,./resized_train_cropped/resized_train_cropped/...,0
2,./resized_train_cropped/resized_train_cropped/...,0
3,./resized_train_cropped/resized_train_cropped/...,0
4,./resized_train_cropped/resized_train_cropped/...,0
...,...,...
11581,./resized_train_cropped/resized_train_cropped/...,1
11582,./resized_train_cropped/resized_train_cropped/...,0
11583,./resized_train_cropped/resized_train_cropped/...,0
11584,./resized_train_cropped/resized_train_cropped/...,2


In [39]:
def preprocess_data(img):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.addWeighted(img, 4, cv2.GaussianBlur(img, (0,0), 10), -4, 128)
    return img

In [40]:
train_gen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True, rotation_range=360,
                              rescale=(1/255.), preprocessing_function=preprocess_data, validation_split=0.15,
                              )
test_gen = ImageDataGenerator(rescale=(1/255.))

In [43]:
training_gen = train_gen.flow_from_dataframe(train_df, x_col="path", y_col="label", target_size=(224,224),
                                            class_mode="sparse", batch_size=BATCH_SIZE, subset="training")

valid_gen = train_gen.flow_from_dataframe(train_df, x_col="path", y_col="label", target_size=(224,224),
                                            class_mode="sparse", batch_size=BATCH_SIZE, subset="validation")

test_gen = test_gen.flow_from_dataframe(test_df, x_col="path", y_col="label", target_size=(224,224),
                                        class_mode="sparse", batch_size=BATCH_SIZE, shuffle=False)

Found 19994 validated image filenames belonging to 5 classes.
Found 3528 validated image filenames belonging to 5 classes.
Found 11586 validated image filenames belonging to 5 classes.


In [24]:
def get_model(which="mobilenet", dense=False, img_size=(IMG_SIZE,IMG_SIZE,3)):
    """
    Parameters:
    dense - (bool) If True, makes the top layer (classifier) a fully-connected dense layer (default). If False the top layer is a global average pooling layer.

    which - (String) {Options:  mobilenet=MobileNetV2(default), incepresnet=InceptionResNetV2, densenet=DenseNet121, effnet=EfficientNetB7} (case-insensitive) which model to use as the base classifier

    img_size - (Tuple) Size of input images
        
    Returns a model
    
    """
    # Choosing which model to use as our base
    which = which.lower()
    
    if which=="incepresnet":
        base = InceptionResNetV2(input_shape=(img_size), include_top=False, weights="imagenet")
    elif which=="densenet":
        base = DenseNet121(input_shape=(img_size), include_top=False, weights="imagenet")
    elif which=="effnet":
        base = EfficientNetB7(input_shape=(img_size), include_top=False, weights="imagenet")
    else: 
        base = MobileNetV2(input_shape=(img_size), include_top=False, weights="imagenet")
    
    # Choosing the top for our model
    if dense:
        flatten = Flatten()
        fc1 = Dense(512, activation="relu")
        fc2 = Dense(BATCH_SIZE, activation="relu")
        model = Sequential([
            base,
            flatten,
            fc1,
            fc2
        ])
    else:
        GAP = GlobalAveragePooling2D()
        dropout = Dropout(0.5)
        model = Sequential([base, dropout, GAP])
    
    output = Dense(5, activation="softmax")
    model.add(output)
    
    optimizer = tf.keras.optimizers.Adam(.0005)
    loss = tf.keras.losses.SparseCategoricalCrossentropy()
    
    model.compile(loss=loss, optimizer=optimizer, metrics=["acc"])
    model.summary()
    return model

In [25]:
# Training parameters and callbacks
STEPS = len(train_paths) // (4*BATCH_SIZE)
reduce_lr  = tf.keras.callbacks.ReduceLROnPlateau(patience=5, verbose=1)
early_stop = EarlyStopping(monitor="val_loss", patience=15, restore_best_weights=True)

# Setting up class weights
freq = [(1 - df["level"].value_counts()[i] / len(df)) for i in range(5)]
class_weights = {
    0: freq[0],
    1: freq[1],
    2: freq[2],
    3: freq[3],
    4: freq[4]
}

In [26]:
model = get_model()
model.fit(training_gen, validation_data=valid_gen, epochs=n_epochs, batch_size=BATCH_SIZE, 
          class_weight=class_weights, callbacks=[early_stop, reduce_lr])

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_224 (Model) (None, 7, 7, 1280)        2257984   
_________________________________________________________________
dropout (Dropout)            (None, 7, 7, 1280)        0         
_________________________________________________________________
global_average_pooling2d (Gl (None, 1280)              0         
_________________________________________________________________
dense (Dense)                (None, 5)                 6405      
Total params: 2,264,389
Trainable params: 2,230,277
Non-trainable params: 34,112
_________________________________________________________________
Epoch 1/600
Epoch 2/600
Epoch 3/600
Epoch 4/600
Epoch 5/600
Epoch 6/600
Epoch 7/600
Epoch 8/600
Epoch 9/600
Epoch 10/600
Epoch 11/600
Epoch 00011: ReduceLROnPlateau reducing learning rate to 5.0000002374872565e-05.
Epoch 12/600
Epoch 13/60

Epoch 45/600
Epoch 46/600
Epoch 47/600
Epoch 48/600
Epoch 49/600
Epoch 00049: ReduceLROnPlateau reducing learning rate to 5.000000413701855e-09.


<tensorflow.python.keras.callbacks.History at 0x7ffa780ca4e0>

In [27]:
save_model(model, "mobilenet_img_preprocess.h5")

In [48]:
preds = np.argmax(model.predict(test_gen), axis=1).astype(str)

In [49]:
print(classification_report(test_labels, preds))

              precision    recall  f1-score   support

           0       0.77      0.80      0.78      8610
           1       0.09      0.04      0.05       770
           2       0.20      0.25      0.22      1721
           3       0.30      0.05      0.08       286
           4       0.34      0.16      0.21       199

    accuracy                           0.64     11586
   macro avg       0.34      0.26      0.27     11586
weighted avg       0.62      0.64      0.62     11586



In [50]:
old = load_model("models/mobilenet_class_weights.h5")
preds = np.argmax(old.predict(test_gen), axis=1).astype(str)
print(classification_report(test_labels, preds))

              precision    recall  f1-score   support

           0       0.86      0.91      0.88      8610
           1       0.17      0.09      0.12       770
           2       0.54      0.54      0.54      1721
           3       0.56      0.28      0.37       286
           4       0.68      0.44      0.54       199

    accuracy                           0.78     11586
   macro avg       0.56      0.45      0.49     11586
weighted avg       0.75      0.78      0.76     11586

