In [47]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Dropout, BatchNormalization
from keras.callbacks import Callback, EarlyStopping,ModelCheckpoint, ReduceLROnPlateau
from keras import Model
from tensorflow.keras.optimizers import Adam
# System libraries
from pathlib import Path
import os.path
# Metrics
from sklearn.metrics import classification_report, confusion_matrix

In [48]:
#check dataset
import os
import PIL
from PIL import Image
from pathlib import Path
from PIL import UnidentifiedImageError
dataset = "/home/bkcs/NIMA/test_img_4-5, 6-7"
path = Path(dataset).rglob("*.jpg")
for img_p in os.listdir(dataset):
    try:
        img = Image.open(os.path.join(dataset, img_p))
    except PIL.UnidentifiedImageError:
        os.remove(os.path.join(dataset, img_p))



In [71]:
BATCH_SIZE = 8
TARGET_SIZE = (400, 400)
dataset = "/home/bkcs/NIMA/dataset4"
def convert_path_to_df(dataset):
    image_dir = Path(dataset)

    # Get filepaths and labels
    filepaths = list(image_dir.glob(r'**/*.JPG')) + list(image_dir.glob(r'**/*.jpg')) + list(image_dir.glob(r'**/*.png')) + list(image_dir.glob(r'**/*.PNG'))

    labels = list(map(lambda x: os.path.split(os.path.split(x)[0])[1], filepaths))

    filepaths = pd.Series(filepaths, name='Filepath').astype(str)
    labels = pd.Series(labels, name='Label')

    # Concatenate filepaths and labels
    image_df = pd.concat([filepaths, labels], axis=1)
    return image_df

image_df = convert_path_to_df(dataset)

In [72]:
image_df

Unnamed: 0,Filepath,Label
0,/home/bkcs/NIMA/dataset4/Medium/446171.jpg,Medium
1,/home/bkcs/NIMA/dataset4/Medium/44807.jpg,Medium
2,/home/bkcs/NIMA/dataset4/Medium/598245.jpg,Medium
3,/home/bkcs/NIMA/dataset4/Medium/785652.jpg,Medium
4,/home/bkcs/NIMA/dataset4/Medium/6493.jpg,Medium
...,...,...
139042,/home/bkcs/NIMA/dataset4/High/411332.jpg,High
139043,/home/bkcs/NIMA/dataset4/High/473409.jpg,High
139044,/home/bkcs/NIMA/dataset4/High/502630.jpg,High
139045,/home/bkcs/NIMA/dataset4/High/22284.jpg,High


In [4]:
from sklearn.utils import resample
def apply_oversampling(image_df):
    # Separate majority and minority classes
    
    majority_df = image_df[image_df['Label'] == 'Medium']
    minority_df_Low = image_df[image_df['Label'] == "Low"]
    minority_df_High = image_df[image_df['Label'] == "High"]

    # Oversample the minority class to have the same number of instances as the majority class
    oversampled_minority_df_1 = resample(minority_df_Low ,n_samples=len(majority_df), replace=True, random_state=42)
    oversampled_minority_df_2 = resample(minority_df_High, n_samples=len(majority_df), replace=True, random_state=42)
    # Combine the oversampled minority class and the majority class
    oversampled_df = pd.concat( [oversampled_minority_df_1, majority_df, oversampled_minority_df_2 ])

    return oversampled_df
# image_df = apply_oversampling(image_df)

In [20]:
from sklearn.utils import resample
def apply_downsampling(image_df):
    # Separate majority and minority classes
    
    majority_df = image_df[image_df['Label'] == 'Medium']
    minority_df_Low = image_df[image_df['Label'] == "Low"]
    minority_df_High = image_df[image_df['Label'] == "High"]

    # Oversample the minority class to have the same number of instances as the majority class
    downsampled_majority_df_1 = resample(majority_df ,n_samples=len(minority_df_Low), replace=True, random_state=42)
    upsampled_majority_df_2 = resample(minority_df_High  ,n_samples=len(minority_df_Low), replace=True, random_state=42)
    # Combine the oversampled minority class and the majority class
    downsampled_df = pd.concat( [upsampled_majority_df_2 ,   minority_df_Low , downsampled_majority_df_1 ])

    return downsampled_df
#image_df = apply_downsampling(image_df)

In [73]:
from sklearn.utils import resample
def hybrid_sampling(image_df):
    # Separate majority and minority classes
    
    majority_df = image_df[image_df['Label'] == 'Medium']
    minority_df_Low = image_df[image_df['Label'] == "Low"]
    minority_df_High = image_df[image_df['Label'] == "High"]
    n_samples = (len(majority_df) + len(minority_df_Low)+len(minority_df_High))//3 #(len(majority_df)
    # Oversample the minority class to have the same number of instances as the majority class
    downsampled_majority_df = resample( majority_df ,n_samples=n_samples, replace=True, random_state=42)
    oversampled_minority_df_1 = resample(minority_df_High, n_samples=n_samples, replace=True, random_state=42)
    oversampled_minority_df_2 = resample(minority_df_Low, n_samples=n_samples, replace=True, random_state=42)
    # Combine the oversampled minority class and the majority class
    oversampled_df = pd.concat( [oversampled_minority_df_1, downsampled_majority_df , oversampled_minority_df_2 ])

    return oversampled_df

In [74]:
# Separate in train and test data
train_df, test_df = train_test_split(image_df, test_size=0.2, shuffle=True, random_state=42)
#train_df = hybrid_sampling(train_df)

train_generator = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.vgg19.preprocess_input,
    validation_split=0.1
)
val_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=TARGET_SIZE,
    color_mode='rgb',
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=42,
    subset='validation'
)

test_generator = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.vgg19.preprocess_input,
)
# Split the data into three categories.
train_df = hybrid_sampling(train_df)
train_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=TARGET_SIZE,
    color_mode='rgb',
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=42,
    subset='training'
)
print(train_images)

print(val_images)

test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col='Filepath',
    y_col='Label',
    target_size=TARGET_SIZE,
    color_mode='rgb',
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    shuffle=False
)

Found 11123 validated image filenames belonging to 3 classes.
Found 100114 validated image filenames belonging to 3 classes.
<keras.preprocessing.image.DataFrameIterator object at 0x7f737c601b70>
<keras.preprocessing.image.DataFrameIterator object at 0x7f73e45a8730>
Found 27810 validated image filenames belonging to 3 classes.


In [64]:
from tensorflow import keras
import numpy as np
# Load the pretained model

pretrained_model = tf.keras.applications.efficientnet.EfficientNetB4(
    input_shape=(400, 400, 3),
    weights = 'imagenet',
    include_top=False,
    pooling='max'
)

pretrained_model.trainable = True
# Create checkpoint callback
checkpoint_path = "/home/bkcs/NIMA/weights/EfficientNetB4_classification_weights_hybridsample_3cls(5-6)(400x400).h5"
checkpoint_callback = ModelCheckpoint(checkpoint_path,
                                      save_weights_only=True,
                                      monitor="val_accuracy",
                                      save_best_only=True)

# Setup EarlyStopping callback to stop training if model's val_loss doesn't improve for 3 epochs
early_stopping = EarlyStopping(monitor = "val_accuracy", # watch the val loss metric
                               patience = 5,
                               restore_best_weights = True) 
# if val loss decreases for 3 epochs in a row, stop training

reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.2, patience=3, min_lr=1e-6)

METRICS = [
      keras.metrics.TruePositives(name='tp'),
      keras.metrics.FalsePositives(name='fp'),
      keras.metrics.TrueNegatives(name='tn'),
      keras.metrics.FalseNegatives(name='fn'), 
      keras.metrics.BinaryAccuracy(name='accuracy'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
      keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]

In [65]:
#augment
crop_size = (150, 150)
augment = tf.keras.Sequential([
  #tf.keras.layers.experimental.preprocessing.Resizing(224,224),
  tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal"),
  #tf.keras.layers.experimental.preprocessing.RandomRotation(0.1),
  tf.keras.layers.experimental.preprocessing.RandomCrop(*crop_size),
  tf.keras.layers.experimental.preprocessing.RandomZoom(0.1),
])
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [1]:
import tensorflow as tf
tf.config.list_physical_devices('GPU')

2023-07-11 20:49:02.246733: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-11 20:49:02.895271: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-07-11 20:49:04.864172: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/bkcs/anaconda3/lib/:/home/bkcs/anaconda3/lib/python3.10/site-packages/nvidia/cudnn/lib:/home/bkcs/anaconda3/lib/:/home/bkcs/anaconda3/lib/python3.10/site-packages/nvidia/cudnn/lib:/home/bkcs/anaconda3/lib/

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [9]:
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')

Your GPU may run slowly with dtype policy mixed_float16 because it does not have compute capability of at least 7.0. Your GPU:
  NVIDIA GeForce GTX 1080 Ti, compute capability 6.1
See https://developer.nvidia.com/cuda-gpus for a list of GPUs and their compute capabilities.


2023-07-10 09:00:27.722324: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355


In [66]:
inputs = pretrained_model.input
x = augment(inputs)
x = Dense(128, activation='relu')(pretrained_model.output)
x = BatchNormalization()(x)
x = Dropout(0.45)(x)
x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.45)(x)


outputs = Dense(3, activation='softmax')(x)

model = Model(inputs=inputs, outputs=outputs)
model.load_weights('/home/bkcs/NIMA/weights/EfficientNetB4_classification_weights_hybridsample_3cls(5-6)(400x400).h5')



model.compile(
    optimizer=Adam(0.0001),
    loss='categorical_crossentropy',
    metrics=METRICS
)

# history = model.fit(
#     train_images,
#     steps_per_epoch=len(train_images),
#     validation_data=val_images,
#     validation_steps=len(val_images),
#     epochs=100,
#     callbacks=[
#         early_stopping,
#         checkpoint_callback,
#         reduce_lr
#     ]
# )

Test on separate data

In [43]:
results = model.evaluate(test_images, verbose=0)


In [44]:
import numpy as np
# Predict the label of the test_images
pred = model.predict(test_images)
pred = np.argmax(pred,axis=1)

# Map the label
labels = (train_images.class_indices)
labels = dict((v,k) for k,v in labels.items())
pred = [labels[k] for k in pred]



In [None]:
inputs = pretrained_model.input
x = augment(inputs)
x = Dense(128, activation='relu')(pretrained_model.output)
x = BatchNormalization()(x)
x = Dropout(0.45)(x)
x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.45)(x)


outputs = Dense(3, activation='softmax')(x)

model = Model(inputs=inputs, outputs=outputs)
model.load_weights('/home/bkcs/NIMA/weights/EfficientNetB4_classification_weights_downsample_3cls(official)(500x500).h5')



model.compile(
    optimizer=Adam(0.0001),
    loss='categorical_crossentropy',
    metrics=METRICS
)

history = model.fit(
    train_images,
    steps_per_epoch=len(train_images),
    validation_data=val_images,
    validation_steps=len(val_images),
    epochs=100,
    callbacks=[
        early_stopping,
        checkpoint_callback,
        reduce_lr
    ]
)

Epoch 1/100
  371/12515 [..............................] - ETA: 1:09:33 - loss: 0.6282 - tp: 2140.0000 - fp: 707.0000 - tn: 5229.0000 - fn: 828.0000 - accuracy: 0.8276 - precision: 0.7517 - recall: 0.7210 - auc: 0.8972 - prc: 0.8137



Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100


In [45]:
y_test = list(test_df.Label)
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

        High       0.42      0.25      0.32       630
         Low       0.61      0.57      0.59      1542
      Medium       0.96      0.97      0.96     25638

    accuracy                           0.93     27810
   macro avg       0.66      0.60      0.62     27810
weighted avg       0.92      0.93      0.93     27810



In [46]:
confusion_matrix(y_test, pred, labels=list(labels.values()))

array([[  160,     0,   470],
       [    0,   886,   656],
       [  225,   578, 24835]])

EfficientNetB4 testing 

In [23]:
from keras.models import Model
pretrained_model = tf.keras.applications.efficientnet.EfficientNetB4(
    input_shape=(224, 224, 3),
    weights = 'imagenet',
    include_top=False,
    pooling='max'
)
inputs = pretrained_model.input
x = augment(inputs)
x = Dense(128, activation='relu')(pretrained_model.output)
x = BatchNormalization()(x)
x = Dropout(0.45)(x)
x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.45)(x)
outputs = Dense(3, activation='softmax')(x)

model = Model(inputs=inputs, outputs=outputs)

model.load_weights('/home/bkcs/NIMA/weights/EfficientNetB4_classification_weights_upsample_3cls(official).h5')
model.compile(
    optimizer=Adam(0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
results = model.evaluate(test_images, verbose=0)

print("    Test Loss: {:.5f}".format(results[0]))
print("Test Accuracy: {:.2f}%".format(results[1] * 100))



    Test Loss: 0.55753
Test Accuracy: 87.10%


In [24]:
import numpy as np
# Predict the label of the test_images
pred = model.predict(test_images)
pred = np.argmax(pred,axis=1)

# Map the label
labels = (train_images.class_indices)
labels = dict((v,k) for k,v in labels.items())
pred = [labels[k] for k in pred]

In [25]:
y_test = list(test_df.Label)
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

        High       0.09      0.16      0.11       624
         Low       0.18      0.57      0.27      1623
      Medium       0.97      0.89      0.93     47578

    accuracy                           0.87     49825
   macro avg       0.41      0.54      0.44     49825
weighted avg       0.94      0.87      0.90     49825



In [26]:
confusion_matrix(y_test, pred, labels=list(labels.values()))

array([[   97,     2,   525],
       [    1,   928,   694],
       [  985,  4220, 42373]])

In [16]:
from utils.score_utils import mean_score
from keras.applications.efficientnet import EfficientNetB2
from keras.preprocessing.image import img_to_array, load_img
import numpy as np
from keras.applications.efficientnet import preprocess_input
# from sklearn.metrics import mean_squared_error
image_size = 224
base_model = EfficientNetB2(input_shape=(image_size, image_size, 3), include_top=False, pooling='avg', weights='imagenet')
x = Dense(128, activation='relu')(base_model.output)
x = BatchNormalization()(x)
x = Dropout(0.45)(x)
x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.75)(x)

x = Dense(10, activation='softmax')(x)

model2 = Model(base_model.input, x)
for i in range(len(pred)):
    if(pred[i]=="High"):
       model2.load_weights('/home/bkcs/NIMA/weights/efficientNetB2_weights_Medium(Best).h5')
       score = model2.predict(test_images[i], batch_size=1, verbose=0)[0]
       print(test_df['Filepath'][i]," : ", mean_score(score))
#score_list=[]
#score_dist_list=[]


# scores = model2.predict(test_images, batch_size=1, verbose=0)[0]
#         #score_dist_list.append(scores)
# mean = mean_score(scores)
        #print(mean)

# score_list.append(mean)
# #test_score_mean=[]
# for value in test_score:
# test_score_mean.append(mean_score(value))

ValueError: in user code:

    File "/home/bkcs/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1801, in predict_function  *
        return step_function(self, iterator)
    File "/home/bkcs/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1790, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/bkcs/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1783, in run_step  **
        outputs = model.predict_step(data)
    File "/home/bkcs/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1751, in predict_step
        return self(x, training=False)
    File "/home/bkcs/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/home/bkcs/.local/lib/python3.10/site-packages/keras/engine/input_spec.py", line 200, in assert_input_compatibility
        raise ValueError(f'Layer "{layer_name}" expects {len(input_spec)} input(s),'

    ValueError: Layer "model_5" expects 1 input(s), but it received 2 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(1, 224, 224, 3) dtype=float32>, <tf.Tensor 'IteratorGetNext:1' shape=(1, 3) dtype=float32>]


Test on increasing resolution to 400x400 (downsampling)


In [25]:
results = model.evaluate(test_images, verbose=0)

print("    Test Loss: {:.5f}".format(results[0]))
print("Test Accuracy: {:.2f}%".format(results[1] * 100))

    Test Loss: 0.69030
Test Accuracy: 3609000.00%


In [26]:
import numpy as np
# Predict the label of the test_images
pred = model.predict(test_images)
pred = np.argmax(pred,axis=1)

# Map the label
labels = (train_images.class_indices)
labels = dict((v,k) for k,v in labels.items())
pred = [labels[k] for k in pred]



In [27]:
y_test = list(test_df.Label)
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

        High       0.05      0.41      0.10       624
         Low       0.13      0.74      0.22      1623
      Medium       0.98      0.74      0.84     47578

    accuracy                           0.73     49825
   macro avg       0.39      0.63      0.38     49825
weighted avg       0.94      0.73      0.81     49825



In [28]:
confusion_matrix(y_test, pred, labels=list(labels.values()))

array([[  253,    10,   361],
       [    8,  1195,   420],
       [ 4423,  8167, 34988]])

Test on increasing resolution to 400x400

In [16]:
results = model.evaluate(test_images, verbose=0)

print("    Test Loss: {:.5f}".format(results[0]))
print("Test Accuracy: {:.2f}%".format(results[1] * 100))

    Test Loss: 0.46930
Test Accuracy: 4683500.00%


In [17]:
import numpy as np
# Predict the label of the test_images
pred = model.predict(test_images)
pred = np.argmax(pred,axis=1)

# Map the label
labels = (train_images.class_indices)
labels = dict((v,k) for k,v in labels.items())
pred = [labels[k] for k in pred]



In [18]:
y_test = list(test_df.Label)
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

        High       0.11      0.04      0.06       624
         Low       0.34      0.37      0.35      1623
      Medium       0.97      0.97      0.97     47578

    accuracy                           0.94     49825
   macro avg       0.47      0.46      0.46     49825
weighted avg       0.93      0.94      0.94     49825



In [19]:
confusion_matrix(y_test, pred, labels=list(labels.values()))

array([[   25,     0,   599],
       [    0,   600,  1023],
       [  202,  1166, 46210]])

Test on undersampling to len(High)

In [10]:
results = model.evaluate(test_images, verbose=0)

print("    Test Loss: {:.5f}".format(results[0]))
print("Test Accuracy: {:.2f}%".format(results[1] * 100))

    Test Loss: 0.82073
Test Accuracy: 65.08%


In [11]:
import numpy as np
# Predict the label of the test_images
pred = model.predict(test_images)
pred = np.argmax(pred,axis=1)

# Map the label
labels = (train_images.class_indices)
labels = dict((v,k) for k,v in labels.items())
pred = [labels[k] for k in pred]

In [12]:
y_test = list(test_df.Label)
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

        High       0.48      0.72      0.58       607
         Low       0.77      0.86      0.81      1578
      Medium       0.35      0.08      0.13       650

    accuracy                           0.65      2835
   macro avg       0.53      0.55      0.51      2835
weighted avg       0.61      0.65      0.60      2835



In [13]:
confusion_matrix(y_test, pred, labels=list(labels.values()))

array([[ 439,  116,   52],
       [ 180, 1354,   44],
       [ 300,  298,   52]])

Test on undersampling to len(Low)

In [71]:
results = model.evaluate(test_images, verbose=0)

print("    Test Loss: {:.5f}".format(results[0]))
print("Test Accuracy: {:.2f}%".format(results[1] * 100))

    Test Loss: 0.86896
Test Accuracy: 60.52%


In [72]:
import numpy as np
# Predict the label of the test_images
pred = model.predict(test_images)
pred = np.argmax(pred,axis=1)

# Map the label
labels = (train_images.class_indices)
labels = dict((v,k) for k,v in labels.items())
pred = [labels[k] for k in pred]

In [73]:
y_test = list(test_df.Label)
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

        High       0.52      0.12      0.19       616
         Low       0.72      0.68      0.70      1587
      Medium       0.53      0.72      0.61      1563

    accuracy                           0.61      3766
   macro avg       0.59      0.51      0.50      3766
weighted avg       0.61      0.61      0.58      3766



In [74]:
confusion_matrix(y_test, pred, labels=list(labels.values()))

array([[  71,   34,  511],
       [   9, 1086,  492],
       [  56,  385, 1122]])

Test on oversampling

In [10]:
from keras.models import Model
pretrained_model = tf.keras.applications.vgg19.VGG19(
    input_shape=(224, 224, 3),
    # weights = 'imagenet',
    include_top=False,
    pooling='max'
)
inputs = pretrained_model.input
x = Dense(128, activation='relu')(pretrained_model.output)
x = BatchNormalization()(x)
x = Dropout(0.45)(x)
x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.45)(x)
outputs = Dense(3, activation='softmax')(x)

model = Model(inputs=inputs, outputs=outputs)

model.load_weights('/home/bkcs/NIMA/weights/VGG19_classification_weights_upsample(2).h5')
model.compile(
    optimizer=Adam(0.00001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
results = model.evaluate(test_images, verbose=0)

print("    Test Loss: {:.5f}".format(results[0]))
print("Test Accuracy: {:.2f}%".format(results[1] * 100))



    Test Loss: 0.43153
Test Accuracy: 89.34%


In [11]:
import numpy as np
# Predict the label of the test_images
pred = model.predict(test_images)
pred = np.argmax(pred,axis=1)

# Map the label
labels = (train_images.class_indices)
labels = dict((v,k) for k,v in labels.items())
pred = [labels[k] for k in pred]


In [12]:
y_test = list(test_df.Label)
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

        High       0.09      0.08      0.09       640
         Low       0.16      0.41      0.23      1569
      Medium       0.97      0.92      0.94     47617

    accuracy                           0.89     49826
   macro avg       0.41      0.47      0.42     49826
weighted avg       0.93      0.89      0.91     49826



In [13]:
confusion_matrix(y_test, pred, labels=list(labels.values()))

array([[   53,     1,   586],
       [    0,   643,   926],
       [  534,  3262, 43821]])

Test on outside dataset on oversampling

In [76]:
BATCH_SIZE = 8
TARGET_SIZE = (400, 400)
dataset = "/home/bkcs/NIMA/Medium"
def convert_path_to_df(dataset):
    image_dir = Path(dataset)

    # Get filepaths and labels
    filepaths = list(image_dir.glob(r'**/*.JPG')) + list(image_dir.glob(r'**/*.jpg')) + list(image_dir.glob(r'**/*.png')) + list(image_dir.glob(r'**/*.PNG'))

    labels = list(map(lambda x: os.path.split(os.path.split(x)[0])[1], filepaths))

    filepaths = pd.Series(filepaths, name='Filepath').astype(str)
    labels = pd.Series(labels, name='Label')

    # Concatenate filepaths and labels
    image_df = pd.concat([filepaths, labels], axis=1)
    return image_df

image_df = convert_path_to_df(dataset)

# Separate in train and test data
train_df, test_df = train_test_split(image_df, test_size=0.999, shuffle=True, random_state=42)

test_generator = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.vgg19.preprocess_input,
)


test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col='Filepath',
    y_col='Label',
    target_size=TARGET_SIZE,
    color_mode='rgb',
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    shuffle=False
)



Found 109983 validated image filenames belonging to 1 classes.


In [77]:
image_df

Unnamed: 0,Filepath,Label
0,/home/bkcs/NIMA/Medium/160409.jpg,Medium
1,/home/bkcs/NIMA/Medium/754457.jpg,Medium
2,/home/bkcs/NIMA/Medium/656600.jpg,Medium
3,/home/bkcs/NIMA/Medium/461258.jpg,Medium
4,/home/bkcs/NIMA/Medium/856981.jpg,Medium
...,...,...
110088,/home/bkcs/NIMA/Medium/457255.jpg,Medium
110089,/home/bkcs/NIMA/Medium/41128.jpg,Medium
110090,/home/bkcs/NIMA/Medium/347293.jpg,Medium
110091,/home/bkcs/NIMA/Medium/286644.jpg,Medium


In [78]:
import numpy as np
# Predict the label of the test_images
pred = model.predict(test_images)
pred = np.argmax(pred,axis=1)

# Map the label
labels = (train_images.class_indices)
labels = dict((v,k) for k,v in labels.items())
pred = [labels[k] for k in pred]

  345/13748 [..............................] - ETA: 18:45









In [79]:
y_test = list(test_df.Label)
print(classification_report(y_test, pred))

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

        High       0.00      0.00      0.00         0
         Low       0.00      0.00      0.00         0
      Medium       1.00      0.89      0.94    109983

    accuracy                           0.89    109983
   macro avg       0.33      0.30      0.31    109983
weighted avg       1.00      0.89      0.94    109983



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [80]:
confusion_matrix(y_test, pred, labels=list(labels.values()))

array([[    0,     0,     0],
       [    0,     0,     0],
       [ 2629,  9631, 97723]])