In [189]:
import pandas as pd
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten , Dropout, MaxPooling2D
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam

# importing image and image preprocessing

In [190]:
train_image_directory = 'archive/train/'
test_image_directory = 'archive/test/'
size = 128
good_train_images = []
good_train_labels = []
bad_train_images = []
bad_train_labels = []

test_images = []

In [191]:
good_train_images_dir = os.listdir(train_image_directory + 'good/')

for i,image_name in enumerate(good_train_images_dir):
    image = cv2.imread(train_image_directory + 'good/' + image_name)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  
    image = cv2.resize(image, (size, size))  
    image = np.array(image)
    good_train_images.append(image)
    good_train_labels.append(1)

In [192]:
bad_train_images_dir = os.listdir(train_image_directory + 'not-good/')

for i,image_name in enumerate(bad_train_images_dir):
    image = cv2.imread(train_image_directory + 'not-good/' + image_name)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  
    image = cv2.resize(image, (size, size))  
    image = np.array(image)
    bad_train_images.append(image)
    bad_train_labels.append(0)

In [193]:
#test images
test_images_dir = os.listdir(test_image_directory)

for i,image_name in enumerate(test_images_dir):
    image = cv2.imread(test_image_directory + image_name)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  
    image = cv2.resize(image, (size, size))  
    image = np.array(image)
    test_images.append(image)

#normalize the test images
test_images = np.array(test_images)
test_images = test_images.astype('float32') / 255.0
test_images = (test_images - np.mean(test_images)) / np.std(test_images)

In [194]:
# To normalize the pixel values combine the good and bad images 

all_train_images = good_train_images + bad_train_images

# Normalize the pixel values
all_train_images = np.array(all_train_images)
all_train_images = all_train_images.astype('float32') / 255.0

# value - mean / std
all_train_images = (all_train_images - np.mean(all_train_images)) / np.std(all_train_images)

all_train_images.shape

(300, 128, 128, 3)

In [195]:
good_train_images = np.array(good_train_images)
bad_train_images = np.array(bad_train_images)

print(good_train_images.shape)
print(bad_train_images.shape)

(250, 128, 128, 3)
(50, 128, 128, 3)


In [196]:
# now split all_train_images into good_train_images and bad_train_images

good_train_images = all_train_images[:len(good_train_images)]
bad_train_images = all_train_images[len(good_train_images):]

In [197]:
all_train_labels = good_train_labels + bad_train_labels
all_train_labels = np.array(all_train_labels)

# balance the imbalanced data

In [198]:
# oversample the bad images
from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state=42)
all_train_images, all_train_labels = ros.fit_resample(all_train_images.reshape(-1, size*size*3), all_train_labels)
all_train_images = all_train_images.reshape(-1, size, size, 3)


In [199]:
print(all_train_images.shape)
print(all_train_labels.shape)

(500, 128, 128, 3)
(500,)


In [200]:
# find the number of labels 1 and 0
unique, counts = np.unique(all_train_labels, return_counts=True)
print(np.asarray((unique, counts)).T)

[[  0 250]
 [  1 250]]


# Train and validation split

In [201]:
# split the data into train and validation
train_images, validation_images, train_labels, validation_labels = train_test_split(all_train_images, all_train_labels, test_size=0.2)

# Model

In [202]:
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(size, size, 3)))
model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(64, (3, 3), activation='relu'))
# model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(128, (3, 3), activation='relu'))
# model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

In [203]:
model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_16 (Conv2D)          (None, 126, 126, 32)      896       
                                                                 
 max_pooling2d_16 (MaxPoolin  (None, 63, 63, 32)       0         
 g2D)                                                            
                                                                 
 flatten_8 (Flatten)         (None, 127008)            0         
                                                                 
 dense_16 (Dense)            (None, 128)               16257152  
                                                                 
 dropout_8 (Dropout)         (None, 128)               0         
                                                                 
 dense_17 (Dense)            (None, 1)                 129       
                                                      

In [204]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Precision, Recall

# define the optimizer
optimizer = Adam(learning_rate=0.001)

# define the evaluation metrics F1 score
#to do

# compile the model and use both precision and recall as metrics




In [205]:
import tensorflow.keras.backend as K

def f1_score(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (actual_positives + K.epsilon())
    f1 = 2 * ((precision * recall) / (precision + recall + K.epsilon()))
    return f1


In [206]:
metrics = [Precision(), Recall(), f1_score]
#use tensorboard to hyperparameter tuning

model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=metrics)

In [207]:
callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=3, monitor='val_loss'),
    tf.keras.callbacks.TensorBoard(log_dir='logs'),
]

In [208]:
# train the model
history = model.fit(train_images, train_labels, epochs=20, batch_size=16, validation_data=(validation_images, validation_labels),callbacks=callbacks)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [209]:
# predict the test images
predictions = model.predict(test_images)



In [210]:
print(predictions.max())
print(predictions.min())
print(predictions.mean())

0.9996854
0.010934816
0.842386


In [211]:
# convert the predictions into 0 and 1
predictions = np.where(predictions > 0.5, 1, 0)

In [212]:
predictions

array([[1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
    

In [213]:
#if the prediction is 1 then the image is good else it is bad
for i in range(len(predictions)):
    if predictions[i] == 1:
        print("test_{} is good".format(i))
    else:
        print("test_{} is not-good".format(i))

test_0 is good
test_1 is good
test_2 is good
test_3 is good
test_4 is good
test_5 is good
test_6 is good
test_7 is good
test_8 is good
test_9 is good
test_10 is good
test_11 is good
test_12 is good
test_13 is good
test_14 is not-good
test_15 is good
test_16 is good
test_17 is good
test_18 is good
test_19 is good
test_20 is good
test_21 is good
test_22 is good
test_23 is good
test_24 is good
test_25 is good
test_26 is good
test_27 is good
test_28 is good
test_29 is good
test_30 is good
test_31 is good
test_32 is good
test_33 is good
test_34 is good
test_35 is good
test_36 is good
test_37 is not-good
test_38 is good
test_39 is not-good
test_40 is good
test_41 is not-good
test_42 is good
test_43 is not-good
test_44 is good
test_45 is good
test_46 is good
test_47 is good
test_48 is good
test_49 is good
test_50 is good
test_51 is not-good
test_52 is good
test_53 is good
test_54 is good
test_55 is good
test_56 is good
test_57 is good
test_58 is not-good
test_59 is not-good
test_60 is good
te

In [214]:
# count the number of good and bad images
unique, counts = np.unique(predictions, return_counts=True)
print(np.asarray((unique, counts)).T)

[[  0  22]
 [  1 158]]


In [215]:
callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=2, monitor='val_loss'),
    tf.keras.callbacks.TensorBoard(log_dir='logs'),
]