In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable from this runtime.
# Requires the google.colab package, i.e. a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm
import os
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.callbacks import ReduceLROnPlateau, TensorBoard, ModelCheckpoint
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
import random

# Read the new dataset and save it as NumPy arrays


In [None]:
# Label table without a header row: columns are addressed by position
# (column 0 = image file name, column 1 = label).
csv = pd.read_csv(
    './train_labels.csv',
    header=None,
)

In [None]:
# Convert the label table to a 2-D NumPy array so its columns can be sliced positionally.
numpy_csv = csv.to_numpy()

In [None]:
# First column holds the image file names, second column the class labels.
files, labels = numpy_csv[:, 0], numpy_csv[:, 1]

In [None]:
image_size = 224
folderPath = './train_imgs'  # directory that holds the training images

X_train = []
y_train = []

# Walk file names and labels in lockstep. Every image is resized to
# image_size x image_size so the list can be stacked into one array later.
for fname, label in tqdm(zip(files, labels), total=len(files)):
  img = cv2.resize(plt.imread(os.path.join(folderPath, fname)), (image_size, image_size))
  X_train.append(img)
  y_train.append(label)

100%|██████████| 2619/2619 [21:40<00:00,  2.01it/s]


In [None]:
# Stack the per-image lists into single NumPy arrays (images, labels).
X_train, y_train = np.array(X_train), np.array(y_train)

In [None]:
BASE_DIR = './'  # directory the dataset file is written to
run_save = True  # set to False to skip rewriting the cached arrays

if run_save:
    dataset_path = os.path.join(BASE_DIR, "train_set.npy")
    # Both arrays are written back to back into the same file (images first,
    # labels second); reading them back takes two np.load calls in that order.
    with open(dataset_path, 'wb') as f:
        for arr in (X_train, y_train):
            np.save(f, arr)

# Load the preloaded dataset

In [None]:
BASE_DIR = './'  # directory containing the preloaded dataset file
run_save = True

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects; only
# load a train_set.npy that comes from a trusted source.
with open(os.path.join(BASE_DIR, "train_set.npy"), 'rb') as f:
    # The file holds two arrays stored back to back: first one read goes to
    # X_train, second one to y_train.
    X_train, y_train = np.load(f, allow_pickle=True), np.load(f, allow_pickle=True)

# Turn the labels into a column vector of shape (N, 1).
y_train = y_train.reshape((-1, 1))

# Split the data into training and validation sets

In [None]:
# Hold out 12% of the samples for validation; the fixed random_state keeps the
# split identical between runs.
train_X, val_X, train_y, val_y = train_test_split(
    X_train,
    y_train,
    train_size=0.88,
    shuffle=True,
    random_state=7,
)

for split in (train_X, val_X):
    print(split.shape)

(2304, 224, 224, 3)
(315, 224, 224, 3)


# Augment the training data

In [None]:
# Random transformations applied when generating augmented images.
augmentation_params = dict(
    rotation_range=30,
    zoom_range=0.3,
    # width_shift_range=0.15,
    # height_shift_range=0.15,
    shear_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
    brightness_range=[0.5, 1.5],
    fill_mode='wrap',
)
datagen = ImageDataGenerator(**augmentation_params)

In [None]:
# Oversample class 0: draw 500 of its training images (with replacement) and
# append one augmented version of each to the training set.
# NOTE: this cell grows train_X / train_y in place, so running it twice adds
# the augmented samples twice.
AUG_SEED = 7        # fixed seed so the oversampling is reproducible across runs
N_AUGMENTED = 500   # total number of augmented samples to add
AUG_BATCH = 50      # images produced per iterator step

idx = np.where(train_y == 0)[0]
if len(idx) == 0:
  raise ValueError("train_y contains no class-0 samples to augment")

# A dedicated Random instance keeps the draw reproducible without touching the
# global `random` state.
choices = random.Random(AUG_SEED).choices(idx, k=N_AUGMENTED)

# seed= fixes the order in which the iterator serves the selected samples.
aug_iter = datagen.flow(train_X[choices], train_y[choices], batch_size=AUG_BATCH, seed=AUG_SEED)

aug_X, aug_y = [], []
for _ in range(N_AUGMENTED // AUG_BATCH):
  batch_X, batch_y = next(aug_iter)
  aug_X.append(batch_X)
  aug_y.append(batch_y)

# Concatenate once at the end: appending inside the loop would copy the whole
# training array on every iteration.
train_X = np.concatenate([train_X, *aug_X], axis=0)
train_y = np.concatenate([train_y, *aug_y], axis=0)

print(train_X.shape)

(2804, 224, 224, 3)


# Model

In [None]:
def get_model():
  """Build and compile a binary classifier on top of EfficientNetB0.

  The backbone is created with ImageNet weights, without its classification
  top, for 224x224x3 inputs. Its output is reduced with global max pooling,
  passed through dropout (rate 0.5) and mapped to one sigmoid unit.

  Returns:
    A tf.keras Model compiled with binary cross-entropy loss, the 'Adam'
    optimizer and the accuracy metric.
  """
  backbone = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

  features = tf.keras.layers.GlobalMaxPooling2D()(backbone.output)
  features = tf.keras.layers.Dropout(rate=0.5)(features)
  probability = tf.keras.layers.Dense(1, activation='sigmoid')(features)

  classifier = tf.keras.models.Model(inputs=backbone.input, outputs=probability)
  classifier.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])
  return classifier

In [None]:
# Training callbacks: TensorBoard logging, best-model checkpointing and
# learning-rate reduction, the latter two driven by validation accuracy.
tensorboard = TensorBoard(log_dir='./logs')

# Directory/file the best model is saved to; with save_best_only=True the file
# is only overwritten when the monitored metric improves.
checkpoint = ModelCheckpoint(
    "./best_model/effnet.h5",
    monitor="val_accuracy",
    save_best_only=True,
    mode="auto",
    verbose=1,
)

# Scale the learning rate by 0.3 once val_accuracy has stopped improving
# (by at least 0.001) for 2 epochs.
reduce_lr = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.3,
    patience=2,
    min_delta=0.001,
    mode='auto',
    verbose=1,
)

In [None]:
model = get_model()

# Shuffle images and labels together with a fixed seed.
train_X, train_y = shuffle(train_X, train_y, random_state=101)

history = model.fit(
    train_X,
    train_y,
    validation_data=(val_X, val_y),
    epochs=12,
    batch_size=32,
    verbose=1,
    callbacks=[tensorboard, checkpoint, reduce_lr],
)

In [None]:
# Replace the in-memory model with the checkpoint saved at ./best_model/effnet.h5.
model = tf.keras.models.load_model(
    "./best_model/effnet.h5"
)

In [None]:
# Score the model on the validation split; the displayed list is [loss, accuracy].
model.evaluate(val_X, val_y)



[0.013504959642887115, 0.9968253970146179]

In [None]:
# Raw sigmoid outputs for the validation images (thresholded in the next cell).
pre=model.predict(val_X)



In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels.
# Flattening first and comparing the whole array at once avoids calling int()
# on one-element arrays, which NumPy has deprecated since 1.25. The result is
# still a plain list of Python ints.
pre = (np.asarray(pre).reshape(-1) > 0.5).astype(int).tolist()

  pre=[int(i>0.5) for i in pre]


In [None]:
# average=None reports one F1 score per class instead of a single aggregate.
f1_score(val_y, pre, average=None)

array([0.99638989, 0.99716714])

In [None]:
# Confusion matrix of true validation labels (first argument) against predictions (second argument).
confusion_matrix(val_y,pre)

array([[138,   1],
       [  0, 176]])

# Write predictions to CSV

In [None]:
# Make the test image folder the working directory; relative paths in later cells resolve from here.
%cd ./test_imgs/

/content/drive/Shareddrives/dataton/test_imgs


In [None]:
# Capture the output lines of `ls` for the current working directory into `photos`.
photos = !ls

In [None]:
# A captured `ls` line may hold several entries; split each line on whitespace
# and flatten everything into one list of names.
# NOTE(review): a file name containing spaces would be split into pieces here.
names = [name for line in photos for name in line.split()]

In [None]:
image_size = 224
folderPath = os.path.join('./test_imgs') #folder with test images
output_csv = './test_labels.csv'

# NOTE(review): `names` was listed from the current working directory; confirm
# that folderPath resolves to that same directory from here.
with open(output_csv, 'w') as f:
  for name in tqdm(names):
    # If the predictions file lives in the listed directory, a re-run would
    # find it among the names; it is not an image, so skip it.
    if name == os.path.basename(output_csv):
      continue
    img = plt.imread(os.path.join(folderPath, name))
    img = cv2.resize(img, (image_size, image_size))
    prob = model.predict(np.expand_dims(img, 0), verbose=0)
    # A batch of one image is passed in; take the first output value explicitly
    # instead of calling int() on the array (deprecated since NumPy 1.25).
    labell = int(prob.reshape(-1)[0] > 0.5)
    # One "<file name>,<0 or 1>" row per image.
    f.write(name + ',' + str(labell) + '\n')