In [None]:
# Mount Google Drive so the dataset and output paths under /content/drive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install keras-tuner

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras import layers
from keras.models import Sequential
from keras.optimizers import SGD
from keras.models import load_model
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
import kerastuner as kt

In [None]:
# Constants

IMAGE_WIDTH = 128
IMAGE_HEIGHT = 128
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)  # passed as target_size to the image generators
IMAGE_CHANNELS = 3
batch_size = 32
data_dir = "/content/drive/Shareddrives/A&D co./Conference Paper/Codes/2000_images"
checkpoint_filepath = "/content/drive/Shareddrives/A&D co./Conference Paper/Codes/CNN/"

# One [TP, FP, FN, TN, roc] row per cross-validation fold is appended here.
results = []

# Shared image generator: only multiplies pixel values by 1/255, no augmentation.
img_gen = ImageDataGenerator(rescale=1/255)

# Stop training when val_accuracy has not improved by at least min_delta
# for `patience` consecutive epochs.
earlystop = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=7,  # the comma after this argument was missing, which made the cell a SyntaxError
    verbose=1,
    min_delta=0.015,
)

In [None]:
def evaluation(model, test_generator, threshold=0.5):
  """Score a binary classifier on a non-shuffled generator.

  Supports both output heads that build_model can produce: a single sigmoid
  unit (predictions of shape (n, 1)) and a two-unit softmax (shape (n, 2)).

  Args:
    model: object exposing ``predict(generator)`` that returns class scores.
    test_generator: generator exposing ``classes`` (integer class index per
      sample). It must not shuffle, otherwise predictions and labels do not
      line up.
    threshold: decision threshold applied to the sigmoid output; ignored for
      the softmax head, where argmax is used.

  Returns:
    ``[TP, FP, FN, TN, roc]`` where the class with index 0 is treated as the
    positive class for the confusion-matrix counts and ``roc`` is the ROC AUC.

  Raises:
    ValueError: propagated from ``roc_auc_score`` when ``y_true`` contains a
      single class only.
  """
  scores = np.asarray(model.predict(test_generator))
  y_true = np.asarray(test_generator.classes)

  if scores.ndim == 1 or scores.shape[-1] == 1:
    # Sigmoid head: the single column is P(class index 1). argmax over one
    # column would always yield 0, so threshold it instead.
    prob_class1 = scores.reshape(len(scores))
    y_pred = (prob_class1 > threshold).astype(int)
  else:
    # Softmax head: column 1 is P(class index 1).
    prob_class1 = scores[:, 1]
    y_pred = np.argmax(scores, axis=1)

  # y_true uses 1 for class index 1, so it must be scored with P(class 1);
  # using column 0 of a softmax output would give 1 - AUC.
  roc = roc_auc_score(y_true, prob_class1)

  # Fix the label set so the matrix is always 2x2, even if a fold happens to
  # miss one class among the true or predicted labels.
  cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

  # sklearn layout: cm[i][j] = samples of true class i predicted as class j.
  # With class index 0 as the positive class:
  TP = cm[0][0]  # true 0, predicted 0
  FN = cm[0][1]  # true 0, predicted 1
  FP = cm[1][0]  # true 1, predicted 0
  TN = cm[1][1]  # true 1, predicted 1
  return [TP, FP, FN, TN, roc]

In [None]:
def build_model(hp):
  """Build and compile a CNN from a KerasTuner hyperparameter container.

  Search space (hyperparameter names are part of the tuner state and must not
  be renamed):
    * ``conv_blocks``: 3-6 blocks of Conv(3x3, same) -> BatchNorm -> ReLU -> pool
    * ``filters_<i>``: 32-256 filters per block, step 32
    * ``pooling_<i>``: average or max pooling per block
    * ``Global vs Flatten``: global average pooling or flatten before the head
    * ``hidden_size``: 32-256 units in the dense layer, step 16
    * ``dropout``: 0-0.5, step 0.1
    * ``Sigm vs Softmax``: 2-unit softmax + sparse categorical cross-entropy,
      or 1-unit sigmoid + binary cross-entropy
    * ``Adam vs SGD``: optimizer, each with its own log-sampled learning rate
      (plus ``momentum`` for SGD)

  Args:
    hp: KerasTuner ``HyperParameters`` object.

  Returns:
    A compiled ``tf.keras.Model`` tracking the ``accuracy`` metric.
  """
  inputs = tf.keras.Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], IMAGE_CHANNELS))
  x = inputs

  # Convolutional feature extractor.
  for i in range(hp.Int('conv_blocks', 3, 6)):
    filters = hp.Int('filters_' + str(i), 32, 256, step=32, default=64)
    x = tf.keras.layers.Convolution2D(
        filters, kernel_size=(3, 3), padding='same')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.ReLU()(x)
    if hp.Choice('pooling_' + str(i), ['avg', 'max']) == 'max':
      x = tf.keras.layers.MaxPool2D()(x)
    else:
      x = tf.keras.layers.AvgPool2D()(x)

  # Collapse the feature maps into a vector.
  if hp.Choice('Global vs Flatten', ['global', 'flatten']) == 'global':
    x = tf.keras.layers.GlobalAvgPool2D()(x)
  else:
    x = tf.keras.layers.Flatten()(x)

  x = tf.keras.layers.Dense(
      hp.Int('hidden_size', 32, 256, step=16, default=128),
      activation='relu')(x)
  x = tf.keras.layers.Dropout(
      hp.Float('dropout', 0, 0.5, step=0.1, default=0.5))(x)

  # Output head and the loss that matches it.
  if hp.Choice('Sigm vs Softmax', ['softmax', 'sigmoid']) == 'softmax':
    outputs = tf.keras.layers.Dense(2, activation='softmax')(x)
    loss = 'sparse_categorical_crossentropy'
  else:
    outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    loss = 'binary_crossentropy'

  model = tf.keras.Model(inputs, outputs)

  # Optimizer choice; `learning_rate` is the supported keyword (`lr` is a
  # deprecated alias that newer Keras releases reject).
  if hp.Choice('Adam vs SGD', ['adam', 'sgd']) == 'adam':
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=hp.Float('learning_rate_Adam', 1e-6, 1e-2, sampling='log'))
  else:
    optimizer = SGD(
        learning_rate=hp.Float('learning_rate_SGD', 1e-6, 1e-2, sampling='log'),
        momentum=hp.Choice('momentum', values=[0.9, 0.8, 0.7]))

  model.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])
  return model

In [None]:
# Load the CSV of filenames and labels, then shuffle its rows once with a
# fixed seed and renumber the index from 0.
from sklearn.utils import shuffle

df1 = pd.read_csv("/content/drive/Shareddrives/A&D co./Conference Paper/Codes/2000_labels.csv")
df2 = shuffle(df1, random_state=15).reset_index(drop=True)

In [None]:
# Hyperband search over the space defined in build_model, ranking trials by
# validation accuracy; tuner state is stored under the 'HyperBand_CNN' project.
tuner = kt.Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=50,
    factor=2,
    project_name='HyperBand_CNN',
)

In [None]:
# 5-fold cross-validation
#
# For each fold: hold out one fifth of df2 as the test set, split the rest
# 75/25 into train/validation, run a hyperparameter search, save the best
# model and append its test metrics to `results`.

def _fold_generator(frame, batch, shuffle_rows):
  """Build a binary-label image generator over `frame` using the shared img_gen."""
  return img_gen.flow_from_dataframe(
      frame,
      directory=data_dir,
      batch_size=batch,
      x_col='filename',
      y_col='label',
      target_size=IMAGE_SIZE,
      class_mode='binary',
      shuffle=shuffle_rows,
  )

# The fold boundaries do not depend on the loop variable, so compute them once.
# Positional slicing replaces np.split, which is deprecated for DataFrames.
n_rows = len(df2)
fold_bounds = [0, int(.20*n_rows), int(.40*n_rows), int(.60*n_rows), int(.80*n_rows), n_rows]
holdouts = [df2.iloc[start:stop] for start, stop in zip(fold_bounds[:-1], fold_bounds[1:])]

for case in range(5):
  test = holdouts[case]
  frames = holdouts[:case] + holdouts[case+1:]
  df3 = pd.concat(frames)
  df4 = shuffle(df3, random_state = case*10)
  df4.reset_index(inplace = True, drop=True)
  cut = int(.75*len(df4))
  train, validate = df4.iloc[:cut], df4.iloc[cut:]

  train_generator = _fold_generator(train, batch_size, True)
  validation_generator = _fold_generator(validate, 1, True)
  # The test generator must keep row order so predictions align with .classes.
  test_generator = _fold_generator(test, 1, False)

  # A fresh tuner (with its own project directory) per fold. Reusing a single
  # tuner would rank this fold's "best" model among trials scored on earlier
  # folds' validation splits, which contain this fold's test rows, and a
  # finished Hyperband search does not start over.
  tuner = kt.Hyperband(build_model,
                       objective = 'val_accuracy',
                       max_epochs = 50,
                       factor = 2,
                       project_name = 'HyperBand_CNN_fold_' + str(case))

  tuner.search(train_generator, validation_data = validation_generator, verbose = 1, callbacks = [earlystop])
  # Only the top model is used, so there is no need to rebuild five of them.
  best_model = tuner.get_best_models(num_models=1)[0]
  best_model.save(checkpoint_filepath+"Model_CNN/best_model_"+str(case)+".h5")
  r = evaluation(best_model,test_generator)
  print(r)
  results.append(r)

In [None]:
# Derive accuracy / precision / recall / F1 for every fold from its
# [TP, FP, FN, TN, roc] row and write the table to CSV.

def _safe_ratio(numerator, denominator):
  """Return numerator / denominator, or NaN when the denominator is zero."""
  return numerator / denominator if denominator else float('nan')

new_results = []
for holdout in results:
  # Read only the first five entries and build a new row instead of appending
  # to `holdout`, so re-running this cell does not keep growing `results`.
  TP, FP, FN, TN, roc = holdout[:5]
  accuracy = _safe_ratio(TP + TN, TP + FP + FN + TN)
  precision = _safe_ratio(TP, TP + FP)
  recall = _safe_ratio(TP, TP + FN)
  f1_score = _safe_ratio(2*precision*recall, precision + recall)
  new_results.append([TP, FP, FN, TN, roc, accuracy, precision, recall, f1_score])

rs = pd.DataFrame(new_results, columns =['TP','FP','FN','TN','AUC ROC','Accuracy','Precision','Recall','F1_score'])
rs.to_csv(path_or_buf = checkpoint_filepath+"CNN_CV_Results.csv"  ,index=False)