# Import libraries

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import numpy as np
from skimage import exposure
from skimage import filters
import seaborn as sn
import pandas as pd
import os
import random
import shutil

# Pretraining

Pembagian dataset

In [None]:
# Class sub-folder names. The position of a name in this list is the integer
# label used for that class elsewhere in the notebook.
# NOTE(review): the order looks lexicographic, which presumably matches how
# flow_from_directory indexes sub-folders -- confirm before reordering.
classLabels = ['1000', '10000', '100000', '2000', '20000', '5000', '50000']
def transferBetweenFolders(source, dest, splitRate):
    """Move a random fraction of the entries in `source` into `dest`.

    Args:
        source: Directory to take entries from (with or without a trailing
            path separator).
        dest: Existing directory that receives the entries (with or without
            a trailing path separator).
        splitRate: Fraction of the entries to move. The count is
            int(len(entries) * splitRate), i.e. rounded down.

    Prints a notice and moves nothing when `source` is empty.

    Raises:
        ValueError: from random.sample when splitRate selects more entries
            than exist (splitRate > 1) or a negative number of entries.
    """
    # Kept as a global only for backward compatibility with the original
    # cell, which exposed the last directory listing at module level.
    global sourceFiles
    sourceFiles = os.listdir(source)
    if not sourceFiles:
        print("No file moved. Source empty!")
        return

    transferFileNumbers = int(len(sourceFiles) * splitRate)
    # os.path.join instead of string concatenation: the original silently
    # produced wrong paths when `source`/`dest` lacked a trailing slash.
    for fileName in random.sample(sourceFiles, transferFileNumbers):
        shutil.move(os.path.join(source, fileName), os.path.join(dest, fileName))
        
def transferAllClassBetweenFolders(source, dest, splitRate,
                                   baseDir='/content/drive/My Drive/TA/splitdata'):
    """Run transferBetweenFolders once per class sub-folder.

    For every label in `classLabels`, moves a random `splitRate` fraction of
    the entries in `<baseDir>/<source>/<label>/` to `<baseDir>/<dest>/<label>/`.

    Args:
        source: Name of the split folder to move from (e.g. 'train').
        dest: Name of the split folder to move to (e.g. 'validation').
        splitRate: Fraction of each class folder to move.
        baseDir: Root directory holding the split folders. Defaults to the
            Google Drive location this notebook has always used.
    """
    for label in classLabels:
        # The trailing '/' is kept on purpose so the paths are identical to
        # the ones this function has always passed on.
        transferBetweenFolders('{}/{}/{}/'.format(baseDir, source, label),
                               '{}/{}/{}/'.format(baseDir, dest, label),
                               splitRate)

In [None]:
# Hold-out split: move a random 20% of every class folder from train/ to
# validation/.
#
# Optional reset, run by hand before splitting, to return any existing
# validation files to train/ first:
#   os.chdir('/content/drive/My Drive/TA/splitdata')
#   transferAllClassBetweenFolders('validation', 'train', 1.0)
#
# NOTE(review): this cell is not idempotent -- each run moves another 20% of
# whatever is still in train/.
transferAllClassBetweenFolders(source='train', dest='validation', splitRate=0.20)

Fungsi metrics

In [None]:
def my_metrics(y_true, y_pred):
    """Print and return accuracy, weighted precision and weighted F1.

    Also prints the confusion matrix of `y_true` against `y_pred`.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned element-wise with `y_true`.

    Returns:
        Tuple (accuracy, precision, f1Score).
    """
    scores = (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, average='weighted'),
        f1_score(y_true, y_pred, average='weighted'),
    )
    captions = ("Accuracy  : {}", "Precision : {}", "f1Score : {}")
    for caption, score in zip(captions, scores):
        print(caption.format(score))
    print(confusion_matrix(y_true, y_pred))
    return scores

Prepare labels

In [None]:
# Module-level accumulators filled by prepareNameWithLabels:
# X collects file names, Y the matching integer class labels.
X=[]
Y=[]

def prepareNameWithLabels(folderName):
    """Append every file of one training class folder to X and its label to Y.

    The label is the position of `folderName` in `classLabels`.

    Args:
        folderName: Name of a class sub-folder of the train directory; must
            be one of `classLabels`.

    Raises:
        ValueError: if `folderName` is not in `classLabels`. The previous
            if/elif chain silently labelled any unknown folder as class 6.
    """
    if folderName not in classLabels:
        raise ValueError("Unknown class folder: {!r}".format(folderName))
    label = classLabels.index(folderName)

    sourceFiles=os.listdir('/content/drive/My Drive/TA/splitdata/train/'+folderName)
    # Extend both lists together so X[i] and Y[i] always describe the same file.
    X.extend(sourceFiles)
    Y.extend([label] * len(sourceFiles))

In [None]:
# Organize file names and class labels in X and Y variables.
# Start from fresh lists so this cell can be re-run safely: without the reset
# a second run either duplicates every entry or fails, because X and Y have
# already been replaced by NumPy arrays (which have no list-style append).
X = []
Y = []
for label in classLabels:
  prepareNameWithLabels(label)

# Arrays are needed later for index-based fold selection (X[train_index], ...).
X=np.asarray(X)
Y=np.asarray(Y)

Preprocess image function

In [None]:
def preprocess(image):
  """Return `image` unchanged.

  Placeholder hook: every candidate step below is disabled, and the only
  reference to this function in the notebook is the commented-out
  `preprocessing_function` option of `image_datagen`.
  """
  # Disabled experiments, kept for reference:
  #   image = filters.median(image)
  #   p2, p98 = np.percentile(image, (2, 98))
  #   image = exposure.rescale_intensity(image, in_range=(p2, p98))
  #   image = tf.cast(image, tf.float32)
  #   image = tf.image.resize(image, (224, 224))
  #   image = tf.keras.applications.mobilenet_v2.preprocess_input(image)
  #   image = exposure.equalize_adapthist(image, clip_limit=0.03)
  #   image = filters.gaussian(image, sigma=1)
  #   image = image[None, ...]
  return image

Image data generator

In [None]:
# Generator shared by the cells below: multiplies pixel values by 1/255 and
# applies random rotation (rotation_range=15) and random zoom (zoom_range=0.2).
image_datagen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=15,
    zoom_range=0.2,
    # Disabled options, kept for experimentation:
    # width_shift_range=0.1,
    # height_shift_range=0.1,
    # shear_range=0.2,
    # fill_mode='nearest',
    # preprocessing_function=preprocess,
)

# Training

Create and compile model

In [None]:
# CREATE NEW MODEL
# MobileNetV2 backbone without its classification head, initialised with
# ImageNet weights. The input size must match the `target_size` used by the
# generators that feed this model.
base_model = tf.keras.applications.MobileNetV2(
    # input_shape=(224, 224, 3),
    input_shape=(312, 416, 3),
    include_top=False,
    weights='imagenet',
)

# Classification head: global max pooling, light dropout, two sigmoid dense
# layers, then a 7-way softmax (one unit per entry in classLabels).
model = tf.keras.Sequential([
  base_model,
  tf.keras.layers.GlobalMaxPooling2D(),
  tf.keras.layers.Dropout(0.1),
  tf.keras.layers.Dense(512, activation='sigmoid'),
  tf.keras.layers.Dense(128, activation='sigmoid'),
  tf.keras.layers.Dense(7, activation='softmax'),
])

# COMPILE NEW MODEL
# `learning_rate` replaces the deprecated `lr` alias, which newer
# TensorFlow/Keras releases warn about or reject.
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
              metrics=['accuracy'])



Train with K-Folds

In [None]:
kf = KFold(n_splits=2, shuffle=True)

# Per-fold training history: each entry is the list of per-epoch values
# recorded for one fold.
VALIDATION_ACCURACY = []
VALIDATION_LOSS = []
TRAINING_ACCURACY = []
TRAINING_LOSS = []

fold_var = 0

train_path = '/content/drive/My Drive/TA/splitdata/train'
val_path = '/content/drive/My Drive/TA/splitdata/validation'

# Validation images are only rescaled. Feeding them through the augmenting
# `image_datagen` would randomly rotate/zoom them and make the validation
# metrics noisy and non-repeatable.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Snapshot of the model's weights before any fold is trained. Restoring it at
# the start of every fold keeps the folds independent; otherwise fold N is
# validated on images the model already trained on in earlier folds.
# NOTE(review): re-run the model cell before re-running this one, otherwise
# the snapshot is taken from an already trained model.
initial_weights = model.get_weights()

for train_index, val_index in kf.split(X, Y):
  # First move all images from validation back to train (if any exist)
  transferAllClassBetweenFolders('validation', 'train', 1.0)
  fold_var += 1
  print("Results for fold",fold_var)
  model.set_weights(initial_weights)

  X_train, X_val = X[train_index], X[val_index]
  Y_train, Y_val = Y[train_index], Y[val_index]

  # Move this fold's validation images from the train folder to the
  # validation folder. Y holds positions in classLabels, so the class folder
  # name is a direct lookup.
  for fileName, labelIndex in zip(X_val, Y_val):
    classLabel = classLabels[labelIndex]
    shutil.move(os.path.join(train_path, classLabel, fileName),
                os.path.join(val_path, classLabel, fileName))

  # NOTE(review): subset="training" looks like a no-op here because
  # image_datagen is created without validation_split -- confirm.
  train_generator = image_datagen.flow_from_directory(
      train_path,
      target_size=(312, 416),
      # target_size=(224, 224),
      batch_size=32,
      shuffle=True,
      subset="training",
      class_mode='categorical')

  # shuffle=False is required: the predictions below are compared with
  # `validation_generator.classes`, which is in directory order. With
  # shuffling enabled the two sequences do not line up and the reported
  # metrics are meaningless.
  validation_generator = validation_datagen.flow_from_directory(
      val_path,
      target_size=(312, 416),
      # target_size=(224, 224),
      batch_size=32,
      shuffle=False,
      class_mode='categorical')

  # FIT THE MODEL
  history = model.fit(
    train_generator,
    # steps_per_epoch=50,
    steps_per_epoch=len(train_generator),
    epochs=10,
    validation_data=validation_generator,
    # validation_steps=20,
    validation_steps=len(validation_generator))

  VALIDATION_ACCURACY.append(history.history['val_accuracy'])
  VALIDATION_LOSS.append(history.history['val_loss'])
  TRAINING_ACCURACY.append(history.history['accuracy'])
  TRAINING_LOSS.append(history.history['loss'])

  # model.predict replaces the deprecated model.predict_generator.
  predictions = model.predict(validation_generator, verbose=1)
  yPredictions = np.argmax(predictions, axis=1)
  true_classes = validation_generator.classes

  # evaluate validation performance
  print("***Performance on Validation data***")
  valAcc, valPrec, valFScore = my_metrics(true_classes, yPredictions)

  tf.keras.backend.clear_session()

# After the loop `model` holds the weights trained in the last fold only.

No file moved. Source empty!
No file moved. Source empty!
No file moved. Source empty!
No file moved. Source empty!
No file moved. Source empty!
No file moved. Source empty!
Results for fold 1
Found 999 images belonging to 7 classes.
Found 667 images belonging to 7 classes.
Epoch 1/10
Epoch 2/10

Show chart

In [None]:
# The history lists hold one list of per-epoch values per fold. Passing such a
# nested list straight to plt.plot treats it as a 2-D array and draws one line
# per epoch against the fold index, so each fold is plotted separately here
# (solid = training, dashed = validation) with the epoch on the x axis.
plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
for fold, (train_acc, val_acc) in enumerate(zip(TRAINING_ACCURACY, VALIDATION_ACCURACY), start=1):
    plt.plot(train_acc, label='Training Accuracy (fold {})'.format(fold))
    plt.plot(val_acc, linestyle='--', label='Validation Accuracy (fold {})'.format(fold))
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
for fold, (train_loss, val_loss) in enumerate(zip(TRAINING_LOSS, VALIDATION_LOSS), start=1):
    plt.plot(train_loss, label='Training Loss (fold {})'.format(fold))
    plt.plot(val_loss, linestyle='--', label='Validation Loss (fold {})'.format(fold))
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.ylim([0,3.0])
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()

# Testing

In [None]:
test_dir = "/content/drive/My Drive/TA/splitdata/test"

# Test images are only rescaled: evaluating on randomly rotated/zoomed images
# (as the shared `image_datagen` would produce) makes the score change from
# run to run.
test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    # Must match the model's input_shape of (312, 416, 3); the previous
    # (224, 224) does not fit the network built above.
    target_size=(312, 416),
    batch_size=32,
    # Keep directory order so predictions line up with test_generator.classes.
    shuffle = False,
    class_mode='categorical',
)


print("Model evaluate:")
results = model.evaluate(test_generator)

NameError: ignored

Show confusion matrix

In [None]:
# `steps` must be passed by keyword: the second positional parameter of
# model.predict is `batch_size`, not the number of steps.
Y_pred = model.predict(test_generator, steps=len(test_generator))
y_pred = np.argmax(Y_pred, axis=1)

# Class names in generator order; this relies on test_generator having been
# created with shuffle=False so that `.classes` lines up with the predictions.
class_names = list(test_generator.class_indices)

print('Confusion Matrix')
# Fixing `labels` keeps the matrix at len(class_names) x len(class_names) even
# when a class is absent from both the true and the predicted labels, so it
# always fits the DataFrame below.
cm = confusion_matrix(test_generator.classes, y_pred,
                      labels=list(range(len(class_names))))
print(cm)
df_cm = pd.DataFrame(cm, index = class_names, columns = class_names)
plt.figure(figsize = (10,7))
# fmt='d' prints integer counts instead of the default scientific-style format.
sn.heatmap(df_cm, annot=True, fmt='d', cmap="OrRd")
plt.xlabel('Predicted class')
plt.ylabel('True class')

# print('Classification Report')
# print(classification_report(test_generator.classes, y_pred))