> ### **OPTIMAL DATA RANGE**
---
*   ClipLimit: 2-3
*   Learning rate: 0.0001 < lr < 0.1
*   Batch size: 32, 64, 128
*   Dropout rate: 0.5 - 0.8
*   Weight: auto

> ### **STRUCTURE**
---

*   Optimizer: Adam
*   Activation function: ReLU (hidden layers), Softmax (output layer)
*   Pooling: _Max Pooling_
*   Input image size: 130x242 (height x width)
*   Training/testing/validation: 70/10/20
*   Epochs: 30-65
*   Classes: 7
*   Weight: ImageNet
*   Loss: Cross-Entropy

 ## **DATA PREPARATION**
---
Preparing the data (including applying image enhancement)

In [None]:
pip install -r "Rupiah-PCR-Using-Image-Processing-and-CNN\requirements.txt"

In [1]:
import os
import cv2
import PIL
from PIL import Image
import matplotlib.pyplot as plt
from sklearn import preprocessing, neighbors
from sklearn.model_selection import train_test_split
import numpy as np

In [None]:
# Root folder of the raw dataset; every entry found in it is reported as a class.
dataset = r"Rupiah-PCR-Using-Image-Processing-and-CNN\datasetUang"

# List the entries of the dataset root itself.
kelas = os.listdir(dataset)
print("Classes list = ",kelas,"\n\nNumber of classes = ", len(kelas))

In [25]:
# Copy the dataset into datasetSplit/{train,val,test} with a fixed seed so the split is reproducible.
import splitfolders
splitfolders.ratio(dataset, output="datasetSplit", seed=1307, ratio=(0.7, 0.2, 0.1)) # ratio order is (train, val, test): 70% : 20% : 10%

Copying files: 700 files [00:10, 63.86 files/s]


In [26]:
# From here on `dataset` refers to the split output folder rather than the raw dataset.
dataset = "datasetSplit"
# Disabled call; `daftar_file` is not defined in the visible code.
#daftar_file(dataset)

In [3]:
import pandas as pd
import random
from IPython.display import Image
import matplotlib.pyplot as plt
import seaborn as sns

from keras.utils import plot_model
from sklearn.metrics import classification_report
from collections import Counter
import tensorflow as tf

import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.applications.vgg16 import VGG16

from keras import Model, layers
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from keras.layers import GlobalMaxPooling2D, GlobalAveragePooling2D, Dropout, Dense, Input, Conv2D, MaxPooling2D, Flatten,MaxPooling3D

In [4]:
# Shape of the images fed to the network: (height, width, channels).
IMAGE_SIZE = (130, 242, 3)

# Root of the split dataset and the entries found directly inside it.
dataset = "datasetSplit"
train_pred_test_folders = os.listdir(dataset)

# One sub-folder per partition.
train_path, test_path, val_path = (
    f"{dataset}/{partition}" for partition in ("train", "test", "val")
)

In [None]:
def _count_images_per_class(split_path):
    """Return a DataFrame with one row per entry of *split_path*: its name and file count."""
    counts = {
        folder: len(os.listdir(split_path + '/' + folder))
        for folder in os.listdir(split_path)
    }
    return pd.DataFrame(list(counts.items()), columns=['class', 'count'])


quantity_train = _count_images_per_class(train_path)
quantity_test = _count_images_per_class(test_path)
quantity_val = _count_images_per_class(val_path)

# One bar chart per partition so the class balance of every split is visible.
figure, ax = plt.subplots(1, 3, figsize=(20, 5))
for axis, split_name, quantities in zip(
    ax,
    ('Training', 'Testing', 'Validation'),
    (quantity_train, quantity_test, quantity_val),
):
    sns.barplot(x='class', y='count', data=quantities, ax=axis)
    axis.set_title(split_name)

n_train = sum(quantity_train['count'].values)
n_test = sum(quantity_test['count'].values)
n_val = sum(quantity_val['count'].values)

print("Number of training set:", n_train)
print("Number of testing set:", n_test)
print("Number of validation set:", n_val)
print("Total:", n_train + n_test + n_val)

plt.show()

In [6]:
def save_history(i, history, model_name):
    """Write a training history to disk as both JSON and CSV.

    Args:
        i: model number; selects the ``CNN_Model<i>_noImgPro`` output folder.
        history: object whose ``history`` attribute is a dict of per-epoch metric lists.
        model_name: prefix of the generated ``<model_name>_history.*`` files.

    The output folder must already exist.
    """
    frame = pd.DataFrame(history.history)
    stem = 'CNN_Model' + str(i) + '_noImgPro/' + model_name + '_history'

    # Same table, two serialisations: JSON first, then CSV.
    for extension, write in (('.json', frame.to_json), ('.csv', frame.to_csv)):
        with open(stem + extension, mode='w') as handle:
            write(handle)
        
def plot_accuracy_from_history(history, isinception=False):
    """Plot training and validation accuracy per epoch.

    Args:
        history: object whose ``history`` attribute maps metric names to per-epoch lists.
        isinception: when false the ``'acc'``/``'val_acc'`` keys are read,
            otherwise ``'accuracy'``/``'val_accuracy'``.

    Raises:
        KeyError: if the selected metric keys are missing from ``history.history``.
    """
    metric = 'accuracy' if isinception else 'acc'
    acc = history.history[metric]
    val_acc = history.history['val_' + metric]
    epochs = list(range(len(acc)))

    try:
        # Open the figure before drawing so the curves land on it.
        plt.figure()
        # x/y are passed by keyword: recent seaborn releases reject them positionally.
        sns.lineplot(x=epochs, y=acc, label='Training Accuracy')
        sns.lineplot(x=epochs, y=val_acc, label='Validation Accuracy')
        plt.title('Training and Validation Accuracy')
        plt.legend()
        plt.show()
    except TypeError as error:
        # Plotting is best-effort: report the problem instead of aborting the caller.
        print("Could not plot accuracy:", error)
    
def plot_loss_from_history(history):
    """Plot training and validation loss per epoch.

    Args:
        history: object whose ``history`` attribute holds the ``'loss'`` and
            ``'val_loss'`` per-epoch lists.

    Raises:
        KeyError: if either key is missing from ``history.history``.
    """
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = list(range(len(loss)))

    try:
        # Open the figure before drawing so the curves land on it.
        plt.figure()
        # x/y are passed by keyword: recent seaborn releases reject them positionally.
        sns.lineplot(x=epochs, y=loss, label='Training Loss')
        sns.lineplot(x=epochs, y=val_loss, label='Validation Loss')
        plt.title('Training and Validation Loss')
        plt.legend()
        plt.show()
    except TypeError as error:
        # Plotting is best-effort: report the problem instead of aborting the caller.
        print("Could not plot loss:", error)
    
def do_history_stuff(i, history, history_file_name, isinception=False):
    """Save a training history to disk, then plot its accuracy and loss curves.

    Args:
        i: model number forwarded to ``save_history``.
        history: training history object forwarded to all three helpers.
        history_file_name: file-name prefix forwarded to ``save_history``.
        isinception: forwarded to ``plot_accuracy_from_history``.
    """
    save_history(i, history=history, model_name=history_file_name)
    plot_accuracy_from_history(history, isinception=isinception)
    plot_loss_from_history(history)

In [7]:
def show_few_images(number_of_examples=2, predict_using_model=None):
    """Show random training images per class, optionally with a model prediction.

    Args:
        number_of_examples: how many random images to draw from every class folder.
        predict_using_model: model used to predict the displayed images. Any truthy
            value without a ``predict`` method falls back to the global
            ``vgg16_final_model``.
    """
    class_folders = os.listdir(train_path)
    # squeeze=False keeps a 2-D axes array even for a 1x1 grid, so flattening always works.
    figure1, ax1 = plt.subplots(number_of_examples, len(class_folders),
                                figsize=(20, 4*number_of_examples), squeeze=False)
    ax1 = ax1.reshape(-1)
    for axis in ax1:
        axis.axis('off')

    model = None
    if predict_using_model:
        # Prefer the model that was passed in over the module-level one.
        model = predict_using_model if hasattr(predict_using_model, 'predict') else vgg16_final_model

    axs = 0
    for folder in class_folders:
        image_ids = os.listdir(os.path.join(train_path, folder))
        for _ in range(number_of_examples):
            image_id = image_ids[random.randrange(0, len(image_ids))]
            display = plt.imread(os.path.join(train_path, folder, image_id))
            ax1[axs].imshow(display)
            title = 'True:'+folder
            if model is not None:
                # Resize to the network input; cv2.resize takes (width, height).
                model_input = cv2.resize(display, (IMAGE_SIZE[1], IMAGE_SIZE[0]))
                # The generators rescale by 1/255, so integer images are scaled the same way.
                # NOTE(review): assumes 3-channel images; confirm the dataset has no alpha or grayscale files.
                if np.issubdtype(model_input.dtype, np.integer):
                    model_input = model_input / 255
                probabilities = model.predict(np.array([model_input]), verbose=0)
                predicted_classname = inv_map_classes[np.argmax(probabilities)]
                title = title+'\nPredict :'+predicted_classname
            ax1[axs].set_title(title)
            axs = axs+1

In [8]:
# Reset Keras' global state before any model is built.
tf.keras.backend.clear_session()

In [9]:
import os
def saveCNN_Model(i):
  """Create the output folder ``CNN_Model<i>_noImgPro``, then its model sub-folder via ``saveModel``.

  Args:
    i: model number used in the folder name.

  An already existing folder is not an error; any other OS error is printed
  and not raised, so a sweep keeps running.
  """
  try:
    os.makedirs("CNN_Model"+str(i)+"_noImgPro", exist_ok=True)
  except OSError as error:
    print(error)
  saveModel(i)

In [10]:
def saveModel(i):
  """Create the checkpoint folder ``CNN_Model<i>_noImgPro/model``.

  Args:
    i: model number used in the folder name.

  Missing parent folders are created and an existing folder is not an error;
  any other OS error is printed and not raised.
  """
  try:
    os.makedirs("CNN_Model"+str(i)+"_noImgPro/model", exist_ok=True)
  except OSError as error:
    print(error)

 ## **VGG-16**
---
Visual Geometry Group (16 layers)

#### **<font color='Pink'>Data Generator</font>**

In [11]:
# Normalisation: every generator rescales pixel values to [0, 1].
# Random shear/zoom augmentation is applied to the training data only.
train_datagen = ImageDataGenerator(rescale = 1.0/255.,shear_range=0.2,zoom_range=0.2)
train_generator16 = train_datagen.flow_from_directory(train_path,
                                                    batch_size=32,
                                                    shuffle=True,
                                                    class_mode='categorical',
                                                    target_size=(130, 242))

# Evaluation data must stay unaltered, so these generators only rescale.
validation_datagen = ImageDataGenerator(rescale = 1.0/255.)
test_datagen = ImageDataGenerator(rescale = 1.0/255.)

# shuffle=False keeps the evaluation order deterministic.
test_generator16 = test_datagen.flow_from_directory(test_path, target_size=(130, 242),
    batch_size=1,
    shuffle=False,
    class_mode='categorical')

validation_generator16 = validation_datagen.flow_from_directory(val_path, shuffle=False, batch_size=1, class_mode='categorical', target_size=(130, 242))

Found 490 images belonging to 7 classes.
Found 70 images belonging to 7 classes.
Found 140 images belonging to 7 classes.


In [12]:
# Invert the generator's class-name -> index mapping so an index can be turned back into a class name.
inv_map_classes = dict(
    zip(
        validation_generator16.class_indices.values(),
        validation_generator16.class_indices.keys(),
    )
)
#print(validation_generator16.class_indices)
print(inv_map_classes)

{0: '1000', 1: '10000', 2: '100000', 3: '2000', 4: '20000', 5: '5000', 6: '50000'}


#### **<font color='Pink'>Hyperparameter</font>**

In [13]:
import itertools
import random

# Symbolic search grid: epoch bucket x learning-rate bucket x batch size x dropout bucket.
array = [['ep1','ep2','ep3','ep4'], ['lr1', 'lr2', 'lr3', 'lr4'], [32, 64, 128], ['dr1', 'dr2', 'dr3', 'dr4']]

# Every bucket label maps to a sampler that draws one random value from the bucket's range.
# Epochs cover 30-65, learning rates 0.0001-0.1 and dropout rates 0.5-0.8, four buckets each.
_BUCKET_SAMPLERS = {
    'ep1': lambda: random.randint(30, 38),
    'ep2': lambda: random.randint(39, 47),
    'ep3': lambda: random.randint(48, 56),
    'ep4': lambda: random.randint(57, 65),

    'lr1': lambda: random.uniform(0.0001, 0.0250249),
    'lr2': lambda: random.uniform(0.025025, 0.050049),
    'lr3': lambda: random.uniform(0.05005, 0.0750749),
    'lr4': lambda: random.uniform(0.075075, 0.1),

    'dr1': lambda: random.uniform(0.5, 0.5749),
    'dr2': lambda: random.uniform(0.575, 0.649),
    'dr3': lambda: random.uniform(0.65, 0.7249),
    'dr4': lambda: random.uniform(0.725, 0.8),
}


def _sample_value(val):
    """Return a random draw for a bucket label; any other value (batch size) is returned unchanged."""
    sampler = _BUCKET_SAMPLERS.get(val)
    return val if sampler is None else sampler()


# One [epochs, learning_rate, batch_size, dropout_rate] list per grid point.
# NOTE: `random` is not seeded here, so the drawn values differ between runs.
parameter = [
    [_sample_value(val) for val in combination]
    for combination in itertools.product(*array)
]

In [None]:
# Print every hyper-parameter combination, one per line.
for combination in parameter:
    print(combination)

#### **<font color='Pink'>Model Training</font>**

In [14]:
from keras.layers import BatchNormalization
def vgg16_training(i, vgg_epoch, learning_rate, batch_size, dropout_rate):
  """Train a VGG16-based 7-class classifier, then save and evaluate it.

  Args:
    i: model number; selects the ``CNN_Model<i>_noImgPro`` output folder.
    vgg_epoch: maximum number of training epochs.
    learning_rate: learning rate given to the Adam optimizer.
    batch_size: batch size assigned to the global training generator.
    dropout_rate: rate of the dropout layer in the classification head.

  Returns:
    The compiled and fitted Keras model.
  """
  # ImageNet-weighted backbone without its top classifier, max-pooled; all of its layers are frozen.
  backbone = VGG16(pooling='max', weights='imagenet', include_top=False, input_shape=(130,242,3), classes=7)
  for frozen_layer in backbone.layers:
    frozen_layer.trainable = False

  # Classification head stacked on the backbone's last layer.
  head = Flatten()(backbone.layers[-1].output)
  head = BatchNormalization()(head)
  head = Dense(128, activation='relu')(head)
  head = BatchNormalization()(head)
  head = Dropout(dropout_rate)(head)
  head = Dense(7, activation='softmax')(head)
  vgg16_final_model = Model(backbone.input, head)

  vgg16_final_model.compile(
      loss='categorical_crossentropy',
      optimizer=Adam(learning_rate=learning_rate),
      metrics=['acc'],
  )

  # The training generator is shared module state; its batch size is overridden for this run.
  train_generator16.batch_size = batch_size

  # Keep a checkpoint whenever validation accuracy improves; stop once validation
  # loss has not improved for 5 epochs.
  vgg16_filepath = 'CNN_Model'+str(i)+'_noImgPro/model/vgg_16_'+str(i)+'-saved-model-{epoch:02d}-acc-{val_acc:.2f}.hdf5'
  training_callbacks = [
      tf.keras.callbacks.ModelCheckpoint(vgg16_filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max'),
      tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5),
  ]
  vgg16_history = vgg16_final_model.fit(
      train_generator16,
      epochs=vgg_epoch,
      validation_data=validation_generator16,
      callbacks=training_callbacks,
      verbose=1,
  )

  # Persist and plot the history, save the final model, then report test-set metrics.
  do_history_stuff(i, vgg16_history, 'vgg16_model')
  save_vgg16(i, vgg16_final_model)
  predictionTest(i, vgg16_final_model)

  return vgg16_final_model

In [15]:
def save_vgg16(i, vgg16_final_model):
  """Save the model to ``CNN_Model<i>_noImgPro/vgg16-model<i>.h5``.

  Args:
    i: model number used in both the folder and the file name.
    vgg16_final_model: object exposing ``save(path)``.
  """
  target = 'CNN_Model{0}_noImgPro/vgg16-model{0}.h5'.format(i)
  vgg16_final_model.save(target)

#### **<font color='Pink'>Evaluation</font>**

In [16]:
def acc_plot(vgg16_history):
  """Draw the training and validation accuracy curves of a history on a 10x8 figure."""
  plt.figure(figsize=(10, 8))
  for metric_key, legend_label in (('acc', 'train acc'), ('val_acc', 'val acc')):
    plt.plot(vgg16_history.history[metric_key], label=legend_label)
  plt.legend()
  plt.title('Accuracy')
  plt.show()

In [17]:
def loss_plot(vgg16_history):
  """Draw the training and validation loss curves of a history on a 10x8 figure."""
  plt.figure(figsize=(10, 8))
  for metric_key, legend_label in (('loss', 'train loss'), ('val_loss', 'val loss')):
    plt.plot(vgg16_history.history[metric_key], label=legend_label)
  plt.legend()
  plt.title('Loss')
  plt.show()

In [18]:
def predictionTest(i, vgg16_final_model):
  """Predict every image under ``test_path`` and hand the results to ``clf_report``.

  Args:
    i: model number forwarded to ``clf_report``.
    vgg16_final_model: trained model exposing ``predict``.

  Files that OpenCV cannot decode are reported and skipped.
  """
  true_value = []
  vgg_pred = []
  for folder in os.listdir(test_path):
      class_index = test_generator16.class_indices[folder]
      batch = []
      for image_id in os.listdir(os.path.join(test_path, folder)):
          path = os.path.join(test_path, folder, image_id)
          img = cv2.imread(path)
          if img is None:
              # cv2.imread returns None instead of raising on unreadable files.
              print("Skipping unreadable image:", path)
              continue
          # OpenCV loads BGR; the generators used for training feed RGB.
          img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
          # cv2.resize takes (width, height); same 1/255 scaling as the generators.
          batch.append(cv2.resize(img, (242, 130)) / 255)

      if not batch:
          continue

      # One predict call per class folder instead of one per image.
      probabilities = vgg16_final_model.predict(np.array(batch), verbose=0)
      vgg_pred.extend(np.argmax(probabilities, axis=1).tolist())
      true_value.extend([class_index] * len(batch))

  print("\n")
  clf_report(i, true_value, vgg_pred)

In [19]:
# Test accuracy of every evaluated model, in evaluation order.
arr_accuracy16 = []
from sklearn.metrics import confusion_matrix
import itertools
#from mlxtend.plotting import plot_confusion_matrix
def clf_report(k, true_value, model_pred):
    """Print accuracy and a classification report; save and show the confusion matrix.

    Args:
        k: model number; the figure is saved under ``CNN_Model<k>_noImgPro``.
        true_value: true class indices.
        model_pred: predicted class indices, aligned with ``true_value``.

    The accuracy is also appended to the module-level ``arr_accuracy16``.
    """
    classes = list(validation_generator16.class_indices.keys())
    # Passing the full label set keeps the matrix and the report aligned with the
    # class names even when a class is absent from this evaluation run.
    label_ids = list(validation_generator16.class_indices.values())

    matches = [truth == predicted for truth, predicted in zip(true_value, model_pred)]
    model_accuracy = np.sum(matches)/len(matches)
    print('Model Accuracy', model_accuracy)
    arr_accuracy16.append(model_accuracy)

    confusion_figure = plt.figure(figsize=(7,7))
    cm = confusion_matrix(true_value, model_pred, labels=label_ids)
    plt.imshow(cm,interpolation='nearest',cmap=plt.cm.viridis)
    plt.title('Confusion Matrix')
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells above 80% of the maximum count get black text, all others white.
    thresh = cm.max()*0.8
    for i,j in itertools.product(range(cm.shape[0]),range(cm.shape[1])):
        plt.text(j,i,cm[i,j],
                horizontalalignment="center",
                color="black" if cm[i,j] > thresh else "white")

    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')

    plt.savefig("CNN_Model"+str(k)+"_noImgPro/conf_matrix"+str(k)+".png")
    # Show, then close, so figures do not pile up when many models are evaluated in one run.
    plt.show()
    plt.close(confusion_figure)
    print(classification_report(true_value, model_pred, labels=label_ids, target_names=classes))

In [20]:
# Empty results table: one column per hyper-parameter plus the measured accuracy.
result_columns = ['Epoch', 'Learning Rate', 'Batch Size', 'Dropout Rate', 'Accuracy']
resultTable = pd.DataFrame(columns=result_columns)
print(resultTable)

Empty DataFrame
Columns: [Epoch, Learning Rate, Batch Size, Dropout Rate, Accuracy]
Index: []


#### **<font color='Pink'>Code Running</font>**

In [None]:
# Index of the first hyper-parameter combination to run; raise it to resume a sweep.
j = 0
# Models are numbered from 1, so combination `j` becomes model `j + 1`.
for i, (vgg_epoch, learning_rate, batch_size, dropout_rate) in enumerate(parameter[j:], start=j+1):
  saveCNN_Model(i)
  print("CNN_Model",i,"↓")
  print("HYPERPARAMETER".center(100,"─"))
  print("vgg epoch:",vgg_epoch)
  print("learning rate:",learning_rate)
  print("batch size:",batch_size)
  print("dropout rate:",dropout_rate)
  print("".center(100,"─"))
  vgg16_training(i, vgg_epoch, learning_rate, batch_size, dropout_rate)
  # Add the row under the model number (DataFrame.append no longer exists in pandas 2.x).
  # Values follow the column order Epoch, Learning Rate, Batch Size, Dropout Rate, Accuracy.
  resultTable.loc[i] = [vgg_epoch, learning_rate, batch_size, dropout_rate, arr_accuracy16[-1]]
  # Rewrite the spreadsheet after every model so partial results survive an interrupted sweep.
  resultTable.to_excel("resultTable.xlsx")
  print("".center(100,"─"))