In [None]:
# Switch to the project root; the "./npz_files", "./models_30runs" and
# "./npz_files_results_30runs" paths used in later cells are relative to it.
# NOTE(review): this is a hard-coded absolute path - adjust it when running elsewhere.
%cd /ai-adulteration-detection

In [None]:
import numpy as np
import os
from os.path import isfile
import keras
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Bidirectional, LSTM, Dropout, Activation, GRU
from keras.layers import Conv2D, concatenate, MaxPooling2D, Flatten, Embedding, Lambda
from keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from keras import backend as K
from keras.utils import np_utils
from keras.optimizers import Adam, RMSprop
from keras import regularizers
import librosa
import librosa.display
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [None]:
# --- Training split ---------------------------------------------------------
# Each archive stores two arrays: 'arr_0' (features) and 'arr_1' (labels).
train_data_path = "./npz_files/shuffled_train_arr_MFCCs.npz"
train_data = np.load(train_data_path, allow_pickle = True)
arr_0_train, arr_1_train = train_data['arr_0'], train_data['arr_1']
print(arr_0_train.shape, arr_1_train.shape)

# --- Validation split -------------------------------------------------------
valid_data_path = "./npz_files/shuffled_valid_arr_MFCCs.npz"
valid_data = np.load(valid_data_path, allow_pickle = True)
arr_0_valid, arr_1_valid = valid_data['arr_0'], valid_data['arr_1']
print(arr_0_valid.shape, arr_1_valid.shape)

# --- Merged set -------------------------------------------------------------
# Both splits are stacked along the sample axis into one training set.
X = np.concatenate([arr_0_train, arr_0_valid], axis = 0)
y = np.concatenate([arr_1_train, arr_1_valid], axis = 0)
print(X.shape, y.shape)

(8010, 173, 40) (8010,)
(1001, 173, 40) (1001,)
(9011, 173, 40) (9011,)


In [None]:
# Class balance of the merged train + validation labels:
# returns (distinct label values, number of samples per label).
np.unique(y, return_counts = True)

(array([0, 1, 2, 3]), array([2222, 2271, 2266, 2252]))

In [None]:
# How many independent models are trained (and later evaluated); it also
# determines the "run_<k>" suffix range of the saved model / result files.
nr = 30 # number of runs

### Parallel CNN-RNN MODEL

In [None]:
def parallel_conv_recurrent_model_build(model_input):
    """Build and compile the two-branch (convolutional + recurrent) classifier.

    Both branches read the same ``model_input`` tensor:

    * convolutional branch - three ``Conv2D`` (ReLU, 'valid' padding, stride 1)
      + ``MaxPooling2D`` stages followed by ``Flatten``;
    * recurrent branch - one ``MaxPooling2D``, removal of the last axis, and a
      single (unidirectional) ``LSTM`` that returns only its final output.

    The branch outputs are concatenated, passed through a 64-unit L2-regularised
    ``Dense`` layer (no activation is specified for it), ``Dropout(0.4)`` and a
    softmax layer with ``num_classes`` units.

    The hyper-parameters are read from notebook-level globals at call time:
    ``nb_filters1``/``nb_filters2``/``nb_filters3``, ``ksize``,
    ``pool_size_1`` .. ``pool_size_4``, ``LSTM_COUNT`` and ``num_classes``.

    Parameters
    ----------
    model_input : Keras ``Input`` tensor
        Assumed to have a trailing axis of size 1 (the callers in this notebook
        build it as ``(n_frames, n_frequency, 1)``), because that axis is
        squeezed away in the recurrent branch.

    Returns
    -------
    A ``Model`` compiled with categorical cross-entropy, ``Adam(lr=0.001)`` and
    the 'accuracy' metric.
    """
    print('Building model...')

    # ---- Convolutional branch ------------------------------------------
    # (layer name, number of filters, pooling window) for each stage; layers are
    # created in the order conv -> pool, stage by stage.
    conv_stages = (
        ('conv_1', nb_filters1, pool_size_1),
        ('conv_2', nb_filters2, pool_size_2),
        ('conv_3', nb_filters3, pool_size_3),
    )
    cnn_branch = model_input
    for stage_name, stage_filters, stage_pool in conv_stages:
        cnn_branch = Conv2D(filters = stage_filters, kernel_size = ksize, strides = 1,
                            padding = 'valid', activation = 'relu', name = stage_name)(cnn_branch)
        cnn_branch = MaxPooling2D(stage_pool)(cnn_branch)
    cnn_features = Flatten()(cnn_branch)

    # ---- Recurrent branch ----------------------------------------------
    # Pool the raw input first, then drop the last axis so the LSTM receives a
    # 3-D (batch, steps, features) tensor.
    pooled_input = MaxPooling2D(pool_size_4, name = 'pool_lstm')(model_input)
    sequence = Lambda(lambda x: K.squeeze(x, axis = -1))(pooled_input)
    rnn_features = LSTM(LSTM_COUNT, return_sequences = False)(sequence)

    # ---- Fusion and classification head --------------------------------
    merged = concatenate([cnn_features, rnn_features], axis = -1, name = 'concat')
    hidden = Dense(64, kernel_regularizer = regularizers.l2(0.001), name = 'dense1')(merged)
    hidden = Dropout(0.4)(hidden)
    predictions = Dense(num_classes, activation = 'softmax', name = 'preds')(hidden)

    model = Model(model_input, predictions)
    model.compile(loss = 'categorical_crossentropy',
                  optimizer = Adam(lr=0.001),
                  metrics = ['accuracy'])
    return model

In [None]:
def train_model(x, y, run):
    """Build a fresh model and train it on the whole of ``x`` / ``y``.

    Parameters
    ----------
    x : array indexed as (samples, frames, frequency bins)
        A trailing channel axis is appended here before training.
    y : array of targets passed straight to ``model.fit``
        The model is compiled with categorical cross-entropy, so one-hot rows
        are expected - the caller in this notebook uses ``to_categorical``.
    run : int
        Zero-based run index; ``run + 1`` is embedded in the checkpoint name.

    Returns
    -------
    (model, history) : the trained Keras model and the ``History`` object
    returned by ``model.fit``.

    Notes
    -----
    ``fit`` is called without validation data, so both callbacks monitor the
    *training* accuracy: the checkpoint keeps the epoch with the highest
    training accuracy, and the learning rate is halved after 10 epochs without
    an improvement of at least 0.01.
    """
    n_frames, n_frequency = x.shape[1], x.shape[2]

    # Conv2D needs an explicit channel axis: (samples, frames, bins, 1).
    x = np.expand_dims(x, axis = -1)

    model_input = Input((n_frames, n_frequency, 1), name = 'input')
    model = parallel_conv_recurrent_model_build(model_input)

    model_filepath = "./models_30runs/Parallel_CNN_RNN_MFCCs_run_" + str(run + 1) + "_model.h5"
    # Make sure the checkpoint folder exists; otherwise the first save fails
    # only after a full epoch has already been trained.
    os.makedirs(os.path.dirname(model_filepath), exist_ok = True)

    checkpoint_callback = ModelCheckpoint(filepath = model_filepath, monitor = 'accuracy', verbose = 1,
                                          save_best_only = True, mode = 'max')
    reducelr_callback = ReduceLROnPlateau(monitor = 'accuracy', factor = 0.5, patience = 10,
                                          min_delta = 0.01, verbose = 1)
    callbacks_list = [checkpoint_callback, reducelr_callback]

    # Fit the model and get training history
    # (verbose = 2 prints one line per epoch).
    print('Training...')
    history = model.fit(x, y, batch_size = BATCH_SIZE, epochs = EPOCH_COUNT,
                        verbose = 2, callbacks = callbacks_list)

    return model, history

### TRAINING

In [None]:
# ---- Data-derived sizes ------------------------------------------------------
# The class count is taken from the raw integer label arrays. Reading it from
# `y` is not re-run safe: the training cell replaces `y` with its one-hot
# encoding, after which np.amax(y) + 1 would evaluate to 2.
num_classes = int(max(np.amax(arr_1_train), np.amax(arr_1_valid))) + 1
n_features = X.shape[2]
n_time = X.shape[1]

# ---- Convolutional branch ----------------------------------------------------
nb_filters1 = 16
nb_filters2 = 32
nb_filters3 = 64
ksize = (3, 1)          # kernel size shared by all three Conv2D layers
pool_size_1 = (2, 2)
pool_size_2 = (2, 2)
pool_size_3 = (2, 2)

# ---- Recurrent branch --------------------------------------------------------
pool_size_4 = (2, 1)    # pooling applied to the raw input before the LSTM
LSTM_COUNT = 96         # number of LSTM units

# ---- Optimisation ------------------------------------------------------------
BATCH_SIZE = 64
EPOCH_COUNT = 25

from keras.utils import to_categorical
from sklearn.model_selection import StratifiedKFold
from datetime import datetime

In [None]:
# Folder that receives the per-run timing / accuracy / loss archives. It is
# created up front and addressed by explicit path, so saving neither depends on
# nor changes the notebook's working directory (a failed `%cd` used to send the
# files to the wrong place).
results_dir = "./npz_files_results_30runs"
os.makedirs(results_dir, exist_ok = True)

# The training set is identical for every run, so it is built once:
# train + validation splits merged, labels one-hot encoded for the
# categorical cross-entropy loss.
X = np.concatenate((arr_0_train, arr_0_valid))
y = to_categorical(np.concatenate((arr_1_train, arr_1_valid)), num_classes = 4)
# Channel axis for model.evaluate (train_model adds its own internally).
X_with_channel = np.expand_dims(X, axis = -1)

for run in range(nr):

  print(run + 1)

  # Fresh containers for this run: every run is stored in its own .npz file,
  # so each array ends up holding exactly one row / one entry.
  train_accuracy_arr = np.empty((0, EPOCH_COUNT))
  train_loss_arr = np.empty((0, EPOCH_COUNT))
  training_times = np.empty((0))

  # The measured time covers building and fitting the model.
  start = datetime.now()
  model, history = train_model(X, y, run)

  train_accuracy_arr = np.append(train_accuracy_arr, [history.history['accuracy']], axis = 0)
  train_loss_arr = np.append(train_loss_arr, [history.history['loss']], axis = 0)

  duration = datetime.now() - start
  print("Training completed in time:", duration)

  training_times = np.append(training_times, [duration])

  # Accuracy of the final-epoch weights on the data the model was trained on.
  score = model.evaluate(X_with_channel, y, verbose = 1)
  accuracy = 100 * score[1]
  print("Training accuracy: %.4f%%" %accuracy)

  # np.savez appends the ".npz" extension itself.
  run_tag = str(run + 1)
  np.savez(os.path.join(results_dir, "training_times_MFCCs_for_test_" + run_tag), training_times)
  np.savez(os.path.join(results_dir, "train_accuracy_arr_MFCCs_for_test_" + run_tag), train_accuracy_arr)
  np.savez(os.path.join(results_dir, "train_loss_arr_MFCCs_for_test_" + run_tag), train_loss_arr)


In [None]:
# Show the timing array left behind by the training cell.
# NOTE(review): that cell re-creates `training_times` at the start of every
# run, so this holds the duration of the most recent run only; the per-run
# values are in the saved "training_times_MFCCs_for_test_<k>.npz" files.
training_times

array([datetime.timedelta(0, 743, 35236)], dtype=object)

### TEST THE MODEL

In [None]:
# Rebuild (and compile) an empty copy of the network so that the weights saved
# for each run can be loaded into it.
#
# The class count comes from the raw integer label arrays instead of
# `y.shape[1]`: `y` is only one-hot after the (long) training cell has run in
# this kernel, so the old expression failed when evaluating saved models from a
# fresh session.
num_classes = int(max(np.amax(arr_1_train), np.amax(arr_1_valid))) + 1
n_features = X.shape[2]
n_time = X.shape[1]

# Architecture hyper-parameters. They must equal the values used for training,
# otherwise the saved weights will not fit the rebuilt layers.
nb_filters1 = 16
nb_filters2 = 32
nb_filters3 = 64
ksize = (3, 1)
pool_size_1 = (2, 2)
pool_size_2 = (2, 2)
pool_size_3 = (2, 2)
pool_size_4 = (2, 1)
LSTM_COUNT = 96

BATCH_SIZE = 64
EPOCH_COUNT = 25

n_frequency = X.shape[2]
n_frames = X.shape[1]

# Same input layout as in train_model: (frames, frequency bins, 1 channel).
input_shape = (n_frames, n_frequency, 1)
model_input = Input(input_shape, name = 'input')

model = parallel_conv_recurrent_model_build(model_input)

Building model...


In [None]:
# Held-out test split: 'arr_0' holds the features, 'arr_1' the integer labels.
test_data_path = "./npz_files/shuffled_test_arr_MFCCs.npz"
test_data = np.load(test_data_path, allow_pickle = True)
arr_0_test = test_data['arr_0']
arr_1_test = test_data['arr_1']

X_test = arr_0_test
print(X_test.shape)
from keras.utils import to_categorical
# Fix the one-hot width explicitly (as done for the training labels and for the
# OrganicButter set); otherwise it is inferred from the largest label present
# and would not match the 4-unit softmax if a class were absent from this file.
y_test = to_categorical(arr_1_test, num_classes = 4)
print(y_test.shape)

(1002, 173, 40)
(1002, 4)


In [None]:
# Append the channel axis expected by the model input. Guarded so that
# re-running this cell does not keep stacking extra axes onto X_test.
if X_test.ndim == 3:
  X_test = np.expand_dims(X_test, axis = -1)

# Class balance of the test labels. Kept as the last expression so the notebook
# actually displays it (it was silently discarded when placed first).
np.unique(arr_1_test, return_counts = True)

In [None]:
# Evaluate the checkpoint of every run on the held-out test split.
# `accuracies` collects one percentage per run, in run order.
accuracies = []
for run in range(nr):
  model_filepath = "./models_30runs/Parallel_CNN_RNN_MFCCs_run_" + str(run + 1) + "_model.h5"
  # Only the weights are read from the checkpoint; they are loaded into the
  # model rebuilt in the cell above.
  model.load_weights(model_filepath)

  # score = [loss, accuracy], following the metrics given at compile time.
  score = model.evaluate(X_test, y_test, verbose = 1)
  accuracy = 100 * score[1]
  accuracies.append(accuracy)
  # This is accuracy on the test split, so it is labelled as such.
  print("Test accuracy: %.4f%%" %accuracy)

Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training ac

### TEST FOR OrganicButter-Smartphone

In [None]:
# Rebuild (and compile) an empty copy of the network for the OrganicButter /
# smartphone evaluation; the per-run weights are loaded into it below.
#
# The class count comes from the raw integer label arrays instead of
# `y.shape[1]`: `y` is only one-hot after the (long) training cell has run in
# this kernel, so the old expression failed in a fresh session.
num_classes = int(max(np.amax(arr_1_train), np.amax(arr_1_valid))) + 1
n_features = X.shape[2]
n_time = X.shape[1]

# Architecture hyper-parameters. They must equal the values used for training,
# otherwise the saved weights will not fit the rebuilt layers.
nb_filters1 = 16
nb_filters2 = 32
nb_filters3 = 64
ksize = (3, 1)
pool_size_1 = (2, 2)
pool_size_2 = (2, 2)
pool_size_3 = (2, 2)
pool_size_4 = (2, 1)
LSTM_COUNT = 96

BATCH_SIZE = 64
EPOCH_COUNT = 25

n_frequency = X.shape[2]
n_frames = X.shape[1]

# Same input layout as in train_model: (frames, frequency bins, 1 channel).
input_shape = (n_frames, n_frequency, 1)
model_input = Input(input_shape, name = 'input')

model = parallel_conv_recurrent_model_build(model_input)

Building model...


In [None]:
# OrganicButter / smartphone recordings, stored in the same two-array layout
# ('arr_0' = features, 'arr_1' = labels) as the other archives.
# NOTE: this rebinds test_data_path, test_data, arr_0_test and arr_1_test, so
# from here on they no longer refer to the shuffled test split.
test_data_path = "./npz_files/OrganicButter_smartphone_test_arr_MFCCs.npz"
test_data = np.load(test_data_path, allow_pickle = True)
arr_0_test, arr_1_test = test_data['arr_0'], test_data['arr_1']

X_test_2 = arr_0_test
print(X_test_2.shape)

from keras.utils import to_categorical
# One-hot width pinned to 4 so it matches the model output regardless of which
# labels occur in this file.
y_test_2 = to_categorical(arr_1_test, num_classes = 4)
print(y_test_2.shape)

In [None]:
# Append the channel axis expected by the model input. Guarded so that
# re-running this cell does not keep stacking extra axes onto X_test_2.
if X_test_2.ndim == 3:
  X_test_2 = np.expand_dims(X_test_2, axis = -1)

# Evaluate the checkpoint of every run on the OrganicButter / smartphone set.
# `accuracies` is re-created here, replacing the values from the previous
# evaluation loop.
accuracies = []
for run in range(nr):
  model_filepath = "./models_30runs/Parallel_CNN_RNN_MFCCs_run_" + str(run + 1) + "_model.h5"
  model.load_weights(model_filepath)

  # score = [loss, accuracy], following the metrics given at compile time.
  score = model.evaluate(X_test_2, y_test_2, verbose = 1)
  accuracy = 100 * score[1]
  accuracies.append(accuracy)
  # This is accuracy on the test set, so it is labelled as such.
  print("Test accuracy: %.4f%%" %accuracy)

print(accuracies)

Training accuracy for test data: 4.1361%
Training accuracy for test data: 15.5536%
Training accuracy for test data: 2.2404%
Training accuracy for test data: 3.1883%
Training accuracy for test data: 0.0000%
Training accuracy for test data: 4.7824%
Training accuracy for test data: 7.9707%
Training accuracy for test data: 0.0000%
Training accuracy for test data: 0.8186%
Training accuracy for test data: 0.0431%
Training accuracy for test data: 21.7579%
Training accuracy for test data: 17.2770%
Training accuracy for test data: 0.9048%
Training accuracy for test data: 4.0500%
Training accuracy for test data: 2.1542%
Training accuracy for test data: 4.2223%
Training accuracy for test data: 3.7915%
Training accuracy for test data: 0.9479%
Training accuracy for test data: 6.9798%
Training accuracy for test data: 0.0000%
Training accuracy for test data: 4.2654%
Training accuracy for test data: 11.7191%
Training accuracy for test data: 1.4218%
Training accuracy for test data: 0.0000%
Training acc

In [None]:
# Per-run accuracies (in percent) on the OrganicButter / smartphone set, pasted
# from the printed result of the evaluation cell so the summary can be
# recomputed without reloading the models. One entry per run, in run order.
accuracies = [
    4.13614809513092, 15.553641319274902, 2.24041361361742,
    3.188280761241913, 0.0, 4.782421514391899,
    7.970702648162842, 0.0, 0.8186127059161663,
    0.04308487696107477, 21.757863461971283, 17.277035117149353,
    0.9047823958098888, 4.049978405237198, 2.1542439237236977,
    4.222317785024643, 3.7914693355560303, 0.9478673338890076,
    6.979750096797943, 0.0, 4.265403002500534,
    11.719086766242981, 1.421800907701254, 0.0,
    14.261093735694885, 4.351572692394257, 6.635071337223053,
    0.0, 1.2494614347815514, 1.03403702378273,
]

# Number of runs covered, then the mean accuracy across them.
print(len(accuracies))
print(np.mean(accuracies))

30
4.858538009672581
