In [None]:
# Switch to the project directory; the ./npz_files and ./models_30runs paths used by later cells are relative to it.
%cd /ai-adulteration-detection

In [None]:
import numpy as np
import os
from os.path import isfile
import keras
from keras.models import Sequential, Model
from keras.layers import Input, Dense, TimeDistributed, LSTM, Dropout, Activation
from keras.layers import Conv1D, MaxPooling1D, Flatten, Conv2D, BatchNormalization, Lambda
from keras.layers.advanced_activations import ELU
from keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from keras import backend
from keras.utils import np_utils
from keras.optimizers import Adam, RMSprop
from keras import regularizers
import librosa
import librosa.display
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [None]:
# Load the training and validation archives and merge them into a single
# training set: 'arr_0' entries become X, 'arr_1' entries become y.
# Each archive is opened in a `with` block so its file handle is released as
# soon as both arrays have been read.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
# only use it on .npz files you trust.
train_data_path = "./npz_files/shuffled_train_arr_MFCCs.npz"
with np.load(train_data_path, allow_pickle = True) as train_data:
    arr_0_train = train_data['arr_0']
    arr_1_train = train_data['arr_1']
print(arr_0_train.shape, arr_1_train.shape)

valid_data_path = "./npz_files/shuffled_valid_arr_MFCCs.npz"
with np.load(valid_data_path, allow_pickle = True) as valid_data:
    arr_0_valid = valid_data['arr_0']
    arr_1_valid = valid_data['arr_1']
print(arr_0_valid.shape, arr_1_valid.shape)

# Validation samples are appended after the training samples.
X = np.concatenate((arr_0_train, arr_0_valid))
y = np.concatenate((arr_1_train, arr_1_valid))
print(X.shape, y.shape)

(8010, 173, 40) (8010,)
(1001, 173, 40) (1001,)
(9011, 173, 40) (9011,)


In [None]:
# Show the distinct values in the merged label array `y` and how many samples carry each one.
np.unique(y, return_counts = True)

(array([0, 1, 2, 3]), array([2222, 2271, 2266, 2252]))

In [None]:
nr = 30 # number of training runs; the test loops load one saved checkpoint per run

### CRNN MODEL

In [None]:
def conv_recurrent_model_build(model_input):
    """Assemble and compile the convolutional-recurrent classifier.

    The architecture is driven by module-level settings: N_LAYERS,
    CONV_FILTER_COUNT, FILTER_LENGTH, L2_regularization, LSTM_COUNT,
    NUM_HIDDEN and num_classes must all be defined before this is called.

    model_input: Keras Input tensor the network is built on.
    Returns the compiled Model (Adam optimizer, categorical cross-entropy
    loss, accuracy metric).
    """
    print('Building model...')

    # Convolutional front end: N_LAYERS identical stages of
    # conv -> batch norm -> ReLU -> max-pool -> dropout.
    x = model_input
    for stage in range(1, N_LAYERS + 1):
        conv = Conv1D(
            filters = CONV_FILTER_COUNT,
            kernel_size = FILTER_LENGTH,
            kernel_regularizer = regularizers.l2(L2_regularization),
            name = 'convolution_' + str(stage))
        x = conv(x)
        x = BatchNormalization(momentum = 0.9)(x)
        x = Activation('relu')(x)
        x = MaxPooling1D(2)(x)
        x = Dropout(0.4)(x)

    # Recurrent stage; return_sequences = False, so one vector per sequence comes out.
    x = LSTM(LSTM_COUNT, return_sequences = False)(x)
    x = Dropout(0.4)(x)

    # Hidden dense layer (no activation argument is passed), then dropout.
    hidden = Dense(NUM_HIDDEN, kernel_regularizer = regularizers.l2(L2_regularization), name = 'dense1')
    x = Dropout(0.4)(hidden(x))

    # Classification head: one unit per class followed by softmax.
    logits = Dense(num_classes)(x)
    probabilities = Activation('softmax', name = 'output_realtime')(logits)

    model = Model(model_input, probabilities)
    model.compile(loss = 'categorical_crossentropy',
                  optimizer = Adam(lr=0.001),
                  metrics = ['accuracy'])
    return model

In [None]:
def train_model(x, y, run):
    """Build a fresh CRNN and fit it on (x, y) for one run.

    x: feature array; x.shape[2] sets the feature width of the model input.
    y: training targets, passed unchanged to model.fit.
    run: zero-based run index; the checkpoint file is numbered run + 1.

    BATCH_SIZE and EPOCH_COUNT are read from module scope.

    Returns (model, history): the model as it stands after the last epoch and
    the History object from model.fit. The checkpoint file is written
    separately by the callback whenever training accuracy improves.
    """
    n_features = x.shape[2]
    # The time axis is left as None in the input shape.
    input_shape = (None, n_features)
    model_input = Input(input_shape, name = 'input')

    model = conv_recurrent_model_build(model_input)

    model_filepath = "./models_30runs/CRNN_MFCCs_run_" + str(run + 1) + "_model.h5"
    # Make sure the checkpoint directory exists before the first save is attempted.
    os.makedirs(os.path.dirname(model_filepath), exist_ok = True)

    # No validation data is passed to fit(), so both callbacks watch the
    # training accuracy.
    checkpoint_callback = ModelCheckpoint(filepath = model_filepath, monitor = 'accuracy', verbose = 1,
                                          save_best_only = True, mode = 'max')
    reducelr_callback = ReduceLROnPlateau(monitor = 'accuracy', factor = 0.5, patience = 10,
                                          min_delta = 0.01, verbose = 1)
    callbacks_list = [checkpoint_callback, reducelr_callback]

    # Fit the model and get training history
    print('Training...')
    # verbose = 2 prints one line per epoch (0 = silent, 1 = progress bar).
    history = model.fit(x, y, batch_size = BATCH_SIZE, epochs = EPOCH_COUNT,
                        verbose = 2, callbacks = callbacks_list)

    return model, history

### TRAINING

In [None]:
# Data dimensions and hyperparameters read by conv_recurrent_model_build and train_model.

# The class count is taken from the raw integer label arrays rather than from
# `y`: the run loop rebinds `y` to a one-hot matrix, so np.amax(y) + 1 would
# evaluate to 2.0 if this cell were executed again after training.
num_classes = int(max(np.amax(arr_1_train), np.amax(arr_1_valid))) + 1
n_features = X.shape[2]
n_time = X.shape[1]

N_LAYERS = 3             # number of convolution stages
FILTER_LENGTH = 5        # Conv1D kernel size
CONV_FILTER_COUNT = 56   # filters per Conv1D layer
BATCH_SIZE = 64
LSTM_COUNT = 96          # LSTM units
EPOCH_COUNT = 50
NUM_HIDDEN = 64          # units in the 'dense1' layer
L2_regularization = 0.001

from keras.utils import to_categorical
from sklearn.model_selection import StratifiedKFold
from datetime import datetime

In [None]:
# Train `nr` independent models on the merged train+validation data. Every run
# writes its own checkpoint (inside train_model) and three .npz result files
# holding that run's training time, accuracy curve and loss curve.
# Result files are written through explicit paths instead of changing the
# working directory, so a failed save cannot leave the kernel in the wrong
# directory.
RESULTS_DIR = "./npz_files_results_30runs"
os.makedirs(RESULTS_DIR, exist_ok = True)

for run in range(nr):

    print(run + 1)

    # Discard the previous run's Keras state so consecutive models do not
    # accumulate in the backend session.
    backend.clear_session()

    # Rebuild X and y from the raw arrays so each run starts from integer labels.
    X = np.concatenate((arr_0_train, arr_0_valid))
    y = np.concatenate((arr_1_train, arr_1_valid))

    start = datetime.now()

    y = to_categorical(y, num_classes = 4)

    model, history = train_model(X, y, run)

    # One row per run, one column per epoch.
    train_accuracy_arr = np.array([history.history['accuracy']], dtype = float)
    train_loss_arr = np.array([history.history['loss']], dtype = float)

    duration = datetime.now() - start
    print("Training completed in time:", duration)

    # Stored as an object array holding the single timedelta for this run.
    training_times = np.array([duration], dtype = object)

    # Accuracy of the final-epoch model on the data it was trained on.
    score = model.evaluate(X, y, verbose = 1)
    accuracy = 100 * score[1]
    print("Training accuracy: %.4f%%" %accuracy)

    run_tag = str(run + 1)
    np.savez(os.path.join(RESULTS_DIR, "training_times_MFCCs_for_test_" + run_tag), training_times)
    np.savez(os.path.join(RESULTS_DIR, "train_accuracy_arr_MFCCs_for_test_" + run_tag), train_accuracy_arr)
    np.savez(os.path.join(RESULTS_DIR, "train_loss_arr_MFCCs_for_test_" + run_tag), train_loss_arr)


In [None]:
# Display `training_times`; the run loop re-creates this array on every iteration, so it holds only the last run's duration.
training_times

array([datetime.timedelta(0, 275, 389870)], dtype=object)

### TEST THE MODEL

In [None]:
# Rebuild and compile the network with the same hyperparameters used for
# training, so that the saved per-run weights can be loaded into it.

# The class count is taken from the raw integer label arrays; `y.shape[1]` is
# only valid after the training loop has one-hot encoded `y` in this kernel.
num_classes = int(max(np.amax(arr_1_train), np.amax(arr_1_valid))) + 1
n_features = X.shape[2]
n_time = X.shape[1]

N_LAYERS = 3
FILTER_LENGTH = 5
CONV_FILTER_COUNT = 56
BATCH_SIZE = 64
LSTM_COUNT = 96
EPOCH_COUNT = 50
NUM_HIDDEN = 64
L2_regularization = 0.001

# The time axis is left as None in the input shape.
input_shape = (None, n_features)
model_input = Input(input_shape, name = 'input')

model = conv_recurrent_model_build(model_input)

Building model...


In [None]:
from keras.utils import to_categorical

# Load the test archive; the `with` block closes the file once both arrays
# have been read.
# NOTE(review): allow_pickle=True should only be used on trusted .npz files.
test_data_path = "./npz_files/shuffled_test_arr_MFCCs.npz"
with np.load(test_data_path, allow_pickle = True) as test_data:
    arr_0_test = test_data['arr_0']
    arr_1_test = test_data['arr_1']

X_test = arr_0_test
print(X_test.shape)
# One-hot encode the labels to match the model's 4-way softmax output.
y_test = to_categorical(arr_1_test, num_classes = 4)
print(y_test.shape)

(1002, 173, 40)
(1002, 4)


In [None]:
# Show the distinct label values in the test labels and how many samples carry each one.
np.unique(arr_1_test, return_counts = True)

(array([0, 1, 2, 3]), array([268, 236, 243, 255]))

In [None]:
# Evaluate every saved per-run checkpoint on (X_test, y_test) and collect the
# accuracies in percent.
# NOTE(review): the recorded output shows 100% for every run listed; worth
# confirming that the test split shares no samples with the training data.
accuracies = []
for run in range(nr):
    model_filepath = "./models_30runs/CRNN_MFCCs_run_" + str(run + 1) + "_model.h5"
    model.load_weights(model_filepath)

    score = model.evaluate(X_test, y_test, verbose = 1)
    accuracy = 100 * score[1]  # score[1] is the accuracy metric set at compile time
    accuracies.append(accuracy)
    # Label the figure as test accuracy and say which run it belongs to.
    print("Test accuracy for run %d: %.4f%%" % (run + 1, accuracy))

Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 100.0000%
Training ac

### TEST FOR OrganicButter-Smartphone

In [None]:
# Rebuild and compile the network with the training hyperparameters before
# loading the per-run weights for the OrganicButter-Smartphone evaluation.

# The class count is taken from the raw integer label arrays; `y.shape[1]` is
# only valid after the training loop has one-hot encoded `y` in this kernel.
num_classes = int(max(np.amax(arr_1_train), np.amax(arr_1_valid))) + 1
n_features = X.shape[2]
n_time = X.shape[1]

N_LAYERS = 3
FILTER_LENGTH = 5
CONV_FILTER_COUNT = 56
BATCH_SIZE = 64
LSTM_COUNT = 96
EPOCH_COUNT = 50
NUM_HIDDEN = 64
L2_regularization = 0.001

# The time axis is left as None in the input shape.
input_shape = (None, n_features)
model_input = Input(input_shape, name = 'input')

model = conv_recurrent_model_build(model_input)

Building model...


In [None]:
from keras.utils import to_categorical

# Load the OrganicButter smartphone archive; the `with` block closes the file
# once both arrays have been read.
# NOTE(review): allow_pickle=True should only be used on trusted .npz files.
test_data_path = "./npz_files/OrganicButter_smartphone_test_arr_MFCCs.npz"
with np.load(test_data_path, allow_pickle = True) as test_data:
    arr_0_test = test_data['arr_0']
    arr_1_test = test_data['arr_1']

X_test_2 = arr_0_test
print(X_test_2.shape)
# One-hot encode the labels to match the model's 4-way softmax output.
y_test_2 = to_categorical(arr_1_test, num_classes = 4)
print(y_test_2.shape)

In [None]:
# Evaluate every saved per-run checkpoint on (X_test_2, y_test_2) and collect
# the accuracies in percent.
accuracies = []
for run in range(nr):
    model_filepath = "./models_30runs/CRNN_MFCCs_run_" + str(run + 1) + "_model.h5"
    model.load_weights(model_filepath)

    score = model.evaluate(X_test_2, y_test_2, verbose = 1)
    accuracy = 100 * score[1]  # score[1] is the accuracy metric set at compile time
    accuracies.append(accuracy)
    # Label the figure as test accuracy and say which run it belongs to.
    print("Test accuracy for run %d: %.4f%%" % (run + 1, accuracy))

print(accuracies)

Training accuracy for test data: 92.7187%
Training accuracy for test data: 97.2426%
Training accuracy for test data: 92.7187%
Training accuracy for test data: 88.9272%
Training accuracy for test data: 60.6635%
Training accuracy for test data: 73.2012%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 98.4489%
Training accuracy for test data: 98.5351%
Training accuracy for test data: 90.9091%
Training accuracy for test data: 58.6385%
Training accuracy for test data: 91.5985%
Training accuracy for test data: 99.1383%
Training accuracy for test data: 87.8932%
Training accuracy for test data: 100.0000%
Training accuracy for test data: 49.3753%
Training accuracy for test data: 10.8143%
Training accuracy for test data: 71.9517%
Training accuracy for test data: 89.0995%
Training accuracy for test data: 73.8044%
Training accuracy for test data: 68.9358%
Training accuracy for test data: 90.6075%
Training accuracy for test data: 98.2335%
Training accuracy for test data:

In [None]:
# Per-run accuracies in percent, entered as literals; the leading values match
# the per-run figures printed by the evaluation loop in the previous cell.
accuracies = [
    92.71865487098694, 97.24256992340088, 92.71865487098694,
    88.92718553543091, 60.663509368896484, 73.20120930671692,
    100.0, 98.44894409179688, 98.53511452674866,
    90.90909361839294, 58.638519048690796, 91.59845113754272,
    99.13830161094666, 87.89315223693848, 100.0,
    49.37526881694794, 10.814303904771805, 71.9517469406128,
    89.09952640533447, 73.80439639091492, 68.93580555915833,
    90.60749411582947, 98.23352098464966, 95.34683227539062,
    47.8672981262207, 98.75053763389587, 81.94743394851685,
    92.46014356613159, 90.9521758556366, 88.88410329818726,
]
# Number of runs, then the mean accuracy across them, one value per line.
print(len(accuracies), np.mean(accuracies), sep = "\n")

30
82.6554649323225
