In [5]:
import sys
import os

# Put the parent of the current working directory on sys.path (once) so the
# project-local imports further down (src.*, settings, setup_logging) can resolve.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
from src.metrics import *
from settings import *
from src.data import generate

# Instruments whose pickles are read from the processed directory vs. the
# augmented directory.
instruments = ['Guitar']
instruments_aug = ['Accordion', 'Violin', 'Piano']

# Disabled setup kept for reference:
# from setup_logging import setup_logging
# setup_logging()
#generate.my_run(instruments)

# One pickled DataFrame per instrument, named data_<instrument lowercase>.pkl.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
raw_pickle_paths = [
    os.path.join(METADATA_DIR_PROCESSED, f'data_{name.lower()}.pkl')
    for name in instruments
]
augmented_pickle_paths = [
    os.path.join(METADATA_DIR_AUGMENTED_RAW, f'data_{name.lower()}.pkl')
    for name in instruments_aug
]
datasets_raw = [pd.read_pickle(path) for path in raw_pickle_paths]
datasets_augmented = [pd.read_pickle(path) for path in augmented_pickle_paths]

from src.data.preprocessing import get_max_shape, uniform_shape

# Largest value reported by get_max_shape over every loaded dataset; it is the
# common size handed to uniform_shape and, later, to the CNN reshape.
max_spectrogram_size = max(get_max_shape(df) for df in datasets_raw + datasets_augmented)


def uniform(df):
    """Run uniform_shape on df with the shared max_spectrogram_size."""
    return uniform_shape(df, max_spectrogram_size)


datasets_raw = [uniform(df) for df in datasets_raw]

# Augmented frames are additionally narrowed to these columns and re-indexed from 0.
augmented_columns = ['spectrogram', 'class_ID', 'class_name', 'augmentation']
datasets_augmented = [
    uniform(df)[augmented_columns].reset_index(drop=True)
    for df in datasets_augmented
]

import random
import keras
import os, glob
import logging
import librosa, librosa.display

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import confusion_matrix
from keras.models import model_from_json
from keras import backend as K

from src.metrics import *
from settings import *
from src.data import generate
from src.processing import *
from src.model import CNN_nodropout
from src.data.preprocessing import get_most_shape
from setup_logging import setup_logging

setup_logging()
logger = logging.getLogger('src.train')

# Split each dataset with split_ratio=0.65. The per-dataset splits are kept in
# load order (raw datasets first, then augmented ones) because the next cell
# zips test_datas with instruments + instruments_aug.
# train_test_split is the project's helper (it logs under 'src.processing'),
# not scikit-learn's function of the same name.
train_datas, test_datas = [], []
for dataset_group, is_augmented in ((datasets_raw, False), (datasets_augmented, True)):
    for dataset in dataset_group:
        train_data, test_data = train_test_split(dataset, augmented=is_augmented, split_ratio=0.65)
        train_datas.append(train_data)
        test_datas.append(test_data)

# Merge all instruments, then shuffle the rows with a fixed seed and renumber them.
train_data = pd.concat(train_datas).sample(frac=1, random_state=1).reset_index(drop=True)
test_data = pd.concat(test_datas).sample(frac=1, random_state=1).reset_index(drop=True)

# Shape handed to the model constructor below.
most_shape = get_most_shape(train_data)

logger.info(f"Number of train samples: {len(train_data)}")
logger.info(f"Number of test samples: {len(test_data)}")

X_train, y_train = features_target_split(train_data)
X_test, y_test = features_target_split(test_data)

# Reshape for CNN input
X_train = reshape_feature_CNN(X_train, size=max_spectrogram_size)
X_test = reshape_feature_CNN(X_test, size=max_spectrogram_size)

# Keep the integer labels: confusion_matrix below needs them, not the one-hot form.
y_test_values = y_test.copy()

# One-Hot encoding for classes
y_train = one_hot_encode(y_train)
y_test = one_hot_encode(y_test)

# Instance of CNN model
cnn = CNN_nodropout(most_shape)
logger.info(str(cnn))

# NOTE(review): the test split is also passed to train(); if it is used there for
# validation or model selection, the test metrics are optimistic -- confirm.
cnn.train(X_train, y_train, X_test, y_test, epochs=25)
cnn.evaluate(X_train, y_train, X_test, y_test)

# predict_classes is only used on TensorFlow 1.8.0; every other version takes the
# argmax over the predicted class scores.
if tf.__version__ == '1.8.0':
    predictions = cnn.model.predict_classes(X_test)
else:
    predict_x = cnn.model.predict(X_test)
    predictions = np.argmax(predict_x, axis=1)

conf_matrix = confusion_matrix(y_test_values, predictions, labels=range(10))
logger.info('Confusion Matrix for classes {}:\n{}'.format(CLASSES, conf_matrix))
#cnn.save_model(name="model_all_data_augment_1")

src.processing - INFO - Start train test split with split ratio: 0.65
src.processing - INFO - Number of training samples is 1300
src.processing - INFO - Number of testing samples is 700
src.processing - INFO - Train test split completed
src.processing - INFO - Start train test split with split ratio: 0.65
src.processing - INFO - Number of training samples is 585
src.processing - INFO - Number of testing samples is 315
src.processing - INFO - Train test split completed
src.processing - INFO - Start train test split with split ratio: 0.65
src.processing - INFO - Number of training samples is 585
src.processing - INFO - Number of testing samples is 315
src.processing - INFO - Train test split completed
src.processing - INFO - Start train test split with split ratio: 0.65
src.processing - INFO - Number of training samples is 585
src.processing - INFO - Number of testing samples is 315
src.processing - INFO - Train test split completed
The most frequent shape is (128, 107)
src.train - INFO 

In [6]:
# Evaluate the trained model separately on each instrument's held-out split.
# test_datas was filled raw-datasets-first, then augmented ones, so it lines up
# element-for-element with instruments + instruments_aug.
test_instruments = instruments + instruments_aug
num_classes = len(CLASSES)

# Loop-specific names are used on purpose: rebinding test_data / X_test / y_test /
# y_test_values here would overwrite the combined test set built by the training
# cell and leave the kernel in a misleading state for any later re-run.
for instrument_test_data, instrument in zip(test_datas, test_instruments):
    # Add a trailing channel axis to every spectrogram.
    # 128 is presumably the number of frequency bins (get_most_shape reports
    # shapes of the form (128, N)) -- TODO confirm against the preprocessing code.
    X_instrument = np.array([
        spectrogram.reshape((128, max_spectrogram_size, 1))
        for spectrogram in instrument_test_data['spectrogram']
    ])
    # to_categorical already returns a NumPy array, so no extra np.array() wrap.
    y_instrument = keras.utils.to_categorical(instrument_test_data['class_ID'], num_classes)

    # NOTE(review): the indices below assume the model was compiled with metrics
    # in the order accuracy, precision, recall, f1 -- confirm in CNN_nodropout.
    score = cnn.model.evaluate(X_instrument, y_instrument)
    print(f'Test score for instrument: {instrument}')
    print('\tTest loss:', score[0])
    print('\tTest accuracy:', score[1])
    print('\tTest precision:', score[2])
    print('\tTest recall:', score[3])
    print('\tTest f1-score:', score[4])

Test score for instrument: Guitar
	Test loss: 0.7459526658058167
	Test accuracy: 0.7742857336997986
	Test precision: 0.8124560713768005
	Test recall: 0.7140827775001526
	Test f1-score: 0.7562323212623596
Test score for instrument: Accordion
	Test loss: 0.45035186409950256
	Test accuracy: 0.9492063522338867
	Test precision: 0.9528225660324097
	Test recall: 0.949999988079071
	Test f1-score: 0.951388955116272
Test score for instrument: Violin
	Test loss: 0.2913728356361389
	Test accuracy: 0.9365079402923584
	Test precision: 0.939213752746582
	Test recall: 0.934374988079071
	Test f1-score: 0.9367559552192688
Test score for instrument: Piano
	Test loss: 0.3059425354003906
	Test accuracy: 0.9079365134239197
	Test precision: 0.9185339212417603
	Test recall: 0.8968750238418579
	Test f1-score: 0.9070337414741516


In [1]:
import sys
import os

# Put the parent of the current working directory on sys.path (once) so the
# project-local imports further down (src.*, settings, setup_logging) can resolve.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
from src.metrics import *
from settings import *
from src.data import generate

# Instruments whose pickles are read from the normalized processed directory vs.
# the normalized augmented directory.
instruments = ['Guitar']
instruments_aug = ['Accordion', 'Violin', 'Piano']

# Disabled setup kept for reference:
# from setup_logging import setup_logging
# setup_logging()
#generate.my_run(instruments)

# One pickled DataFrame per instrument, named data_<instrument lowercase>.pkl.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
raw_pickle_paths = [
    os.path.join(METADATA_DIR_PROCESSED_NORMALIZED, f'data_{name.lower()}.pkl')
    for name in instruments
]
augmented_pickle_paths = [
    os.path.join(METADATA_DIR_AUGMENTED_RAW_NORMALIZED, f'data_{name.lower()}.pkl')
    for name in instruments_aug
]
datasets_raw = [pd.read_pickle(path) for path in raw_pickle_paths]
datasets_augmented = [pd.read_pickle(path) for path in augmented_pickle_paths]

from src.data.preprocessing import get_max_shape, uniform_shape

# Largest value reported by get_max_shape over every loaded dataset; it is the
# common size handed to uniform_shape and, later, to the CNN reshape.
max_spectrogram_size = max(get_max_shape(df) for df in datasets_raw + datasets_augmented)


def uniform(df):
    """Run uniform_shape on df with the shared max_spectrogram_size."""
    return uniform_shape(df, max_spectrogram_size)


datasets_raw = [uniform(df) for df in datasets_raw]

# Augmented frames are additionally narrowed to these columns and re-indexed from 0.
augmented_columns = ['spectrogram', 'class_ID', 'class_name', 'augmentation']
datasets_augmented = [
    uniform(df)[augmented_columns].reset_index(drop=True)
    for df in datasets_augmented
]

import random
import keras
import os, glob
import logging
import librosa, librosa.display

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import confusion_matrix
from keras.models import model_from_json
from keras import backend as K

from src.metrics import *
from settings import *
from src.data import generate
from src.processing import *
from src.model import CNN_nodropout
from src.data.preprocessing import get_most_shape
from setup_logging import setup_logging

setup_logging()
logger = logging.getLogger('src.train')

# Split each dataset with split_ratio=0.65. The per-dataset splits are kept in
# load order (raw datasets first, then augmented ones) because the next cell
# zips test_datas with instruments + instruments_aug.
# train_test_split is the project's helper (it logs under 'src.processing'),
# not scikit-learn's function of the same name.
train_datas, test_datas = [], []
for dataset_group, is_augmented in ((datasets_raw, False), (datasets_augmented, True)):
    for dataset in dataset_group:
        train_data, test_data = train_test_split(dataset, augmented=is_augmented, split_ratio=0.65)
        train_datas.append(train_data)
        test_datas.append(test_data)

# Merge all instruments, then shuffle the rows with a fixed seed and renumber them.
train_data = pd.concat(train_datas).sample(frac=1, random_state=1).reset_index(drop=True)
test_data = pd.concat(test_datas).sample(frac=1, random_state=1).reset_index(drop=True)

# Shape handed to the model constructor below.
most_shape = get_most_shape(train_data)

logger.info(f"Number of train samples: {len(train_data)}")
logger.info(f"Number of test samples: {len(test_data)}")

X_train, y_train = features_target_split(train_data)
X_test, y_test = features_target_split(test_data)

# Reshape for CNN input
X_train = reshape_feature_CNN(X_train, size=max_spectrogram_size)
X_test = reshape_feature_CNN(X_test, size=max_spectrogram_size)

# Keep the integer labels: confusion_matrix below needs them, not the one-hot form.
y_test_values = y_test.copy()

# One-Hot encoding for classes
y_train = one_hot_encode(y_train)
y_test = one_hot_encode(y_test)

# Instance of CNN model
cnn = CNN_nodropout(most_shape)
logger.info(str(cnn))

# NOTE(review): the test split is also passed to train(); if it is used there for
# validation or model selection, the test metrics are optimistic -- confirm.
cnn.train(X_train, y_train, X_test, y_test, epochs=8)
cnn.evaluate(X_train, y_train, X_test, y_test)

# predict_classes is only used on TensorFlow 1.8.0; every other version takes the
# argmax over the predicted class scores.
if tf.__version__ == '1.8.0':
    predictions = cnn.model.predict_classes(X_test)
else:
    predict_x = cnn.model.predict(X_test)
    predictions = np.argmax(predict_x, axis=1)

conf_matrix = confusion_matrix(y_test_values, predictions, labels=range(10))
logger.info('Confusion Matrix for classes {}:\n{}'.format(CLASSES, conf_matrix))
#cnn.save_model(name="model_all_data_augment_1")

2023-05-20 15:15:19.477892: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-20 15:15:19.612286: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-20 15:15:20.213779: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvrtc.so.11.0: cannot open shared object file: No such file or directory
2023-05-20 15:15:20.213853: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64]

src.processing - INFO - Start train test split with split ratio: 0.65
src.processing - INFO - Number of training samples is 1300
src.processing - INFO - Number of testing samples is 700
src.processing - INFO - Train test split completed
src.processing - INFO - Start train test split with split ratio: 0.65
src.processing - INFO - Number of training samples is 585
src.processing - INFO - Number of testing samples is 62
src.processing - INFO - Train test split completed
src.processing - INFO - Start train test split with split ratio: 0.65
src.processing - INFO - Number of training samples is 585
src.processing - INFO - Number of testing samples is 62
src.processing - INFO - Train test split completed
src.processing - INFO - Start train test split with split ratio: 0.65
src.processing - INFO - Number of training samples is 585
src.processing - INFO - Number of testing samples is 62
src.processing - INFO - Train test split completed
The most frequent shape is (128, 213)
src.train - INFO - N

2023-05-20 15:15:23.065962: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22324 MB memory:  -> device: 0, name: NVIDIA RTX A5000, pci bus id: 0000:3b:00.0, compute capability: 8.6


Epoch 1/8


2023-05-20 15:15:26.486640: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8600
2023-05-20 15:15:27.283620: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
src.model - INFO - Training completed
src.model - INFO - Evaluating model
src.model - INFO - Train loss: 0.05370834842324257
src.model - INFO - Train precision: 0.9877850413322449
src.model - INFO - Train recall: 0.9807074666023254
src.model - INFO - Train f1-score: 0.9841790795326233
src.model - INFO - Test loss: 0.24418321251869202
src.model - INFO - Test precision: 0.935623049736023
src.model - INFO - Test recall: 0.9241071343421936
src.model - INFO - Test f1-score: 0.9297384023666382
src.train - INFO - Confusion Matrix for classes ['a', 'am', 'bm', 'c', 'd', 'dm', 'e', 'em', 'f', 'g']:
[[ 81   2   0   0   0   0   0   2   0   1]
 [  4  66   0   2   0   0   1   0   2   0]
 [  0   0  73   0   0   0   0   0   0   0]
 [  1   1   0  89   3   0   0   0   0   0]
 [  0   1   2   0  87   8   0   0   0   1]
 [  0   0   0   0   5  79   0   0   0   0]
 [  2   2   1   0   3   0  86   2   0   0]
 [  0   0   0   0   0   0   4  8

In [2]:
# Evaluate the trained model separately on each instrument's held-out split.
# test_datas was filled raw-datasets-first, then augmented ones, so it lines up
# element-for-element with instruments + instruments_aug.
test_instruments = instruments + instruments_aug
num_classes = len(CLASSES)

# Loop-specific names are used on purpose: rebinding test_data / X_test / y_test /
# y_test_values here would overwrite the combined test set built by the training
# cell and leave the kernel in a misleading state for any later re-run.
for instrument_test_data, instrument in zip(test_datas, test_instruments):
    # Add a trailing channel axis to every spectrogram.
    # 128 is presumably the number of frequency bins (get_most_shape reports
    # shapes of the form (128, N)) -- TODO confirm against the preprocessing code.
    X_instrument = np.array([
        spectrogram.reshape((128, max_spectrogram_size, 1))
        for spectrogram in instrument_test_data['spectrogram']
    ])
    # to_categorical already returns a NumPy array, so no extra np.array() wrap.
    y_instrument = keras.utils.to_categorical(instrument_test_data['class_ID'], num_classes)

    # NOTE(review): the indices below assume the model was compiled with metrics
    # in the order accuracy, precision, recall, f1 -- confirm in CNN_nodropout.
    score = cnn.model.evaluate(X_instrument, y_instrument)
    print(f'Test score for instrument: {instrument}')
    print('\tTest loss:', score[0])
    print('\tTest accuracy:', score[1])
    print('\tTest precision:', score[2])
    print('\tTest recall:', score[3])
    print('\tTest f1-score:', score[4])

Test score for instrument: Guitar
	Test loss: 0.3013662099838257
	Test accuracy: 0.9100000262260437
	Test precision: 0.919274091720581
	Test recall: 0.9046266078948975
	Test f1-score: 0.9117681384086609
Test score for instrument: Accordion
	Test loss: 0.01303529180586338
	Test accuracy: 1.0
	Test precision: 1.0
	Test recall: 1.0
	Test f1-score: 1.0
Test score for instrument: Violin
	Test loss: 0.025706904008984566
	Test accuracy: 0.9838709831237793
	Test precision: 0.984375
	Test recall: 0.984375
	Test f1-score: 0.984375
Test score for instrument: Piano
	Test loss: 0.04817156493663788
	Test accuracy: 1.0
	Test precision: 1.0
	Test recall: 1.0
	Test f1-score: 1.0
