In [11]:
import pandas as pd
import os, sys

import tensorflow as tf

module_path = os.path.abspath(os.path.join('./..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from settings import *
from notebooks.my_train import *
from src.model import CNN, CNN_nodropout
import librosa

most_shape = (128, 213)
baseline = CNN(most_shape)
baseline.load_model('baseline_latest_128_213')

src.model - INFO - Initializing CNN
src.model - INFO - Input shape = (128, 213, 1)
src.model - INFO - CNN Initialized
src.model - INFO - Loading saved model
src.model - INFO - Model loaded from /home/tzag/danigil/dl/guitarCR/models


In [2]:
from src.data.preprocessing import uniform_shape
from src.processing import *

train_datas = []
test_datas = []

instruments = ['Guitar', 'Accordion', 'Violin', 'Piano']
datasets_raw = [pd.read_pickle(os.path.join(METADATA_DIR_PROCESSED, f'data_{instrument.lower()}.pkl')) for instrument in instruments]

for i in range(len(datasets_raw)):
    datasets_raw[i] = uniform_shape(datasets_raw[i], most_shape[1])

train_data_guitar, test_data_guitar = train_test_split(datasets_raw[0], augmented=False, split_ratio=0.65)

test_datas.append(test_data_guitar)

src.processing - INFO - Start train test split with split ratio: 0.65
src.processing - INFO - Number of training samples is 1300
src.processing - INFO - Number of testing samples is 700
src.processing - INFO - Train test split completed


In [7]:

X_test, y_test = features_target_split(test_data_guitar)
X_train, y_train = features_target_split(train_data_guitar)


X_train, X_test = reshape_feature_CNN(X_train, size=most_shape[1]), reshape_feature_CNN(X_test, size=most_shape[1])

y_test_values = y_test.copy()

y_train, y_test = one_hot_encode(y_train), one_hot_encode(y_test)
baseline.train(X_train, y_train, X_test, y_test, epochs=67)

src.processing - INFO - Start train test split with split ratio: 0.65
src.processing - INFO - Number of training samples is 1300
src.processing - INFO - Number of testing samples is 700
src.processing - INFO - Train test split completed
src.processing - INFO - Start feature target split
src.processing - INFO - Feature target split completed
src.processing - INFO - Start feature target split
src.processing - INFO - Feature target split completed
src.processing - INFO - Features reshaped for CNN Input
src.processing - INFO - Features reshaped for CNN Input
src.processing - INFO - Target one hot encoded
src.processing - INFO - Target one hot encoded
src.model - INFO - Start training model
src.model - INFO - Tensorboard Logging Started
src.model - INFO - Use the following command in the terminal to view the logs during training: tensorboard --logdir logs/training
Epoch 1/67
Epoch 2/67
Epoch 3/67
Epoch 4/67
Epoch 5/67
Epoch 6/67
Epoch 7/67
Epoch 8/67
Epoch 9/67
Epoch 10/67
Epoch 11/67
Epoch

In [8]:
baseline.save_model('baseline_latest_128_213')

src.model - INFO - Saving model
src.model - INFO - Saved model to /home/tzag/danigil/dl/guitarCR/models


In [5]:
train_data_guitar, test_data_guitar = train_test_split(datasets_raw[0], augmented=False, split_ratio=0.65)
test_datas = []
test_datas.append(test_data_guitar)

for i, dataset in enumerate(datasets_raw):
    if i == 0:
        continue

    _, test_data = train_test_split(dataset, augmented=False, split_ratio=0)
    test_datas.append(test_data)

src.processing - INFO - Start train test split with split ratio: 0.65
src.processing - INFO - Number of training samples is 1300
src.processing - INFO - Number of testing samples is 700
src.processing - INFO - Train test split completed
src.processing - INFO - Start train test split with split ratio: 0
src.processing - INFO - Number of training samples is 0
src.processing - INFO - Number of testing samples is 100
src.processing - INFO - Train test split completed
src.processing - INFO - Start train test split with split ratio: 0
src.processing - INFO - Number of training samples is 0
src.processing - INFO - Number of testing samples is 100
src.processing - INFO - Train test split completed
src.processing - INFO - Start train test split with split ratio: 0
src.processing - INFO - Number of training samples is 0
src.processing - INFO - Number of testing samples is 100
src.processing - INFO - Train test split completed


In [12]:
test_instruments = instruments
for test_data, instrument in zip(test_datas, test_instruments):
    X_test = test_data['spectrogram']
    X_test = np.array([x.reshape( (128, most_shape[1], 1) ) for x in X_test])
    y_test = test_data['class_ID']

    y_test_values=y_test
    y_test = np.array(keras.utils.to_categorical(y_test, 10))

    score = baseline.model.evaluate(X_test,y_test)
    print(f'Test score for instrument: {instrument}')
    print('\tTest loss:', score[0])
    print('\tTest accuracy:', score[1])
    print('\tTest precision:', score[2])
    print('\tTest recall:', score[3])
    print('\tTest f1-score:', score[4])

Test score for instrument: Guitar
	Test loss: 0.42860978841781616
	Test accuracy: 0.9442856907844543
	Test precision: 0.9481370449066162
	Test recall: 0.9427759647369385
	Test f1-score: 0.9454139471054077
Test score for instrument: Accordion
	Test loss: 265.3677978515625
	Test accuracy: 0.4300000071525574
	Test precision: 0.39238911867141724
	Test recall: 0.390625
	Test f1-score: 0.391493022441864
Test score for instrument: Violin
	Test loss: 71.860107421875
	Test accuracy: 0.3499999940395355
	Test precision: 0.2734375
	Test recall: 0.2734375
	Test f1-score: 0.2734374701976776
Test score for instrument: Piano
	Test loss: 9.519414901733398
	Test accuracy: 0.3199999928474426
	Test precision: 0.25489628314971924
	Test recall: 0.234375
	Test f1-score: 0.24404756724834442


: 

In [8]:
dataset_piano = pd.read_pickle(os.path.join(METADATA_DIR_RAW_OOD, 'data_piano.pkl'))
dataset_guitar = pd.read_pickle(os.path.join(METADATA_DIR_RAW_OOD, 'data_guitar.pkl'))
size=213
dataset_piano['spectrogram'] = dataset_piano['spectrogram'].apply(lambda x: np.pad(x, ((0, 0), (0, size-x.shape[1])), 'constant'))
dataset_guitar['spectrogram'] = dataset_guitar['spectrogram'].apply(lambda x: np.pad(x, ((0, 0), (0, size-x.shape[1])), 'constant'))

test_data = dataset_piano

X_test = test_data['spectrogram']
X_test = np.array([np.pad(x, ((0, 0), (0, size-x.shape[1])), 'constant') for x in dataset_piano['spectrogram']])
y_test = test_data['class_ID']

X_test = np.array([x.reshape( (128, size, 1) ) for x in X_test])

y_test_values=y_test
y_test = np.array(keras.utils.to_categorical(y_test, 10))

score = baseline.model.evaluate(
	x=X_test,
	y=y_test)

print('Piano Test accuracy:', score[1])

test_data = dataset_guitar

X_test = test_data['spectrogram']
X_test = np.array([np.pad(x, ((0, 0), (0, size-x.shape[1])), 'constant') for x in dataset_guitar['spectrogram']])
y_test = test_data['class_ID']

X_test = np.array([x.reshape( (128, size, 1) ) for x in X_test])

y_test_values=y_test
y_test = np.array(keras.utils.to_categorical(y_test, 10))

score = baseline.model.evaluate(
	x=X_test,
	y=y_test)

print('Guitar Test accuracy:', score[1])

Piano Test accuracy: 0.36000001430511475
Guitar Test accuracy: 0.46666666865348816


In [11]:
y_pred = {}
y_true = {}

from sklearn.metrics import classification_report, accuracy_score, recall_score, precision_score, f1_score
import warnings
warnings.filterwarnings("ignore")

ood2_path = './../data/audio/myood2/splits'
df = pd.DataFrame()

for instrument in os.listdir(ood2_path):
    y_pred[instrument] = []
    # y_pred_3[instrument] = []
    y_true[instrument] = []

    for chord in os.listdir(os.path.join(ood2_path, instrument)):

        #true = file.split('-')[-1][:1]
        true = chord.replace('.wav','')
        assert true in CLASSES
        y_true[instrument].append(true)

        curr_path = os.path.join(ood2_path, instrument, chord)
        y, sr = librosa.load(curr_path, sr=44100, duration=2)

        spectrogram = librosa.feature.melspectrogram(y=y,sr=sr, n_mels=128)
        #spectrogram = librosa.util.normalize(np.log(librosa.feature.melspectrogram(y=y,sr=sr, n_mels=128) + 1e-9))
        spectrogram = np.pad(spectrogram, ((0, 0), (0, most_shape[1]-spectrogram.shape[1])), 'constant')
        spectrogram = spectrogram.reshape((1,)+most_shape+(1,))

        predict_x=baseline.model.predict(spectrogram, batch_size=1, verbose=0)
        predictions = np.argmax(predict_x,axis=1)
        pred = CLASSES[predictions[0]]
        y_pred[instrument].append(pred)
        
        # predict_x_3=experiment3.model.predict(spectrogram, batch_size=1, verbose=0)
        # predictions_3 = np.argmax(predict_x_3,axis=1)
        # pred_3 = CLASSES[predictions_3[0]]
        # y_pred_3[instrument].append(pred_3)

    slen = int((len('#EXP | ACCURACY | RECALL | PRECISION | F1-SCORE') - len(instrument))/2)
    print(f"{'~'*slen}{instrument}{'~'*slen}")
    print(f'#EXP | ACCURACY | RECALL | PRECISION | F1-SCORE')
    print(f"""BASELINE | {'{:.6f}'.format(accuracy_score(y_true[instrument], y_pred[instrument]))} | {'{:.4f}'.format(recall_score(y_true[instrument], y_pred[instrument], average='macro'))} | {'{:.7f}'.format(precision_score(y_true[instrument], y_pred[instrument], average='macro'))} | {'{:.6f}'.format(f1_score(y_true[instrument], y_pred[instrument], average='macro'))}""")
    # print(f"""EXP3 | {'{:.6f}'.format(accuracy_score(y_true[instrument], y_pred_3[instrument]))} | {'{:.4f}'.format(recall_score(y_true[instrument], y_pred_3[instrument], average='macro'))} | {'{:.7f}'.format(precision_score(y_true[instrument], y_pred_3[instrument], average='macro'))} | {'{:.6f}'.format(f1_score(y_true[instrument], y_pred_3[instrument], average='macro'))}""")


~~~~~~~~~~~~~~~~~~~xylophone~~~~~~~~~~~~~~~~~~~
#EXP | ACCURACY | RECALL | PRECISION | F1-SCORE
BASELINE | 0.000000 | 0.0000 | 0.0000000 | 0.000000
~~~~~~~~~~~~~~~~~~~clarinet~~~~~~~~~~~~~~~~~~~
#EXP | ACCURACY | RECALL | PRECISION | F1-SCORE
BASELINE | 0.100000 | 0.1000 | 0.0333333 | 0.050000
~~~~~~~~~~~~~~~~~~~~trumpet~~~~~~~~~~~~~~~~~~~~
#EXP | ACCURACY | RECALL | PRECISION | F1-SCORE
BASELINE | 0.200000 | 0.2000 | 0.0750000 | 0.106667
~~~~~~~~~~~~~~~~~~~~~oboe~~~~~~~~~~~~~~~~~~~~~
#EXP | ACCURACY | RECALL | PRECISION | F1-SCORE
BASELINE | 0.300000 | 0.3000 | 0.1666667 | 0.195238
~~~~~~~~~~~~~~~~~~~~~harp~~~~~~~~~~~~~~~~~~~~~
#EXP | ACCURACY | RECALL | PRECISION | F1-SCORE
BASELINE | 0.100000 | 0.1000 | 0.0200000 | 0.033333
~~~~~~~~~~~~~~~~~tubular_bells~~~~~~~~~~~~~~~~~
#EXP | ACCURACY | RECALL | PRECISION | F1-SCORE
BASELINE | 0.000000 | 0.0000 | 0.0000000 | 0.000000
~~~~~~~~~~~~~~~~~~~~~horn~~~~~~~~~~~~~~~~~~~~~
#EXP | ACCURACY | RECALL | PRECISION | F1-SCORE
BASELINE | 0.200000 