In [1]:
import librosa as lb
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import os
from tqdm import tqdm
import re

In [2]:
import warnings
# Installs a global 'ignore' filter, so every warning raised by later cells
# (including deprecation warnings from the imported libraries) is silenced.
warnings.filterwarnings('ignore')

In [3]:
# Location of the metadata CSV; later cells read its 'filepath' and 'tonic' columns.
# NOTE(review): this is an absolute, machine-specific path - the notebook will
# only run as-is on a machine that has this file at this location.
csv_path = '/Users/rajeshr/Desktop/all_data_with_date.csv'

# Parse the whole CSV into a DataFrame.
file = pd.read_csv(filepath_or_buffer=csv_path)

In [4]:
# Build a frame with one row per CSV entry:
#   column 0    -> dataset label, taken as the second '/'-separated component
#                  of the file path (e.g. 'CM' in 'TonicDataset/CM/audio/...')
#   'filepath'  -> the original path
#   'tonic'     -> the annotated tonic value
#
# The label list is derived from the 'filepath' column itself rather than from
# a hard-coded row count, so it always covers exactly the rows that were loaded
# (a fixed count raises on a shorter CSV and silently drops rows of a longer one).
dataset_type = pd.DataFrame([path.split('/')[1] for path in file['filepath']])
dataset_type["filepath"] = file['filepath']
dataset_type["tonic"] = file['tonic']
dataset_type.head()

Unnamed: 0,0,filepath,tonic
0,CM,TonicDataset/CM/audio/402f49e2-5957-4b24-9229-...,161.748
1,CM,TonicDataset/CM/audio/0640aaa2-45c8-48ef-b1ed-...,161.743
2,CM,TonicDataset/CM/audio/90c3b772-189c-411e-aea5-...,154.871
3,CM,TonicDataset/CM/audio/c5976719-6376-4522-b62e-...,161.845
4,CM,TonicDataset/CM/audio/6f4cc2d5-b062-40f9-bdf1-...,156.456


In [5]:
# Accumulators: one stacked feature volume per accepted recording, plus the
# matching tonic label.
# NOTE(review): `cm` is initialised here but never filled in this cell.
cm, tonic_cm = [], []
features = []

for i in tqdm(range(0,10)):
    # Only rows whose dataset label (column 0) is 'CM' are processed.
    if dataset_type[0][i] != 'CM':
        continue

    # Load as mono audio resampled to 8 kHz.
    y, fs = lb.load(dataset_type["filepath"][i], sr=8000, mono=True)

    # Keep only clips whose duration is exactly 180 seconds.
    if len(y)/fs != 180:
        continue

    # STFT, then harmonic/percussive separation (only the harmonic part is used below).
    spect = lb.stft(y)
    harmonic, percussive = lb.decompose.hpss(spect, margin=16)

    # Salience of the harmonic magnitude over harmonics 1 and 2; NaNs are zeroed.
    freqs = lb.fft_frequencies(sr=fs)
    harms = [1, 2]
    harmonic_mag = np.abs(harmonic)
    salience = np.nan_to_num(lb.salience(harmonic_mag, freqs=freqs, harmonics=harms))

    # Stack the three magnitude maps along a new third axis:
    # full spectrogram, harmonic component, salience.
    layers = (np.abs(spect), harmonic_mag, np.abs(salience))
    features.append(np.dstack(layers))

    tonic_cm.append(dataset_type["tonic"][i])

100%|███████████████████████████████████████████| 10/10 [00:46<00:00,  4.63s/it]


In [6]:
# Convert the per-recording lists into arrays (first axis = recording).
feature = np.array(features)
groundTruth = np.array(tonic_cm)

# Sequential 80/20 split: the first `l` recordings train, the remainder test.
# No shuffling is applied, so the split follows the order of extraction.
a = len(features)
l = int(a*0.8)

xtrain, xtest = feature[:l], feature[l:]
ytrain, ytest = groundTruth[:l], groundTruth[l:]

In [7]:
# Sanity check: display the shapes of the held-out features and labels.
xtest.shape, ytest.shape

((2, 1025, 2813, 3), (2,))

In [8]:
from keras.layers import Input, Dense, Conv3D, Conv2D, MaxPooling2D, MaxPooling3D, \
Flatten, Dropout, BatchNormalization, Reshape
from keras.models import Model

In [9]:
# Training hyper-parameters used by the later `fit` call.
batch_size = 16
epochs = 20

# Per-sample input shape: the three stacked maps plus a trailing singleton
# channel axis. The first three sizes match the feature shape displayed by
# the shape check above.
input_shape = (1025, 2813, 3, 1)
inp = Input(shape=input_shape)
print(inp.shape)

(None, 1025, 2813, 3, 1)


In [10]:
# Layer stack of the tonic regressor, built with the Keras functional API:
# three Conv3D -> BatchNorm -> MaxPool3D -> Dropout stages, then
# Flatten -> Dense(100) -> Dense(1). The shape after every stage is printed.
print("INPUT", inp.shape)


# Conv1 #
# The kernel has depth 1, so the three stacked maps are filtered independently here.
# No `input_shape` argument: the layer is called directly on `inp`, which already
# fixes the input shape (and `inp.shape` would wrongly include the batch dimension).
x = Conv3D(64, (15, 15, 1), activation='relu')(inp)
x = BatchNormalization()(x)
x = MaxPooling3D(pool_size=(3, 3, 1))(x)
x = Dropout(0.30)(x)
print("1st Layer", x.shape)

# Conv2 #
# Depth-2 kernel: starts mixing adjacent maps (depth 3 -> 2).
x = Conv3D(32, (9, 9, 2), activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling3D(pool_size=(3, 3, 1))(x)
x = Dropout(0.30)(x)
print("2nd Layer", x.shape)

# Conv 3 #
# Depth-2 kernel again: collapses the depth axis to 1.
x = Conv3D(16, (5, 5, 2), activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling3D(pool_size=(3, 3, 1))(x)
x = Dropout(0.30)(x)
print("3rd Layer", x.shape)

x = Flatten()(x)
print("Flatten Layer", x.shape)

x = Dense(100, activation='relu')(x)
print("Dense Layer", x.shape)

# Regression head: a single linear unit. A ReLU here can get stuck at an output
# of exactly 0 with zero gradient, which would stop the model from learning.
x = Dense(1, activation='linear')(x)
print("Output Layer", x.shape)

INPUT (None, 1025, 2813, 3, 1)
1st Layer (None, 337, 933, 3, 64)
2nd Layer (None, 109, 308, 2, 32)
3rd Layer (None, 35, 101, 1, 16)
Flatten Layer (None, 56560)
Dense Layer (None, 100)
Output Layer (None, 1)


In [None]:
# Assemble the functional model from the input tensor and the output of the layer stack.
tonic_id = Model(inputs=inp, outputs=x)

# Regression on the tonic value with mean-squared-error loss.
# `metrics` is passed as a list (newer Keras rejects a bare string). 'mae' is
# the meaningful metric for this regression; 'accuracy' carries no useful
# information for continuous targets and is kept only so the existing
# history['accuracy'] plot cell keeps working.
tonic_id.compile(loss='MeanSquaredError', optimizer='adam', metrics=['mae', 'accuracy'])

# NOTE(review): the displayed feature arrays are rank-4 while the model input
# is rank-5 (trailing channel of size 1); this presumably relies on Keras
# adding the trailing axis automatically - confirm for the Keras version in use.
tonic_id_train = tonic_id.fit(xtrain, ytrain, epochs=epochs, batch_size=batch_size)

Epoch 1/20


2022-12-15 17:10:39.182679: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [None]:
# Evaluate the trained model on the held-out split (no progress output) and
# print whatever `evaluate` returns for the compiled loss and metrics.
test_eval = tonic_id.evaluate(x=xtest, y=ytest, verbose=0)
print('Test', test_eval)

In [None]:
# Plot the 'accuracy' values recorded in the training history.
fig, ax = plt.subplots()
ax.plot(tonic_id_train.history['accuracy'])
ax.legend(['training'], loc = 'upper left')
plt.show()

In [None]:
# Plot the 'loss' values recorded in the training history.
fig, ax = plt.subplots()
ax.plot(tonic_id_train.history['loss'])
ax.legend(['training'], loc = 'upper left')
plt.show()

In [None]:
# Predict tonic values for the held-out features. The split cell names this
# array `xtest`; the previous `x_test` was never defined and raised NameError.
predictions = tonic_id.predict(xtest, verbose=1)
predictions.shape