In [None]:
import os
import librosa
import soundfile as sf
import subprocess
import pickle
import tensorflow as tf
import keras
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
# Let TensorFlow grow its GPU memory use on demand instead of reserving it all
# up front. The flag is set on every visible GPU rather than only the first:
# TensorFlow requires memory growth to be configured the same way on all GPUs.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

In [None]:
# Relative locations of the inputs this notebook reads.
# NOTE: the "pickels" key is misspelled, but it is the key the pickle-loading
# cells look up, so it has to stay as it is.
paths = dict(
    files="../../../assets/temp",
    pickels="../../../assets/audio_sentiment_data_v2/pickles",
    models="../../../assets/audio_sentiment_data_v2/models",
)

In [None]:
# Names of the entries in the input directory, leaving out the '.gitignore'
# placeholder. Filtering (rather than list.remove) means the cell also works
# when the directory has no '.gitignore' in it.
files = [name for name in os.listdir(paths['files']) if name != '.gitignore']

In [None]:
# Load the pickled scaler that scale_features() applies to each feature row.
# The `with` block closes the file handle as soon as the object is loaded.
# NOTE: pickle.load can run arbitrary code; only load pickles you trust.
with open(f"{paths['pickels']}/scaler.pickle", "rb") as pickle_in:
    scaler = pickle.load(pickle_in)
scaler

In [None]:
# Load the pickled label encoder (presumably the one fitted at training time;
# it is used later to map predicted class indices back to label names).
# The `with` block closes the file handle as soon as the object is loaded.
# NOTE: pickle.load can run arbitrary code; only load pickles you trust.
with open(f"{paths['pickels']}/labels.pickle", "rb") as pickle_in:
    labels = pickle.load(pickle_in)
labels.classes_

In [None]:
# Name of the saved model under paths['models']. The bracketed numbers are part
# of the on-disk name (they look like evaluation metrics — TODO confirm).
model_name = "hyperband_tuned_best_model_[0.4309597909450531, 0.8404908180236816]"
model_location = f"{paths['models']}/{model_name}"
model = tf.keras.models.load_model(model_location)

In [None]:
# Show the summary of the model loaded in the previous cell.
model.summary()

In [None]:
def feature_extraction(y, sr=None):
    """Build the 37-value feature list for one block of audio samples.

    The list holds, in this order: the mean RMS energy, spectral centroid,
    spectral bandwidth, spectral roll-off and zero-crossing rate (5 values),
    then the per-bin means of the first 12 chroma rows and the per-coefficient
    means of the first 20 MFCC rows.

    Parameters
    ----------
    y : array of audio samples, passed straight to the librosa feature calls.
    sr : sample rate of ``y``. When omitted, the notebook-level ``sr`` variable
        is used, which is what this function always did before the parameter
        existed; pass it explicitly to avoid depending on that hidden state.

    Returns
    -------
    list of 37 numpy scalars.

    Raises
    ------
    NameError
        If ``sr`` is not given and no notebook-level ``sr`` has been defined.
    """
    if sr is None:
        # Backward-compatible fallback to the global set by the loading cell.
        try:
            sr = globals()["sr"]
        except KeyError:
            raise NameError("name 'sr' is not defined") from None

    rmse = np.mean(librosa.feature.rms(y=y))
    spec_cent = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    spec_bw = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
    zcr = np.mean(librosa.feature.zero_crossing_rate(y))

    # axis=1 averages over frames, leaving one value per chroma bin / MFCC.
    chroma_stft = np.mean(librosa.feature.chroma_stft(y=y, sr=sr), axis=1)
    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr), axis=1)

    # Same 5 + 12 + 20 layout as before, written with unpacking instead of
    # thirty-two hand-numbered index expressions.
    return [rmse, spec_cent, spec_bw, rolloff, zcr,
            *chroma_stft[:12],
            *mfcc[:20]]

In [None]:
def scale_features(X):
    """Return ``X`` after passing it through the notebook-level ``scaler``.

    ``X`` is handed to ``scaler.transform`` unchanged, and whatever that call
    returns is returned to the caller.
    """
    scaled = scaler.transform(X)
    return scaled

In [None]:
# Clip to analyse, given as a relative path in the same way as the entries in
# `paths`, instead of an absolute path that only exists on one machine.
# NOTE(review): assumes "../../../assets" is the same assets folder the old
# absolute D:\ path pointed into — confirm before running.
audio_path = "../../../assets/audiobooks/clip_1.wav"
audio, sr = librosa.load(audio_path, res_type='kaiser_fast', sr=22050*2)

# Window length in samples: 3 seconds at the loaded sample rate.
buffer = 3 * sr
samples_total = len(audio)

# One model output per window, in playback order. Each entry is what
# model.predict returns for a single-row input.
predictions = []

for window_start in range(0, samples_total, buffer):
    # Slicing stops at the end of the array, so the last window is simply
    # shorter when the clip length is not a multiple of `buffer`.
    block = audio[window_start:window_start + buffer]

    data_features = np.array(feature_extraction(block))

    # The scaler and the model both take a 2-D input: one row, all features.
    scaled_features = scale_features(data_features.reshape(1, -1))

    # verbose=0 keeps Keras from printing a progress bar for every window.
    predictions.append(model.predict(scaled_features, verbose=0))

In [None]:
# Turn each window's model output into a label: take the index of the largest
# value along axis 1, flatten it to 1-D integers, and map it back through the
# label encoder.
out = [
    labels.inverse_transform(window_pred.argmax(axis=1).astype(int).flatten())
    for window_pred in predictions
]
out

In [None]:
# Average the per-window model outputs into a single vector for the whole clip.
if not predictions:
    raise ValueError("no predictions to average: the audio clip produced no windows")

# Each entry of `predictions` comes from a one-row input, so stacking along
# axis 0 gives one row per window. Unlike np.squeeze, this keeps the window
# axis when there is only one window, so the mean is still taken over windows
# rather than over the output values.
stacked_predictions = np.concatenate(predictions, axis=0)
mean_prediction = stacked_predictions.mean(axis=0)
mean_prediction