In [93]:
import pandas as pd
from tqdm.notebook import tqdm
import os
import librosa
import numpy as np
from keras import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
import IPython.display as ipd
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [108]:
BASE_PATH = "/media/aneesh/USB1000/Zurich_Urban_Sounds"
RECORDER = "TASCAM_RECORDER"
SEGMENT_DIR = "audio_segments"
filenames = [
    f for f in os.listdir(os.path.join(BASE_PATH, RECORDER, SEGMENT_DIR)) if ".wav" in f
]
silence = []
classification = []
confidence = []

In [111]:
len(filenames)

59228

In [107]:
import multiprocessing as mp
print("Number of processors: ", mp.cpu_count())

Number of processors:  8


In [95]:
class_mapping = {
    3: "dog_bark",
    2: "children_playing",
    1: "car_horn",
    0: "air_conditioner",
    9: "street_music",
    6: "gun_shot",
    8: "siren",
    5: "engine_idling",
    7: "jackhammer",
    4: "drilling",
}

In [35]:
def get_model():
    model = Sequential()
    model.add(
        Conv2D(
            64,
            kernel_size=5,
            strides=1,
            padding="Same",
            activation="relu",
            input_shape=(36, 5, 1),
        )
    )
    model.add(MaxPooling2D(padding="same"))
    model.add(Conv2D(128, kernel_size=5, strides=1, padding="same", activation="relu"))
    model.add(MaxPooling2D(padding="same"))
    model.add(Dropout(0.3))
    model.add(Flatten())
    model.add(Dense(256, activation="relu"))
    model.add(Dropout(0.3))
    model.add(Dense(512, activation="relu"))
    model.add(Dropout(0.3))
    model.add(Dense(10, activation="softmax"))
    model.compile(
        optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
    )
    model.load_weights("saved_model")
    return model


model = get_model()


def get_features(base_path, recorder, segment_dir, filename):
    y, sr = librosa.load(os.path.join(base_path, recorder, segment_dir, filename))
    mfccs = np.mean(librosa.feature.mfcc(y, sr, n_mfcc=36).T, axis=0)
    melspectrogram = np.mean(
        librosa.feature.melspectrogram(y=y, sr=sr, n_mels=36, fmax=8000).T, axis=0
    )
    chroma_stft = np.mean(
        librosa.feature.chroma_stft(y=y, sr=sr, n_chroma=36).T, axis=0
    )
    chroma_cq = np.mean(librosa.feature.chroma_cqt(y=y, sr=sr, n_chroma=36).T, axis=0)
    chroma_cens = np.mean(
        librosa.feature.chroma_cens(y=y, sr=sr, n_chroma=36).T, axis=0
    )
    melspectrogram.shape, chroma_stft.shape, chroma_cq.shape, chroma_cens.shape, mfccs.shape
    features = np.reshape(
        np.vstack((mfccs, melspectrogram, chroma_stft, chroma_cq, chroma_cens)), (36, 5)
    )
    return features

In [72]:
temp = get_features(BASE_PATH, RECORDER, SEGMENT_DIR, filenames[0])


def analyse(features):
    prob = model.predict(features.reshape(-1, 36, 5, 1))
    category = np.argmax(prob)
    conf = prob[:, category].squeeze()
    #     if len(conf)==1:
    #         conf =conf[0]
    return category, conf

In [74]:
filenames = [
    f for f in os.listdir(os.path.join(BASE_PATH, RECORDER, SEGMENT_DIR)) if ".wav" in f
]
silence = []
classification = []
confidence = []

In [75]:
for i in tqdm(range(100)):
    file = filenames[i]
    features = get_features(BASE_PATH, RECORDER, SEGMENT_DIR, file)
    category, conf = analyse(features)
    confidence.append(conf)
    classification.append(category)

HBox(children=(HTML(value=''), FloatProgress(value=0.0), HTML(value='')))




In [88]:
high_confidence_index = np.argwhere(np.array(confidence) > 0.8)
np.array(filenames[:100])[high_confidence_index]
np.array(classification)[high_confidence_index]

array([[5],
       [9],
       [5],
       [5],
       [5],
       [5],
       [5]])

In [106]:
@interact()
def visualize_high_confidence(indx=[i for i in range(len(high_confidence_index))]):
    idx = high_confidence_index[indx][0]
    print(idx)
    filename = filenames[idx]
    print(filename)
    category = class_mapping[classification[idx]]
    conf = confidence[idx]
    print(f"category : {category}, confidence :{conf}")
    display(ipd.Audio(os.path.join(BASE_PATH, RECORDER,SEGMENT_DIR,filename)))

interactive(children=(Dropdown(description='indx', options=(0, 1, 2, 3, 4, 5, 6), value=0), Output()), _dom_cl…