In [1]:
import io
import numpy as np
import tensorflow as tf
import pandas as pd
from scipy.io import wavfile
from python_speech_features import mfcc
import matplotlib.pyplot as plt
!pip install umap-learn
import umap
import seaborn as sns
from tqdm import tqdm, tqdm_notebook
from pydub import AudioSegment


import warnings
warnings.filterwarnings("ignore")

import tensorflow_addons as tfa



In [2]:
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(
        256,
        input_shape=(32,13),
        return_sequences=True
    ),
    tf.keras.layers.LSTM(
        256
    ),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        256, 
        activation=None
    ), 
    tf.keras.layers.Lambda(
        lambda x: tf.math.l2_normalize(x, axis=1)
    )
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 32, 256)           276480    
_________________________________________________________________
lstm_1 (LSTM)                (None, 256)               525312    
_________________________________________________________________
flatten (Flatten)            (None, 256)               0         
_________________________________________________________________
dense (Dense)                (None, 256)               65792     
_________________________________________________________________
lambda (Lambda)              (None, 256)               0         
Total params: 867,584
Trainable params: 867,584
Non-trainable params: 0
_________________________________________________________________


In [3]:
!ls /data/UrbanSound8K/processed_data/

preprocessed_data.npy  processed_data.npy  processed_labels.json


In [4]:
ProcessedData = np.load(
    "/data/UrbanSound8K/processed_data/processed_data.npy",
    allow_pickle=True
)

In [5]:
metadata_path = "/data/UrbanSound8K/metadata/UrbanSound8K.csv"

df = pd.read_csv(metadata_path)

In [6]:
df.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [7]:
filepath = "/data/UrbanSound8K/audio/fold{fold}/{filename}"

len(df.index)

8732

In [None]:
test_file = filepath.format(
    fold=5,
    filename="100032-3-0-0.wav"
)

fs, sample = wavfile.read(test_file)

Data = []
Labels = []
Lengths = []

exception_count = 0
idx = 0
for row in tqdm(df.iterrows()):
    row=row[1]
    filename = filepath.format(
        fold=row['fold'],
        filename=row['slice_file_name']
    )
    try:
        audio = AudioSegment.from_wav(filename)
        fs, sample = audio.frame_rate, \
            np.array(audio.get_array_of_samples(), dtype=float)
        data = mfcc(signal=sample, samplerate=fs)
        Lengths.append(data.shape[0])
        Data.append(data)
        Labels.append(row['class'])
        idx += 1
    except Exception as e:
        print(f"Exception: {filename}")
        exception_count += 1

print(f"Number of Exceptions: {exception_count}")




























































































































































































































































































































681it [01:30, 22.93it/s][A















696it [04:07,  3.91s/it][A

In [None]:
Data_ = np.array(Data)

np.save("/home/jovyan/work/preprocessed_data.npy", Data_)
import json

In [None]:
plt.hist(Lengths)

In [None]:
from pydub import AudioSegment

audio = AudioSegment.from_wav(filename)

audio.frame_rate

print(type(audio.get_array_of_samples()))

_data = mfcc(
    signal=np.array(audio.get_array_of_samples(),dtype=float), 
    samplerate=audio.frame_rate,
    nfft=2048+512
)

print(_data.shape)

In [None]:
_Data = tf.keras.preprocessing.sequence.pad_sequences(
    Data,
    maxlen=1024,
    dtype='float32',
    padding='pre',
    truncating='pre',
    value=0.0
)

from random import randint

Map = {}
l = 0
Label = []

for label in Labels:
    if label not in Map:
        Map[label] = l
        l += 1
    Label.append(Map[label])

In [None]:
np.save("/home/jovyan/work/processed_data.npy", _Data)
import json
json.dump(Label, open("/home/jovyan/work/processed_labels.json", 'w'))

In [None]:
plt.hist(Lengths)

In [None]:
from pydub import AudioSegment

audio = AudioSegment.from_wav(filename)

audio.frame_rate

print(type(audio.get_array_of_samples()))

_data = mfcc(
    signal=np.array(audio.get_array_of_samples(),dtype=float), 
    samplerate=audio.frame_rate,
    nfft=2048+512
)

print(_data.shape)

In [None]:
_Data = tf.keras.preprocessing.sequence.pad_sequences(
    Data,
    maxlen=1024,
    dtype='float32',
    padding='pre',
    truncating='pre',
    value=0.0
)

from random import randint

Map = {}
l = 0
Label = []

for label in Labels:
    if label not in Map:
        Map[label] = l
        l += 1
    Label.append(Map[label])

In [None]:
np.save("/home/jovyan/work/processed_data.npy", _Data)
import json
json.dump(Label, open("/home/jovyan/work/processed_labels.json", 'w'))

In [None]:
_Data.shape

In [None]:
X = zip(_Data, Label)

In [None]:
dataset = tf.data.Dataset.from_tensor_slices((_Data, Label))
dataset = dataset.shuffle(1024).batch(32)
dataset

In [None]:
# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss=tfa.losses.TripletSemiHardLoss())


In [None]:
# Train the network
history = model.fit(
    dataset,
    epochs=40
)

In [None]:
model.save("urban_sounds.tf")

In [None]:
X = model.predict(_Data)

In [None]:
X.shape

In [None]:
import umap

reducer = umap.UMAP()
embedding = reducer.fit_transform(X)
embedding.shape

In [None]:
plt.scatter(embedding[:,0], embedding[:,1], c=[sns.color_palette()[x] for x in Label])