# Setup

In [1]:
# Colab-only step: mount the user's Google Drive at /content/drive so the
# project files stored there are reachable through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import sys
import datetime
import numpy as np
import tensorflow as tf
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, BatchNormalization, \
    Add, Input, Conv1D, MaxPooling1D, Flatten, \
    Lambda

from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.models import load_model
import tensorflow.keras.backend as K

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter
from sklearn.metrics import classification_report

from tqdm.auto import tqdm

In [3]:
# Make the project folder on the mounted Drive the working directory; the
# relative paths used later in this notebook (e.g. "data/features/" and
# "weights") are resolved against it.
os.chdir("/content/drive/MyDrive/Projects/AlarmWaterClassification")

# Work with the data

In [None]:
# %cd AOtherNoiseProject/
# !pip install -q pydub
# !python clip.py
# %cd ..

/content/drive/MyDrive/Work/Justin Gatau Maules/AlarmWaterClassification/AOtherNoiseProject
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
error: XDG_RUNTIME_DIR not set in the environment.
ALSA lib confmisc.c:855:(parse_card) cannot find card '0'
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_card_inum returned error: No such file or directory
ALSA lib confmisc.c:422:(snd_func_concat) error evaluating strings
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_concat returned error: No such file or directory
ALSA lib confmisc.c:1334:(snd_func_refer) error evaluating name
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_refer returned error: No such file or directory
ALSA lib conf.c:5701:(snd_config_expand) Evaluate error: No such file or directory
ALSA lib pcm.c:2664:(snd_pcm_open_noupdate) Unknown PCM default
ALSA lib confmisc.c:855:(parse_card) can

In [None]:
# !python data_arranger.py


Discovered Raw files

Total number of files: 28892
Number of files in each class:
Alarm: 1357
Alarm_clock: 903
Alert_bad: 50
audioset-smoke_alarm: 343
bathtub_filling_or_washing: 197
Bell: 2354
Bird: 899
Busy_signal: 324
Car_alarm: 609
Cellphone_buzz_vibrating_alert: 195
Clock: 236
Dial_tone: 322
dishes_pots_and_pans: 355
door: 504
Echo: 404
Fill_with_liquid: 161
Fire_alarm: 918
Inside large room or hall: 71
Inside public space: 30
microwave_oven: 116
Other sourceless: 16
Ping: 44
Shower: 29
Siren: 784
smoke_detector_smoke_alarm: 88
Stream_river: 523
Telephone_bell_ringing: 702
toilet_flush: 370
Water: 2965
Waterfall: 246
Water_tap_faucet: 537
WaterWhiteNoise: 3600
OthersRandom: 8640

Arranging files to data/train

Copying files:  33% 9615/28892 [1:44:48<3:30:07,  1.53it/s]
Total number of files: 25961
Number of files in each class:
Alarm: 8149
Water: 8197
Other: 9615

Collecting test sample

Sampling Other files: 100% 480/480 [00:04<00:00, 99.50it/s] 
Files have been sampled and move

In [4]:
# Quietly install the noisereduce package, then run the project's
# data_preprocessor.py script in a subprocess.
# NOTE(review): presumably this script produces the .npy feature files that
# are loaded from data/features/ further down — confirm against the script.
!pip install -q noisereduce
!python data_preprocessor.py

2024-08-02 13:13:45.428481: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-02 13:13:45.459708: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-02 13:13:45.468830: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Extracting train Alarm: 100% 8149/8149 [18:53<00:00,  7.19it/s]
Extracting train Water: 100% 8197/8197 [25:24<00:00,  5.38it/s]
Extracting train Other: 100% 9135/9135 [46:20<00:00,  3.29it/s]
Extracting test Alarm: 100% 427/427 [00:42<00:00, 10.16it/s]
Extracting test Water: 100% 430/430 [01:08<00:00,  6.31it/s]
  audio_data, sample_rate = librosa.load(file, sr=1

# Utilities

In [4]:
def print_M(conf_M, class_names):
    """Print a confusion matrix of raw counts as comma-separated text.

    The first printed line is a header: the literal ``activity`` followed by
    every class name. Each following line is one matrix row, prefixed with the
    class name at the same index.

    Args:
        conf_M: Iterable of rows; each row is an iterable of counts.
        class_names: Indexable sequence of string labels, one per row.
    """
    print("activity," + ",".join(class_names))
    for row_idx, counts in enumerate(conf_M):
        cells = [str(count) for count in counts]
        print(class_names[row_idx] + "," + ",".join(cells))

def print_M_P(conf_M, class_names):
    """Print a row-normalised confusion matrix as comma-separated text.

    Same layout as the raw-count printer, but every cell is the fraction of
    its row total, rounded to two decimals. A row whose total is not positive
    is printed as all ``0``.

    Args:
        conf_M: Iterable of rows; each row is a re-iterable collection of counts.
        class_names: Indexable sequence of string labels, one per row.
    """
    print("activity," + ",".join(class_names))
    for row_idx, counts in enumerate(conf_M):
        row_total = sum(counts)
        if row_total > 0:
            shares = [str(round(count / row_total, 2)) for count in counts]
        else:
            # Avoid dividing by zero for classes with no samples in this row.
            shares = ['0' for _ in counts]
        print(class_names[row_idx] + "," + ",".join(shares))

def showResult(result, y_test, class_names):
    """Build a confusion matrix from predictions and print it twice.

    Each row of ``result`` and ``y_test`` is reduced to a class index with
    ``np.argmax``; rows of the matrix are the expected class, columns the
    predicted class. The matrix is printed first as raw counts and then as
    per-row fractions.

    Args:
        result: Per-sample score vectors (e.g. model outputs).
        y_test: Array of per-sample target vectors; its second dimension
            gives the number of classes.
        class_names: Labels passed through to the matrix printers.
    """
    n_classes = y_test.shape[1]

    predicted = [np.argmax(scores) for scores in result]
    actual = [np.argmax(target) for target in y_test]

    # Square matrix of zeros: conf_M[expected][predicted].
    conf_M = [[0 for _ in range(n_classes)] for _ in range(n_classes)]
    for true_idx, pred_idx in zip(actual, predicted):
        conf_M[true_idx][pred_idx] += 1

    print_M(conf_M, class_names)
    print_M_P(conf_M, class_names)

def load_weight(path):
    """Load a saved Keras model, print its summary, and return it.

    Args:
        path: Location of the saved model, passed straight to ``load_model``.

    Returns:
        The loaded model object.
    """
    model = load_model(path)
    # Model.summary() prints the table itself and returns None, so wrapping it
    # in print() only appended a stray "None" line to the output.
    model.summary()
    return model

# Model

In [5]:
def build_improved_model(input_shape, num_labels):
    """Build and compile the 1-D convolutional classifier.

    The network takes inputs of shape ``(input_shape, 1)``, applies two
    Conv1D + MaxPooling1D stages, flattens, and passes the result through
    three ReLU dense layers, a 0.5 dropout, and a softmax output layer with
    ``num_labels`` units. It is compiled with the Adam optimizer,
    categorical cross-entropy loss, and an accuracy metric.

    Args:
        input_shape: Length of the (single-channel) input sequence.
        num_labels: Number of output classes.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    conv_stack = [
        Input(shape=(input_shape, 1)),
        Conv1D(filters=32, kernel_size=6, activation='relu'),
        MaxPooling1D(pool_size=3),
        Conv1D(filters=16, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=3),
        Flatten(),
    ]
    dense_head = [
        Dense(units=64, activation='relu'),
        Dense(units=32, activation='relu'),
        Dense(units=18, activation='relu'),
        Dropout(rate=0.5),
        Dense(units=num_labels, activation='softmax'),
    ]

    model = Sequential(conv_stack + dense_head)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Data

## Data loading

In [6]:
# Output path for the trained model file.
model_weight_out = os.path.join('weights', 'exp_model_16k_1.3.weights.h5')

############### Loading the datasets #####################

print('\nLoading the data\n')

# Directory holding the pre-extracted feature arrays (relative to the CWD).
featuresPath = "data/features/"

class_names = np.load(os.path.join(featuresPath, 'class_names.npy'))

# Load the train / validation / test feature and label arrays in one pass;
# the file order below matches the unpacking order on the left.
X_train, y_train, X_val, y_val, X_test, y_test = [
    np.load(os.path.join(featuresPath, array_file))
    for array_file in (
        'X_train.npy', 'y_train.npy',
        'X_val.npy', 'y_val.npy',
        'X_test.npy', 'y_test.npy',
    )
]

# Labels are stored one column per class, so the column count is the class count.
num_labels = y_train.shape[1]


Loading the data



## Data Balancing

In [7]:
print("\nBalancing the data\n")

# Fixed seed so the synthetic SMOTE samples and the random under-sampling
# pick the same rows on every run; without it the balanced training set (and
# therefore the trained model) differs between executions.
BALANCE_SEED = 42

print("Train Class distribution before balancing:", Counter(np.argmax(y_train, axis=1)))

# Upsampling using SMOTE: grow class index 1 to 9000 samples.
smote = SMOTE(sampling_strategy={1: 9000}, random_state=BALANCE_SEED)
oversampled_features, oversampled_labels = smote.fit_resample(X_train, y_train)

# Downsampling using RandomUnderSampler: cut class indices 0 and 2 to 7300 each.
undersampler = RandomUnderSampler(
    sampling_strategy={0: 7300, 2: 7300}, random_state=BALANCE_SEED)
undersampled_features, undersampled_labels = undersampler.fit_resample(
    oversampled_features, oversampled_labels)

print("Train Class distribution after balancing:", Counter(
    np.argmax(undersampled_labels, axis=1)))

# NOTE: this rebinds X_train / y_train to the balanced data, so the original
# (unbalanced) arrays are only recoverable by re-running the loading cell.
X_train = undersampled_features
y_train = undersampled_labels


Balancing the data

Train Class distribution before balancing: Counter({2: 8221, 1: 7377, 0: 7334})
Train Class distribution after balancing: Counter({1: 9000, 0: 7300, 2: 7300})


# Training

In [None]:
###################### Training the model ###########################
print("\nTraining the model\n")

model = build_improved_model(X_train.shape[1], num_labels)
# model.summary()


def scheduler(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    For epoch indices below 10 the incoming rate is returned unchanged;
    from index 10 onward it is scaled by exp(-0.1) and returned as a float.
    """
    return lr if epoch < 10 else float(lr * tf.math.exp(-0.1))


callback = LearningRateScheduler(scheduler)

model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=30,
    validation_data=(X_val, y_val),
    callbacks=[callback],
)


Training the model

Epoch 1/30
[1m738/738[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 23ms/step - accuracy: 0.6885 - loss: 0.6723 - val_accuracy: 0.6230 - val_loss: 52.9455 - learning_rate: 0.0010
Epoch 2/30
[1m738/738[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 15ms/step - accuracy: 0.8840 - loss: 0.2973 - val_accuracy: 0.5771 - val_loss: 139.5504 - learning_rate: 0.0010
Epoch 3/30
[1m738/738[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 15ms/step - accuracy: 0.9075 - loss: 0.2285 - val_accuracy: 0.5787 - val_loss: 142.1783 - learning_rate: 0.0010
Epoch 4/30
[1m738/738[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 15ms/step - accuracy: 0.9216 - loss: 0.1999 - val_accuracy: 0.5657 - val_loss: 143.1733 - learning_rate: 0.0010
Epoch 5/30
[1m738/738[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 15ms/step - accuracy: 0.9374 - loss: 0.1627 - val_accuracy: 0.5900 - val_loss: 199.6329 - learning_rate: 0.0010
Epoch 6/30
[1m738/738[0m [3

# Testing

In [None]:
print("\nTesting the model\n")

result = model.predict(X_test)

# Decode one-hot targets and predicted score vectors to class indices once.
y_true = np.argmax(y_test, axis=1)
y_pred = np.argmax(result, axis=1)
correct = y_true == y_pred

# Overall accuracy in percent (kept under the name `acc`).
cnt = int(correct.sum())
acc = round(cnt * 100 / len(y_test), 2) if len(y_test) else float("nan")

# Per-class accuracy: correctly predicted samples of a class divided by the
# number of test samples of that class. The label for each class index is
# taken from `class_names` instead of assuming a fixed index -> name order,
# so every figure is reported under the class it actually belongs to.
class_acc = {}
for class_idx, class_name in enumerate(class_names):
    in_class = y_true == class_idx
    support = int(in_class.sum())
    hits = int((correct & in_class).sum())
    # A class absent from the test set has no defined accuracy.
    class_acc[str(class_name)] = (
        round(hits * 100 / support, 2) if support else float("nan"))

print(", ".join(
    [f"Total Accuracy: {acc}%"]
    + [f"{name} Accuracy: {value}%" for name, value in class_acc.items()]
))

showResult(result, y_test, class_names)

print("\n")
print(classification_report(
    y_true,
    y_pred,
    target_names=list(class_names)
))

# Saving

In [None]:
# if not os.path.exists("Models"):
#     os.makedirs("Models")
# path = os.path.join("Models", f"audio_NN_New{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}_acc_{acc}")
# model_json = model.to_json()
# with open(f"{path}.json", "w") as json_file:
#     json_file.write(model_json)
# model.save_weights(f"{path}.weights.h5")

# if not os.path.exists("weights"):
#     os.makedirs("weights")
# model.save(model_weight_out, overwrite=True, include_optimizer=False)



In [None]:
# model_path = 'weights/94%_model_16k_1.3.weights.h5'
# model.save(model_path, overwrite=True, include_optimizer=False)
# loaded_model = tf.keras.models.load_model(model_path)