In [None]:
# Mount Google Drive at /content/drive; the dataset folders read later in this
# notebook are addressed under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [2]:
import librosa
import os
import numpy as np
from sklearn.model_selection import train_test_split

def prepare_data(samples, num_of_samples=176400, num_of_common=44100):
    """Cut a signal into fixed-length windows taken at a regular stride.

    Window start positions advance by ``num_of_common`` samples and every
    window covers ``num_of_samples`` samples, so with the defaults consecutive
    windows overlap. A window that would run past the end of ``samples`` is
    dropped rather than padded.

    Args:
        samples: Sliceable sequence supporting ``len`` (e.g. a NumPy array).
        num_of_samples: Required length of every returned window.
        num_of_common: Step between consecutive window start positions.

    Returns:
        List of slices of ``samples``, each exactly ``num_of_samples`` long.
        The list is empty when ``samples`` is shorter than one window.
    """
    candidate_windows = (
        samples[begin:begin + num_of_samples]
        for begin in range(0, len(samples), num_of_common)
    )
    # Slices near the tail come back short; keep only the full-length ones.
    return [window for window in candidate_windows if len(window) == num_of_samples]

# Source folder for every class. The insertion order of this dict defines the
# integer label of each class (car=0, bike=1, ...).
folder_paths = {
    'car': '/content/drive/MyDrive/sound/car',
    'bike': '/content/drive/MyDrive/sound/bike',
    'bird': '/content/drive/MyDrive/sound/bird',
    'wind': '/content/drive/MyDrive/sound/wind',
    'rain': '/content/drive/MyDrive/sound/rain',
    'crowd': '/content/drive/MyDrive/sound/crowd',
    'chatter': '/content/drive/MyDrive/sound/chatter',
    'park': '/content/drive/MyDrive/sound/park'
}

# os.listdir returns entries in arbitrary order, so sort the names: otherwise
# the `max_files` truncation below could select a different subset of files on
# every run and the dataset would not be reproducible.
wav_files = {
    category: sorted(f for f in os.listdir(folder) if f.endswith('.wav'))
    for category, folder in folder_paths.items()
}

# Upper bound on how many files are read per class.
max_files = {
    'car': 150,
    'bike': 1000,
    'bird': 15,
    'wind': 15,
    'rain': 14,
    'crowd': 15,
    'chatter': 12,
    'park': 25
}

# Rate every file is resampled to. Defined before the loop so `sample_rate`
# (read later by the feature extraction code) exists even if no file is loaded.
sample_rate = 44100

categories = {category: [] for category in folder_paths.keys()}

for category, files in wav_files.items():
    files = files[:max_files[category]]
    for file in files:
        file_path = os.path.join(folder_paths[category], file)
        samples, sample_rate = librosa.load(file_path, sr=sample_rate)
        processed_data = prepare_data(samples)
        categories[category].extend(processed_data)

# A category that produced no window is an empty list; passing it to
# np.concatenate next to 2-D window stacks raises a dimension mismatch, so
# only non-empty categories are stacked. Their labels below have length 0, so
# `audio` and `labels` stay aligned.
non_empty_categories = [category for category in folder_paths if categories[category]]
if not non_empty_categories:
    raise ValueError("No audio windows were extracted from any category folder.")

audio = np.concatenate([categories[category] for category in non_empty_categories])
labels = np.concatenate([
    np.full(len(categories[category]), idx) for idx, category in enumerate(folder_paths.keys())
])

# NOTE(review): prepare_data emits overlapping windows (its default step is a
# quarter of the window length) and this split is done per window, so
# validation windows can share audio with training windows. Validation
# accuracy is therefore likely optimistic; consider splitting by file before
# windowing.
x_tr, x_val, y_tr, y_val = train_test_split(audio, labels, stratify=labels, test_size=0.1, random_state=777, shuffle=True)

In [None]:
import numpy as np

# Count how many windows each integer label has in the full dataset.
unique_labels, counts = np.unique(labels, return_counts=True)

# Integer labels were assigned by enumerating folder_paths, so derive the
# display names from the same dict instead of repeating them by hand; a
# hand-written list silently goes stale if a class is added or reordered.
class_names = list(folder_paths)

print("Number of samples per class:")
for label, count in zip(unique_labels, counts):
    print(f"{class_names[int(label)]}: {count}")


In [4]:
from keras.layers import *
from keras.models import *
from keras.callbacks import *
from keras.utils import to_categorical
from keras import backend as K

def cnn(x_tr, num_classes=3, checkpoint_path='best_model.hdf5'):
    """Build and compile the 1-D CNN classifier and its best-model checkpoint.

    Architecture: two Conv1D -> Dropout -> MaxPooling1D stages (8 filters of
    width 13, then 16 filters of width 11), global max pooling, a 16-unit
    dense layer and a softmax output.

    Args:
        x_tr: Feature array with at least three dimensions. Only
            ``x_tr.shape[1]`` and ``x_tr.shape[2]`` are read, to size the
            input layer; the data itself is not used here.
        num_classes: Width of the softmax output layer. NOTE(review): the
            default of 3 is kept for backward compatibility; this notebook's
            training cell passes the class count explicitly.
        checkpoint_path: File the checkpoint callback writes the best model
            to. Defaults to the previously hard-coded 'best_model.hdf5'.
            NOTE(review): newer Keras releases validate the checkpoint file
            extension - confirm '.hdf5' is accepted by the installed version.

    Returns:
        Tuple ``(model, model_checkpoint)``: the compiled model and a
        ModelCheckpoint callback that keeps only the epoch with the highest
        ``val_accuracy``.
    """
    # Reset Keras' global state so repeated calls start from a clean slate.
    K.clear_session()
    inputs = Input(shape=(x_tr.shape[1], x_tr.shape[2]))

    conv = Conv1D(8, 13, padding='same', activation='relu')(inputs)
    conv = Dropout(0.3)(conv)
    conv = MaxPooling1D(2)(conv)

    conv = Conv1D(16, 11, padding='same', activation='relu')(conv)
    conv = Dropout(0.3)(conv)
    conv = MaxPooling1D(2)(conv)

    # Collapse the remaining sequence axis to one vector per example.
    conv = GlobalMaxPool1D()(conv)

    conv = Dense(16, activation='relu')(conv)

    outputs = Dense(num_classes, activation='softmax')(conv)

    model = Model(inputs, outputs)
    # categorical_crossentropy expects one-hot encoded targets.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    model_checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

    return model, model_checkpoint

In [5]:
from scipy import signal

In [None]:
def log_specgram(audio, sample_rate, eps=1e-10, nperseg=1764, noverlap=441):
    """Compute a log-scaled spectrogram laid out as (time, frequency).

    Args:
        audio: Signal to analyse; expected to be 1-D (the transpose below
            assumes scipy returns a 2-D spectrogram).
        sample_rate: Sampling frequency, forwarded to scipy as ``fs``.
        eps: Constant added before taking the log so that zero-power bins
            produce a finite value instead of ``-inf``.
        nperseg: Number of samples per segment. The default of 1764 is the
            value that used to be hard-coded here.
        noverlap: Number of samples shared by adjacent segments. The default
            of 441 is the value that used to be hard-coded here.

    Returns:
        Tuple ``(freqs, times, log_spec)`` where ``freqs`` and ``times`` are
        the axes reported by ``scipy.signal.spectrogram`` and ``log_spec`` is
        the float32 log spectrogram with shape ``(len(times), len(freqs))``.
    """
    freqs, times, spec = signal.spectrogram(
        audio,
        fs=sample_rate,
        nperseg=nperseg,
        noverlap=noverlap,
        detrend=False,
    )
    # scipy returns (frequency, time); transpose so time is the leading axis.
    return freqs, times, np.log(spec.T.astype(np.float32) + eps)

def extract_spectrogram_features(x_tr):
    """Convert audio clips into per-clip standardised log spectrograms.

    Each clip is passed through ``log_specgram`` and every frequency bin is
    then standardised over time (zero mean, unit variance within that clip).

    Reads the module-level ``sample_rate`` variable, which must be defined
    before this function is called.

    Args:
        x_tr: Iterable of audio clips. The clips must all yield spectrograms
            of the same shape for the result to stack into one regular array.

    Returns:
        NumPy array holding one standardised spectrogram per clip, stacked
        along the first axis.
    """
    features = []
    for clip in x_tr:
        _, _, spectrogram = log_specgram(clip, sample_rate)
        mean = np.mean(spectrogram, axis=0)
        std = np.std(spectrogram, axis=0)
        # A bin that is constant over time (e.g. in a silent clip) has zero
        # std; dividing by it would put NaN/inf into the features. Divide by 1
        # instead so such bins simply normalise to 0.
        safe_std = np.where(std > 0, std, np.ones_like(std))
        features.append((spectrogram - mean) / safe_std)
    return np.array(features)

# Turn the raw audio windows of both splits into normalised log spectrograms.
x_tr_features  = extract_spectrogram_features(x_tr)
x_val_features = extract_spectrogram_features(x_val)

from keras.utils import to_categorical

# Single source of truth for the class count: the one-hot width and the
# model's output layer must agree, so both are derived from folder_paths
# instead of mixing a literal 8 with len(folder_paths).
num_classes = len(folder_paths)

y_tr_encoded = to_categorical(y_tr, num_classes=num_classes)
y_val_encoded = to_categorical(y_val, num_classes=num_classes)

model, mc = cnn(x_tr_features, num_classes=num_classes)
# `mc` saves the best model (by validation accuracy) while training runs.
history = model.fit(x_tr_features, y_tr_encoded, epochs=20, callbacks=[mc], batch_size=32, validation_data=(x_val_features, y_val_encoded))

In [None]:
# Classify one window from the validation split and print the model's
# per-class probabilities followed by the winning class.
sample_rate = 44100
ind = 6
test_audio = x_val[ind]

# The extractor iterates over a collection of clips, so wrap the single clip.
test_audio_features = extract_spectrogram_features([test_audio])
feature = test_audio_features[0]

# Put the leading batch axis back before handing the clip to the model.
prob = model.predict(feature[np.newaxis, ...])
class_probabilities = prob[0]

print("Class probabilities:")
for class_name, probability in zip(class_names, class_probabilities):
    print(f"{class_name}: {probability:.4f}")

# Index of the highest probability for the only item in the batch.
pred_index = class_probabilities.argmax()
predicted_class = class_names[pred_index]

print("\nPrediction:", predicted_class)

In [None]:
from IPython.display import Audio

# Show an inline player for the current `test_audio` clip so the printed
# prediction can be checked by ear.
Audio(data=test_audio, rate=sample_rate)

In [None]:
# Same check as the earlier prediction cell, for a different validation
# window: print the per-class probabilities and the winning class.
sample_rate = 44100
ind = 0
test_audio = x_val[ind]

# The extractor iterates over a collection of clips, so wrap the single clip.
test_audio_features = extract_spectrogram_features([test_audio])
feature = test_audio_features[0]

# Put the leading batch axis back before handing the clip to the model.
prob = model.predict(np.expand_dims(feature, axis=0))
class_probabilities = prob[0]

print("Class probabilities:")
for class_name, probability in zip(class_names, class_probabilities):
    print(f"{class_name}: {probability:.4f}")

# Index of the highest probability for the only item in the batch.
pred_index = class_probabilities.argmax()
predicted_class = class_names[pred_index]

print("\nPrediction:", predicted_class)

In [None]:
# Show an inline player for the current `test_audio` clip so the printed
# prediction can be checked by ear.
Audio(data=test_audio, rate=sample_rate)

In [11]:
import tensorflow as tf

# Reload the model file written by the training checkpoint and convert it to
# a TensorFlow Lite model.
new_model = tf.keras.models.load_model("best_model.hdf5")
converter = tf.lite.TFLiteConverter.from_keras_model(new_model)
tflite_model = converter.convert()

# The converted model is written out in binary mode.
with open('model.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)