In [2]:
import librosa
import librosa.display
import numpy as np
import tensorflow
import matplotlib.pyplot as plt
#%pip install noisereduce
import noisereduce as nr
import soundfile as sf
import os
import pandas as pd
import time
from IPython.display import clear_output
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, LSTM, Dense, Flatten, TimeDistributed, Reshape
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

ModuleNotFoundError: No module named 'matplotlib'

In [2]:
def extract_spectrogram(file_path, duration = 4):
    """Load an audio file and return a fixed-length, dB-scaled STFT spectrogram.

    The clip is trimmed or zero-padded to exactly ``duration`` seconds so that
    files loaded at the same sample rate all produce spectrograms of the same
    shape.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        duration: Target clip length in seconds (default 4). May be fractional.

    Returns:
        The STFT magnitude of the fixed-length clip converted with
        ``librosa.amplitude_to_db``.
    """
    # Load the audio; librosa.load is called with its default settings.
    Audio, Sample = librosa.load(file_path)
    # Cast to int so a fractional duration still yields a valid sample count.
    numMuestras = int(duration * Sample)

    # Force the clip to exactly numMuestras samples.
    if len(Audio) > numMuestras:
        # Trim to the requested duration (this used to be hard-coded to 4 s).
        Audio = Audio[:numMuestras]
    else:
        # Pad with silence of the same dtype as the audio so that padded and
        # trimmed clips produce spectrograms of the same dtype.
        silence = np.zeros(numMuestras - len(Audio), dtype=Audio.dtype)
        Audio = np.concatenate([Audio, silence])

    # The STFT is complex-valued; take the magnitude explicitly before the
    # amplitude -> dB conversion instead of handing it complex input.
    spect = librosa.stft(Audio)
    spectInDb = librosa.amplitude_to_db(np.abs(spect))
    return spectInDb

def importAudios(audio_path, metadata, features, labels, top = 2999):
    """Append spectrograms and binary labels for the clips listed in ``metadata``.

    Rows are visited in order. A row with ``classID`` 6 is labelled ``1.0``
    (counted as "gun"); any other row is labelled ``0.0`` (counted as "no gun").

    Args:
        audio_path: Root directory; each clip is read from
            ``<audio_path>/fold<fold>/<slice_file_name>``.
        metadata: DataFrame with ``fold``, ``slice_file_name`` and ``classID``
            columns.
        features: List that receives one spectrogram per accepted row
            (mutated in place).
        labels: List that receives the matching label (mutated in place).
        top: Per-class quota threshold; a class stops being accepted once its
            count exceeds this value.

    Returns:
        The same ``features`` and ``labels`` lists that were passed in.
    """
    # Running counts of accepted clips per class.
    gun_total = 0
    other_total = 0

    for _, entry in metadata.iterrows():
        wav_path = os.path.join(audio_path, f"fold{entry['fold']}", entry['slice_file_name'])

        # classID 6 -> 1.0, anything else -> 0.0.
        is_gun = entry['classID'] == 6
        target = 1.0 if is_gun else 0.0

        # Once the positive count exceeds `top`, the whole scan stops.
        if gun_total > top:
            print("skip")
            break
        # Negative quota exceeded: ignore further negatives but keep scanning
        # for positives.
        if not is_gun and other_total > top:
            continue

        if is_gun:
            gun_total += 1
        else:
            other_total += 1

        # Extract the features and record the label for this clip.
        features.append(extract_spectrogram(wav_path))
        labels.append(target)

    print(f"lungitud: {len(features)}")
    print(f"num G:{gun_total}, num no G:{other_total}")
    return features, labels

def extract_properties(file_path):
    """Return the spectrogram dimensions ``(n_bands, n_frames)`` of an audio file.

    The file is loaded in full (no trimming or padding), transformed with
    ``librosa.stft`` and converted to dB; only the shape of the result is
    returned.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.

    Returns:
        Tuple ``(n_bands, n_frames)``: the two dimensions of the dB spectrogram.
    """
    # The sample rate returned by librosa.load is not needed here.
    audio, _ = librosa.load(file_path)
    # The STFT is complex-valued; take the magnitude explicitly before the
    # amplitude -> dB conversion instead of handing it complex input.
    spect = np.abs(librosa.stft(audio))
    spect_db = librosa.amplitude_to_db(spect, ref=np.max)
    n_bands, n_frames = spect_db.shape

    return n_bands, n_frames

In [None]:
# Root directory of the audio archive, and the metadata CSV stored inside it.
# NOTE(review): both are absolute, machine-specific paths; adjust them before
# running this notebook elsewhere.
audio_path = 'D:/Workspace/tesis/audioMetadata/archive'
metadata = pd.read_csv('D:/Workspace/tesis/audioMetadata/archive/UrbanSound8K2.csv')

# Preview the first rows to confirm the CSV was parsed.
print(metadata.head())

In [None]:
# Start from empty containers; importAudios appends to them and returns them.
features, labels = [], []
features, labels = importAudios(audio_path, metadata, features, labels)

# Report the spectrogram dimensions of one reference clip.
reference_clip = audio_path +"/fold11/0a07b229-7d2b-4d2b-8f32-c94cbc7b1487_chan0_v0.wav"
n_bands, n_frames = extract_properties(reference_clip)
print(f"{n_bands}, {n_frames}")

In [None]:
# Show the shape of one sample spectrogram (the second entry).
print(features[1].shape)

# Stacking into a single array later requires every spectrogram to share one
# shape; check that none deviates from (1025, 173).
all_same_shape = not any(f.shape != (1025, 173) for f in features)
print(f"All arrays have the shape (1025, 173): {all_same_shape}")

In [6]:
# Convert the feature and label lists into NumPy arrays.
X, y = np.array(features), np.array(labels)

In [None]:
# Split the data into training (70%) and test (30%) sets; the fixed
# random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Report the size of each partition (features first, then labels).
for train_part, test_part in ((X_train, X_test), (y_train, y_test)):
    print(len(train_part), len(test_part))

In [None]:
# Build the model.
# NOTE(review): the original comment called this a CRNN, but no recurrent
# layer is added below; the network is convolution/pooling layers followed
# by a single sigmoid unit.
model = Sequential()

# Convolutional part: the input is declared as (1025, 173, 1).
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(1025, 173, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(1, 2)))
# Debug output: tensor produced by the last pooling layer.
print(model.layers[-1].output)
model.add(Flatten())
# Debug output: tensor produced by the Flatten layer.
print(model.layers[-1].output)

# Output layer: one sigmoid unit for the binary decision.
model.add(Dense(1, activation='sigmoid'))

# Compile the model.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
print("ok")

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

In [None]:
# Train the model on the training split.
model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=10,
)

# Save the trained model to disk.
model.save('gunshot_detection_model_NEW.h5')

# Evaluate on the test set; the second entry of the result is reported as
# the accuracy.
score = model.evaluate(X_test, y_test, verbose=0)
print(f'Accuracy: {score[1]}')

# Predict on the test set and threshold the model outputs at 0.5 to obtain
# integer class predictions.
y_prob = model.predict(X_test)
y_pred = (y_prob > 0.5).astype("int32")

In [None]:
# Display names for label 0 and label 1, in that order.
class_names = ['not_gun_shot', 'gun_shot']

# Build the confusion matrix and wrap it in a labelled DataFrame.
cm = confusion_matrix(y_test, y_pred)
cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)

# Plot the confusion matrix as an annotated heatmap.
plt.figure(figsize=(8, 6))
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Classification report on the test set.
report = classification_report(y_test, y_pred, target_names=class_names)
print(report)