# Importación de librerías

In [7]:
import pandas as pd
import numpy as np

import os
import librosa
import librosa.display

import matplotlib.pyplot as plt
import seaborn as sns


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from sklearn.model_selection import GridSearchCV

from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Lectura del dataset

In [9]:
# Root folder of the raw UrbanSound8K dataset
urbansound8k_path = "../../data/raw/UrbanSound8K/"

# First dataset - folder containing the audio files (relative to the root above)
urbansound8k_audio_path = "audio/"

# Second dataset - CSV file with metadata about the audio files (relative to the root above)
urbansound8k_metadata_path_file = "metadata/UrbanSound8K.csv"

# CSV dataset with additional metadata generated from the audio files
dataset_urbansound8k_path_file = "../../data/processed/metadata_urbansound8k_adic.csv"

# Read through the variable defined above instead of repeating the literal path,
# so the location is declared in exactly one place. The file is ';'-separated.
dataset_urbansound8k_df = pd.read_csv(dataset_urbansound8k_path_file, sep=";")

dataset_urbansound8k_df.head(3)

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class,path,duracion,tasa_muestreo,desplazamiento_dc,cantidad_canales,rango_dinamico,energia,tasa_cruce_cero,entropia_espectograma,avg_centroide_espectral,avg_tonalidad
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark,../../data/raw/UrbanSound8K/audio/fold5/100032...,0.317551,44100,5e-06,2,1.966949,757.0395,1947,6.716659,2503.702337,0
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing,../../data/raw/UrbanSound8K/audio/fold5/100263...,4.0,44100,6.7e-05,2,0.077789,9.032963,27607,9.991457,2339.221804,0
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing,../../data/raw/UrbanSound8K/audio/fold5/100263...,4.0,44100,9.7e-05,2,0.045868,4.803137,20971,9.728703,2070.826863,0


# Extracción de características

In [10]:
def extract_mfcc(file_path, n_mfcc=13):
    """Return the time-averaged MFCC vector of a single audio file.

    Args:
        file_path: Path of the audio file, handed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients requested from librosa (default 13).

    Returns:
        The MFCC matrix averaged over time, i.e. one mean value per coefficient.
    """
    # sr=None asks librosa for the file's own sampling rate; mono=True yields one channel.
    waveform, sample_rate = librosa.load(file_path, sr=None, mono=True)
    coefficients = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=n_mfcc)
    # Transpose so frames run along axis 0, then average them away.
    return coefficients.T.mean(axis=0)

In [11]:
# Extract the MFCC vector of every audio file listed in the metadata.
# Only the 'path' and 'classID' columns are needed, so iterate the column directly
# instead of materialising a full row Series per file with iterrows().
mfcc_features = [
    extract_mfcc(audio_path) for audio_path in dataset_urbansound8k_df['path']
]

# Class labels, in the same row order as mfcc_features.
labels = dataset_urbansound8k_df['classID'].tolist()

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


In [12]:
# Turn the per-file feature vectors and their labels into numpy arrays
X = np.asarray(mfcc_features)
y = np.asarray(labels)


In [13]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle for reproducibility.
# NOTE(review): this is a random row-level split that ignores the 'fold' and 'fsID'
# columns present in the metadata, so slices of the same recording may end up in
# both train and test - confirm whether a fold-based split is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Modelos de Clasificación

## > KNeighborsClassifier

In [14]:
# Build a 3-nearest-neighbours classifier and train it (fit returns the estimator itself).
# NOTE(review): the MFCC features are passed in unscaled even though scalers are
# imported above; KNN is distance-based, so confirm whether scaling was intended.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Predict the class of every held-out sample
y_pred = knn.predict(X_test)

# Report overall accuracy and the per-class precision/recall/F1 table
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.8586147681740126
Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.93      0.89       203
           1       0.75      0.73      0.74        86
           2       0.74      0.87      0.80       183
           3       0.90      0.80      0.85       201
           4       0.92      0.81      0.86       206
           5       0.89      0.93      0.91       193
           6       0.75      0.82      0.78        72
           7       0.90      0.95      0.93       208
           8       0.90      0.94      0.92       165
           9       0.87      0.74      0.80       230

    accuracy                           0.86      1747
   macro avg       0.85      0.85      0.85      1747
weighted avg       0.86      0.86      0.86      1747

