# Desafio 1: Classificação de Áudio com CNN e MFCC

Este notebook contém a implementação completa para o **Desafio 1**, focado na classificação de sons ambientais do dataset **ESC-10** utilizando uma **Rede Neural Convolucional (CNN)**.

## 1. Configuração do Ambiente e Importações

In [None]:
import numpy as np
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import kagglehub

# Audio settings shared by the feature-extraction and model cells.
Fs = 44100     # sampling rate (Hz) passed to librosa.load for every clip
n_mfcc = 40    # number of MFCC coefficients (first axis of the model input)
max_len = 431  # number of MFCC frames (second axis of the model input)

# Class labels; each one is also the name of a sub-folder in the dataset.
classes_names = [
    "chainsaw",
    "crackling_fire",
    "dog",
    "rain",
    "sea_waves",
    "clock_tick",
    "crying_baby",
    "helicopter",
    "rooster",
    "sneezing",
]

## 2. Carregamento e Processamento dos Dados
Extraímos os MFCCs mantendo a estrutura 2D para que a CNN possa processá-los como imagens.

In [None]:
def extract_mfcc_2d(file_path, n_mfcc=40, max_len=431):
    """Load an audio file and return its MFCC matrix with a fixed frame count.

    The clip is loaded at the module-level sampling rate ``Fs`` and forced
    to exactly five seconds (zero-padded at the end, or truncated) before
    the MFCCs are computed. The frame axis of the result is then padded
    with zeros or cut so that it has exactly ``max_len`` columns.

    Args:
        file_path: Path of the audio file to read.
        n_mfcc: Number of MFCC coefficients requested from librosa.
        max_len: Number of frames (columns) in the returned matrix.

    Returns:
        The 2D MFCC array whose second axis has length ``max_len``.
    """
    signal, rate = librosa.load(file_path, sr=Fs)

    # Force every clip to the same duration: 5 seconds at Fs samples/second.
    n_samples = 5 * Fs
    missing_samples = n_samples - len(signal)
    if missing_samples > 0:
        signal = np.pad(signal, (0, missing_samples))
    else:
        signal = signal[:n_samples]

    coeffs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc)

    # Pad (zeros on the right) or crop the frame axis; coefficients untouched.
    missing_frames = max_len - coeffs.shape[1]
    if missing_frames > 0:
        return np.pad(coeffs, ((0, 0), (0, missing_frames)))
    return coeffs[:, :max_len]

# Download the dataset; the class sub-folders are read from its "Data" directory.
path = kagglehub.dataset_download("sreyareddy15/esc10rearranged")
base_path = os.path.join(path, "Data")

# Build the feature list and the parallel label list, one class folder at a time.
X, y = [], []
for label in classes_names:
    folder_path = os.path.join(base_path, label)
    # os.listdir returns entries in arbitrary order. Sorting keeps the sample
    # order stable across runs and machines, so random_state=42 below really
    # reproduces the same train/test split.
    files = sorted(f for f in os.listdir(folder_path) if f.endswith('.wav'))
    for f in files:
        mfcc = extract_mfcc_2d(os.path.join(folder_path, f))
        # Defensive guard: skips a file if the extractor ever signals
        # failure by returning None.
        if mfcc is not None:
            X.append(mfcc)
            y.append(label)

# Stack the samples and append a trailing channel axis for the Conv2D layers.
X = np.array(X)[..., np.newaxis]

# NOTE: LabelEncoder assigns integer ids in sorted (alphabetical) label order,
# not in the order of classes_names; use le.classes_ to map ids back to names.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Stratified 80/20 split so every class keeps the same proportion in both sets.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

## 3. Arquitetura da Rede Neural Convolucional (CNN)

In [None]:
# CNN classifier: two Conv2D -> MaxPooling2D -> Dropout stages followed by a
# dense head with one softmax output per class.
model = models.Sequential([
    # Declare the input with an explicit Input object instead of passing
    # input_shape= to the first layer; Keras 3 deprecates the latter for
    # Sequential models and emits a warning.
    layers.Input(shape=(n_mfcc, max_len, 1)),

    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(len(classes_names), activation='softmax')
])

# The sparse loss variant is used because the targets are integer class ids
# rather than one-hot vectors.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

## 4. Treinamento e Visualização dos Gráficos

In [None]:
# Train for 50 epochs. The curves labelled as validation below are computed
# on the held-out test split, since it is what is passed as validation_data.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# One panel per tracked metric: (history key, panel title, y-axis label).
panels = (
    ('accuracy', 'Acurácia do Modelo', 'Acurácia'),
    ('loss', 'Perda do Modelo', 'Perda'),
)

plt.figure(figsize=(14, 5))

for position, (metric, title, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    # Keras stores the validation series under the same key prefixed by "val_".
    plt.plot(history.history[metric], label='Treino', color='blue')
    plt.plot(history.history['val_' + metric], label='Validação', color='orange')
    plt.title(title)
    plt.xlabel('Época')
    plt.ylabel(y_label)
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.show()