# Desafio 1: Classificação de Áudio com CNN Otimizada

Este notebook apresenta a solução completa e otimizada para o **Desafio 1**, utilizando uma **Rede Neural Convolucional (CNN)** com **Batch Normalization** e técnicas de produção para classificar sons do dataset **ESC-10**.

## 🚀 Melhorias Implementadas:
1.  **Batch Normalization**: Estabiliza o treinamento e acelera a convergência.
2.  **Callbacks de Produ√ß√£o**: 
    *   `EarlyStopping`: Evita overfitting interrompendo o treino no momento ideal.
    *   `ReduceLROnPlateau`: Ajusta a taxa de aprendizado para um ajuste fino nos pesos.
    *   `ModelCheckpoint`: Garante que a melhor versão do modelo seja salva.
3.  **Arquitetura Robusta**: Camadas convolucionais com padding 'same' e maior capacidade nas camadas densas.

### 1. Importações e Configurações

In [None]:
import numpy as np
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import kagglehub

# Global settings
FS = 44100  # sample rate (Hz) used both when loading audio and when computing MFCCs
N_MFCC = 40  # number of MFCC coefficients extracted per frame
MAX_LEN = 431  # number of time frames every MFCC matrix is padded/truncated to
# NOTE(review): 431 presumably corresponds to 5 s of audio at FS with librosa's
# default hop length (512 samples) — confirm if the hop length is ever changed.
# Class labels; each name is also the sub-folder name read when loading the dataset.
# NOTE: this list is not in alphabetical order, whereas LabelEncoder assigns
# integer ids in sorted order, so position i here is not necessarily class id i.
CLASSES = ["chainsaw", "crackling_fire", "dog", "rain", "sea_waves", 
           "clock_tick", "crying_baby", "helicopter", "rooster", "sneezing"]

### 2. Processamento de Dados
Extração de MFCCs 2D com normalização de tempo.

In [None]:
def extract_features(file_path):
    """Load an audio file and return a fixed-size MFCC matrix.

    The waveform is loaded at ``FS`` Hz and zero-padded or cut to exactly
    five seconds. MFCCs with ``N_MFCC`` coefficients are then computed and
    the frame axis is zero-padded or cut to ``MAX_LEN`` columns.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The MFCC array of shape ``(N_MFCC, MAX_LEN)``, or ``None`` when any
        step raised (the error is printed instead of being propagated, so a
        single bad file does not abort the whole dataset load).
    """
    try:
        signal, _ = librosa.load(file_path, sr=FS)

        # Force the clip to last exactly 5 seconds.
        n_samples = 5 * FS
        missing = n_samples - len(signal)
        if missing > 0:
            signal = np.pad(signal, (0, missing))
        else:
            signal = signal[:n_samples]

        coeffs = librosa.feature.mfcc(y=signal, sr=FS, n_mfcc=N_MFCC)

        # Force the frame axis to exactly MAX_LEN columns.
        n_frames = coeffs.shape[1]
        if n_frames >= MAX_LEN:
            return coeffs[:, :MAX_LEN]
        return np.pad(coeffs, ((0, 0), (0, MAX_LEN - n_frames)))
    except Exception as e:
        print(f"Erro ao processar {file_path}: {e}")
        return None

# Download the dataset; the class sub-folders are read from the "Data"
# directory inside the path returned by kagglehub.
path = kagglehub.dataset_download("sreyareddy15/esc10rearranged")
base_path = os.path.join(path, "Data")

# Collect one MFCC matrix and one string label per readable .wav file.
X, y = [], []
for label in CLASSES:
    folder_path = os.path.join(base_path, label)
    # os.listdir returns entries in arbitrary order. Sorting makes the sample
    # order, and therefore the seeded train/test split below, reproducible
    # across machines. The extension check is case-insensitive so files
    # named "*.WAV" are not silently dropped.
    files = sorted(f for f in os.listdir(folder_path) if f.lower().endswith('.wav'))
    for f in files:
        mfcc = extract_features(os.path.join(folder_path, f))
        # extract_features returns None for files it could not process.
        if mfcc is not None:
            X.append(mfcc)
            y.append(label)

# Stack into one array and append a trailing channel axis for the CNN.
X = np.array(X)[..., np.newaxis]
# LabelEncoder maps the string labels to integer ids (in sorted label order).
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Stratified 80/20 split so every class keeps the same proportion in both sets.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, 
                                                    random_state=42, stratify=y_encoded)
print(f"Dados carregados: {X.shape[0]} amostras.")

### 3. Definição do Modelo Otimizado

In [None]:
def build_cnn(input_shape, num_classes):
    """Build the (uncompiled) convolutional classifier.

    The network is three Conv2D -> BatchNormalization -> ReLU -> MaxPooling2D
    stages with 32, 64 and 128 filters, followed by a 256-unit dense layer
    (also batch-normalized, with 50% dropout) and a softmax output.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    stack = [layers.Input(shape=input_shape)]

    # Convolutional feature extractor: identical stages, doubling the filters.
    for n_filters in (32, 64, 128):
        stack.extend([
            layers.Conv2D(n_filters, (3, 3), padding='same'),
            layers.BatchNormalization(),
            layers.Activation('relu'),
            layers.MaxPooling2D((2, 2)),
        ])

    # Classification head.
    stack.extend([
        layers.Flatten(),
        layers.Dense(256),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])

    return models.Sequential(stack)

# One single-channel MFCC matrix per sample; one output unit per class.
model = build_cnn(input_shape=(N_MFCC, MAX_LEN, 1), num_classes=len(CLASSES))
# Sparse cross-entropy because the targets are integer class ids, not one-hot.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

### 4. Treinamento com Callbacks

In [None]:
# Carve a validation set out of the training data. The callbacks below pick
# the stopping epoch, the learning rate and the saved checkpoint from the
# validation loss; driving them with X_test would leak the test set into
# model selection and make the final report on X_test optimistic.
# NOTE(review): assumes every class has enough training samples for a
# stratified 80/20 split — confirm if the dataset is ever subsampled.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

my_callbacks = [
    # Stop after 10 epochs without improvement and roll back to the best weights.
    callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    # Halve the learning rate after 5 epochs without improvement.
    callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5),
    # Keep only the best model seen so far on disk.
    callbacks.ModelCheckpoint('best_model.keras', save_best_only=True)
]

history = model.fit(X_fit, y_fit, epochs=50, batch_size=32, 
                    validation_data=(X_val, y_val), callbacks=my_callbacks)

### 5. Avaliação e Gráficos

In [None]:
# Training curves: accuracy on the left, loss on the right.
plt.figure(figsize=(14, 5))

plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Treino')
plt.plot(history.history['val_accuracy'], label='Validação')
plt.title('Acurácia do Modelo')
plt.legend()
plt.grid(True)

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Treino')
plt.plot(history.history['val_loss'], label='Validação')
plt.title('Perda do Modelo')
plt.legend()
plt.grid(True)

plt.show()

# Final report on the held-out test set.
y_pred = np.argmax(model.predict(X_test), axis=1)
print("\n--- Relatório de Classificação ---")
# The integer ids in y_test/y_pred were assigned by LabelEncoder, which
# numbers classes in sorted order. CLASSES is not sorted, so using it as
# target_names would attach the wrong name to most rows. le.classes_ is the
# id -> name mapping actually used. Passing labels explicitly keeps the rows
# aligned even if some class is absent from both y_test and y_pred.
class_names = list(le.classes_)
print(classification_report(y_test, y_pred,
                            labels=np.arange(len(class_names)),
                            target_names=class_names))