In [None]:
!unzip audios-20250626T230813Z-1-001.zip -d audios

In [None]:
!mkdir working
!mkdir working/audio-images

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import os, wave, pylab, itertools
from pathlib import Path
from scipy import signal
from scipy.io import wavfile
from sklearn.metrics import confusion_matrix

# Input and output paths
INPUT_DIR = 'audios/'
OUTPUT_DIR = 'working/'

# Print the names of (up to) the first 10 entries found in the input path.
# Sorting makes the preview reproducible (os.listdir returns entries in
# arbitrary order) and slicing avoids an IndexError when the folder holds
# fewer than 10 entries.
parent_list = sorted(os.listdir(INPUT_DIR))
for name in parent_list[:10]:
    print(name)

In [None]:
# Preview the waveform and the spectrogram of (up to) the first 5 WAV files.
# Non-WAV entries are skipped so wave.open is only called on audio files.
preview_names = [name for name in parent_list if name.lower().endswith('.wav')][:5]
for wav_name in preview_names:
    # The context manager closes the file handle even if reading fails.
    with wave.open(os.path.join(INPUT_DIR, wav_name), 'r') as signal_wave:
        # Use the rate stored in the file header instead of assuming 16 kHz,
        # so the spectrogram axes are expressed in the file's real units.
        sample_rate = signal_wave.getframerate()
        # Reading `sample_rate` frames yields at most one second of audio.
        raw_frames = signal_wave.readframes(sample_rate)
    # NOTE(review): assumes 16-bit PCM samples — confirm for this dataset.
    sig = np.frombuffer(raw_frames, dtype=np.int16)

    plt.figure(figsize=(12,12))
    plot_a = plt.subplot(211)
    plot_a.set_title(wav_name)
    plot_a.plot(sig)
    plot_a.set_xlabel('taxa do exemplo * tempo')
    plot_a.set_ylabel('energy')

    plot_b = plt.subplot(212)
    plot_b.specgram(sig, NFFT=1024, Fs=sample_rate, noverlap=900)
    plot_b.set_xlabel('Time')
    plot_b.set_ylabel('Frequency')

plt.show()

In [None]:
# Utility function that returns the samples and the frame rate of a WAV file
def get_wav_info(wav_file):
    """Read every frame of a WAV file.

    Args:
        wav_file: Path (or file-like object) accepted by ``wave.open``.

    Returns:
        A ``(sound_info, frame_rate)`` tuple: ``sound_info`` is a read-only
        NumPy ``int16`` array built from the raw frame bytes and
        ``frame_rate`` is the sampling rate stored in the file header.

    Raises:
        wave.Error: If the file is not a valid WAV file.
    """
    # The context manager guarantees the handle is closed even when reading
    # raises, which the explicit open()/close() pair did not.
    with wave.open(wav_file, 'r') as wav:
        frames = wav.readframes(wav.getnframes())
        frame_rate = wav.getframerate()
    # The raw bytes are interpreted as int16 regardless of the header.
    # NOTE(review): assumes 16-bit PCM input — confirm for this dataset.
    sound_info = np.frombuffer(frames, dtype='int16')
    return sound_info, frame_rate

# For every recording, render its spectrogram and save it inside the folder
# of the label it belongs to.
images_root = os.path.join(OUTPUT_DIR, 'audio-images')
os.makedirs(images_root, exist_ok=True)

for filename in os.listdir(INPUT_DIR):
    # Match on the extension: a plain substring test ("wav" in filename)
    # would also accept unrelated files whose name merely contains "wav".
    if not filename.lower().endswith('.wav'):
        continue
    file_path = os.path.join(INPUT_DIR, filename)
    file_stem = Path(file_path).stem
    # The first character of the file name is used as the class label.
    target_dir = f'class_{file_stem[0]}'
    dist_dir = os.path.join(images_root, target_dir)
    file_dist_path = os.path.join(dist_dir, file_stem)
    # Skip recordings whose image already exists so re-runs are cheap.
    if os.path.exists(file_dist_path + '.png'):
        continue
    os.makedirs(dist_dir, exist_ok=True)
    sound_info, frame_rate = get_wav_info(file_path)
    pylab.specgram(sound_info, Fs=frame_rate)
    pylab.savefig(f'{file_dist_path}.png')
    # Close the figure to avoid accumulating open figures in the loop.
    pylab.close()

# Print (up to) 10 of the class folders that were generated.
# Slicing avoids an IndexError when fewer than 10 entries exist.
path_list = sorted(os.listdir(os.path.join(OUTPUT_DIR, 'audio-images')))
print("Classes: \n")
for name in path_list[:10]:
    print(name)

# Print (up to) 10 file names belonging to class 1
path_list = sorted(os.listdir(os.path.join(OUTPUT_DIR, 'audio-images/class_1')))
print("\nA few example files: \n")
for name in path_list[:10]:
    print(name)

In [None]:
# Constants
IMAGE_HEIGHT = 256
IMAGE_WIDTH = 256
BATCH_SIZE = 32
N_CHANNELS = 3
N_CLASSES = 10

# Arguments shared by both splits. Using the same seed and validation_split
# in the two calls is what keeps the training and validation subsets
# consistent with each other.
split_kwargs = dict(
    batch_size=BATCH_SIZE,
    validation_split=0.2,
    directory=os.path.join(OUTPUT_DIR, 'audio-images'),
    shuffle=True,
    color_mode='rgb',
    image_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    seed=0,
)

# Dataset holding the training spectrograms
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    subset="training", **split_kwargs)

# Dataset holding the validation spectrograms
valid_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    subset="validation", **split_kwargs)

In [None]:
# Quick visual check: show the first nine images of one training batch,
# each titled with its integer label.
plt.figure(figsize=(12, 12))
for images, labels in train_dataset.take(1):
    for idx in range(9):
        panel = plt.subplot(3, 3, idx + 1)
        panel.imshow(images[idx].numpy().astype("uint8"))
        panel.set_title(int(labels[idx]))
        panel.axis("off")
plt.show()

Antes de construir o modelo e iniciar o treinamento, precisamos aplicar um pré-processamento simples: vamos reescalar os valores de entrada do intervalo (0, 255) para o intervalo (0, 1).

In [None]:
# Function to prepare our datasets for modelling
def prepare(ds, augment=False):
    """Rescale a dataset of (image, label) batches and optionally augment it.

    Args:
        ds: A ``tf.data.Dataset`` yielding ``(images, labels)`` pairs.
        augment: When True, random flips and rotations are applied after
            rescaling. Defaults to False.

    Returns:
        A new dataset whose images are multiplied by 1/255, optionally
        augmented, and prefetched.
    """
    rescale = tf.keras.Sequential([tf.keras.layers.Rescaling(1./255)])
    ds = ds.map(lambda x, y: (rescale(x), y),
                num_parallel_calls=tf.data.AUTOTUNE)

    if augment:
        # Built only when requested so no unused layers are created.
        flip_and_rotate = tf.keras.Sequential([
            tf.keras.layers.RandomFlip("horizontal_and_vertical"),
            tf.keras.layers.RandomRotation(0.2)
        ])
        # training=True is required: the random layers are no-ops otherwise.
        ds = ds.map(lambda x, y: (flip_and_rotate(x, training=True), y),
                    num_parallel_calls=tf.data.AUTOTUNE)

    # Overlap input preprocessing with model execution.
    return ds.prefetch(tf.data.AUTOTUNE)

# NOTE: these assignments rebind the dataset names, so running this cell a
# second time without re-creating the datasets rescales the images twice.
train_dataset = prepare(train_dataset, augment=False)
valid_dataset = prepare(valid_dataset, augment=False)

In [None]:
# Build the CNN with Keras: three convolution stages (32, 64 and 128 filters),
# each followed by batch normalisation and max pooling, then a dense head.
model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(IMAGE_HEIGHT, IMAGE_WIDTH, N_CHANNELS)),
    # Stage 1 (the only strided convolution)
    tf.keras.layers.Conv2D(32, 3, strides=2, padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.BatchNormalization(),
    # Stage 2
    tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.BatchNormalization(),
    # Stage 3
    tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.BatchNormalization(),
    # Classification head
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(N_CLASSES, activation='softmax'),
])

# Compile with a loss that takes integer labels
model.compile(
    optimizer=tf.keras.optimizers.RMSprop(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs and keep the returned History object
history = model.fit(train_dataset, validation_data=valid_dataset, epochs=10)

In [None]:
# Loss per epoch for the training and validation sets.
history_dict = history.history
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
epochs = range(1, len(loss_values)+1)

loss_fig, loss_ax = plt.subplots(figsize=(8,6))
loss_ax.plot(epochs, loss_values, 'bo', label='Training loss')
loss_ax.plot(epochs, val_loss_values, 'b', label='Validation loss')
loss_ax.set_title('Training and validation loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
plt.show()

In [None]:
# Accuracy per epoch for the training and validation sets.
acc_values = history_dict['accuracy']
val_acc_values = history_dict['val_accuracy']
epochs = range(1, len(acc_values)+1)

acc_fig, acc_ax = plt.subplots(figsize=(8,6))
acc_ax.plot(epochs, acc_values, 'bo', label='Training accuracy')
acc_ax.plot(epochs, val_acc_values, 'b', label='Validation accuracy')
acc_ax.set_title('Training and validation accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()
plt.show()

In [None]:
# Evaluate the trained model on the validation set and report both numbers
final_loss, final_acc = model.evaluate(valid_dataset, verbose=0)
summary_template = "Final loss: {0:.6f}, final accuracy: {1:.6f}"
print(summary_template.format(final_loss, final_acc))

In [None]:
# Collect predictions and true labels in a SINGLE pass over the validation
# set. valid_dataset was created with shuffle=True, so two separate passes
# (model.predict followed by a second loop for the labels) are not guaranteed
# to visit the samples in the same order, which would misalign predictions
# and labels.
batch_probabilities = []
batch_labels = []
for images, labels in valid_dataset:
    batch_probabilities.append(model.predict_on_batch(images))
    batch_labels.append(labels.numpy())
predictions = np.concatenate(batch_probabilities, axis=0)
true_labels = np.concatenate(batch_labels, axis=0)
predicted_classes = np.argmax(predictions, axis=1)

# Build the confusion matrix. Passing `labels` pins its shape to
# N_CLASSES x N_CLASSES so it always matches the tick labels below, even
# when a class is missing from the validation split.
cm = confusion_matrix(true_labels, predicted_classes, labels=np.arange(N_CLASSES))

plt.figure(figsize=(10, 8))
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Matriz de Confusão')
plt.colorbar()
tick_marks = np.arange(N_CLASSES)
plt.xticks(tick_marks, [f'Classe {i}' for i in range(N_CLASSES)], rotation=45)
plt.yticks(tick_marks, [f'Classe {i}' for i in range(N_CLASSES)])
plt.ylabel('Classe Verdadeira')
plt.xlabel('Classe Predita')

# Write the count inside every cell; white text on dark cells for contrast
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, format(cm[i, j], 'd'),
             horizontalalignment="center",
             color="white" if cm[i, j] > thresh else "black")

plt.tight_layout()
plt.show()

In [None]:
# 2. Combined figure: loss on the left panel, accuracy on the right panel
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# One entry per panel:
# (axes, training curve, validation curve, training label, validation label, title, y label)
curve_panels = (
    (ax1, loss_values, val_loss_values,
     'Training loss', 'Validation loss',
     'Training and Validation Loss', 'Loss'),
    (ax2, acc_values, val_acc_values,
     'Training accuracy', 'Validation accuracy',
     'Training and Validation Accuracy', 'Accuracy'),
)
for panel_ax, train_curve, val_curve, train_label, val_label, panel_title, y_label in curve_panels:
    panel_ax.plot(epochs, train_curve, 'bo-', label=train_label, linewidth=2)
    panel_ax.plot(epochs, val_curve, 'ro-', label=val_label, linewidth=2)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Epochs')
    panel_ax.set_ylabel(y_label)
    panel_ax.legend()
    panel_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# 3. Distribution of prediction confidence: the highest probability the model
# assigned to any class, for every predicted sample.
confidence_scores = np.max(predictions, axis=1)
mean_confidence = np.mean(confidence_scores)

plt.figure(figsize=(10, 6))
plt.hist(confidence_scores, bins=30, alpha=0.7, color='skyblue', edgecolor='black')
plt.title('Distribuição de Confiança das Predições')
plt.xlabel('Score de Confiança')
plt.ylabel('Frequência')
plt.grid(True, alpha=0.3)
# Dashed vertical line marking the mean confidence
plt.axvline(mean_confidence, color='red', linestyle='--',
            label=f'Média: {mean_confidence:.3f}')
plt.legend()
plt.show()

In [None]:
# Accuracy per class: the share of samples of each true class that were
# predicted correctly. Classes without any sample get 0.
class_accuracy = []
for class_id in range(N_CLASSES):
    belongs_to_class = (true_labels == class_id)
    if belongs_to_class.any():
        hits = predicted_classes[belongs_to_class] == true_labels[belongs_to_class]
        class_accuracy.append(np.mean(hits))
    else:
        class_accuracy.append(0)

plt.figure(figsize=(10, 6))
bars = plt.bar(range(N_CLASSES), class_accuracy, color='lightgreen', alpha=0.7)
plt.title('Acurácia por Classe')
plt.xlabel('Classe')
plt.ylabel('Acurácia')
plt.xticks(range(N_CLASSES), [f'Classe {i}' for i in range(N_CLASSES)])
plt.grid(True, alpha=0.3, axis='y')

# Annotate every bar with its value, slightly above the top of the bar
for bar in bars:
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2., height + 0.01,
             f'{height:.3f}', ha='center', va='bottom')

# Leave headroom above 1.0 for the annotations
plt.ylim(0, 1.1)
plt.show()

In [None]:
# 5. Heatmap of precision, recall and F1-score per class
from sklearn.metrics import classification_report
import seaborn as sns

report_class_names = [f'Classe {i}' for i in range(N_CLASSES)]

# Build the classification report. `labels` is passed explicitly so the
# report always has one row per class: without it, target_names must match
# the classes actually present in the data and the call raises when a class
# is missing from the validation split. zero_division=0 reports 0 (instead
# of warning) for classes that were never predicted.
report = classification_report(true_labels, predicted_classes,
                               labels=list(range(N_CLASSES)),
                               target_names=report_class_names,
                               output_dict=True,
                               zero_division=0)

# Keep only the per-class rows and the three metric columns. Selecting by
# class name avoids depending on which aggregate rows the report contains.
metrics_df = pd.DataFrame(report).transpose()
metrics_df = metrics_df.loc[report_class_names, ['precision', 'recall', 'f1-score']]

plt.figure(figsize=(8, 6))
sns.heatmap(metrics_df, annot=True, cmap='YlOrRd', fmt='.3f', cbar_kws={'label': 'Score'})
plt.title('Métricas por Classe (Precision, Recall, F1-Score)')
plt.ylabel('Classes')
plt.xlabel('Métricas')
plt.tight_layout()
plt.show()

In [None]:
# 6. Text summary of the metrics computed in the previous cells
separator = "=" * 50
summary_lines = [
    "\n" + separator,
    "RESUMO DAS MÉTRICAS DO MODELO",
    separator,
    f"Acurácia Final: {final_acc:.4f}",
    f"Loss Final: {final_loss:.4f}",
    f"Confiança Média: {np.mean(confidence_scores):.4f}",
    # Epochs are 1-based, argmax is 0-based
    f"Melhor Época (Validation Accuracy): {np.argmax(val_acc_values) + 1}",
    f"Melhor Validation Accuracy: {np.max(val_acc_values):.4f}",
    # Gap between the best training and the best validation accuracy
    f"Overfitting Score: {max(acc_values) - max(val_acc_values):.4f}",
    separator,
]
print("\n".join(summary_lines))

## YOLO

In [None]:
from google.colab import drive
# Mount Google Drive at /gdrive.
# NOTE(review): a later cell mounts Drive again at /content/drive, which is
# the mount point the rest of the notebook reads from — confirm whether this
# first mount is still needed.
drive.mount('/gdrive')

In [None]:
# List the contents of the Drive root mounted at /gdrive
!ls /gdrive/MyDrive/

In [None]:
import gdown

# Identifier of the shared Google Drive folder (the value is used in a
# folder URL, despite the variable name).
file_id = '1q088GgS16uhe5B4o8oFwfWuH9uzhnaWE'
# Build the URL from the identifier so it is defined in a single place.
url = f'https://drive.google.com/drive/folders/{file_id}?usp=sharing'

# Download the whole shared folder
gdown.download_folder(url, quiet=True, use_cookies=False)

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the cells below read from and write
# to paths under /content/drive/MyDrive.
drive.mount('/content/drive')

In [None]:
import os
import glob
import shutil
import random
from pathlib import Path

INPUT_IMAGES = "/content/drive/MyDrive/working/audio-images"
DATASET_DIR = "dataset"
# Seed of the train/val split. A fixed seed plus sorted inputs makes the
# split identical on every run; with an unseeded shuffle, re-running this
# cell could copy the same image into both train and val because files from
# earlier runs are not removed.
SPLIT_SEED = 0

# Every sub-folder is one class, e.g. ['class_0', 'class_1', ..., 'class_9'].
# Plain files that may live in the same folder are ignored.
CLASSES = sorted(
    entry for entry in os.listdir(INPUT_IMAGES)
    if os.path.isdir(os.path.join(INPUT_IMAGES, entry))
)

# Create the directory structure
for split in ['train', 'val']:
    os.makedirs(f"{DATASET_DIR}/images/{split}", exist_ok=True)
    os.makedirs(f"{DATASET_DIR}/labels/{split}", exist_ok=True)

# Split every class 80/20 and write the image and label files
split_rng = random.Random(SPLIT_SEED)
for i, class_dir in enumerate(CLASSES):
    # glob returns paths in arbitrary order; sort before the seeded shuffle
    img_paths = sorted(glob.glob(os.path.join(INPUT_IMAGES, class_dir, '*.png')))
    split_rng.shuffle(img_paths)
    split_point = int(0.8 * len(img_paths))
    train_imgs, val_imgs = img_paths[:split_point], img_paths[split_point:]

    for split, imgs in zip(['train', 'val'], [train_imgs, val_imgs]):
        for img_path in imgs:
            # Copy the image
            img_name = Path(img_path).name
            dst_img_path = f"{DATASET_DIR}/images/{split}/{img_name}"
            shutil.copyfile(img_path, dst_img_path)

            # Write a label whose box covers the whole image
            # (class_id x_center y_center width height, all normalised).
            # The label name is derived from the stem so only the extension
            # changes, even if ".png" also appears elsewhere in the name.
            label_name = f"{Path(img_name).stem}.txt"
            with open(f"{DATASET_DIR}/labels/{split}/{label_name}", "w") as f:
                f.write(f"{i} 0.5 0.5 1.0 1.0\n")  # covers the whole image


In [None]:
%%writefile dataset/dataset.yaml
# Dataset description passed to the YOLO training call (data='dataset/dataset.yaml').
# NOTE(review): `path` is relative; how it is resolved depends on the
# Ultralytics settings / working directory — confirm it points at the
# generated `dataset` folder.
path: ../dataset
# Image folders of each split (presumably resolved relative to `path`)
train: images/train
val: images/val
# Number of classes; should match the number of entries in `names`
nc: 10
# Class names. NOTE(review): the order presumably has to match the class ids
# written to the label files (index of the sorted class folders) — confirm.
names:
  - class_0
  - class_1
  - class_2
  - class_3
  - class_4
  - class_5
  - class_6
  - class_7
  - class_8
  - class_9


In [None]:
!pip install ultralytics

In [None]:
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

In [None]:
from ultralytics import YOLO

# Load the 'yolov8n.pt' checkpoint and move it to the selected device.
# NOTE: `model` and `history` are rebound here; earlier cells used the same
# names for the Keras network and its training History.
model = YOLO('yolov8n.pt')
model.to(device)

# Training configuration, kept in one place
train_settings = dict(
    data='dataset/dataset.yaml',
    epochs=10,
    imgsz=256,
    batch=16,
    device=device,
)
history = model.train(**train_settings)

In [None]:
import matplotlib.pyplot as plt

# Training data (replace the values with the real numbers of your own run)
epochs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Metrics recorded at each epoch
box_loss = [0.272, 0.1832, 0.1644, 0.1511, 0.1324, 0.1147, 0.101, 0.08938, 0.08272, 0.07154]
cls_loss = [2.723, 1.873, 1.411, 1.054, 0.8269, 0.69, 0.5685, 0.4876, 0.3936, 0.3545]
dfl_loss = [0.9615, 0.9175, 0.8979, 0.8855, 0.8696, 0.8736, 0.8697, 0.8624, 0.859, 0.8567]
precision = [0.138, 0.365, 0.593, 0.865, 0.814, 0.93, 0.945, 0.972, 0.983, 0.975]
recall = [0.757, 0.589, 0.698, 0.744, 0.868, 0.915, 0.948, 0.963, 0.982, 0.99]
map50 = [0.227, 0.384, 0.72, 0.894, 0.928, 0.982, 0.988, 0.988, 0.994, 0.994]
map50_95 = [0.227, 0.382, 0.702, 0.889, 0.925, 0.981, 0.988, 0.987, 0.993, 0.994]

# One panel per metric on a 3x2 grid, filled row by row.
# Each entry is (values, title, y label, line colour); map50_95 is defined
# above but is not part of this figure.
fig, axs = plt.subplots(3, 2, figsize=(14, 10))
panel_specs = [
    (box_loss, 'Box Loss', 'Loss', 'b'),
    (cls_loss, 'Class Loss', 'Loss', 'r'),
    (dfl_loss, 'DFL Loss', 'Loss', 'g'),
    (precision, 'Precision', 'Precision', 'c'),
    (recall, 'Recall', 'Recall', 'm'),
    (map50, 'mAP50', 'mAP50', 'y'),
]
for panel_ax, (series, panel_title, y_label, line_color) in zip(axs.flat, panel_specs):
    panel_ax.plot(epochs, series, marker='o', color=line_color, label=panel_title)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Epochs')
    panel_ax.set_ylabel(y_label)
    panel_ax.grid(True)

# Adjust the layout
plt.tight_layout()
plt.show()


In [None]:
import shutil

# Destination folder on Google Drive
drive_path = '/content/drive/MyDrive/working/'

# Copy the local 'working' folder to Google Drive. dirs_exist_ok=True keeps
# the cell re-runnable: without it copytree raises FileExistsError as soon
# as the destination folder already exists.
shutil.copytree('working', drive_path, dirs_exist_ok=True)

In [None]:
!mkdir /content/drive/MyDrive/computer-vision

In [None]:
# Save the model to Google Drive.
# NOTE(review): when the notebook is run top to bottom, `model` is the object
# created by YOLO('yolov8n.pt') at this point, not the Keras network, while
# the .h5 extension suggests a Keras file — confirm which model was meant.
model.save('/content/drive/MyDrive/computer-vision/model_weights.h5')

In [None]:
# The original cell held an unfinished attribute access (`history.`), which
# is a SyntaxError and stops "Run All". Show the type of the object instead
# so its API can be looked up.
type(history)

In [None]:
# Display the object bound to `history` as a table.
# NOTE(review): relies on that object exposing `to_df()` — confirm for the
# installed library version.
history.to_df()

In [None]:
import matplotlib.pyplot as plt

# Save the loss and accuracy curves of the Keras training to Google Drive.
#
# `history` was rebound by the YOLO training cell, and the original code
# mixed attribute access (`history.loss`) with subscripting
# (`history['val_loss']`) on that object. The curves plotted here
# ('loss', 'val_loss', 'accuracy', 'val_accuracy') are the keys of the Keras
# history, so read them from `history_dict`, which an earlier cell set to
# the Keras `history.history` dictionary.
# NOTE(review): confirm the Keras curves are the ones meant to be saved.
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
acc_values = history_dict['accuracy']
val_acc_values = history_dict['val_accuracy']
epochs = range(1, len(loss_values) + 1)

# Make sure the destination folder exists before saving the figures
os.makedirs('/content/drive/MyDrive/working', exist_ok=True)

# Loss graph
plt.figure(figsize=(8, 6))
plt.plot(epochs, loss_values, 'bo', label='Training loss')
plt.plot(epochs, val_loss_values, 'b', label='Validation loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig('/content/drive/MyDrive/working/loss_graph.png')  # save to Google Drive
plt.close()

# Accuracy graph
plt.figure(figsize=(8, 6))
plt.plot(epochs, acc_values, 'bo', label='Training accuracy')
plt.plot(epochs, val_acc_values, 'b', label='Validation accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.savefig('/content/drive/MyDrive/working/accuracy_graph.png')  # save to Google Drive
plt.close()

In [None]:
# Print the information reported by the model object.
# NOTE(review): when run top to bottom, `model` is the YOLO object here.
model.info()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Provided data: the same value is repeated for each of the 10 classes
metrics = {
    'Precision': np.full(10, 0.9983677553068719),
    'Recall': np.full(10, 0.9969186941123175),
    'mAP50': np.full(10, 0.9949999999999999),
    'mAP50-95': np.full(10, 0.995),
}

# Class names shown on the x axis
class_names = ['class_0', 'class_1', 'class_2', 'class_3', 'class_4', 'class_5', 'class_6', 'class_7', 'class_8', 'class_9']

# Fitness
fitness = 0.995

# Bar charts of Precision, Recall, mAP50 and mAP50-95 per class on a 2x2
# grid, filled row by row. Each entry is (metric key, panel title, colour).
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
bar_panels = [
    ('Precision', 'Precision per Class', 'b'),
    ('Recall', 'Recall per Class', 'g'),
    ('mAP50', 'mAP50 per Class', 'r'),
    ('mAP50-95', 'mAP50-95 per Class', 'c'),
]
for panel_ax, (metric_key, panel_title, bar_color) in zip(axes.flat, bar_panels):
    panel_ax.bar(class_names, metrics[metric_key], color=bar_color)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Classes')
    panel_ax.set_ylabel(metric_key)
    panel_ax.tick_params(axis='x', rotation=90)

# Adjust the layout
plt.tight_layout()
plt.show()

# Single bar with the fitness value
plt.figure(figsize=(8, 6))
plt.bar(['Fitness'], [fitness], color='purple')
plt.title('Fitness Metric')
plt.ylabel('Fitness')
plt.show()
