In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
import os
from collections import Counter


import sounddevice as sd
import soundfile as sf



In [2]:
from dataset_preprocessing.audio_dataset_integration import AudioDatasetIntegrator


def audio_integrator():
    datasets_path = [
        ("Dataset_teste", "../Dataset_teste"),
    ]

    for dataset_name, dataset_path in datasets_path:
        output_path = f"h5_files/{dataset_name}_dataset.h5"
        print(f"Processing {dataset_name} dataset")
        integrator = AudioDatasetIntegrator(dataset_path, output_path)
        integrator.process_audio_files()
        integrator.annotate_audio_files()
        print(f"{dataset_name} dataset processed and annotated")


In [3]:
from dataset_preprocessing.audio_feature_extractor import AudioFeatureExtractor


def audio_extractor():
    datasets_path = [
        ("Dataset_teste", "h5_files/Dataset_teste_dataset.h5"),
    ]

    for dataset_name, dataset_path in datasets_path:
        output_path = f"h5_files/{dataset_name}_feature.h5"
        print(f"Processing {dataset_name} dataset")
        feature_extractor = AudioFeatureExtractor(dataset_path, output_path)
        feature_extractor.extract_features(n_fft=640)
        print(f"{dataset_name} dataset processed and annotated")



In [4]:


def gravar_audio(path, duracao):
    # Configuração do dispositivo de áudio
    samplerate = 44100  # Taxa de amostragem em Hz
    channels = 1       # Número de canais de áudio (estéreo)

    # Gravação do áudio
    print(f"Gravando áudio por {duracao} segundos...")
    gravacao = sd.rec(int(duracao * samplerate), samplerate=samplerate, channels=channels)
    sd.wait()  # Aguarda a gravação ser concluída

    # Salvando o arquivo de áudio
    sf.write(path, gravacao, samplerate)

    print(f"Áudio gravado e salvo em: {path}")



In [5]:
# Exemplo de uso
caminho_arquivo = "../Dataset_teste/teste.wav"
duracao_gravacao = 10  # Duração em segundos

gravar_audio(caminho_arquivo, duracao_gravacao)


Gravando áudio por 10 segundos...
Áudio gravado e salvo em: ../Dataset_teste/teste.wav


In [6]:
audio_integrator()
audio_extractor()

Processing Dataset_teste dataset


Walking through directories: 1it [00:02,  2.10s/it]
Annotating audio files: 100%|██████████| 1/1 [00:00<00:00, 1002.94it/s]


Dataset_teste dataset processed and annotated
Processing Dataset_teste dataset


Extracting features: 100%|██████████| 1/1 [00:00<00:00, 13.47it/s]

Dataset_teste dataset processed and annotated





In [7]:
import h5py

file_path = "h5_files/Dataset_teste_feature.h5"
with h5py.File(file_path, "r") as hf:
    inputs = hf["features"]['0'][()]
    inputs = torch.tensor(inputs, dtype=torch.float).reshape(1,1,251,29)
    print(inputs.shape)
    

torch.Size([1, 1, 251, 29])


In [10]:
annotation_dict = {
    0: "Nada",
    1: "Grito",
    2: "Violência Física",
    3: "Tiro",

}

num_classes = 4

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

model_folder_path = "models/modelos_finais"

def load_model(model_path):
    model = models.resnet101()
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)  # ajustar a última camada para 6 classes
    model.conv1 = nn.Conv2d(
        1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
    )  # ajustar a primeira camada para 1 canal
    model = model.to(device)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    return model

models_list = [load_model(os.path.join(model_folder_path, model_name)) for model_name in os.listdir(model_folder_path) if model_name.endswith('.pth')]

preds_all = []

with torch.no_grad():
    inputs = inputs.to(device)
    
    for model in models_list:
        outputs = model(inputs)
        probabilities = F.softmax(outputs, dim=1)
        _, preds = torch.max(probabilities, 1)
        preds_all.append(preds.cpu().numpy()[0])
    
    final_pred = max(set(preds_all), key = preds_all.count) # class with max votes

    print('Final prediction:', annotation_dict[final_pred])

    # Count of votes for each class
    count_votes = Counter(preds_all)

    for class_index in annotation_dict:
        print(f"Votes for class {annotation_dict[class_index]}: {count_votes[class_index]}")

cuda
Final prediction: Nada
Votes for class Nada: 2
Votes for class Grito: 2
Votes for class Violência Física: 0
Votes for class Tiro: 1
