In [1]:
import torch
import torchvision.models as models

In [2]:
from dataset_preprocessing.audio_dataset_integration import AudioDatasetIntegrator


def audio_integrator():
    """Run the dataset integration step for every configured dataset.

    For each (name, source directory) entry, an ``AudioDatasetIntegrator`` is
    created with the source directory and the output path
    ``h5_files/<name>_dataset.h5``; its ``process_audio_files`` and
    ``annotate_audio_files`` methods are then called in that order.
    Progress messages are printed before and after each dataset.
    """
    # Dataset name -> directory that holds its audio files.
    sources = {
        "Dataset_teste": "../Dataset_teste",
    }

    for dataset_name, source_dir in sources.items():
        print(f"Processing {dataset_name} dataset")

        h5_target = f"h5_files/{dataset_name}_dataset.h5"
        worker = AudioDatasetIntegrator(source_dir, h5_target)
        worker.process_audio_files()
        worker.annotate_audio_files()

        print(f"{dataset_name} dataset processed and annotated")


In [3]:
from dataset_preprocessing.audio_feature_extractor import AudioFeatureExtractor


def audio_extractor():
    """Run feature extraction for every configured dataset file.

    For each (name, input HDF5 path) entry, an ``AudioFeatureExtractor`` is
    created with the input path and the output path
    ``h5_files/<name>_feature.h5``, and ``extract_features`` is called with
    ``n_fft=640``. Progress messages are printed before and after each dataset.
    """
    # Dataset name -> HDF5 file used as input for the extractor.
    sources = {
        "Dataset_teste": "h5_files/Dataset_teste_dataset.h5",
    }

    for dataset_name, input_h5 in sources.items():
        print(f"Processing {dataset_name} dataset")

        feature_h5 = f"h5_files/{dataset_name}_feature.h5"
        extractor = AudioFeatureExtractor(input_h5, feature_h5)
        extractor.extract_features(n_fft=640)

        # NOTE(review): the message below says "annotated", but this function
        # only calls extract_features; the wording is kept unchanged here.
        print(f"{dataset_name} dataset processed and annotated")



In [4]:
import sounddevice as sd
import soundfile as sf


def gravar_audio(path, duracao, samplerate=44100, channels=1):
    """Record audio with ``sounddevice`` and save it with ``soundfile``.

    Args:
        path: Destination file path, passed to ``sf.write``.
        duracao: Recording length in seconds; must be greater than zero.
        samplerate: Sampling rate in Hz. Defaults to 44100.
        channels: Number of audio channels to record. Defaults to 1 (mono).

    Raises:
        ValueError: If ``duracao`` is zero or negative.
    """
    # Fail early with a clear message instead of asking the audio backend
    # for an empty or negative number of frames.
    if duracao <= 0:
        raise ValueError(f"duracao must be positive, got {duracao!r}")

    # Total number of frames to capture for the requested duration.
    n_frames = int(duracao * samplerate)

    # Record the audio
    print(f"Gravando áudio por {duracao} segundos...")
    gravacao = sd.rec(n_frames, samplerate=samplerate, channels=channels)
    sd.wait()  # Block until the recording has finished

    # Save the recorded samples to disk
    sf.write(path, gravacao, samplerate)

    print(f"Áudio gravado e salvo em: {path}")



In [68]:
# Example usage: record a clip and save it inside the test dataset folder.
caminho_arquivo = "../Dataset_teste/teste.wav"
duracao_gravacao = 10  # Duration in seconds

gravar_audio(caminho_arquivo, duracao_gravacao)


Gravando áudio por 10 segundos...
Áudio gravado e salvo em: ../Dataset_teste/teste.wav


In [69]:
# Run both preprocessing stages in order: audio_extractor is configured to read
# h5_files/Dataset_teste_dataset.h5, the path audio_integrator passes as its output.
audio_integrator()
audio_extractor()

Processing Dataset_teste dataset


Walking through directories: 1it [00:00, 137.00it/s]
Annotating audio files: 100%|██████████| 1/1 [00:00<00:00, 1004.38it/s]


Dataset_teste dataset processed and annotated
Processing Dataset_teste dataset


Extracting features: 100%|██████████| 1/1 [00:00<00:00, 35.67it/s]

Dataset_teste dataset processed and annotated





In [70]:
import h5py

file_path = "h5_files/Dataset_teste_feature.h5"

# Read the array stored under features/0 fully into memory so the file can be
# closed before any tensor work is done.
with h5py.File(file_path, "r") as hf:
    features_np = hf["features"]["0"][()]

# Convert to a float tensor and reshape to 4-D with leading dimensions of size 1.
inputs = torch.tensor(features_np, dtype=torch.float)
inputs = inputs.reshape(1, 1, 251, 29)
print(inputs.shape)
    

torch.Size([1, 1, 251, 29])


In [71]:
import torch.nn as nn
import torch.nn.functional as F

# Human-readable label for each class index.
# NOTE(review): this table lists six labels, but the classifier below is built
# with num_classes = 4 outputs, so only indices 0-3 can be predicted here.
annotation_dict = {
    0: "Nada",
    1: "Grito",
    2: "Violência Física",
    3: "Tiro",
    4: "Explosão",
    5: "Grito de Esforço",
}

num_classes = 4

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

model = models.resnet101()
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)  # resize the last layer to num_classes outputs
model.conv1 = nn.Conv2d(
    1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
)  # replace the first layer so it accepts 1-channel input
model = model.to(device)

# map_location remaps the stored tensors onto the selected device, so the
# checkpoint also loads when the code falls back to CPU.
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
state_dict = torch.load("models/model_resnet101_5_Folds.pth", map_location=device)
model.load_state_dict(state_dict)

# Switch the model to evaluation mode before running inference
model.eval()

with torch.no_grad():
    inputs = inputs.to(device)

    outputs = model(inputs)
    probabilities = F.softmax(outputs, dim=1)
    _, preds = torch.max(probabilities, 1)

    print(probabilities)
    # Take the prediction of the first sample as a plain Python int so it can
    # be compared and used as a dictionary key directly.
    preds = preds[0].item()
    if preds == 0:
        print('Nada')
    else:
        print('Violência!')
    print(annotation_dict[preds])


cuda
tensor([[0.5838, 0.0201, 0.3907, 0.0055]], device='cuda:0')
Nada
Nada
