In [1]:
!pip install python_speech_features
!pip install pydub

Collecting python_speech_features
  Downloading python_speech_features-0.6.tar.gz (5.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: python_speech_features
  Building wheel for python_speech_features (setup.py) ... [?25l[?25hdone
  Created wheel for python_speech_features: filename=python_speech_features-0.6-py3-none-any.whl size=5870 sha256=9e7580df21a64ced32e05590111d37f05e9699409a6657492758f9f71b3230bc
  Stored in directory: /root/.cache/pip/wheels/5a/9e/68/30bad9462b3926c29e315df16b562216d12bdc215f4d240294
Successfully built python_speech_features
Installing collected packages: python_speech_features
Successfully installed python_speech_features-0.6
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [2]:
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import os

def set_seed(seed_value=42):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU, current CUDA device and all CUDA devices), makes cuDNN pick
    deterministic algorithms, disables the cuDNN auto-tuner, and exports
    ``PYTHONHASHSEED``.

    Args:
        seed_value: Integer seed applied to all generators (default 42).
    """
    seeders = (
        random.seed,                 # Python built-in RNG
        np.random.seed,              # NumPy global RNG
        torch.manual_seed,           # PyTorch CPU RNG
        torch.cuda.manual_seed,      # PyTorch RNG of the current GPU
        torch.cuda.manual_seed_all,  # PyTorch RNGs of every GPU
    )
    for seed_fn in seeders:
        seed_fn(seed_value)

    # Trade cuDNN speed for run-to-run reproducibility.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # NOTE(review): string hashing of the already-running interpreter is fixed
    # at start-up; this export presumably only matters for processes launched
    # afterwards -- confirm that is the intent.
    os.environ['PYTHONHASHSEED'] = str(seed_value)

set_seed(42)  # Call with the desired seed value.

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io.wavfile import read
from IPython.lib.display import Audio
from scipy.fftpack import fft, ifft
from scipy.io import loadmat
import scipy.signal as sgnl
import scipy.io.wavfile as wav
import sys
import wave
import operator
import scipy
from python_speech_features import mfcc

In [4]:
import torch
import torch.nn as nn
# Check whether CUDA is available and select the compute device accordingly
# (falls back to the CPU when no GPU is present).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


In [5]:
from google.colab import drive
drive.mount('/content/drive')
# Set the current working directory to the project folder on the mounted
# Drive; the relative paths used later in the notebook resolve against it.
%cd /content/drive/My Drive/birdcall

Mounted at /content/drive
/content/drive/My Drive/birdcall


In [8]:
import numpy as np
import torch
import torch.nn as nn
from scipy.io import wavfile
from python_speech_features import mfcc
import os
import numpy as np
from scipy.io import wavfile, loadmat
from scipy.fft import fft, fftfreq, ifftshift
from scipy.fftpack import fftshift
from python_speech_features import mfcc
import torch
from torch.utils.data import TensorDataset, DataLoader

# Label -> class-index mapping. The species are listed in class-index order,
# so each name's position in the tuple is the index it is assigned.
labels_index = {
    species: class_idx
    for class_idx, species in enumerate((
        'Velvet_Scoter_Melanitta_fusca',
        'Long_tailed_Duck_Clangula_hyemalis',
        'Leach\'s_Storm_Petrel_Hydrobates_leucorhous',
        'Horned_Grebe_Podiceps_auritus',
        'Great_Bustard_Otis_tarda',
        'European_Turtle_Dove_Streptopelia_turtur',
        'Black_legged_Kittiwake_Rissa_tridactyla',
        'Balearic_Shearwater_Puffinus_mauretanicus',
        'Atlantic_Puffin_Fratercula_arctica',
        'Aquatic_Warbler_Acrocephalus_paludicola',
    ))
}

# Reverse lookup (class index -> label). Iterating the dict yields the labels
# in insertion order, which is exactly their index order.
index_to_labels = dict(enumerate(labels_index))

# Model class definition
class LSTMSoundClassifier(nn.Module):
    """Stacked LSTM followed by a two-layer head that classifies a feature sequence.

    The sequence is run through ``num_layers`` LSTM layers; the output at the
    last time step goes through dropout, a ReLU-activated linear layer of width
    ``2 * hidden_size`` and a final linear layer producing ``num_classes``
    logits (no softmax is applied here).

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_classes: Number of output logits.
        num_layers: Number of stacked LSTM layers (default 3).
        dropout: Drop probability applied to the last LSTM output. Defaults to
            0.5, the value that was previously hard-coded.
    """

    def __init__(self, input_size, hidden_size, num_classes, num_layers=3, dropout=0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Sub-module names are part of the checkpoint's state_dict keys; keep them.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, hidden_size * 2)
        self.fc2 = nn.Linear(hidden_size * 2, num_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return class logits for a batch of sequences.

        Args:
            x: Tensor indexed as (batch, time, feature); the LSTM is built with
                ``batch_first=True`` and the last time step is read via
                ``out[:, -1, :]``.

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.
        """
        # Allocate the zero initial states directly with the input's dtype and
        # device. Creating float32 CPU tensors and moving them afterwards costs
        # an extra copy and breaks when the model runs in another precision
        # (e.g. after ``model.double()`` or ``model.half()``).
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        c0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        out, _ = self.lstm(x, (h0, c0))
        # Only the output of the final time step feeds the classifier head.
        out = self.dropout(out[:, -1, :])
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

def apply_fourier_transform(input_audio, Fs):
    """Return the zero-centred DFT of a signal together with its frequency axis.

    Args:
        input_audio: Sequence of samples. Assumed to be a 1-D (mono) signal --
            TODO confirm against callers.
        Fs: Sampling rate in Hz.

    Returns:
        Tuple ``(f_transform, frequencies)``: the shifted spectrum (zero
        frequency in the middle) and the frequency in Hz of each of its bins.
    """
    N = len(input_audio)
    f_transform = fftshift(fft(input_audio, N))
    # Bin k of an N-point DFT sits at k * Fs / N, so the shifted axis is spaced
    # Fs / N apart and contains 0 Hz exactly. np.linspace(-Fs/2, Fs/2, N)
    # spaces the points Fs / (N - 1) apart and, for even N, puts no point on
    # 0 Hz, which mislabels every bin of the spectrum.
    frequencies = fftshift(np.fft.fftfreq(N, d=1.0 / Fs))
    return f_transform, frequencies

import scipy.signal as signal


def apply_lowpass_filter(input_audio, b, a):
    """Run ``input_audio`` through the linear filter described by ``(b, a)``.

    Thin wrapper around ``scipy.signal.lfilter``. The response of the filter is
    determined entirely by the coefficients passed in.

    Args:
        input_audio: Samples to filter.
        b: Numerator coefficients, passed as the first argument of ``lfilter``.
        a: Denominator coefficients, passed as the second argument of ``lfilter``.

    Returns:
        The filtered signal as returned by ``lfilter``.
    """
    return signal.lfilter(b, a, input_audio)

def extract_mfcc_features(filtered_audio, Fs):
    """Compute MFCC features of ``filtered_audio`` sampled at ``Fs`` Hz.

    Delegates to ``python_speech_features.mfcc`` and leaves every option other
    than the sample rate at the library default.
    """
    return mfcc(filtered_audio, samplerate=Fs)

# Preprocessing function definition
def preprocess_audio(file_path, b, a):
    """Load a WAV file, filter it, and return its MFCC features.

    Args:
        file_path: Path of the WAV file to read.
        b: Numerator filter coefficients forwarded to ``apply_lowpass_filter``.
        a: Denominator filter coefficients forwarded to ``apply_lowpass_filter``.

    Returns:
        The result of ``mfcc`` with 13 cepstral coefficients per frame.
    """
    # NOTE(review): the samples are filtered as read, with no channel handling;
    # this presumably expects mono recordings -- confirm against the dataset.
    sample_rate, samples = wavfile.read(file_path)
    return mfcc(
        apply_lowpass_filter(samples, b, a),  # filter before feature extraction
        samplerate=sample_rate,
        numcep=13,
    )


# Hyperparameters of the model to use; they must match the checkpoint loaded below.
num_classes = 10  # Number of output classes; change to the actual class count if it differs.
input_size = 13  # Size of the MFCC feature vector (must equal the numcep value).
hidden_size = 512
num_layers = 3

# Create the model instance and load the trained weights.
model = LSTMSoundClassifier(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes, num_layers=num_layers)
# map_location='cpu' lets a checkpoint that was saved from a GPU session load on
# a CPU-only runtime as well; the model is kept on the CPU, as before.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source.
model.load_state_dict(torch.load('./3.23.pth', map_location='cpu'))  # Adjust the checkpoint path as needed.
model.eval()

# Load the filter coefficients - set the path that corresponds to the 'highpass(500).mat' file.
# NOTE(review): the file is named 'highpass(500)' but lives in ./low_filter and
# is applied through apply_lowpass_filter -- confirm which response is intended.
filter_data = loadmat('./low_filter/highpass(500).mat')  # Adjust the filter path as needed.
Coeffs = filter_data['ba'].astype(np.float64)
b = Coeffs[0, :]  # first row of 'ba' is used as the numerator coefficients
a = 1  # denominator fixed to 1

# Path of the audio file to run inference on.
test_file_path = './test/Black_Legged_Kittiwake_2_TEST.wav'  # Set the test audio file path here.

# Preprocess the audio file into MFCC features.
test_feature = preprocess_audio(test_file_path, b, a)
# Add the batch axis on the array itself and convert it in one step. Wrapping
# the ndarray in a Python list makes torch.tensor take its slow
# list-of-ndarrays path and emit a UserWarning; the resulting tensor is the same.
test_feature_tensor = torch.tensor(test_feature[np.newaxis], dtype=torch.float)


# Run inference (gradients are not needed, so autograd tracking is disabled).
with torch.no_grad():
    output = model(test_feature_tensor)
    probabilities = torch.softmax(output, dim=1)  # logits -> per-class probabilities
    predicted_index = probabilities.argmax(dim=1).item()  # .item() requires a single-clip batch
    predicted_prob = probabilities.max(dim=1).values.item()

    # Use the index to get the associated label from our dictionary
    predicted_label = index_to_labels[predicted_index]

    print(f"Predicted label: {predicted_label}, Probability: {predicted_prob}")

Predicted label: Black_legged_Kittiwake_Rissa_tridactyla, Probability: 0.8979588150978088
