In [9]:
###Wav 5sec로 나누기###
import os
import wave
import numpy as np

# Define the directory containing the wav files
input_dir = 'data_val/노이즈'
output_dir = 'output_wav_segments'

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Function to split wav files
def split_wav(file_path, segment_duration=5):
    with wave.open(file_path, 'rb') as wav_file:
        params = wav_file.getparams()
        framerate = wav_file.getframerate()
        num_frames = wav_file.getnframes()
        total_duration = num_frames / float(framerate)
        
        # Read the audio frames
        audio_frames = wav_file.readframes(num_frames)
        audio_data = np.frombuffer(audio_frames, dtype=np.int16)
        
        # Calculate number of samples per segment
        samples_per_segment = framerate * segment_duration * params.nchannels
        
        # Split the audio into segments
        file_name = os.path.basename(file_path).replace('.wav', '')
        num_segments = int(total_duration // segment_duration)
        
        for i in range(num_segments):
            start_sample = i * samples_per_segment
            end_sample = start_sample + samples_per_segment
            
            segment_data = audio_data[start_sample:end_sample]
            segment_file_name = f"{file_name}_segment_{i*segment_duration}s_to_{(i+1)*segment_duration}s.wav"
            
            # Save each segment as a new wav file
            with wave.open(os.path.join(output_dir, segment_file_name), 'wb') as segment_wav:
                segment_wav.setparams(params)
                segment_wav.writeframes(segment_data.tobytes())
                
            print(f"Exported {segment_file_name}")

# Iterate through all wav files in the directory
for file_name in os.listdir(input_dir):
    if file_name.endswith('.wav'):
        file_path = os.path.join(input_dir, file_name)
        split_wav(file_path)

print("WAV file splitting complete.")

WAV file splitting complete.


In [3]:
###wav합기기###
import os
import wave
import numpy as np
from scipy.io import wavfile
from scipy.signal import resample

# Function to convert incompatible files to match the first file's parameters
def convert_wav_file(file_path, target_params):
    with wave.open(file_path, 'rb') as wav_file:
        original_params = wav_file.getparams()
        framerate = wav_file.getframerate()
        num_channels = wav_file.getnchannels()
        num_frames = wav_file.getnframes()
        sample_width = wav_file.getsampwidth()

        # Read and reshape the audio frames
        audio_data = np.frombuffer(wav_file.readframes(num_frames), dtype=np.int16)

        # Resample the audio if the sample rates don't match
        if framerate != target_params.framerate:
            audio_data = resample(audio_data, int(len(audio_data) * target_params.framerate / framerate))

        # Reshape to match the number of channels
        if num_channels != target_params.nchannels:
            audio_data = np.tile(audio_data, (target_params.nchannels, 1)).T

        # Convert sample width if necessary
        if sample_width != target_params.sampwidth:
            audio_data = (audio_data * (2 ** (8 * target_params.sampwidth - 1))).astype(np.int16)
        
        return audio_data

# Define the directory containing the wav files
input_dir = 'data_val/여자목소리'
output_file = 'concatenated_output.wav'

# Function to concatenate wav files
def concatenate_wav_files(input_dir, output_file):
    wav_files = [f for f in os.listdir(input_dir) if f.endswith('.wav')]
    wav_files.sort()  # Sort files to concatenate in order

    if not wav_files:
        print("No WAV files found in the directory.")
        return

    # Open the first file to get parameters
    first_wav_path = os.path.join(input_dir, wav_files[0])
    with wave.open(first_wav_path, 'rb') as first_wav:
        params = first_wav.getparams()
        framerate = first_wav.getframerate()
        
        # Create an array to hold the combined audio data
        combined_audio_data = np.frombuffer(first_wav.readframes(first_wav.getnframes()), dtype=np.int16)

    # Loop through remaining wav files and concatenate them
    for wav_file in wav_files[1:]:
        file_path = os.path.join(input_dir, wav_file)
        with wave.open(file_path, 'rb') as wav_f:
            if wav_f.getparams() != params:
                print(f"Converting {wav_file} to match parameters.")
                audio_data = convert_wav_file(file_path, params)
            else:
                audio_data = np.frombuffer(wav_f.readframes(wav_f.getnframes()), dtype=np.int16)
            
            combined_audio_data = np.concatenate((combined_audio_data, audio_data))

    # Write the concatenated audio data to a new wav file
    with wave.open(output_file, 'wb') as output_wav:
        output_wav.setparams(params)
        output_wav.writeframes(combined_audio_data.tobytes())
    
    print(f"Concatenation complete. Output saved to {output_file}")

# Concatenate all wav files in the directory
concatenate_wav_files(input_dir, output_file)


Converting 1_0164.wav to match parameters.
Converting 1_0165.wav to match parameters.
Converting 1_0166.wav to match parameters.
Converting 1_0167.wav to match parameters.
Converting 1_0168.wav to match parameters.
Converting 1_0169.wav to match parameters.
Converting 1_0170.wav to match parameters.
Converting 1_0171.wav to match parameters.
Converting 1_0172.wav to match parameters.
Converting 1_0173.wav to match parameters.
Converting 1_0174.wav to match parameters.
Converting 1_0175.wav to match parameters.
Converting 1_0176.wav to match parameters.
Converting 1_0177.wav to match parameters.
Converting 1_0178.wav to match parameters.
Converting 1_0179.wav to match parameters.
Converting 1_0180.wav to match parameters.
Converting 1_0181.wav to match parameters.
Converting 1_0182.wav to match parameters.
Converting 1_0183.wav to match parameters.
Converting 1_0184.wav to match parameters.
Converting 1_0185.wav to match parameters.
Converting 1_0186.wav to match parameters.
Converting 

In [4]:
import sounddevice as sd
from scipy.io.wavfile import write
import numpy as np

def record_audio(filename, duration=5, fs=16000):
    """
    오디오를 녹음하여 .wav 파일로 저장하는 함수
    Args:
        filename: 저장할 파일명 (경로 포함)
        duration: 녹음 시간 (초)
        fs: 샘플 레이트 (기본값 16kHz)
    """
    print("Recording...")
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='float32')
    sd.wait()  # 녹음 종료를 기다림
    write(filename, fs, recording)  # 녹음된 파일을 저장
    print(f"Recording saved as {filename}")

In [12]:
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam

# 모델 로드 (compile=False로 옵티마이저 정보 무시)
best_model = load_model('best_model.h5', compile=False)

# 모델을 다시 컴파일
best_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# 테스트 데이터 평가
test_loss, test_accuracy = best_model.evaluate(x_test, y_test)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

NameError: name 'x_test' is not defined

In [8]:
import sounddevice as sd
from scipy.io.wavfile import write
import tensorflow as tf
import tensorflow_io as tfio
from tensorflow.keras.models import load_model
import numpy as np

# 1. 오디오 녹음 및 저장
def record_audio(filename, duration=5, fs=16000):
    """
    오디오를 녹음하여 PCM 형식의 .wav 파일로 저장하는 함수
    Args:
        filename: 저장할 파일명 (경로 포함)
        duration: 녹음 시간 (초)
        fs: 샘플 레이트 (기본값 16kHz)
    """
    print("Recording...")
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')  # PCM 형식으로 녹음 (int16)
    sd.wait()  # 녹음 종료를 기다림
    write(filename, fs, recording)  # PCM 형식으로 녹음된 파일을 저장
    print(f"Recording saved as {filename}")

# 2. 오디오 파일 로드 및 전처리
def load_wav_16k_mono(filename):
    """
    16kHz로 리샘플링된 mono 오디오 파일을 불러오는 함수
    Args:
        filename: 불러올 .wav 파일 경로
    Returns:
        리샘플링된 오디오 데이터 (Tensor)
    """
    file_contents = tf.io.read_file(filename)
    wav, sample_rate = tf.audio.decode_wav(file_contents, desired_channels=1)
    wav = tf.squeeze(wav, axis=-1)
    sample_rate = tf.cast(sample_rate, dtype=tf.int64)
    wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
    return wav

def preprocess_wav(wav):
    """
    오디오 데이터를 스펙트로그램으로 변환하는 함수
    Args:
        wav: 오디오 Tensor
    Returns:
        스펙트로그램 Tensor
    """
    wav = wav[:80000]  # 길이를 5초(16kHz 샘플링일 경우 80,000 샘플)로 맞춤
    zero_padding = tf.zeros([80000] - tf.shape(wav), dtype=tf.float32)
    wav = tf.concat([zero_padding, wav], 0)
    spectrogram = tf.signal.stft(wav, frame_length=320, frame_step=32)
    spectrogram = tf.abs(spectrogram)
    spectrogram = tf.expand_dims(spectrogram, axis=2)  # (시간, 주파수, 1) 형태로 확장
    spectrogram = tf.image.resize(spectrogram, [500, 161])  # 모델 입력 크기에 맞춰 리사이즈
    spectrogram = tf.expand_dims(spectrogram, axis=0)  # 배치 차원 추가
    return spectrogram

# 3. 모델을 불러와서 예측
def predict_audio(filename, model_path='best_model.h5'):
    """
    오디오 파일을 불러와서 저장된 모델로 pos/neg를 예측하는 함수
    Args:
        filename: 예측할 .wav 파일 경로
        model_path: 불러올 모델 경로
    """
    # 오디오 파일 로드 및 전처리
    wav = load_wav_16k_mono(filename)
    spectrogram = preprocess_wav(wav)
    
    # 저장된 모델 불러오기
    model = load_model(model_path)
    
    # 예측
    prediction = model.predict(spectrogram)
    
    # 결과 해석
    if prediction >= 0.5:
        print(f"{filename}: Positive (Crying sound detected)")
    else:
        print(f"{filename}: Negative (Noise detected)")

# 4. 전체 과정 실행
if __name__ == "__main__":
    # 5초 동안 오디오 녹음
    recorded_filename = 'recorded_audio.wav'
    record_audio(recorded_filename, duration=5)  # PCM 형식으로 바로 저장
    
    # 모델을 사용해 예측
    predict_audio(recorded_filename, model_path='best_model.h5')

ValueError: Unknown optimizer: Custom>Adam. Please ensure this object is passed to the `custom_objects` argument. See https://www.tensorflow.org/guide/keras/save_and_serialize#registering_the_custom_object for details.