In [4]:
import torch
import torchaudio
import numpy as np
import matplotlib.pyplot as plt

# Read the input .wav file
filename = "input.wav"
waveform, sample_rate = torchaudio.load(filename)

# Only the first channel is filtered. Convert it to NumPy once so the
# processing below never mixes torch tensors with NumPy arrays.
signal = waveform[0].numpy()
num_samples = signal.shape[0]

# Parameters
L = 80  # frame shift (hop size) in samples
N = 128  # FFT size
P = 11  # moving-average filter length
TAPER = 24  # samples cross-faded between neighbouring frames
FRAME_LEN = L + TAPER  # signal samples per frame; the rest of N is zero padding

# Multiplying spectra is a circular convolution. It equals the linear
# convolution only if the filtered frame (FRAME_LEN + P - 1 samples) fits in N.
assert FRAME_LEN + P - 1 <= N, "FFT size too small: filtered frame would wrap around"

# Moving-average filter taps. They are left unnormalised because the output
# is peak-normalised at the end, which removes any constant gain.
h = np.ones(P)

# Analysis window: 24-sample rise, flat middle, 24-sample fall (FRAME_LEN total).
# A periodic Hann (hanning(2*TAPER + 1) without its last point) is used so that
# the fall of one frame plus the rise of the next is exactly 1. With hop L the
# overlapping windows therefore sum to a constant and overlap-add introduces
# no frame-rate amplitude modulation.
hann_window = np.hanning(2 * TAPER + 1)[:-1]
window = np.concatenate([hann_window[:TAPER], np.ones(L - TAPER), hann_window[TAPER:]])

# Filter prepared in the frequency domain (zero-padded to N points)
H = np.fft.fft(h, N)

# Output buffer. Frame i starts TAPER samples before input sample i but is
# written at output index i, so the output is the filtered input delayed by
# TAPER samples; the extra N - L samples hold that delay plus the filter tail.
output = np.zeros(num_samples + N - L)

# TAPER leading zeros play the role of the "previous frame" of the first frame;
# trailing zeros let the last frames be sliced without special cases.
padded = np.concatenate([np.zeros(TAPER), signal, np.zeros(FRAME_LEN)])

# Overlap-add. The range extends TAPER samples past the end of the signal so
# that a final flush frame completes the cross-fade of the last samples.
for i in range(0, num_samples + TAPER, L):
    # Windowed frame; fft(..., N) zero-pads it from FRAME_LEN to N samples
    windowed_frame = padded[i:i + FRAME_LEN] * window
    X = np.fft.fft(windowed_frame, N)

    # Filter in the frequency domain and return to the time domain
    Y = X * H
    y = np.fft.ifft(Y).real

    # Accumulate into the output, truncating at the end of the buffer
    end = min(i + N, output.shape[0])
    output[i:end] += y[:end - i]

# Peak-normalise the output; skip it for an all-zero signal to avoid 0/0 -> NaN
peak = np.max(np.abs(output))
if peak > 0:
    output = output / peak

# Save the output. The buffer is float64; cast to float32 (the dtype
# torchaudio.load returned) since float64 is not among the dtypes
# torchaudio.save documents as supported.
output_tensor = torch.from_numpy(output.astype(np.float32)).unsqueeze(0)
torchaudio.save("output3.wav", output_tensor, sample_rate)


In [None]:
import torch
import torchaudio
import torch.nn.functional as F
import numpy as np

# Load the input .wav file and keep only the first channel
waveform, sample_rate = torchaudio.load('input.wav')
waveform = waveform[0]
num_samples = waveform.size(0)

# LPF filter coefficients (moving average filter)
LPF = torch.ones(11) / 11.0

# Parameters
L = 80                                # Frame shift (hop size)
N = 128                               # FFT size
window_overlap = 24                   # Samples cross-faded between neighbouring frames
frame_length = L + window_overlap     # Signal samples per frame (104); the rest of N is zero padding
window_length = 2 * window_overlap    # Length of the Hann window that supplies the two tapers

# Multiplying spectra is a circular convolution. It equals the linear
# convolution only if the filtered frame fits in the N-point FFT.
assert frame_length + LPF.numel() - 1 <= N, "FFT size too small: filtered frame would wrap around"

# Analysis window: rising half of a periodic Hann, a flat middle, then the
# falling half. For a periodic Hann the two halves sum to exactly 1, so with
# hop L the overlapping windows add up to a constant (no amplitude modulation).
window = torch.hann_window(window_length, periodic=True, dtype=waveform.dtype, device=waveform.device)
hann_window = torch.cat((
    window[:window_overlap],
    waveform.new_ones(L - window_overlap),
    window[window_overlap:],
))

# Frequency response of the filter; loop-invariant, so computed once
lpf_fft = torch.fft.fft(LPF.to(waveform.device), N)

# Zero-pad into a separate tensor (waveform itself is left untouched so the
# cell can be re-run). The leading zeros let the first frame's rising taper
# fall before the first real sample; the trailing zeros let the last frames
# be sliced at full length.
padded = torch.cat((
    waveform.new_zeros(window_overlap),
    waveform,
    waveform.new_zeros(frame_length),
))

# Accumulator large enough for the N-sample result of the last frame
accumulator = padded.new_zeros(padded.size(0) + N - frame_length)

# Process each frame using the overlap-add method. The range extends
# window_overlap samples past the signal so a final flush frame completes
# the cross-fade of the last samples.
for i in range(0, num_samples + window_overlap, L):
    frame = padded[i:i + frame_length]
    windowed_frame = frame * hann_window
    # fft(..., N) zero-pads the frame from frame_length to N samples
    frame_fft = torch.fft.fft(windowed_frame, N)

    # Keep the N-bin magnitude spectrum of the 100th frame (only defined
    # when the signal is long enough to contain that frame)
    if i == 99 * L:
        ms = torch.abs(frame_fft)

    filtered_frame_fft = frame_fft * lpf_fft
    filtered_frame = torch.real(torch.fft.ifft(filtered_frame_fft))
    accumulator[i:i + N] += filtered_frame

# Drop the leading padding and keep exactly as many samples as the input
output_signal = accumulator[window_overlap:window_overlap + num_samples]
# Save the output as .wav file
torchaudio.save('output3.wav', output_signal.unsqueeze(0), sample_rate)