# 선언

In [1]:
import torch

# Use the first MPS device when torch reports the backend as available;
# otherwise fall back to the CPU.
if torch.backends.mps.is_available():
    device = torch.device('mps:0')
else:
    device = torch.device('cpu')

In [2]:
# Report the installed PyTorch version and whether the MPS backend was
# compiled in / is usable on this machine, one item per output line.
print(
    f"PyTorch version:{torch.__version__}",
    f"MPS 장치를 지원하도록 build 되었는지: {torch.backends.mps.is_built()}",
    f"MPS 장치가 사용 가능한지: {torch.backends.mps.is_available()}",
    sep="\n",
)
!python -c 'import platform;print(platform.platform())'

PyTorch version:2.4.0
MPS 장치를 지원하도록 build 되었는지: True
MPS 장치가 사용 가능한지: True
macOS-14.6.1-arm64-arm-64bit


In [3]:
import os

import torch
import torch.nn as nn
import torch.optim as optim

import numpy as np

from scipy.io import wavfile
import soundfile as sf

from sklearn.preprocessing import MinMaxScaler

# 데이터 불러오기

In [None]:
import random

folder_path = '/Users/junggwonhee/Desktop/programing/오아시스_해커톤/project/data/극한_소리_데이터/Training/Sound'
file_list = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith('.wav')]

# Shuffle so the subset taken below is a random sample of the folder.
random.shuffle(file_list)

# Keep roughly 1% of the recordings (despite its name, half_length is
# len // 100, not len // 2). Plain integer division yields 0 for folders with
# fewer than 100 files, which would leave nothing to train on, so keep at
# least one file whenever the folder contains any.
half_length = min(len(file_list), max(1, len(file_list) // 100))
file_list = file_list[:half_length]

# 데이터 전처리

In [5]:
# def find_loudest_segment(data, sample_rate, segment_duration=5):
#     segment_length = int(sample_rate * segment_duration)
#     max_amplitude = 0
#     loudest_segment = None
    
#     for i in range(0, len(data) - segment_length, sample_rate):
#         segment = data[i:i + segment_length]
#         amplitude = np.sum(segment ** 2)
#         if amplitude > max_amplitude:
#             max_amplitude = amplitude
#             loudest_segment = segment
    
#     return loudest_segment

# all_segments = []

# for file_path in file_list:
#     data, sample_rate = sf.read(file_path)
#     loudest_segment = find_loudest_segment(data, sample_rate)
#     if loudest_segment is not None:
#         all_segments.append(loudest_segment)

# scaler = MinMaxScaler(feature_range=(-1, 1))
# all_segments = [scaler.fit_transform(segment.reshape(-1, 1)).flatten() for segment in all_segments]
# all_data = np.concatenate(all_segments)
def find_loudest_segment(data, sample_rate, segment_duration=5):
    """Return the highest-energy window of ``segment_duration`` seconds.

    Candidate windows start every ``sample_rate`` samples (i.e. one second
    apart) and are ranked by their sum of squared samples.

    Args:
        data: 1-D array of audio samples.
        sample_rate: Samples per second; also used as the hop between
            candidate window starts.
        segment_duration: Window length in seconds.

    Returns:
        The slice of ``data`` with the largest energy, or ``None`` when
        ``data`` is shorter than one window or every candidate window has
        zero energy (e.g. digital silence).
    """
    segment_length = int(sample_rate * segment_duration)
    max_amplitude = 0
    loudest_segment = None

    # The last valid start index is len(data) - segment_length, so the range
    # end must be one past it; otherwise a recording that is exactly one
    # window long (or whose final window is the loudest) is missed.
    last_start = len(data) - segment_length
    for start in range(0, last_start + 1, sample_rate):
        segment = data[start:start + segment_length]
        amplitude = np.sum(segment ** 2)
        if amplitude > max_amplitude:
            max_amplitude = amplitude
            loudest_segment = segment

    return loudest_segment

all_segments = []

for file_path in file_list:
    data, sample_rate = sf.read(file_path)
    if data.ndim > 1:  # multi-dimensional array (e.g. stereo)
        data = data.mean(axis=1)  # down-mix to mono by averaging the channels
    loudest_segment = find_loudest_segment(data, sample_rate)
    if loudest_segment is not None:
        all_segments.append(loudest_segment)

# Rescale every non-empty segment independently to [-1, 1]; the scaler is
# re-fitted per segment, so each one uses its own min/max.
scaler = MinMaxScaler(feature_range=(-1, 1))
all_segments = [scaler.fit_transform(segment.reshape(-1, 1)).flatten() for segment in all_segments if segment.size > 0]

# Without this check an empty list surfaces below as an opaque
# "min() arg is an empty sequence" error.
if not all_segments:
    raise ValueError(
        "No usable audio segments were extracted; check that file_list is "
        "non-empty and that the recordings are long enough."
    )

min_length = min(len(segment) for segment in all_segments)  # shortest segment length
all_segments = [segment[:min_length] for segment in all_segments]  # trim all segments to the same length

# Join the equal-length segments end to end into one 1-D training signal.
all_data = np.concatenate(all_segments)


# 하이퍼파라미터 선언

In [6]:
# --- Data windowing ---
sequence_length = 100  # samples in each input window
batch_size = 32        # windows per mini-batch

# --- Network shape ---
input_size = 1         # features per time step
hidden_size = 64       # LSTM hidden units
num_layers = 2         # stacked LSTM layers
output_size = 1        # values predicted per window

# --- Optimisation ---
learning_rate = 1e-3
num_epochs = 10

# 시계열 데이터를 슬라이딩 윈도우 기법으로 변환

In [7]:
def create_sequences(data, seq_length):
    """Split a signal into sliding input windows and next-sample targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is
    ``data[i + seq_length]``, so ``len(data) - seq_length`` pairs are
    produced.

    Args:
        data: Array-like signal; windows slide along its first axis.
        seq_length: Number of consecutive samples per window (>= 1).

    Returns:
        A tuple ``(xs, ys)`` of new NumPy arrays. ``xs`` has shape
        ``(n, seq_length, ...)`` and ``ys`` has shape ``(n, ...)``, where
        ``...`` is any trailing shape of ``data``. When ``data`` is too short
        to yield a pair, ``n`` is 0 but the remaining dimensions are kept.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be a positive integer, got {seq_length}")

    data = np.asarray(data)
    n_sequences = len(data) - seq_length
    if n_sequences <= 0:
        # Too short for even one (window, target) pair.
        tail_shape = data.shape[1:]
        return (
            np.empty((0, seq_length) + tail_shape, dtype=data.dtype),
            np.empty((0,) + tail_shape, dtype=data.dtype),
        )

    # One strided view replaces the per-sample Python loop. The window axis is
    # appended last by sliding_window_view, so move it right after the
    # window-index axis to match data[i:i + seq_length].
    windows = np.lib.stride_tricks.sliding_window_view(data, seq_length, axis=0)
    windows = np.moveaxis(windows, -1, 1)

    # The final window has no following sample to predict, so drop it. Copy so
    # callers get independent, writable arrays instead of read-only views.
    return windows[:n_sequences].copy(), data[seq_length:].copy()

# Window the concatenated signal and convert both arrays to float32 tensors,
# then shape inputs as (n, sequence_length, input_size) and targets as
# (n, output_size).
X, y = (
    torch.tensor(array, dtype=torch.float32)
    for array in create_sequences(all_data, sequence_length)
)
X = X.view(-1, sequence_length, input_size)
y = y.view(-1, output_size)

: 

# 데이터셋 및 데이터로더 정의

In [None]:
# Pair every input window with its target and serve them as shuffled
# mini-batches of batch_size windows.
dataset = torch.utils.data.TensorDataset(X, y)
dataloader = torch.utils.data.DataLoader(
    dataset,
    shuffle=True,
    batch_size=batch_size,
)

# LSTM 모델 정의

In [None]:
class LSTMNoiseCancelling(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of hidden units in each LSTM layer.
        output_size: Number of values produced per input sequence.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of sequences to one prediction per sequence.

        Args:
            x: Tensor laid out batch-first, i.e.
                ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, output_size)`` computed from the LSTM
            output at the final time step.
        """
        # nn.LSTM starts from all-zero hidden and cell states when none are
        # passed. Relying on that keeps the state shape tied to this
        # instance's own configuration instead of module-level globals.
        lstm_out, _ = self.lstm(x)
        out = self.fc(lstm_out[:, -1, :])
        return out

# 모델 초기화, 손실 함수 및 옵티마이저 정의

In [None]:
# Build the network from the hyperparameters, and train it by minimising the
# mean squared error with Adam.
model = LSTMNoiseCancelling(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
)
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Send every batch to whichever device currently holds the model's weights so
# the loop works unchanged on CPU or an accelerator.
model_device = next(model.parameters()).device
model.train()

for epoch in range(num_epochs):
    # Accumulate on-device to avoid a host sync on every step.
    loss_sum = torch.zeros((), device=model_device)
    samples_seen = 0

    for inputs, targets in dataloader:
        inputs = inputs.to(model_device)
        targets = targets.to(model_device)

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the mean.
        loss_sum += loss.detach() * inputs.size(0)
        samples_seen += inputs.size(0)

    # Report the mean loss over the epoch rather than only the last (shuffled)
    # mini-batch; an empty dataloader prints nan instead of raising NameError.
    epoch_loss = loss_sum.item() / samples_seen if samples_seen else float('nan')
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

# 테스트 데이터 추출 및 예측

In [None]:
test_folder_path = '/Users/junggwonhee/Desktop/programing/오아시스_해커톤/project/data/극한_소리_데이터/Validation/sound'
# File names come from the validation folder, so they must be joined to that
# same folder (not the training folder_path) to form valid paths.
test_file_list = [os.path.join(test_folder_path, f) for f in os.listdir(test_folder_path) if f.endswith('.wav')]

model.eval()
model_device = next(model.parameters()).device

for test_file_path in test_file_list:
    # scipy's wavfile.read returns (sample_rate, samples), in that order.
    sample_rate, test_data = wavfile.read(test_file_path)
    if test_data.ndim > 1:
        # Down-mix multi-channel audio to mono, as done for the training data;
        # reshape(-1, 1) alone would interleave the channels.
        test_data = test_data.mean(axis=1)
    test_data = MinMaxScaler(feature_range=(-1, 1)).fit_transform(test_data.reshape(-1, 1)).flatten()
    X_test, _ = create_sequences(test_data, sequence_length)
    if len(X_test) == 0:
        # Not enough samples to form a single input window.
        print(f"Skipping (too short): {test_file_path}")
        continue
    X_test = torch.tensor(X_test, dtype=torch.float32).view(-1, sequence_length, input_size)

    # Run the model without tracking gradients.
    with torch.no_grad():
        predicted = model(X_test.to(model_device))
        anti_phase = -predicted  # negate the prediction to get the anti-phase signal

    print(f"File: {test_file_path}")
    print("Predicted:", predicted)
    print("Anti-phase:", anti_phase)