In [76]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchaudio
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.utils.data import Dataset, DataLoader
import os

In [77]:
def collate_fn(batch):
    waveforms, sample_rates = zip(*batch)
    lengths = [waveform.size(1) for waveform in waveforms]
    max_length = max(lengths)

    padded_waveforms = [torch.nn.functional.pad(waveform, (0, max_length - waveform.size(1))) for waveform in waveforms]

    return torch.stack(padded_waveforms), sample_rates, lengths

In [79]:
class AudioDataset(Dataset):
    def __init__(self, data_folder):
        self.data_folder = data_folder
        self.file_list = os.listdir(data_folder)

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        file_path = os.path.join(self.data_folder, self.file_list[idx])
        waveform, sample_rate = torchaudio.load(file_path)
        return waveform, sample_rate

In [90]:
class UNet(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(UNet, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv1d(in_channels, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(128, 256, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
        )
        self.bottleneck = nn.Sequential(
            nn.Conv1d(256, 512, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose1d(512, 256, kernel_size=3, stride=1, padding=1),  # Adjusted
            nn.ReLU(inplace=True),
        )

        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(256, 128, kernel_size=3, stride=1, padding=1),  # Adjusted
            nn.ReLU(inplace=True),
            nn.ConvTranspose1d(128, 64, kernel_size=3, stride=1, padding=1),  # Adjusted
            nn.ReLU(inplace=True),
            nn.ConvTranspose1d(64, out_channels, kernel_size=3, stride=1, padding=1, output_padding=0),  # Adjusted
            nn.Tanh()  
        )

    def forward(self, x, lengths):
        x1 = self.encoder(x)
        x2 = self.bottleneck(x1)
        lengths = [length // 2 for length in lengths]

        x3 = self.decoder(x2)
        return x3



In [91]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [92]:
input_channels = 1  
output_channels = 1  
hidden_channels = 64 

In [93]:
model = UNet(input_channels, output_channels).to(device)

In [94]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [95]:
data_folder = r"C:\Users\dharu\OneDrive\Desktop\NUSProject\classes\airplane"
dataset = AudioDataset(data_folder)
batch_size = 32
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0, collate_fn=collate_fn)

In [98]:
num_epochs = 10 

for epoch in range(num_epochs):
    for batch in dataloader:
        input_data, sample_rates, lengths = batch
        input_data = input_data.to(device)

        print(f'Input dimensions: {input_data.size()}')

        output = model(input_data, lengths)
        print(f'Output dimensions: {output.size()}')
        



Input dimensions: torch.Size([32, 1, 220500])
Output dimensions: torch.Size([32, 1, 13782])
Input dimensions: torch.Size([8, 1, 220500])
Output dimensions: torch.Size([8, 1, 13782])
Input dimensions: torch.Size([32, 1, 220500])
Output dimensions: torch.Size([32, 1, 13782])
Input dimensions: torch.Size([8, 1, 220500])
Output dimensions: torch.Size([8, 1, 13782])
Input dimensions: torch.Size([32, 1, 220500])
Output dimensions: torch.Size([32, 1, 13782])
Input dimensions: torch.Size([8, 1, 220500])
Output dimensions: torch.Size([8, 1, 13782])
Input dimensions: torch.Size([32, 1, 220500])
Output dimensions: torch.Size([32, 1, 13782])
Input dimensions: torch.Size([8, 1, 220500])
Output dimensions: torch.Size([8, 1, 13782])
Input dimensions: torch.Size([32, 1, 220500])
Output dimensions: torch.Size([32, 1, 13782])
Input dimensions: torch.Size([8, 1, 220500])
Output dimensions: torch.Size([8, 1, 13782])
Input dimensions: torch.Size([32, 1, 220500])
Output dimensions: torch.Size([32, 1, 13782]

In [97]:
model.eval

<bound method Module.eval of UNet(
  (encoder): Sequential(
    (0): Conv1d(1, 64, kernel_size=(3,), stride=(2,), padding=(1,))
    (1): ReLU(inplace=True)
    (2): Conv1d(64, 128, kernel_size=(3,), stride=(2,), padding=(1,))
    (3): ReLU(inplace=True)
    (4): Conv1d(128, 256, kernel_size=(3,), stride=(2,), padding=(1,))
    (5): ReLU(inplace=True)
  )
  (bottleneck): Sequential(
    (0): Conv1d(256, 512, kernel_size=(3,), stride=(2,), padding=(1,))
    (1): ReLU(inplace=True)
    (2): ConvTranspose1d(512, 256, kernel_size=(3,), stride=(1,), padding=(1,))
    (3): ReLU(inplace=True)
  )
  (decoder): Sequential(
    (0): ConvTranspose1d(256, 128, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): ReLU(inplace=True)
    (2): ConvTranspose1d(128, 64, kernel_size=(3,), stride=(1,), padding=(1,))
    (3): ReLU(inplace=True)
    (4): ConvTranspose1d(64, 1, kernel_size=(3,), stride=(1,), padding=(1,))
    (5): Tanh()
  )
)>