In [1]:
from pathlib import Path
from collections import Counter
from torch.utils.data import Dataset, DataLoader
import torchaudio
import torch
import torch.optim as optim
from torch import nn
from torch.functional import F
import math
import re

In [2]:
DIR_DATA = Path("/Users/dmatekenya/LEARNING-LOCAL/coursera-pytorch/data/ESC-50-master/audio/")
aud_files = [i for i in DIR_DATA.iterdir() if i.suffix == ".wav"]

In [3]:
class ESC50(Dataset):

    def __init__(self,path):
        # Get directory listing from path
        files = list(path.iterdir())
        # Iterate through the listing and create a list of tuples (filename, label)
        self.items = [(f, int(re.findall(r'\d+', f.parts[-1].split("-")[-1])[0])) for f in files]
        self.length = len(self.items)

    def __getitem__(self, index):
        filename, label = self.items[index]
        audio_tensor, sample_rate = torchaudio.load(filename)
        return audio_tensor, label

    def __len__(self):
        return self.length

In [4]:
test_esc50 = ESC50(DIR_DATA.joinpath("train"))
tensor, label = list(test_esc50)[0]

In [5]:
bs = 64
train_esc50 = ESC50(DIR_DATA.joinpath("train"))
valid_esc50 = ESC50(DIR_DATA.joinpath("valid"))
test_esc50  = ESC50(DIR_DATA.joinpath("test"))

train_loader = torch.utils.data.DataLoader(train_esc50, batch_size = bs,
                shuffle = True)
valid_loader = torch.utils.data.DataLoader(valid_esc50, batch_size = bs,
                shuffle = True)
test_loader  = torch.utils.data.DataLoader(test_esc50, batch_size = bs,
                shuffle = True)

In [6]:
class AudioNet(nn.Module):
    def __init__(self):
        super(AudioNet, self).__init__()
        self.conv1 = nn.Conv1d(1, 128, 80, 4)
        self.bn1 = nn.BatchNorm1d(128)
        self.pool1 = nn.MaxPool1d(4)
        self.conv2 = nn.Conv1d(128, 128, 3)
        self.bn2 = nn.BatchNorm1d(128)
        self.pool2 = nn.MaxPool1d(4)
        self.conv3 = nn.Conv1d(128, 256, 3)
        self.bn3 = nn.BatchNorm1d(256)
        self.pool3 = nn.MaxPool1d(4)
        self.conv4 = nn.Conv1d(256, 512, 3)
        self.bn4 = nn.BatchNorm1d(512)
        self.pool4 = nn.MaxPool1d(4)
        self.avgPool = nn.AvgPool1d(30)
        self.fc1 = nn.Linear(512, 10)
        #self.bn_fc1 = nn.BatchNorm1d(10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(self.bn1(x))
        x = self.pool1(x)
        x = self.conv2(x)
        x = F.relu(self.bn2(x))
        x = self.pool2(x)
        x = self.conv3(x)
        x = F.relu(self.bn3(x))
        x = self.pool3(x)
        x = self.conv4(x)
        x = F.relu(self.bn4(x))
        x = self.pool4(x)
        x = self.avgPool(x)
        x = x.permute(0, 2, 1)
        x = self.fc1(x)
        #x = self.bn_fc1(x)
        return x

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
audio_net = AudioNet()
audio_net.to(device)

AudioNet(
  (conv1): Conv1d(1, 128, kernel_size=(80,), stride=(4,))
  (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(128, 128, kernel_size=(3,), stride=(1,))
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv1d(128, 256, kernel_size=(3,), stride=(1,))
  (bn3): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool3): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv1d(256, 512, kernel_size=(3,), stride=(1,))
  (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool4): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (avgPool): AvgPool1d(kernel_size=(30,), stride=(3

In [8]:
def find_lr(model, loss_fn, optimizer, init_value=1e-8, final_value=10.0):
    number_in_epoch = len(train_loader) - 1
    update_step = (final_value / init_value) ** (1 / number_in_epoch)
    lr = init_value
    optimizer.param_groups[0]["lr"] = lr
    best_loss = 0.0
    batch_num = 0
    losses = []
    log_lrs = []
    for x, y in train_loader:
        batch_num += 1
        # inputs, labels = data
        # inputs, labels = inputs, labels
        optimizer.zero_grad()
        outputs = model(x)
        print(outputs.shape, "====>", y.shape)
        loss = loss_fn(outputs, y)

        # Crash out if loss explodes

        if batch_num > 1 and loss > 4 * best_loss:
            return log_lrs[10:-5], losses[10:-5]

        # Record the best loss

        if loss < best_loss or batch_num == 1:
            best_loss = loss

        # Store the values

        losses.append(loss)
        log_lrs.append(math.log10(lr))

        # Do the backward pass and optimize

        loss.backward()
        optimizer.step()

        # Update the lr for the next step and store

        lr *= update_step
        optimizer.param_groups[0]["lr"] = lr
    return log_lrs[10:-5], losses[10:-5]

In [9]:
torch.save(audio_net.state_dict(), "audionet.pth")
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(audio_net.parameters(), lr=0.001)
logs,losses = find_lr(audio_net, criterion, optimizer)
plt.plot(logs,losses)

torch.Size([64, 7, 10]) ====> torch.Size([64])


RuntimeError: Expected target size [64, 10], got [64]

In [28]:
xx = torch.Tensor([[ 0.1578,  0.0440, -0.1067,  0.2767,  0.3650, -0.2182,  0.3496,  0.2335,
          0.0498,  0.1919],
        [-0.0070,  0.0271, -0.0268,  0.2868,  0.3516, -0.1395,  0.3821,  0.3145,
          0.1229,  0.1458],
        [-0.3674, -0.0540,  0.1793,  0.2377,  0.3492, -0.1011,  0.3829,  0.4043,
          0.3496,  0.1117],
        [-0.2983, -0.0893,  0.2048,  0.1758,  0.3875, -0.1026,  0.4330,  0.3774,
          0.3390,  0.1602],
        [ 0.0198,  0.0008,  0.0238,  0.2398,  0.3958, -0.2059,  0.3762,  0.2661,
          0.1393,  0.1569],
        [ 0.1468,  0.0350, -0.0970,  0.2642,  0.3462, -0.2147,  0.3657,  0.2377,
          0.0698,  0.1592],
        [ 0.1005,  0.0045, -0.0455,  0.2572,  0.3750, -0.2092,  0.4024,  0.2566,
          0.1059,  0.1475]])

In [29]:
xx.dim()

2

In [90]:
x = torch.Tensor([12,  0, 49,  8, 14, 43, 41,  5, 34, 35, 16, 29, 13, 10, 33,  9, 45,  9,
        23, 42, 40, 10, 39,  0, 49, 10,  7,  6, 16, 35, 16, 36,  3, 34,  7,  4,
        25, 30, 36, 15, 40, 24, 17, 12, 21, 49,  9, 33, 12,  0, 26, 41, 38, 15,
        14, 30, 48, 22, 17, 29, 28, 36, 19, 40])

In [98]:
xx = x.unsqueeze(1)
x.dim()

1

In [None]:
lr = 1e-5
model.load("audionet.pth")
import torch.optim as optim
optimizer = optim.Adam(audionet.parameters(), lr=lr)”

Excerpt From
Programming PyTorch for Deep Learning
Ian Pointer
This material may be protected by copyright.