In [5]:
class Dataset:
    """Extracts normalized log-mel and log-magnitude spectrograms from audio files.

    No ``__init__`` is defined in this cell; ``transform`` reads the following
    instance attributes, which must be assigned before it is called:
    ``sr``, ``frame_shift``, ``frame_length``, ``trim``, ``preemphasis``,
    ``n_fft``, ``n_mels``, ``ref_db`` and ``max_db``.

    NOTE(review): a later cell runs ``from torch.utils.data import Dataset``,
    which rebinds the name ``Dataset`` in the notebook namespace.
    """

    def transform(self, fpath):
        """Returns normalized log(melspectrogram) and log(magnitude) of a sound file.

        Args:
            fpath: A string. The full path of a sound file.

        Returns:
            mel: A 2d float32 array of shape (T, n_mels) <- Transposed
            mag: A 2d float32 array of shape (T, 1+n_fft//2) <- Transposed
            Both are scaled as (dB - ref_db + max_db) / max_db and clipped
            to the range [1e-8, 1].

        Raises:
            IndexError: if the (optionally trimmed) signal is empty, because
                the preemphasis step indexes its first sample.
        """

        # Loading sound file (resampled to self.sr by librosa when it is set)
        y, sr = librosa.load(fpath, sr=self.sr)

        # frame_shift / frame_length are multiplied by the sample rate to get
        # sample counts -- presumably they are expressed in seconds.
        hop_length = int(sr * self.frame_shift)
        win_length = int(sr * self.frame_length)

        # Trimming
        if self.trim:
            y, _ = librosa.effects.trim(y)

        # Preemphasis: y[t] - preemphasis * y[t-1], first sample kept as-is
        y = np.append(y[0], y[1:] - self.preemphasis * y[:-1])

        # stft
        linear = librosa.stft(y=y,
                              n_fft=self.n_fft,
                              hop_length=hop_length,
                              win_length=win_length)

        # magnitude spectrogram
        mag = np.abs(linear)  # (1+n_fft//2, T)

        # mel spectrogram
        # Keyword arguments: librosa >= 0.10 no longer accepts these positionally.
        # The sample rate actually returned by load() is used so the filter bank
        # matches the signal even when self.sr is None (native rate).
        mel_basis = librosa.filters.mel(sr=sr,
                                        n_fft=self.n_fft,
                                        n_mels=self.n_mels)  # (n_mels, 1+n_fft//2)
        mel = np.dot(mel_basis, mag)  # (n_mels, t)

        # to decibel (values floored at 1e-5, i.e. -100 dB, to avoid log(0))
        mel = 20 * np.log10(np.maximum(1e-5, mel))
        mag = 20 * np.log10(np.maximum(1e-5, mag))

        # normalize into [1e-8, 1]  (e.g. ref_db=20, max_db=100)
        mel = np.clip((mel - self.ref_db + self.max_db) / self.max_db, 1e-8, 1)
        mag = np.clip((mag - self.ref_db + self.max_db) / self.max_db, 1e-8, 1)

        # Transpose
        mel = mel.T.astype(np.float32)  # (T, n_mels)
        mag = mag.T.astype(np.float32)  # (T, 1+n_fft//2)

        return mel, mag

True
False


In [24]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

In [60]:
class MyDataset(Dataset):
    """In-memory dataset pairing float feature rows with integer class labels."""

    def __init__(self, x, y):
        """Convert features and labels to tensors.

        Args:
            x: Array-like of features; converted with ``torch.FloatTensor``
                (float32). The first dimension indexes samples.
            y: Array-like of class labels; converted with ``torch.LongTensor``
                (int64). Must contain one label per row of ``x``.

        Raises:
            ValueError: if ``x`` and ``y`` hold a different number of samples.
        """
        self.data = torch.FloatTensor(x)
        # `lable` (sic): the misspelled name is the attribute this class has
        # always exposed, so it is kept to avoid breaking existing accesses.
        self.lable = torch.LongTensor(y)
        # Fail fast: __len__ only looks at the features, so a mismatch would
        # otherwise surface later as an IndexError or as silently unused labels.
        if len(self.data) != len(self.lable):
            raise ValueError(
                f"x and y must have the same number of samples, "
                f"got {len(self.data)} and {len(self.lable)}"
            )

    def __len__(self):
        """Number of samples (rows of the feature tensor)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return (self.data[idx], self.lable[idx])

# Synthetic data: standard-normal features with 20 columns.
# A seeded generator makes the arrays identical on every Restart & Run All.
SEED = 0
rng = np.random.default_rng(SEED)

X_train = rng.standard_normal((10000, 20))
# Every label is currently class 0. The disabled rule below labelled a row by
# the sign of the sum of its first 10 features:
#   ((np.sum(X_train[:, :10], axis=1)) > 0).astype(int)
Y_train = np.zeros(10000,)

X_dev = rng.standard_normal((1000, 20))
# Disabled rule for the dev split. NOTE(review): it sums all 20 features,
# unlike the train rule above which uses only the first 10 -- confirm which
# one is intended before re-enabling:
#   ((np.sum(X_dev, axis=1)) > 0).astype(int)
Y_dev = np.zeros(1000,)

print(Y_train[:10])
print(Y_dev[:10])

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [61]:
# Wrap the train / dev arrays so a DataLoader can index them.
dataset_train = MyDataset(X_train, Y_train)
dataset_dev = MyDataset(X_dev, Y_dev)

# Training batches are reshuffled every epoch.
dataloader_train = DataLoader(dataset_train, batch_size=20,
                        shuffle=True, num_workers=4)

# Accuracy does not depend on sample order, so the dev loader is not shuffled;
# this keeps the evaluation pass deterministic.
dataloader_dev = DataLoader(dataset_dev, batch_size=20,
                        shuffle=False, num_workers=4)

class MyModel(nn.Module):
    """Fully connected classifier with two hidden layers of width ``input_dim``."""

    def __init__(self, input_dim, num_class):
        """Assemble the layer stack.

        Args:
            input_dim: Number of input features; also used as the width of
                both hidden layers.
            num_class: Number of output logits.
        """
        super().__init__()
        # Layers are created in this exact order so parameter initialisation
        # and the Sequential indices (0..5) stay the same.
        layers = [
            nn.Linear(input_dim, input_dim),
            nn.Dropout(p=0.5),
            nn.ReLU(),
            nn.Linear(input_dim, input_dim),
            nn.ReLU(),
            nn.Linear(int(input_dim), num_class),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, X):
        """Map a batch ``X`` to unnormalised class scores (logits)."""
        return self.network(X)
        
# Model: 20 input features, 2 output classes; plain SGD with momentum.
model = MyModel(20, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
num_epoch = 10
for epoch in range(num_epoch):
    # ---- training pass (dropout active) ----
    model.train()
    for (x, y) in dataloader_train:
        optimizer.zero_grad()
        logits = model(x)
        loss = F.cross_entropy(logits, y)
        loss.backward()
        optimizer.step()

    # ---- evaluation pass (dropout disabled) ----
    model.eval()
    tot = 0
    correct = 0
    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every dev batch.
    with torch.no_grad():
        for (x, y) in dataloader_dev:
            logits = model(x)
            predict = logits.argmax(dim=1)
            tot += x.size(0)
            correct += (predict == y).float().sum()
    # `correct` is a 0-dim tensor once at least one batch was seen, so `acc`
    # is one too; formatting a 0-dim tensor prints its scalar value.
    acc = correct / tot
    print(f'epoch:{epoch} acc：{acc}')

epoch:0 acc：1.0
epoch:1 acc：1.0
epoch:2 acc：1.0
epoch:3 acc：1.0
epoch:4 acc：1.0
epoch:5 acc：1.0
epoch:6 acc：1.0
epoch:7 acc：1.0
epoch:8 acc：1.0
epoch:9 acc：1.0
