In [1]:
import torch
import numpy as np
import pickle

In [2]:
def make_chunks(mfccs, labels, window_size=64, stride=32):
    chunk = []
    chunk_labels = []
    for mfcc, label in zip(mfccs, labels):
        # (W - F + 2P)/S + 1
        for start in range(0, int((mfcc.shape[1] - window_size)/stride)+1): 
            chunk.append(mfcc[:, start * stride:(start * stride + window_size)])
            chunk_labels.append(label)
    return(chunk, chunk_labels)

train_Y, test_Y = pickle.load(open('train_labels.dump', 'rb'))
train_X = pickle.load(open('train_mfcc.dump', 'rb'))
test_X = pickle.load(open('test_mfcc.dump', 'rb'))

train_X, train_Y = make_chunks(train_X, train_Y)
test_X, test_Y = make_chunks(test_X, test_Y)
train_X = np.array(train_X)
test_X =  np.array(test_X)
train_Y = np.array(train_Y)
test_Y = np.array(test_Y)

In [3]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(train_X.reshape([train_X.shape[0], -1]))
classifier = KNeighborsClassifier(n_neighbors=3)
classifier.fit(train_X.reshape([train_X.shape[0], -1]), train_Y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')

In [4]:
y_pred = classifier.predict(test_X.reshape([test_X.shape[0], -1]))

In [6]:
np.sum(np.equal(y_pred, test_Y)) / len(test_Y)

0.4215841343229103

In [3]:
from torch.utils.data import Dataset, DataLoader
class MyDataset(Dataset):
    def __init__(self, data, label, transform=None):
        self.data = torch.from_numpy(data).float()
        self.label = torch.from_numpy(label).long()
        self.transform = transform
        
    def __getitem__(self, index:int):
        x = self.data[index]
        y = self.label[index]
        
        if self.transform:
            x = self.transform(x)
        
        return x[np.newaxis, ...], y
    
    def __len__(self):
        return len(self.data)

dataset = MyDataset(train_X, train_Y)
loader =  DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    num_workers=8,
    pin_memory=torch.cuda.is_available()
)

idx, (data, label) = next(enumerate(loader))
print(data.shape)
print(label)
#for batch_idx, (data, target) in enumerate(loader):
    #print('Batch idx {}, data shape {}, target shape {}'.format(batch_idx, data.shape, target.shape))

torch.Size([32, 1, 13, 64])
tensor([5, 5, 0, 0, 0, 2, 1, 6, 5, 6, 0, 0, 6, 3, 2, 0, 3, 1, 0, 0, 7, 0, 1, 0,
        0, 1, 2, 0, 0, 3, 3, 0])


In [5]:
import torchvision.models as models
#resnet18 = models.resnet18()
model = torch.nn.Sequential(
    torch.nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, bias=False),
    torch.nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=(1,2)),
    
    torch.nn.Conv2d(32, 32, kernel_size=(2, 2), stride=(1, 1), padding=(1,1), bias=False),
    torch.nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
    torch.nn.ReLU(),
    torch.nn.Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1,1), bias=False),
    torch.nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
    
    torch.nn.Conv2d(32, 64, kernel_size=(2, 2), stride=(1, 1), padding=(1,1), bias=False),
    torch.nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
    torch.nn.ReLU(),
    torch.nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1,1), bias=False),
    torch.nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
#    torch.nn.AdaptiveAvgPool2d(output_size=(1, 1)),
    torch.nn.Flatten(),
    torch.nn.Linear(
        in_features=2304,
        out_features=128
    ),
    torch.nn.ReLU(),
    torch.nn.Linear(
        in_features=128,
        out_features=8
    )
)
#print(model(data).shape)
#print(resnet18)

In [6]:
model = model.cuda()
optimizer = torch.optim.Adam(model.parameters())
criterion = torch.nn.CrossEntropyLoss()

for epoch in range(60):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(loader):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs = inputs.cuda()
        labels = labels.cuda()
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
    print('[%d, %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss / 2000))
    running_loss = 0.0

[1,  1013] loss: 0.809
[2,  1013] loss: 0.726
[3,  1013] loss: 0.660
[4,  1013] loss: 0.598
[5,  1013] loss: 0.538
[6,  1013] loss: 0.482
[7,  1013] loss: 0.423
[8,  1013] loss: 0.370
[9,  1013] loss: 0.315
[10,  1013] loss: 0.269
[11,  1013] loss: 0.224
[12,  1013] loss: 0.191
[13,  1013] loss: 0.157
[14,  1013] loss: 0.137
[15,  1013] loss: 0.122
[16,  1013] loss: 0.104
[17,  1013] loss: 0.094
[18,  1013] loss: 0.086
[19,  1013] loss: 0.079
[20,  1013] loss: 0.073
[21,  1013] loss: 0.066
[22,  1013] loss: 0.069
[23,  1013] loss: 0.060
[24,  1013] loss: 0.058
[25,  1013] loss: 0.059
[26,  1013] loss: 0.058
[27,  1013] loss: 0.052
[28,  1013] loss: 0.050
[29,  1013] loss: 0.049
[30,  1013] loss: 0.047
[31,  1013] loss: 0.045
[32,  1013] loss: 0.041
[33,  1013] loss: 0.045
[34,  1013] loss: 0.044
[35,  1013] loss: 0.033
[36,  1013] loss: 0.039
[37,  1013] loss: 0.042
[38,  1013] loss: 0.037
[39,  1013] loss: 0.039
[40,  1013] loss: 0.032
[41,  1013] loss: 0.033
[42,  1013] loss: 0.038
[

In [7]:
test_X = test_X
test_X_tensor = np.expand_dims(test_X, 1)
test_X_tensor = torch.from_numpy(test_X_tensor).float().cuda()
model.eval()
prediction = model(test_X_tensor)

In [8]:
prob = torch.nn.functional.softmax(prediction, dim=1)
final = torch.argmax(prob, dim=1).cpu().numpy()
print(final)
print(test_Y)
print(np.sum(np.equal(final, test_Y)) / len(test_Y))

[7 3 3 ... 3 6 6]
[2 2 2 ... 4 4 4]
0.43253437157805086


In [10]:
idx, (data, label) = next(enumerate(loader))
data = model(data.cuda())
prob = torch.nn.functional.softmax(data, dim=1)
final = torch.argmax(prob, dim=1).cpu().numpy()
label = label.numpy()
print(final)
print(label)
print(np.equal(final, label).sum() / len(label))

[1 2 2 0 7 0 5 1 0 0 2 2 1 4 1 7 2 0 0 7 4 0 5 0 0 1 0 2 3 2 0 0]
[1 2 2 0 7 0 5 1 0 0 2 2 1 4 1 7 2 0 0 7 4 0 5 0 0 1 0 2 3 2 0 0]
1.0
