In [1]:
import tools as t
import h5py

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import ndimage
import multiprocessing as mp
import os
import cv2

import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

In [3]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
test_files_path = "../ESC-10 Metadata/ESC10TestData.npy"
train_files_path = "../ESC-10 Metadata/ESC10TrainData.npy"
test_labels_path = "../ESC-10 Metadata/ESC10TestLabel.npy"
train_labels_path = "../ESC-10 Metadata/ESC10TrainLabel.npy"
database_path = "../ESC-10 Metadata/ESC10.hdf5"

In [5]:
total_sounds, total_labels = t.get_dataset(test_files_path, train_files_path, test_labels_path, train_labels_path)

In [6]:
# class labels
label_list = ["baby cry", "chainsaw", "clock tick", "dogbark", "fire cracking", "helicopter", "sneezing", "rain", "rooster", "sea waves"]

In [7]:
# getting random train, test, validation classes from the whole meta dataset in 8:1:1 ratio
classes = torch.randperm(10)
train_classes, val_classes, test_classes = classes[:7], classes[0], classes[7:]
print(train_classes, val_classes, test_classes)

tensor([6, 1, 7, 5, 4, 2, 3]) tensor(6) tensor([0, 9, 8])


In [8]:
train_set = t.LoadData(total_sounds, total_labels, database_path, train_classes, transform = None)
val_set = t.LoadData(total_sounds, total_labels, database_path, val_classes, transform = None)
test_set = t.LoadData(total_sounds, total_labels, database_path, test_classes, transform = None)

In [9]:
train_sounds, train_labels = train_set.get_labels_n_sounds()
val_sounds, val_labels = val_set.get_labels_n_sounds()
test_sounds, test_labels = test_set.get_labels_n_sounds()

In [10]:
n_way = 5
k_shot = 5

In [11]:
train_batches = t.BatchSampler(n_way, k_shot, train_sounds, train_labels, include_query = True, shuffle = True)
val_batches = t.BatchSampler(1, 3, val_sounds, val_labels, include_query = True, shuffle = True)
test_batches = t.BatchSampler(1, 3, test_sounds, test_labels, include_query = True, shuffle = True)

Building model

In [12]:
class Flatten(nn.Module):
    def __init__(self):
        super(Flatten, self).__init__()
    
    def forward(self, x):
        return x.view(x.size(0), -1)

In [13]:
def load_protonet_conv(**kwargs):
    x_dim = kwargs['x_dim']
    hid_dim = kwargs['hid_dim']
    z_dim = kwargs['z_dim']
    
    def conv_block(in_channels, out_channels):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2)
            )

    encoder = nn.Sequential(
    conv_block(x_dim, hid_dim),
    conv_block(hid_dim, hid_dim),
    conv_block(hid_dim, hid_dim),
#     conv_block(hid_dim, hid_dim),
    conv_block(hid_dim, z_dim),
    Flatten()
    )
    
    return ProtoNet(encoder)
#     return encoder

In [14]:
def euclidean_dist(x, y):
    n = x.size(0)
    m = y.size(0)
    d = x.size(1)
    assert d == y.size(1)
    x = x.unsqueeze(1).expand(n, m, d)
    y = y.unsqueeze(0).expand(n, m, d)
    
    return torch.pow(x - y, 2).sum(2)

In [15]:
class ProtoNet(nn.Module):
    def __init__(self, encoder):
        super(ProtoNet, self).__init__()
        self.encoder = encoder.cuda()
        self.encoder = encoder
        
    def set_forward_loss(self, batch_indices):
        batch_imgs = []
        for ind in batch_indices:
            batch_imgs.append(train_sounds[ind])
        
        support_sounds, query_sounds, support_targets, query_targets = t.split_batch(batch_imgs, batch_indices, n_way, k_shot)
        support_sg, query_sg = [], []
        f = h5py.File(database_path, 'r')
        
        for sd in support_sounds:
            sg = f[sd][()]
            sg = np.float32(sg)
            support_sg.append(np.array(sg))
        for sd in query_sounds:
            sg = f[sd][()]
            sg = np.float32(sg)
            query_sg.append(np.array(sg))
        support_sg, query_sg = np.array(support_sg), np.array(query_sg)
            
        iss = support_sg.shape
        support_sg = np.reshape(support_sg, (iss[0], 1, iss[1], iss[2]))
        iss = query_sg.shape
        query_sg = np.reshape(query_sg, (iss[0], 1, iss[1], iss[2]))
        support_protos = {}
        
        support_sg = torch.tensor(np.float32(support_sg)).to("cuda")
        query_sg = torch.tensor(np.float32(query_sg)).to("cuda")
        support_feats = self.encoder(support_sg)
        query_feats = self.encoder(query_sg)
        
        target_inds = torch.arange(0, n_way).view(n_way, 1, 1).expand(n_way, k_shot, 1).long()
        target_inds = Variable(target_inds, requires_grad=False)
        target_inds = target_inds.cuda()
        
        for i in range(0, support_feats.shape[0], k_shot):
            support_protos[train_labels[support_targets[i]]] = support_feats[i: i + k_shot].mean(dim = 0)

        support_protos_tensor = torch.stack([p for p in support_protos.values()], dim = 0)
        dists = euclidean_dist(query_feats, support_protos_tensor)
        
        log_p_y = F.log_softmax(-dists, dim=1).view(n_way, k_shot, -1)
#         log_p_y.max(1).indices.requires_grad = True
#         int_targs = torch.tensor([int(i) for i in target_inds])
        
        loss_val = -log_p_y.gather(2, target_inds).squeeze().view(-1).mean()
        _, y_hat = log_p_y.max(2)
        acc_val = torch.eq(y_hat, target_inds.squeeze()).float().mean()
        
        return loss_val, {"loss": loss_val.item(), "acc": acc_val.item(), "y_hat": y_hat}

In [16]:
from tqdm import tqdm_notebook
from tqdm import tnrange

In [17]:
def train(model, optimizer, n_way, k_shot, train_sounds, train_labels, max_epoch, epoch_size):
    scheduler = optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.5, last_epoch=-1)
    epoch = 0 #epochs done so far
    stop = False #status to know when to stop

    while epoch < max_epoch and not stop:
        running_loss = 0.0
        running_acc = 0.0
        
        for episode in tnrange(epoch_size, desc="Epoch {:d} train".format(epoch+1)):
            train_batches = t.BatchSampler(n_way, k_shot, train_sounds, train_labels, include_query = True, shuffle = True)
            tb = iter(train_batches)
            sample = next(tb)
            optimizer.zero_grad()
            loss, output = model.set_forward_loss(sample)
            running_loss += output['loss']
            running_acc += output['acc']
            loss.backward()
            optimizer.step()
        epoch_loss = running_loss / epoch_size
        epoch_acc = running_acc / epoch_size
        print('Epoch {:d} -- Loss: {:.4f} Acc: {:.4f}'.format(epoch+1,epoch_loss, epoch_acc))
        epoch += 1
        scheduler.step()

In [18]:
model = load_protonet_conv(
    x_dim=1,
    hid_dim=64,
    z_dim=64,
    )

In [19]:
optimizer = optim.Adam(model.parameters(), lr = 0.001)

n_way = 5
k_shot = 5

max_epoch = 4
epoch_size = 2000

train(model, optimizer, n_way, k_shot, train_sounds, train_labels, max_epoch, epoch_size)

  for episode in tnrange(epoch_size, desc="Epoch {:d} train".format(epoch+1)):


Epoch 1 train:   0%|          | 0/2000 [00:00<?, ?it/s]

Epoch 1 -- Loss: 8.6777 Acc: 0.9251


Epoch 2 train:   0%|          | 0/2000 [00:00<?, ?it/s]

Epoch 2 -- Loss: 0.1455 Acc: 0.9615


Epoch 3 train:   0%|          | 0/2000 [00:00<?, ?it/s]

Epoch 3 -- Loss: 0.0767 Acc: 0.9771


Epoch 4 train:   0%|          | 0/2000 [00:00<?, ?it/s]

Epoch 4 -- Loss: 0.0476 Acc: 0.9848


In [20]:
torch.save(model.state_dict(), "way5shot1_fullspec_model1")

In [21]:
def test(model, optimizer, n_way, k_shot, test_sounds, test_labels, test_episode):
    running_loss = 0.0
    running_acc = 0.0
    
    for episode in tnrange(test_episode):
        test_batches = t.BatchSampler(n_way, k_shot, test_sounds, test_labels, include_query = True, shuffle = True)
        tb = iter(test_batches)
        sample = next(tb)
        
        loss, output = model.set_forward_loss(sample)
        running_loss += output['loss']
        running_acc += output['acc']
    avg_loss = running_loss / test_episode
    avg_acc = running_acc / test_episode
    print('Test results -- Loss: {:.4f} Acc: {:.4f}'.format(avg_loss, avg_acc))

In [22]:
optimizer = optim.Adam(model.parameters(), lr = 0.001)

In [35]:
n_way = 3
k_shot = 5

test_episode = 200

# model.load_state_dict(torch.load("way5shot5_fullspec_model1"))
test(model, optimizer, n_way, k_shot, test_sounds, test_labels, test_episode)

  for episode in tnrange(test_episode):


  0%|          | 0/200 [00:00<?, ?it/s]

Test results -- Loss: 4.3240 Acc: 0.6000
