Create a model using built-in library from Pytorch.
This code closely follows nn_tutorial notebook.

In [1]:
import csv, torch, math, os, pickle
from torch import nn
from torch import optim
import torch.nn.functional as F

global chroma_shape, epochs, train_bs, validate_bs, lr, n_class

In [2]:
## read attr and tar in .pkl files
## Return: (1) a list of padded attr arrays
##         (2) a list of paddrd tar arrays
def read_data():
    att_file = open(r'temp_chroma_attr.pkl', 'rb')
    x_train = pickle.load(att_file)
    
    att_file.close()
    
    tar_file = open(r'temp_chroma_tar.pkl', 'rb')
    str_y_train = pickle.load(tar_file)
    
    tar_file.close()
    
    y_train = torch.tensor([0 if s == 's' else (1 if s == 'x' else 2) for s in str_y_train])
    
    #print([(x,s) for x, s in zip(str_y_train, y_train)])
    
    ## need .float so that it has the same type as weights in the model
    return torch.tensor(x_train).float(), y_train

x_train, y_train = read_data()


In [3]:
print(x_train.shape)
x_train[0:3, :, :].shape
y = x_train.float()
print(type(y[0]))

torch.Size([1568, 12, 60])
<class 'torch.Tensor'>


In [4]:
def accuracy(out, yb):
    #print('======acc====')
    #print(out, ' | ', yb)
    preds = torch.argmax(out, dim = 1)
    return (preds == yb).float().mean()

In [11]:
## Get the shape of a padded instance for model construction
chroma_shape = x_train[0].shape

lr = 0.05
epochs = 100
train_bs = 20
validate_bs = train_bs*2
train_n = len(x_train)
n_class = 3

loss_func = F.cross_entropy

Since there are three classes, we set D_out to 3. n is total number of instances and c is the number of attributes in each instance. We use a loss function from torch.nn.functional.

In [6]:
class SoundRecognition_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0)
        self.conv3 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0)
        
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = torch.nn.Linear(16*3*27, n_class)

    def forward(self, xb):
        #print(type(xb), len(xb))
        
        ## input of size (bs, 1, 12, 60)
        xb = xb.view(-1, 1, chroma_shape[0], chroma_shape[1])
        #print('xb shape', xb.shape)
        
        ## (bs, 1, 12, 60) >> (bs, 16, 10, 58)
        xb = F.relu(self.conv1(xb))
        #print('---xb shape2', xb.shape)
        
        ## (bs, 16, 10, 58) >> (bs, 16, 8, 56)
        xb = F.relu(self.conv2(xb))
        #print('---xb shape3', xb.shape)
        
        ## (bs, 16, 8, 56) >> (bs, 16, 6, 54)
        xb = F.relu(self.conv3(xb))
        #print('---xb shape4', xb.shape)
        
        ## (bs, 16, 6, 54) >> (bs, 16, 3, 27)
        xb = self.pool(xb)
        #print('---xb shape5', xb.shape)
        
        ## reshape for fully connected
        xb = xb.view(-1, 16*3*27)
        
        ## (bs, 16*3*27) >> (bs, 3)
        xb = self.fc1(xb)
        #print('---xb shape6', xb.shape)
        #print('=====', xb.shape)
        return xb.view(-1, xb.size(1))

## Get the model and optim object that will be used to update model parameters
def get_model():
    model = SoundRecognition_CNN()
    return model, optim.SGD(model.parameters(), lr = lr)

In [12]:
model, opt = get_model()

def fit():
    for epoch in range(epochs):
        
        #print('Training')
        model.train()
        for i in range((train_n - 1) // train_bs + 1):
            
            start_i = i * train_bs
            end_i = start_i + train_bs
            xb = x_train[start_i:end_i, :, :]
            yb = y_train[start_i:end_i]
            pred = model(xb)
            #print('pred: ', pred, ' | yb: ', yb)
            loss = loss_func(pred, yb)

            loss.backward()
            opt.step()
            opt.zero_grad()
        
            #print('Acc: ', accuracy(pred, yb))
        
        ####validate at each epoch
        #print('Validating')
        model.eval()
        loss = []
        acc = []
        with torch.no_grad():

            for i in range((train_n - 1) // train_bs + 1):
                start_i = i * train_bs
                end_i = start_i + train_bs
                xb = x_train[start_i:end_i, :, :]
                yb = y_train[start_i:end_i]
                pred = model(xb)
                loss.append(loss_func(pred, yb))
                acc.append(accuracy(pred, yb))
        
        #print(loss)
        valid_loss = sum(loss)
        
        if (epoch + 1)% 10 == 0:
            print('Epoch: ', epoch + 1, ' | Loss: ', valid_loss, ' | Accuracy: ', sum(acc)/len(acc))
    print('Train Finished')

fit()


Epoch:  10  | Loss:  tensor(64.3285)  | Accuracy:  tensor(0.6937)
Epoch:  20  | Loss:  tensor(56.3347)  | Accuracy:  tensor(0.7111)
Epoch:  30  | Loss:  tensor(54.0619)  | Accuracy:  tensor(0.7079)
Epoch:  40  | Loss:  tensor(61.2457)  | Accuracy:  tensor(0.6573)
Epoch:  50  | Loss:  tensor(40.4599)  | Accuracy:  tensor(0.7981)
Epoch:  60  | Loss:  tensor(35.1271)  | Accuracy:  tensor(0.8285)
Epoch:  70  | Loss:  tensor(26.5453)  | Accuracy:  tensor(0.8772)
Epoch:  80  | Loss:  tensor(52.0822)  | Accuracy:  tensor(0.8047)
Epoch:  90  | Loss:  tensor(16.6633)  | Accuracy:  tensor(0.9272)
Epoch:  100  | Loss:  tensor(13.4108)  | Accuracy:  tensor(0.9424)
Train Finished
