Create a model using built-in library from Pytorch.
This code closely follows nn_tutorial notebook.

In [1]:
# import os
# from google.colab import drive
# drive.mount('/content/gdrive')
# !pwd
# os.chdir('gdrive/My Drive/PyHack2019/')
# !pwd
# !ls

In [2]:
import csv, math, os, pickle, torch
from torch import nn
from torch import optim
import torch.nn.functional as F
import IPython.display as ipd

global chroma_shape, epochs, train_bs, validate_bs, lr, n_class

# if torch.cuda.is_available():
#   device = 'cuda'
# else:
#   device = 'cpu'
  
# print(device)

In [3]:
def accuracy(out, yb):
    ##get the index with the max
    preds = torch.argmax(out, dim = 1)
    return (preds == yb).float().mean()


In [4]:
##Load record voice that is saved at temp_record.pkl

att_file = open(r'temp_record.pkl', 'rb')
x_validate = pickle.load(att_file)  
att_file.close()
#print(x_validate.shape)
ipd.Audio('temp_sound_record.mp3', rate = 22050)

In [5]:
if len(x_validate) == 13:
    chroma_shape = x_validate.shape
else:
    chroma_shape = x_validate[0].shape


#epochs = 50
#train_bs = 20
#validate_bs = train_bs*2
#n_train = chroma_shape[0]
n_validate = chroma_shape[0]
n_class = 9
loss_func = F.cross_entropy

In [6]:
chroma_shape

(13, 239)

Since there are three classes, we set D_out to 3. n is total number of instances and c is the number of attributes in each instance. We use a loss function from torch.nn.functional.

In [7]:
class SoundRecognition_CNN(nn.Module):
  
    def __init__(self, dropout_rate):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0)
        self.conv3 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0)
        
        self.dropout = nn.Dropout(p = dropout_rate)
        
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = torch.nn.Linear(16*3, n_class)

    def forward(self, xb):
        #print(type(xb), len(xb))
        
        ## input of size (bs, 1, 13, 60)
        #print(xb.shape)
        xb = xb.view(-1, 1, chroma_shape[0], chroma_shape[1])
        #print('xb shape', xb.shape)
        
        ## (bs, 1, 13, 60) >> (bs, 16, 11, 58)
        xb = self.dropout(F.relu(self.conv1(xb)))
        #print('---xb shape2', xb.shape)
        
        ## (bs, 16, 11, 58) >> (bs, 16, 9, 56)
        xb = self.dropout(F.relu(self.conv2(xb)))
        #print('---xb shape3', xb.shape)
        
        ## (bs, 16, 9, 56) >> (bs, 16, 7, 54)
        xb = self.dropout(F.relu(self.conv3(xb)))
        #print('---xb shape4', xb.shape)
        
        ## (bs, 16, 7, 54) >> (bs, 16, 3, 27)
        xb = self.pool(xb)
        #print('---xb shape5', xb.shape)
        
        ## reshape for fully connected
        xb = xb.view(-1, 16*3*116)
        #print(xb.shape)
        ## (bs, 16*3*27) >> (bs, 3)
        xb = self.fc1(xb)
        #print('---xb shape6', xb.shape)
        #print('=====', xb.shape)
        return xb.view(-1, xb.size(1))

## Get the model and optim object that will be used to update model parameters
def get_model(dropout_rate, weight_decay, lr):
    model = SoundRecognition_CNN(dropout_rate)
    return model, optim.Adam(model.parameters(), weight_decay = weight_decay, lr = lr)

In [8]:
def validate(trained_model, validate_bs, n_validate):
    trained_model.eval()
    loss = []
    acc = []
    with torch.no_grad():

        for i in range((n_validate - 1) // validate_bs + 1):
            start_i = i * validate_bs
            end_i = start_i + validate_bs
            #print(xb.shape)
            xb = x_validate[start_i:end_i, :, :]
            yb = y_validate[start_i:end_i]
            pred = trained_model(xb)
            loss.append(loss_func(pred, yb))
            acc.append(accuracy(pred, yb))

        #print(loss)
        valid_loss = sum(loss)
    return valid_loss, sum(acc)/len(acc)

def classify_raw_out(pred):
    
    p = torch.argmax(pred, dim = 1)
    class_map = ['c', 'ch', 'q', 's', 'sh', 'x', 'z', 'zh', 'none']
    return class_map[p]
    

In [9]:
### read in all models

audio_dir = '/Users/panchanok/Desktop/PyHack2019/PyHack2019/model9/'
model_files = [audio_dir + d for d in os.listdir(audio_dir)]

In [10]:
# Model class must be defined somewhere
###'nine_early_stop_mfcc_noise_trained.pt'
decision = []
for mf in model_files:
    model = torch.load(mf, map_location=torch.device('cpu'))
    y = model(torch.tensor(x_validate))
    decision.append(classify_raw_out(y))



In [12]:
decision


['none',
 'none',
 'none',
 'none',
 'ch',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 'none',
 's',
 'none',
 'none',
 'none',
 'none',
 'q']

In [16]:
model_files[len(model_files)-6]

'/Users/panchanok/Desktop/PyHack2019/PyHack2019/model9/mfcc_m_1000_dr0.9_wc1e-05_lr0.0001.pt'