In [1]:
import os

## switch to the root dir

In [2]:
os.chdir('..')
os.listdir()

['.git',
 '.gitignore',
 '.idea',
 'data_loader',
 'data_processor',
 'jupyter',
 'loggers',
 'main.py',
 'models',
 'original_data',
 'processed_data',
 'README.md',
 'saved_models',
 'trainers']

In [3]:
from data_processor.data_processor_toolkit import load_numpy_arrays, numpy_to_tensor
from models.Linear import LogisticRegression
import trainers.SGD_trainer
from loggers.statistics_loggers import plot_numerical_arrays
import torch
from torch import nn
from speechbrain.lobes.models.CRDNN import CRDNN

The torchaudio backend is switched to 'soundfile'. Note that 'sox_io' is not supported on Windows.
The torchaudio backend is switched to 'soundfile'. Note that 'sox_io' is not supported on Windows.


## Load preprocessed data

In [4]:
X_train, y_train, X_val, y_val = load_numpy_arrays(['data_train_X.npy','data_train_y.npy', 'data_val_X.npy','data_val_y.npy']
                  , path_prefix='processed_data/processed_waves/4-genres/')

In [5]:
X_train, y_train, X_val, y_val = numpy_to_tensor([X_train, y_train, X_val, y_val])

In [6]:
y_train = y_train.type(torch.int64)
y_val = y_val.type(torch.int64)

In [7]:
print(X_train.shape)
print(y_train.shape)

torch.Size([640, 501, 40])
torch.Size([640])


In [8]:
print(X_train[0])
print(y_train[0])

tensor([[-2.2604, -2.6280, -3.0352,  ..., -3.4321, -3.3666, -3.3074],
        [-2.5652, -3.0801, -3.3855,  ..., -2.7342, -2.6954, -2.6208],
        [-2.7137, -2.6825, -2.8931,  ..., -2.6997, -2.6482, -2.5789],
        ...,
        [-1.4595, -0.1884,  0.0382,  ..., -1.2723, -0.8131, -1.0031],
        [-1.1837, -0.1374,  0.1658,  ..., -1.1047, -0.7848, -0.9523],
        [-0.2428, -0.1665, -0.1290,  ..., -0.1786, -0.1607, -0.1630]])
tensor(1)


In [9]:
print(X_val.shape)
print(y_val.shape)

torch.Size([80, 501, 40])
torch.Size([80])


## Load model

1. define model under the folder models
2. import and define the model

In [10]:
class CNN2d(nn.Module):
    def __init__(self):
        super(CNN2d, self).__init__()
        self.reLU = nn.ReLU()
        
        self.conv1 = nn.Conv2d(
            in_channels = 1,
            out_channels = 4,
            kernel_size = (3, 3),
            stride = (1, 1),
            padding = (1, 1)
        )
        self.bn1=nn.BatchNorm2d(4)
        self.pool1 = nn.MaxPool2d(kernel_size=4, stride=2)
        self.conv2 = nn.Conv2d(
            in_channels = 4,
            out_channels = 16,
            kernel_size = (3, 3),
            stride = (1, 1),
            padding = (1, 1)
        )
        self.bn2=nn.BatchNorm2d(16)
        self.pool2 = nn.MaxPool2d(kernel_size=(4, 4), stride=(2,2))
        self.conv3 = nn.Conv2d(
            in_channels = 16,
            out_channels = 16,
            kernel_size = (3, 3),
            stride = (2, 1),
            padding = (1, 1)
        )
        self.bn3=nn.BatchNorm2d(16)
        self.pool3 = nn.MaxPool2d(kernel_size=(4, 4), stride=(2,2))
        self.dropout = nn.Dropout(0.3)
        
        self.linear1 = nn.Linear(1440, 64)
        self.linear2 = nn.Linear(64, 4)
        #self.linear3 = nn.Linear(50, 4)
        
        
    
    def forward(self, x):
        out = self.reLU(self.bn1(self.conv1(x)))
        out = self.pool1(out)
        #out = self.dropout(out)
        out = self.reLU(self.bn2(self.conv2(out)))
        out = self.pool2(out)
        #out = self.dropout(out)
        #print(out.shape)
        out = self.reLU(self.bn3(self.conv3(out)))
        #print(out.shape)
        out = self.pool3(out)
        #out = self.dropout(out)
        #print(out.shape)
        out = torch.flatten(out, 1)
        out = self.reLU(self.linear1(out))
        out = self.reLU(self.linear2(out))
        return out

In [11]:
# eg. MLP
model = CNN2d()

In [12]:
n_feat = X_train.shape[2]
n_time = X_train.shape[1]

In [13]:
X_a = X_train[:10].reshape((-1, 1, n_time, n_feat))
print(X_a.shape)
pred = model(X_a)
print(pred.shape)

torch.Size([10, 1, 501, 40])
torch.Size([10, 4])


## Define loss and optimizer

In [14]:
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

## Train

use trainers.SGD_trainer.train or define a trainer

parameters of SGD_trainer.train
- model
- train_array: a tuple (X_train, y_train, X_val, y_val)
- loss
- optimizer
- batch_size
- num_epoch
- device

In [15]:
X_train_2d = X_train.reshape((-1, 1, n_time, n_feat))
print(X_train.shape)
X_val_2d = X_val.reshape((-1, 1, n_time, n_feat))
training_loss_array, training_accuracy_array, validation_loss_array, validation_accuracy_array = trainers.SGD_trainer.train(model, (X_train_2d, y_train, X_val_2d, y_val), 
                                                                                                                            loss, optimizer, batch_size=10, num_epoch=60, device='cuda')

torch.Size([640, 501, 40])
Epoch  0: loss on final training batch: 1.3863
training accuracy: 25.00% validation accuracy: 26.25%
Epoch  1: loss on final training batch: 1.3863
training accuracy: 25.00% validation accuracy: 26.25%
Epoch  2: loss on final training batch: 1.3863
training accuracy: 25.00% validation accuracy: 26.25%


KeyboardInterrupt: 

## Plot

plot_numerical_arrays: plot multiple arrays with the same length

parameters:
- num_arrays: numerical arrays with the same length
- labels: labels of each array(with the same order of num_arrays)
- xlabel
- ylabel
- title

In [None]:
plot_numerical_arrays([training_loss_array[50:], validation_loss_array[50:]], ['training loss', 'validation loss'], 
                      xlabel='batches', ylabel='loss', title='train and validation losses for logistic regression')

In [None]:
plot_numerical_arrays([training_accuracy_array, validation_accuracy_array], ['training accuracy', 'validation accuracy'], 
                      xlabel='batches', ylabel='accuracy', title='train and validation accuracies for logistic regression')

## Save model

Save model in 'saved_models/saved_modelname_wave.pt'

In [None]:
torch.save(model.state_dict(), 'saved_models/saved_CNN2d_wave.pt')