In [1]:
# Task A: Train RNN on FSDD 
# - full precision 32 float
# - close to comparable designs (current deviation -6%, fixable by scaling network and tuning hyperparameters)
# - RNN layer sizing (64x64x2+64x2)x4bytes = ~33kB < 36 kB from Task B constraint

In [2]:
import torch
import yaml

from utils import get_rec_paths, load_data, train_model, validate_model

In [3]:
# mainly adapted from https://github.com/saztorralba/CNNWordReco due to following
# - deeplake / hub version broken -> replaced with original wavs (cloned orig repo: https://github.com/Jakobovski/free-spoken-digit-dataset)
# - logmel suitable for detection of spoken speech -> normalized, resampled, high-pass filtered, time axis scaling

In [4]:
# Load from YAML file
with open("config.yaml", "r") as f:
    args = yaml.safe_load(f)

# labels and paths in pd frame
data = get_rec_paths('./free-spoken-digit-dataset/recordings')

# load train, val, test data
trainset, validset, trainlabels, validlabels, testset, testlabels = load_data(data,True,**args)
print(trainset.shape, validset.shape, testset.shape)

torch.Size([2430, 20, 20]) torch.Size([270, 20, 20]) torch.Size([300, 20, 20])


In [5]:
# model def 
class FSDNN_RNN(torch.nn.Module):
    def __init__(self, input_channels, hidden_size, num_layers, output_size):
        super(FSDNN_RNN, self).__init__()
        self.rnn = torch.nn.RNN(input_size=input_channels, 
                          hidden_size=hidden_size, 
                          num_layers=num_layers, 
                          batch_first=True)  # (batch, seq, features)
        self.fc = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.rnn(x)  # RNN output
        out = self.fc(out[:, -1, :])  # Take last time step for classification
        return out

In [6]:
# model init
model = FSDNN_RNN(args['xsize'], args['rnn_hidden'], args['rnn_layers'], args['rnn_outputs'])
#model.load_state_dict(torch.load('chkpt_1.pt', weights_only=True)) #load pretrained 

# Training Setup
optimizer = torch.optim.RMSprop(model.parameters(), lr=args['learning_rate'])
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=200, gamma=0.5)
criterion = torch.nn.CrossEntropyLoss()

In [7]:
# Train from scratch
acc_best = 0
for ep in range(1,args['epochs']+1):

    # training
    loss = train_model(trainset,trainlabels,model,optimizer,criterion,**args)
    scheduler.step()
    acc = validate_model(validset,validlabels,model,**args)

    # save best model
    if acc > acc_best:
        acc_best = acc
        torch.save(model.state_dict(), 'chkpt_1.pt')    

    # display progress
    if ep % 10 == 0:
        print('Epoch {0:d} of {1:d}. Training loss: {2:.2f}, Validation accuracy: {3:.2f}%'.format(ep,args['epochs'],loss,acc))
    

Epoch 10 of 1000. Training loss: 0.95, Validation accuracy: 64.44%
Epoch 20 of 1000. Training loss: 0.69, Validation accuracy: 80.37%
Epoch 30 of 1000. Training loss: 0.48, Validation accuracy: 87.41%
Epoch 40 of 1000. Training loss: 0.49, Validation accuracy: 85.19%
Epoch 50 of 1000. Training loss: 0.39, Validation accuracy: 90.74%
Epoch 60 of 1000. Training loss: 0.34, Validation accuracy: 87.78%
Epoch 70 of 1000. Training loss: 0.29, Validation accuracy: 89.63%
Epoch 80 of 1000. Training loss: 0.25, Validation accuracy: 89.63%
Epoch 90 of 1000. Training loss: 0.26, Validation accuracy: 87.78%
Epoch 100 of 1000. Training loss: 0.23, Validation accuracy: 92.22%
Epoch 110 of 1000. Training loss: 0.28, Validation accuracy: 88.52%
Epoch 120 of 1000. Training loss: 0.23, Validation accuracy: 92.59%
Epoch 130 of 1000. Training loss: 0.22, Validation accuracy: 91.48%
Epoch 140 of 1000. Training loss: 0.17, Validation accuracy: 88.15%
Epoch 150 of 1000. Training loss: 0.18, Validation accura

KeyboardInterrupt: 

In [8]:
# load best model
# model = FSDNN_RNN(args['xsize'], args['rnn_hidden'], args['rnn_layers'], args['rnn_outputs'])
# model.load_state_dict(torch.load('chkpt_t1.pt', weights_only=True))

In [9]:
# check model size
params = [p.detach().cpu().numpy() for p in model.parameters()]
no_params = 0
for i, p in enumerate(params):
    print(f"Parameter {i}: shape {p.shape}")
    no_params += p.size
no_params

Parameter 0: shape (64, 20)
Parameter 1: shape (64, 64)
Parameter 2: shape (64,)
Parameter 3: shape (64,)
Parameter 4: shape (64, 64)
Parameter 5: shape (64, 64)
Parameter 6: shape (64,)
Parameter 7: shape (64,)
Parameter 8: shape (64, 64)
Parameter 9: shape (64, 64)
Parameter 10: shape (64,)
Parameter 11: shape (64,)
Parameter 12: shape (10, 64)
Parameter 13: shape (10,)


22794

In [10]:
# validate best model on testset
acc = validate_model(testset,testlabels,model,**args)
acc

92.0