In [None]:
# Put the parent directory on the import path so the `fastai_resnet_audio`
# package imported below can be found — presumably this notebook lives one
# level below the package root; confirm for your checkout.
import sys 
sys.path.append('..')

In [None]:
from fastai_resnet_audio.model import *
from fastai_resnet_audio.data import *
from fastai.vision.all import *

# fastai_resnet_audio tutorial

> Tutorial for fastai-resnet-audio - Dataset used: https://github.com/earthspecies/open_collaboration_on_audio_classification/blob/master/introduction.ipynb

In [None]:
# Fetch the macaques dataset archive; `untar_data` returns the location of the
# data, which the trailing bare expression displays.
macaques_url = 'https://storage.googleapis.com/ml-animal-sounds-datasets/macaques_24414Hz.zip'
path = untar_data(macaques_url)
path

Path('/Users/florian/.fastai/data/macaques_24414Hz')

## DataLoaders

Create DataBlock and DataLoaders with AudioBlock and AudioTransforms

In [None]:
# Clip length handed to both item transforms (presumably in seconds — confirm
# against AudioRandomCrop / AudioFixLength).
length = 0.5

# Prefer the first CUDA device when one is available, otherwise use the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Per-item transforms: crop, then fix the length, so every clip uses `length`.
audio_item_tfms = [
    AudioRandomCrop(length=length),
    AudioFixLength(length=length),
]
# Per-batch transform: noise augmentation, created for the chosen device.
audio_batch_tfms = [AudioAddNoise(device=device)]

# Audio inputs with categorical targets; labels come from each file's parent
# folder and the seeded splitter keeps the train/validation split repeatable.
dblocks = DataBlock(
    blocks=(AudioBlock, CategoryBlock),
    get_items=get_files,
    splitter=RandomSplitter(seed=42),
    get_y=parent_label,
    item_tfms=audio_item_tfms,
    batch_tfms=audio_batch_tfms,
)

dls = dblocks.dataloaders(path, bs=128)

In [None]:
# Pull a single batch as a sanity check: the result is a pair of
# (audio tensor, category labels).
dls.one_batch()

(TensorAudio([[[-7.4502e-02, -9.2339e-02, -1.0822e-01,  ...,  2.2105e-02,
            2.0925e-02,  2.0883e-02]],
 
         [[ 3.2130e-01,  3.5341e-01,  3.7791e-01,  ..., -3.3554e-02,
           -2.9873e-02, -2.7081e-02]],
 
         [[-2.2488e-02, -4.8285e-02, -7.2340e-02,  ..., -2.6424e-01,
           -2.5225e-01, -2.3800e-01]],
 
         ...,
 
         [[ 1.0335e-03, -8.5275e-04, -1.1653e-03,  ...,  1.9616e-04,
           -1.2397e-03,  1.9258e-04]],
 
         [[ 7.0962e-04,  8.3948e-04, -3.5992e-04,  ...,  3.3023e-04,
           -5.8667e-04, -7.5844e-04]],
 
         [[ 8.7482e-04,  7.2337e-04, -1.8100e-04,  ...,  1.1790e-03,
           -5.3216e-04,  1.7107e-04]]]),
 TensorCategory([6, 6, 6, 0, 6, 6, 6, 4, 6, 5, 5, 4, 4, 6, 0, 3, 6, 2, 6, 3, 2, 5, 2, 7,
         5, 3, 3, 6, 2, 0, 0, 2, 0, 2, 3, 5, 5, 3, 5, 6, 0, 0, 6, 5, 4, 7, 3, 3,
         2, 3, 3, 0, 5, 6, 4, 0, 3, 3, 5, 7, 0, 2, 5, 4, 4, 7, 5, 5, 3, 6, 5, 5,
         3, 6, 6, 1, 2, 0, 1, 3, 3, 1, 0, 6, 3, 1, 0, 0, 3, 2, 0, 7,

## Model

Create **model configuration** - available configurations are resnet1d18 and resnet1d34.

You have to adapt the **num_classes** parameter to the number of classes in your dataset (8 classes for this dataset).

In [None]:
# Build the configuration from a *copy* of the `resnet1d18` preset.
# Assigning `config = resnet1d18` and then setting a key would write into the
# library's shared dict, so every later use of the preset in this kernel would
# silently inherit this dataset's `num_classes`.
config = {**resnet1d18, 'num_classes': 8}

In [None]:
# Display the configuration to confirm that `num_classes` was applied.
config

{'block': fastai_resnet_audio.model.ResidualBlock,
 'layers': [2, 2, 2, 2],
 'in_channels': 64,
 'kernel_size': 15,
 'stride': 4,
 'num_classes': 8}

Create **model** using config

In [None]:
# Instantiate the network by unpacking the config dict as keyword arguments;
# the trailing bare `model` prints the layer structure.
model = ResNetAudio(**config)
model

ResNetAudio(
  (0): Sequential(
    (0): Conv1d(1, 64, kernel_size=(31,), stride=(4,), padding=(15,), bias=False)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool1d(kernel_size=15, stride=4, padding=5, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): ResidualBlock(
        (conv1): Conv1d(64, 64, kernel_size=(15,), stride=(4,), padding=(7,), bias=False)
        (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv1d(64, 64, kernel_size=(15,), stride=(1,), padding=(7,), bias=False)
        (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (downsample): Sequential(
          (0): Conv1d(64, 64, kernel_size=(15,), stride=(4,), padding=(7,), bias=False)
          (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
     

## Learner + Training

Creating the **learner** and **training** the model is straightforward.

In [None]:
# Bundle the data and the model into a Learner; accuracy is reported as the
# metric and ShowGraphCallback is attached as a callback.
learn = Learner(
    dls,
    model,
    metrics=accuracy,
    cbs=ShowGraphCallback(),
)

In [None]:
# Run the learning-rate finder to help pick the learning rate used for training.
learn.lr_find()

In [None]:
# Make sure all parameter groups are trainable, then train for 5 epochs with
# the one-cycle schedule and a maximum learning rate of 1e-3.
learn.unfreeze()
learn.fit_one_cycle(n_epoch=5, lr_max=1e-3)

## Fine-Tune pretrained model on different dataset

**Steps**
- create DataLoaders
- create model with same config (num_classes) as the pretrained model
- create learner
- load pretrained model weights with learn.load("pretrained") (fastai appends the ".pth" extension itself)
- call **replace_head** with num_classes set to the number of classes in the new dataset

Let's pretend the macaques dataset had 20 classes instead of 8.

In [None]:
# Fetch the dataset archive for the fine-tuning walkthrough; `untar_data`
# returns the data location, shown by the trailing bare expression.
finetune_url = 'https://storage.googleapis.com/ml-animal-sounds-datasets/macaques_24414Hz.zip'
path = untar_data(finetune_url)
path

In [None]:
# Clip length handed to both item transforms (presumably in seconds — confirm
# against AudioRandomCrop / AudioFixLength).
length = 0.5

# Prefer the first CUDA device when one is available, otherwise use the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Per-item transforms: crop, then fix the length, so every clip uses `length`.
audio_item_tfms = [
    AudioRandomCrop(length=length),
    AudioFixLength(length=length),
]
# Per-batch transform: noise augmentation, created for the chosen device.
audio_batch_tfms = [AudioAddNoise(device=device)]

# Same pipeline as for pretraining: audio inputs, categorical targets taken
# from the parent folder name, and a seeded random split.
dblocks = DataBlock(
    blocks=(AudioBlock, CategoryBlock),
    get_items=get_files,
    splitter=RandomSplitter(seed=42),
    get_y=parent_label,
    item_tfms=audio_item_tfms,
    batch_tfms=audio_batch_tfms,
)

dls = dblocks.dataloaders(path, bs=128)

The model was pretrained on a dataset with 8 classes, so create the config with 8 classes in order to load the pretrained weights.

In [None]:
# The checkpoint was trained with 8 output classes, so the architecture must be
# rebuilt with the same `num_classes` before its weights can be loaded.
# Use a copy of the `resnet1d18` preset rather than writing into the library's
# shared dict, which would leak this setting into every later use of the preset.
config = {**resnet1d18, 'num_classes': 8}

In [None]:
# Build a fresh network from the config defined for the pretrained checkpoint.
# Without this line the Learner would pick up whatever `model` object happened
# to be left in the kernel by an earlier section, and `config` would go unused.
model = ResNetAudio(**config)
learn = Learner(dls, model, metrics=accuracy, cbs=ShowGraphCallback())

In [None]:
# Restore the pretrained weights from the checkpoint named "saved_model".
# NOTE(review): no cell visible in this notebook calls learn.save("saved_model"),
# so the checkpoint presumably has to exist already — confirm before Run All.
learn.load("saved_model")

After loading the pretrained weights we can **replace the last linear layer**. In this example for a dataset with **20 classes**.

In [None]:
# Swap the final layer of the learner's model for one sized for the new
# dataset (20 classes).
replace_head(learn.model, num_classes=20)
# Inspect the new head through `learn.model` — the same object that was just
# modified — instead of relying on the global `model` alias still pointing at it.
learn.model[-1][-1]