In [None]:
# Put the parent directory on the import path before the package imports below
# (presumably so a local fastai2_resnet_audio checkout is importable — confirm).
import sys 
sys.path.append('..')

In [None]:
from fastai2_resnet_audio.model import *
from fastai2_resnet_audio.data import *
from fastai2.vision.all import *

# fastai2-resnet-audio tutorial

> Tutorial for fastai2-resnet-audio - Dataset used: https://github.com/earthspecies/open_collaboration_on_audio_classification/blob/master/introduction.ipynb

In [None]:
# Fetch the macaques archive through untar_data and keep the location it returns.
path = untar_data(
    'https://storage.googleapis.com/ml-animal-sounds-datasets/macaques_24414Hz.zip'
)
# Bare last expression: show the resulting path as the cell output.
path

Path('/Users/florian/.fastai/data/macaques_24414Hz')

## DataLoaders

Create DataBlock and DataLoaders with AudioBlock and AudioTransforms

In [None]:
# Pick the first CUDA device when torch reports one, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
# Length handed to both item transforms (presumably seconds of audio — confirm).
length = 0.5

dblocks = DataBlock(
    blocks=(AudioBlock, CategoryBlock),
    get_items=get_files,
    splitter=RandomSplitter(seed=42),  # fixed seed for the random split
    get_y=parent_label,
    # Per-item transforms: random crop, then fix the length, both using `length`.
    item_tfms=[
        AudioRandomCrop(length=length),
        AudioFixLength(length=length),
    ],
    # Batch-level transform; it is given the device selected above.
    batch_tfms=[AudioAddNoise(device=device)],
)

# Build the DataLoaders from the dataset directory with a batch size of 128.
dls = dblocks.dataloaders(path, bs=128)

In [None]:
# Draw a single batch from the DataLoaders and display it as the cell output.
dls.one_batch()

(TensorAudio([[[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
            0.0000e+00,  0.0000e+00]],
 
         [[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
            0.0000e+00,  0.0000e+00]],
 
         [[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
            0.0000e+00,  0.0000e+00]],
 
         ...,
 
         [[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
            0.0000e+00,  0.0000e+00]],
 
         [[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
            0.0000e+00,  0.0000e+00]],
 
         [[ 1.3763e-02,  2.7649e-02,  4.1534e-02,  ...,  6.1035e-05,
           -1.0071e-03, -2.0752e-03]]]),
 TensorCategory([4, 3, 2, 1, 5, 1, 3, 3, 4, 0, 4, 1, 5, 5, 3, 3, 2, 0, 6, 2, 6, 1, 4, 7,
         0, 2, 5, 0, 3, 2, 7, 5, 6, 6, 6, 5, 6, 3, 3, 6, 7, 2, 3, 2, 0, 5, 0, 4,
         3, 6, 0, 4, 5, 4, 6, 3, 0, 0, 6, 1, 5, 3, 1, 1, 5, 1, 2, 3, 3, 6, 7, 2,
         2, 1, 5, 3, 6, 5, 5, 2, 2, 2, 6, 2, 1, 3, 7, 5, 5, 0, 6, 7,

## Model

Create **model configuration** - available configurations are resnet1d18 and resnet1d34.

You have to adapt the **num_classes** parameter to the number of classes in your dataset (8 classes for this dataset).

In [None]:
# Build the model configuration from the resnet1d18 preset.
# Copy the preset instead of assigning into it: `config = resnet1d18` would alias
# the dict exported by the library, so setting a key would silently change the
# preset for every later use in this kernel.
config = {**resnet1d18, 'num_classes': 8}  # 8 classes in this dataset

In [None]:
# Display the resulting configuration.
config

{'block': fastai2_resnet_audio.model.ResidualBlock,
 'layers': [2, 2, 2, 2],
 'in_channels': 64,
 'kernel_size': 15,
 'stride': 4,
 'num_classes': 8}

Create **model** using config

In [None]:
# Instantiate the network, passing every config entry as a keyword argument.
model = ResNetAudio(**config)

## Learner + Training

Creating the **learner** and **training** the model is straightforward.

In [None]:
# Bundle data and model into a Learner; report accuracy and attach ShowGraphCallback.
learn = Learner(
    dls,
    model,
    metrics=accuracy,
    cbs=ShowGraphCallback(),
)

In [None]:
# Run the learner's learning-rate finder before choosing a rate for training.
learn.lr_find()

In [None]:
learn.unfreeze()
# Five epochs with the one-cycle schedule, using 1e-3 as the learning rate.
learn.fit_one_cycle(5, 1e-3)
# Persist the trained weights: the fine-tuning section below calls
# learn.load("saved_model"), which needs this checkpoint to exist on a fresh
# Restart & Run All.
learn.save("saved_model")

## Fine-Tune pretrained model on different dataset

**Steps**
- create DataLoaders
- create model with same config (num_classes) as the pretrained model
- create learner
- load pretrained model weights with learn.load("saved_model")
- call **replace_head** with num_classes set to the number of classes in the new dataset

Let's pretend the macaques dataset had 20 instead of 8 classes.

In [None]:
# Fetch the macaques archive through untar_data and keep the location it returns.
path = untar_data(
    'https://storage.googleapis.com/ml-animal-sounds-datasets/macaques_24414Hz.zip'
)
# Bare last expression: show the resulting path as the cell output.
path

In [None]:
# Pick the first CUDA device when torch reports one, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
# Length handed to both item transforms (presumably seconds of audio — confirm).
length = 0.5

dblocks = DataBlock(
    blocks=(AudioBlock, CategoryBlock),
    get_items=get_files,
    splitter=RandomSplitter(seed=42),  # fixed seed for the random split
    get_y=parent_label,
    # Per-item transforms: random crop, then fix the length, both using `length`.
    item_tfms=[
        AudioRandomCrop(length=length),
        AudioFixLength(length=length),
    ],
    # Batch-level transform; it is given the device selected above.
    batch_tfms=[AudioAddNoise(device=device)],
)

# Build the DataLoaders from the dataset directory with a batch size of 128.
dls = dblocks.dataloaders(path, bs=128)

The model was pretrained on a dataset with 8 classes, so create a config with 8 classes to load the pretrained model.

In [None]:
# Rebuild the 8-class configuration that matches the checkpoint to be loaded.
# Copy the preset rather than assigning into it, so the library's resnet1d18
# dict is not mutated.
config = {**resnet1d18, 'num_classes': 8}
# Create a fresh model from this config, so the Learner built next does not
# depend on whatever `model` an earlier cell left behind (which would also
# leave this config unused).
model = ResNetAudio(**config)

In [None]:
# Bundle data and model into a Learner; report accuracy and attach ShowGraphCallback.
learn = Learner(
    dls,
    model,
    metrics=accuracy,
    cbs=ShowGraphCallback(),
)

In [None]:
# Load the weights stored under the name "saved_model" into the learner.
learn.load("saved_model")

After loading the pretrained weights we can **replace the last linear layer**. In this example for a dataset with **20 classes**.

In [None]:
# Swap the last linear layer for one with 20 outputs.
replace_head(learn.model, num_classes=20)
# Inspect the head on the same object that was just modified (learn.model)
# rather than through the separate global `model`, so the check does not rely
# on the two names pointing at the same network.
learn.model[-1][-1]

Linear(in_features=512, out_features=20, bias=True)