In [None]:
# Add the parent directory to the module search path (presumably so the
# local `fastai2_resnet_audio` package imported below resolves without
# being installed -- confirm against the repository layout).
import sys
sys.path.append("..")

In [None]:
from fastai2_resnet_audio.model import *
from fastai2_resnet_audio.data import *
from fastai2.vision.all import *

# fastai2-resnet-audio tutorial

> Tutorial for fastai2-resnet-audio - Dataset used: https://github.com/earthspecies/open_collaboration_on_audio_classification/blob/master/introduction.ipynb

In [None]:
# Fetch and unpack the macaque dataset archive; the last line displays the
# value returned by `untar_data`.
MACAQUES_URL = 'https://storage.googleapis.com/ml-animal-sounds-datasets/macaques_24414Hz.zip'
path = untar_data(MACAQUES_URL)
path

## DataLoaders

Create DataBlock and DataLoaders with AudioBlock and AudioTransforms

In [None]:
# Clip length handed to both item transforms (units are defined by the
# audio transforms -- presumably seconds; TODO confirm).
length = 0.5
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Transforms applied per item: random crop, then fix the length.
audio_item_tfms = [AudioRandomCrop(length=length), AudioFixLength(length=length)]
# Transform applied per batch: additive noise, configured with `device`.
audio_batch_tfms = [AudioAddNoise(device=device)]

# Audio inputs with categorical targets; items come from `get_files`,
# targets from `parent_label`, and the split is random with a fixed seed.
dblocks = DataBlock(
    blocks=(AudioBlock, CategoryBlock),
    get_items=get_files,
    splitter=RandomSplitter(seed=42),
    get_y=parent_label,
    item_tfms=audio_item_tfms,
    batch_tfms=audio_batch_tfms,
)

dls = dblocks.dataloaders(path, bs=128)

In [None]:
# Draw one batch from the DataLoaders and display it as a quick sanity check.
dls.one_batch()

## Model

Create **model configuration** - available configurations are resnet1d18 and resnet1d34.

You have to adapt the **num_classes** parameter to the number of classes in your dataset (8 classes for this dataset).

In [None]:
# Build the model configuration from the `resnet1d18` preset.
# The preset is copied rather than aliased, so setting `num_classes` here
# does not modify the shared `resnet1d18` object imported from the library.
config = {**resnet1d18, 'num_classes': 8}  # this dataset has 8 classes

In [None]:
# Display the configuration that will be unpacked into the model constructor.
config

Create **model** using config

In [None]:
# Instantiate the network by unpacking the configuration entries as keyword
# arguments; the trailing expression displays the module structure.
model = ResNetAudio(**config)
model

ResNetAudio(
  (0): Sequential(
    (0): Conv1d(1, 64, kernel_size=(31,), stride=(4,), padding=(15,), bias=False)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool1d(kernel_size=15, stride=4, padding=5, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): ResidualBlock(
        (conv1): Conv1d(64, 64, kernel_size=(15,), stride=(4,), padding=(7,), bias=False)
        (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv1d(64, 64, kernel_size=(15,), stride=(1,), padding=(7,), bias=False)
        (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (downsample): Sequential(
          (0): Conv1d(64, 64, kernel_size=(15,), stride=(4,), padding=(7,), bias=False)
          (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
     

## Learner + Training

Creating the **learner** and **training** the model is straightforward.

In [None]:
# Bundle the DataLoaders and the model into a Learner that reports accuracy,
# uses `splitter` to form parameter groups, and attaches ShowGraphCallback.
learn = Learner(
    dls,
    model,
    metrics=accuracy,
    splitter=splitter,
    cbs=ShowGraphCallback(),
)

In [None]:
# Run the learner's learning-rate finder to help choose a learning rate
# for the training call in the next cell.
learn.lr_find()

KeyboardInterrupt: 

In [None]:
# Unfreeze the model, then train for 5 epochs with the one-cycle schedule
# (second argument 1e-3 is the learning rate passed to fit_one_cycle).
learn.unfreeze()
learn.fit_one_cycle(5, 1e-3)
# Persist the trained weights: the fine-tuning section further down calls
# learn.load("saved_model"), which fails on a fresh run unless this
# checkpoint has been written first.
learn.save("saved_model")

## Fine-Tune pretrained model on different dataset

**Steps**
- create DataLoaders
- create model with same config (num_classes) as the pretrained model
- create learner
- load pretrained model weights with learn.load("pretrained") (fastai appends the `.pth` extension to the name itself)
- call **replace_head** with num_classes set to the number of classes in the new dataset

Let's pretend the macaques dataset had 20 classes instead of 8.

In [None]:
# Same archive as in the first section, standing in for the "new" dataset;
# the final expression displays the value returned by `untar_data`.
MACAQUES_URL = 'https://storage.googleapis.com/ml-animal-sounds-datasets/macaques_24414Hz.zip'
path = untar_data(MACAQUES_URL)
path

In [None]:
# Clip length shared by the crop and fix-length transforms (units are set by
# the audio transforms -- presumably seconds; TODO confirm).
length = 0.5
# First CUDA device if available, CPU otherwise.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Item-level transforms: random crop followed by fixing the length.
audio_item_tfms = [AudioRandomCrop(length=length), AudioFixLength(length=length)]
# Batch-level transform: additive noise, configured with `device`.
audio_batch_tfms = [AudioAddNoise(device=device)]

# Audio -> category pipeline: files listed by `get_files`, targets produced
# by `parent_label`, random split with a fixed seed.
dblocks = DataBlock(
    blocks=(AudioBlock, CategoryBlock),
    get_items=get_files,
    splitter=RandomSplitter(seed=42),
    get_y=parent_label,
    item_tfms=audio_item_tfms,
    batch_tfms=audio_batch_tfms,
)

dls = dblocks.dataloaders(path, bs=128)

The model was pretrained on a dataset with 8 classes, so create a config with 8 classes to load the pretrained model.

In [None]:
# Configuration matching the pretrained model (8 classes) so that its saved
# weights fit the network. The preset is copied rather than aliased, so the
# shared `resnet1d18` object imported from the library is left unmodified.
config = {**resnet1d18, 'num_classes': 8}

In [None]:
# Build a fresh network from `config` so its layout matches the pretrained
# weights loaded in the next cell, rather than relying on whatever `model`
# object happens to be left over from an earlier cell.
model = ResNetAudio(**config)
learn = Learner(dls, model, metrics=accuracy, cbs=ShowGraphCallback())

In [None]:
# Load the checkpoint named "saved_model" into the learner
# (the checkpoint must already exist in the learner's model directory).
learn.load("saved_model")

After loading the pretrained weights we can **replace the last linear layer**. In this example for a dataset with **20 classes**.

In [None]:
# Replace the last linear layer with one sized for 20 classes, then display
# the final layer of the learner's model -- the same object that was passed
# to replace_head -- to confirm the replacement.
replace_head(learn.model, num_classes=20)
learn.model[-1][-1]