# Tutorial: Training a Voice Recognition Model

If you haven't installed fastaudio yet, do so by uncommenting and executing the following cell.

In [None]:
#!pip install git+https://github.com/fastaudio/fastaudio.git

In [None]:
from pathlib import Path

from fastai.torch_basics import *
from fastai.basics import *
from fastai.data.all import *
from fastai.callback.all import *
from fastai.vision.all import *

from fastaudio.core.all import *
from fastaudio.augment.all import *

In [None]:
# Quick sanity check that torch is usable: build a one-element tensor of ones and print it.
print(torch.ones(1))

In [None]:
# Fetch and unpack the dataset referenced by URLs.SPEAKERS10 and keep the
# local folder that untar_data returns.
# NOTE(review): tar_extract_at_filename presumably extracts into a folder named
# after the archive - confirm against fastaudio.
speakers_folder = untar_data(URLs.SPEAKERS10, extract_func=tar_extract_at_filename)
# List the entries of that folder and display the first one as the cell output.
speakers = speakers_folder.ls()
speakers[0]

## Datablock and Basic End to End Training on 10 Speakers

In [None]:
# Build the audio-to-spectrogram transform from the Voice preset config: it turns
# the signal into a MelSpectrogram with no augmentation.
# The 2 s crop is requested separately, via crop_signal_to=2000 on the AudioBlock
# in the DataBlock cell.
cfg_voice = AudioConfig.Voice()
a2s = AudioToSpec.from_cfg(cfg_voice)

In [None]:
# Pick a random entry and preview the label derived from it: the first five
# characters of its file name (the same rule the DataBlock's get_y applies).
example = random.choice(speakers)
# Use Path(...).name instead of splitting the string on '/', so the result is
# also correct on platforms whose path separator is not '/'.
Path(example).name[:5]

In [None]:
# Describe how the dataset folder becomes (audio, speaker label) pairs.
auds = DataBlock(
    # Inputs: audio blocks built with crop_signal_to=2000 (the tutorial's 2 s crop);
    # targets: categories.
    blocks=(AudioBlock.from_folder(speakers_folder, crop_signal_to=2000), CategoryBlock),
    get_items=get_audio_files,
    splitter=RandomSplitter(),
    # Per-item transform: convert each signal to a spectrogram.
    item_tfms=a2s,
    # Label = first five characters of the file name. Path(...).name is used
    # instead of splitting on '/', so labels stay correct on platforms whose
    # path separator is not '/'.
    get_y=lambda x: Path(x).name[:5],
)

In [None]:
# Build the datasets from the folder and collect the target (second element) of every item.
cats = [y for _,y in auds.datasets(speakers_folder)]

In [None]:
# Verify categories are being correctly assigned: the encoded targets must
# span 0 through 9, matching the ten speakers of the dataset.
test_eq(min(cats).item(), 0)
test_eq(max(cats).item(), 9)

In [None]:
# Create the DataLoaders from the DataBlock, using a batch size of 64.
dbunch = auds.dataloaders(speakers_folder, bs=64)

<div class="alert alert-block alert-info">Info:
<code>show_batch</code> now works correctly with <code>nchannels</code>, which is an attribute of <code>AudioSpectrogram</code> (it is part of the spectrogram settings, but <code>__getattr__</code> is overridden so that it can be accessed like a regular attribute).</div>

In [None]:
# Display up to nine samples from a batch.
dbunch.show_batch(max_n=9)

In [None]:
# Shape of the input tensor of one batch; dimension 1 is read later as the channel count.
dbunch.one_batch()[0].shape

In [None]:
def alter_learner(learn, channels=1):
    """Adapt the first layer of ``learn.model`` to accept ``channels`` input channels.

    The layer at ``learn.model[0][0]`` is modified in place: its
    ``in_channels`` attribute is set to ``channels`` and its weight is replaced
    by a new parameter built from the kernel of input channel 1 of the current
    weight, replicated ``channels`` times along the input-channel dimension.
    With the default ``channels=1`` this keeps exactly that single kernel.

    Previously the weight was always reduced to one input channel regardless of
    ``channels``, so any value other than 1 left ``in_channels`` and the weight
    shape inconsistent.

    Args:
        learn: object exposing ``model``; ``learn.model[0][0]`` must have an
            ``in_channels`` attribute and a 4-D ``weight`` with at least two
            input channels (index 1 is read).
        channels: number of input channels the layer should accept.

    Returns:
        None. ``learn.model`` is mutated in place.
    """
    first_conv = learn.model[0][0]
    # Build the new kernel outside autograd so the resulting parameter is a
    # fresh leaf tensor with its own storage.
    with torch.no_grad():
        kernel = first_conv.weight[:, 1:2, :, :].repeat(1, channels, 1, 1)
    first_conv.in_channels = channels
    first_conv.weight = torch.nn.Parameter(kernel)

In [None]:
# Build the learner. xresnet18() defaults to a 1000-way output layer, so size
# the classification head to the number of classes found in the data instead.
learn = Learner(dbunch,
                xresnet18(n_out=get_c(dbunch)),
                torch.nn.CrossEntropyLoss(),
                metrics=[accuracy])
# Read the channel count from a real batch (dimension 1 of the input tensor)
# and adapt the model's first layer to it.
nchannels = dbunch.one_batch()[0].shape[1]
alter_learner(learn, nchannels)

In [None]:
from fastaudio.ci import skip_if_ci

# We don't do a full lr_find when running in the CI: the search is wrapped in a
# function so that skip_if_ci can guard it.
# NOTE(review): skip_if_ci presumably turns the call into a no-op under CI - confirm in fastaudio.ci.
@skip_if_ci
def run_lr_find():
    """Run the learning-rate finder on the module-level ``learn``."""
    learn.lr_find()

run_lr_find()

In [None]:
@skip_if_ci
def run_learner():
    """Train the module-level ``learn`` with the one-cycle policy."""
    n_epochs = 10
    peak_lr = slice(1e-2)
    learn.fit_one_cycle(n_epochs, lr_max=peak_lr)


# Training is guarded by skip_if_ci, so when running in CI the notebook only
# validates that the model can be set up; the actual fit is meant for local runs.
run_learner()