In [1]:
!pip install pydub
!pip install fastaudio
# If you are using Colab, restart your notebook after running this cell.



# Scraping Xeno-Canto API

In [2]:
import pandas as pd
import requests
from tqdm import tqdm

country = 'brazil'
api_url = f'https://www.xeno-canto.org/api/2/recordings?query=cnt:{country}'
# Fields copied from every recording entry into the output table.
# The later cells read these columns by name.
fields = ["id", "file", "file-name", "en", "gen", "length"]


def fetch_page(url):
    """Request one API URL and return its decoded JSON payload.

    Raises:
        requests.HTTPError: If the server answers with an error status, so a
            failed page is never parsed as if it were data.
    """
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    return response.json()


# The first request is only used to learn how many result pages exist.
js = fetch_page(api_url)

# Collect one plain dict per recording and build the DataFrame once at the end:
# growing a frame page by page is quadratic, and DataFrame.append no longer
# exists in pandas >= 2.0.
records = []
for n_page in tqdm(range(1, js["numPages"]+1)):
    page_json = fetch_page(f'{api_url}&page={n_page}')
    for recording in page_json["recordings"]:
        records.append({field: recording[field] for field in fields})

df = pd.DataFrame(records, columns=fields)
df.to_csv("birds.csv", index=False)

100%|██████████| 118/118 [02:39<00:00,  1.35s/it]


# Downloading selected files

In [None]:
from pandas import read_csv
from urllib.request import urlretrieve
from os import listdir, mkdir, path as os_path
from tqdm.notebook import tqdm
from time import sleep

recordings = read_csv("birds.csv")

# Removing recordings that are not classified
unclassified = ['Identity unknown', 'Soundscape']
classified = recordings[~recordings['en'].isin(unclassified)]

# Choosing the 25 most common species
counts = classified['en'].value_counts()
chosen = counts.index[:25]
df = classified[classified['en'].isin(chosen)]
df.to_csv("to_download.csv", index=False)

# Report how many recordings and species were kept.
print(df.shape)
print(len(df["en"].unique()), "different species")

audio_dir = 'audio_files/'
if not os_path.isdir(audio_dir):
    mkdir(audio_dir)


def download_with_retry(url, destination, retries=1, wait_seconds=60):
    """Download `url` to `destination`, pausing and retrying after a failure.

    The file is first written to `destination + '.part'` and only moved into
    place once the transfer has finished, so a truncated download is never
    left under the final name.

    Args:
        url: File URL as stored in the table; 'https:' is prepended only when
            the URL carries no scheme of its own.
        destination: Local path the file is written to.
        retries: Number of additional attempts after the first failure.
        wait_seconds: Pause, in seconds, before each retry.

    Returns:
        None on success, otherwise the exception raised by the last attempt.
    """
    from os import remove, replace

    # An already absolute URL must not be prefixed a second time.
    full_url = url if url.startswith(('http://', 'https://')) else 'https:' + url
    partial = destination + '.part'
    last_error = None
    for attempt in range(retries + 1):
        if attempt:
            print("\nRetrying:", url)
            print(last_error)
            sleep(wait_seconds)
        try:
            urlretrieve(full_url, filename=partial)
            replace(partial, destination)
            return None
        except Exception as error:  # best effort: failures are reported, not raised
            last_error = error
            if os_path.exists(partial):
                remove(partial)
    return last_error


# Ids of the recordings that could not be downloaded even after retrying.
nots = []

for _, row in tqdm(df.iterrows(), total=df.shape[0]):
    # Local name: recording id followed by the last four characters
    # (the extension) of the original file name, lower-cased.
    f_name = str(row["id"]) + row["file-name"][-4:].lower()
    error = download_with_retry(row["file"], audio_dir + f_name)
    if error is not None:
        print("Not downloaded|", f_name)
        print(error)
        nots.append(row["id"])

if nots:
    with open('not_downloaded.txt', 'w') as f:
        f.writelines(f"{item}\n" for item in nots)
    print(str(nots))
else:
    print('All files were successfully downloaded!')

# Preprocess audio files and split data

In [None]:
from os import listdir, mkdir, path as os_path
from sklearn.model_selection import train_test_split
from pydub import AudioSegment
from os import listdir, mkdir
import pandas as pd
from tqdm.notebook import tqdm

data_dir = './'
df = pd.read_csv(data_dir+'to_download.csv')

audio_dir = data_dir+'audio_files/'
downloaded_files = listdir(audio_dir)
# Check downloaded files: rebuild the local name of every recording
# (id + lower-cased last four characters of the original file name) and keep
# only the rows whose file is present in `audio_dir`.
df['file-name'] = df['file-name'].str.lower()
df['filename'] = df['id'].astype(str) + df['file-name'].str[-4:]
downloaded = df[df['filename'].isin(downloaded_files)][['filename', 'en']]
# Written under `data_dir`, which is where the modelling cell reads it back from.
downloaded.to_csv(data_dir+'downloaded.csv', index=False)
# print('Not downloaded:', len(df) - len(downloaded))

# Stratified 90/10 split. The seed is fixed because the split is later
# recovered by listing the train/ and test/ directories: an unseeded re-run
# would assign files differently and leave the same recording in both folders.
train, test = train_test_split(
    downloaded[['filename', 'en']],
    stratify=downloaded["en"],
    test_size=0.1,
    random_state=42,
)

def to_wav(filename, n_channels, sample_rate, input_dir, output_dir):
    """Convert one audio file to WAV with the requested channels and sample rate.

    Args:
        filename: Name of the source file inside `input_dir`; its last four
            characters must be '.mp3' or '.wav' (case-insensitive).
        n_channels: Number of channels of the exported audio.
        sample_rate: Frame rate of the exported audio.
        input_dir: Directory that contains `filename`.
        output_dir: Directory that receives the converted file. The output
            keeps the input's base name with a '.wav' extension.

    Raises:
        ValueError: If the extension is neither '.mp3' nor '.wav'.
    """
    input_type = filename[-4:].lower()
    if input_type == '.mp3':
        sound = AudioSegment.from_mp3(f"{input_dir}/{filename}")
        # Derive the output name from the argument, not from a global loop variable.
        new_fn = filename[:-4] + '.wav'
    elif input_type == '.wav':
        sound = AudioSegment.from_wav(f"{input_dir}/{filename}")
        new_fn = filename
    else:
        raise ValueError(f"Unsupported audio type {input_type!r} for file {filename!r}")
    sound = sound.set_channels(n_channels)
    # set_frame_rate returns a new segment; keep it, otherwise no resampling happens.
    sound = sound.set_frame_rate(sample_rate)
    sound.export(f"{output_dir}/{new_fn}", format="wav")

wav_dir = data_dir+'wav_files/'
# Create every output directory independently: when `wav_dir` already exists
# but a sub-directory is missing (e.g. after an interrupted run), the missing
# one is still created.
for directory in (wav_dir, wav_dir + 'test/', wav_dir + 'train/'):
    if not os_path.isdir(directory):
        mkdir(directory)

In [None]:
# Convert the files of both splits, test first and then train, requesting
# 1 channel and a 16000 sample rate; each split is written to its own
# sub-directory of `wav_dir`.
for split_name, split_frame in (('test', test), ('train', train)):
    for fn in tqdm(split_frame['filename']):
        to_wav(fn, n_channels=1, sample_rate=16000, input_dir=audio_dir, output_dir=wav_dir+split_name)

# Model

## Preparing dataframes

In [None]:
from os import listdir, mkdir
import pandas as pd
wav_dir = data_dir+'wav_files/'

# Load the table of downloaded recordings and point every entry at its
# converted file by swapping the last four characters for '.wav'.
downloaded = pd.read_csv(data_dir+'downloaded.csv')
downloaded = downloaded.assign(filename=downloaded['filename'].str[:-4] + '.wav')
print(downloaded.shape)
downloaded.columns = ['filename', 'category']

# Recover the split from the files present in the train/ and test/ folders.
train_files, test_files = (listdir(wav_dir + split) for split in ('train', 'test'))
in_train = downloaded['filename'].isin(train_files)
in_test = downloaded['filename'].isin(test_files)
df, df_test = downloaded[in_train], downloaded[in_test]
for frame in (df, df_test):
    print(frame.shape)

## Transforms

In [None]:
from fastai.vision.all import *
from fastaudio.core.all import *
from fastaudio.augment.all import *
from fastaudio.ci import skip_if_ci

# Release the GPU memory cached by PyTorch before building the pipeline.
torch.cuda.empty_cache()

# Base path that the DataBlocks join with the train/test directories.
path = Path('.')
# Clip length in seconds; it is multiplied by 1000 where it is passed as `crop_signal_to`.
seconds = 15
# Mel-spectrogram configuration with n_fft=512, and the audio-to-spectrogram
# transform built from it.
cfg = AudioConfig.BasicMelSpectrogram(n_fft=512)
a2s = AudioToSpec.from_cfg(cfg)
# Disabled alternative that also resizes the signal before the spectrogram:
# item_transforms = [ResizeSignal(seconds*1000, pad_mode=AudioPadType.Repeat), a2s]
# Only the spectrogram conversion is applied per item.
item_transforms = [a2s]

## Datablocks and dataloaders

In [None]:
# Directory holding the converted training files.
train_dir = wav_dir+'train'
# DataBlock for training: audio inputs read from the "filename" column
# (prefixed with the training directory), labels read from the "category"
# column, and a stratified split with test_size=0.2 and a fixed random_state
# taken from the training table `df`.
# crop_signal_to receives seconds*1000 — presumably milliseconds; confirm against fastaudio.
auds = DataBlock(blocks = (AudioBlock(crop_signal_to=seconds*1000), CategoryBlock),
                 get_x = ColReader("filename", pref=path/train_dir), 
                 splitter = TrainTestSplitter(random_state=42, stratify=df['category'], test_size=0.2),
                 item_tfms = item_transforms,
                 get_y = ColReader("category")
                 )

In [None]:
# Batch size; the same value is reused for the test dataloaders further down.
batch_size = 64
# Build the dataloaders from the training table `df` with a fixed seed.
# NOTE(review): shuffle=False is passed for the training data too — confirm
# that unshuffled training batches are intended.
dbunch = auds.dataloaders(df, bs=batch_size, shuffle=False, seed=42)

In [None]:
# Seed the random number generators after the dataloaders have been built.
# NOTE(review): the second positional argument of set_seed presumably requests
# reproducible/deterministic behaviour — confirm against the fastai docs.
set_seed(42,True)
# Seed the dataloaders' own `rng` object as well.
dbunch.rng.seed(42)

## Checking batch

In [None]:
# Visual sanity check: display a 2x3 grid of items from one batch.
dbunch.show_batch(figsize=(20, 8), nrows=2, ncols=3)

In [None]:
# Same 2x3 grid with unique=True.
# NOTE(review): presumably this repeats a single item to show the effect of
# the transforms — confirm against the fastai docs.
dbunch.show_batch(figsize=(20, 8), nrows=2, ncols=3, unique=True)

## Instantiating model

In [None]:
# Backbone architecture; reused when the learner is rebuilt for testing.
model_arch = resnet34
# CNN learner on the training dataloaders with cross-entropy loss and accuracy
# as the reported metric.
# n_in=1: presumably the spectrogram input has a single channel — TODO confirm.
learn = cnn_learner(dbunch, 
            model_arch,
            n_in=1,
            loss_func=CrossEntropyLossFlat(),
            metrics=[accuracy])

## Finding optimal learning rate

In [None]:
# Run the learning-rate finder, take its first suggested value and divide it
# by 10 to obtain the base learning rate used for training.
# NOTE(review): which suggestion sits at index 0 depends on the installed
# fastai version — confirm.
lr = learn.lr_find()[0]/10
print(lr)

## Training

In [None]:
# Training callbacks, all with explicit or default monitoring of the run:
#   - SaveModelCallback() with its default settings (model checkpointing),
#   - ReduceLROnPlateau on valid_loss (min_delta=0.1, patience=2),
#   - EarlyStoppingCallback on valid_loss (min_delta=0.05, patience=4).
callbacks = [SaveModelCallback(), ReduceLROnPlateau(monitor='valid_loss', min_delta=0.1, patience=2), EarlyStoppingCallback(monitor='valid_loss', min_delta=0.05, patience=4)]
# Fine-tune with 20 epochs requested, weight decay 0.1 and the base learning
# rate computed from the learning-rate finder.
learn.fine_tune(20, wd=0.1, base_lr=lr, cbs=callbacks)

In [None]:
## Save results
# learn.save('model0')

## Checking results

In [None]:
# Display a sample of the learner's predictions next to their targets.
learn.show_results()

In [None]:
# Build an interpretation object from the trained learner, plot the confusion
# matrix and list the first 10 entries of the most-confused category pairs.
# NOTE(review): no dataset is passed to from_learner; presumably the learner's
# validation split is used — confirm.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix(figsize=(12,12), dpi=60)
interp.most_confused()[:10]

## Testing model on new data

In [None]:
# Directory holding the converted held-out test files.
test_dir = wav_dir+'test'
# DataBlock for the held-out data: same blocks, readers and item transforms as
# the training DataBlock, but reading files from the test directory.
# No splitter is given, so the DataBlock's default split applies; the two
# resulting loaders (index 0 and 1) are evaluated separately in later cells.
auds_test = DataBlock(blocks = (AudioBlock(crop_signal_to=seconds*1000), CategoryBlock),  
                 get_x = ColReader("filename", pref=path/test_dir),
                 item_tfms = item_transforms,
                 get_y = ColReader("category")
                 )

# Dataloaders over the test table `df_test`, unshuffled and with a fixed seed.
dbunch_test = auds_test.dataloaders(df_test, bs=batch_size, shuffle=False, seed=42)

In [None]:
# Rebuild a learner on the test dataloaders with the same architecture, loss
# and metric, then load the weights stored under the name 'model'.
# NOTE(review): presumably 'model' is the checkpoint written by
# SaveModelCallback during training — confirm.
# NOTE(review): this assumes the test table contains the same set of
# categories as the training table, so the loaded weights fit — verify.
learn = cnn_learner(dbunch_test, 
            model_arch,
            n_in=1,
            loss_func=CrossEntropyLossFlat(),
            metrics=[accuracy])
learn.load('model')
# Re-seed the test dataloaders' rng and the global generators before evaluating.
dbunch_test.rng.seed(42)
set_seed(42,True)

In [None]:
# NOTE(review): the learner was already built from dbunch_test; `learn.data`
# looks like a leftover attribute name — confirm this assignment has any effect.
learn.data = dbunch_test
# Predictions and targets for the first loader of the test dataloaders,
# followed by the accuracy on that loader.
preds1, y = learn.get_preds(dl=dbunch_test[0])
acc1 = accuracy(preds1, y)

In [None]:
# Interpret the same loader that `preds1` was computed on. Without an explicit
# `dl`, from_learner falls back to its default dataset, so this cell would show
# the same confusion matrix as the cell that follows the second loader's
# predictions instead of describing the first loader.
interp = ClassificationInterpretation.from_learner(learn, dl=dbunch_test[0])
interp.plot_confusion_matrix(figsize=(12,12), dpi=60)
# First 10 entries of the most-confused category pairs.
interp.most_confused()[:10]

In [None]:
# NOTE(review): the learner was already built from dbunch_test; `learn.data`
# looks like a leftover attribute name — confirm this assignment has any effect.
learn.data = dbunch_test
# Predictions and targets for the second loader of the test dataloaders,
# followed by the accuracy on that loader.
preds2, y = learn.get_preds(dl=dbunch_test[1])
acc2 = accuracy(preds2, y)

In [None]:
# Confusion matrix and the first 10 most-confused category pairs for the
# reloaded learner.
# NOTE(review): no loader is passed to from_learner; presumably its default
# corresponds to the second loader (dbunch_test[1]) evaluated just before this
# cell — confirm.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix(figsize=(12,12), dpi=60)
interp.most_confused()[:10]

In [None]:
# Total number of predictions made across the two test loaders.
preds1.shape[0] + preds2.shape[0]

In [None]:
# Overall accuracy on the held-out data: the two per-loader accuracies
# averaged with each one weighted by its number of predictions.
(acc1*preds1.shape[0] + acc2*preds2.shape[0])/(preds1.shape[0] + preds2.shape[0])