In [None]:
import pickle
import IPython.display as ipd
# Feature extraction and data preprocessing
import librosa
import librosa.display
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from pathlib import Path
import csv
# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from scipy import signal
import noisereduce as nr
#Reports
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

import pickle

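## Preprocessing
Do not run the cells in this section unless you have new audio data: they re-extract features from the recordings and overwrite `extracted_data.csv`.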

In [None]:
# Root folder of the recordings; files are looked up as <root>/<ebird_code>/<filename>.
audio_path = Path('/media/sasanka/Expansion') / 'xeno-canto-bird-recordings-extended-a-m' / 'A-M'

In [None]:
# Pick one sample recording, play it, then load it and build a noise-reduced copy.
e_file = '/media/sasanka/Expansion/xeno-canto-bird-recordings-extended-a-m/A-M/amecro/XC264942.mp3'
ipd.display(ipd.Audio(e_file))
# No `sr` argument is passed, so librosa chooses the sample rate it returns.
y, sr = librosa.load(e_file)
reduced_noise = nr.reduce_noise(
    y=y,
    sr=sr,
    thresh_n_mult_nonstationary=2,
    stationary=False,
)

In [None]:
# Play back the noise-reduced version of the example clip.
ipd.Audio(data=reduced_noise, rate=sr)

In [None]:
from scipy import signal
def f_high(y, sr, cutoff_hz=2000, order=10):
    """Apply a Butterworth high-pass filter to an audio signal.

    Parameters
    ----------
    y : array-like
        Audio samples to filter (filtered along the last axis).
    sr : int or float
        Sampling rate of ``y`` in Hz.
    cutoff_hz : float, optional
        Cut-off frequency in Hz. Defaults to 2000, the value that used to be
        hard-coded here.
    order : int, optional
        Filter order. Defaults to 10, the value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        The filtered signal, same shape as ``y``.

    Raises
    ------
    ValueError
        Raised by ``scipy.signal.butter`` when the cut-off is not strictly
        between 0 and the Nyquist frequency (``sr / 2``).
    """
    # Second-order sections keep a high-order filter numerically stable; the
    # (b, a) polynomial form loses precision as the order grows.
    nyquist = sr / 2
    sos = signal.butter(order, cutoff_hz / nyquist, btype='highpass', output='sos')
    return signal.sosfilt(sos, y)

In [None]:
# Play back the example clip after noise reduction followed by the high-pass filter.
ipd.Audio(data=f_high(reduced_noise, sr), rate=sr)

In [None]:
# Overlay the raw example clip and each processed variant on a single axis.
# Every trace gets a label and some transparency so the overlapping waveforms
# can be told apart in the legend.
fig, ax = plt.subplots(figsize=(14, 5))
variants = [
    ('raw', y),
    ('high-pass', f_high(y, sr)),
    ('noise-reduced', reduced_noise),
    ('noise-reduced + high-pass', f_high(reduced_noise, sr)),
]
for label, samples in variants:
    librosa.display.waveshow(samples, sr=sr, ax=ax, label=label, alpha=0.6)
ax.set(title='Example clip: raw vs. processed waveforms', ylabel='Amplitude')
ax.legend()
plt.show()

In [None]:
# Notebook-level sample rate; this rebinds the `sr` obtained when the example clip was loaded.
sr = 32_000

In [None]:
# Load the recording metadata and print every distinct species code in sorted order.
df = pd.read_csv('./train_extended.csv')
print(sorted(set(df['ebird_code'])))

In [None]:
# Reload the recording metadata so this cell can be re-run on its own.
df = pd.read_csv('./train_extended.csv')
# Keep only high-rated (rating > 3.0) and short (duration < 20) recordings;
# shorter files mean less data to process.
# Both conditions are evaluated on `df` and combined into a single mask, so the
# mask's index always matches the frame it is applied to (no implicit reindexing).
dff = df[(df['rating'] > 3.0) & (df['duration'] < 20)]
print(len(dff))

In [None]:
# Keep only species that still have at least 20 recordings in `dff`
# (species with fewer than 20 rows are dropped).
# dfc: number of rows per ebird_code in the current `dff`
dfc = dff.groupby('ebird_code')['ebird_code'].count()
# Remove every row whose species code has a count below 20
dff = dff[~dff['ebird_code'].isin(dfc[dfc.values < 20].index)]
print(len(dff))

In [None]:
# Column names of the feature table: file name and label, then the features of
# the original signal, then the same features prefixed with `de_` for the
# denoised signal.
base_features = [
    'chroma_stft', 'rmse', 'spectral_centroid',
    'spectral_bandwidth', 'rolloff', 'zero_crossing_rate',
]
base_features += [f'mfcc{i}' for i in range(1, 21)]
# Deriving the `de_` columns from the same list keeps both halves in step.
header = ['filename', 'label'] + base_features + [f'de_{name}' for name in base_features]

# Mode 'w' truncates any existing extracted_data.csv, so this starts a fresh
# table containing only the header row. The context manager closes the handle
# even if the write fails.
with open('extracted_data.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)

In [None]:
def reduce_noise(y, sr):
    """Denoise a signal and then high-pass filter it.

    Runs ``nr.reduce_noise`` in non-stationary mode on ``y`` and passes the
    result through ``f_high`` with the same sample rate ``sr``.
    Returns the filtered signal.
    """
    denoised = nr.reduce_noise(
        y=y,
        sr=sr,
        stationary=False,
        thresh_n_mult_nonstationary=2,
    )
    return f_high(denoised, sr)

In [None]:
def _mean_features(samples, sample_rate):
    """Return the mean of each feature of one signal, as strings in header column order."""
    chroma_stft = librosa.feature.chroma_stft(y=samples, sr=sample_rate)
    rmse = librosa.feature.rms(y=samples)
    spec_cent = librosa.feature.spectral_centroid(y=samples, sr=sample_rate)
    spec_bw = librosa.feature.spectral_bandwidth(y=samples, sr=sample_rate)
    rolloff = librosa.feature.spectral_rolloff(y=samples, sr=sample_rate)
    zcr = librosa.feature.zero_crossing_rate(samples)
    mfcc = librosa.feature.mfcc(y=samples, sr=sample_rate)
    values = [f'{np.mean(feat)}' for feat in (chroma_stft, rmse, spec_cent, spec_bw, rolloff, zcr)]
    # One value per MFCC row returned by librosa
    values += [f'{np.mean(coeff)}' for coeff in mfcc]
    return values


def feature_extractor(sound_path, category_name, file_name):
    """Extract summary features from one recording and append them to extracted_data.csv.

    The recording is loaded as mono audio. The same set of features is computed
    twice, once on the loaded signal and once on its denoised version (see
    ``reduce_noise``), and both sets are written as a single CSV row after the
    file name and label.

    Parameters
    ----------
    sound_path : path-like
        Location of the audio file to load.
    category_name : str
        Label written to the ``label`` column.
    file_name : str
        Name written to the ``filename`` column.

    Returns
    -------
    None
        The result is appended to ``extracted_data.csv`` in the working directory.

    Raises
    ------
    Exception
        Errors from loading the audio, computing features or writing the file
        propagate to the caller.
    """
    y, y_sr = librosa.load(sound_path, mono=True)
    # Every feature is computed with the rate the audio was actually loaded at
    # (y_sr), never the notebook-level `sr`, which may hold a different value.
    row = [file_name, category_name]
    row += _mean_features(y, y_sr)
    row += _mean_features(reduce_noise(y, y_sr), y_sr)
    # The row is written as a list, so whitespace inside a file name or label
    # cannot shift the columns; `with` closes the file even if the write fails.
    with open('extracted_data.csv', 'a', newline='') as csv_file:
        csv.writer(csv_file).writerow(row)

In [None]:
# Distinct species codes remaining in the filtered metadata `dff`.
sound_categories = dff['ebird_code'].unique()

In [None]:
# Extract features for every selected recording, one species at a time.
# Processing is best effort: a recording that fails is skipped, but the failure
# is recorded here so it can be inspected or retried afterwards.
failed_files = []  # (species code, file name, error text)
category_progress = tqdm(sound_categories)
for category_name in category_progress:
    category_progress.set_description(category_name)
    # File names of this species' recordings in the filtered metadata
    l_files = dff.loc[dff['ebird_code'] == category_name, 'filename'].values
    tqdm.write("Bird: "+category_name+"  files: "+str(len(l_files)))
    for file_name in tqdm(l_files, desc='Files'):
        try:
            sound_path = audio_path/category_name/file_name
            feature_extractor(sound_path, category_name, file_name)
        except Exception as e:
            failed_files.append((category_name, file_name, str(e)))
            # tqdm.write keeps the message from breaking the progress bars
            tqdm.write(f"Skipped {category_name}/{file_name}: {e!r}")
if failed_files:
    tqdm.write(f"{len(failed_files)} recording(s) could not be processed; see `failed_files`")