In [None]:
import librosa as librosa
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt
import scipy
%matplotlib inline
import librosa.display
from tqdm.notebook import tqdm
import warnings  
warnings.filterwarnings('ignore') # Librosa's specshow gives warnings about a matplotlib deprecation thing

In [None]:
 !pip install librosa

In [None]:
# glob returns matches in arbitrary, filesystem-dependent order; sort so that
# positional picks such as train_files[0] refer to the same recording on every run.
train_files = sorted(glob.glob('train_audio/*/*.ogg'))
train_files[0]

In [None]:
# chunk, rate = librosa.load(train_files[200], sr=32000, offset=3, duration=5)


In [None]:
# def chunk_to_spec(chunk, SPEC_HEIGHT=64,SPEC_WIDTH=256, rate=32000, FMIN=500, FMAX=12500):
#     mel_spec = librosa.feature.melspectrogram(y=chunk, 
#                                               sr=32000, 
#                                               n_fft=1024, 
#                                               hop_length=int(32000 * 5 / (SPEC_WIDTH - 1)), 
#                                               n_mels=SPEC_HEIGHT, 
#                                               fmin=FMIN, 
#                                               fmax=FMAX)
#     mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
#     return mel_spec_db

# def display_spec(spec, SPEC_HEIGHT=64,SPEC_WIDTH=256, rate=32000, FMIN=500, FMAX=12500):
#     librosa.display.specshow(spec, 
#                              sr=32000, 
#                              hop_length=int(32000 * 5 / (SPEC_WIDTH - 1)), 
#                              x_axis='time', 
#                              y_axis='mel',
#                              fmin=FMIN, 
#                              fmax=FMAX, 
#                              cmap=plt.get_cmap('viridis'))
    
# spec = chunk_to_spec(chunk)
# # display_spec(spec) # Will give axis labels in Hz and time
# plt.imshow(spec, cmap='inferno') # Simple and compact

In [None]:

# Save spectrogram as png:
def saveSpectogram(specgram: np.ndarray, filename: str, label: np.uint8, ind: int) -> None:
    """Write one spectrogram frame to disk as a PNG image.

    The image is saved to
    ``IMAGES_DIR + 'class_<label>/<bird>_<file id>_<(ind + 1) * 5>.png'``.

    Args:
        specgram: Spectrogram values. 80 is added to every value and the
            result is converted to ``uint8`` before saving (presumably the
            input is in dB with a floor of -80 -- confirm against
            ``createSpectrogram``).
        filename: Audio path of the form ``<bird>/<file>.<ext>``; the part
            before the first ``/`` becomes the bird name and the file name
            without its extension becomes the file id.
        label: Class label used to pick the ``class_<label>`` sub-directory.
        ind: Zero-based index of the frame within the recording; it is
            encoded in the file name as ``(ind + 1) * 5``.
    """
    bird_name = filename.rsplit('/', 2)[0]
    file_id = filename.rsplit('/', 2)[1].rsplit('.', 2)[0]
    # Use the `label` argument: the previous `labels[i]` referred to names that
    # are not defined in this function and ignored the value passed by the caller.
    file_name = IMAGES_DIR + 'class_' + str(label) + '/' + bird_name + '_' + file_id + '_' + str((ind + 1) * 5) + '.png'
    specgram = specgram + 80 # -80 dB -> Min
    # Clip before casting so out-of-range values saturate instead of wrapping around.
    specgram = np.clip(specgram, 0, 255).astype(np.uint8) # 0 - 255 the pixel value
    # plt.imsave writes the array straight to the file; no axes are involved,
    # so no pyplot axis configuration is needed here.
    plt.imsave(file_name, specgram)
    

# Common function:
def convertAudio(filename: str, label: np.uint8) -> None:
    """Turn one training recording into a series of saved spectrogram images.

    The audio at ``TRAIN_DIR + filename`` is loaded, cut into frames by
    ``framing`` (called with ``frame_len=5``), and each frame is converted
    with ``createSpectrogram`` and written out through ``saveSpectogram``.

    Args:
        filename: Path of the recording relative to ``TRAIN_DIR``.
        label: Class label forwarded unchanged to ``saveSpectogram``.
    """
    audio = loadAudio(filename=TRAIN_DIR + filename)
    total_duration = librosa.get_duration(y=audio, sr=SAMPLE_RATE)
    chunks = framing(
        sig=audio,
        sample_rate=SAMPLE_RATE,
        frame_len=5,
        duration_time=total_duration,
    )

    n_chunks = chunks.shape[0]
    for frame_no in range(n_chunks):
        # One image per frame; the frame index ends up in the output file name.
        image = createSpectrogram(frame=chunks[frame_no])
        saveSpectogram(specgram=image, filename=filename, label=label, ind=frame_no)

In [None]:
# def peak_plot(y, sr, FMIN=500, FMAX=12500):
#     # PCEN spec
#     plt.figure(figsize=(10, 6))
#     plt.subplot(3, 1, 1)
#     melspec = librosa.feature.melspectrogram(y, sr=sr,
#         fmin=FMIN, fmax=FMAX, n_mels=64)
#     pcen = librosa.core.pcen(melspec, sr=sr,
#         gain=0.8, bias=10, power=0.25, time_constant=0.06, eps=1e-06)
#     librosa.display.specshow(pcen, sr=sr,
#         fmin=FMIN, fmax=FMAX,
#         x_axis='time', y_axis='mel', cmap='magma_r')
#     # plt.title('PCEN-based SNR')
#     plt.tight_layout()

#     # SNR and a smoothed SNR with kernel 15
#     plt.subplot(3, 1, 2)
#     pcen_snr = np.max(pcen,axis=0) - np.min(pcen,axis=0)
#     pcen_snr = librosa.power_to_db(pcen_snr / np.median(pcen_snr))
#     median_pcen_snr = scipy.signal.medfilt(pcen_snr, kernel_size=15)
#     times = np.linspace(0, len(y)/sr, num=melspec.shape[1])
#     plt.plot(times, pcen_snr, color="orange")
#     plt.plot(times, median_pcen_snr, color="blue")
#     plt.xlim(times[0], times[-1])
#     plt.ylim(0, 10)
#     # And go through, picking some peaks
#     for i in range(12):
#         t_peak = np.argmax(median_pcen_snr)
#         plt.scatter(times[t_peak], median_pcen_snr[t_peak], c='red', zorder=100)
#         median_pcen_snr[t_peak-50:t_peak+50] = 0 # zero out around the peak to find the next one

#     # Kernel 55 for even smoother (bad idea?)
#     plt.subplot(3, 1, 3)
#     median_pcen_snr = scipy.signal.medfilt(pcen_snr, kernel_size=55)
#     times = np.linspace(0, len(y)/sr, num=melspec.shape[1])
#     plt.plot(times, median_pcen_snr, color="blue")
#     plt.xlim(times[0], times[-1])
#     plt.ylim(0, 10)
#     # And go through, picking some peaks
#     for i in range(12):
#         t_peak = np.argmax(median_pcen_snr)
#         plt.scatter(times[t_peak], median_pcen_snr[t_peak], c='red', zorder=100)
#         median_pcen_snr[t_peak-50:t_peak+50] = 0 # zero out around the peak to find the next one

In [None]:
# y, sr = librosa.load(train_files[0], sr=32000, duration=30) # 30 seconds to play with
# peak_plot(y, sr)

In [None]:
# y, sr = librosa.load(train_files[100], sr=32000) # A whole recording
# peak_plot(y, sr)

In [None]:
def get_peaks(y, sr, n_peaks=12, kernel_size=15, zero_dist=50, FMIN=500, FMAX=12500):
    """Pick the strongest activity peaks in an audio signal.

    A 64-band mel spectrogram is PCEN-normalised, reduced to a per-frame
    "SNR" curve (max minus min over the mel bands, relative to the median,
    in dB), median-filtered, and then scanned greedily: the highest frame is
    recorded and its neighbourhood is masked before looking for the next one.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``.
        n_peaks: Number of peaks to return.
        kernel_size: Kernel size passed to ``scipy.signal.medfilt`` for
            smoothing the SNR curve.
        zero_dist: Number of spectrogram frames masked on each side of a
            picked peak before searching for the next one.
        FMIN: Lowest frequency of the mel spectrogram.
        FMAX: Highest frequency of the mel spectrogram.

    Returns:
        A list of ``n_peaks`` positions taken from a linear grid that runs
        from 0 to ``len(y) / sr`` with one point per spectrogram frame,
        ordered from the strongest peak downwards.
    """
    # Spec (y is passed by keyword: newer librosa releases reject it positionally)
    melspec = librosa.feature.melspectrogram(y=y, sr=sr,
        fmin=FMIN, fmax=FMAX, n_mels=64)
    pcen = librosa.core.pcen(melspec, sr=sr,
        gain=0.8, bias=10, power=0.25, time_constant=0.06, eps=1e-06)
    # SNR
    pcen_snr = np.max(pcen,axis=0) - np.min(pcen,axis=0)
    pcen_snr = librosa.power_to_db(pcen_snr / np.median(pcen_snr))
    # Smoothed SNR
    median_pcen_snr = scipy.signal.medfilt(pcen_snr, kernel_size=kernel_size)
    # And go through, picking some peaks
    times = np.linspace(0, len(y)/sr, num=melspec.shape[1])
    peak_locs = []
    for _ in range(n_peaks):
        t_peak = np.argmax(median_pcen_snr)
        peak_locs.append(times[t_peak])
        # Zero out around the peak to find the next one. The lower bound is
        # clamped at 0: a negative start index would wrap to the end of the
        # array, leave the peak unmasked, and make it be picked again.
        # NOTE: masked frames are set to 0, so once every frame above 0 has
        # been masked, later picks can land on already-masked frames.
        mask_start = max(0, t_peak - zero_dist)
        median_pcen_snr[mask_start:t_peak + zero_dist] = 0

    return peak_locs

# Load a short excerpt here so the cell also runs on a fresh kernel; without it
# `y` and `sr` are not defined anywhere that is executed before this point.
y, sr = librosa.load(train_files[0], sr=32000, duration=30) # 30 seconds to play with
print(get_peaks(y, sr, n_peaks=5))

In [None]:
# info = []
# for f in tqdm(train_files):
#     y, sr = librosa.load(f, sr=32000)
#     peaks =  get_peaks(y, sr, n_peaks=20)
#     info.append({
#         'fn':f,
#         'len':len(y), 
#         'label':f.split('train_audio/')[1].split('/')[0],
#         'peaks':'#'.join([str(p)[:5] for p in peaks]),  
#     })

# # Save to file        
# info_df = pd.DataFrame(info)
# info_df.to_csv('info_df.csv', index=False)
# info_df.head()