# Exploring the use of Rapids and cuSignal for SSFT for training pipeline

[Forked originally from here](https://www.kaggle.com/code/gabrielvinicius/speedup-spectogram-with-rapids).  This seems like a promising idea, but I'm running out of CUDA memory after about 600 samples.  This issue would need addressing.

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import seaborn as sns
import re
import librosa
import librosa.display

import IPython.display as ipd
from urllib.request import urlopen
from datetime import datetime, timedelta

import plotly.graph_objects as go
from scipy.interpolate import interp1d 
from bs4 import BeautifulSoup as bs
import librosa
import librosa.display
import IPython.display as ipd
import time
import random
# import noisereduce as nr
import gc
import torch


from tqdm.notebook import tqdm

# Pytorch
import torch
import torchaudio
import requests
from PIL import Image

In [2]:
import sys
!cp /kaggle/input/rapids/rapids.21.06 /opt/conda/envs/rapids.tar.gz
!cd /opt/conda/envs/ && tar -xzvf rapids.tar.gz > /dev/null
sys.path = ["/opt/conda/envs/rapids/lib/python3.7/site-packages"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib/python3.7"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib"] + sys.path 
!cp /opt/conda/envs/rapids/lib/libxgboost.so /opt/conda/lib/

import cusignal
import cupy as cp

In [3]:
data_folder = Path('/kaggle/input') 
train_23 = data_folder / 'birdclef-2023' / 'train_audio'
paths = {f:str(f.name) for f in Path(train_23).rglob('*.ogg')}

In [4]:
class CFG():
    '''Configuration File'''
    test_samples = 500
    n_fft = 2048
    frame_size = 5 # seg
    frame_step = 5  # seg

    hop_length  = 128
    frame_size_t  = 256
    n_mels     = 250
    win_length = 1024
    f_min      = 500
    f_max      = 9000


In [5]:
meta = pd.read_csv('../input/birdclef-2023/train_metadata.csv')
meta['secondary_labels'] = meta['secondary_labels'].apply(lambda x: re.findall(r"'(\w+)'", x))
meta['len_sec_labels'] = meta['secondary_labels'].map(len)
meta.head(3)

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,url,filename,len_sec_labels
0,abethr1,[],['song'],4.3906,38.2788,Turdus tephronotus,African Bare-eyed Thrush,Rolf A. de By,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/128013,abethr1/XC128013.ogg,0
1,abethr1,[],['call'],-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,3.5,https://www.xeno-canto.org/363501,abethr1/XC363501.ogg,0
2,abethr1,[],['song'],-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,3.5,https://www.xeno-canto.org/363502,abethr1/XC363502.ogg,0


In [6]:
def get_spectogram_librosa(path, plot=False, spend_time=False):
    
    '''
    - path: audio path
    - plot: if plot spectogram
    - spend_time: if return spend_time to calculate spectogram
    '''
    data, sample_rate = librosa.load(path)
    
    start_time_lib = time.time()
    stft_lib = librosa.stft(data, n_fft=CFG.n_fft, hop_length=CFG.hop_length)
    t_lib = np.abs(stft_lib)
    # t_lib = librosa.amplitude_to_db(spectrogram_lib)
    end_time_lib = time.time()

    total_time_lib = round(end_time_lib - start_time_lib, 5 )
    
    
    if plot:
        fig = plt.figure(figsize = (10,5))
        librosa.display.specshow(cp.asnumpy(t_lib), sr=sample_rate, hop_length=CFG.hop_length)
        spcie = path.split('/')[-2]
        title = f'Librosa Spectogram - {spcie} ' 
        title = title + f'- time process {total_time_lib}' if spend_time else title
        plt.title(title)
        plt.xlabel('Time')
        plt.ylabel('Frequency')
        plt.show()
        
    stft_lib = None, None
    del stft_lib
    gc.collect()
    
    if spend_time:    
        return t_lib, total_time_lib
    else:    
        return t_lib
        
    
def get_spectogram_cusignal(path, plot=False, spend_time=False):
    
    '''
    - path: audio path
    - plot: if plot spectogram
    - spend_time: if return spend_time to calculate spectogram
    '''
    data, sample_rate = librosa.load(path)
    
    start_time_cusig = time.time()
    f, t_cusig, Zxx = cusignal.stft(data, nfft=CFG.n_fft , noverlap=CFG.hop_length)
    #f, t_cusig, Zxx = cusignal.stft(data, nfft=CFG.n_fft , nperseg=CFG.hop_length)
    
    end_time_cusig = time.time()
    total_time_cusig = round(end_time_cusig - start_time_cusig, 5)
    
    
    if plot:
        fig = plt.figure(figsize = (10,5))
        plt.pcolormesh(cp.asnumpy(t_cusig), cp.asnumpy(f), cp.asnumpy(cp.abs(Zxx)))
        spcie = path.split('/')[-2]
        title = f'CuSignal Spectogram - {spcie} ' 
        title = title + f'- time process {total_time_cusig}' if spend_time else title
        plt.title(title)
        plt.xlabel('Time')
        plt.ylabel('Frequency')
        plt.axis('off')
        plt.show()
    
    f, Zxx = None, None
    
    del f
    del Zxx
    gc.collect()
        
    if spend_time:    
        return t_cusig, total_time_cusig
    else:    
        return t_lib    

 Calculate Spectogram for a random sample of birdcalls.

In [7]:
top = meta['primary_label'].value_counts().index
mapping = meta.groupby('primary_label').agg({'common_name':'unique'}).to_dict()['common_name']

times_librosa = []
times_cusignal = []

for idx, (path, name ) in enumerate(tqdm(random.sample(paths.items(), CFG.test_samples))):
    
    #Calculate Spectogram
    t_lib, total_time_cusig =  get_spectogram_cusignal(path = path, 
                   plot=False, 
                   spend_time=True)
    

    t_cusig, total_time_lib = get_spectogram_librosa(path = path, 
                           plot=False, 
                           spend_time=True)
    
    times_librosa.append(total_time_lib)
    times_cusignal.append(total_time_cusig)
    
print('#'*200,'\n')
print('Mean time for Librosa: ', round(np.mean(times_librosa), 5))
print('Mean time for cuSignal: ', round(np.mean(times_cusignal), 5))

  0%|          | 0/500 [00:00<?, ?it/s]

######################################################################################################################################################################################################## 

Mean time for Librosa:  0.17193
Mean time for cuSignal:  0.0209
