In this file we get the chromagrams :)

In [1]:
#!pip install librosa
from pathlib import Path
import os
import pandas as pd
import librosa
import re
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import butter, sosfiltfilt
import cv2

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, silhouette_samples

In [2]:
#constants
SR = 22050

In [3]:
DIR = './dataset/songs_extracted'

In [4]:
#function that loads the songs from the songs_extracted dataset
def load_songs_extracted(directory):
    data = []
    for bird_names in os.listdir(directory)[:10]:
        path_bird_names = os.path.join(directory, bird_names)
        if os.path.isdir(path_bird_names):
            for channel in os.listdir(path_bird_names):
                if channel == "CH2":
                    path_channel  = os.path.join(path_bird_names, channel)
                    if os.path.isdir(path_channel):
                        for song in  os.listdir(path_channel):
                            song_num = int(re.search('\d+', song).group())
                            path_song = os.path.join(path_channel, song)
                            signal, sr = librosa.load(path_song)
                            data.append([bird_names, song_num, sr, signal]) #channel is CH2 by default

    # return dataframe creation
    return pd.DataFrame(data, columns=['bird', 'song', 'sr', 'signal'])

In [5]:
df = load_songs_extracted(Path(DIR))
df.head()

Unnamed: 0,bird,song,sr,signal
0,reginald_con,6,22050,"[-0.0002275218, -9.055339e-05, 0.00016206059, ..."
1,reginald_con,73,22050,"[0.00046550576, 0.00059071305, 0.00023334507, ..."
2,reginald_con,20,22050,"[0.00013039293, -7.703958e-05, -0.00013563978,..."
3,reginald_con,50,22050,"[-2.1934313e-05, -5.5870634e-05, -5.6157674e-0..."
4,reginald_con,4,22050,"[0.00026778644, 0.00013458135, -0.00020446077,..."


In [6]:
df.shape

(1478, 4)

# Preprocessing

truncation: understand if having signals of different length is a problem

normalization: understand how and if to normalize (-1,1)

### Padding

In [7]:
df['signal_length'] = df['signal'].apply(lambda x: len(x))
mean = np.mean(df['signal_length'])
mean = int(mean)
print('mean:', mean)
print('standard deviation:', np.std(df['signal_length']))
print(df[df['signal_length'] < 100000]['signal_length'].count(), 'signals are not truncated out of', df['signal_length'].count())

policy = 'truncation' #'padding'

if policy == 'padding':
    maxim = np.max(df['signal_length'])
    print(maxim)
    df['padded_signal'] = df['signal'].apply(lambda x: np.pad(x, (0, maxim-len(x))) )
    
elif policy == 'truncation':
    #TODO truncation
    trunc_thresh = 11*22050  #maxim
    #padding
    df['padded_signal'] = df['signal'].apply(lambda x: np.pad(x, (0, trunc_thresh-len(x))) if len(x)<trunc_thresh else x) 
    #truncation
    df['padded_signal'] = df['padded_signal'].apply(lambda x: x[0:trunc_thresh] if len(x)>trunc_thresh else x) 
    
print(len(df['padded_signal'][1]))

mean: 73538
standard deviation: 28347.52760631865
1319 signals are not truncated out of 1478
242550


### Filtering
We read on nscnet that they removed the noise.
We followed their idea, but are not sure if ch1 (the one they used) is more noisy than ch2 and so we don't need it.

We need to find a way to properly asses the level of noise in the data.

In [8]:
sr = SR     
order = 5
lf = 1500
filtered = []
def high_pass(sig):
    nyq = sr/2
    sos = butter(order, [lf/nyq], analog=False, btype='highpass', output='sos')
    return sosfiltfilt(sos,sig)

df['filtered_signal'] = df['padded_signal'].apply(lambda x: high_pass(x))    

### Chromagrams

In [None]:
chromagrams = []
for signal in df['filtered_signal']:
    chrom = librosa.feature.chroma_stft(y=signal, sr=SR)
    chromagrams.append(chrom)
    
#chromagrams = df['normalized_signal'].apply(lambda x: librosa.feature.chroma_stft(y=x, sr=SR))

In [None]:
#chromagrams
print(len(chromagrams))

116


In [None]:
plt.figure(figsize=(30, 10))
for i in range(len(chromagrams)):
	librosa.display.specshow(chromagrams[i], x_axis='time', y_axis='chroma', hop_length=512, cmap='coolwarm')
	plt.savefig("chromagrams/" + str(i) + "out.png", bbox_inches='tight', facecolor='white')
	plt.clf()


<Figure size 2160x720 with 0 Axes>