# Model Analysis

In [77]:
import pandas as pd
from numpy import loadtxt
from sklearn.metrics import confusion_matrix, classification_report, recall_score, accuracy_score
import matplotlib.pyplot as plt 

In [78]:
def read_preds(pred_file):
    preds = loadtxt(pred_file, dtype= str, delimiter='\n', unpack=False)
    return preds

In [79]:
def read_pred_probs(pred_file):
    pred_prob = pd.read_csv(pred_file)
    return pred_prob.apply(lambda x : x.max(), axis=1)

In [80]:
def read_y_test(y_test_file):
    y_test = pd.read_csv(y_test_file)
    return y_test

In [81]:
def get_confusion_matrix(y_test,preds):
    labels=['0_background','1_chimpanze']
    cm = confusion_matrix(y_test,preds)

    print(cm)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(cm)
    fig.colorbar(cax)
    ax.set_xticklabels([''] + labels)
    ax.set_yticklabels([''] + labels)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.xticks(rotation=90)
    plt.show()

In [82]:
def get_classification_report(y_test, preds):
    cr = classification_report(y_test, preds , output_dict=True)
    return pd.DataFrame(cr).transpose()

In [83]:
def get_selected_val(values):
    l = values.split(' ')
    lf = list(map(float, l))
    return lf.index(max(lf))

## Standard CNN-32

In [42]:
cnn_pred = read_preds('../output/models/cnn/_predictions.txt')
cnn_y = read_y_test('../output/models/cnn/_y_test.csv')

preds = [get_selected_val(row) for row in cnn_pred ]
ys = cnn_y.idxmax(axis=1)   
y = ys.apply(lambda x:int(x))

get_classification_report(y,preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.978154,0.891301,0.93271,12760.0
1,0.674337,0.918746,0.777793,3126.0
accuracy,0.896701,0.896701,0.896701,0.896701
macro avg,0.826245,0.905023,0.855251,15886.0
weighted avg,0.91837,0.896701,0.902226,15886.0


In [41]:
#get_confusion_matrix(y,preds)

## Standard CNN-32-Sanaga

In [85]:
cnn_pred = read_preds('../output/models/cnn_64/test/_predictions.txt')
cnn_y = read_y_test('../output/models/cnn_64/test/_y_test.csv')

preds = [get_selected_val(row) for row in cnn_pred ]
ys = cnn_y.idxmax(axis=1)   
y = ys.apply(lambda x:int(x))

get_classification_report(y,preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.936034,0.395386,0.55594,35937.0
1,0.517905,0.960063,0.672845,24313.0
accuracy,0.623253,0.623253,0.623253,0.623253
macro avg,0.72697,0.677724,0.614393,60250.0
weighted avg,0.767305,0.623253,0.603115,60250.0


## Standard CNN-64

In [86]:
cnn_pred = read_preds('../output/models/cnn_64/_predictions.txt')
cnn_y = read_y_test('../output/models/cnn_64/_y_test.csv')

preds = [get_selected_val(row) for row in cnn_pred ]
ys = cnn_y.idxmax(axis=1)   
y = ys.apply(lambda x:int(x))

get_classification_report(y,preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.959905,0.996042,0.97764,20719.0
1,0.989644,0.900897,0.943187,8698.0
accuracy,0.96791,0.96791,0.96791,0.96791
macro avg,0.974774,0.94847,0.960414,29417.0
weighted avg,0.968698,0.96791,0.967453,29417.0


## Standard CNN-64-Sanaga

In [87]:
cnn_pred = read_preds('../output/models/cnn_64/test/_predictions.txt')
cnn_y = read_y_test('../output/models/cnn_64/test/_y_test.csv')

preds = [get_selected_val(row) for row in cnn_pred ]
ys = cnn_y.idxmax(axis=1)   
y = ys.apply(lambda x:int(x))

get_classification_report(y,preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.848114,0.874475,0.861092,35937.0
1,0.805527,0.768519,0.786588,24313.0
accuracy,0.831718,0.831718,0.831718,0.831718
macro avg,0.82682,0.821497,0.82384,60250.0
weighted avg,0.830928,0.831718,0.831027,60250.0


## CNN6-64-1

In [92]:
cnn_pred = read_preds('../output/models/cnn6_64_1/_predictions.txt')
cnn_y = read_y_test('../output/models/cnn6_64_1/_y_test.csv')

preds = [get_selected_val(row) for row in cnn_pred ]
ys = cnn_y.idxmax(axis=1)   
y = ys.apply(lambda x:int(x))

get_classification_report(y,preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.991939,0.968097,0.979873,20719.0
1,0.928121,0.98126,0.953951,8698.0
accuracy,0.971989,0.971989,0.971989,0.971989
macro avg,0.96003,0.974678,0.966912,29417.0
weighted avg,0.973069,0.971989,0.972208,29417.0


## CNN6-64-1-Sanaga

In [93]:
cnn_pred = read_preds('../output/models/cnn6_64_1/test/_predictions.txt')
cnn_y = read_y_test('../output/models/cnn6_64_1/test/_y_test.csv')

preds = [get_selected_val(row) for row in cnn_pred ]
ys = cnn_y.idxmax(axis=1)   
y = ys.apply(lambda x:int(x))

get_classification_report(y,preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.86896,0.884242,0.876534,35937.0
1,0.824332,0.802904,0.813477,24313.0
accuracy,0.851419,0.851419,0.851419,0.851419
macro avg,0.846646,0.843573,0.845006,60250.0
weighted avg,0.850951,0.851419,0.851088,60250.0


## CNN10-64-1

In [94]:
cnn_pred = read_preds('../output/models/cnn10_64_1/_predictions.txt')
cnn_y = read_y_test('../output/models/cnn10_64_1/_y_test.csv')

preds = [get_selected_val(row) for row in cnn_pred ]
ys = cnn_y.idxmax(axis=1)   
y = ys.apply(lambda x:int(x))

get_classification_report(y,preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.987776,0.990637,0.989204,20719.0
1,0.977541,0.970798,0.974158,8698.0
accuracy,0.984771,0.984771,0.984771,0.984771
macro avg,0.982659,0.980717,0.981681,29417.0
weighted avg,0.98475,0.984771,0.984755,29417.0


## CNN10-64-1-Sanaga

In [95]:
cnn_pred = read_preds('../output/models/cnn10_64_1/test/_predictions.txt')
cnn_y = read_y_test('../output/models/cnn10_64_1/test/_y_test.csv')

preds = [get_selected_val(row) for row in cnn_pred ]
ys = cnn_y.idxmax(axis=1)   
y = ys.apply(lambda x:int(x))

get_classification_report(y,preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.874894,0.891254,0.882998,35937.0
1,0.834694,0.811623,0.822997,24313.0
accuracy,0.85912,0.85912,0.85912,0.85912
macro avg,0.854794,0.851439,0.852998,60250.0
weighted avg,0.858672,0.85912,0.858786,60250.0


## CNN10-64-3

In [51]:
cnn_pred = read_preds('../output/models/cnn10_64_3/_predictions.txt')
cnn_y = read_y_test('../output/models/cnn10_64_3/_y_test.csv')

preds = [get_selected_val(row) for row in cnn_pred ]
ys = cnn_y.idxmax(axis=1)   
y = ys.apply(lambda x:int(x))

get_classification_report(y,preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.932888,0.940484,0.93667,20633.0
1,0.877102,0.862598,0.86979,10160.0
accuracy,0.914786,0.914786,0.914786,0.914786
macro avg,0.904995,0.901541,0.90323,30793.0
weighted avg,0.914481,0.914786,0.914603,30793.0


## CNN10-64-3-Sanaga

In [52]:
cnn_pred = read_preds('../output/models/cnn10_64_3/test/_predictions.txt')
cnn_y = read_y_test('../output/models/cnn10_64_3/test/_y_test.csv')

preds = [get_selected_val(row) for row in cnn_pred ]
ys = cnn_y.idxmax(axis=1)   
y = ys.apply(lambda x:int(x))

get_classification_report(y,preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.895351,0.631856,0.740872,35937.0
1,0.620797,0.89084,0.731698,24313.0
accuracy,0.736365,0.736365,0.736365,0.736365
macro avg,0.758074,0.761348,0.736285,60250.0
weighted avg,0.784559,0.736365,0.73717,60250.0


## Resnet50

In [73]:
res_pred = read_preds('../output/models/resnet/_predictions.txt')
res_y = read_y_test('../output/models/resnet/_y_test.csv')

preds = [get_selected_val(row) for row in res_pred ]
ys = res_y.idxmax(axis=1)   
y = ys.apply(lambda x:int(x))

get_classification_report(y,preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.665613,0.969369,0.789274,20633.0
1,0.150538,0.011024,0.020543,10160.0
accuracy,0.653168,0.653168,0.653168,0.653168
macro avg,0.408075,0.490197,0.404909,30793.0
weighted avg,0.495666,0.653168,0.535635,30793.0


## Resnet50-Sanaga

In [72]:
res_pred = read_preds('../output/models/resnet/test/_predictions.txt')
res_y = read_y_test('../output/models/resnet/test/_y_test.csv')

preds = [get_selected_val(row) for row in res_pred ]
ys = res_y.idxmax(axis=1)   
y = ys.apply(lambda x:int(x))

get_classification_report(y,preds)

Unnamed: 0,precision,recall,f1-score,support
0,0.666601,0.947213,0.782511,35937.0
1,0.793468,0.299757,0.43513,24313.0
accuracy,0.685942,0.685942,0.685942,0.685942
macro avg,0.730035,0.623485,0.608821,60250.0
weighted avg,0.717796,0.685942,0.642331,60250.0


In [64]:
import glob
import librosa
import pandas as pd
import numpy as np
import argparse
import os
import sys
from PIL import Image
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
import librosa.display

def compute_melspectrogram_with_fixed_size(audio, sample_rate, window_length_set, hop_length_set, n_mel_set,
                                           new_img_size):
    """Create melspectrogram for a given audio

            Parameters
            ----------
            audio: np.ndarray
                audio time-series.
            sample_rate: int
                sampling rate of fp
            window_length_set: list
                length of the FFT windows, a list of three
            hop_length_set: list
                number of samples between successive frames, a list of three
            n_mel_set: int
                number of mel features, i.e. horizontal bars in spectrogram
            new_img_size: list
                the target size of the images

            Returns
            -------
            np.ndarray:
                Mel spectrogram.
    """
    try:
        specs = []
        num_channels = len(window_length_set)
        img_width = new_img_size[0]
        img_height = new_img_size[1]
        for i in range(num_channels):
            window_length = window_length_set[i]
            hop_length = hop_length_set[i]
            n_mel = n_mel_set[i]

            # compute a mel-scaled spectrogram
            # https://github.com/kamalesh0406/Audio-Classification/blob/master/preprocessing/preprocessingESC.py
            mel_spectrogram = librosa.feature.melspectrogram(y=audio,
                                                             sr=sample_rate,
                                                             hop_length=hop_length,
                                                             n_fft=window_length,
                                                             n_mels=n_mel,
                                                             window='hamming')

            eps = 1e-6
            spec = np.log(mel_spectrogram + eps)

            if spec.shape[1] != img_height:
                spec = np.array(Image.fromarray(spec).resize((img_width, img_height)))

            specs.append(spec)

        print(len(specs))
    except Exception as e:
        print("\nError encountered while parsing files\n>>", e)
        return None

    return specs


In [67]:
fp= '/Volumes/science.data.uu.nl/research-zwerts/data/version7_chunk5/chimpanze/396_2C_extracted_chimps_set_2_2682.5052_1_100_chunk3.wav'
sample_rate=48000
window_length= [750]
hop_length= [376 ]
n_mel=[64]
new_img_size= [64 ,64]
        
y, sr = librosa.load(fp, sr=sample_rate)
melspectrogram_db = compute_melspectrogram_with_fixed_size(y, sample_rate, window_length,
                                                               hop_length, n_mel, new_img_size)
    
print(melspectrogram_db)
print(type(melspectrogram_db))
fig = plt.Figure()
canvas = FigureCanvas(fig)
ax = fig.add_subplot(111)
#p = librosa.display.specshow(librosa.amplitude_to_db(out, ref=np.max), ax=ax, y_axis='log', x_axis='time')
p = librosa.display.specshow(melspectrogram_db, ax=ax)


fig.savefig('spec.png')


1
[array([[-4.3752036, -5.3064594, -4.839691 , ..., -5.294526 , -4.959032 ,
        -4.706888 ],
       [-4.4547515, -5.860826 , -5.8665266, ..., -6.717259 , -4.700924 ,
        -6.016617 ],
       [-3.5927286, -5.4724293, -5.5166473, ..., -4.219222 , -4.588617 ,
        -6.1992044],
       ...,
       [-4.6789565, -4.313019 , -4.3533564, ..., -4.464322 , -4.3810987,
        -3.997306 ],
       [-5.2849507, -4.863753 , -5.756742 , ..., -5.1384397, -5.298927 ,
        -4.682543 ],
       [-5.886543 , -6.011768 , -5.822068 , ..., -6.1994987, -6.0791597,
        -5.1511097]], dtype=float32)]
<class 'list'>


AttributeError: 'list' object has no attribute 'dtype'

In [88]:
import os
import matplotlib
matplotlib.use('Agg') # No pictures displayed 
import pylab
import librosa
import librosa.display
import numpy as np

fp= '/Volumes/science.data.uu.nl/research-zwerts/data/version7_chunk5/chimpanze/396_2C_extracted_chimps_set_2_2682.5052_1_100_chunk3.wav'


sig, fs = librosa.load(fp)   
# make pictures name 
save_path = 'test.jpg'

pylab.axis('off') # no axis
pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[]) # Remove the white edge
S = librosa.feature.melspectrogram(y=sig, sr=fs)
librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
pylab.savefig(save_path, bbox_inches=None, pad_inches=0)
pylab.close()

In [3]:
import librosa
import numpy
import skimage.io

def scale_minmax(X, min=0.0, max=1.0):
    X_std = (X - X.min()) / (X.max() - X.min())
    X_scaled = X_std * (max - min) + min
    return X_scaled

def spectrogram_image(y, sr, out, hop_length, n_mels):
    # use log-melspectrogram
    mels = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels,
                                            n_fft=hop_length*2, hop_length=hop_length)
    mels = numpy.log(mels + 1e-9) # add small number to avoid log(0)

    # min-max scale to fit inside 8-bit range
#     img = scale_minmax(mels, 0, 255).astype(numpy.uint8)
#     img = numpy.flip(img, axis=0) # put low frequencies at the bottom in image
#     img = 255-img # invert. make black==more energy

    #img = 255-mels # invert. make black==more energy


    # save as PNG
    skimage.io.imsave(out, mels)

In [7]:

# settings
hop_length = 512 # number of samples per time-step in spectrogram
n_mels = 128 # number of bins in spectrogram. Height of image
time_steps = 384 # number of time-steps. Width of image

# load audio. Using example from librosa
#path = librosa.util.example_audio_file()
path= '/Volumes/science.data.uu.nl/research-zwerts/data/version7_chunk5/chimpanze/396_2C_extracted_chimps_set_2_2682.5052_1_100_chunk3.wav'


y, sr = librosa.load(path)
out = 'out.png'

# extract a fixed length window
start_sample = 0 # starting at beginning
length_samples = time_steps*hop_length
window = y[start_sample:start_sample+length_samples]

# convert to PNG
spectrogram_image(window, sr=sr, out=out, hop_length=hop_length, n_mels=n_mels)
print('wrote file', out)



wrote file out.png


In [8]:
import matplotlib.pyplot as plt
import librosa.display

import numpy as np
import pandas as pd
import librosa


#filename = librosa.util.example_audio_file()
filename= '/Volumes/science.data.uu.nl/research-zwerts/data/version7_chunk5/chimpanze/396_2C_extracted_chimps_set_2_2682.5052_1_100_chunk3.wav'
#filename= '/Volumes/science.data.uu.nl/research-zwerts/data/version7_chunk5/chimpanze/sanctuary/1/10_2C_20191220_185344_5.9516_chunk0.wav'

 

y, sr = librosa.load(filename)
#y = y[:100000] # shorten audio a bit for speed

window_size = 750
window = np.hanning(window_size)
stft  = librosa.core.spectrum.stft(y, n_fft=window_size, hop_length=376, window=window)
out = 2 * np.abs(stft) / np.sum(window)

# For plotting headlessly
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas

fig = plt.Figure()
canvas = FigureCanvas(fig)
ax = fig.add_subplot(111)
p = librosa.display.specshow(librosa.amplitude_to_db(out, ref=np.max), ax=ax, y_axis='log', x_axis='time')
fig.savefig('spec1.png')