In [None]:
import glob
import os
import numpy as np
import pandas as pd
import sounddevice as sd
import pylab as pl
import random

from scipy.io import wavfile
import matplotlib.pyplot as plt
from python_speech_features import mfcc
from sklearn.cluster import MiniBatchKMeans

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity
from adjustText import adjust_text
from collections import Counter
from scipy.io.wavfile import write
from sklearn.cluster import AgglomerativeClustering

from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import ShortTermFeatures

import librosa
import pydub
from pydub import AudioSegment
import soundfile as sf
import tempfile
from pydub.utils import which
from scipy.io.wavfile import write
import scipy
import warnings 

AudioSegment.converter = which("ffmpeg")
%matplotlib inline

In [None]:
# Returns all clip indexes within a cluster
def get_clip_indexes(cluster_label, clusters):
    """Collect the positions in `clusters` whose label equals `cluster_label`.

    Args:
        cluster_label: the label to look for.
        clusters: indexable sequence of labels, one entry per clip.

    Returns:
        A list of integer positions, in ascending order (empty if none match).
    """
    return [position for position in range(len(clusters))
            if clusters[position] == cluster_label]

# Plays a random clip in a given cluster
def play_random_clip_in_cluster(index, clusters, option='cluster'):
    """Play one randomly chosen clip from a cluster.

    Args:
        index: a cluster label when option == 'cluster'; a clip index when
            option == 'clip' (the cluster that clip belongs to is used).
        clusters: sequence of cluster labels, one entry per clip.
        option: either 'cluster' or 'clip'.

    Raises:
        AssertionError: if `option` is neither 'cluster' nor 'clip'.
        ValueError: if the selected cluster contains no clips.
    """
    assert option in ('cluster', 'clip'), "option must be 'cluster' or 'clip'"
    if option == 'clip':
        index = find_cluster(index, clusters)  # Get cluster index

    clips = get_clip_indexes(index, clusters)
    if not clips:
        # Fail with a readable message instead of randint's "empty range" error
        raise ValueError('No clips found in cluster %r' % (index,))

    clip_i = random.choice(clips)
    print('Playing clip index %d ' % clip_i)
    play_clip(clip_i)
        
# Returns the cluster label that a clip belongs to
def find_cluster(clip_index, clusters):
    """Look up the cluster label stored at position `clip_index` of `clusters`."""
    label = clusters[clip_index]
    return label

# Plays an audio clip given the clip index
def play_clip(clip_index):
    """Start playback of `shorter_clips[clip_index]` through sounddevice.

    Reads the notebook-level `shorter_clips` and `samplerate` variables.
    """
    sd.play(shorter_clips[clip_index], samplerate)

# Flattens a given list
def list_flatten(l):
    """Flatten one level of nesting: the items of every sub-iterable of `l`, in order."""
    return [item for sublist in l for item in sublist]

# Saves clip as a wav file
def save_clip(filename, wav_array):
    """Write `wav_array` to `<filename>.wav` at the notebook-level `samplerate`.

    `filename` must not contain a '.' anywhere (the '.wav' extension is
    appended here); an AssertionError is raised otherwise.
    """
    assert '.' not in filename
    wav_path = filename + '.wav'
    write(wav_path, samplerate, wav_array)
    
# Saves all clips to a directory
def save_clips_to_dir(shorter_clips, dirname):
    """Save every clip as `<dirname>/<position>.wav` via save_clip."""
    clip_number = 0
    for clip in shorter_clips:
        save_clip(dirname + '/' + '%d' % clip_number, clip)
        clip_number += 1
            
def convert_mp3_to_wav(mp3_path, sr=44100, mono=True, overwrite=False, dtype='float32'):
    """Decode an MP3, optionally downmix/resample it, and write a WAV next to it.

    Parts of code from
    https://github.com/bill317996/Audio-to-midi/blob/master/cfp.py

    Args:
        mp3_path: path to the source file; must end in '.mp3' (any letter case).
        sr: target sample rate in Hz. The default is 44100 (44.1 kHz, as needed
            when using audiomoths). A falsy value keeps the file's own rate.
        mono: when True, multi-channel audio is averaged down to one channel
            before it is written.
        overwrite: when False and the '.wav' already exists, nothing is
            converted and the existing WAV is loaded and returned instead.
        dtype: numpy dtype of the returned / written samples.

    Returns:
        (x, fs): the samples and their sample rate.

    Raises:
        AssertionError: if `mp3_path` does not end in '.mp3'.
    """
    # Imported here so the function does not depend on `import scipy` having
    # loaded the `scipy.signal` submodule as a side effect of another import.
    from scipy.signal import resample_poly

    warnings.warn("deprecated", DeprecationWarning)

    # lower() so that an upper-case '.MP3' extension is accepted as well
    assert mp3_path.lower().endswith('.mp3'), 'filename indicates not mp3'
    wav_path_to_write = os.path.splitext(mp3_path)[0] + '.wav'

    if not overwrite and os.path.exists(wav_path_to_write):
        # Callers unpack `(x, fs)`, so hand back the already-converted audio
        # rather than a bare None.
        x, fs = sf.read(wav_path_to_write)
        return x.astype(dtype), fs

    mp3 = AudioSegment.from_file(mp3_path)

    # mkstemp returns an *open* descriptor; close it so it is not leaked once
    # per converted file.
    fd, temp_path = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    try:
        exported = mp3.export(temp_path, format='wav')
        exported.close()  # export() hands back the open output file
        x, fs = sf.read(temp_path)
    finally:
        # Remove the temp file even when decoding fails
        os.remove(temp_path)
    del mp3

    if mono and x.ndim > 1:
        x = np.mean(x, axis=1)
    if sr and sr != fs:
        x = resample_poly(x, sr, fs)
        fs = sr
    x = x.astype(dtype)
    write(wav_path_to_write, fs, x)
    return x, fs

def read(f, normalized=False):
    """MP3 to numpy array

    Args:
        f: path or file object accepted by pydub's AudioSegment.from_file.
        normalized: when True, return float32 samples divided by the full-scale
            value of the file's sample width (2**15 for 16-bit audio).

    Returns:
        (frame_rate, samples). `samples` is 1-D for single-channel audio and
        has shape (n_frames, n_channels) otherwise.
    """
    a = pydub.AudioSegment.from_file(f)
    y = np.array(a.get_array_of_samples())
    if a.channels > 1:
        # One column per channel, for any channel count (not just stereo)
        y = y.reshape((-1, a.channels))
    if normalized:
        # sample_width is in bytes; signed full scale is 2**(bits - 1)
        full_scale = float(2 ** (8 * a.sample_width - 1))
        return a.frame_rate, np.float32(y) / full_scale
    return a.frame_rate, y

In [None]:
# Testing convert_mp3_to_wav on a single Xeno Canto recording

dir_path = '/Volumes/Elements/Madre_de_Dios_Xeno_Canto_Birdcalls/'
file_path = dir_path + 'XC431125 - Rufous Twistwing - Cnipodectes superrufus.mp3'
# file_path = dir_path + 'XC91323 - White-eyed Parakeet - Psittacara leucophthalmus.mp3'

# overwrite=True forces a fresh conversion even when the .wav already exists
x, fs = convert_mp3_to_wav(file_path, overwrite=True)

In [None]:
# Graphs to sanity check the output of convert_mp3_to_wav against an online mp3 to wav converter

fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.plot(x)
ax1.set_title('convert_mp3_to_wav')

print(x.shape)

# This file is from a random mp3 to wav convert I found here
# https://online-audio-converter.com/ and then downloaded on Desktop
check_dir_path = '/Users/yoo-jin/Desktop/XC431125 - Rufous Twistwing - Cnipodectes superrufus.wav'
x_test, fs_test = sf.read(check_dir_path)

# Downmix only when the reference file has a channel axis; averaging over
# axis 1 of a mono (1-D) array would raise.
if x_test.ndim > 1:
    x_test = np.mean(x_test, axis=1)
print(x_test.shape)

ax2.plot(x_test)
ax2.set_title('online converter')


In [None]:
# Figure out how many files have 44.1kHz vs 48kHz sampling rates in Xeno Canto
num_441k = 0
num_48k = 0
for f in glob.glob(os.path.join(dir_path, '*.mp3')):
    # Only the frame rate is needed; the decoded samples are discarded
    rate, sound = read(f)
    if rate == 44100:
        num_441k += 1
    elif rate == 48000:
        num_48k += 1

print(num_441k)
print(num_48k)

## Load Audio Data

In [None]:
dir_path = '/Volumes/Elements/Madre_de_Dios_Xeno_Canto_Birdcalls/'
samplerate = None
wav_chunks = []  # one sample array per file; joined once after the loop
wav_names = []   # (species_name, clip_length_in_seconds), same order as wav_chunks

for file in glob.glob(os.path.join(dir_path, '*.mp3')):
    try:
        data, rate = convert_mp3_to_wav(file, overwrite=True)
        # Input only bird species
        # "XC123 - Common Name - Latin name.mp3" -> "Latin_name"
        bird_species_name = file.split(' - ')[2][:-4].replace(' ', '_')
    except Exception as e:
        print('(failed) ' + file)
        print('\t' + str(e))
        continue

    # Audio and name are recorded together, only after both succeeded, so the
    # two lists can never get out of step.
    samplerate = rate
    wav_chunks.append(data)
    clip_len = len(data) / samplerate
    wav_names.append((bird_species_name, clip_len))
    print(file)

# A single concatenation instead of growing the array file by file. This also
# removes the `wav_data == []` test that compared a NumPy array with a list.
wav_data = np.concatenate(wav_chunks) if wav_chunks else np.array([], dtype='float32')

In [None]:
# Quick look at the concatenated audio: shape, sample rate and container type
print(wav_data.shape)
# print(wav_data)
print(samplerate)
print(type(wav_data))

In [None]:
# print('number of channels = %d' % wav_data.shape[1]) 
# print(np.shape(wav_data))
# print('number of channels = %d' % np.shape(wav_data)) 

print('sample rate = %d' % samplerate)
# Total duration in seconds = number of samples / samples per second
length = wav_data.shape[0] / samplerate
print('length = %.1fs' % length)

## Split Audio Data

In [None]:
# Get n-minute clips
def split_into_minutes(wav_data, samplerate, n=1):
    """Cut `wav_data` into consecutive clips of exactly `n` minutes each.

    Samples left over after the last whole clip are dropped.

    Args:
        wav_data: array of samples (time along axis 0).
        samplerate: samples per second.
        n: clip length in minutes.

    Returns:
        A list of equally sized arrays; empty when the audio is shorter than
        one clip.

    Raises:
        ValueError: if `n * 60 * samplerate` is not a positive sample count.
    """
    samples_per_clip = int(round(n * 60 * samplerate))
    if samples_per_clip <= 0:
        raise ValueError('n and samplerate must give a positive clip length')

    num_clips = len(wav_data) // samples_per_clip
    if num_clips == 0:
        shorter_clips = []
    else:
        # Slice with an explicit end index: a negative-remainder slice such as
        # wav_data[:-0] would select nothing when the length divides evenly.
        shorter_clips = np.split(wav_data[:num_clips * samples_per_clip], num_clips)

    print('%d %d-minute clips' % (len(shorter_clips), n))
    return shorter_clips

# Get n-second clips
def _clip_file_name(segments, samplerate):
    """Build one clip's file name from its (species_name, sample_count) segments."""
    if len(segments) == 1:
        return segments[0][0] + "_Split.wav"
    # Every segment but the last carries its duration in seconds
    leading = ''.join('%s%g_' % (name, count / samplerate)
                      for name, count in segments[:-1])
    return leading + segments[-1][0] + "_Split.wav"


def _name_clips(wav_names, samplerate, samples_per_clip, num_clips):
    """Walk the recordings in order and name each full clip after what it contains."""
    file_names = []
    segments = []            # recordings contributing to the clip being filled
    room = samples_per_clip  # samples still missing from that clip
    for name, seconds in wav_names:
        if len(file_names) >= num_clips:
            break
        remaining = int(round(seconds * samplerate))
        while remaining > 0 and len(file_names) < num_clips:
            take = min(remaining, room)
            segments.append((name, take))
            remaining -= take
            room -= take
            if room == 0:
                file_names.append(_clip_file_name(segments, samplerate))
                segments = []
                room = samples_per_clip
    return file_names


def split_into_n_seconds(wav_data, wav_names, samplerate, n=10):
    """Cut `wav_data` into consecutive `n`-second clips and name each clip.

    Samples left over after the last whole clip are dropped. `wav_names` is
    only read, never modified.

    wav_names =
    [
       ('Olive-faced_Flatbill', 12), ('Channel-billed_Toucan', 8)
    ]
    file_names (n=10) =
    [
        'Olive-faced_Flatbill_Split.wav',
        'Olive-faced_Flatbill2_Channel-billed_Toucan_Split.wav'
    ]

    Args:
        wav_data: array of samples (time along axis 0) holding the recordings
            back to back.
        wav_names: sequence of (species_name, length_in_seconds) pairs, in the
            same order as the recordings inside `wav_data`.
        samplerate: samples per second.
        n: clip length in seconds.

    Returns:
        (second_clips, file_names): the list of clips, and one file name per
        clip for as many clips as `wav_names` accounts for.

    Raises:
        ValueError: if `n * samplerate` is not a positive sample count.
    """
    samples_per_clip = int(round(n * samplerate))
    if samples_per_clip <= 0:
        raise ValueError('n and samplerate must give a positive clip length')

    num_clips = len(wav_data) // samples_per_clip
    if num_clips == 0:
        second_clips = []
    else:
        second_clips = np.split(wav_data[:num_clips * samples_per_clip], num_clips)

    file_names = _name_clips(wav_names, samplerate, samples_per_clip, num_clips)

    print('%d %d-second clips' % (len(second_clips), n))
    return second_clips, file_names

#             # if not last clip
#             if i < len(wav_names)-1:
#                 name = wav_names[i][0] + (wav_names[i][1] % n) + "_" + \
#                         wav_names[i+1][0] + (wav_names[i+1][1] % n) + ".wav"
#                 file_names.append(name)
#             else: 
#                 name = wav_names[i][0] + (wav_names[i][1] % n)

In [None]:
#shorter_clips = split_into_minutes(wav_data, samplerate, 1)

# NOTE(review): this unpacks two values, so split_into_n_seconds must return a
# (clips, file_names) pair — confirm against its return statement.
shorter_clips, file_names = split_into_n_seconds(wav_data, wav_names, samplerate, 10)


# Spectrograms

In [None]:
def _split_channels(samples):
    """Return (left, right) for multi-channel audio, or (mono, None) for one channel."""
    samples = np.asarray(samples)
    if samples.ndim == 1:
        return samples, None
    if samples.shape[1] == 1:
        return samples[:, 0], None
    return samples[:, 0], samples[:, 1]


def _show_spectrograms(samplerate, left_channel, right_channel, title, separate_channels):
    """Draw one (merged/mono) or two (left/right) spectrogram panels and show them."""
    # rc_context keeps the huge figure/font settings local to this figure
    # instead of changing every later plot in the notebook.
    with plt.rc_context({'figure.figsize': (100, 100), 'font.size': 50}):
        fig = plt.figure()
        if separate_channels and right_channel is not None:
            panels = [(1, left_channel, title + ' (Left Channel)'),
                      (2, right_channel, title + ' (Right Channel)')]
        elif right_channel is not None:
            panels = [(1, left_channel + right_channel, title)]
        else:
            panels = [(1, left_channel, title)]

        for row, signal, panel_title in panels:
            ax = fig.add_subplot(2, 1, row)
            ax.set_title(panel_title)
            # Labels are set on the panel itself so they are not lost
            ax.set_xlabel('Time')
            ax.set_ylabel('Frequency')
            ax.specgram(signal, Fs=samplerate)

        plt.show()


def plot_spectrogram(samplerate, shorter_clips, clip_index, separate_channels=False):
    """Plot the spectrogram of a single clip.

    Args:
        samplerate: samples per second of the clip.
        shorter_clips: sequence of clips, each 1-D (mono) or
            (n_samples, n_channels).
        clip_index: position of the clip to plot.
        separate_channels: for stereo clips, draw left and right in separate
            panels instead of their sum. Has no effect on mono clips.
    """
    left_channel, right_channel = _split_channels(shorter_clips[clip_index])
    _show_spectrograms(samplerate, left_channel, right_channel,
                       'Clip #%d' % clip_index, separate_channels)


def plot_spectrogram_of_cluster(samplerate, shorter_clips, cluster_i, clusters, separate_channels=False):
    """Plot the spectrogram of every clip of one cluster, appended back to back.

    Args:
        samplerate: samples per second of the clips.
        shorter_clips: sequence of clips, each 1-D (mono) or
            (n_samples, n_channels).
        cluster_i: label of the cluster to plot.
        clusters: sequence of cluster labels, one entry per clip.
        separate_channels: for stereo clips, draw left and right in separate
            panels instead of their sum. Has no effect on mono clips.

    Raises:
        IndexError: if the cluster contains no clips.
    """
    clip_indexes = get_clip_indexes(cluster_i, clusters)
    if not clip_indexes:
        raise IndexError('cluster %r contains no clips' % (cluster_i,))

    channel_pairs = [_split_channels(shorter_clips[i]) for i in clip_indexes]
    # One concatenation instead of re-copying the signal for every clip
    left_channel = np.concatenate([left for left, _ in channel_pairs])
    if any(right is None for _, right in channel_pairs):
        right_channel = None
    else:
        right_channel = np.concatenate([right for _, right in channel_pairs])

    _show_spectrograms(samplerate, left_channel, right_channel,
                       'Cluster #%d' % cluster_i, separate_channels)

In [None]:
# TO DO: isn't working with mp3 
# plot_spectrogram(samplerate, shorter_clips)

# This is working with xeno canto data

# NOTE(review): `x` is the single recording loaded in the conversion test cell,
# not the concatenated `wav_data`.
plt.specgram(x, Fs=samplerate)
# plt.specgram(wav_data, Fs=samplerate)

# Feature Extraction

In [None]:
# TODO: Add a way to select and evaluate features

def extract_features(shorter_clips, mfcc_only=False, mono=False):
    """Compute one feature vector per clip.

    Args:
        shorter_clips: iterable of clips (sample arrays).
        mfcc_only: when True, use only the flattened `mfcc` output; otherwise
            use the flattened first element returned by
            ShortTermFeatures.feature_extraction (its second element is printed
            for every clip — presumably the feature names; TODO confirm).
        mono: pass False for stereo clips so that columns 0 and 1 are summed
            before extraction; pass True when clips are already single-channel.

    Returns:
        A list with one feature vector per clip. Uses the notebook-level
        `samplerate`.
    """
    print('Extracting features for each of the %d audio clips' % len(shorter_clips))

    feat_clips = []
    for clip in shorter_clips:
        if mono == False:
            # Merging left and right channels
            clip = clip[:, 0] + clip[:, 1]

        if mfcc_only:
            feat_clips.append(mfcc(clip, samplerate, winlen=0.023, nfft = 1024).flatten())
            continue

        short_term = ShortTermFeatures.feature_extraction(clip, samplerate, 0.050*samplerate, 0.025*samplerate)
        print('')
        print(short_term[1])
        feat_clips.append(list_flatten(short_term[0]))

    print('done')
    return feat_clips
    
# Set mfcc_only to True to shorten runtime
# mono=True skips the left/right channel merge inside extract_features
mfcc_clips = extract_features(shorter_clips, mfcc_only=True, mono=True)
#all_feat_clips = extract_features(shorter_clips)


# Clustering

In [None]:
# Pairwise cosine distance between the per-clip feature vectors
dist = 1 - cosine_similarity(mfcc_clips)
# NOTE: the variable is called `mds` but it holds a PCA model
mds = PCA(n_components=2, random_state=1)
pos = mds.fit_transform(dist)  # one row per clip; columns 0 and 1 are used as x/y by cluster_plot

X = pos

In [None]:
from scipy.cluster.hierarchy import dendrogram, linkage
from matplotlib import pyplot as plt

# Single-linkage hierarchy over the 2-D projected points in X
linked = linkage(X, 'single')

# NOTE: labelList is only referenced by the commented-out `labels=` argument below
labelList = range(len(X))

plt.figure(figsize=(16, 16))
dendro = dendrogram(linked,
#             p=7,
            orientation='top',
            #labels=labelList,
            truncate_mode='level',
            distance_sort='descending',
            show_leaf_counts=True)
plt.show()

In [None]:
def cluster_plot(pos, title='', without_labels=False, width=20, height=20, savepath='', cluster_labels=None):
    """Scatter-plot 2-D points coloured by cluster.

    Args:
        pos: array of shape (n_points, 2); column 0 is x, column 1 is y.
        title: axes title.
        without_labels: when False, every point is annotated with its index.
        width, height: figure size in inches.
        savepath: when non-empty, the figure is also saved to this path.
        cluster_labels: one cluster label per point. Defaults to the
            notebook-level `clusters` variable.
    """
    if cluster_labels is None:
        cluster_labels = clusters  # labels produced by the clustering cell

    plt.rc('font', size=15)

    xs, ys = pos[:, 0], pos[:, 1]

    # Index labels are derived from the plotted points themselves
    labels = range(len(xs))

    # Data frame with the projected data, the cluster numbers and point labels
    cluster_df = pd.DataFrame(dict(x=xs, y=ys, clusters=cluster_labels, labels=labels))

    fig, ax = plt.subplots(figsize=(width, height))
    ax.margins(0.05)
    ax.set_title(title)

    for name, group in cluster_df.groupby('clusters'):
        ax.plot(group.x, group.y, marker='o', linestyle='', ms=12, mec='none', label=name)

    ax.set_aspect('auto')
    # tick_params expects booleans; the string 'off' is truthy and leaves the
    # ticks switched on.
    ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    ax.tick_params(axis='y', which='both', left=False, right=False, labelleft=False)

    ax.legend(numpoints=1, bbox_to_anchor=(1.1, 1.05))

    # Add labels
    if not without_labels:
        for row in cluster_df.itertuples(index=False):
            ax.text(row.x, row.y, row.labels, size=10)

    if savepath != '':
        fig.savefig(savepath, dpi=200, bbox_inches="tight")

    plt.show()

In [None]:
# This is for K-means clustering only, the elbow would be the ideal number of clusters to use

# TODO: plot.ly

def find_optimal_clusters(data, min_k, max_k, step=2):
    """Plot the K-means SSE (inertia) for a range of cluster counts.

    Args:
        data: array-like of shape (n_samples, n_features).
        min_k: smallest number of clusters to try.
        max_k: largest number of clusters to try; capped at the number of
            samples, because K-means cannot fit more clusters than samples.
        step: increment between successive cluster counts.
    """
    max_k = min(max_k, len(data))
    iters = list(range(min_k, max_k + 1, step))

    sse = [
        MiniBatchKMeans(n_clusters=k, init_size=1024, batch_size=2048, random_state=20).fit(data).inertia_
        for k in iters
    ]

    f, ax = plt.subplots(1, 1)
    ax.plot(iters, sse, marker='o')
    ax.set_xlabel('Cluster Centers')
    ax.set_ylabel('SSE')
    ax.set_title('SSE by Number of Clusters')
    plt.tight_layout()
    plt.show()

find_optimal_clusters(X, 1, 100)

In [None]:
# Choose number of clusters
# TO DO: need to choose the correct number of clusters
n_clusters = 16

# Fit clustering model
# Ward linkage works on Euclidean distances, which is the estimator's default.
# The `affinity` keyword is omitted because newer scikit-learn releases renamed
# it to `metric` and then removed it.
clusters = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward').fit_predict(mfcc_clips)




In [None]:
# NOTE: `savepath` only exists as a parameter of cluster_plot, so this top-level
# call raised NameError on a fresh kernel. cluster_plot already saves its figure
# when it is given a savepath, so the call is disabled here.
# plt.savefig(savepath, dpi=200, bbox_inches="tight")

In [None]:
# List all the bird species 

dir_path = '/Volumes/Elements/Madre_de_Dios_Xeno_Canto_Birdcalls/'
list_of_species = []

for f in glob.glob(os.path.join(dir_path, '*.mp3')):
    # Everything after the first " - " of the extension-less path
    bird_name = os.path.splitext(f)[0].split(" - ", 1)[1].strip()
    # Keep each species once, in order of first appearance
    if bird_name not in list_of_species:
        list_of_species.append(bird_name)

# list_of_species = np.array(list_of_species)

print(list_of_species)
print(len(list_of_species))
# print(list_of_species.shape)

In [None]:
# Project the data onto a 2D graph with PCA, coloured by the current `clusters` labels
dist = 1 - cosine_similarity(mfcc_clips)
mds = PCA(n_components=2, random_state=1)
pos = mds.fit_transform(dist)  # one row per clip, two columns (x, y)

# /Volumes/Elements/Madre_de_Dios_Xeno_Canto_Birdcalls/

cluster_plot(pos, 'PCA Cluster Plot', without_labels=True, savepath='/Volumes/Elements/Madre_de_Dios_Xeno_Canto_Birdcalls/Clusters_XenoCanto/pca.png')




In [None]:
# Project the data onto a 2D graph with t-SNE, coloured by the current `clusters` labels
dist = 1 - cosine_similarity(mfcc_clips)
# NOTE: the variable is called `mds` but it holds a TSNE model
mds = TSNE(n_components=2, random_state=1)
pos = mds.fit_transform(dist)  # one row per clip; columns 0 and 1 are used as x/y by cluster_plot

cluster_plot(pos, 'TSNE Cluster Plot', without_labels=True, savepath='/Volumes/Elements/Madre_de_Dios_Xeno_Canto_Birdcalls/Clusters_XenoCanto/tsne.png')

# Cluster Interpretation

In [None]:
# Displays: (Cluster number, count of clips in cluster), most populated cluster first
Counter(clusters).most_common()

In [None]:
# 6 is a cluster label here, because `option` defaults to 'cluster'
play_random_clip_in_cluster(6, clusters) # Play random clip in a cluster

In [None]:
# Run this cell to interrupt a clip started with sd.play
sd.stop() # Stop playing

In [None]:
# Can assign a label to each cluster here
#cluster_names = ['quiet', 'loud', 'quiet', 'rain', 'birds', 'crickets', 'birds', 'quiet', 'quiet', 'loud']

In [None]:
# See clip indexes of all clips inside cluster 0
clip_indexes = get_clip_indexes(0, clusters)
print(clip_indexes)

In [None]:
# Play one specific clip, chosen by its index in shorter_clips
play_clip(36) 

In [None]:
# Save all clusters
dirname = '/Volumes/Elements/Madre_de_Dios_Xeno_Canto_Birdcalls/Clusters_XenoCanto/'

# TODO: play around with number of clusters
n_clusters = 134

# Fit clustering model
clusters = AgglomerativeClustering(n_clusters=n_clusters, affinity='euclidean', linkage='ward').fit_predict(mfcc_clips)
print(type(clusters))
print(clusters.shape)
print(clusters)

In [None]:
"""
# Saves clip as a wav file
def save_clip(filename, wav_array):
    assert('.' not in filename)
    write(filename + '.wav', samplerate, wav_array)
"""

# TODO; figure out a way to preserve the original file names
"""
wav_names =
[
   {'Olive-faced_Flatbill':10}, {'Olive-faced_Flatbill':2, 'Channel-billed_Toucan':8}
]
file_names =
[
    Olive-faced_Flatbill_Split.wav,
    Olive-faced_Flatbill2_Channel-billed_Toucan8.wav
]
"""

# Edited code
# Iterate over the labels that actually occur rather than assuming 0..k-1
for i in np.unique(clusters):
    cluster_dir = dirname + 'cluster_%d' % i

    # if the folder already exists, reuse it and clear the clips of a previous
    # run, so re-running this cell neither crashes nor mixes old and new clips
    os.makedirs(cluster_dir, exist_ok=True)
    for stale_wav in glob.glob(os.path.join(cluster_dir, '*.wav')):
        os.remove(stale_wav)

    for j in get_clip_indexes(i, clusters):
        save_clip(cluster_dir + '/%d' % j, shorter_clips[j])

# # Erika's original code 
# for i in range(len(np.unique(clusters))):
#     # if file already exists, then delete it and overwrite it
    
#     os.mkdir(dirname + 'cluster_%d' % i)
#     for j in get_clip_indexes(i, clusters):
#         save_clip(dirname + 'cluster_%d/%d' % (i, j), shorter_clips[j]) 
    

In [None]:
# Plot spectrogram of all clips in a cluster appended together
# (cluster label 7; left and right channels are summed because separate_channels=False)
plot_spectrogram_of_cluster(samplerate, shorter_clips, 7, clusters, separate_channels=False)

# Visualization

In [None]:
num_clips = len(shorter_clips)
# 6 clips per minute — assumes the 10-second clips requested from split_into_n_seconds
num_minutes = num_clips / 6
num_hours = num_minutes / 60

def clip_i_to_bin_i(clip_i):
    """Map a clip index to the index of the one-minute histogram bin it falls in.

    Uses the notebook-level `num_clips` and `num_minutes`.
    """
    clips_per_bin = num_clips / num_minutes
    return int(clip_i / clips_per_bin)

def time_hist_for_cluster(cluster_i, clusters):
    """Show a per-minute histogram of the clips that belong to one cluster.

    Uses the notebook-level `num_clips`, `num_minutes` and `cluster_names`.
    """
    hist_data = [clip_i_to_bin_i(index)
                 for index in get_clip_indexes(cluster_i, clusters)]

    plt.ylim(0, (num_clips/num_minutes))
    plt.title('Cluster: %d (%s)' % (cluster_i, cluster_names[cluster_i]))
    plt.hist(hist_data, bins=int(num_minutes), range=[0, num_minutes])
    plt.show()
    
def colors(n):
    """Return `n` (r, g, b) tuples with integer components in 0..255.

    A random start colour is advanced by 256/n on every channel for each
    entry, wrapping around at 256.
    """
    red = int(random.random() * 256)
    green = int(random.random() * 256)
    blue = int(random.random() * 256)
    step = 256 / n

    palette = []
    for _ in range(n):
        red = int(red + step) % 256
        green = int(green + step) % 256
        blue = int(blue + step) % 256
        palette.append((red, green, blue))
    return palette

def get_clusters_by_cluster_names(clusters, cluster_names):
    """Merge clusters that share a name into one label per unique name.

    Args:
        clusters: sequence of cluster labels, one entry per clip.
        cluster_names: sequence where position i holds the name of cluster i.

    Returns:
        A list as long as `clusters`. Each entry is the index of the clip's
        cluster name within `np.unique(cluster_names)`, or -1 when the clip's
        label has no entry in `cluster_names`.
    """
    uniq_names = np.unique(cluster_names)
    print(uniq_names)

    new_clusters = [-1] * len(clusters)
    for uniq_name_i, uniq_name in enumerate(uniq_names):
        # Original cluster labels that carry this name
        same_name_labels = [name_i for name_i, name in enumerate(cluster_names)
                            if name == uniq_name]
        for name_i in same_name_labels:
            for index in range(len(clusters)):
                if clusters[index] == name_i:
                    new_clusters[index] = uniq_name_i
    return new_clusters

In [None]:
# NOTE(review): `cluster_names` must be defined before this cell runs; the only
# assignment visible in this notebook is commented out.
named_clusters = get_clusters_by_cluster_names(clusters, cluster_names)
# NOTE: this result is discarded because it is not the last expression of the cell
Counter(named_clusters).most_common() 
cluster_names_to_plot = np.unique(cluster_names)

# Colors for visualization
# One colour of the 'jet' colormap per unique cluster name
jet = pl.get_cmap('jet', len(cluster_names_to_plot))
cluster_colors = {}
for i, name in enumerate(cluster_names_to_plot):
    cluster_colors[name] = jet(i)

Note: These are not real labels for the clusters; this is just a demonstration of the code. The histogram visualization will be replaced with better visualizations for cluster/time data, such as Rose Plots.

In [None]:
def plot_all_histograms(clusters, cluster_names):
    """Overlay the per-minute histograms of all clusters in a single figure.

    Args:
        clusters: sequence of cluster labels, one entry per clip; label i is
            drawn with the name `cluster_names[i]`.
        cluster_names: sequence of names, one per cluster label.

    Uses the notebook-level `cluster_colors` and `num_minutes`.
    """
    plt.figure(figsize=(15,8))

    for cluster_i in range(len(cluster_names)):
        hist_data = [clip_i_to_bin_i(index)
                     for index in get_clip_indexes(cluster_i, clusters)]

        cluster_name = cluster_names[cluster_i]
        c = cluster_colors[cluster_name]
        plt.hist(hist_data, bins=int(num_minutes), range=[0, num_minutes], alpha=0.5, color=c, label=cluster_name)

    # Label and show once, after every cluster has been drawn, so that all
    # histograms share the figure created above.
    plt.ylabel('Number of 10-second audio clips')
    plt.xlabel('Time in Minutes')
    plt.legend(bbox_to_anchor=(1.2, 1.05))
    plt.show()
    
# NOTE: these two names are assigned but the call below passes the expressions directly
clusters_to_plot = named_clusters
cluster_names_to_plot = np.unique(cluster_names)
plot_all_histograms(named_clusters, np.unique(cluster_names))