## Imports

In [1]:
# Imports

#General
import numpy as np
import itertools

# System
import os, fnmatch

# Visualization
import seaborn #visualization library, must be imported before all other plotting libraries
import matplotlib.pyplot as plt
from IPython.core.display import HTML, display

# Machine Learning
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import recall_score, precision_score, accuracy_score
from sklearn.metrics import confusion_matrix, f1_score, classification_report

# Random Seed
from numpy.random import seed
seed(1)

# Audio
import librosa.display, librosa

## Get filenames

In [2]:
# Get files in data path
path='TestSound2/'
# Get Audio Files
files = []
for root, dirnames, filenames in os.walk(path):
    for filename in fnmatch.filter(filenames, '*.wav'):
        files.append(os.path.join(root, filename))

print("found %d audio files in %s"%(len(files),path))

found 25 audio files in TestSound2/


## Prepare labels from filenames

In [3]:
labels =[]
classes=['Test']



color_dict={'Test':'blue'}

color_list=[]
for filename in files:
    for name in classes:
        if fnmatch.fnmatchcase(filename, '*'+name+'*'):
            labels.append(name)
            color_list.append(color_dict[name])
            break
    else:
        labels.append('other')

## Encode Labels

In [4]:
# Encode Labels
labelencoder = LabelEncoder()
labelencoder.fit(labels)
print(len(labelencoder.classes_), "classes:", ", ".join(list(labelencoder.classes_)))
classes_num = labelencoder.transform(labels)

1 classes: Test


## Parameters for MFCC

In [5]:
# Parameters
# Signal Processing Parameters
fs = 44100         # Sampling Frequency
n_fft = 2048       # length of the FFT window
hop_length = 512   # Number of samples between successive frames
n_mels = 128       # Number of Mel bands
n_mfcc = 13        # Number of MFCCs

# Machine Learning Parameters
testset_size = 0.25 #Percentage of data for Testing
n_neighbors=1       # Number of neighbors for kNN Classifier

## Function to Calculate Audio Features: MFCC

In [6]:
# Define Function to Calculate MFCC, Delta_MFCC and Delta2_MFCC
def get_features(y, sr=fs):
    S = librosa.feature.melspectrogram(y, sr=fs, n_mels=n_mels)
    mfcc = librosa.feature.mfcc(S=librosa.power_to_db(S), n_mfcc=n_mfcc)
    feature_vector = np.mean(mfcc,1)
    #feature_vector = (feature_vector-np.mean(feature_vector))/np.std(feature_vector)
    return feature_vector

## Load audio files, calculate features and create feature vectors

In [7]:
# Load audio files, calculate features and create feature vectors
feature_vectors = []
sound_paths = []
for i,f in enumerate(files):
    print ("get %d of %d = %s"%(i+1, len(files), f))
    try:
        y, sr = librosa.load(f, sr=fs)
        y/=y.max() #Normalize
        if len(y) < 2:
            print("Error loading %s" % f)
            continue
        feat = get_features(y, sr)
        feature_vectors.append(feat)
        sound_paths.append(f)
    except Exception as e:
        print("Error loading %s. Error: %s" % (f,e))
        
print("Calculated %d feature vectors"%len(feature_vectors))

get 1 of 25 = TestSound2/XC141694 - Pied Avocet - Recurvirostra avosetta.wav
get 2 of 25 = TestSound2/XC2479 - Andean Avocet - Recurvirostra andina.wav
get 3 of 25 = TestSound2/XC333699 - Pied Avocet - Recurvirostra avosetta.wav
get 4 of 25 = TestSound2/XC199264 - Pied Avocet - Recurvirostra avosetta.wav
get 5 of 25 = TestSound2/XC133080 - American Avocet - Recurvirostra americana.wav
get 6 of 25 = TestSound2/XC188266 - Pied Avocet - Recurvirostra avosetta.wav
get 7 of 25 = TestSound2/XC325032 - Pied Avocet - Recurvirostra avosetta.wav
get 8 of 25 = TestSound2/XC313293 - Pied Avocet - Recurvirostra avosetta.wav
get 9 of 25 = TestSound2/XC325032 - Pied Avocet - Recurvirostra avosetta (1).wav
get 10 of 25 = TestSound2/XC304644 - American Avocet - Recurvirostra americana.wav
get 11 of 25 = TestSound2/XC2474 - Andean Avocet - Recurvirostra andina.wav
get 12 of 25 = TestSound2/XC281050 - Pied Avocet - Recurvirostra avosetta.wav
get 13 of 25 = TestSound2/XC145135 - Pied Avocet - Recurvirostr

## Standardization: Zero-Mean and Unit-Variance

In [8]:
# Scale features using Standard Scaler
scaler = StandardScaler()
scaled_feature_vectors = scaler.fit_transform(np.array(feature_vectors))
print("Feature vectors shape:",scaled_feature_vectors.shape)

Feature vectors shape: (25, 13)


## Train and Test Sets

In [9]:
# Create Train and Test Set
splitter = StratifiedShuffleSplit(n_splits=1, test_size=testset_size, random_state=0)
splits = splitter.split(scaled_feature_vectors, classes_num)
for train_index, test_index in splits:
    train_set = scaled_feature_vectors[train_index]
    test_set = scaled_feature_vectors[test_index]
    train_classes = classes_num[train_index]
    test_classes = classes_num[test_index]

In [10]:
# Check Set Shapes
print("train_set shape:",train_set.shape)
print("test_set shape:",test_set.shape)
print("train_classes shape:",train_classes.shape)
print("test_classes shape:",test_classes.shape)

train_set shape: (18, 13)
test_set shape: (7, 13)
train_classes shape: (18,)
test_classes shape: (7,)


## K-Means Algorithim

In [11]:
import sklearn

In [22]:
model_kmeans = sklearn.cluster.KMeans(n_clusters=6)

In [23]:
model_kmeans.fit(train_set, train_classes);

In [24]:
# Predict using the Test Set
predicted_labels = model_kmeans.predict(test_set)
print(predicted_labels)

[2 5 1 1 2 3 2]


## Affinity Propagation

In scikit-learn, other clustering algorithms such as affinity propagation can cluster without defining the number of clusters beforehand.

In [25]:
model_affinity = sklearn.cluster.AffinityPropagation()

In [26]:
model_affinity.fit(train_set, train_classes);

In [27]:
# Predict using the Test Set
predicted_labels = model_affinity.predict(test_set)
print(predicted_labels)

[4 3 1 1 4 0 4]
