In [11]:
# Import dependencies
import numpy as np
import pandas as pd
import sklearn as skl
import os
import random

In [2]:
# Import models
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [12]:
# Define values
DIRNAME = os.path.abspath('.')
INPUT_FILE_PATH = os.path.join(DIRNAME, '..', 'Final_DF', 'final_df.csv')
TARGET = 'Rose-crested Blue Pipit'
OTHER_BIRDS = [
    'Bombadil',
    'Orange Pine Plover',
    'Blue-collared Zipper',
    'Eastern Corn Skeet',
    'Qax',
    'Ordinary Snape',
    'Scrawny Jay',
    'Pinkfinch',
    'Carries Champagne Pipit',
    'Darkwing Sparrow',
    'Bent-beak Riffraff',
    'Vermillion Trillian',
    'Green-tipped Scarlet Pipit',
    'Lesser Birchbeere',
    'Canadian Cootamum',
    'Purple Tooting Tout',
    'Queenscoat',
    'Broad-winged Jojo'
]

In [4]:
# Initialize classifiers
classifiers = [
    'Multi-layer Perceptron', MLPClassifier(alpha = 1),
    'K-nearest Neighbor', KNeighborsClassifier(3),
    'Support Vector Machine', SVC(gamma = 2, C = 1),
#    'Gaussian Process', GaussianProcessClassifier(1.0 * RBF(1.0)),
    'Decision Tree', DecisionTreeClassifier(max_depth = 5),
    'Random Forest', RandomForestClassifier(max_depth = 5, n_estimators = 10, max_features = 1),
    'AdaBoost', AdaBoostClassifier(),
    'Gaussian Naive Bayes', GaussianNB(),
    'Quadratic Discriminant Analysis', QuadraticDiscriminantAnalysis()
]
names, models = classifiers[::2], classifiers[1::2] # Split array

In [24]:
# Read input file
df = pd.read_csv(INPUT_FILE_PATH)
df = df.dropna() # Drop rows with null values

Unnamed: 0.1,Unnamed: 0,File ID,English_name,Vocalization_type,Quality,Time,Date,X,Y,ZCR,...,CromaVector4,CromaVector5,CromaVector6,CromaVector7,CromaVector8,CromaVector9,CromaVector10,Croma_Vector11,Croma_vector12,Croma_Deviation
0,0,402254,Rose-crested Blue Pipit,call,no score,13:30,2/8/2018,49,63,0.0,...,0.059907,0.006428,0.045912,0.003329,0.04788,0.001446,0.001912,0.014191,0.028368,0.001169
1,1,406171,Rose-crested Blue Pipit,call,A,7:48,6/7/2017,125,133,0.0,...,0.010212,0.01112,0.010412,0.01366,0.011036,0.006927,0.008333,0.008851,0.009685,0.009195
2,2,405901,Rose-crested Blue Pipit,call,A,12:00,2/8/2018,58,76,0.0,...,0.00991,0.009431,0.009823,0.012833,0.00897,0.006625,0.007789,0.009239,0.008964,0.008641
3,3,405548,Rose-crested Blue Pipit,song,A,11:00,3/10/2018,55,125,0.0,...,0.083664,0.003877,0.024294,0.003276,0.118844,0.000452,0.001658,0.009702,0.06284,0.000914
4,4,401782,Rose-crested Blue Pipit,song,A,6:00,6/29/2008,129,123,0.0,...,0.009887,0.006798,0.008011,0.006187,0.010824,0.002845,0.005979,0.007233,0.011939,0.007687


In [None]:
# Initialize scaler
scaler = skl.preprocessing.StandardScaler()

# Initialize cross validation
kf = skl.model_selection.KFold(n_splits = 5, shuffle = True)

In [54]:
# For every other bird
accuracies = []
for i, other_bird in enumerate(OTHER_BIRDS):
    
    subset = df.loc[df['English_name'].isin([TARGET, other])] # Get subset
    X = subset.loc[:, 'X':'Croma_Deviation'].values # Get features
    y = subset['English_name'].astype('category').cat.codes.values # Get labels
    X = scaler.fit_transform(X) # Scale features
    
    # For every split
    for train_index, test_index in kf.split(X):

        # Split dataset
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Fit models
        for model in models:
            model.fit(X_train, y_train)
            y_hat = model.predict(X_test)
            accuracies += [skl.metrics.accuracy_score(y_test, y_hat)]











In [81]:
a = np.array(accuracies)
a = np.reshape(a, (len(OTHER_BIRDS), 5, len(models)))
a = a.transpose((2, 1, 0))
a = a.mean(axis = 1)
for name, score in zip(names, a):
    print(name, score.mean())

Multi-layer Perceptron 0.9349983349983351
K-nearest Neighbor 0.8176915676915677
Support Vector Machine 0.5477688977688978
Decision Tree 0.905094905094905
Random Forest 0.7611407111407112
AdaBoost 0.9342472342472343
Gaussian Naive Bayes 0.861083361083361
Quadratic Discriminant Analysis 0.7501017501017501
