In [None]:
# Import dependencies
import numpy as np
import pandas as pd
import sklearn as skl
import os

In [None]:
# Import models
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [None]:
# Notebook-wide constants: location of the input CSV and the species names.

# Absolute form of the directory the notebook is executed from.
DIRNAME = os.path.abspath('.')

# The CSV sits one level above DIRNAME, inside the 'Final_DF' folder.
INPUT_FILE_PATH = os.path.join(
    DIRNAME,
    os.path.join('..', 'Final_DF', 'final_df.csv'),
)

# Species names, in a fixed order.
# NOTE(review): not referenced by the cells visible here; presumably these match
# the values of the 'English_name' column in the CSV -- confirm before relying on it.
BIRD_NAMES = [
    "Bombadil", "Orange Pine Plover", "Blue-collared Zipper",
    "Rose-crested Blue Pipit", "Eastern Corn Skeet", "Qax",
    "Ordinary Snape", "Scrawny Jay", "Pinkfinch",
    "Carries Champagne Pipit", "Darkwing Sparrow", "Bent-beak Riffraff",
    "Vermillion Trillian", "Green-tipped Scarlet Pipit", "Lesser Birchbeere",
    "Canadian Cootamum", "Purple Tooting Tout", "Queenscoat",
    "Broad-winged Jojo",
]

In [None]:
# Initialize classifiers

# Fixed seed passed to every estimator below that has internal randomness,
# so a Restart & Run All reproduces the same fitted models.
MODEL_RANDOM_STATE = 0

# One (display name, estimator) pair per entry: the label stays attached to its
# model, so adding or removing an entry cannot shift names against models.
named_classifiers = [
    ('Multi-layer Perceptron',
     MLPClassifier(alpha=1, random_state=MODEL_RANDOM_STATE)),
    ('K-nearest Neighbor',
     KNeighborsClassifier(3)),
    ('Support Vector Machine',
     SVC(gamma=2, C=1)),
    # Disabled in the original notebook (reason not recorded there):
    # ('Gaussian Process',
    #  GaussianProcessClassifier(1.0 * RBF(1.0))),
    ('Decision Tree',
     DecisionTreeClassifier(max_depth=5, random_state=MODEL_RANDOM_STATE)),
    ('Random Forest',
     RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1,
                            random_state=MODEL_RANDOM_STATE)),
    ('AdaBoost',
     AdaBoostClassifier(random_state=MODEL_RANDOM_STATE)),
    ('Gaussian Naive Bayes',
     GaussianNB()),
    ('Quadratic Discriminant Analysis',
     QuadraticDiscriminantAnalysis()),
]

# Parallel lists: names[i] is the display label of models[i].
names = [label for label, estimator in named_classifiers]
models = [estimator for label, estimator in named_classifiers]

# Flat [name, model, name, model, ...] layout, kept so that any code indexing
# `classifiers` the old way keeps working.
classifiers = [item for pair in named_classifiers for item in pair]

In [None]:
# Read input file
df = pd.read_csv(INPUT_FILE_PATH)
df = df.dropna() # Drop rows with null values

# Build a binary one-vs-rest label: True only for rows of the target species
TARGET_BIRD = 'Rose-crested Blue Pipit'
df['y'] = df['English_name'] == TARGET_BIRD

# Split data
# Features: label-based column slice from 'X' through 'Croma_Deviation' (both ends included)
X = df.loc[:, 'X':'Croma_Deviation'].values
# Labels: cast the boolean directly so the target species is always 1 and every
# other species 0, independent of which values happen to be present.
y = df['y'].astype(int).values

# Scale features
# NOTE(review): `skl.preprocessing` / `skl.model_selection` are reached through the
# bare `sklearn` alias; presumably they resolve because the model imports in an
# earlier cell load those submodules -- confirm, or import them explicitly.
# NOTE(review): the scaler is fit on all rows, i.e. before any train/test split, so
# an evaluation that needs strict isolation should refit scaling per training fold.
scaler = skl.preprocessing.StandardScaler()
X = scaler.fit_transform(X)

# Initialize cross validation
# random_state pins the shuffle so the five folds are identical on every run.
kf = skl.model_selection.KFold(n_splits=5, shuffle=True, random_state=0)

In [None]:
# Evaluate every classifier on every cross-validation fold
accuracies = []  # one record per (fold, model): {'fold', 'model', 'accuracy'}
for fold, (train_index, test_index) in enumerate(kf.split(X), start=1):

    # Split labels for this fold
    y_train, y_test = y[train_index], y[test_index]

    # X was standardized on all rows before splitting, so the held-out rows
    # influenced the scaling. Re-standardizing with training-fold statistics only
    # removes that influence (standardization is affine, so this matches fitting
    # the scaler on the training fold alone, barring columns that are constant
    # within the training fold).
    fold_scaler = skl.preprocessing.StandardScaler().fit(X[train_index])
    X_train = fold_scaler.transform(X[train_index])
    X_test = fold_scaler.transform(X[test_index])

    # Fit and score each model; fit() is called again on the same estimator
    # object in every fold.
    for name, model in zip(names, models):
        model.fit(X_train, y_train)
        y_hat = model.predict(X_test)
        accuracy = skl.metrics.accuracy_score(y_test, y_hat)
        accuracies.append({'fold': fold, 'model': name, 'accuracy': accuracy})
        print(f'Fold {fold} | {name}: {accuracy:.4f}')

# Mean and spread of accuracy per model across folds, in the order the models
# were declared.
# NOTE(review): y is a one-species-vs-rest label, so plain accuracy may be
# dominated by the majority class -- confirm the class balance before comparing.
accuracy_summary = (
    pd.DataFrame(accuracies)
    .groupby('model', sort=False)['accuracy']
    .agg(['mean', 'std'])
)
accuracy_summary