# K Nearest Neighbors Model with Bayes

Refined kNN classification model with Bayesian input from the very-accurate breed classification

# Imports

In [2]:
## Standard imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from seaborn import set_style
import csv
set_style("whitegrid")

## More specific imports for this notebook
import joblib ## For saving trained models
from sklearn.neighbors import KNeighborsClassifier ## Import the model here
from sklearn.model_selection import train_test_split ## Import train_test_split
from sklearn.metrics import confusion_matrix ## Import confusion_matrix
from sklearn.metrics import accuracy_score

# Initial Settings and Load Data

In [3]:
data_fp = '../../data/processed_data/specgram_aug_Fretim_mask.npy' ## Import raw wave data

file = open('../../data/processed_data/metadatadata.csv','r') ##Import raw classifications corresponding to raw data
data_cat = list(csv.reader(file, delimiter=','))
file.close()

# Load data (currently without classifications attached)
x_data = np.load(data_fp)

# Flatten the 2-dim matrices into vectors for the kNN
X = []
for x in x_data:
    X.append(x.flatten())

# Isolate list of categories for this data
y = []
for t in range(1,len(data_cat)):
    y.append(data_cat[t][2])
    
print(len(X), len(y)) # Same length is required

440 440


In [4]:
df = pd.read_csv("../../data/processed_data/metadata.csv")

situation_to_number = {'brushing': 0, 'food': 1, 'isolation': 2}
breed_to_number = {'european_shorthair': 0, 'maine_coon': 1}
sex_to_number = {0:0, 1:1} ## Sex already transfered to number; this is for uniformity of the code
breed_and_situation_to_number = {'brushing_and_european_shorthair': 0, 'brushing_and_maine_coon': 1,
                    'food_and_european_shorthair': 2, 'food_and_maine_coon': 3,
                    'isolation_and_european_shorthair': 4, 'isolation_and_maine_coon': 5} ## Use for Bayesian study

## Create a new column with numerical values based on the situation mapping
model_types = ['situation', 'breed'] ## Predict model_types[0] after filtering by model_types[1]
model_type = model_types[1] + '_given_' + model_types[1]

df['numerical_'+model_types[0]] = df[model_types[0]].map(situation_to_number)
df['numerical_'+model_types[1]] = df[model_types[1]].map(breed_to_number)
df['numerical_'+model_type] = 2*df['numerical_'+model_types[0]] + df['numerical_'+model_types[1]]

y_situation = df['numerical_'+model_types[0]]
y_breed = df['numerical_'+model_types[1]]
y_combined = df['numerical_'+model_type]

X_split = [[],[]]
y_split = [[],[]]
for a in range(0,len(X)):
    X_split[ y_combined[a]%2 ].append(X[a])
    y_split[ y_combined[a]%2 ].append(y[a])

# Train Test Splits

In [5]:
## Set up the train test splits we need

#Use these variables to automate saving runs with different filesnames
test_size = 1/5
random_state = 440

## Train test split for modelinlg the combined breed
x_train_br, x_test_br, y_train_br, y_test_br = train_test_split(X.copy(), y_breed,
                                        shuffle = True,
                                        random_state = random_state,
                                        test_size=test_size)

## Train test split for modelinlg the combined situation 
x_train_sit, x_test_sit, y_train_sit, y_test_sit = train_test_split(X.copy(), y_situation,
                                        shuffle = True,
                                        random_state = random_state,
                                        test_size=test_size)

## Train test split for modelinlg the combined breed-situation combination
x_train_com, x_test_com, y_train_com, y_test_com = train_test_split(X.copy(), y_combined,
                                        shuffle = True,
                                        random_state = random_state,
                                        test_size=test_size)

## Train test split to modeling the situation for shorthairs
x_train_sh, x_test_sh, y_train_sh, y_test_sh = train_test_split(X_split[0].copy(), y_split[0],
                                        shuffle = True,
                                        random_state = random_state,
                                        test_size=test_size)

## Train test split for modeling the situation for maine coons
x_train_mc, x_test_mc, y_train_mc, y_test_mc = train_test_split(X_split[1].copy(), y_split[1],
                                        shuffle = True,
                                        random_state = random_state,
                                        test_size=test_size)

print(x_train_br)

[array([10.0550995, 10.823364 , 12.937111 , ..., -1.       , -1.       ,
       -1.       ], dtype=float32), array([13.321991, 17.073288, 15.714199, ..., -1.      , -1.      ,
       -1.      ], dtype=float32), array([10.714146, 18.658554, 18.412472, ..., -1.      , -1.      ,
       -1.      ], dtype=float32), array([33.766716, 39.02159 , 36.6618  , ..., -1.      , -1.      ,
       -1.      ], dtype=float32), array([28.421787, 31.738285, 36.22747 , ..., -1.      , -1.      ,
       -1.      ], dtype=float32), array([33.68839 , 34.772446, 36.501667, ..., -1.      , -1.      ,
       -1.      ], dtype=float32), array([13.409584, 17.821125, 20.319588, ..., -1.      , -1.      ,
       -1.      ], dtype=float32), array([34.678516, 34.808823, 32.852627, ..., -1.      , -1.      ,
       -1.      ], dtype=float32), array([13.233261, 18.493233, 22.14835 , ..., -1.      , -1.      ,
       -1.      ], dtype=float32), array([33.153732, 35.302547, 36.595882, ..., -1.      , -1.      ,
       -

# Fit Models

In [35]:
## Use these variables later to automate saving runs with different filesnames

k_breed = 4 #optimal value 4 from other file
k_situation = 5 #optimal value 5 from other file
k_combined = 5 #optimized in this file
k_shorthair = 7 #optimized in this file
k_mainecoon = 4 #optimized in this file

## Make the model objects
knn_breed = KNeighborsClassifier(k_breed)
knn_situation = KNeighborsClassifier(k_situation)
knn_combined = KNeighborsClassifier(k_combined)
knn_shorthair = KNeighborsClassifier(k_shorthair)
knn_mainecoon = KNeighborsClassifier(k_mainecoon)

## "Fit" the model object
knn_breed.fit(x_train_br, y_train_br)
knn_situation.fit(x_train_sit, y_train_sit)
knn_combined.fit(x_train_com, y_train_com)
knn_shorthair.fit(x_train_sh, y_train_sh)
knn_mainecoon.fit(x_train_mc, y_train_mc)

# Assess Model Performances

In [36]:
## Predict on the training sets
y_test_pred_br = knn_breed.predict(x_test_br)
y_test_pred_sit = knn_situation.predict(x_test_sit)
y_test_pred_com = knn_combined.predict(x_test_com)
y_test_pred_sh = knn_shorthair.predict(x_test_sh)
y_test_pred_mc = knn_mainecoon.predict(x_test_mc)

## Compute confusion matrix for each model
conf_mat_breed = confusion_matrix(y_test_br, y_test_pred_br)
conf_mat_situation = confusion_matrix(y_test_sit, y_test_pred_sit)
conf_mat_combined = confusion_matrix(y_test_com, y_test_pred_com)
conf_mat_shorthair = confusion_matrix(y_test_sh, y_test_pred_sh)
conf_mat_mainecoon = confusion_matrix(y_test_mc, y_test_pred_mc)

## Compute accuracy for the model
acc_breed = accuracy_score(y_test_br, y_test_pred_br)
acc_situation = accuracy_score(y_test_sit, y_test_pred_sit)
acc_combined = accuracy_score(y_test_com, y_test_pred_com)
acc_shorthair = accuracy_score(y_test_sh, y_test_pred_sh)
acc_mainecoon = accuracy_score(y_test_mc, y_test_pred_mc)

print('Breed accuracy:', acc_breed)
print('Situation accuracy:', acc_situation)
print('Combined accuracy:', acc_combined, '( Breed * Situation =', acc_breed*acc_situation,')')
print('(Situation | European Shorthair) accuracy:', acc_shorthair)
print('(Situation | Maine Coon) accuracy:', acc_mainecoon)
print()
print('Breed confusion matrix:')
print(conf_mat_breed)
print()
print('Situation confusion matrix:')
print(conf_mat_situation)
print()
print('Combined confusion matrix:')
print(conf_mat_combined)
print()
print('(Situation | European Shorthair) confusion matrix:')
print(conf_mat_shorthair)
print()
print('(Situation | Maine Coon) confusion matrix:')
print(conf_mat_mainecoon)

Breed accuracy: 0.9431818181818182
Situation accuracy: 0.6704545454545454
Combined accuracy: 0.625 ( Breed * Situation = 0.6323605371900827 )
(Situation | European Shorthair) accuracy: 0.6862745098039216
(Situation | Maine Coon) accuracy: 0.6052631578947368

Breed confusion matrix:
[[51  2]
 [ 3 32]]

Situation confusion matrix:
[[17  4 10]
 [ 7  9  5]
 [ 1  2 33]]

Combined confusion matrix:
[[10  0  1  0  5  1]
 [ 1  6  1  1  0  5]
 [ 6  1  3  0  2  0]
 [ 0  2  1  3  0  3]
 [ 1  0  0  1 22  0]
 [ 0  0  0  1  0 11]]

(Situation | European Shorthair) confusion matrix:
[[ 7  2  6]
 [ 1  4  3]
 [ 2  2 24]]

(Situation | Maine Coon) confusion matrix:
[[ 6  3  3]
 [ 3  4  4]
 [ 1  1 13]]
