In [1]:
import pandas as pd
import numpy as np
import cv2
import os
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score

In [2]:
# Base folder for the dataset: the kernel's working directory at the time this
# cell runs. The '/no' and '/yes' image folders are looked up beneath it.
directory = os.getcwd()

def process_images(directory, subfolder, image_size=(128, 128)):
    """Load every readable image in ``directory + subfolder`` as a flat pixel vector.

    Parameters
    ----------
    directory : str
        Base path; ``subfolder`` is appended to it verbatim.
    subfolder : str
        Folder suffix including its leading separator, e.g. ``'/no'`` or
        ``'/yes'``. ``'/no'`` is labelled 0; any other value is labelled 1.
    image_size : tuple of int, optional
        Target size handed to ``cv2.resize``. Defaults to ``(128, 128)``.

    Returns
    -------
    pixels : list
        One flattened, resized pixel array per image that could be decoded.
    labels : list of int
        The folder's label, repeated once per entry in ``pixels``.

    Notes
    -----
    The process working directory is left untouched: files are opened through
    their full path instead of ``os.chdir``-ing into the folder.
    """
    label = 0 if subfolder == '/no' else 1
    folder = directory + subfolder

    labels = []
    pixels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split built on it) stable across runs.
    for file in sorted(os.listdir(folder)):

        # cv2.imread signals failure by returning None rather than raising,
        # so entries that are not decodable images are skipped here instead
        # of crashing inside cv2.resize.
        img = cv2.imread(os.path.join(folder, file))
        if img is None:
            continue

        # Resize to a common shape so every sample has the same feature length
        img_array = cv2.resize(img, image_size).flatten()

        labels.append(label)
        pixels.append(img_array)

    return pixels, labels

# Load both classes in a fixed order: the '/no' folder first, then '/yes'.
(pixels_no, labels_no), (pixels_yes, labels_yes) = (
    process_images(directory, class_folder) for class_folder in ('/no', '/yes')
)


In [3]:
# Split each class separately (80% train / 20% test) so that the train and
# test sets keep the same proportion of yes's and no's.
# A fixed random_state makes the split, and every score computed from it,
# reproducible across kernel restarts.
RANDOM_STATE = 42

(X_no_train, X_no_test, y_no_train, y_no_test) = train_test_split(
    pixels_no, labels_no, test_size=0.2, random_state=RANDOM_STATE)

(X_yes_train, X_yes_test, y_yes_train, y_yes_test) = train_test_split(
    pixels_yes, labels_yes, test_size=0.2, random_state=RANDOM_STATE)

# Stack the per-class pieces back together: 'no' samples first, then 'yes'
X_train = np.concatenate((X_no_train, X_yes_train), axis=0)
X_test = np.concatenate((X_no_test, X_yes_test), axis=0)

y_train = np.concatenate((y_no_train, y_yes_train), axis=0)
y_test = np.concatenate((y_no_test, y_yes_test), axis=0)

In [4]:
# Fit a 1-nearest-neighbour classifier on the training pixels
model = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

# Score once on the data the model was fitted on and once on the held-out split
train_acc = model.score(X_train, y_train)
test_acc = model.score(X_test, y_test)

print("Training Accuracy: {:.2f}%".format(100 * train_acc))
print("Test Accuracy: {:.2f}%".format(100 * test_acc))

Training Accuracy: 100.00%
Test Accuracy: 76.47%


In [5]:
# 10-fold cross-validation over the full dataset, using a fresh 1-NN classifier.
# The full dataset is rebuilt by stacking the train and test pieces of both classes.
model = KNeighborsClassifier(n_neighbors=1)

X = np.concatenate(
    (X_no_train, X_yes_train, X_no_test, X_yes_test),
    axis=0,
)
y = np.concatenate(
    (y_no_train, y_yes_train, y_no_test, y_yes_test),
    axis=0,
)

scores = cross_val_score(model, X, y, scoring='accuracy', cv=10)
mean_accuracy = np.mean(scores)
print("Cross-Validation Accuracy: {:.2f}%".format(mean_accuracy * 100))

Cross-Validation Accuracy: 76.68%
