In [57]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from scipy import stats
# Notebook-wide matplotlib defaults; they apply to every figure drawn after this cell runs.
plt.rcParams['figure.figsize'] = (4.0, 4.0) # set default size of plots
# Show image pixels without interpolation between them.
plt.rcParams['image.interpolation'] = 'nearest'
# Default colormap for images.
plt.rcParams['image.cmap'] = 'gray'
import seaborn as sns
import copy
from joblib import dump, load

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from scipy import stats
from sklearn.model_selection import cross_val_score
from sklearn import ensemble

In [66]:
# Load images here
# NOTE(review): absolute, machine-specific path; the notebook cannot run elsewhere without editing it.
# The feature cell indexes each image as [row][col] over range(28) and compares pixels with 0 and 1,
# so the file presumably holds a stack of 28x28 binary images -- confirm against the file.
img_list = np.load('/Users/jdobrow/Desktop/Data Science/Capstone 3 Files/image.npy')
# Deep copy, so later cells work on clean_list and img_list keeps the data exactly as loaded.
clean_list = copy.deepcopy(img_list)
# Starts empty; the feature cell adds one column per feature.
df = pd.DataFrame()

In [67]:
# Given an image, a pixel coordinate on that image, and a direction, this returns whether you can start
# at that pixel and travel in that direction all the way to the edge of the image without hitting a
# filled pixel. This function is used to help determine if a pixel is surrounded.
# 1 == up, 2 == right, 3 == down, 4 == left

def goesToWall(img, pixel, direction):
    """Return True if an empty pixel has a clear straight path to the image border.

    Parameters
    ----------
    img : 2-D indexable (list of lists or array), addressed as img[row][col].
        A pixel counts as filled when its value equals 1.
    pixel : sequence of two ints, [row, col].
    direction : int
        1 == up (towards row 0), 3 == down (towards the last row),
        4 == left (towards column 0); any other value is treated as right.

    Returns
    -------
    bool
        False if the starting pixel itself is filled, or if any pixel between it
        and the border in the given direction is filled; True otherwise.
    """
    row, col = pixel[0], pixel[1]

    if img[row][col] == 1:
        return False

    # Image size is read from the image instead of being fixed at 28x28.
    n_rows = len(img)
    n_cols = len(img[row])

    if direction == 1:
        path = ((r, col) for r in range(row))
    elif direction == 3:
        path = ((r, col) for r in range(row + 1, n_rows))
    elif direction == 4:
        path = ((row, c) for c in range(col))
    else:
        path = ((row, c) for c in range(col + 1, n_cols))

    # Every direction uses the same "== 1" test. The downward scan used to test
    # truthiness instead, so it disagreed with the other three directions on any
    # pixel value other than 0 or 1.
    return all(img[r][c] != 1 for r, c in path)
        
# Used to help condense images to 6x6 to help with dimensionality of features later
def checkRange(img, istart, iend, jstart, jend):
    """Return True if any pixel in rows [istart, iend) x columns [jstart, jend) equals 1.

    Positions of the window that lie past the end of the image are skipped, so a
    window that overhangs the edge is allowed. Only IndexError is tolerated; any
    other error (for example a wrong argument type) now propagates instead of
    being silently turned into False.
    """
    for i in range(istart, iend):
        for j in range(jstart, jend):
            try:
                if img[i][j] == 1:
                    return True
            except IndexError:
                # Outside the image: treat as "no filled pixel here".
                continue
    return False

# Finds and returns the next empty pixel in an image
def findNext(img):
    """Locate the first pixel equal to 0, scanning rows top to bottom and each row left to right.

    Only the top-left 28x28 region is scanned. Returns a one-element list
    [[row, col]] for the first hit, or None when no pixel in that region is 0.
    """
    hits = (
        [[row, col]]
        for row in range(28)
        for col in range(28)
        if img[row][col] == 0
    )
    return next(hits, None)

In [68]:
# NumberPositivePixels: per image, the number of pixels whose value is greater than zero.
total_pixels = [
    sum(1 for row in picture for value in row if value > 0)
    for picture in clean_list
]

df['NumberPositivePixels'] = total_pixels

# For every image, count filled pixels (== 1) whose next neighbour is empty (== 0):
#   VerticalEdges   -> the neighbour one row down      (j + 1, k)
#   HorizontalEdges -> the neighbour one column right  (j, k + 1)
# Pixels in the last row / last column have no such neighbour and are not counted for that direction.
# Explicit bounds tests replace the former bare `except:` blocks, which produced the same counts on
# 28x28 images but would also have hidden any unrelated error.
vertical_pixels = []
horizontal_pixels = []
for picture in clean_list:
    countv = 0
    counth = 0
    for j in range(28):
        for k in range(28):
            if picture[j][k] != 1:
                continue
            if j + 1 < 28 and picture[j + 1][k] == 0:
                countv += 1
            if k + 1 < 28 and picture[j][k + 1] == 0:
                counth += 1
    vertical_pixels.append(countv)
    horizontal_pixels.append(counth)
df['VerticalEdges'] = vertical_pixels
df['HorizontalEdges'] = horizontal_pixels

# Height: per image, the number of rows that contain at least one filled (== 1) pixel.
heights = []
for picture in clean_list:
    rows_with_ink = sum(
        1 for r in range(28)
        if any(picture[r][c] == 1 for c in range(28))
    )
    heights.append(rows_with_ink)
df['Height'] = heights

# Width: per image, the number of columns that contain at least one filled (== 1) pixel.
widths = []
for picture in clean_list:
    cols_with_ink = sum(
        1 for c in range(28)
        if any(picture[r][c] == 1 for r in range(28))
    )
    widths.append(cols_with_ink)
df['Width'] = widths

# YReflect: per image, the number of filled (== 1) pixels that land on a filled pixel when the image
# is mirrored across a horizontal line (a row index).
#
# The mirror row is found by scanning i = 6..27 and comparing the number of positive (> 0) pixels in
# rows i..27 with half of the image's positive pixels. The scan stops at the first i where that gap
# gets larger, and that i becomes the mirror row.
y_reflect = []
for picture in clean_list:
    # Positive pixels per row, so each candidate split is just a tail sum.
    row_totals = [sum(1 for k in range(28) if picture[j][k] > 0) for j in range(28)]
    pixel_count = sum(row_totals)

    closest = 784
    # Fallback for images where the gap never grows (e.g. a blank image). Before, line_reflection
    # was then undefined (NameError on the first image) or silently reused from the previous image.
    # 28 follows the scan's own "one past the best split" convention; with it every mirrored row is
    # outside the image, so such an image scores 0.
    line_reflection = 28
    for i in range(6, 28):
        side_pixel_count = sum(row_totals[i:])
        gap = abs(pixel_count/2 - side_pixel_count)
        if gap <= closest:
            closest = gap
        else:
            line_reflection = i
            break

    count = 0
    for j in range(28):
        mirror_row = 2*line_reflection - j
        if mirror_row >= 28:
            # The mirrored row is below the image: nothing to compare. This is the IndexError the
            # former bare `except:` used to swallow.
            continue
        # NOTE(review): when j > 2*line_reflection, mirror_row is negative and Python indexes from
        # the bottom of the image instead of failing, so those rows are compared with wrapped-around
        # rows. Deliberately left unchanged: the saved model is presumably fitted on features
        # computed this way -- confirm, and fix together with a retrain.
        for k in range(28):
            if picture[j][k] == 1 and picture[mirror_row][k] == 1:
                count += 1
    y_reflect.append(count)
df['YReflect'] = y_reflect

# XReflect: per image, the number of filled (== 1) pixels that land on a filled pixel when the image
# is mirrored across a vertical line (a column index).
#
# The mirror column is found by scanning i = 6..27 and comparing the number of positive (> 0) pixels
# in columns i..27 with half of the image's positive pixels. The scan stops at the first i where that
# gap gets larger, and that i becomes the mirror column.
x_reflect = []
for picture in clean_list:
    # Positive pixels per column, so each candidate split is just a tail sum.
    col_totals = [sum(1 for j in range(28) if picture[j][k] > 0) for k in range(28)]
    pixel_count = sum(col_totals)

    closest = 784
    # Fallback for images where the gap never grows (e.g. a blank image). Before, line_reflection
    # was then undefined (NameError on the first image) or silently reused from an earlier image.
    # 28 follows the scan's own "one past the best split" convention; with it every mirrored column
    # is outside the image, so such an image scores 0.
    line_reflection = 28
    for i in range(6, 28):
        side_pixel_count = sum(col_totals[i:])
        gap = abs(pixel_count/2 - side_pixel_count)
        if gap <= closest:
            closest = gap
        else:
            line_reflection = i
            break

    count = 0
    for j in range(28):
        for k in range(28):
            mirror_col = 2*line_reflection - k
            if mirror_col >= 28:
                # The mirrored column is right of the image: nothing to compare. This is the
                # IndexError the former bare `except:` used to swallow.
                continue
            # NOTE(review): when k > 2*line_reflection, mirror_col is negative and Python indexes
            # from the right edge instead of failing, so those pixels are compared with wrapped-around
            # columns. Deliberately left unchanged: the saved model is presumably fitted on features
            # computed this way -- confirm, and fix together with a retrain.
            if picture[j][k] == 1 and picture[j][mirror_col] == 1:
                count += 1
    x_reflect.append(count)
df['XReflect'] = x_reflect

# For every empty (== 0) pixel, count in how many of the four directions goesToWall reports a clear
# path, then record per image how many empty pixels had 0, 1, 2, 3 and 4 clear directions.
none_counts = []
one_counts = []
two_counts = []
three_counts = []
four_counts = []
for picture in clean_list:
    # reach_hist[n] = number of empty pixels with exactly n clear directions
    reach_hist = [0, 0, 0, 0, 0]
    for row in range(28):
        for col in range(28):
            if picture[row][col] != 0:
                continue
            open_dirs = sum(
                1 for direction in (1, 2, 3, 4)
                if goesToWall(picture, [row, col], direction)
            )
            reach_hist[open_dirs] += 1
    none_counts.append(reach_hist[0])
    one_counts.append(reach_hist[1])
    two_counts.append(reach_hist[2])
    three_counts.append(reach_hist[3])
    four_counts.append(reach_hist[4])
df['NoneToWall'] = none_counts
df['OneToWall'] = one_counts
df['TwoToWall'] = two_counts
df['ThreeToWall'] = three_counts
df['FourToWall'] = four_counts

# Per image, count the empty (== 0) pixels that have a clear path to the border in exactly one
# direction, split by which direction that is (1 == up, 2 == right, 3 == down, 4 == left).
upcounts = []
rightcounts = []
downcounts = []
leftcounts = []
for picture in clean_list:
    single_exit = {1: 0, 2: 0, 3: 0, 4: 0}
    for row in range(28):
        for col in range(28):
            if picture[row][col] == 0:
                open_dirs = [
                    direction for direction in (1, 2, 3, 4)
                    if goesToWall(picture, [row, col], direction)
                ]
                # Only pixels with one single way out are counted.
                if len(open_dirs) == 1:
                    single_exit[open_dirs[0]] += 1
    upcounts.append(single_exit[1])
    rightcounts.append(single_exit[2])
    downcounts.append(single_exit[3])
    leftcounts.append(single_exit[4])
df['GoesToTopOnly'] = upcounts
df['GoesToRightOnly'] = rightcounts
df['GoesToDownOnly'] = downcounts
df['GoesToLeftOnly'] = leftcounts

# CurvedPixels: per image, the number of filled (== 1) pixels that have exactly two filled neighbours
# among the pixels below, above, right and left of them.
#
# Explicit bounds tests replace the former bare `except:` blocks (which only ever fired for the
# i + 1 / j + 1 look-ups on the last row / column, and would also have hidden unrelated errors).
curved_pixels = []
for picture in clean_list:
    curvy_count = 0
    for i in range(28):
        for j in range(28):
            if picture[i][j] != 1:
                continue
            neighbours = 0
            if i + 1 < 28 and picture[i + 1][j] == 1:
                neighbours += 1
            # NOTE(review): at i == 0 (and j == 0 below) the index -1 does not fail; it reads the
            # last row (column), so border pixels see a wrapped-around neighbour. Deliberately left
            # unchanged: the saved model is presumably fitted on features computed this way --
            # confirm, and fix together with a retrain.
            if picture[i - 1][j] == 1:
                neighbours += 1
            if j + 1 < 28 and picture[i][j + 1] == 1:
                neighbours += 1
            if picture[i][j - 1] == 1:
                neighbours += 1
            if neighbours == 2:
                curvy_count += 1
    curved_pixels.append(curvy_count)
df['CurvedPixels'] = curved_pixels

# HorizontalFlips: per image, the average number of value changes met when walking a row from left
# to right, averaged over the rows that contain at least one change.
horizontal_averages = []
for img in clean_list:
    horizontal_change = 0  # total changes over all rows
    count = 0              # rows with at least one change
    for i in range(28):
        # A flip is any pair of horizontally adjacent pixels with different values.
        flips = sum(1 for j in range(1, 28) if img[i][j] != img[i][j - 1])
        if flips > 0:
            count += 1
        horizontal_change += flips
    # An image whose rows are all uniform (e.g. a blank image) has no row to average over;
    # report 0.0 instead of raising ZeroDivisionError.
    horizontal_averages.append(horizontal_change/count if count else 0.0)
df['HorizontalFlips'] = horizontal_averages

# VerticalFlips: per image, the average number of value changes met when walking a column from top
# to bottom, averaged over the columns that contain at least one change.
vertical_averages = []
for img in clean_list:
    vertical_change = 0  # total changes over all columns
    count = 0            # columns with at least one change
    for i in range(28):
        # A flip is any pair of vertically adjacent pixels with different values.
        flips = sum(1 for j in range(1, 28) if img[j][i] != img[j - 1][i])
        if flips > 0:
            count += 1
        vertical_change += flips
    # An image whose columns are all uniform (e.g. a blank image) has no column to average over;
    # report 0.0 instead of raising ZeroDivisionError.
    vertical_averages.append(vertical_change/count if count else 0.0)
df['VerticalFlips'] = vertical_averages

# AreaCounts: per image, repeatedly fill connected runs of empty (== 0) pixels on a copy of the image
# (marking every visited pixel as 1) until the whole 28x28 grid is 1, and keep a tally of fills.
#
# Tally rule, exactly as coded: areas_count starts at 1 and goes up by one each time a fill that
# expanded at least once (iteration > 0) runs out of new pixels while the image is still not
# completely filled. A fill that never expanded is not tallied.
#
# NOTE(review): the `- 1` neighbour look-ups do not raise at row/column 0; index -1 reads the opposite
# edge, so a fill can cross from the top row to the bottom row and from the left column to the right
# column. The bare `except:` blocks therefore only ever catch the `+ 1` look-ups past the last
# row/column, and would also hide any unrelated error.
# NOTE(review): if an image holds a value other than 0 or 1, `complete` can never be 1 while findNext
# finds no 0 and returns None; the next `for coor in active` would then fail. Presumably images are
# strictly binary -- confirm.
area_counts = []
for i in clean_list:
    # Work on a copy so the marking below leaves clean_list alone (assumes a NumPy array, whose
    # .copy() is a full copy -- TODO confirm).
    img = i.copy()
    # The first fill always starts from the top-left pixel, whatever its original value.
    img[0][0] = 1
    active = [[0,0]]      # pixels whose neighbours are examined in this pass
    new_active = []       # empty neighbours found during this pass
    go = 1                # loop flag: 1 while the image is not yet completely filled
    areas_count = 1
    iteration = 0         # number of passes in which the current fill found new pixels
    while go == 1:
        for coor in active:
            img[coor[0]][coor[1]] = 1
            # Neighbour one row down.
            try:
                if img[coor[0] + 1][coor[1]] == 0:
                    new_active.append([coor[0] + 1, coor[1]])
                    img[coor[0] + 1][coor[1]] = 1
            except:
                None
            # Neighbour one row up (index -1 wraps to the last row, see note above).
            try:
                if img[coor[0] - 1][coor[1]] == 0:
                    new_active.append([coor[0] - 1, coor[1]])
                    img[coor[0] - 1][coor[1]] = 1
            except:
                None
            # Neighbour one column right.
            try:
                if img[coor[0]][coor[1] + 1] == 0:
                    new_active.append([coor[0], coor[1] + 1])
                    img[coor[0]][coor[1] + 1] = 1
            except:
                None
            # Neighbour one column left (index -1 wraps to the last column, see note above).
            try:
                if img[coor[0]][coor[1] - 1] == 0:
                    new_active.append([coor[0], coor[1] - 1])
                    img[coor[0]][coor[1] - 1] = 1
            except:
                None
        if len(new_active) == 0:
            # The current fill is exhausted. The product of all pixels is 1 only when every pixel is 1.
            complete = 1
            # This loop rebinds the outer loop's name `i`; the outer `for` simply assigns the next
            # image to it on its next step, and `img` was copied before this point.
            for i in range(28):
                for j in range(28):
                    complete *= img[i][j]
            if complete == 1:
                go = 0
            else:
                if iteration > 0:
                    areas_count += 1
                # Restart from the first pixel that is still 0 (row-major order).
                active = (findNext(img))
                new_active = []
                iteration = 0
        else:
            # Continue the same fill from the pixels just discovered.
            active = new_active
            new_active = []
            iteration += 1
    area_counts.append(areas_count)
df['AreaCounts'] = area_counts

# Shrink every image to a 6x6 grid: cell (r, c) is set to 1 when the 4x4 window whose top-left corner
# is (2 + 4*r, 2 + 4*c) contains a filled pixel (as reported by checkRange), otherwise it stays 0.
condensed_train = []
for picture in clean_list:
    grid = np.zeros((6,6))
    for r in range(6):
        top = 2 + 4*r
        for c in range(6):
            left = 2 + 4*c
            if checkRange(picture, top, top + 4, left, left + 4):
                grid[r][c] = 1
    condensed_train.append(grid)

# One column per grid cell. CondPix<n> reads cell [n % 6][n // 6], i.e. the cells are numbered down
# each grid column first.
for cell in range(36):
    r, c = cell % 6, cell // 6
    df['CondPix' + str(cell)] = [grid[r][c] for grid in condensed_train]

# Turn raw counts into ratios, then drop the raw columns so only the scaled versions remain.
# The order of the assignments below fixes the column order of df.
# A zero denominator (no positive pixels, or no empty pixels) gives inf/NaN rather than an error.
positive_pixels = df['NumberPositivePixels']
empty_pixels = 784 - positive_pixels  # 784 == 28 * 28 pixels per image

# Edge counts beyond one per occupied column / row, relative to the number of positive pixels.
# (The unscaled ExtraVerticals / ExtraHorizontals columns used to be created here only to be dropped
# again at the end of the cell, so they are no longer created.)
df['ScaledExtraVerticals'] = abs(df['VerticalEdges'] - df['Width'])/positive_pixels
df['ScaledExtraHorizontals'] = abs(df['HorizontalEdges'] - df['Height'])/positive_pixels

raw_first = ['VerticalEdges', 'HorizontalEdges', 'YReflect', 'XReflect']
raw_to_wall = ['NoneToWall', 'OneToWall', 'TwoToWall', 'ThreeToWall', 'FourToWall']
raw_last = ['CurvedPixels', 'GoesToTopOnly', 'GoesToRightOnly', 'GoesToDownOnly', 'GoesToLeftOnly']

for name in raw_first:
    df['Scaled' + name] = df[name]/positive_pixels
# The *ToWall counts are divided by the number of non-positive pixels.
for name in raw_to_wall:
    df['Scaled' + name] = df[name]/empty_pixels
for name in raw_last:
    df['Scaled' + name] = df[name]/positive_pixels

# `columns=` replaces the positional axis argument (`df.drop(labels, 1, ...)`), which pandas 2.x no
# longer accepts. The repeated 'HorizontalEdges' label is gone as well.
df.drop(columns=raw_first + raw_to_wall + raw_last, inplace=True)


In [69]:
# Show the feature table (the feature cell appends one value per image to every column).
df

Unnamed: 0,NumberPositivePixels,Height,Width,HorizontalFlips,VerticalFlips,AreaCounts,CondPix0,CondPix1,CondPix2,CondPix3,...,ScaledNoneToWall,ScaledOneToWall,ScaledTwoToWall,ScaledThreeToWall,ScaledFourToWall,ScaledCurvedPixels,ScaledGoesToTopOnly,ScaledGoesToRightOnly,ScaledGoesToDownOnly,ScaledGoesToLeftOnly
0,392,25,28,3.782609,4.892857,7,1.0,1.0,1.0,1.0,...,0.308673,0.063776,0.413265,0.214286,0.0,0.137755,0.0,0.012755,0.017857,0.033163


In [70]:
# Load a saved model object with joblib; it is used via clf.predict in the next cell.
# NOTE(review): joblib.load unpickles the file, which can run arbitrary code -- only load files from a
# trusted source.
# NOTE(review): absolute, machine-specific path; the notebook cannot run elsewhere without editing it.
clf = load('/Users/jdobrow/Desktop/Data Science/Capstone 3 Files/gradientmodel')

In [71]:
# Predict one label per row of the feature table.
# NOTE(review): presumably df must carry the same columns, in the same order, as the data the model
# was fitted on -- confirm against the training notebook.
clf.predict(df)

array([3])