# In [1]:
import csv
import scipy

def readcsv(name):
    """Read an image stored as CSV and turn it into a binary 2-D list.

    Every non-blank line of the file is one image row made of
    comma-separated integers.  Values below 128 are mapped to 1 (a filled
    pixel); every other value is mapped to 0.

    Args:
        name: Path of the CSV file to read.

    Returns:
        A list of rows, each row being a list of 0/1 integers.  An empty
        file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field is not a valid integer.
    """
    pixels = []
    with open(name, newline='') as csvfile:
        # Split on commas with the csv module itself instead of reading
        # space-delimited fields and re-splitting the first one by hand;
        # that approach lost everything after a space on the line.
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; skip it rather
                # than failing on a missing first field.
                continue
            # int() tolerates surrounding whitespace, so "1, 2" parses too.
            # Thresholding per row keeps rows of differing length safe.
            pixels.append([1 if int(value) < 128 else 0 for value in row])
    return pixels

def nr_pix(pixels):
    """Return how many entries of the 2-D grid ``pixels`` are equal to 1.

    Args:
        pixels: Iterable of rows, each an iterable of pixel values.

    Returns:
        The number of filled (value 1) pixels as an int.
    """
    return sum(1 for line in pixels for value in line if value == 1)

def rows_with_1(pixels):
    """Return the number of rows that contain exactly one filled pixel.

    Args:
        pixels: Iterable of rows, each an iterable of pixel values.

    Returns:
        Count of rows in which exactly one value equals 1.
    """
    # Filled-pixel total for each row, produced lazily.
    per_row = (sum(1 for value in line if value == 1) for line in pixels)
    return sum(1 for filled in per_row if filled == 1)

def cols_with_1(pixels):
    """Return the number of columns that contain exactly one filled pixel.

    The grid size is taken from ``pixels`` itself rather than assumed to be
    18x18, so smaller, larger and non-square images are handled.

    Args:
        pixels: Rectangular 2-D grid (list of rows) of pixel values.

    Returns:
        Count of columns in which exactly one value equals 1; 0 for an
        empty grid.
    """
    # zip(*pixels) transposes the grid, yielding one tuple per column.
    return sum(
        1
        for column in zip(*pixels)
        if sum(1 for value in column if value == 1) == 1
    )

def rows_with_3p(pixels):
    """Return the number of rows that contain three or more filled pixels.

    Args:
        pixels: Iterable of rows, each an iterable of pixel values.

    Returns:
        Count of rows in which at least three values equal 1.
    """
    qualifying = 0
    for line in pixels:
        if sum(1 for value in line if value == 1) >= 3:
            qualifying += 1
    return qualifying

def cols_with_3p(pixels):
    """Return the number of columns that contain three or more filled pixels.

    The grid size is taken from ``pixels`` itself rather than assumed to be
    18x18, so smaller, larger and non-square images are handled.

    Args:
        pixels: Rectangular 2-D grid (list of rows) of pixel values.

    Returns:
        Count of columns in which at least three values equal 1; 0 for an
        empty grid.
    """
    # zip(*pixels) transposes the grid, yielding one tuple per column.
    return sum(
        1
        for column in zip(*pixels)
        if sum(1 for value in column if value == 1) >= 3
    )

def aspect_ratio(pixels):
    """Return width / height of the bounding box around the filled pixels.

    The bounding box spans from the first to the last row, and from the
    leftmost to the rightmost column, that contain a value equal to 1.
    The grid size is taken from ``pixels`` itself rather than assumed to
    be 18x18.

    Args:
        pixels: 2-D grid (list of rows) of pixel values.

    Returns:
        The ratio as a float.  An image with no filled pixel has no
        bounding box; 0.0 is returned for it instead of a value derived
        from placeholder bounds.
    """
    # Row indices (ascending) that hold at least one filled pixel.
    filled_rows = [
        r for r, row in enumerate(pixels)
        if any(value == 1 for value in row)
    ]
    if not filled_rows:
        return 0.0

    # Column index of every filled pixel; only the extremes matter.
    filled_cols = [
        c
        for row in pixels
        for c, value in enumerate(row)
        if value == 1
    ]

    height = filled_rows[-1] - filled_rows[0] + 1
    width = max(filled_cols) - min(filled_cols) + 1
    return width / height

def check_neighbours(pixels,x,y):
    """Return the names of the filled neighbours of the pixel at row x, column y.

    Args:
        pixels: 2-D grid (list of rows) of pixel values.
        x: Row index of the pixel being inspected.
        y: Column index of the pixel being inspected.

    Returns:
        A list of direction names, always in the order "upper", "left",
        "lower", "right", "upper-left", "upper-right", "lower-left",
        "lower-right", containing only the directions whose neighbouring
        cell lies inside the grid and equals 1.
    """
    # Whether a one-step move in each direction stays inside the grid.
    above = x - 1 >= 0
    below = x + 1 < len(pixels)
    to_left = y - 1 >= 0
    to_right = y + 1 < len(pixels[0])

    # (name, is the cell inside the grid?, row, column); order fixes the
    # order of the returned names.
    probes = (
        ("upper", above, x - 1, y),
        ("left", to_left, x, y - 1),
        ("lower", below, x + 1, y),
        ("right", to_right, x, y + 1),
        ("upper-left", above and to_left, x - 1, y - 1),
        ("upper-right", above and to_right, x - 1, y + 1),
        ("lower-left", below and to_left, x + 1, y - 1),
        ("lower-right", below and to_right, x + 1, y + 1),
    )
    # The cell is only read once its position is known to be valid.
    return [
        label
        for label, inside, row, col in probes
        if inside and pixels[row][col] == 1
    ]

def neigh_1(pixels):
    """Return the number of filled pixels that have exactly one filled neighbour.

    Neighbours are the up-to-eight surrounding cells reported by
    ``check_neighbours``.  The whole grid is scanned, whatever its size,
    instead of a fixed 18x18 window.

    Args:
        pixels: Rectangular 2-D grid (list of rows) of pixel values.

    Returns:
        Count of cells equal to 1 for which ``check_neighbours`` returns a
        list of length one.
    """
    one_neighs = 0
    for x, row in enumerate(pixels):
        for y, value in enumerate(row):
            if value == 1 and len(check_neighbours(pixels, x, y)) == 1:
                one_neighs += 1
    return one_neighs

def no_neigh_above(pixels):
    """Return the number of filled pixels with no filled neighbour above them.

    A pixel counts when ``check_neighbours`` reports none of "upper",
    "upper-left" and "upper-right" for it.  The whole grid is scanned,
    whatever its size, instead of a fixed 18x18 window.

    Args:
        pixels: Rectangular 2-D grid (list of rows) of pixel values.

    Returns:
        Count of qualifying cells equal to 1.
    """
    excluded = {"upper", "upper-left", "upper-right"}
    count = 0
    for x, row in enumerate(pixels):
        for y, value in enumerate(row):
            # One neighbour lookup per pixel, tested against all three names.
            if value == 1 and excluded.isdisjoint(check_neighbours(pixels, x, y)):
                count += 1
    return count

def no_neigh_below(pixels):
    """Return the number of filled pixels with no filled neighbour below them.

    A pixel counts when ``check_neighbours`` reports none of "lower",
    "lower-left" and "lower-right" for it.  The whole grid is scanned,
    whatever its size, instead of a fixed 18x18 window.

    Args:
        pixels: Rectangular 2-D grid (list of rows) of pixel values.

    Returns:
        Count of qualifying cells equal to 1.
    """
    excluded = {"lower", "lower-left", "lower-right"}
    count = 0
    for x, row in enumerate(pixels):
        for y, value in enumerate(row):
            # One neighbour lookup per pixel, tested against all three names.
            if value == 1 and excluded.isdisjoint(check_neighbours(pixels, x, y)):
                count += 1
    return count

def no_neigh_left(pixels):
    """Return the number of filled pixels with no filled neighbour to their left.

    A pixel counts when ``check_neighbours`` reports none of "left",
    "lower-left" and "upper-left" for it.  The whole grid is scanned,
    whatever its size, instead of a fixed 18x18 window.

    Args:
        pixels: Rectangular 2-D grid (list of rows) of pixel values.

    Returns:
        Count of qualifying cells equal to 1.
    """
    excluded = {"left", "lower-left", "upper-left"}
    count = 0
    for x, row in enumerate(pixels):
        for y, value in enumerate(row):
            # One neighbour lookup per pixel, tested against all three names.
            if value == 1 and excluded.isdisjoint(check_neighbours(pixels, x, y)):
                count += 1
    return count

def no_neigh_right(pixels):
    """Return the number of filled pixels with no filled neighbour to their right.

    A pixel counts when ``check_neighbours`` reports none of "right",
    "lower-right" and "upper-right" for it.  The whole grid is scanned,
    whatever its size, instead of a fixed 18x18 window.

    Args:
        pixels: Rectangular 2-D grid (list of rows) of pixel values.

    Returns:
        Count of qualifying cells equal to 1.
    """
    excluded = {"right", "lower-right", "upper-right"}
    count = 0
    for x, row in enumerate(pixels):
        for y, value in enumerate(row):
            # One neighbour lookup per pixel, tested against all three names.
            if value == 1 and excluded.isdisjoint(check_neighbours(pixels, x, y)):
                count += 1
    return count

def no_neigh_horiz(pixels):
    """Return the number of filled pixels with no directly horizontal neighbour.

    A pixel counts when ``check_neighbours`` reports neither "left" nor
    "right" for it; diagonal neighbours are not considered.  The whole
    grid is scanned, whatever its size, instead of a fixed 18x18 window.

    Args:
        pixels: Rectangular 2-D grid (list of rows) of pixel values.

    Returns:
        Count of qualifying cells equal to 1.
    """
    excluded = {"left", "right"}
    count = 0
    for x, row in enumerate(pixels):
        for y, value in enumerate(row):
            # One neighbour lookup per pixel, tested against both names.
            if value == 1 and excluded.isdisjoint(check_neighbours(pixels, x, y)):
                count += 1
    return count

def no_neigh_vert(pixels):
    """Return the number of filled pixels with no directly vertical neighbour.

    A pixel counts when ``check_neighbours`` reports neither "upper" nor
    "lower" for it; diagonal neighbours are not considered.  The whole
    grid is scanned, whatever its size, instead of a fixed 18x18 window.

    Args:
        pixels: Rectangular 2-D grid (list of rows) of pixel values.

    Returns:
        Count of qualifying cells equal to 1.
    """
    excluded = {"upper", "lower"}
    count = 0
    for x, row in enumerate(pixels):
        for y, value in enumerate(row):
            # One neighbour lookup per pixel, tested against both names.
            if value == 1 and excluded.isdisjoint(check_neighbours(pixels, x, y)):
                count += 1
    return count

def connected_areas(pixels):
    """Return the number of connected regions of filled pixels.

    Regions are found with ``scipy.ndimage.label`` using a 3x3 all-ones
    structuring element, so pixels that touch only diagonally belong to
    the same region.

    Args:
        pixels: 2-D grid (list of rows) of pixel values; non-zero entries
            are treated as foreground by ``label``.

    Returns:
        The number of labelled regions; 0 for an empty grid.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.ndimage`` is available as an attribute on
    # SciPy releases without lazy submodule loading.
    from scipy import ndimage

    if len(pixels) == 0:
        # Nothing to label; also avoids handing label() a 1-D empty input
        # together with a 2-D structuring element.
        return 0

    structure = [[1, 1, 1],
                 [1, 1, 1],
                 [1, 1, 1]]
    _, area_count = ndimage.label(pixels, structure)
    return area_count

def eyes(pixels):
    """Return the number of enclosed background regions ("eyes") in the image.

    The image is inverted so that cells equal to 0 become foreground, a
    one-cell foreground border is added on every side, and the result is
    labelled with ``scipy.ndimage.label`` using its default structuring
    element.  The border joins all background that reaches the image edge
    into a single region, which is subtracted from the total.

    Args:
        pixels: Rectangular 2-D grid (list of rows) of pixel values.

    Returns:
        Number of labelled regions of the padded, inverted image minus
        one; 0 for an empty grid.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.ndimage`` is available as an attribute on
    # SciPy releases without lazy submodule loading.
    from scipy import ndimage

    width = len(pixels[0]) if len(pixels) else 0
    border = [1] * (width + 2)

    # Top border, then each inverted row wrapped in a border cell on both
    # sides, then the bottom border.  Each border row is its own list so
    # the two never alias each other.
    padded = [list(border)]
    for row in pixels:
        padded.append([1] + [1 if value == 0 else 0 for value in row] + [1])
    padded.append(list(border))

    # label() counts every region of the inverted image, including the
    # outer background joined by the border, hence the "- 1".
    _, region_count = ndimage.label(padded)
    return region_count - 1

def longest_vertical_line(pixels):
    """Return the length of the longest vertical run of filled pixels.

    Each column is scanned once from top to bottom while tracking the
    current run of consecutive cells equal to 1, rather than re-walking
    the run from every filled pixel in it.

    Args:
        pixels: Rectangular 2-D grid (list of rows) of pixel values; the
            column count is taken from the first row.

    Returns:
        The longest run length found in any column; 0 when the grid is
        empty or holds no filled pixel.
    """
    longest = 0
    row_count = len(pixels)
    if row_count == 0:
        return longest

    for col in range(len(pixels[0])):
        streak = 0
        for row in range(row_count):
            if pixels[row][col] == 1:
                streak += 1
                if streak > longest:
                    longest = streak
            else:
                # A gap ends the current run.
                streak = 0
    return longest

#Builds one output row for an image that has already been loaded
#Both loops below use it, so the letter rows and the non-letter rows always
#carry the same 16 features, in the same order as the header row
def _feature_row(label, index, pixels):
    """Return ``[label, index, <16 feature values>]`` for ``pixels``."""
    return [label, index, nr_pix(pixels), rows_with_1(pixels), cols_with_1(pixels), rows_with_3p(pixels),
            cols_with_3p(pixels), aspect_ratio(pixels), neigh_1(pixels), no_neigh_above(pixels),
            no_neigh_below(pixels), no_neigh_left(pixels), no_neigh_right(pixels),
            no_neigh_horiz(pixels), no_neigh_vert(pixels), connected_areas(pixels), eyes(pixels),
            longest_vertical_line(pixels)]

#this iterates through every letter and non-letter
#calculating the features for each of them
#and writes one row per image to 40431475_features.csv
with open('40431475_features.csv', 'w', newline ='') as csvfile:
    writer = csv.writer(csvfile, delimiter=',',
                        quotechar='|', quoting=csv.QUOTE_MINIMAL)
    chars = ['a','b','c','d','e','f','g','h','i','j']
    #Header row: label and index followed by the feature names
    writer.writerow(['label','Index','nr_pix','rows_with_1','cols_with_1','rows_with_3p',
                     'cols_with_3p','aspect_ratio','neigh_1','no_neigh_above',
                     'no_neigh_below','no_neigh_left','no_neigh_right','no_neigh_horiz',
                     'no_neigh_vert','connected_areas','eyes','longest_vertical_line'])
    #Letters: image indices 1 to 8 for each character
    for char in chars:
        for x in range(1,9):
            name = 'images/40431475_'+char+'_'+str(x)+'.csv'
            pixels = readcsv(name)
            features = _feature_row(char, x, pixels)
            writer.writerow(features)

    #Non-letters: image indices 1 to 20 for each emoticon
    emoticons = ['sad','smiley','xclaim']
    for emoticon in emoticons:
        for x in range(1,21):
            name = 'images/40431475_'+emoticon+'_'+str(x)+'.csv'
            pixels = readcsv(name)
            features = _feature_row(emoticon, x, pixels)
            writer.writerow(features)