In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# cv2 computer vision library for loading images
import cv2

In [13]:
import os

# directories for data
rock_dir = "./rock"
paper_dir = "./paper"
scissors_dir ="./scissors"

# class values
rock_cat = 0
paper_cat = 1
scissors_cat = 2

In [14]:
# initiate dataframes for each of rock, paper, scissors datasets
# will concatenate dataframes later
# create list with all filenames

def init_df(path, category):
    f_names = []
    for f in os.listdir(path):
        f_names.append(f)
    
    # create list len of filesnames array with category values = rock (0), paper (1) or scissor (2)
    category_column = []
    category_column = [category for i in range(len(f_names))]
    
    df = pd.DataFrame(list(zip(f_names, category_column)), columns =['name', 'category']) 
    return df

In [15]:
rock_init = init_df(rock_dir, rock_cat)
paper_init = init_df(paper_dir, paper_cat)
scissors_init = init_df(scissors_dir, scissors_cat)

In [11]:
def get_color_images(path):
    image_list = []
    for filename in os.listdir(path):
        im=cv2.imread(os.path.join(path,filename),cv2.IMREAD_COLOR)
        new_im = cv2.resize(im, dsize=(100, 100))
        image_list.append(new_im)
    return image_list

In [12]:
def get_bw_images(path):
    image_list = []
    for filename in os.listdir(path):
        im=cv2.imread(os.path.join(path,filename),cv2.IMREAD_GRAYSCALE)
        new_im = cv2.resize(im, dsize=(100, 100))
        image_list.append(new_im)
    return image_list

In [17]:
def bw_rows(arr):
    dims = arr.shape
    col_avgs = []
    for i in range(dims[1]):
        col_avgs.append(np.average(arr[:,i]))
    return np.max(col_avgs), np.min(col_avgs)

In [18]:
def bw_cols(arr):
    dims = arr.shape
    row_avgs = []
    for i in range(dims[0]):
        row_avgs.append(np.average(arr[i,:]))
    return np.max(row_avgs), np.min(row_avgs)

In [19]:
# ------------------- ADD IMAGE CHARACTERISTICS TO DF ---------------------
def add_img_data(df, p):
    
    # initiate lists of pixel arrays
    color_ims = []
    bw_ims = []
    
    # create color array
    color_ims = get_color_images(p)
    
    # create grayscale array
    bw_ims = get_bw_images(p)
    
    # initiate all of arrays that will become columns in the dataframe
    avg_red = []
    avg_green = []
    avg_blue = []
    avg_bw = []
    max_av_col_bw = []
    min_avg_col_bw = []
    max_avg_row_bw = []
    min_avg_row_bw = []
    
    # average red values
    for i in range(len(color_ims)):
        avg_red.append(np.average(color_ims[i][:,:,0]))
    # average green values
    for i in range(len(color_ims)):
        avg_green.append(np.average(color_ims[i][:,:,1]))
    # average blue values
    for i in range(len(color_ims)):
        avg_blue.append(np.average(color_ims[i][:,:,2]))
    # average grayscale values
    for i in range(len(bw_ims)):
        avg_bw.append(np.average(bw_ims[i]))
        
    # add color averages to df
    df['avg_red'] = avg_red
    df['avg_green'] = avg_green
    df['avg_blue'] = avg_blue
    df['avg_bw'] = avg_bw

    # Largest and smallest avg amongst rows in grayscale images
    big_list = []
    small_list= []
    for x in range(len(bw_ims)):
        big = []
        small = []
        big, small = bw_rows(bw_ims[x])
        big_list.append(big)
        small_list.append(small)
    max_avg_row_bw = big_list
    min_avg_row_bw = small_list
    
    # Largest and smallest avg amongst columns in grayscale images
    big_list = []
    small_list= []
    for x in range(len(bw_ims)):
        big = []
        small = []
        big, small = bw_cols(bw_ims[x])
        big_list.append(big)
        small_list.append(small)
    max_av_col_bw = big_list
    min_avg_col_bw = small_list
    
    # Assign new columns
    df['max_av_col_bw'] = max_av_col_bw
    df['min_avg_col_bw'] = min_avg_col_bw
    df['max_avg_row_bw'] = max_avg_row_bw
    df['min_avg_row_bw'] = min_avg_row_bw
    
    return df


In [24]:
rock_df = add_img_data(rock_init, rock_dir)

In [26]:
paper_df = add_img_data(paper_init, paper_dir)

In [27]:
scissors_df = add_img_data(scissors_init, scissors_dir)

In [31]:
# concatenate dataframes
master = pd.concat([rock_df, paper_df, scissors_df])

In [32]:
master.tail()

Unnamed: 0,name,category,avg_red,avg_green,avg_blue,avg_bw,max_av_col_bw,min_avg_col_bw,max_avg_row_bw,min_avg_row_bw
745,aMAVOdimraDSK6P1.png,2,53.931,133.6015,80.2907,108.1165,164.01,75.9,132.55,76.66
746,kCoX1GxQ9o2ZhRy9.png,2,57.8718,140.3127,59.7198,106.3322,140.83,90.73,116.23,91.18
747,KMUCDyhYewIdDnvu.png,2,49.7314,134.3142,77.6277,107.2499,149.1,61.15,129.39,74.09
748,V14OoL0RMpvwPptc.png,2,56.2948,136.8844,73.9999,108.4191,153.43,85.0,129.89,86.07
749,44Hu6owS3pPqmyG1.png,2,58.043,133.2497,79.2878,108.0586,150.45,75.33,130.34,80.83


In [33]:
master.describe()

Unnamed: 0,category,avg_red,avg_green,avg_blue,avg_bw,max_av_col_bw,min_avg_col_bw,max_avg_row_bw,min_avg_row_bw
count,2188.0,2188.0,2188.0,2188.0,2188.0,2188.0,2188.0,2188.0,2188.0
mean,1.010969,66.55975,140.275591,82.583831,114.144575,144.335772,90.799246,135.699068,91.379721
std,0.821448,14.069843,11.189489,13.107461,10.140908,14.135544,12.199509,12.572177,15.04169
min,0.0,20.9315,93.2267,42.4163,69.9471,96.7,37.41,99.0,32.08
25%,0.0,58.5216,133.6178,73.968425,108.02095,135.56,83.285,127.6125,83.6275
50%,1.0,65.7599,136.6946,80.2622,111.034,143.115,88.485,133.205,91.995
75%,2.0,74.6608,143.863675,88.75475,117.88285,152.195,95.94,141.355,97.345
max,2.0,120.2381,183.3469,153.7115,160.1417,210.59,136.04,196.74,136.4


In [34]:
master.to_csv("master_df", index=False)