# Adding dummy var features regarding ellipses

>1. Load the original data
>2. Create a function for the digit-0 dummy variable
>3. Create a function for the digit-1 dummy variable
>4. Create a function for the digit-6 dummy variable
>5. Create a function for the digit-8 dummy variable
>6. Create a function for the digit-9 dummy variable
>7. Load the intermediate data and append the new features to it

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.svm as skl_svm
import sklearn.cross_validation as skl_cv
import seaborn as sns
sns.set_context(context = 'notebook', font_scale=1.5)
import os
import sys
import cv2

# Base directory of the project on the author's machine; change it when the
# notebook runs somewhere else.
base_path = '/home/lundi/Python/MNIST/'
# Make the helper modules in <base_path>/libraries importable. os.path.join
# avoids the doubled slash that plain string concatenation produced here.
sys.path.append(os.path.join(base_path, 'libraries'))

import MNIST_data_processor as mdp

# NOTE: the instance intentionally reuses the module's name; later cells call
# MNIST_data_processor.load_subset_data() on this object.
MNIST_data_processor = mdp.MNIST_data_processor()

from matplotlib.patches import Ellipse

## 1. Load original data

In [2]:
# Load the feature matrix X and the labels y through the project's data helper.
# NOTE(review): later cells reshape each row of X to 28x28, so a row is
# presumably one flattened digit image (784 values) — confirm against
# load_subset_data().
X, y = MNIST_data_processor.load_subset_data()

# Empty placeholder for the dummy-variable features; it is rebound to a new
# frame once the first feature has been computed in section 2.
dummy_vars = pd.DataFrame()

## 2. Create a function for 0 dummy var

In [6]:
def convert_to_image(data):
    """Turn one flattened digit into a 28x28 single-channel ``uint8`` image.

    The single-channel array is built directly. Stacking the pixels into
    three identical colour channels and converting BGR -> gray yields the
    very same values (the grayscale weights sum to one), so that round trip
    is not needed.

    Args:
        data: 784 pixel intensities as a NumPy array, a list or a pandas
            Series (a Series is what ``DataFrame.apply(..., axis=1)`` hands
            to the callers of this function).

    Returns:
        numpy.ndarray of shape (28, 28) and dtype ``uint8``.

    Raises:
        ValueError: if ``data`` does not hold exactly 784 values.
    """
    # Go through np.asarray first: pandas Series no longer offers .reshape().
    # The float dtype mirrors the float buffer the pixels used to be copied
    # into before the final uint8 cast.
    pixels = np.asarray(data, dtype=float).reshape(28, 28)
    return pixels.astype('uint8')

def get_valid_contours(digit_data):
    """Return the contours of a digit that are long enough for an ellipse fit.

    The digit is converted to a 28x28 image, binarised at intensity 127 and
    passed to ``cv2.findContours``.

    Args:
        digit_data: flattened pixel values of one digit, as accepted by
            ``convert_to_image``.

    Returns:
        list of contours (in the order OpenCV reports them) that contain at
        least five points.
    """
    image = convert_to_image(digit_data)

    # Pixels brighter than 127 become 255, everything else 0.
    _, thresh = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)

    # findContours returns (contours, hierarchy) in OpenCV 2.x and 4.x but
    # (image, contours, hierarchy) in 3.x; the contour list is always the
    # second-to-last element, so index from the end instead of unpacking.
    contours = cv2.findContours(
        thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # cv2.fitEllipse needs at least five points per contour.
    return [cnt for cnt in contours if len(cnt) >= 5]

In [7]:
def get_distance_between_ellipses(ellipse_1, ellipse_2):
    """Return the Euclidean distance between the centres of two ellipses.

    Element 0 of each ellipse is read as its centre point, which matches the
    tuples produced by ``cv2.fitEllipse``.
    """
    center_a, center_b = ellipse_1[0], ellipse_2[0]
    delta_first = center_a[0] - center_b[0]
    delta_second = center_a[1] - center_b[1]
    return np.sqrt((delta_first ** 2) + (delta_second ** 2))

In [52]:
def get_center_to_center_dist(valid_contours):
    """Return the distance between the ellipse centres of the first two contours.

    Args:
        valid_contours: sequence holding at least two contours, each with the
            five or more points ``cv2.fitEllipse`` requires. Only the first
            two entries are used.

    Returns:
        Euclidean distance between the centres of the two fitted ellipses.

    Raises:
        IndexError: if a list with fewer than two contours is supplied; the
            callers in this notebook check ``len(valid_contours) == 2`` first.
    """
    first_ellipse = cv2.fitEllipse(valid_contours[0])
    second_ellipse = cv2.fitEllipse(valid_contours[1])
    return get_distance_between_ellipses(first_ellipse, second_ellipse)
    

In [14]:
def get_ratio_of_areas_big_to_small(valid_contours):
    """Return the larger-to-smaller area ratio of the two fitted ellipses.

    Args:
        valid_contours: contours of one digit; the ratio is only defined when
            there are exactly two of them.

    Returns:
        A ratio >= 1 when both ellipses have a non-zero area, ``nan`` when a
        fit is degenerate (zero area), and ``-1`` when the number of contours
        is not two.
    """
    if len(valid_contours) != 2:
        return -1

    areas = []
    for contour in valid_contours:
        axes = cv2.fitEllipse(contour)[1]
        # Product of the two axis values times pi. Any constant factor
        # between this and the true area cancels out in the ratio below.
        areas.append(axes[0] * axes[1] * np.pi)

    smaller, larger = sorted(areas)
    if smaller == 0:
        # A zero-area fit used to end in ZeroDivisionError and abort the whole
        # DataFrame.apply run. NaN fails every "<" / ">" threshold test, so
        # such a digit is simply not flagged.
        return float('nan')
    return larger / smaller

In [23]:
def get_0_dummy_var(digit_data):
    """Return 1 when the digit matches the zero heuristic, otherwise 0.

    The heuristic requires exactly two valid contours whose fitted ellipses
    have centres closer than 1.4 and a big-to-small area ratio below 3.
    """
    contours = get_valid_contours(digit_data)
    if len(contours) != 2:
        return 0

    # The area ratio is only computed once the centres are close enough.
    if not get_center_to_center_dist(contours) < 1.4:
        return 0

    return 1 if get_ratio_of_areas_big_to_small(contours) < 3 else 0

In [34]:
# Evaluate the zero heuristic on every row of X and start the feature table
# with it; the unnamed result column (0) is renamed to 'dummy_var_0'.
dummy_var_0 = X.apply(get_0_dummy_var, axis=1)
dummy_vars = pd.DataFrame(dummy_var_0).rename(columns={0: 'dummy_var_0'})

## 3. Create a function for 1 dummy var

In [47]:
def get_aspect_ratio(ellipse):
    """Return the ratio of the ellipse's second axis value to its first.

    Args:
        ellipse: tuple laid out like the result of ``cv2.fitEllipse``;
            ``ellipse[1]`` holds the two axis values.

    Returns:
        ``ellipse[1][1] / ellipse[1][0]``. When the first axis is zero the
        result is ``inf`` (or ``nan`` if both axes are zero) instead of a
        ``ZeroDivisionError``.
    """
    first_axis = ellipse[1][0]
    second_axis = ellipse[1][1]
    if first_axis == 0:
        # Degenerate fit: keep the batch running rather than raising.
        return float('nan') if second_axis == 0 else float('inf')
    return second_axis / first_axis

def get_aspect_ratio_of_single_ellipses(valid_contours):
    """Return the aspect ratio of the ellipse fitted to a lone contour.

    Returns ``-1`` unless exactly one contour is supplied.
    """
    if len(valid_contours) != 1:
        return -1

    fitted = cv2.fitEllipse(valid_contours[0])
    return get_aspect_ratio(fitted)

def get_1_dummy_var(digit_data):
    """Return 1 when the digit matches the one heuristic, otherwise 0.

    The heuristic requires a single valid contour whose fitted ellipse has an
    aspect ratio above 3.6.
    """
    contours = get_valid_contours(digit_data)
    if len(contours) != 1:
        return 0

    return 1 if get_aspect_ratio_of_single_ellipses(contours) > 3.6 else 0

In [56]:
# Evaluate the one heuristic on every row of X and store the 0/1 result as a
# new column of the feature table.
dummy_var_1 = X.apply(get_1_dummy_var, axis=1)
dummy_vars['dummy_var_1'] = dummy_var_1

## 4. Create a function for 6 dummy var

In [54]:
def get_ratio_of_areas_top_to_bottom(valid_contours):
    """Return area(upper ellipse) / area(lower ellipse) for the first two contours.

    The "upper" ellipse is the one whose centre has the smaller second
    coordinate (image rows are counted from the top).

    Args:
        valid_contours: sequence holding at least two contours that
            ``cv2.fitEllipse`` can process. Only the first two are used.

    Returns:
        The area ratio, or ``nan`` when the lower ellipse has zero area.

    Raises:
        IndexError: if a list with fewer than two contours is supplied.
    """
    ellipse_1 = cv2.fitEllipse(valid_contours[0])
    ellipse_2 = cv2.fitEllipse(valid_contours[1])

    # Product of the two axis values times pi. Any constant factor between
    # this and the true area cancels out in the ratio.
    area_1 = ellipse_1[1][0] * ellipse_1[1][1] * np.pi
    area_2 = ellipse_2[1][0] * ellipse_2[1][1] * np.pi

    if ellipse_1[0][1] < ellipse_2[0][1]:
        top_area, bottom_area = area_1, area_2
    else:
        top_area, bottom_area = area_2, area_1

    if bottom_area == 0:
        # A zero-area fit used to end in ZeroDivisionError and abort the whole
        # DataFrame.apply run. NaN fails every "<" / ">" threshold test, so
        # such a digit is simply not flagged.
        return float('nan')
    return top_area / bottom_area

    
def get_6_dummy_var(digit_data):
    """Return 1 when the digit matches the six heuristic, otherwise 0.

    The heuristic requires exactly two valid contours, a top-to-bottom area
    ratio above 2 and a centre-to-centre distance strictly between 2 and 7.
    """
    contours = get_valid_contours(digit_data)
    if len(contours) != 2:
        return 0

    # The distance is only computed once the area ratio qualifies.
    if not get_ratio_of_areas_top_to_bottom(contours) > 2:
        return 0

    return 1 if 2 < get_center_to_center_dist(contours) < 7 else 0

In [57]:
# Evaluate the six heuristic on every row of X and store the 0/1 result as a
# new column of the feature table.
dummy_var_6 = X.apply(get_6_dummy_var, axis=1)
dummy_vars['dummy_var_6'] = dummy_var_6

## 5. Create a function for 8 dummy var

*(Not implemented yet — no code follows this heading.)*

## 6. Create a function for 9 dummy var

In [60]:
def get_9_dummy_var(digit_data):
    """Return 1 when the digit matches the nine heuristic, otherwise 0.

    The heuristic requires exactly two valid contours, a top-to-bottom area
    ratio below 2 and a centre-to-centre distance strictly between 1 and 6.
    """
    contours = get_valid_contours(digit_data)
    if len(contours) != 2:
        return 0

    # The distance is only computed once the area ratio qualifies.
    if not get_ratio_of_areas_top_to_bottom(contours) < 2:
        return 0

    return 1 if 1 < get_center_to_center_dist(contours) < 6 else 0

In [61]:
# Evaluate the nine heuristic on every row of X and store the 0/1 result as a
# new column of the feature table.
dummy_var_9 = X.apply(get_9_dummy_var, axis=1)
dummy_vars['dummy_var_9'] = dummy_var_9

## 7. Load intermediate data and append onto that

In [62]:
# Read the intermediate table; the relative path is resolved against the
# current working directory. A later cell selects the 'label' and
# 'ellipse_count' columns from the combined frame.
intermediate_data = pd.read_csv('./../../data/intermediate/train_with_ellipse_count.csv')

In [67]:
# Place the dummy-variable columns next to the intermediate columns.
# NOTE(review): concat(axis=1) aligns rows by index label, not by position —
# confirm that dummy_vars (indexed like X) and the freshly read CSV share the
# same index, otherwise unmatched rows are filled with NaN.
data_ellipses = pd.concat([intermediate_data, dummy_vars], axis=1)

In [72]:
# Keep only the label, the ellipse count and the four dummy variables, in
# this column order; every other column is dropped.
data_ellipses = data_ellipses[[u'label', u'ellipse_count',
        u'dummy_var_0', u'dummy_var_1', u'dummy_var_6',
       u'dummy_var_9']]

In [73]:
# Write the final feature table; index=False keeps the row index out of the
# CSV file.
data_ellipses.to_csv('./../../data/processed/data_ellipses.csv', index=False)