# Get the XML Docs

In [1]:
import xml.dom.minidom as minidom
# Parse the two XML files from the xml/ directory into DOM documents. They are
# passed to upload_image / get_image_info as request_xml / info_xml, which
# overwrite one element's text in them before each POST.
request_xml = minidom.parse('xml/ImageRequest.xml')
info_xml = minidom.parse('xml/ImageInfo.xml')

# Function to upload image to BetaFaceAPI

In [2]:
def upload_image(image_filename, request_xml):
    """Upload one image to the BetaFace API and return the UID it reports.

    The file is read, base64-encoded and written into the first
    ``imagefile_data`` element of ``request_xml``; the document is then
    POSTed to the ``UploadNewImage_File`` endpoint.

    Parameters
    ----------
    image_filename : str
        Path of the image file to upload.
    request_xml : xml.dom.minidom.Document
        Request document. It is modified in place on every call: the text of
        its first ``imagefile_data`` element is overwritten, so that element
        must already contain a text node.

    Returns
    -------
    The text of the first ``img_uid`` element in the service response.

    Raises
    ------
    IOError / OSError
        If the image file cannot be opened or read.
    requests.exceptions.HTTPError
        If the service answers with a 4xx/5xx status.
    IndexError
        If the response contains no ``img_uid`` element.
    """
    import base64
    import requests
    import xml.dom.minidom as minidom

    # encode the image as base64; the context manager closes the file handle
    # even if reading fails (the handle used to be left open)
    with open(image_filename, 'rb') as image_file:
        encoded_string = base64.b64encode(image_file.read())

    #insert the encoded string into the xml
    request_xml.getElementsByTagName('imagefile_data')[0].firstChild.nodeValue = encoded_string

    #headers for the request
    headers = {
        'Content-Type': 'application/xml'
    }

    #send the request
    r = requests.post('http://www.betafaceapi.com/service.svc/UploadNewImage_File', data=request_xml.toxml(), headers=headers)

    # fail with a clear HTTP error instead of an obscure XML parsing error
    # when the service rejects the request
    r.raise_for_status()

    #get the response xml as DOM
    response_xml = minidom.parseString(r.text)
    uid = response_xml.getElementsByTagName('img_uid')[0].firstChild.nodeValue
    # single parenthesised argument: prints identically on Python 2 and 3
    print("Image Filename: "+image_filename+"\t UID: "+str(uid))

    return uid

# Function to get the image info

In [3]:
def get_image_info(image_uid, info_xml):
    """Request the stored information for one image from the BetaFace API.

    ``image_uid`` is written into the first ``img_uid`` element of
    ``info_xml`` (the document is modified in place), the document is POSTed
    to the ``GetImageInfo`` endpoint, and the response body is returned
    parsed as a minidom document.
    """
    import requests
    from xml.dom import minidom

    # place the UID in the request document
    uid_element = info_xml.getElementsByTagName('img_uid')[0]
    uid_element.firstChild.nodeValue = image_uid

    # POST the serialised document as XML
    response = requests.post(
        'http://www.betafaceapi.com/service.svc/GetImageInfo',
        data=info_xml.toxml(),
        headers={'Content-Type': 'application/xml'}
    )

    # hand the reply back as a DOM
    return minidom.parseString(response.text)

# Function to get dictionary of features from xml

In [4]:
def get_feature_points(xml):
    """Collect the coordinates of every ``PointInfo`` element in a DOM node.

    Parameters
    ----------
    xml : xml.dom.minidom.Document or Element
        Node to search. Each ``PointInfo`` descendant must contain ``name``,
        ``x`` and ``y`` elements with text content.

    Returns
    -------
    dict
        Maps each point name (spaces replaced by underscores) to
        ``{'x': float, 'y': float}``. Empty when no ``PointInfo`` element is
        present; if a name occurs more than once the last occurrence wins.

    Raises
    ------
    IndexError
        If a ``PointInfo`` lacks a ``name``, ``x`` or ``y`` element.
    ValueError
        If an ``x`` or ``y`` text cannot be converted to float.
    """
    def _text(node, tag):
        # text of the first <tag> element found under node
        return node.getElementsByTagName(tag)[0].firstChild.nodeValue

    feature_points = {}
    for info in xml.getElementsByTagName('PointInfo'):
        name = _text(info, 'name').replace(' ', '_')
        x = _text(info, 'x')
        y = _text(info, 'y')

        feature_points[str(name)] = {
            'x': float(x),
            'y': float(y)
        }
    return feature_points

# Function to calculate the Euclidean distance between two named feature points

In [5]:
def calculate_distance(feature_1, feature_2, feature_points):
    """Return the Euclidean distance between two named feature points.

    ``feature_points`` maps a feature name to a dict with ``'x'`` and ``'y'``
    entries; ``feature_1`` and ``feature_2`` are the names to compare.
    """
    import numpy as np

    first = feature_points[feature_1]
    start = np.array([first['x'], first['y']])

    second = feature_points[feature_2]
    end = np.array([second['x'], second['y']])

    return np.linalg.norm(start - end)

# North East Indian Dataset

## Form the dictionary of Image UIDs

In [34]:
import glob
import json

# every .jpg in the dataset directory
image_filenames = glob.glob('data/normalized/asian/*.jpg')

# upload the images one by one and remember the UID returned for each filename
image_uids = {}
i = 0  # stays 0 when no file matched
for i, image_filename in enumerate(image_filenames, 1):
    print("Image "+str(i)+" of "+str(len(image_filenames)))
    image_uids[image_filename] = upload_image(image_filename, request_xml)

# write the filename -> UID mapping to a JSON file
with open('data/normalized/asian_uids.json', 'w') as fp:
    json.dump(image_uids, fp)

Image 1 of 188
Image Filename: data/normalized/asian\0001.jpg	 UID: 989519de-0394-40f5-97c3-25869a625c7b
Image 2 of 188
Image Filename: data/normalized/asian\0002.jpg	 UID: 9351f36c-36b0-4978-941e-cbbbc51d48b8
Image 3 of 188
Image Filename: data/normalized/asian\0003.jpg	 UID: 5c05e1af-4dd1-47ab-b37d-88365eea3613
Image 4 of 188
Image Filename: data/normalized/asian\0004.jpg	 UID: f8af3959-d2cf-4ee3-87dd-f75657efa40d
Image 5 of 188
Image Filename: data/normalized/asian\0005.jpg	 UID: 96cabe1f-1bef-49f8-9e32-bfe512a7cda1
Image 6 of 188
Image Filename: data/normalized/asian\0006.jpg	 UID: 7f3033d5-1923-400d-9d11-3146b2016ee9
Image 7 of 188
Image Filename: data/normalized/asian\0007.jpg	 UID: 31eb1dd7-0552-40a3-a7a5-7cf114a7105a
Image 8 of 188
Image Filename: data/normalized/asian\0008.jpg	 UID: d086b19f-4590-418a-97a6-7210ee5762d8
Image 9 of 188
Image Filename: data/normalized/asian\0009.jpg	 UID: e790e026-3352-4a48-9d3d-886b8fa2c5ce
Image 10 of 188
Image Filename: data/normalized/asian\0

## Get features points of the images

In [54]:
# fetch the feature points of every uploaded image, keyed by filename
image_infos = {}
length = len(image_uids)
i = 0  # stays 0 when image_uids is empty
for i, key in enumerate(image_uids, 1):
    print("Getting feature points for image "+str(i)+" of "+str(length))
    info_doc = get_image_info(image_uids[key], info_xml)
    feature_points = get_feature_points(info_doc)
    image_infos[key] = {
        'image_uid': image_uids[key],
        'feature_points': feature_points
    }

# write the per-image UID and feature points to a JSON file
with open('data/normalized/asian_feature_points.json', 'w') as fp:
    json.dump(image_infos, fp)

Getting feature points for image 1 of 188
Getting feature points for image 2 of 188
Getting feature points for image 3 of 188
Getting feature points for image 4 of 188
Getting feature points for image 5 of 188
Getting feature points for image 6 of 188
Getting feature points for image 7 of 188
Getting feature points for image 8 of 188
Getting feature points for image 9 of 188
Getting feature points for image 10 of 188
Getting feature points for image 11 of 188
Getting feature points for image 12 of 188
Getting feature points for image 13 of 188
Getting feature points for image 14 of 188
Getting feature points for image 15 of 188
Getting feature points for image 16 of 188
Getting feature points for image 17 of 188
Getting feature points for image 18 of 188
Getting feature points for image 19 of 188
Getting feature points for image 20 of 188
Getting feature points for image 21 of 188
Getting feature points for image 22 of 188
Getting feature points for image 23 of 188
Getting feature poin

In [55]:
# Display the feature points of one entry as a sanity check.
# NOTE: indexing keys() with [0] relies on keys() returning a list (Python 2).
image_infos[image_infos.keys()[0]]['feature_points']

{'basic_chin_bottom': {'x': 98.46, 'y': 192.79},
 'basic_chin_left': {'x': 40.29, 'y': 131.24},
 'basic_chin_right': {'x': 155.6, 'y': 128.83},
 'basic_eye_left': {'x': 68.26, 'y': 98.97},
 'basic_eye_left_inner': {'x': 79.55, 'y': 100.61},
 'basic_eye_left_outer': {'x': 57.19, 'y': 99.63},
 'basic_eye_right': {'x': 124.76, 'y': 99.68},
 'basic_eye_right_inner': {'x': 113.96, 'y': 100.83},
 'basic_eye_right_outer': {'x': 135.76, 'y': 99.92},
 'basic_eyebrow_left': {'x': 64.25, 'y': 81.87},
 'basic_eyebrow_left_inner_corner': {'x': 79.46, 'y': 84.93},
 'basic_eyebrow_left_outer_corner': {'x': 49.73, 'y': 84.91},
 'basic_eyebrow_right': {'x': 126.61, 'y': 82.87},
 'basic_eyebrow_right_inner_corner': {'x': 111.25, 'y': 86.56},
 'basic_eyebrow_right_outer_corner': {'x': 141.89, 'y': 84.8},
 'basic_mouth_center': {'x': 98.65, 'y': 158.92},
 'basic_mouth_left': {'x': 78.04, 'y': 159.71},
 'basic_mouth_right': {'x': 118.67, 'y': 157.96},
 'basic_nose_left': {'x': 83.24, 'y': 136.16},
 'basic_

## Calculate the necessary features and build SFrame

In [69]:
import os

import graphlab as gl
from graphlab import SFrame

# Build one single-row SFrame per image from distances between pairs of
# feature points, and append the rows into `data`.
image_features = {}
length = len(image_infos.keys())
i = 0
data = None
for key in image_infos.keys():
    i += 1
    feature_points = image_infos[key]['feature_points']
    features = {
        # basename instead of a fixed slice offset: the offset depends on the
        # length of the directory name and silently breaks when that changes
        'filename': [os.path.basename(key)],
        'eye_outer': [calculate_distance('basic_eye_left_outer', 'basic_eye_right_outer', feature_points)],
        'eye_inner': [calculate_distance('basic_eye_left_inner', 'basic_eye_right_inner', feature_points)],
        'eye': [calculate_distance('basic_eye_left', 'basic_eye_right', feature_points)],
        'mouth': [calculate_distance('basic_mouth_left', 'basic_mouth_right', feature_points)],
        'nose_mouth': [calculate_distance('basic_nose_tip', 'basic_mouth_center', feature_points)],
        'eyebrow_inner': [calculate_distance('basic_eyebrow_left_inner_corner', 'basic_eyebrow_right_inner_corner', feature_points)],
        'eyebrow_outer': [calculate_distance('basic_eyebrow_left_outer_corner', 'basic_eyebrow_right_outer_corner', feature_points)],
        'chin': [calculate_distance('basic_chin_left', 'basic_chin_right', feature_points)],
        'nose': [calculate_distance('basic_nose_left', 'basic_nose_right', feature_points)],
        'chin_mouth': [calculate_distance('basic_chin_bottom', 'basic_mouth_center', feature_points)],
        # label value used for every row of this dataset
        'ethnicity': [1]
    }
    image_features[key] = {
        'image_uid': image_infos[key]['image_uid'],
        'features': features
    }

    #create the SFrame
    if data is None:
        data = SFrame(features)
    else:
        data = data.append(SFrame(features))

## Save the SFrame for later use

In [92]:
# Write the SFrame held in `data` to disk with SFrame.save.
data.save('data/normalized/asian_sframe')

# Rest of India Dataset

## Form dictionary of image UIDs

In [73]:
import glob
import json

# every .jpg in the dataset directory
image_filenames = glob.glob('data/normalized/non-asian/*.jpg')

# upload the images one by one and remember the UID returned for each filename
image_uids = {}
i = 0  # stays 0 when no file matched
for i, image_filename in enumerate(image_filenames, 1):
    print("Image "+str(i)+" of "+str(len(image_filenames)))
    image_uids[image_filename] = upload_image(image_filename, request_xml)

# write the filename -> UID mapping to a JSON file
with open('data/normalized/non_asian_uids.json', 'w') as fp:
    json.dump(image_uids, fp)

Image 1 of 110
Image Filename: data/normalized/non-asian\0001.jpg	 UID: b97e2479-4ee7-4697-939f-76f30ea35d68
Image 2 of 110
Image Filename: data/normalized/non-asian\0002.jpg	 UID: 81269a7f-73c9-4641-a55f-aa362140ab21
Image 3 of 110
Image Filename: data/normalized/non-asian\0003.jpg	 UID: 9bdb4cdd-2028-4ddf-a17e-75668b1f2322
Image 4 of 110
Image Filename: data/normalized/non-asian\0004.jpg	 UID: 2b2cc768-b2c2-4996-bec1-3d9cfa62a45c
Image 5 of 110
Image Filename: data/normalized/non-asian\0005.jpg	 UID: 0f358b20-29c1-4ff1-af2a-922ebbcebb9e
Image 6 of 110
Image Filename: data/normalized/non-asian\0006.jpg	 UID: 4c91a0bd-3441-46b2-906f-ee51260c4ad4
Image 7 of 110
Image Filename: data/normalized/non-asian\0007.jpg	 UID: 5541702b-a581-42ec-9684-336927ed6658
Image 8 of 110
Image Filename: data/normalized/non-asian\0008.jpg	 UID: fff82027-e6cc-43c7-9597-22eb29a94b67
Image 9 of 110
Image Filename: data/normalized/non-asian\0009.jpg	 UID: 60ccee7f-bd42-4c2a-9a2d-00a9893a06f7
Image 10 of 110
Ima

## Get feature points of the images

In [87]:
# fetch the feature points of every uploaded image, keyed by filename
image_infos = {}
length = len(image_uids)
i = 0  # stays 0 when image_uids is empty
for i, key in enumerate(image_uids, 1):
    print("Getting feature points for image "+str(i)+" of "+str(length))
    info_doc = get_image_info(image_uids[key], info_xml)
    feature_points = get_feature_points(info_doc)
    image_infos[key] = {
        'image_uid': image_uids[key],
        'feature_points': feature_points
    }

# write the per-image UID and feature points to a JSON file
with open('data/normalized/non_asian_feature_points.json', 'w') as fp:
    json.dump(image_infos, fp)

Getting feature points for image 1 of 110
Getting feature points for image 2 of 110
Getting feature points for image 3 of 110
Getting feature points for image 4 of 110
Getting feature points for image 5 of 110
Getting feature points for image 6 of 110
Getting feature points for image 7 of 110
Getting feature points for image 8 of 110
Getting feature points for image 9 of 110
Getting feature points for image 10 of 110
Getting feature points for image 11 of 110
Getting feature points for image 12 of 110
Getting feature points for image 13 of 110
Getting feature points for image 14 of 110
Getting feature points for image 15 of 110
Getting feature points for image 16 of 110
Getting feature points for image 17 of 110
Getting feature points for image 18 of 110
Getting feature points for image 19 of 110
Getting feature points for image 20 of 110
Getting feature points for image 21 of 110
Getting feature points for image 22 of 110
Getting feature points for image 23 of 110
Getting feature poin

## Calculate necessary features and build SFrame

In [95]:
import os

import graphlab as gl
from graphlab import SFrame

# Build one single-row SFrame per image from distances between pairs of
# feature points, and append the rows into `data2`.
image_features = {}
length = len(image_infos.keys())
i = 0
data2 = None
for key in image_infos.keys():
    print(key)
    i += 1
    feature_points = image_infos[key]['feature_points']
    features = {
        # basename instead of a fixed slice offset: the offset depends on the
        # length of the directory name and silently breaks when that changes
        'filename': [os.path.basename(key)],
        'eye_outer': [calculate_distance('basic_eye_left_outer', 'basic_eye_right_outer', feature_points)],
        'eye_inner': [calculate_distance('basic_eye_left_inner', 'basic_eye_right_inner', feature_points)],
        'eye': [calculate_distance('basic_eye_left', 'basic_eye_right', feature_points)],
        'mouth': [calculate_distance('basic_mouth_left', 'basic_mouth_right', feature_points)],
        'nose_mouth': [calculate_distance('basic_nose_tip', 'basic_mouth_center', feature_points)],
        'eyebrow_inner': [calculate_distance('basic_eyebrow_left_inner_corner', 'basic_eyebrow_right_inner_corner', feature_points)],
        'eyebrow_outer': [calculate_distance('basic_eyebrow_left_outer_corner', 'basic_eyebrow_right_outer_corner', feature_points)],
        'chin': [calculate_distance('basic_chin_left', 'basic_chin_right', feature_points)],
        'nose': [calculate_distance('basic_nose_left', 'basic_nose_right', feature_points)],
        'chin_mouth': [calculate_distance('basic_chin_bottom', 'basic_mouth_center', feature_points)],
        # label value used for every row of this dataset
        'ethnicity': [0]
    }
    image_features[key] = {
        'image_uid': image_infos[key]['image_uid'],
        'features': features
    }

    #create the SFrame
    if data2 is None:
        data2 = SFrame(features)
    else:
        data2 = data2.append(SFrame(features))

data/normalized/non-asian\0008.jpg
data/normalized/non-asian\0082.jpg
data/normalized/non-asian\0024.jpg
data/normalized/non-asian\0050.jpg
data/normalized/non-asian\0055.jpg
data/normalized/non-asian\0038.jpg
data/normalized/non-asian\0015.jpg
data/normalized/non-asian\0068.jpg
data/normalized/non-asian\0075.jpg
data/normalized/non-asian\0019.jpg
data/normalized/non-asian\0092.jpg
data/normalized/non-asian\0090.jpg
data/normalized/non-asian\0003.jpg
data/normalized/non-asian\0017.jpg
data/normalized/non-asian\0084.jpg
data/normalized/non-asian\0037.jpg
data/normalized/non-asian\0028.jpg
data/normalized/non-asian\0094.jpg
data/normalized/non-asian\0044.jpg
data/normalized/non-asian\0027.jpg
data/normalized/non-asian\0095.jpg
data/normalized/non-asian\0048.jpg
data/normalized/non-asian\0063.jpg
data/normalized/non-asian\0020.jpg
data/normalized/non-asian\0072.jpg
data/normalized/non-asian\0013.jpg
data/normalized/non-asian\0014.jpg
data/normalized/non-asian\0085.jpg
data/normalized/non-

## Save the SFrame for later use

In [None]:
# Write the SFrame held in `data2` to disk with SFrame.save.
# NOTE(review): this cell carries no execution count in the saved notebook —
# confirm it was actually run.
data2.save('data/normalized/non_asian_sframe')

# Hispanic Dataset

## Form the dictionary of image UIDs

In [6]:
import glob
import json

# every .jpg in the dataset directory
image_filenames = glob.glob('data/normalized/hispanic/*.jpg')

# upload the images one by one and remember the UID returned for each filename
image_uids = {}
i = 0  # stays 0 when no file matched
for i, image_filename in enumerate(image_filenames, 1):
    print("Image "+str(i)+" of "+str(len(image_filenames)))
    image_uids[image_filename] = upload_image(image_filename, request_xml)

# write the filename -> UID mapping to a JSON file
with open('data/normalized/hispanic_uids.json', 'w') as fp:
    json.dump(image_uids, fp)

Image 1 of 48
Image Filename: data/normalized/hispanic\0001.jpg	 UID: 52aa3da6-a35a-488e-b7c6-40801c3d318d
Image 2 of 48
Image Filename: data/normalized/hispanic\0002.jpg	 UID: afbcad01-a6c9-4a16-adad-c917da95c875
Image 3 of 48
Image Filename: data/normalized/hispanic\0003.jpg	 UID: 0ffd1640-21f1-49e5-8173-efaf15e64687
Image 4 of 48
Image Filename: data/normalized/hispanic\0004.jpg	 UID: 623f0840-e6f8-4e1c-9731-c65642c8ffa7
Image 5 of 48
Image Filename: data/normalized/hispanic\0005.jpg	 UID: 683b9c30-c9ad-499b-bca2-ea7a8bb70c9f
Image 6 of 48
Image Filename: data/normalized/hispanic\0006.jpg	 UID: 621cd6ed-4ff0-4082-93a1-36a5bc241202
Image 7 of 48
Image Filename: data/normalized/hispanic\0007.jpg	 UID: f50705be-169c-487a-b221-7346018376fc
Image 8 of 48
Image Filename: data/normalized/hispanic\0008.jpg	 UID: d19ff0d7-f012-4f0a-974f-9ed973d58425
Image 9 of 48
Image Filename: data/normalized/hispanic\0009.jpg	 UID: 81694156-811c-43e0-9b54-63281ad4a307
Image 10 of 48
Image Filename: data/n

## Get feature points of the images

In [7]:
# fetch the feature points of every uploaded image, keyed by filename
image_infos = {}
length = len(image_uids)
i = 0  # stays 0 when image_uids is empty
for i, key in enumerate(image_uids, 1):
    print("Getting feature points for image "+str(i)+" of "+str(length))
    info_doc = get_image_info(image_uids[key], info_xml)
    feature_points = get_feature_points(info_doc)
    image_infos[key] = {
        'image_uid': image_uids[key],
        'feature_points': feature_points
    }

# write the per-image UID and feature points to a JSON file
with open('data/normalized/hispanic_feature_points.json', 'w') as fp:
    json.dump(image_infos, fp)

Getting feature points for image 1 of 48
Getting feature points for image 2 of 48
Getting feature points for image 3 of 48
Getting feature points for image 4 of 48
Getting feature points for image 5 of 48
Getting feature points for image 6 of 48
Getting feature points for image 7 of 48
Getting feature points for image 8 of 48
Getting feature points for image 9 of 48
Getting feature points for image 10 of 48
Getting feature points for image 11 of 48
Getting feature points for image 12 of 48
Getting feature points for image 13 of 48
Getting feature points for image 14 of 48
Getting feature points for image 15 of 48
Getting feature points for image 16 of 48
Getting feature points for image 17 of 48
Getting feature points for image 18 of 48
Getting feature points for image 19 of 48
Getting feature points for image 20 of 48
Getting feature points for image 21 of 48
Getting feature points for image 22 of 48
Getting feature points for image 23 of 48
Getting feature points for image 24 of 48
G

## Calculate necessary features and build SFrame


In [10]:
import os

import graphlab as gl
from graphlab import SFrame

# Build one single-row SFrame per image from distances between pairs of
# feature points, and append the rows into `data3`.
image_features = {}
length = len(image_infos.keys())
i = 0
data3 = None
for key in image_infos.keys():
    print(key)
    i += 1
    feature_points = image_infos[key]['feature_points']
    features = {
        # Bug fix: the fixed offset key[26:] cut off the first character of
        # every filename here ('016.jpg' instead of '0016.jpg'), because the
        # 'data/normalized/hispanic' prefix plus separator is only 25
        # characters long. basename does not depend on the prefix length.
        'filename': [os.path.basename(key)],
        'eye_outer': [calculate_distance('basic_eye_left_outer', 'basic_eye_right_outer', feature_points)],
        'eye_inner': [calculate_distance('basic_eye_left_inner', 'basic_eye_right_inner', feature_points)],
        'eye': [calculate_distance('basic_eye_left', 'basic_eye_right', feature_points)],
        'mouth': [calculate_distance('basic_mouth_left', 'basic_mouth_right', feature_points)],
        'nose_mouth': [calculate_distance('basic_nose_tip', 'basic_mouth_center', feature_points)],
        'eyebrow_inner': [calculate_distance('basic_eyebrow_left_inner_corner', 'basic_eyebrow_right_inner_corner', feature_points)],
        'eyebrow_outer': [calculate_distance('basic_eyebrow_left_outer_corner', 'basic_eyebrow_right_outer_corner', feature_points)],
        'chin': [calculate_distance('basic_chin_left', 'basic_chin_right', feature_points)],
        'nose': [calculate_distance('basic_nose_left', 'basic_nose_right', feature_points)],
        'chin_mouth': [calculate_distance('basic_chin_bottom', 'basic_mouth_center', feature_points)],
        # label value used for every row of this dataset
        'ethnicity': [2]
    }
    image_features[key] = {
        'image_uid': image_infos[key]['image_uid'],
        'features': features
    }

    #create the SFrame
    if data3 is None:
        data3 = SFrame(features)
    else:
        data3 = data3.append(SFrame(features))

data/normalized/hispanic\0016.jpg
data/normalized/hispanic\0019.jpg
data/normalized/hispanic\0044.jpg
data/normalized/hispanic\0007.jpg
data/normalized/hispanic\0035.jpg
data/normalized/hispanic\0047.jpg
data/normalized/hispanic\0041.jpg
data/normalized/hispanic\0034.jpg
data/normalized/hispanic\0008.jpg
data/normalized/hispanic\0021.jpg
data/normalized/hispanic\0048.jpg
data/normalized/hispanic\0014.jpg
data/normalized/hispanic\0012.jpg
data/normalized/hispanic\0040.jpg
data/normalized/hispanic\0009.jpg
data/normalized/hispanic\0024.jpg
data/normalized/hispanic\0022.jpg
data/normalized/hispanic\0045.jpg
data/normalized/hispanic\0020.jpg
data/normalized/hispanic\0002.jpg
data/normalized/hispanic\0029.jpg
data/normalized/hispanic\0013.jpg
data/normalized/hispanic\0006.jpg
data/normalized/hispanic\0036.jpg
data/normalized/hispanic\0030.jpg
data/normalized/hispanic\0001.jpg
data/normalized/hispanic\0010.jpg
data/normalized/hispanic\0043.jpg
data/normalized/hispanic\0026.jpg
data/normalize

## Save the SFrame for later use

In [14]:
# Write the SFrame held in `data3` to disk with SFrame.save.
data3.save('data/normalized/hispanic_sframe')

# Combine the 2 SFrames and randomly split into train and test sets

In [102]:
# Randomly split the asian rows (fraction 0.665, fixed seed); asian_temp keeps
# the rows that are not part of the combined frame.
asian_data, asian_temp = data.random_split(0.665, seed=5)
features_sframe = asian_data.append(data2)

# row counts: asian subset, non-asian set, combined frame
for frame in (asian_data, data2, features_sframe):
    print(len(frame))

118
110
228


In [108]:
# Random split of the combined frame (fraction .9, fixed seed)
train_data, test_data = features_sframe.random_split(.9, seed=5)

# the asian rows held back in asian_temp are all added to the test set
test_data = test_data.append(asian_temp)

print(len(train_data))
print(len(test_data))

# number of test rows whose ethnicity label is 1
asian_mask = test_data['ethnicity'] == 1
print(len(test_data[asian_mask]))

200
98
87


In [109]:
# Write both splits to disk with SFrame.save.
train_data.save('data/train_data_sframe')
# NOTE(review): 'test_date_sframe' looks like a typo for 'test_data_sframe'.
# Left unchanged because anything that loads this path would have to change
# with it — confirm before renaming.
test_data.save('data/test_date_sframe')