# Project Oxford: Computer Vision API example

### This Jupyter notebook shows you how to get started with the Project Oxford <b>Computer Vision API</b> in Python, and how to visualize your results.

To use this notebook, you will need to get keys to <b>Computer Vision API</b>. Visit <a href="http://www.projectoxford.ai/vision">www.projectoxford.ai/vision</a>, and then the “Try for free” button. On the “Sign in” page, use your Microsoft account to sign in and you will be able to subscribe to Computer Vision API and get free keys (Code of Conduct and TOS). After completing the sign-up process, paste your key into the variables section below. (Either the primary or the secondary key works.)

In [1]:
import time 
import requests
# import cv2
# import operator
import numpy as np
from __future__ import print_function

# Import library to display results
import matplotlib.pyplot as plt
%matplotlib inline 
# Display images within Jupyter

import pickle
from pymongo import MongoClient
import pprint
import json as json_lib
import os
import Queue
import threading





In [2]:
# Variables
# used to be _url and _key
_cvurl = 'https://api.projectoxford.ai/vision/v1.0/analyze'
_faceurl = 'https://api.projectoxford.ai/face/v1.0/detect'
_emotiurl = 'https://api.projectoxford.ai/emotion/v1.0/recognize'
_cvkey ="insert_key_here"
_emotikey = "insert_key_here" 
_facekey = "insert_key_here" 
_maxNumRetries = 5
client = MongoClient('localhost:27017')
db = client.Teenie
curr_time = int(time.time())
num_requests_out = 0
api_rezults = {}
advanced_face_data = None
with open( "/Volumes/HueyFreeman/teenie-faces-1600.json", 'rb' ) as f:
    advanced_face_data = eval(f.read())
print(len(advanced_face_data))

59278


## Helper functions

In [3]:
def processRequest(request_type ,json, data, headers, params = None ):

    """
    Helper function to process the request to Project Oxford

    Parameters:
    json: Used when processing images from its URL. See API Documentation
    data: Used when processing image read from disk. See API Documentation
    headers: Used to pass the key information and the data type request
    """

    retries = 0
    result = None
    if request_type not in ["cv", "face", "emotion"] : return result
    while True:
        if request_type == "cv" :
            _url = _cvurl
            response = requests.request( 'post', _url, json = json, data = data, headers = headers, params = params )
        elif request_type == "face":
            _url = _faceurl
            response = requests.request( 'post', _url, json = json, data = data, headers = headers, params = params )
        else:
            _url = _emotiurl
            response = requests.request( 'post', _url, json = json, data = data, headers = headers)
            
            
        if response.status_code == 429: 

            print( "Message: %s" % ( response.json()) )

            if retries <= _maxNumRetries: 
                time.sleep(1) 
                retries += 1
                continue
            else: 
                print( 'Error: failed after retrying!' )
                break

        elif response.status_code == 200 or response.status_code == 201:

            if 'content-length' in response.headers and int(response.headers['content-length']) == 0: 
                result = None 
            elif 'content-type' in response.headers and isinstance(response.headers['content-type'], str): 
                if 'application/json' in response.headers['content-type'].lower(): 
                    result = response.json() if response.content else None 
                elif 'image' in response.headers['content-type'].lower(): 
                    result = response.content
        else:
            print( "Error code: %d" % ( response.status_code ) )
            print( "Message: %s" % ( response.json()) )

        break
        
    return result

## Analysis of an image stored on disk

In [4]:

# Load raw image file into memory
def loadInDB(path):
    data = None 
    pathToFileInDisk = path
    if path == "!":
        print("failed")
        return
    with open( pathToFileInDisk, 'rb' ) as f:
        data = f.read()

    # Computer Vision parameters
    cvparams = { 'visualFeatures' : 'Categories,Tags,Description,Faces'} 
    faceparams = {'returnFaceId' : True, 'returnFaceLandmarks' : True, 'returnFaceAttributes':'age,gender,headPose,smile,facialHair,glasses'}
    #emotion api does not take parameters
    headers = dict()
    headers['Ocp-Apim-Subscription-Key'] = _cvkey
    headers['Content-Type'] = 'application/octet-stream'

    json = None

    result1 = processRequest("cv", json, data, headers, cvparams )
    headers['Ocp-Apim-Subscription-Key'] = _facekey
    ending = pathToFileInDisk[pathToFileInDisk.index("Box"):]
    i=0
    while (not advanced_face_data[i]['file'].endswith(ending)):
        i+=1
    photo_has_faces = len(advanced_face_data[i]['faces']) > 0
    if result1 is not None:
        if photo_has_faces:
            result2 = processRequest("face", json, data, headers, faceparams )
            headers['Ocp-Apim-Subscription-Key'] = _emotikey
            result3 = processRequest("emotion", json, data, headers)
        else:
            result2 = None
            result3 = None
    else:
        with open('/Volumes/HueyFreeman/microsoftlog.txt', 'a') as the_file:
                the_file.write('Photo unwritten: ' + path + '\n')

    #make a log of anything that results in none so that it can be retried
    if (result1 is not None):
        # pp = pprint.PrettyPrinter(depth=6,indent=4)
        # pp.pprint(result1)

        #Takes everything out of unicode
        result1 = eval(json_lib.dumps(result1))
        if result2 and result3: 
            result2 = eval(json_lib.dumps(result2))
            result3 = eval(json_lib.dumps(result3))

        new_data_point = {
                "path": pathToFileInDisk,
        }
        new_data_point["advancedFaceDetected"] = advanced_face_data[i]["faces"]
        if ('description' in result1) and ('captions' in result1['description']):
            cap_concatenated = " "
            for caption in result1['description']['captions']:
                cap_concatenated = caption['text'] + " " + cap_concatenated
            new_data_point["caption"] = cap_concatenated
        
        if ('description' in result1) and ('tags' in result1['description']):
            for tag in result1['description']['tags']:
                new_data_point[tag] = True
        if ('categories' in result1):
            for ind in result1['categories']:
                new_data_point[ind['name']] = ind['score']
        k = 0  
        try:
            obj_id = db.Photos.insert(new_data_point)
            obj_id_str = repr(obj_id).split("'")[1]  
        except:
            with open('/Volumes/HueyFreeman/microsoftlog.txt', 'a') as the_file:
                the_file.write('Photo unwritten: ' + path + '\n')
                the_file.write(repr(new_data_point))

        # type(result1['description']['tags'][1])
        # altered = eval(json.dumps(result1))
        # pp.pprint(eval(json.dumps(result3)))
        # pp.pprint(eval(json.dumps(result2)))
        #make one json for the face details
        if result3 and result2:
            for face in result3:
                for compare_face in result2:
                    try:
                        if ((face['faceRectangle']['left'] == compare_face['faceRectangle']['left']) and
                            (face['faceRectangle']['top'] == compare_face['faceRectangle']['top'])):
                            compare_face['scores'] = face['scores']
                    except:
                        with open('/Volumes/HueyFreeman/microsoftlog.txt', 'a') as the_file:
                             the_file.write('Erroneneous face data in photo: ' + path + '\n')
            face_obj_id_list = []

            for ind in result2:
                new_face_point = {} #instead of result2 set it equal to a list of object ids returned from mongodb
                new_face_point['photoId'] = obj_id
                try:
                    for dim in ind['faceRectangle']:
                        new_face_point[dim] = ind['faceRectangle'][dim]
                except:
                    pass
                try:
                    for emotion in ind['scores']:
                        new_face_point[emotion] = ind['scores'][emotion]
                except:
                    pass
                try:
                    for marks in ind['faceLandmarks']:
                        new_face_point[marks] = ind['faceLandmarks'][marks]
                except:
                    pass
                try:
                    new_face_point['age'] = ind['faceAttributes']['age']
                    new_face_point['beard'] = ind['faceAttributes']['facialHair']['beard']
                    new_face_point['moustache'] = ind['faceAttributes']['facialHair']['moustache']
                    new_face_point['sideburns'] = ind['faceAttributes']['facialHair']['sideburns']
                    new_face_point['gender'] = ind['faceAttributes']['gender']
                    new_face_point['smile'] = ind['faceAttributes']['smile']
                    new_face_point['headPose_pitch'] = ind['faceAttributes']['headPose']['pitch']
                    new_face_point['headPose_roll'] = ind['faceAttributes']['headPose']['roll']
                    new_face_point['headPose_yaw'] = ind['faceAttributes']['headPose']['yaw']
                except:
                    pass
                try:
                    face_obj_id = db.Faces.insert(new_face_point)
                    face_obj_id_list.append(face_obj_id)
                except:
                    try:
                        face_obj_id = db.Faces.insert(new_face_point)
                        face_obj_id_list.append(face_obj_id)
                    except:
                         with open('/Volumes/HueyFreeman/microsoftlog.txt', 'a') as the_file:
                            the_file.write('Face unwritten in photo: ' + path + '\n')
                            the_file.write(repr(new_face_point))
            try:
            # update a data point with the list of faces
                new = db.Photos.update({
                  '_id': obj_id
                },{
                  '$set': {
                    'face_data': face_obj_id_list
                  }
                }, upsert=False, multi=False)
                return 1
            except:
                with open('/Volumes/HueyFreeman/microsoftlog.txt', 'a') as the_file:
                    the_file.write('Faces list not updated in : '+ path + '\n')
                    the_file.write(repr(face_obj_id_list))
        else:
            print("Avoided unnecessary calls to api " + pathToFileInDisk)
            
    else: 
        print(pathToFileInDisk)
        with open('/Volumes/HueyFreeman/microsoftlog.txt', 'a') as the_file:
             the_file.write('Error in : '+ path + '\n')
        return 0

In [5]:

exitFlag = 0

class myThread (threading.Thread):
    def __init__(self, threadID, name, q):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.q = q
    def run(self):
        print( "Starting " + self.name)
        process_data(self.name, self.q)
        print( "Exiting " + self.name)

def process_data(threadName, q):
    while not exitFlag:
        queueLock.acquire()
        if not workQueue.empty():
            data = q.get() #pulling out of namelist
            queueLock.release()
            loadInDB(data)
            print( "%s processing %s" % (threadName, data))
        else:
            queueLock.release()
        time.sleep(1)
#70,100 ...70 through 100 and 118 through 197
threadList = ["Thread-"+str(i) for i in xrange(12)] #maybe try only up to 72 max when you know it works
pathList = []
for i in xrange(80,85): #do (80, 100)
    cur_dir = "/Volumes/HueyFreeman/Teenie_Harris_PNG1024/Box_0" + str(i)
    for root, dirs, files in os.walk(cur_dir):
        pathList += [os.path.join(root, file) if file.endswith(".png") else "!" for file in files]
        #Give it the signal to abort in function if the path name isn't valid
queueLock = threading.Lock()
workQueue = Queue.Queue() #you want it to run about 100 times faster, or 600 calls per minute max
threads = []
threadID = 1


# Create new threads
for tName in threadList:
    thread = myThread(threadID, tName, workQueue)
    thread.start()
    threads.append(thread)
    threadID += 1

# Fill the queue
queueLock.acquire()
for the_path in pathList:
    workQueue.put(the_path)
queueLock.release()
print("workQueueLength = " + str(workQueue.qsize))

#something about having a long queue that won't allow it to run faster????

# Wait for queue to empty
while not workQueue.empty():
    pass

# Notify threads it's time to exit
exitFlag = 1

# Wait for all threads to complete
for t in threads:
    t.join()
print("Exiting Main Thread")

Starting Thread-0
Starting Thread-1
Starting Thread-2
Starting Thread-3
Starting Thread-4
Starting Thread-5
Starting Thread-6
Starting Thread-7
Starting Thread-8
Starting Thread-9
Starting Thread-10
Starting Thread-11
workQueueLength = <bound method Queue.qsize of <Queue.Queue instance at 0x19e9b6758>>
Message: {u'message': u'Rate limit is exceeded. Try again in 1 seconds.', u'statusCode': 429}
Message: {u'message': u'Rate limit is exceeded. Try again in 1 seconds.', u'statusCode': 429}
Avoided unnecessary calls to api /Volumes/HueyFreeman/Teenie_Harris_PNG1024/Box_080/12735.png
Thread-6 processing /Volumes/HueyFreeman/Teenie_Harris_PNG1024/Box_080/12735.png
Error code: 500
Message: {u'message': u'Internal server error.', u'code': u'InternalServerError', u'requestId': u'4e0e9968-00c6-465c-8379-34f25d6abe15'}
/Volumes/HueyFreeman/Teenie_Harris_PNG1024/Box_080/12731.png
Thread-10 processing /Volumes/HueyFreeman/Teenie_Harris_PNG1024/Box_080/12731.png
Thread-8 processing /Volumes/HueyFree

In [None]:
ex_b="/Users/zariahoward/Desktop/Box_085/"
for i in xrange(64,100):
    cur_dir = "/Volumes/HueyFreeman/Teenie_Harris_PNG1024/Box_0" + str(i)
    for root, dirs, files in os.walk(cur_dir):
        for file in files:
            if file.endswith(".png"):
                print(os.path.join(root, file))
                print(loadInDB(os.path.join(root, file))) 

# print(db.Photos.find_one(obj_id)) #you must retrieve by the object id returned to you

'''
>> import pymongo
>>> conn = pymongo.MongoClient()
>>> db = conn.test #test is my database
>>> col = db.spam #Here spam is my collection
>>> cur = col.find()  
>>> cur
<pymongo.cursor.Cursor object at 0xb6d447ec>
>>> for doc in cur:
...     print(doc)
... 
{'a': 1, '_id': ObjectId('54ff30faadd8f30feb90268f'), 'b': 2}
{'a': 1, 'c': 3, '_id': ObjectId('54ff32a2add8f30feb902690'), 'b': 2}
'''

In [None]:
for i in xrange(55,56):
    cur_dir = "/Volumes/HueyFreeman/Teenie_Harris_PNG1024/Box_0" + str(i)
    for root, dirs, files in os.walk(cur_dir):
        for file in files:
            if file.endswith(".png"):
                if (int(file[0:4]) > 7377):
                    print(os.path.join(root, file))
                    print(loadInDB(os.path.join(root, file)))