# Computer Vision API Diversity Audit

In [1]:
import io
import os
import json
from PIL import Image
import requests
from time import sleep
import csv

from google.cloud import vision
from google.cloud.vision import types

import boto3

from clarifai.rest import ClarifaiApp
from clarifai.rest import Image as ClImage

from azure.cognitiveservices.vision.computervision import ComputerVisionAPI

In [2]:
class TestSuite():
    """Run one image through several image-labelling APIs.

    Providers queried, in order: Google Cloud Vision, Amazon Rekognition,
    Clarifai and Microsoft Computer Vision.
    """

    def __init__(self, config):
        """Load credentials from the JSON file at ``config``.

        The file is read for three keys: ``google`` (exported as the
        ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable),
        ``clarifai`` (API key) and ``microsoft`` (a mapping with ``key``
        and ``region``). No Amazon credentials are passed explicitly.
        """
        # Context manager so the config file handle is closed right away.
        with open(config) as config_file:
            self.configs = json.load(config_file)

        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.configs['google']

    def analyze_image(self, file_name):
        """Label the image at ``file_name`` with every provider.

        Returns a dict with the keys ``'google'``, ``'amazon'``,
        ``'clarifai'`` and ``'microsoft'``. Each value maps a label name to
        the score that provider reported, on that provider's own scale.
        """
        # Dict displays evaluate top to bottom, so the providers are still
        # called in the order Google, Amazon, Clarifai, Microsoft.
        return {
            'google': self._google_labels(file_name),
            'amazon': self._amazon_labels(file_name),
            'clarifai': self._clarifai_labels(file_name),
            'microsoft': self._microsoft_labels(file_name),
        }

    def _google_labels(self, file_name):
        """Return {description: score} from Google Cloud Vision label detection."""
        client = vision.ImageAnnotatorClient()

        with io.open(file_name, 'rb') as image_file:
            content = image_file.read()

        image = types.Image(content=content)
        response = client.label_detection(image=image, max_results=100, timeout=5)
        return {label.description: label.score for label in response.label_annotations}

    def _amazon_labels(self, file_name):
        """Return {Name: Confidence} from Amazon Rekognition ``detect_labels``."""
        client = boto3.client('rekognition')

        stream = io.BytesIO()
        with Image.open(file_name) as source:
            # The image is re-encoded as JPEG before upload. JPEG cannot store
            # alpha or palette modes (common in PNG input), so convert those
            # to RGB first instead of letting ``save`` raise.
            jpeg_ready = source if source.mode in ('RGB', 'L') else source.convert('RGB')
            jpeg_ready.save(stream, format="JPEG")
        image_binary = stream.getvalue()

        response = client.detect_labels(Image={'Bytes': image_binary}, MaxLabels=10)
        return {label['Name']: label['Confidence'] for label in response['Labels']}

    def _clarifai_labels(self, file_name):
        """Return {name: value} from Clarifai's ``general-v1.3`` model."""
        app = ClarifaiApp(api_key=self.configs['clarifai'])
        model = app.models.get('general-v1.3')

        # Keep the file open until the prediction has been made, then close
        # it; the original left this handle open.
        with open(file_name, 'rb') as image_file:
            image = ClImage(file_obj=image_file)
            response = model.predict([image])

        concepts = response['outputs'][0]['data']['concepts']
        return {item['name']: item['value'] for item in concepts}

    def _microsoft_labels(self, file_name):
        """Return {name: confidence} for the tags from Microsoft's analyze endpoint.

        Raises ``requests.HTTPError`` when the service answers with an error
        status, rather than failing later on a missing ``'tags'`` key.
        """
        with open(file_name, 'rb') as image_file:
            data = image_file.read()

        params = {'visualFeatures': 'Categories,Tags,Description'}
        headers = {
            'Ocp-Apim-Subscription-Key': self.configs['microsoft']['key'],
            'Content-Type': 'application/octet-stream',
        }

        region = self.configs['microsoft']['region']
        url = 'https://{}.api.cognitive.microsoft.com/vision/v2.0/analyze'.format(region)

        response = requests.post(url, data=data, headers=headers, params=params)
        response.raise_for_status()

        return {item['name']: item['confidence'] for item in response.json()['tags']}
        

APIs considered for this audit:

- Google Cloud Vision API
- Amazon Rekognition
- Clarifai
- Microsoft Azure Computer Vision API
- IBM Watson Visual Recognition (no call to it appears in `TestSuite.analyze_image`)

## Loop it up!

In [3]:
# Root folder of the image set to audit. The collection loop reads the
# gender from the first sub-folder level and the race from the second
# (<directory>/<gender>/<race>/), and the CSV export is written to
# "<directory>.csv".
directory = 'Doctor'

In [None]:
# Walk <directory>/<gender>/<race>/... and label every PNG with every
# provider. Results are grouped as results[gender][race] -> list of the
# per-image dicts returned by TestSuite.analyze_image.
race_groups = ('Black', 'South Asian', 'South-East Asian', 'White')

# Not used by this cell; kept only because they are existing top-level names.
male = []
female = []

male_results = {race: [] for race in race_groups}
female_results = {race: [] for race in race_groups}

# Short aliases for the per-group lists (same list objects as in the dicts).
mb, msa, msea, mw = (male_results[race] for race in race_groups)
fb, fsa, fsea, fw = (female_results[race] for race in race_groups)

# Registered before the loop so that whatever has been collected is still
# reachable through `results` if an API call fails part-way through.
results = {'Male': male_results, 'Female': female_results}

test_suite = TestSuite('config.json')

for subdir, dirs, files in os.walk(directory):

    # Skip folders that are empty or hold a single '.DS*' entry
    # (presumably macOS's .DS_Store).
    if not files or (len(files) == 1 and '.DS' in files[0]):
        continue

    print(subdir)

    # Match on the real extension rather than any '.png' substring.
    images = [name for name in files
              if os.path.splitext(name)[1].lower() == '.png']
    if not images:
        continue

    # Path relative to the root, split on the platform separator, so this
    # works on Windows and when `directory` itself contains a separator.
    path_parts = os.path.relpath(subdir, directory).split(os.sep)
    if len(path_parts) < 2:
        # Images sitting above the <gender>/<race> level cannot be classified.
        print('skipping {}: not inside a <gender>/<race> folder'.format(subdir))
        continue

    gender, race = path_parts[0], path_parts[1]

    # Same fall-through rules as before: any gender folder other than 'Male'
    # counts as female, and any unrecognised race folder counts as 'White'.
    by_race = male_results if gender == 'Male' else female_results
    bucket = by_race.get(race, by_race['White'])

    for file in images:
        result = test_suite.analyze_image(os.path.abspath(os.path.join(subdir, file)))
        bucket.append(result)
        print('yay')

In [5]:
# Export one CSV row per analysed image: the score each of three providers
# gave to its "doctor"-type label, plus the image's class "<gender>_<race>".
# NOTE: the 'microsoft' labels held in each item are not exported here.
#
# newline='' is what the csv module requires of the file object it writes
# to; without it, extra blank rows appear on platforms that translate
# '\n' into '\r\n'.
with open(directory + '.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['google_score', 'amazon_score', 'clarifai_score', 'class'])

    for gender, by_race in results.items():
        for race, items in by_race.items():
            for item in items:
                # When a provider did not return the label, a fixed
                # placeholder is written instead (0.5, or 50 for Amazon).
                google_score = item['google'].get('physician', 0.5)
                amazon_score = item['amazon'].get('Doctor', 50)
                clarifai_score = item['clarifai'].get('doctor', 0.5)

                writer.writerow([google_score, amazon_score, clarifai_score,
                                 gender + '_' + race])