# PetFinder API Tests

Use the PetFinder API to pull data about pets for analysis. The data comes back as messy, deeply nested JSON.

### Imports

In [81]:
import requests
import json
import pandas as pd
from pandas.io.json import json_normalize
from IPython.display import Image, display
from IPython.core.display import HTML
import re
import os.path

### Constants


In [82]:
# Read the API key from a local file rather than hard-coding it in the notebook.
with open('./hidden/PETFINDER_API_KEY.txt') as f:
    # strip() removes the trailing newline and also any '\r' or padding
    # spaces (e.g. a file saved with Windows line endings) that would
    # otherwise end up inside the key sent to the API.
    KEY = f.read().strip()

### Test API Call: Getting a random pet

In [83]:
# Query parameters for the pet.getRandom endpoint.
# NOTE(review): the key travels over plain http here -- confirm whether the
# endpoint also accepts https before switching the URL.
parameters = {"key": KEY,
              "format": "json",
              "output": "full",
              "location": "02472",
              "animal": "dog"}
# The timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get('http://api.petfinder.com/pet.getRandom',
                        params=parameters,
                        timeout=30)
# Fail here on an HTTP error status instead of later, while parsing the body.
response.raise_for_status()

In [84]:
# Dump the raw response body to see what the API sent back.
raw_body = response.content
print(raw_body)

{"@encoding":"iso-8859-1","@version":"1.0","petfinder":{"pet":{"options":{},"status":{"$t":"A"},"contact":{"phone":{"$t":"706-541-4077  "},"state":{"$t":"GA"},"address2":{},"email":{"$t":"rfranks@columbiacountyga.gov"},"city":{"$t":"Appling"},"zip":{"$t":"30802"},"fax":{},"address1":{"$t":"1940 William Few Parkway"}},"age":{"$t":"Adult"},"size":{"$t":"M"},"media":{"photos":{"photo":[{"@size":"pnt","$t":"http://photos.petfinder.com/photos/pets/41163550/1/?bust=1520908063&width=60&-pnt.jpg","@id":"1"},{"@size":"fpm","$t":"http://photos.petfinder.com/photos/pets/41163550/1/?bust=1520908063&width=95&-fpm.jpg","@id":"1"},{"@size":"x","$t":"http://photos.petfinder.com/photos/pets/41163550/1/?bust=1520908063&width=500&-x.jpg","@id":"1"},{"@size":"pn","$t":"http://photos.petfinder.com/photos/pets/41163550/1/?bust=1520908063&width=300&-pn.jpg","@id":"1"},{"@size":"t","$t":"http://photos.petfinder.com/photos/pets/41163550/1/?bust=1520908063&width=50&-t.jpg","@id":"1"}]}},"id":{"$t":"41163550"},"

The JSON is quite a mess; let's take a peek.

In [85]:
# Parse the body, then pretty-print only the pet record with sorted keys.
data = json.loads(response.content)
pet_record = data["petfinder"]["pet"]
print(json.dumps(pet_record, sort_keys=True, indent=4))

{
    "age": {
        "$t": "Adult"
    }, 
    "animal": {
        "$t": "Dog"
    }, 
    "breeds": {
        "breed": [
            {
                "$t": "Shiba Inu"
            }, 
            {
                "$t": "Mixed Breed"
            }
        ]
    }, 
    "contact": {
        "address1": {
            "$t": "1940 William Few Parkway"
        }, 
        "address2": {}, 
        "city": {
            "$t": "Appling"
        }, 
        "email": {
            "$t": "rfranks@columbiacountyga.gov"
        }, 
        "fax": {}, 
        "phone": {
            "$t": "706-541-4077  "
        }, 
        "state": {
            "$t": "GA"
        }, 
        "zip": {
            "$t": "30802"
        }
    }, 
    "description": {
        "$t": "This animal was rescued by:       A Citizen    Date animal was rescued: 03/06/18  Date animal was brought to the shelter: 03/06/18  Location where animal was found:  Old Louisville Rd, Grovetown  If you believe this is your lost pet, 

In [86]:
# Let pandas expand the nested dicts into dotted column names.
pet_record = data["petfinder"]["pet"]
json_normalize(pet_record)

Unnamed: 0,age.$t,animal.$t,breeds.breed,contact.address1.$t,contact.city.$t,contact.email.$t,contact.phone.$t,contact.state.$t,contact.zip.$t,description.$t,id.$t,lastUpdate.$t,media.photos.photo,mix.$t,name.$t,sex.$t,shelterId.$t,shelterPetId.$t,size.$t,status.$t
0,Adult,Dog,"[{u'$t': u'Shiba Inu'}, {u'$t': u'Mixed Breed'}]",1940 William Few Parkway,Appling,rfranks@columbiacountyga.gov,706-541-4077,GA,30802,This animal was rescued by: A Citizen ...,41163550,2018-03-13T01:04:20Z,[{u'$t': u'http://photos.petfinder.com/photos/...,yes,A083843,F,GA257,A083843,M,A


The embedded arrays (breeds, photos) are still lists of dicts after normalizing; they could use some flattening.

In [87]:
def flatten_json(y):
    """Flatten nested JSON into a single-level dict.

    Recursively walks ``y``. The dict keys and list positions on the path to
    each leaf are joined with ``_`` to form the output key, so
    ``{"a": {"b": [1, 2]}}`` becomes ``{"a_b_0": 1, "a_b_1": 2}``. The
    flattened version is easy to normalize into a DataFrame row.

    Empty dicts and empty lists contribute no keys. If ``y`` itself is not a
    dict or list, it is returned under the empty-string key.

    -thanks Amir Ziai

    Parameters
    ----------
    y : dict, list or scalar
        Parsed JSON value to flatten.

    Returns
    -------
    dict
        Mapping of flattened path to leaf value.
    """
    out = {}

    def flatten(x, name=''):
        # isinstance rather than an exact type check, so dict/list subclasses
        # (e.g. OrderedDict) are descended into instead of stored whole.
        if isinstance(x, dict):
            for key in x:
                flatten(x[key], name + key + '_')
        elif isinstance(x, list):
            for i, item in enumerate(x):
                flatten(item, name + str(i) + '_')
        else:
            # Trim the trailing '_' separator (a no-op for the empty
            # top-level name).
            out[name[:-1]] = x

    flatten(y)
    return out


# Flatten the single-pet record and load it as a one-row frame.
single_pet = data["petfinder"]["pet"]
flat = flatten_json(single_pet)
pet_data = json_normalize(flat)
print(pet_data.columns)
# One row reads more easily as a column of field/value pairs.
pet_data.T

Index([u'age_$t', u'animal_$t', u'breeds_breed_0_$t', u'breeds_breed_1_$t',
       u'contact_address1_$t', u'contact_city_$t', u'contact_email_$t',
       u'contact_phone_$t', u'contact_state_$t', u'contact_zip_$t',
       u'description_$t', u'id_$t', u'lastUpdate_$t',
       u'media_photos_photo_0_$t', u'media_photos_photo_0_@id',
       u'media_photos_photo_0_@size', u'media_photos_photo_1_$t',
       u'media_photos_photo_1_@id', u'media_photos_photo_1_@size',
       u'media_photos_photo_2_$t', u'media_photos_photo_2_@id',
       u'media_photos_photo_2_@size', u'media_photos_photo_3_$t',
       u'media_photos_photo_3_@id', u'media_photos_photo_3_@size',
       u'media_photos_photo_4_$t', u'media_photos_photo_4_@id',
       u'media_photos_photo_4_@size', u'mix_$t', u'name_$t', u'sex_$t',
       u'shelterId_$t', u'shelterPetId_$t', u'size_$t', u'status_$t'],
      dtype='object')


Unnamed: 0,0
age_$t,Adult
animal_$t,Dog
breeds_breed_0_$t,Shiba Inu
breeds_breed_1_$t,Mixed Breed
contact_address1_$t,1940 William Few Parkway
contact_city_$t,Appling
contact_email_$t,rfranks@columbiacountyga.gov
contact_phone_$t,706-541-4077
contact_state_$t,GA
contact_zip_$t,30802


What kind of beautiful beast are we looking at?

In [88]:
# Loop through the photo URL columns, displaying each image inline.
# Raw string: '\$' in a plain string literal is an invalid escape sequence
# (a warning on newer Pythons); the resulting pattern is unchanged.
photo_url_col = re.compile(r'media_photos_photo_[0-9]+_\$t')
for col in pet_data.columns:
    if photo_url_col.match(col):
        display(Image(url=pet_data.loc[0, col]))

### Pull More Data at Once

In [97]:
# Query parameters for the pet.find endpoint, asking for up to 1000 dogs.
parameters = {"key": KEY,
              "animal": "dog",
              "count": 1000,
              "output": "full",
              "format": "json",
              "location": "02472"
              }

# The timeout keeps the cell from hanging forever on a stalled connection;
# it is generous because this request asks for many records.
response = requests.get('http://api.petfinder.com/pet.find',
                        params=parameters,
                        timeout=60)
# Fail here on an HTTP error status instead of later, while parsing the body.
response.raise_for_status()

In [98]:
# Parse the bulk response. To inspect it, pretty-print with
# json.dumps(data, indent=4, sort_keys=True).
raw_body = response.content
data = json.loads(raw_body)

In [99]:
# One flattened dict per pet -> one DataFrame row per pet. (The previous
# flatten of the whole list was never used, so it is dropped.)
pets = data["petfinder"]["pets"]["pet"]
# NOTE(review): the API appears to collapse one-element arrays into a bare
# dict (compare the breeds_breed_$t and breeds_breed_0_$t columns), so a
# single-pet result would presumably arrive as a dict -- wrap it so the
# comprehension iterates pets rather than dict keys.
if isinstance(pets, dict):
    pets = [pets]
df = pd.DataFrame([flatten_json(pet) for pet in pets])
df.head()

Unnamed: 0,age_$t,animal_$t,breeds_breed_$t,breeds_breed_0_$t,breeds_breed_1_$t,contact_address1_$t,contact_address2_$t,contact_city_$t,contact_email_$t,contact_phone_$t,...,options_option_2_$t,options_option_3_$t,options_option_4_$t,options_option_5_$t,options_option_6_$t,sex_$t,shelterId_$t,shelterPetId_$t,size_$t,status_$t
0,Young,Dog,,Chihuahua,Terrier,,,Newton,rhondabarron@earthlink.net,7133045266,...,,,,,,F,TX1961,,S,A
1,Young,Dog,Chihuahua,,,,,Newton,rhondabarron@earthlink.net,7133045266,...,,,,,,M,TX1961,,S,A
2,Young,Dog,Retriever,,,,,Newton,rhondabarron@earthlink.net,7133045266,...,,,,,,M,TX1961,,M,A
3,Senior,Dog,,Australian Kelpie,Labrador Retriever,,,Newton,rhondabarron@earthlink.net,7133045266,...,noCats,,,,,F,TX1961,,M,A
4,Young,Dog,,Border Collie,Cattle Dog,,,Newton,rhondabarron@earthlink.net,7133045266,...,,,,,,M,TX1961,,M,A


#### Write the data to output

In [100]:
# Persist the pulled pets, merging with anything saved by earlier runs.
csv_path = './hidden/dog_data.csv'
if os.path.isfile(csv_path):
    # The flattened column set varies from pull to pull (e.g. how many
    # options_option_N columns exist), so blindly appending header-less rows
    # would misalign values with the existing header. Read the old rows back
    # and concatenate so columns are matched by name.
    # dtype=str keeps values like zip codes and phone numbers from being
    # re-parsed as numbers (which would drop leading zeros).
    existing = pd.read_csv(csv_path, index_col=0, dtype=str, encoding='utf-8')
    combined = pd.concat([existing, df], ignore_index=True)
else:
    combined = df
combined.to_csv(csv_path, encoding='utf-8')