Uses a PetFinder developer API key to search for and download pet data from https://www.petfinder.com/, then extracts the relevant information and saves it to a CSV file for analysis.

In [3]:
import csv
import json
import os
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests

In [4]:
## Get API key and secret from https://www.petfinder.com/user/developer-settings/
## A new API key is needed if the current one has been inactive for 120 days
## Documentation: https://www.petfinder.com/developers/v2/docs/
##
## The credentials are read from the environment so they are never stored in
## the notebook itself. Set PETFINDER_API_KEY and PETFINDER_SECRET before
## running; both fall back to an empty string when unset.
## NOTE(review): a key/secret pair that has ever been committed in plain text
## should be revoked in the developer settings and replaced.

petFinder_api_key = os.environ.get('PETFINDER_API_KEY', '')
petFinder_secret = os.environ.get('PETFINDER_SECRET', '')

## Folder the csv output is written to
data_dir = '/content/drive/My Drive/Data_Science_Projects/PetFinder/'

In [5]:
def get_auth_token_header():
  """Request an OAuth2 access token from PetFinder and wrap it in a header dict.

  The module-level ``petFinder_api_key`` and ``petFinder_secret`` are sent as
  the client id / client secret of a ``client_credentials`` grant.

  Returns:
    dict: ``{"Authorization": "Bearer <token>"}``, ready to be passed as the
    ``headers`` argument of a request.

  Raises:
    requests.HTTPError: if the token endpoint answers with a 4xx/5xx status.
    KeyError: if the response body contains no ``access_token``.
  """
  data = {
      "grant_type": "client_credentials",
      "client_id": petFinder_api_key,
      "client_secret": petFinder_secret,
  }
  petfinder_auth_url = "https://api.petfinder.com/v2/oauth2/token"

  # A timeout keeps the notebook from hanging forever on a stalled connection.
  auth_response = requests.post(petfinder_auth_url, data=data, timeout=30)
  # Report rejected credentials as an HTTP error here instead of as a
  # KeyError on the missing token below.
  auth_response.raise_for_status()

  # Read token from auth response
  auth_token = auth_response.json()["access_token"]
  return {"Authorization": "Bearer %s" % auth_token}

In [None]:
##Search for adopted pet profiles that are more than 1 day old
#search_day = datetime.today() - timedelta(days=1)
#search_day = str(day.isoformat())
#print(search_day)

In [6]:
def get_data(url,auth_token_header):
  """Send a GET request to ``url`` and return the decoded JSON body.

  Args:
    url: Full request URL, including any query string.
    auth_token_header: Header dict sent with the request (the value returned
      by ``get_auth_token_header``).

  Returns:
    The response body decoded from JSON.

  Raises:
    requests.HTTPError: if the server answers with a 4xx/5xx status, so an
      error body is never handed on as if it were search results.
  """
  # A timeout keeps the notebook from hanging forever on a stalled connection.
  response = requests.get(url, headers=auth_token_header, timeout=30)
  response.raise_for_status()

  return response.json()

In [7]:
## Column layout of the pets table, grouped by topic.
## The order below is the order of the columns in the dataframe.
pet_columns = (
    ['age']
    + ['declawed', 'house_trained', 'shots_current', 'spayed_neutered', 'special_needs']
    + ['breed_mixed', 'breed_primary', 'breed_secondary', 'breed_unknown']
    + ['size', 'coat']
    + ['color_primary', 'color_secondary']
    + ['description']
    + ['good_with_cats', 'good_with_children', 'good_with_dogs']
    + ['gender', 'id', 'organization_id', 'status']
    + ['publish_date', 'status_change_date', 'time_to_adoption']
)

## Empty dataframe with that layout; rows are added to copies of it later
pets_df = pd.DataFrame(columns=pet_columns)

In [8]:
## Template record: one default value per column of the pets table.
## Text fields default to the string "None", flags to False (breed_mixed to True).
pet_dict = dict(
    age="None",
    # medical / care attributes
    declawed=False,
    house_trained=False,
    shots_current=False,
    spayed_neutered=False,
    special_needs=False,
    # breed
    breed_mixed=True,
    breed_primary="None",
    breed_secondary="None",
    breed_unknown=False,
    # appearance
    size="None",
    coat="None",
    color_primary="None",
    color_secondary="None",
    description="None",
    # environment
    good_with_cats=False,
    good_with_children=False,
    good_with_dogs=False,
    # identity and listing status
    gender="None",
    id="None",
    organization_id="None",
    status="None",
    # dates and derived duration (in days)
    publish_date="0000",
    status_change_date="0000",
    time_to_adoption=1,
)

In [10]:
def get_animal_attributes(data_json_object, pets_df, pet_dict):
  """Flatten the animals of one search response into rows of a dataframe.

  Args:
    data_json_object: Decoded JSON of an animal search; must contain an
      ``'animals'`` list of per-animal dicts.
    pets_df: Dataframe the new rows are added to. It is not modified.
    pet_dict: Template record with default values. It is copied for every
      animal and is not modified.

  Returns:
    A dataframe holding the rows of ``pets_df`` followed by one row per animal
    that could be parsed, with the index renumbered from 0. ``pets_df`` itself
    is returned when no animal could be parsed.

    ``time_to_adoption`` is the number of whole days between publication and
    the status change for ``"adopted"`` animals, between publication and now
    for ``"adoptable"`` animals, and ``None`` for any other status.

  Raises:
    KeyError: if ``data_json_object`` has no ``'animals'`` entry.
  """
  date_format = "%Y-%m-%dT%H:%M:%S+0000"
  # The timestamps carry a +0000 (UTC) offset and are parsed as naive
  # datetimes, so "now" has to be naive UTC too, not local time.
  now = datetime.now(timezone.utc).replace(tzinfo=None)

  rows = []
  for i, animal in enumerate(data_json_object['animals']):
    # Work on a copy so the caller's template keeps its default values.
    thispet = dict(pet_dict)

    try:
      attributes = animal['attributes']
      breeds = animal['breeds']
      colors = animal['colors']
      environment = animal['environment']

      thispet["age"] = animal['age']
      thispet["declawed"] = attributes['declawed']
      thispet["house_trained"] = attributes['house_trained']
      thispet["shots_current"] = attributes['shots_current']
      thispet["spayed_neutered"] = attributes['spayed_neutered']
      thispet["special_needs"] = attributes['special_needs']
      thispet["breed_mixed"] = breeds['mixed']
      thispet["breed_primary"] = breeds['primary']
      thispet["breed_secondary"] = breeds['secondary']
      thispet["breed_unknown"] = breeds['unknown']
      thispet["size"] = animal['size']
      thispet["coat"] = animal['coat']
      thispet["color_primary"] = colors['primary']
      thispet["color_secondary"] = colors['secondary']
      thispet["description"] = animal['description']
      thispet["good_with_cats"] = environment['cats']
      thispet["good_with_children"] = environment['children']
      thispet["good_with_dogs"] = environment['dogs']
      thispet["gender"] = animal['gender']
      thispet["id"] = animal['id']
      thispet["organization_id"] = animal['organization_id']

      status = animal['status']
      publish_date = datetime.strptime(animal['published_at'], date_format)
      status_change_date = datetime.strptime(animal['status_changed_at'], date_format)
      thispet["status"] = status
      thispet["publish_date"] = publish_date
      thispet["status_change_date"] = status_change_date

      if status == "adopted":
        thispet["time_to_adoption"] = (status_change_date - publish_date).days
      elif status == "adoptable":
        thispet["time_to_adoption"] = (now - publish_date).days
      else:
        # No meaningful duration for other statuses; never reuse the value
        # computed for a previous animal.
        thispet["time_to_adoption"] = None

    except (KeyError, TypeError, ValueError) as err:
      # Missing field, null section, or unexpected date format: skip this
      # animal but say why.
      print('Error occured at ', i, repr(err))
      continue

    rows.append(thispet)

  if not rows:
    return pets_df

  new_rows = pd.DataFrame(rows)
  if len(pets_df.index) == 0:
    # Nothing to prepend: keep the column order of pets_df, followed by any
    # extra keys that only the records have.
    known = set(pets_df.columns)
    extra = [col for col in new_rows.columns if col not in known]
    return new_rows.reindex(columns=list(pets_df.columns) + extra)

  # DataFrame.append was removed in pandas 2.0; concat once instead.
  return pd.concat([pets_df, new_rows], ignore_index=True)

In [19]:
## Get authentication token (need to renew every hour)
auth_token_header = get_auth_token_header()

## Do search in 20-page increments
## Append results to csv file
n_pages = 20
n_iter = 8   ## Last search n_iter = 8

## The search settings are the same for every page, so build the URL prefix once
zipcode = '91006' # Use a California zipcode for now
status = "adopted"
BASE_URL = "https://api.petfinder.com/v2/"
search_url = BASE_URL+'animals?limit=100&type=cat&location='+zipcode+'&status='+status+'&page='

## Collect one dataframe per page and concatenate once at the end
## (DataFrame.append was removed in pandas 2.0 and re-copied every row on each call)
page_frames = []

for i in range(n_pages):
  pagenum = i + 1 + (n_iter * n_pages) # Compute page number

  ## Get data
  response = get_data(search_url+str(int(pagenum)), auth_token_header)
  current_pet_df = get_animal_attributes(response, pets_df, pet_dict)

  ## Keep only pages that produced rows
  if len(current_pet_df) > 0:
    page_frames.append(current_pet_df)

## Each page keeps its own row index, as before; fall back to the empty
## template dataframe when no page returned any rows
my_pet_df = pd.concat(page_frames) if page_frames else pets_df

In [21]:
csv_name = data_dir+'Adopted_pets.csv'

## Filter out the profiles with the same publish and status change date
## Profile likely removed and re-published once the pet is adopted
my_pet_df_filtered = my_pet_df[my_pet_df['time_to_adoption'] > 0]

## Write the column header only when the csv does not exist yet (or is empty),
## so the first search (n_iter = 0) creates the file and later searches append
## to it without any code having to be commented in or out
write_header = not os.path.exists(csv_name) or os.path.getsize(csv_name) == 0

## Passing the path lets pandas open the file itself with the right newline
## handling instead of writing through a plain text-mode handle
my_pet_df_filtered.to_csv(csv_name, mode='a', header=write_header)