In [2]:
import pandas as pd
import requests

from config import api_key
from config import secret_key


In [3]:
# see https://curl.trillworks.com/ for curl --> python
# see https://www.petfinder.com/developers/v2/docs/ for API

access_token_query_url = "https://api.petfinder.com/v2/oauth2/token"

parameters = {
  'grant_type': 'client_credentials',
  'client_id': api_key,
  'client_secret': secret_key
}

access_token = requests.post(access_token_query_url, data=parameters).json()["access_token"]


In [33]:
# curl -H "Authorization: Bearer {YOUR_ACCESS_TOKEN}" GET https://api.petfinder.com/v2/{CATEGORY}/{ACTION}?{parameter_1}={value_1}&{parameter_2}={value_2}

# category = animals | types | organizations
# parameters = type | breed | size | gender | age | color | coat | status | name | organization | location | distance | sort | page | limit

#########    Option 1 - by state name (with data cleaning)   ############################

import pymongo
import time
import json

#MongoDB setup
mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
mongo_pet_db = mongo_client['pets']
mongo_pet_collection = mongo_pet_db["pets_by_state"]

states = ["Alabama", "Alaska", "Arizona", "Arkansas","California", "Colorado", "Connecticut", "Delaware", "Florida", "Georgia", 
          "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", 
          "Massachusetts", "Michigan", "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska", "New Hampshire",
          "New Jersey", "New Mexico", "New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania",
          "Rhode Island", "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington", 
          "West Virginia", "Wisconsin", "Wyoming"]


#parameter1 = "?type=dog"
#parameter2 = "&limit=10"
#parameter3 = "&location=Atlanta, GA"

#data_query_url = "https://api.petfinder.com/v2/animals" + parameter1 + parameter2 + parameter3

base_query_url = "https://api.petfinder.com/v2/animals?type=dog&limit=100&location="

for state in states:
    data_query_url = base_query_url + state

    headers = {
        'Authorization': 'Bearer {}'.format(access_token),
    }

    response = requests.get(data_query_url, headers=headers).json()
    
    # json cleaning
    data = response["animals"]
    
    for item in data:
        
        url = item["url"]
        species = item["species"]
        breed = item["breeds"]["primary"]
        color = item["colors"]["primary"]
        age = item["age"]
        gender = item["gender"]
        size = item["size"]
        coat = item["coat"]
        spayed_neutered = item["attributes"]["spayed_neutered"]
        house_trained = item["attributes"]["house_trained"]
        declawed = item["attributes"]["declawed"]
        special_needs = item["attributes"]["special_needs"]
        shots = item["attributes"]["shots_current"]
        name = item["name"]
        description = item["description"]
        
        if len(item["photos"]) != 0:
            photo = item["photos"][0]["full"]
        else:
            photo = "NULL"        
        
        status = item["status"]
        publish_date = item["published_at"]
        contact_email = item["contact"]["email"]
        contact_phone = item["contact"]["phone"]
        contact_address1 = item["contact"]["address"]["address1"]
        contact_address2 = item["contact"]["address"]["address2"]
        contact_city = item["contact"]["address"]["city"]
        contact_state = item["contact"]["address"]["state"]
        contact_postcode = item["contact"]["address"]["postcode"]
        contact_country = item["contact"]["address"]["country"]
    
        data_dict = {
        "url" : url,
        "species" : species,
        "breed" : breed,
        "color" : color,
        "age" : age,
        "gender" : gender,
        "size" : size,
        "coat" : coat,
        "spayed_neutered" : spayed_neutered,
        "house_trained" : house_trained,
        "declawed" : declawed,
        "special_needs" : special_needs,
        "shots" : shots,
        "name" : name,
        "description" : description,
        "photo" : photo,
        "status" : status,
        "publish_date" : publish_date,
        "contact_email" : contact_email,
        "contact_phone" : contact_phone,
        "contact_address1" : contact_address1,
        "contact_address2" : contact_address2,
        "contact_city" : contact_city,
        "contact_state" : contact_state,
        "contact_postcode" : contact_postcode,
        "contact_country" : contact_country
        }
        
        # Save to json file
        with open("pet_by_state.json", "a") as json_file:
            json.dump(data_dict, json_file)
    
        # save dataFrame to mongoDB
        mongo_pet_collection.insert(data_dict)
        
    
    print("Saving data from {}...".format(state))
        
    time.sleep(2)

    
############# end of option 1 ########################



Saving data from Alabama...
Saving data from Alaska...
Saving data from Arizona...
Saving data from Arkansas...
Saving data from California...
Saving data from Colorado...
Saving data from Connecticut...
Saving data from Delaware...
Saving data from Florida...
Saving data from Georgia...
Saving data from Hawaii...
Saving data from Idaho...
Saving data from Illinois...
Saving data from Indiana...
Saving data from Iowa...
Saving data from Kansas...
Saving data from Kentucky...
Saving data from Louisiana...
Saving data from Maine...
Saving data from Maryland...
Saving data from Massachusetts...
Saving data from Michigan...
Saving data from Minnesota...
Saving data from Mississippi...
Saving data from Missouri...
Saving data from Montana...
Saving data from Nebraska...
Saving data from New Hampshire...
Saving data from New Jersey...
Saving data from New Mexico...
Saving data from New York...
Saving data from North Carolina...
Saving data from North Dakota...
Saving data from Ohio...
Saving

In [38]:

#########    Option 2 - by state name (without data cleaning)   ############################

import pymongo
import time
import json

#MongoDB setup
mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
mongo_pet_db = mongo_client['pets']
mongo_pet_collection = mongo_pet_db["pets_by_state_Dirty"]

states = ["Alabama", "Alaska", "Arizona", "Arkansas","California", "Colorado", "Connecticut", "Delaware", "Florida", "Georgia", 
          "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", 
          "Massachusetts", "Michigan", "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska", "New Hampshire",
          "New Jersey", "New Mexico", "New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania",
          "Rhode Island", "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington", 
          "West Virginia", "Wisconsin", "Wyoming"]


base_query_url = "https://api.petfinder.com/v2/animals?type=dog&limit=100&location="

for state in states:
    data_query_url = base_query_url + state

    headers = {
        'Authorization': 'Bearer {}'.format(access_token),
    }

    response = requests.get(data_query_url, headers=headers).json()
    data = response["animals"]
          
    # Save to json file
    with open("pet_by_state_Dirty.json", "a") as json_file:
        json.dump(data, json_file)
    
    # save dataFrame to mongoDB
    mongo_pet_collection.insert(data)
        
    
    print("Saving data from {}...".format(state))
        
    time.sleep(2)

    
############# end of option 2 ########################



Saving data from Alabama...
Saving data from Alaska...
Saving data from Arizona...
Saving data from Arkansas...
Saving data from California...
Saving data from Colorado...
Saving data from Connecticut...
Saving data from Delaware...
Saving data from Florida...
Saving data from Georgia...
Saving data from Hawaii...
Saving data from Idaho...
Saving data from Illinois...
Saving data from Indiana...
Saving data from Iowa...
Saving data from Kansas...
Saving data from Kentucky...
Saving data from Louisiana...
Saving data from Maine...
Saving data from Maryland...
Saving data from Massachusetts...
Saving data from Michigan...
Saving data from Minnesota...
Saving data from Mississippi...
Saving data from Missouri...
Saving data from Montana...
Saving data from Nebraska...
Saving data from New Hampshire...
Saving data from New Jersey...
Saving data from New Mexico...
Saving data from New York...
Saving data from North Carolina...
Saving data from North Dakota...
Saving data from Ohio...
Saving

In [4]:
################### Option 3 - by pages###########################

import time 
import pymongo

data_query_url_base = "https://api.petfinder.com/v2/animals?type=dog&limit=100"

#mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
mongo_client = pymongo.MongoClient("mongodb+srv://k9sam:1234@petfinder-qbryn.mongodb.net/test?retryWrites=true")

# mongoDB db name = pets
mongo_pet_db = mongo_client['pets'] 
mongo_pet_collection = mongo_pet_db["pets_by_page"]


for i in range(5):
    page_parameter = "&page=" + str(i+1)
    data_query_url = data_query_url_base + page_parameter
  
    headers = {
        'Authorization': 'Bearer {}'.format(access_token),
    }

    response = requests.get(data_query_url, headers=headers).json()

    mongo_pet_collection.insert(response["animals"])
  
    print("Saving page {}".format(i+1))
    
    time.sleep(2)

###### end of option 3 ######################



Saving page 0
Saving page 1
Saving page 2
Saving page 3
Saving page 4


In [15]:
# You need to clean up json file to save to pandas
import pandas as pd
import json

data = response["animals"]

url = []
species = []
breed = []
color = []
age = []
gender = []
size = []
coat = []
neutered = []
house_trained = []
declawed = []
special_needs = []
shots = []
name = []
description = []
photo = []
status = []
publish_date = []
contact_email = []
contact_phone = []
contact_address1 = []
contact_address2 = []
contact_city = []
contact_state = []
contact_postcode = []
contact_country = []

for item in data:
    url.append(item["url"])
    species.append(item["species"])
    breed.append(item["breeds"]["primary"])
    color.append(item["colors"]["primary"])
    age.append(item["age"])
    gender.append(item["gender"])
    size.append(item["size"])
    coat.append(item["coat"])
    neutered.append(item["attributes"]["spayed_neutered"])
    house_trained.append(item["attributes"]["house_trained"])
    declawed.append(item["attributes"]["declawed"])
    special_needs.append(item["attributes"]["special_needs"])
    shots.append(item["attributes"]["shots_current"])
    name.append(item["name"])
    description.append(item["description"])
    
    if len(item["photos"]) != 0:
        photo.append(item["photos"][0]["full"])
    else:
        photo.append("NULL")

    status.append(item["status"])
    publish_date.append(item["published_at"])
    contact_email.append(item["contact"]["email"])
    contact_phone.append(item["contact"]["phone"])
    contact_address1.append(item["contact"]["address"]["address1"])
    contact_address2.append(item["contact"]["address"]["address2"])
    contact_city.append(item["contact"]["address"]["city"])
    contact_state.append(item["contact"]["address"]["state"])
    contact_postcode.append(item["contact"]["address"]["postcode"])
    contact_country.append(item["contact"]["address"]["country"])
    


data_dict = {
    "url" : url,
    "species" : species,
    "breed" : breed,
    "color" : color,
    "age" : age,
    "gender" : gender,
    "size" : size,
    "coat" : coat,
    "spayed_neutered" : neutered,
    "house_trained" : house_trained,
    "declawed" : declawed,
    "special_needs" : special_needs,
    "shots" : shots,
    "name" : name,
    "description" : description,
    "photo" : photo,
    "status" : status,
    "publish_date" : publish_date,
    "contact_email" : contact_email,
    "contact_phone" : contact_phone,
    "contact_address1" : contact_address1,
    "contact_address2" : contact_address2,
    "contact_city" : contact_city,
    "contact_state" : contact_state,
    "contact_postcode" : contact_postcode,
    "contact_country" : contact_country
}

#store data to pandas
df_petData = pd.DataFrame(data = data_dict)
#df_petData

# to csv
#df_petData.to_csv(output_data_file, index=False)

# to mongoDB
# mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
mongo_client = pymongo.MongoClient("mongodb+srv://k9sam:1234@petfinder-qbryn.mongodb.net/test?retryWrites=true")

# mongoDB db name = pets
mongo_pet_db = mongo_client['pets'] 
#mongoDB collection name = pets_for_you
mongo_pet_collection = mongo_pet_db["pets_for_you"]

records = json.loads(df_petData.T.to_json()).values()
mongo_pet_collection.insert(records)





[ObjectId('5c9690497cb0170da838428a'),
 ObjectId('5c9690497cb0170da838428b'),
 ObjectId('5c9690497cb0170da838428c'),
 ObjectId('5c9690497cb0170da838428d'),
 ObjectId('5c9690497cb0170da838428e'),
 ObjectId('5c9690497cb0170da838428f'),
 ObjectId('5c9690497cb0170da8384290'),
 ObjectId('5c9690497cb0170da8384291'),
 ObjectId('5c9690497cb0170da8384292'),
 ObjectId('5c9690497cb0170da8384293'),
 ObjectId('5c9690497cb0170da8384294'),
 ObjectId('5c9690497cb0170da8384295'),
 ObjectId('5c9690497cb0170da8384296'),
 ObjectId('5c9690497cb0170da8384297'),
 ObjectId('5c9690497cb0170da8384298'),
 ObjectId('5c9690497cb0170da8384299'),
 ObjectId('5c9690497cb0170da838429a'),
 ObjectId('5c9690497cb0170da838429b'),
 ObjectId('5c9690497cb0170da838429c'),
 ObjectId('5c9690497cb0170da838429d'),
 ObjectId('5c9690497cb0170da838429e'),
 ObjectId('5c9690497cb0170da838429f'),
 ObjectId('5c9690497cb0170da83842a0'),
 ObjectId('5c9690497cb0170da83842a1'),
 ObjectId('5c9690497cb0170da83842a2'),
 ObjectId('5c9690497cb017