In [1]:
import pandas as pd
import requests

from config import api_key
from config import secret_key


In [2]:
# see https://curl.trillworks.com/ for curl --> python
# see https://www.petfinder.com/developers/v2/docs/ for API

access_token_query_url = "https://api.petfinder.com/v2/oauth2/token"

parameters = {
  'grant_type': 'client_credentials',
  'client_id': api_key,
  'client_secret': secret_key
}

access_token = requests.post(access_token_query_url, data=parameters).json()["access_token"]


In [10]:
# curl -H "Authorization: Bearer {YOUR_ACCESS_TOKEN}" GET https://api.petfinder.com/v2/{CATEGORY}/{ACTION}?{parameter_1}={value_1}&{parameter_2}={value_2}

# category = animals | types | organizations
# parameters = type | breed | size | gender | age | color | coat | status | name | organization | location | distance | sort | page | limit

#########    Option 1 - by city name    ############################

import pymongo
import time

#MongoDB setup
mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
mongo_pet_db = mongo_client['pets']
mongo_pet_collection = mongo_pet_db["pets_by_city"]

cities = ["New York City, New York", 
          "Los Angeles, California",
          "Chicago, Illinois",
          "Houston, Texas",
          "Philadelphia, Pennsylvania",
          "Phoenix, Arizona",
          "San Antonio, Texas",
          "San Diego, California",
          "Dallas, Texas",
          "San Jose, California",
          "Austin, Texas",
          "Jacksonville, Florida",
          "Indianapolis, Indiana",
          "San Francisco, California",
          "Columbus, Ohio",
          "Fort Worth, Texas",
          "Charlotte, North Carolina",
          "Detroit, Michigan",
          "El Paso, Texas",
          "Memphis, Tennessee",
          "Boston, Massachusetts",
          "Seattle, Washington",
          "Denver, Colorado",
          "Washington, DC",
          "Nashville-Davidson, Tennessee",
          "Baltimore, Maryland",
          "Louisville/Jefferson, Kentucky",
          "Portland, Oregon",
          "Oklahoma , Oklahoma",
          "Milwaukee, Wisconsin",
          "Las Vegas, Nevada",
          "Albuquerque, New Mexico",
          "Tucson, Arizona",
          "Fresno, California",
          "Sacramento, California",
          "Long Beach, California",
          "Kansas , Missouri",
          "Mesa, Arizona",
          "Virginia Beach, Virginia",
          "Atlanta, Georgia",
          "Colorado Springs, Colorado",
          "Raleigh, North Carolina",
          "Omaha, Nebraska"
          "Miami, Florida",
          "Oakland, California",
          "Tulsa, Oklahoma",
          "Minneapolis, Minnesota",
          "Cleveland, Ohio",
          "Wichita, Kansas",
          "Arlington, Texas"]




base_query_url = "https://api.petfinder.com/v2/animals?type=dog&limit=100&location="

for city in cities:
    data_query_url = base_query_url + city

    headers = {
        'Authorization': 'Bearer {}'.format(access_token),
    }

    response = requests.get(data_query_url, headers=headers).json() 
    
    # save dataFrame to mongoDB
    mongo_pet_collection.insert(response["animals"])
    
    time.sleep(2)

    
############# end of option 1 ########################



In [15]:
# save response to json file

#import json
#with open("response.json", "w") as json_file:
#    json.dump(response, json_file)

In [14]:
################### Option 2 - by pages###########################

import time 
import pymongo

data_query_url_base = "https://api.petfinder.com/v2/animals?type=dog&limit=100"

mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
#mongo_client = pymongo.MongoClient("mongodb+srv://k9sam:<1013hpa>@petfinder-qbryn.mongodb.net/test")
#mongo_client = pymongo.MongoClient("mongodb+srv://k9sam:<49484951728065>@petfinder-qbryn.mongodb.net/test?retryWrites=true")
# mongoDB db name = pets
mongo_pet_db = mongo_client['pets'] 
mongo_pet_collection = mongo_pet_db["pets_by_page"]


for i in range(11):
  page_parameter = "&page=" + str(i+1)
  data_query_url = data_query_url_base + page_parameter
  
  headers = {
    'Authorization': 'Bearer {}'.format(access_token),
  }

  response = requests.get(data_query_url, headers=headers).json()

  mongo_pet_collection.insert(response["animals"])
  
  time.sleep(2)
  

###### end of option 2 ######################



In [15]:
# You need to clean up json file to save to pandas
import pandas as pd
import json

data = response["animals"]

url = []
species = []
breed = []
color = []
age = []
gender = []
size = []
coat = []
neutered = []
house_trained = []
declawed = []
special_needs = []
shots = []
name = []
description = []
photo = []
status = []
publish_date = []
contact_email = []
contact_phone = []
contact_address1 = []
contact_address2 = []
contact_city = []
contact_state = []
contact_postcode = []
contact_country = []

for item in data:
    url.append(item["url"])
    species.append(item["species"])
    breed.append(item["breeds"]["primary"])
    color.append(item["colors"]["primary"])
    age.append(item["age"])
    gender.append(item["gender"])
    size.append(item["size"])
    coat.append(item["coat"])
    neutered.append(item["attributes"]["spayed_neutered"])
    house_trained.append(item["attributes"]["house_trained"])
    declawed.append(item["attributes"]["declawed"])
    special_needs.append(item["attributes"]["special_needs"])
    shots.append(item["attributes"]["shots_current"])
    name.append(item["name"])
    description.append(item["description"])
    
    if len(item["photos"]) != 0:
        photo.append(item["photos"][0]["full"])
    else:
        photo.append("NULL")

    status.append(item["status"])
    publish_date.append(item["published_at"])
    contact_email.append(item["contact"]["email"])
    contact_phone.append(item["contact"]["phone"])
    contact_address1.append(item["contact"]["address"]["address1"])
    contact_address2.append(item["contact"]["address"]["address2"])
    contact_city.append(item["contact"]["address"]["city"])
    contact_state.append(item["contact"]["address"]["state"])
    contact_postcode.append(item["contact"]["address"]["postcode"])
    contact_country.append(item["contact"]["address"]["country"])
    


data_dict = {
    "url" : url,
    "species" : species,
    "breed" : breed,
    "color" : color,
    "age" : age,
    "gender" : gender,
    "size" : size,
    "coat" : coat,
    "neutered" : neutered,
    "house_trained" : house_trained,
    "declawed" : declawed,
    "special_needs" : special_needs,
    "shots" : shots,
    "name" : name,
    "description" : description,
    "photo" : photo,
    "status" : status,
    "publish_date" : publish_date,
    "contact_email" : contact_email,
    "contact_phone" : contact_phone,
    "contact_address1" : contact_address1,
    "contact_address2" : contact_address2,
    "contact_city" : contact_city,
    "contact_state" : contact_state,
    "contact_postcode" : contact_postcode,
    "contact_country" : contact_country
}

#store data to pandas
df_petData = pd.DataFrame(data = data_dict)
#df_petData

# to csv
#df_petData.to_csv(output_data_file, index=False)

# to mongoDB
mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
#mongo_client = pymongo.MongoClient("mongodb+srv://k9sam:<49484951728065>@petfinder-qbryn.mongodb.net/test")
#mongo_client = pymongo.MongoClient("mongodb+srv://k9sam:<password>@petfinder-qbryn.mongodb.net/test")
# mongoDB db name = pets
mongo_pet_db = mongo_client['pets'] 
#mongoDB collection name = pets_for_you
mongo_pet_collection = mongo_pet_db["pets_for_you"]

records = json.loads(df_petData.T.to_json()).values()
mongo_pet_collection.insert(records)





[ObjectId('5c9690497cb0170da838428a'),
 ObjectId('5c9690497cb0170da838428b'),
 ObjectId('5c9690497cb0170da838428c'),
 ObjectId('5c9690497cb0170da838428d'),
 ObjectId('5c9690497cb0170da838428e'),
 ObjectId('5c9690497cb0170da838428f'),
 ObjectId('5c9690497cb0170da8384290'),
 ObjectId('5c9690497cb0170da8384291'),
 ObjectId('5c9690497cb0170da8384292'),
 ObjectId('5c9690497cb0170da8384293'),
 ObjectId('5c9690497cb0170da8384294'),
 ObjectId('5c9690497cb0170da8384295'),
 ObjectId('5c9690497cb0170da8384296'),
 ObjectId('5c9690497cb0170da8384297'),
 ObjectId('5c9690497cb0170da8384298'),
 ObjectId('5c9690497cb0170da8384299'),
 ObjectId('5c9690497cb0170da838429a'),
 ObjectId('5c9690497cb0170da838429b'),
 ObjectId('5c9690497cb0170da838429c'),
 ObjectId('5c9690497cb0170da838429d'),
 ObjectId('5c9690497cb0170da838429e'),
 ObjectId('5c9690497cb0170da838429f'),
 ObjectId('5c9690497cb0170da83842a0'),
 ObjectId('5c9690497cb0170da83842a1'),
 ObjectId('5c9690497cb0170da83842a2'),
 ObjectId('5c9690497cb017