## Import Dependencies

In [1]:
import petpy
from petpy import Petfinder
from config import API_key, API_secret
import json
import pymongo
import pandas as pd
from pprint import pprint

## API Call and Pandas DF Processing

In [2]:
# Create the Petfinder API client from the key/secret pair imported from config.
pf = Petfinder(
    key=API_key,
    secret=API_secret,
)

In [3]:
# Call the Petfinder animals endpoint, requesting 100 pages at 100 results per
# page, and have petpy return the results as a pandas DataFrame.
animal_df = pf.animals(
    results_per_page=100,
    pages=100,
    return_df=True,
)

  results_df = json_normalize(results[key])


In [4]:
# Work on an independent (deep) copy so the raw API results held in animal_df
# are preserved and the expensive API call does not need to be repeated.
animal2_df = animal_df.copy(deep=True)

In [5]:
# Get rid of the extra organization_id column that was causing an error.
# The frame carries the organization_id label twice; keep only the first
# occurrence of every column label instead of slicing the first 48 positions,
# so the cell keeps working if the API adds or removes columns.
animal2_df = animal2_df.loc[:, ~animal2_df.columns.duplicated()]

In [6]:
# Materialise the column labels as a plain list to inspect how they are formatted
# (in particular, which ones use dot notation).
column_list = animal2_df.columns.tolist()
column_list

['id',
 'organization_id',
 'url',
 'type',
 'species',
 'age',
 'gender',
 'size',
 'coat',
 'tags',
 'name',
 'description',
 'organization_animal_id',
 'photos',
 'videos',
 'status',
 'status_changed_at',
 'published_at',
 'distance',
 'breeds.primary',
 'breeds.secondary',
 'breeds.mixed',
 'breeds.unknown',
 'colors.primary',
 'colors.secondary',
 'colors.tertiary',
 'attributes.spayed_neutered',
 'attributes.house_trained',
 'attributes.declawed',
 'attributes.special_needs',
 'attributes.shots_current',
 'environment.children',
 'environment.dogs',
 'environment.cats',
 'primary_photo_cropped.small',
 'primary_photo_cropped.medium',
 'primary_photo_cropped.large',
 'primary_photo_cropped.full',
 'contact.email',
 'contact.phone',
 'contact.address.address1',
 'contact.address.address2',
 'contact.address.city',
 'contact.address.state',
 'contact.address.postcode',
 'contact.address.country',
 'animal_id',
 'animal_type']

In [7]:
# Change column header names in animal2_df to remove dot notation for Mongo.
# Every "." in a label is replaced with "_" (e.g. 'breeds.primary' ->
# 'breeds_primary', 'contact.address.city' -> 'contact_address_city').
# Using a callable instead of a hand-written mapping means any dotted column
# the API returns is converted, not just the ones listed when this was written.
animal3_df = animal2_df.rename(columns=lambda label: label.replace(".", "_"))

In [8]:
# Preview the first five rows to confirm the renamed column headers.
animal3_df.head(n=5)

Unnamed: 0,id,organization_id,url,type,species,age,gender,size,coat,tags,...,contact_email,contact_phone,contact_address_address1,contact_address_address2,contact_address_city,contact_address_state,contact_address_postcode,contact_address_country,animal_id,animal_type
0,50398452,IN64,https://www.petfinder.com/cat/gidget-50398452/...,Cat,Cat,Adult,Female,Medium,Short,[],...,animalcareservices@columbus.in.gov,812-376-2505,2730 Arnold St,,Columbus,IN,47203,US,50398452,cat
1,50398488,WI47,https://www.petfinder.com/dog/lesa-50398488/wi...,Dog,Dog,Adult,Female,Large,,[],...,elmbrookhs@ebhs.org,(262) 782-9261,20950 Enterprise Avenue,,Brookfield,WI,53045,US,50398488,dog
2,50398489,WI47,https://www.petfinder.com/cat/sky-50398489/wi/...,Cat,Cat,Young,Female,Medium,,[],...,elmbrookhs@ebhs.org,(262) 782-9261,20950 Enterprise Avenue,,Brookfield,WI,53045,US,50398489,cat
3,50398490,WI47,https://www.petfinder.com/dog/kimber-50398490/...,Dog,Dog,Adult,Male,Medium,,[],...,elmbrookhs@ebhs.org,(262) 782-9261,20950 Enterprise Avenue,,Brookfield,WI,53045,US,50398490,dog
4,50398491,WI47,https://www.petfinder.com/cat/timmy-50398491/w...,Cat,Cat,Baby,Female,Small,,[],...,elmbrookhs@ebhs.org,(262) 782-9261,20950 Enterprise Avenue,,Brookfield,WI,53045,US,50398491,cat


In [9]:
# Call the Petfinder organizations endpoint, requesting 100 pages at 100 results
# per page, and have petpy return the results as a pandas DataFrame.
organizations_df = pf.organizations(
    results_per_page=100,
    pages=100,
    return_df=True,
)

  results_df = json_normalize(results[key])


In [10]:
# Remove the photos column: each cell in it holds a list, which was producing
# an error in pandas during later processing.
org_df = organizations_df.drop('photos', axis=1)

In [11]:
# Mapping from the dotted column names in the organizations frame to the
# dot-free names stored in Mongo. Note that the 'social_media.*' columns become
# 'socialmedia_*' (the inner underscore is dropped), so this is not a plain
# dot-to-underscore substitution.
org_column_renames = {
    # address
    'address.address1': 'address_address1',
    'address.address2': 'address_address2',
    'address.city': 'address_city',
    'address.state': 'address_state',
    'address.postcode': 'address_postcode',
    'address.country': 'address_country',
    # opening hours
    'hours.monday': 'hours_monday',
    'hours.tuesday': 'hours_tuesday',
    'hours.wednesday': 'hours_wednesday',
    'hours.thursday': 'hours_thursday',
    'hours.friday': 'hours_friday',
    'hours.saturday': 'hours_saturday',
    'hours.sunday': 'hours_sunday',
    # adoption
    'adoption.policy': 'adoption_policy',
    'adoption.url': 'adoption_url',
    # social media
    'social_media.facebook': 'socialmedia_facebook',
    'social_media.twitter': 'socialmedia_twitter',
    'social_media.youtube': 'socialmedia_youtube',
    'social_media.instagram': 'socialmedia_instagram',
    'social_media.pinterest': 'socialmedia_pinterest',
}

# Apply the mapping; columns not named in it keep their current labels.
org2_df = org_df.rename(columns=org_column_renames)

In [12]:
# Drop rows that are duplicated across all columns.
# drop_duplicates() returns a new frame and leaves its input untouched, so the
# result has to be assigned; otherwise the duplicates are still present in
# org2_df when the later cells convert and merge it.
org2_df = org2_df.drop_duplicates()
# A previous run was noted as leaving 8932 rows after this step.
org2_df

Unnamed: 0,id,name,email,phone,url,website,mission_statement,distance,address_address1,address_address2,...,hours_saturday,hours_sunday,adoption_policy,adoption_url,socialmedia_facebook,socialmedia_twitter,socialmedia_youtube,socialmedia_instagram,socialmedia_pinterest,organization_id
0,PA912,The Shelter Animal Project,tsapadoption@gmail.com,,https://www.petfinder.com/member/us/pa/fairles...,,,,PO Box 24,,...,,,,,,,,,,pa912
1,GA762,Special Touch Animal Rescue,STARRadopt@aol.com,(404) 983-3581,https://www.petfinder.com/member/us/ga/acworth...,,We are a private foster home group. We are NOT...,,,,...,,,,,,,,,,ga762
2,OH749,Purr-fect Companions Sanctuary,purrfectcompanions@att.net,216-671-6369,https://www.petfinder.com/member/us/oh/lakewoo...,,,,P.O.Box 770992,,...,,,,,,,,,,oh749
3,TX876,Gina's Heart of Gold Reptile Rescue,gdisteldorf@comcast.net,(713) 329-9304,https://www.petfinder.com/member/us/tx/houston...,,,,,,...,,,,,,,,,,tx876
4,MO556,Montgomery City Animal Shelter/Project Preciou...,projectpreciouspaws@hotmail.com,1-573-310-4979,https://www.petfinder.com/member/us/mo/montgom...,,,,915 Benton Street,,...,,,,,,,,,,mo556
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,FL271,Schnauzer and Fox Terrier Rescue League,rescue2468@yahoo.com,407-333-0524,https://www.petfinder.com/member/us/fl/altamon...,,,,,,...,,,,,,,,,,fl271
9996,WA353,Kitsap Animal Rescue & Education,adoptions@nwkare.org,(360) 602-6717,https://www.petfinder.com/member/us/wa/silverd...,https://nwkare.org/,Check us out at www.nwkare.org! Where you can ...,,,,...,,,As a rescue group that utilizes a foster care ...,https://forms.gle/8Gi4oUoP5EVMwfyB7,https://www.facebook.com/nwkare/,,,https://www.instagram.com/kare_nw/,,wa353
9997,IA220,Tama County Humane Society,tamacountyhumanesociety@yahoo.com,(641) 481-7001,https://www.petfinder.com/member/us/ia/tama/ta...,https://www.facebook.com/TamaCountyHumaneSociety/,The Tama County Humane Society is an organiza...,,1406 E 5th Street,,...,7am-11am,,See an animal that you like? Give us a call or...,https://docs.google.com/forms/d/e/1FAIpQLSdaBs...,https://www.facebook.com/Adoptable-at-Tama-Cou...,,,,,ia220
9998,CA2267,Almost Eden Rescue,almostedenrescue@yahoo.com,,https://www.petfinder.com/member/us/ca/ridgecr...,http://AlmostEdenRescue.com,We are a foster-based rescue and operate with...,,,,...,,,"To adopt a new family member, we will require ...",,https://www.facebook.com/AlmostEdenRescue/,,https://www.youtube.com/channel/UCGYWgri15xr-t...,,,ca2267


## Lat and Long DF and Add to Org Collection

In [13]:
# Load the zip-code lookup table (read below for its Zip, Latitude and
# Longitude columns) from the project's resources folder.
zip_csv_path = "col_resources/zip_lat_lon.csv"
zipfile_df = pd.read_csv(zip_csv_path)

In [14]:
# Keep only the three columns needed to geolocate an organization by postcode.
zipcode_df = zipfile_df.loc[:, ["Zip", "Latitude", "Longitude"]]

In [15]:
# Need to convert datatypes in order to merge dataframes: Zip must be a string
# to match the organization postcode column.
# If read_csv parsed Zip as a number, leading zeros were lost ("02134" became
# 2134), so after the cast the value is left-padded back to five characters;
# otherwise those postcodes can never match in the merge.
# NOTE(review): assumes the file holds 5-digit US ZIP codes — confirm.
convert_zipdict = {'Zip': str}
zip_converted_df = zipcode_df.astype(convert_zipdict).assign(
    Zip=lambda frame: frame['Zip'].str.zfill(5)
)

In [16]:
# Cast the organization postcode column to str as well, so that both sides of
# the upcoming merge use the same dtype for the join key.
convert_orgdict = dict(address_postcode=str)
org_converted_df = org2_df.astype(dtype=convert_orgdict)

In [17]:
# Give the zip-code column the same name as the organization postcode column
# so the two frames can be merged on a shared key.
new_zip_df = zip_converted_df.rename({'Zip': 'address_postcode'}, axis='columns')

In [18]:
# Merge the zip-code and organization dataframes on postcode so organizations
# pick up Latitude/Longitude. This is an inner join: organizations whose
# postcode has no match in the zip-code table are dropped from the result.
# The join key only needs to be named once.
org_zip_df = pd.merge(
    new_zip_df,
    org_converted_df,
    how="inner",
    on="address_postcode",
)

In [19]:
# Drop the existing organization_id column (its values are the lower-case form
# of the id) so that the upper-case id column can take over that name next.
org_zip2_df = org_zip_df.drop('organization_id', axis=1)

In [20]:
# Expose the upper-case id column under the name organization_id, matching the
# organization_id values carried by the animal records.
org_zip3_df = org_zip2_df.rename({'id': 'organization_id'}, axis='columns')

In [21]:
# Cast the animal id column to float.
# NOTE(review): the merge further down keys on organization_id rather than id,
# so confirm whether this cast is still required.
convert_animaldict = dict(id=float)
animal_conv_df = animal3_df.astype(dtype=convert_animaldict)

In [22]:
# Preview the first five rows to check the converted id column.
animal_conv_df.head(n=5)

Unnamed: 0,id,organization_id,url,type,species,age,gender,size,coat,tags,...,contact_email,contact_phone,contact_address_address1,contact_address_address2,contact_address_city,contact_address_state,contact_address_postcode,contact_address_country,animal_id,animal_type
0,50398452.0,IN64,https://www.petfinder.com/cat/gidget-50398452/...,Cat,Cat,Adult,Female,Medium,Short,[],...,animalcareservices@columbus.in.gov,812-376-2505,2730 Arnold St,,Columbus,IN,47203,US,50398452,cat
1,50398488.0,WI47,https://www.petfinder.com/dog/lesa-50398488/wi...,Dog,Dog,Adult,Female,Large,,[],...,elmbrookhs@ebhs.org,(262) 782-9261,20950 Enterprise Avenue,,Brookfield,WI,53045,US,50398488,dog
2,50398489.0,WI47,https://www.petfinder.com/cat/sky-50398489/wi/...,Cat,Cat,Young,Female,Medium,,[],...,elmbrookhs@ebhs.org,(262) 782-9261,20950 Enterprise Avenue,,Brookfield,WI,53045,US,50398489,cat
3,50398490.0,WI47,https://www.petfinder.com/dog/kimber-50398490/...,Dog,Dog,Adult,Male,Medium,,[],...,elmbrookhs@ebhs.org,(262) 782-9261,20950 Enterprise Avenue,,Brookfield,WI,53045,US,50398490,dog
4,50398491.0,WI47,https://www.petfinder.com/cat/timmy-50398491/w...,Cat,Cat,Baby,Female,Small,,[],...,elmbrookhs@ebhs.org,(262) 782-9261,20950 Enterprise Avenue,,Brookfield,WI,53045,US,50398491,cat


## Merge DFs to One and Send to MongoDB

In [23]:
# Attach organization details (including Latitude/Longitude) to every animal by
# joining on organization_id. The join is inner (pd.merge's default, spelled
# out here), so animals whose organization is missing from org_zip3_df are
# dropped. Columns present in both frames (e.g. url, name, distance) receive
# pandas' default _x / _y suffixes.
# The join key only needs to be named once.
all_df = pd.merge(
    animal_conv_df,
    org_zip3_df,
    how="inner",
    on="organization_id",
)

In [24]:
# Create a MongoDB client for the server at localhost, port 27017.
conn = "mongodb://localhost:27017"
client = pymongo.MongoClient(host=conn)

In [25]:
# Get a handle on the rescue_angels_db database.
db = client["rescue_angels_db"]

In [26]:
# Convert the merged frame into a list of per-row dicts (one per record),
# which is the shape insert_many expects.
final_mongo_dict = all_df.to_dict(orient="records")

In [28]:
# Bulk-insert every record into the final_data collection of the database.
# NOTE(review): the call is commented out even though the saved output below
# shows it was executed once — presumably disabled so that re-running the
# notebook does not insert the same documents again. Confirm before re-enabling.
# db.final_data.insert_many(final_mongo_dict)

<pymongo.results.InsertManyResult at 0x17481e37188>