In [1]:
# The purpose of this notebook is to gather the data from the Lawrence Humane Society's website for better compilation
# of my top-ten picks for our NEW DOG!!!!!!! OH MY GAAAAHHHH!!!!
# First step: import everything I'll need.

from bs4 import BeautifulSoup
import requests
import pandas as pd


In [2]:
# The ten detail pages I originally planned to loop over (kept for reference: these are the animal IDs of my picks).
# urls = ['https://lawrencehumane.org/details/?animal_id=A059273', 'https://lawrencehumane.org/details/?animal_id=A060976',
#         'https://lawrencehumane.org/details/?animal_id=A061584', 'https://lawrencehumane.org/details/?animal_id=A061264',
#         'https://lawrencehumane.org/details/?animal_id=A061263', 'https://lawrencehumane.org/details/?animal_id=A061426',
#         'https://lawrencehumane.org/details/?animal_id=A048525', 'https://lawrencehumane.org/details/?animal_id=A060254',
#         'https://lawrencehumane.org/details/?animal_id=A060433', 'https://lawrencehumane.org/details/?animal_id=A058991'
# ]
# I initially thought I'd need to loop through the ten pages and scrape each one, but when I tried scraping one of them,
# it returned the data for every pet at the shelter.
# New plan: turn all of this data into a pandas dataframe and filter it to return just the ten I want to look at.

url = 'https://lawrencehumane.org/details/?animal_id=A059273'

# Without a timeout a stalled connection would hang the kernel indefinitely.
page = requests.get(url, timeout=30)
# Stop here on a 4xx/5xx response instead of silently parsing an error page.
page.raise_for_status()

# Name the parser explicitly: a bare 'html' picks whichever parser happens to be installed,
# and different parsers can build different trees from the same markup.
soup = BeautifulSoup(page.text, 'html.parser')

# A short preview is enough to confirm the fetch worked; the full page is thousands of lines.
print(str(soup)[:2000])


<!DOCTYPE html>
<html lang="en-US">
<head>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<link href="http://gmpg.org/xfn/11" rel="profile"/>
<meta content="index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1" name="robots"/>
<!-- Google Tag Manager for WordPress by gtm4wp.com -->
<script data-cfasync="false" data-pagespeed-no-defer="" type="text/javascript">//<![CDATA[
	var gtm4wp_datalayer_name = "dataLayer";
	var dataLayer = dataLayer || [];
//]]>
</script>
<!-- End Google Tag Manager for WordPress by gtm4wp.com -->
<!-- This site is optimized with the Yoast SEO plugin v19.7.2 - https://yoast.com/wordpress/plugins/seo/ -->
<title>Details - Lawrence Humane</title>
<link href="https://lawrencehumane.org/details/" rel="canonical"/>
<meta content="en_US" property="og:locale"/>
<meta content="article" property="og:type"/>
<meta content="Details - Lawrence Humane" property="og:title"/>
<meta content="https://lawre

In [3]:
# Every pet's details sit inside their own <div class="feed_details">,
# so collect all of those divs -- still messy, but one element per pet.
messy_soup = soup.find_all(name='div', attrs={'class': 'feed_details'})
messy_soup


[<div class="feed_details">
 <div class="animalname"><h3>Hi! I'M Trixie.</h3></div>
 <h4>#A057706</h4>
 <p>I am currently in foster care so email adoptions@lawrencehumane.org to learn more about me!</p>
 <h6>My name is Trixie</h6>
 <h6>I am a Female, White, Shorthaired Rabbit. </h6>
 <h6>The shelter staff think I am about 3 years old.</h6>
 <h6>I weigh approximately 3 pounds.</h6>
 <h6>My adoption price is $40.</h6>
 <h6>I have been at the shelter since 08/09/2023.</h6>
 <a class="animaldetails" href="https://lawrencehumane.org/details/?animal_id=A057706"></a>
 <img src="https://petharbor.com/get_image.asp?RES=Detail&amp;ID=A057706&amp;LOCATION=LAWR"/>
 <h6 class="dev_notice">This information was refreshed 15 minutes ago and may not represent all of the animals at the Lawrence Humane Society.</h6>
 </div>,
 <div class="feed_details">
 <div class="animalname"><h3>Hi! I'M Bandit.</h3></div>
 <h4>#A061533</h4>
 <p></p>
 <h6>My name is Bandit</h6>
 <h6>I am a Male, White, Rat. </h6>
 <h6>T

In [4]:
# Exploration only: look at the first pet's card.
# Most of the info I want about each pet is included in its <h6> tags.
test = messy_soup[0].find_all(name='h6')
for heading in test:
    print(heading.get_text())


My name is Trixie
I am a Female, White, Shorthaired Rabbit. 
The shelter staff think I am about 3 years old.
I weigh approximately 3 pounds.
My adoption price is $40.
I have been at the shelter since 08/09/2023.
This information was refreshed 15 minutes ago and may not represent all of the animals at the Lawrence Humane Society.


In [5]:
# A plan is starting to form: begin with an empty dataframe that has one column
# per field we want for each pet, then fill it up.
headers = 'Picture URL Name Sex Breed Age Weight'.split()

df = pd.DataFrame(data=None, columns=headers)
df


Unnamed: 0,Picture,URL,Name,Sex,Breed,Age,Weight


In [6]:
# Now for the fun part!
# Turn every pet card in the soup into one tidy record, then build the dataframe in a single step.
# Rebuilding `df` from scratch (instead of appending rows to it) means re-running this cell
# never produces duplicate rows.
def parse_pet(card):
    """Pull one pet's details out of a ``feed_details`` div.

    Parameters
    ----------
    card : bs4.element.Tag
        One element of ``messy_soup``. It is expected to contain an <img>, an <a>,
        an <h3> greeting ("Hi! I'M <name>.") and <h6> lines in this order:
        name, "I am a <sex>, <colour>, <breed>.", age, weight.

    Returns
    -------
    dict
        One value per column in ``headers``. The Sex/Breed/Age/Weight values come
        back as an empty string when their <h6> sentence is missing or worded
        differently, and all text values have surrounding whitespace removed.
    """
    facts = [tag.get_text() for tag in card.find_all('h6')]
    # Pad short cards so unpacking below yields blanks instead of an IndexError.
    facts += [''] * (4 - len(facts))
    description, age_line, weight_line = facts[1:4]

    # "I am a Female, White, Shorthaired Rabbit." -> sex before the first comma, colour/breed after it.
    sex, _, breed = description.partition(',')

    return {
        'Picture': card.find('img')['src'],
        'URL': card.find('a').get('href'),
        'Name': card.find('h3').get_text().replace("Hi! I'M ", '').replace('.', '').strip(),
        'Sex': sex.replace('I am a ', '').strip(),
        'Breed': breed.replace('.', '').strip(),
        # partition() gives '' when the marker word is absent, where split(...)[1] would raise.
        'Age': age_line.replace('.', '').partition('about')[2].strip(),
        'Weight': weight_line.replace('.', '').partition('approximately')[2].strip(),
    }


df = pd.DataFrame([parse_pet(card) for card in messy_soup], columns=headers)

df.head()


Unnamed: 0,Picture,URL,Name,Sex,Breed,Age,Weight
0,https://petharbor.com/get_image.asp?RES=Detail...,https://lawrencehumane.org/details/?animal_id=...,Trixie,Female,"White, Shorthaired Rabbit",3 years old,3 pounds
1,https://petharbor.com/get_image.asp?RES=Detail...,https://lawrencehumane.org/details/?animal_id=...,Bandit,Male,"White, Rat",1 year old,1 pounds
2,https://petharbor.com/get_image.asp?RES=Detail...,https://lawrencehumane.org/details/?animal_id=...,,Male,"Gray, Domestic Longhair",5 years old,9 pounds
3,https://petharbor.com/get_image.asp?RES=Detail...,https://lawrencehumane.org/details/?animal_id=...,,Unknown Gender,"Black, Domestic Shorthair",Unknown,
4,https://petharbor.com/get_image.asp?RES=Detail...,https://lawrencehumane.org/details/?animal_id=...,Turkey,Female,"Yellow, Parakeet",5 months old,


In [7]:
# Now I have a dataframe with every animal at the shelter! Neat!
# Let's filter it down to the ten dogs I had my eye on.
top_ten = ['Maximus', 'Eeyore', 'Crumb', 'Wilbur', 'Iggy', 'Dj Pauly D', 'Rosie', 'Xavier', 'Booie', 'Dixie']
top_ten_df = df[df['Name'].isin(top_ten)]

# isin() silently drops any pick that is no longer listed (adopted, renamed, typo in my list),
# so say which names found no match instead of quietly returning fewer than ten rows.
found_names = set(top_ten_df['Name'])
missing_picks = [name for name in top_ten if name not in found_names]
if missing_picks:
    print(f"No current listing found for: {', '.join(missing_picks)}")

top_ten_df


Unnamed: 0,Picture,URL,Name,Sex,Breed,Age,Weight
10,https://petharbor.com/get_image.asp?RES=Detail...,https://lawrencehumane.org/details/?animal_id=...,Maximus,Male,"Brown, Mixed Shepherd",11 months old,38 pounds
23,https://petharbor.com/get_image.asp?RES=Detail...,https://lawrencehumane.org/details/?animal_id=...,Eeyore,Male,"Black, Pit Bull Terrier",13 weeks old,11 pounds
28,https://petharbor.com/get_image.asp?RES=Detail...,https://lawrencehumane.org/details/?animal_id=...,Crumb,Female,"Brown Brindle, Mastiff",14 weeks old,16 pounds
73,https://petharbor.com/get_image.asp?RES=Detail...,https://lawrencehumane.org/details/?animal_id=...,Dj Pauly D,Male,"White, Pit Bull Terrier",11 weeks old,7 pounds
75,https://petharbor.com/get_image.asp?RES=Detail...,https://lawrencehumane.org/details/?animal_id=...,Iggy,Male,"Red, Pit Bull Terrier",7 months old,42 pounds
89,https://petharbor.com/get_image.asp?RES=Detail...,https://lawrencehumane.org/details/?animal_id=...,Rosie,Female,"Black, Alaskan Husky",2 years and 3 months old,59 pounds
102,https://petharbor.com/get_image.asp?RES=Detail...,https://lawrencehumane.org/details/?animal_id=...,Booie,Male,"Yellow, Labrador Retriever",2 years and 5 months old,57 pounds
103,https://petharbor.com/get_image.asp?RES=Detail...,https://lawrencehumane.org/details/?animal_id=...,Xavier,Male,"White, Dalmatian",3 years old,48 pounds
122,https://petharbor.com/get_image.asp?RES=Detail...,https://lawrencehumane.org/details/?animal_id=...,Dixie,Female,"Black, German Shepherd Dog",5 months old,25 pounds


In [34]:
# This is exactly what I wanted!
# Now to save this as a csv to send to my husband, who will laugh at me for turning this into a data project.
# index=False: the index is just each pet's row position in the full shelter list, which would
# otherwise show up in the file as a meaningless unnamed first column.
top_ten_df.to_csv('top_ten.csv', index=False)
