In [1]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd

# Index page that links to the individual breed profiles: url
url = 'https://dogtime.com/dog-breeds/profiles/'

# Send the request and keep the response: r
# Without a timeout, requests waits indefinitely on an unresponsive server.
r = requests.get(url, timeout=30)

# Stop here on a 4xx/5xx answer instead of parsing an error page as if it
# were the breed index.
r.raise_for_status()

# Body of the response decoded as text: html_doc
html_doc = r.text

# Parse the HTML with the lxml parser: soup
soup = BeautifulSoup(html_doc, "lxml")

# Indented rendering of the parsed document, handy for manual inspection: pretty_soup
pretty_soup = soup.prettify()



In [2]:
# Concatenate every text node of the parsed index page and show it: text
text = soup.get_text()
print(text)

 All Dog Breeds - Complete List of Dog ProfilesPB = window.PB || {};PB.hashlessUrl = "//dogtime.com/dog-breeds/profiles/";
window.grumi = {key: '2a236ed9-fb8c-429e-ab47-cacac34a3be6'
}; !function(a9,a,p,s,t,A,g){if(a[a9])return;function q(c,r){a[a9]._Q.push([c,r])}a[a9]={init:function(){q("i",arguments)},fetchBids:function(){q("f",arguments)},setDisplayBids:function(){},_Q:[]};A=p.createElement(s);A.async=!0;A.src=t;g=p.getElementsByTagName(s)[0];g.parentNode.insertBefore(A,g)}("apstag",window,document,"script","//c.amazon-adsystem.com/aax2/apstag.js");
apstag.init({pubID: '3342',adServer: 'googletag'
});var PREBID_TIMEOUT = 25000,googletag      = googletag || {};googletag.cmd  = googletag.cmd || [];googletag.cmd.push(function() {googletag.pubads().disableInitialLoad();});
var pbjs     = pbjs || {};pbjs.que = pbjs.que || [];var PB                  = PB || {};PB.postSlotCallback = PB.postSlotCallback || [];PB.preSlotCallback  = PB.preSlotCallback || [];PB.PBJS             = PB.PBJS || {

In [3]:
breed_links = []

# href=True makes find_all return only anchors that carry an href attribute.
# For an anchor without one, link.get('href') is None and the substring test
# below would raise TypeError.
for link in soup.find_all('a', href=True):
    href = link['href']
    if 'dog-breeds/' in href:  # keep only links under the dog-breeds section
        breed_links.append(href)

# Drop duplicate links; sorting keeps the order deterministic between runs
# and yields plain str items.
breed_links = sorted(set(breed_links))


In [4]:
def get_traits(soup):
    """Return the trait labels found in a parsed breed page.

    Looks up every element whose class is 'characteristic-title' via
    ``soup.find_all`` and returns the text of each one, with leading and
    trailing whitespace removed, in the order they were found.

    The original author's note: restricting the result to elements whose
    ``.div`` is None keeps only the major traits (not applied here).
    """
    title_nodes = soup.find_all(class_='characteristic-title')
    return [node.get_text().strip() for node in title_nodes]


In [5]:
def get_stars(soup):
    """Return the star rating of each trait in a parsed breed page.

    For every element whose class is 'characteristic-star-block', reads the
    ``class`` attribute of the block's ``div`` element, takes the second class
    name and keeps its last character. Ratings are returned as
    single-character strings, in the order the blocks were found.

    NOTE(review): this presumably relies on the second class name ending in
    the rating digit (something like 'star-4') - confirm against the page
    markup.

    The original author's note: restricting the result to blocks whose text
    is empty keeps only the major traits (not applied here).
    """
    ratings = []
    for block in soup.find_all(class_='characteristic-star-block'):
        rating_class = block.div['class'][1]
        ratings.append(rating_class[-1])
    return ratings

In [6]:
# Maps breed name -> {trait label: star rating} for every scraped page
full_dict = {}

for breed_link in breed_links:

    # Fetch the breed page. The loop uses its own names so the index-page
    # objects from the first cell (r, html_doc, soup) are not overwritten;
    # re-running the link-extraction cell afterwards still sees the index page.
    # The timeout prevents one unresponsive page from stalling the whole crawl.
    breed_response = requests.get(breed_link, timeout=30)

    # Fail loudly on an HTTP error instead of scraping an error page
    breed_response.raise_for_status()

    # Parse the page with the lxml parser
    breed_soup = BeautifulSoup(breed_response.text, "lxml")

    # Extracts breed name, traits and corresponding star ranking
    breed_name = breed_soup.h1.get_text()
    traits = get_traits(breed_soup)
    stars = get_stars(breed_soup)

    # Each trait is paired with the star rating at the same position. Fewer
    # stars than traits means the page did not parse as expected, so report
    # which page it was rather than failing with a bare IndexError.
    if len(stars) < len(traits):
        raise ValueError(
            f"{breed_link}: found {len(traits)} traits but only {len(stars)} star ratings"
        )

    # zip stops at the shorter sequence, so any surplus star ratings are ignored
    dog_attributes_dict = dict(zip(traits, stars))

    # adds breed to full dictionary
    full_dict[breed_name] = dog_attributes_dict

    # progress indicator: one line per scraped breed
    print(breed_name)

print(full_dict)
 

Afador
Affenpinscher
Afghan Hound
Airedale Terrier
Akbash
Akita
Alaskan Klee Kai
Alaskan Malamute
American Bulldog
American English Coonhound
American Eskimo Dog
American Foxhound
American Leopard Hound
American Pit Bull Terrier
American Pugabull
American Staffordshire Terrier
American Water Spaniel
Anatolian Shepherd Dog
Appenzeller Sennenhunde
Auggie
Aussiedoodle
Aussiepom
Australian Cattle Dog
Australian Kelpie
Australian Retriever
Australian Shepherd
Australian Shepherd Husky
Australian Shepherd Lab Mix
Australian Terrier
Azawakh
Barbet
Basenji
Bassador
Basset Fauve de Bretagne
Basset Hound
Basset Retriever
Bavarian Mountain Scent Hound
Beabull
Beagle
Beaglier
Bearded Collie
Bedlington Terrier
Belgian Malinois
Belgian Sheepdog
Belgian Tervuren
Berger Picard
Bernedoodle
Bernese Mountain Dog
Bichon Frise
Biewer Terrier
Black and Tan Coonhound
Black Mouth Cur
Black Russian Terrier
Bloodhound
Blue Lacy
Bluetick Coonhound
Bocker
Boerboel
Boglen Terrier
Bolognese
Borador
Border Collie
Bo

In [9]:
# Build a frame from the nested dict (one column per breed), then transpose
# it so that each breed becomes a row and each trait a column: df
traits_by_breed = pd.DataFrame(full_dict)
df = traits_by_breed.transpose()
df

Unnamed: 0,Adaptability,Adapts Well to Apartment Living,Good For Novice Owners,Sensitivity Level,Tolerates Being Alone,Tolerates Cold Weather,Tolerates Hot Weather,All Around Friendliness,Affectionate with Family,Incredibly Kid Friendly Dogs,...,Easy To Train,Intelligence,Potential For Mouthiness,Prey Drive,Tendency To Bark Or Howl,Wanderlust Potential,Exercise Needs,Energy Level,Intensity,Potential For Playfulness
Afador,2,1,1,3,3,4,2,3,4,2,...,1,5,4,4,4,4,4,4,4,3
Affenpinscher,3,5,4,3,1,3,3,3,5,1,...,2,4,4,3,2,2,3,4,3,4
Afghan Hound,4,5,3,5,2,5,5,4,5,5,...,1,4,3,5,2,5,4,5,2,4
Airedale Terrier,2,1,2,3,2,3,3,4,4,4,...,4,5,5,5,4,4,5,5,3,5
Akbash,3,1,2,3,4,4,2,4,5,4,...,3,4,3,1,3,1,2,2,2,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Whoodle,3,4,1,3,1,4,2,4,4,4,...,2,4,1,2,1,3,4,4,4,4
Wirehaired Pointing Griffon,3,1,3,4,1,4,3,5,5,5,...,5,5,3,4,4,4,4,5,3,5
Xoloitzcuintli,3,5,1,5,1,3,3,3,5,3,...,3,5,3,5,5,5,3,3,3,3
Yorkipoo,4,5,5,4,3,2,3,4,5,4,...,4,4,3,3,5,2,3,5,3,4


In [8]:
# Save the breed/trait table to disk, including the header row.
# Don't forget to add '.csv' at the end of the file name.
csv_path = 'dogtraits.csv'
df.to_csv(csv_path, header=True)