# Capstone Project: Dog Toy Recommendation System 
### Data Collection 
In order to collect my data, I will use Selenium and ChromeDriver in order to scrape dog toy reviews from Chewy. 

In [1]:
# Code in my selenium_practice.py file so far for scraping data

import pandas as pd

# imports 
import selenium
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time

In [2]:
def scrape_toy_title(page_source):
    toy_list = []
    soup = BeautifulSoup(page_source, 'lxml')
    
    # Getting the toy's title 
    section = soup.find('section', id='right-column')
    title = section.find('div', id='product-title').find('h1').get_text().strip()
    return title

In [3]:
def scrape_toy_price(page_source):
    # Getting the toy's price 
    soup = BeautifulSoup(page_source, 'lxml')
    price = soup.find('div', id='pricing').find(
        'ul', class_='product-pricing').find(
        'li', class_='our-price').find(
        'p', class_='price').find(
        'span', class_='ga-eec__price').get_text().strip()
    return price

In [4]:
def scrape_toy_description(page_source):
    soup = BeautifulSoup(page_source, 'lxml')
    try:
        descriptions =  soup.find(
            'div', class_='cw-tabs__body container').find(
            'article', id='descriptions').find(
            'section', class_='descriptions__content cw-tabs__content--left').find_all(
            'p')
        text_list = []
        for description in descriptions:
            text = description.get_text()
            text_list.append(text)
    
    except:
        description =  soup.find(
                'div', class_='cw-tabs__body container').find(
                'article', id='descriptions').find(
                'section', class_='descriptions__content cw-tabs__content--left').find(
                'p')
        text_list = []
        text = description.get_text()
        text_list.append(text)
    
    else: 
        pass
    return text_list
    

In [5]:
# def scrape_toy_description(page_source):
#     soup = BeautifulSoup(page_source, 'lxml')
#     descriptions =  soup.find(
#             'div', class_='cw-tabs__body container').find(
#             'article', id='descriptions').find(
#             'section', class_='descriptions__content cw-tabs__content--left').find_all(
#             'p')
#     text_list = []
#     for description in descriptions:
#         text = description.get_text()
#         text_list.append(text)
#     return text_list

In [6]:
def scrape_toy_keybenefits(page_source):
    soup = BeautifulSoup(page_source, 'lxml')
    ul = soup.find(
        'div', class_='cw-tabs__body container').find(
        'article', id='descriptions').find(
        'section', class_='descriptions__content cw-tabs__content--left').find(
        'ul')
    lis = ul.find_all('li')
    text_list = []
    for li in lis:
        text = li.get_text()
        text_list.append(text)

#             If you want each key benefit to be in its own list run this instead 
#             text_item = []
#             text = li.get_text()
#             text_item.append(text)
#             text_list.append(text_item)

    return text_list

In [7]:
def scrape_toy_rating(page_source):
    soup = BeautifulSoup(page_source, 'lxml')
    picture = soup.find(
        'div', class_='product-header-extras').find(
        'div', class_='ugc ugc-head').find(
        'picture')
    img = picture.find('img') # How do I access the img and then the stuff inside the img? 
    rating = img['src']
    return rating[-7:-4] # Grabbing the number itself from the 'src' attribute 

In [8]:
def scrape_toy_reviews(page_source): 
    soup = BeautifulSoup(page_source, 'lxml')
    reviews = soup.find_all('span', class_='ugc-list__review__display')
#     print(len(reviews))
#     print(reviews[0].get_text())
    text_list = []
    for review in reviews:
        review.get_text()
        text_list.append(review)
    return text_list

# Need to figure out the best ways to get all the reviews 

In [9]:
def scrape_toy(page_source):
    # Getting elements off page
    toy_dict = {}
    
    # toy title
    toy_title = scrape_toy_title(page_source)
    toy_dict['title'] = toy_title

    # toy price 
    toy_price = scrape_toy_price(page_source)
    toy_dict['price'] = toy_price
    
    # toy description 
    toy_description = scrape_toy_description(page_source)
    toy_dict['descriptions'] = toy_description
    
    try:
        # toy key benefits 
        toy_keybenefits = scrape_toy_keybenefits(page_source)
        toy_dict['key_benefits'] = toy_keybenefits
    except:
        pass
    
    # toy rating -- NEEDS FIXING
    toy_rating = scrape_toy_rating(page_source)
    toy_dict['rating'] = toy_rating

    # toy reviews
    toy_reviews = scrape_toy_reviews(page_source)
    toy_dict['reviews'] = toy_reviews
    return toy_dict

In [10]:
def scrape_toy_page(toy_cat_dict, toy_subcat, toy_links): #products
#     # Lopping through all products and scraping
#     toys_links =[]
#     for product in products:
#         link = product.get_attribute('href')
#         toys_links.append(link)

    toy_subcat_dict = {}
    for link in toy_links:
        driver.get(link)
        page_source = driver.page_source
        toy_dict = scrape_toy(page_source)
        toy_subcat_dict[link] = toy_dict

    toy_cat_dict[toy_subcat] = toy_subcat_dict

In [11]:
def number_of_toys(page_source):
    soup = BeautifulSoup(page_source, 'lxml')
    numbers = soup.find_all('span', class_='category-count')
#     print(numbers[0].text)
    subcat_numbers = []
    for span in numbers:
        number = span.text
        subcat_numbers.append(int(number[1:-1]))
    return subcat_numbers

WORK ON ROPE TOY STUFF HERE

In [12]:
# Grabing rope toy numbers since formatting it different
def number_of_rope_toys(page_source):
    soup = BeautifulSoup(page_source, 'lxml')
    numbers = soup.find_all('span', class_='category-count')
    print(numbers[0])

In [13]:
def grab_subcat_links(link, number_of_toys):
    # https://www.chewy.com/b/moderate-2718
    # https://www.chewy.com/b/moderate_c2718_p5
    
    main_href = f'{link[:-5]}_c{link[-4:]}_p'    
    subcat_pages = []
    subcat_pages.append(link)
    for i in range(2, round(number_of_toys / 36)+1):
        href = f'{main_href}{i}'
        subcat_pages.append(href)
    return subcat_pages

AND HERE

In [None]:
# just for Rope and Toys since there are no subcategories in this category
# def grab_all_ropetoy(link, number of toys:)

In [15]:
def get_links(page_source):
    soup = BeautifulSoup(page_source, 'lxml')
    subcats = soup.find_all('a', class_='facet_selection')
    links_list = []
    for subcat in subcats:
        link = subcat['href']
        full_link = f'https://www.chewy.com{link}'
        links_list.append(full_link)
    return links_list

In [16]:
def grab_toy_links(subcat_pages):
    toys_links =[]
    for page in subcat_pages:
        driver.get(page)
        products = driver.find_elements_by_class_name('product')
        # Lopping through all products on first page 
        for product in products:
            link = product.get_attribute('href')
            toys_links.append(link)
    return toys_links

In [17]:
# CHEW TOYS 


DRIVER_PATH = '/Users/haleytaft/Downloads/chromedriver'
driver = webdriver.Chrome( executable_path=DRIVER_PATH) 
original_link = "https://www.chewy.com/b/toys-315"
driver.get(original_link)

# To first just look at CHEW TOYS
chew_toys_link = driver.find_element_by_link_text('Chew Toys')
chew_toys_link.click()

# Defining a larger dictionary to hold subcat dictionaries
chew_toys = {}

# Going to MODERATE chew toys
element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Moderate")))
element.click()

# Checking for number of toys in each subcategory (looking at side bar)
cat_page_source = driver.page_source
chew_numbers = number_of_toys(cat_page_source)

# Getting all first page links for each subcategory
chew_links = get_links(cat_page_source)
print(chew_links)

# Getting links for all pages for moderate toys 
mod_pages_links = grab_subcat_links(chew_links[0], chew_numbers[0])
all_moderate_links = grab_toy_links(mod_pages_links)

# Collecting all MODERATE chew toys 
check = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Chew Toys")))
scrape_toy_page(chew_toys, 'moderate', all_moderate_links)

# Back to Chew Toys
driver.get('https://www.chewy.com/b/chew-toys-316')

print('Done with Moderate Toys')

################################################################################################

# To get to TOUGH chew toys
element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Tough")))
element.click()

# Getting links for all pages for tough toys 
tough_pages_links = grab_subcat_links(chew_links[1], chew_numbers[1])
print(tough_pages_links)
all_tough_links = grab_toy_links(tough_pages_links)

# Collecting all TOUGH chew toys 
check = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Chew Toys")))
scrape_toy_page(chew_toys, 'tough', all_tough_links)

#To get back to Chew Toys
driver.get('https://www.chewy.com/b/chew-toys-316')

print("Done with Tough Toys")

################################################################################################
# To get to EXTREME chew toys
element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Extreme")))
element.click()

# Getting links for all pages for extreme toys 
extreme_pages_links = grab_subcat_links(chew_links[2], chew_numbers[2])
all_extreme_links = grab_toy_links(extreme_pages_links)

# To look at the extreme chew toys 
check = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Chew Toys")))
scrape_toy_page(chew_toys, 'extreme', all_extreme_links)

print('Done with Extreme Toys and Chew Toys')

KeyboardInterrupt: 

In [283]:
chew_toy_list = []
for subcat in ['moderate', 'tough', 'extreme']:
    for index, link in enumerate(chew_toys[subcat]):
        chew_toys[subcat][link]['subcat'] = subcat
        chew_toys[subcat][link]['cat'] = 'chew toys'
        chew_toy_list.append(chew_toys[subcat][link])
chew_toy_df = pd.DataFrame(chew_toy_list)

In [289]:
# convert chew toy data frame to csv -- uncomment to rerun 
# chew_toy_df.to_csv('./data/chewtoy_df.csv', index=False)

In [24]:
plush_toys = {}

99

In [39]:
# PlUSH TOYS

DRIVER_PATH = '/Users/haleytaft/Downloads/chromedriver'
driver = webdriver.Chrome( executable_path=DRIVER_PATH) 
driver.get("https://www.chewy.com/b/toys-315")

# To first just look at CHEW TOYS
chew_toys_link = driver.find_element_by_link_text('Plush Toys')
chew_toys_link.click()

plush_toys = {}

# Looking a the Stuffed toys
element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Stuffed Toys")))
element.click()

# Checking for number of toys in each subcategory (looking at side bar)
cat_page_source = driver.page_source
plush_numbers = number_of_toys(cat_page_source)

# Getting all first page links for each subcategory
plush_links = get_links(cat_page_source)
print(plush_links)

# Getting links for all pages for stuffed toys 
stuffed_pages_links = grab_subcat_links(plush_links[0], plush_numbers[0])
print(len(stuffed_pages_links))
all_stuffed_links = grab_toy_links(stuffed_pages_links)
print(len(all_stuffed_links))

# To look at the STUFFED plush toys 
check = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Plush Toys")))

# # Scraping
plush_toys_1 = {}
scrape_toy_page(plush_toys_1, 'stuffed', all_stuffed_links[:100])
print('Done with 1st round')
plush_toys_2 = {}
scrape_toy_page(plush_toys_2, 'stuffed', all_stuffed_links[101:200])
print('Done with 2nd round')
plush_toys_3 = {}
scrape_toy_page(plush_toys_3, 'stuffed', all_stuffed_links[201:300])
print('Done with 3rd round')
plush_toys_4 = {}
scrape_toy_page(plush_toys_4, 'stuffed', all_stuffed_links[301:400])
print('Done with 4th round')
plush_toys_5 = {}
scrape_toy_page(plush_toys_5, 'stuffed', all_stuffed_links[401:500])
print('Done with 5th round')
plush_toys_6 = {}
scrape_toy_page(plush_toys_6, 'stuffed', all_stuffed_links[501:600])
print('Done with 6th round')
plush_toys_7 = {}
scrape_toy_page(plush_toys_7, 'stuffed', all_stuffed_links[601:700])
print('Done with 7th round')
plush_toys_8 = {}
scrape_toy_page(plush_toys_8, 'stuffed', all_stuffed_links[701:800])
print('Done with 8th round')
plush_toys_9 = {}
scrape_toy_page(plush_toys_9, 'stuffed', all_stuffed_links[801:900])
print('Done with 9th round')
plush_toys_10 = {}
scrape_toy_page(plush_toys_10, 'stuffed', all_stuffed_links[901:1000])
print('Done with 10th round')
plush_toys_11 = {}
scrape_toy_page(plush_toys_11, 'stuffed', all_stuffed_links[1001:1100])
print('Done with 11th round')
plush_toys_12 = {}
scrape_toy_page(plush_toys_12, 'stuffed', all_stuffed_links[1101:1200])
print('Done with 12th round')
plush_toys_13 = {}
scrape_toy_page(plush_toys_13, 'stuffed', all_stuffed_links[1201:1300])
print('Done with 13th round')
plush_toys_14 = {}
scrape_toy_page(plush_toys_14, 'stuffed', all_stuffed_links[1301:1403])
print('Done with 14th round')
    
#To get back to Chew Toys
driver.get('https://www.chewy.com/b/plush-toys-320')

print("Done with Stuffed subcategory!")

# ##########################################################################################################

# Looking a the Unstuffed toys
element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Unstuffed Toys")))
element.click()

# Getting links for all pages for unstuffed toys 
unstuffed_pages_links = grab_subcat_links(plush_links[1], plush_numbers[1])
print(unstuffed_pages_links)
all_unstuffed_links = grab_toy_links(unstuffed_pages_links)
print(len(all_unstuffed_links))

# To look at the unstuffed plush toys 
check = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Plush Toys")))

# Scraping 
plush_toys_15 = {}
scrape_toy_page(plush_toys, 'unstuffed', all_unstuffed_links[:50])
print("Done with 1st unstuffed toys")
plush_toys_16 = {}
scrape_toy_page(plush_toys, 'unstuffed', all_unstuffed_links[51:100])
print("Done with 2nd unstuffed toys")
plush_toys_17 = {}
scrape_toy_page(plush_toys, 'unstuffed', all_unstuffed_links[101:173])
print("Done with 3rd unstuffed toys")

print("Done with Unstuffed subcategory!")

print('Done with Plush category!')


['https://www.chewy.com/b/stuffed-toys-2333', 'https://www.chewy.com/b/unstuffed-toys-2334']
39
1403
Done with 1st round
Done with 2nd round


KeyboardInterrupt: 

In [None]:
plush_toy_list = []
for subcat in ['stuffed', 'unstuffed']:
    for index, link in enumerate(plush_toys[subcat]):
        plush_toys[subcat][link]['subcat'] = subcat
        plush_toys[subcat][link]['cat'] = 'plush toys'
        plush_toy_list.append(plush_toys[subcat][link])
plush_toy_df = pd.DataFrame(plush_toy_list)

In [None]:
plush_toy_df

In [None]:
plush_toy_df.to_csv('./data/plushtoy_df.csv')

In [18]:
# plush_df_list = []
# for cat in plush_toys:
#     for toy in cat:
#         plush_df_list.append(toy)
# plush_df = pd.DataFrame(plush_df_list)
# plush_df

Unnamed: 0,0,1,2,3,4
0,KONG Cozie Marvin the Moose Plush Dog Toy,$4.99,"[The KONG Cozies are cute, soft and cuddly plu...",[Soft and cuddly plush toys made with an extra...,[[I bought this as a soft cuddly toy for our n...
1,KONG Cozie Baily the Blue Dog Toy,$5.82,"[The KONG Cozies are cute, soft and cuddly plu...",[Soft and cuddly plush toys made with an extra...,[[My 20 month old Doberman Shepherd pup LOVES ...
2,Frisco Plush with Rope Squeaking Monkey Dog Toy,$5.98,"[Toss, squeak, repeat—bring on every game with...",[Plush monkey dog toy features a built-in sque...,[[We got one of these a year ago for our new p...
3,KONG Floppy Knots Dog Toy,$9.49,[Help your dog spend his boundless reserves of...,[Dog toy made with internal knotted rope and s...,[[I got this for my 10 week old Dane puppy as ...
4,Multipet Lamb Chop Squeaky Plush Dog Toy,$5.99,"[Nostalgic for you and big, adorable fun for y...","[Super plush dog toy., Classic character will ...",[[I love the ease and convenience of shopping ...
...,...,...,...,...,...
67,Pet Qwerks Krinkle & Squeak Newspaper Stuffing...,$5.03,"[If you think squeak toys are old news, you ha...",[Designed with crinkle paper that creates an e...,[[Our Jack loves this toy. He is a very aggres...
68,"Rocket & Rex Assorted Dog Toys, 6 count",$19.99,[Rocket & Rex Assorted Dog Toys provide your f...,[Assorted pack of toys to provide your furry f...,[[Nice variety of toys with different textures...
69,Ethical Pet Skinneeez Giraffe Stuffing-Free Sq...,$6.29,[Skinneeez stuffing free dog toy satisfies a d...,"[Stuffing free dog toy, Endless fun no stuffin...",[[Love this little giraffe. It's wonderful NO...
70,Multipet Loofa Floppy Light-Weight Squeaky Stu...,$5.71,"[All of the fun, none of the mess! Now pups ge...","[Super cuddly, plush dog toy with a squeaker, ...",[[My pup automatically loved this toy the minu...


In [42]:
# FETCH TOYS 

DRIVER_PATH = '/Users/haleytaft/Downloads/chromedriver'
driver = webdriver.Chrome( executable_path=DRIVER_PATH) 
driver.get("https://www.chewy.com/b/toys-315")

# To first just look at CHEW TOYS
chew_toys_link = driver.find_element_by_link_text('Fetch Toys')
chew_toys_link.click()

fetch_toys = {}

# Looking a the Balls toys
element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Balls")))
element.click()

# Checking for number of toys in each subcategory (looking at side bar)
cat_page_source = driver.page_source
fetch_numbers = number_of_toys(cat_page_source)

# Getting all first page links for each subcategory
fetch_links = get_links(cat_page_source)
print(fetch_links)

# Getting links for all pages for ball toys 
ball_pages_links = grab_subcat_links(fetch_links[0], fetch_numbers[0])
all_ball_links = grab_toy_links(ball_pages_links)

# To look at the ball fetch toys -- NEED TO FIGURE OUT HOW TO ACCESS THEM
check = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Fetch Toys")))

# Scraping
scrape_toy_page(fetch_toys, 'balls', all_ball_links)

driver.get('https://www.chewy.com/b/fetch-toys-317')

print('Done with Balls subcategory!')

############################################################################################

# Looking a the Discs toys
element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Discs")))
element.click()

# Getting links for all pages for disc toys 
disc_pages_links = grab_subcat_links(fetch_links[1], fetch_numbers[1])
all_disc_links = grab_toy_links(disc_pages_links)

# To look at the disc fetch toys 
check = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Fetch Toys")))

# Scraping
scrape_toy_page(fetch_toys, 'discs', all_disc_links)

driver.get('https://www.chewy.com/b/fetch-toys-317')

print("Done with Disc subcategory!")

############################################################################################

# # Looking a the Launcher toys
# element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Launchers")))
# element.click()

# # Getting links for all pages for launchers toys 
# launcher_pages_links = grab_subcat_links(fetch_links[2], fetch_numbers[2])
# all_launcher_links = grab_toy_links(launcher_pages_links)

# # To look at the launcher fetch toys 
# check = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Fetch Toys")))

# # Scraping
# scrape_toy_page(fetch_toys, 'launchers', all_launcher_links)

# driver.get('https://www.chewy.com/b/fetch-toys-317')

# print('Done with Launchers subcategory!')

############################################################################################

# Looking a the Stick toys
element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Sticks")))
element.click()

# Getting links for all pages for moderate toys 
stick_pages_links = grab_subcat_links(fetch_links[2], fetch_numbers[2])
all_stick_links = grab_toy_links(stick_pages_links)

# To look at the stick fetch toys 
check = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Fetch Toys")))

# Scraping
scrape_toy_page(fetch_toys, 'sticks', all_stick_links)

print("Done with Sticks subcategory!")

print('Done with Fetch category!')


['https://www.chewy.com/b/balls-2329', 'https://www.chewy.com/b/discs-2330', 'https://www.chewy.com/b/sticks-2332']
Done with Balls subcategory!
Done with Disc subcategory!
Done with Sticks subcategory!
Done with Fetch category!


In [44]:
fetch_toy_list = []
for subcat in ['balls', 'discs', 'sticks']: #'launchers', 
    for index, link in enumerate(fetch_toys[subcat]):
        fetch_toys[subcat][link]['subcat'] = subcat
        fetch_toys[subcat][link]['cat'] = 'fetch toys'
        fetch_toy_list.append(fetch_toys[subcat][link])
fetch_toy_df = pd.DataFrame(fetch_toy_list)

In [45]:
fetch_toy_df

Unnamed: 0,title,price,descriptions,key_benefits,rating,reviews,subcat,cat
0,Pet Qwerks Puppy Puck Dog Toy,$6.96,[Let the games begin with the Pet Qwerks Puppy...,[Durable dog toy is great for both indoor and ...,1_8,[[He absolutely adores this puck. We take him ...,balls,fetch toys
1,KONG Classic Flyer Frisbee Dog Toy,$10.99,[The KONG Flyer is the best soft rubber disc o...,[Made out of KONG Classic durable rubber for a...,4_3,[[We've tried a few different frisbees and thi...,discs,fetch toys
2,"Chuckit! Flying Squirrel Dog Toy, Color Varies",$8.69,[As if chasing a running squirrel wasn't enoug...,"[Multilayer construction for added durability,...",4_3,[[We have a 7 yr. old Australian Shepherd who ...,discs,fetch toys
3,Chuckit! Paraflight Flyer Dog Toy,$10.56,"[It’s a bird, it’s a plane, no it’s the Chucki...","[Multilayer construction for added durability,...",4_3,[[this Frisbee has held up for months through ...,discs,fetch toys
4,"KONG Puppy Flyer Dog Toy, Color Varies",$9.99,[The Puppy Flyer Dog Toy is perfectly sized fo...,[Perfect size for teaching your puppy how to p...,4_2,"[[This was a gift for my ""Grand-dog"" LuLu....b...",discs,fetch toys
...,...,...,...,...,...,...,...,...
248,"Jax and Bones Elton the Octopus Rope Dog Toy, ...",$16.50,[Give your dog a toy you can trust with the na...,"[Made from 100% eco-friendly, chemical-free an...",3_1,[[It is small but fits a puppy to play and che...,sticks,fetch toys
249,Carolina Pet Sherpa Bone Dog Pillow Toy,$24.99,[Give your dog a fluffy bone with the Carolina...,[Fun and functional pillow is shaped like your...,5_0,[[Saki absolutely loves her huge stuffed bone!...,sticks,fetch toys
250,"Tuffy's Jr Bone2 Squeaky Plush Dog Toy, Pink L...",$12.47,[Tuffy’s Jr Bone2 Dog Toy is designed for smal...,[Durable and paw-fect for interactive play lik...,3_5,[[This is one of 3 Tuffy toys we purchased for...,sticks,fetch toys
251,"Jax and Bones Kramer the Crab Rope Dog Toy, Small",$15.00,[Give your dog a toy you can trust with the na...,"[Made from 100% eco-friendly, chemical-free an...",1_0,"[[This is not a good buy for the money, in my ...",sticks,fetch toys


In [46]:
fetch_toy_df.to_csv('./data/fetchtoy_df.csv')

In [21]:
# fetch_df_list = []
# for cat in fetch_toys:
#     for toy in cat:
#         fetch_df_list.append(toy)
# fetch_df = pd.DataFrame(fetch_df_list)
# fetch_df

Unnamed: 0,0,1,2,3,4
0,Chuckit! Ultra Rubber Ball Tough Dog Toy,$5.53,"[Trees, rocks, earth...it doesn't matter what ...",[High bouncing balls that can be used in the w...,[[My dog LOVES chewing his toys and tearing th...
1,As Seen on TV Wobble Wag Giggle Ball Squeaky D...,$11.99,[Wobble Wag Giggle Ball Dog Toy is an interact...,"[Makes fun giggle sounds when moved around, Ke...",[[... ours would say “new favorite toy”! If yo...
2,"KONG Squeezz Ball Dog Toy, Color Varies",$3.49,[The KONG Squeezz Ball has a recessed squeaker...,[Perfect for games of fetch because it's easy ...,[[Our Beagle Casey loves this green ball. I a...
3,KONG Squeakair Balls Packs Dog Toy,$2.19,[The KONG AirDog Squeakair Ball combines two c...,[Combines benefits of traditional tennis ball ...,[[This is the third sized ball I have tried wi...
4,Frisco Fetch Squeaking Colorful Tennis Ball Do...,$4.29,"[Whether you’re playing singles or doubles, th...",[Set of tennis fetch balls provide bouncing ac...,[[These balls are among my black lab’s favorit...
...,...,...,...,...,...
139,KONG Core Strength Bone Dog Toy,$11.99,[KONG’s Core Strength Bone is specially design...,[Comes in more than one size so you can choose...,[[This quickly became my doberman’s favorite t...
140,Nylabone Strong Chew Camo Bone Duck Flavored D...,$7.79,[Help fulfill your dog’s natural chewing insti...,[Made with natural rubber that’s durable but h...,[[This is our fourth of this line of rubber Du...
141,"Multipet Chilly Bone Dog Chew Toy, Color Varies",$4.21,[Multipet Chilly Bones are freezable and great...,[Freezable and great for teething puppies or c...,[[I am giving this product one star. While it ...
142,Pet Qwerks Dinosaur BarkBone Bacon Flavor Toug...,$13.14,[Give your furry friend a plaything to stimula...,[Bone toy infused with real bacon flavoring to...,[[I LOVE these - I bought 3 for my four big ch...


ROPE TOYS ARE A WORK IN PROGRESS -- maybe just run all the functions in here bc you only do it all once?

In [23]:
# ROPE & TUG TOYS

DRIVER_PATH = '/Users/haleytaft/Downloads/chromedriver'
driver = webdriver.Chrome( executable_path=DRIVER_PATH) 
driver.get("https://www.chewy.com/b/toys-315")

# To first just look at CHEW TOYS
chew_toys_link = driver.find_element_by_link_text('Rope & Tug Toys')
chew_toys_link.click()

rope_tug_toys = {}

# To look at the rope & tug toys -- NEED TO FIGURE OUT HOW TO ACCESS THEM
check = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Toys")))

products = driver.find_elements_by_class_name('product')
print(len(products))

scrape_toy_page(rope_tug_toys, products)



36


In [25]:
rope_tug_df_list = []
for cat in rope_tug_toys:
    for toy in cat:
        rope_tug_df_list.append(toy)
rope_tug_df = pd.DataFrame(rope_tug_df_list)
rope_tug_df

Unnamed: 0,0,1,2,3,4
0,Frisco Plush with Rope Squeaking Monkey Dog Toy,$5.98,"[Toss, squeak, repeat—bring on every game with...",[Plush monkey dog toy features a built-in sque...,[[We got one of these a year ago for our new p...
1,KONG Floppy Knots Dog Toy,$9.49,[Help your dog spend his boundless reserves of...,[Dog toy made with internal knotted rope and s...,[[I got this for my 10 week old Dane puppy as ...
2,Frisco Rope with Squeaking Ball Dog Toy,$5.88,[Get ready for some high-flying fun with the F...,"[Built-in, extra-loud squeaker in the ball is ...",[[Received this Toy yesterday. As a toy that h...
3,Frisco Rope with Double Loop Dog Toy,$5.00,[Let the games begin with the Frisco Rope Doub...,[Ideal for everyday play and made of a cotton-...,"[[I just received this yesterday, this morning..."
4,USA Bones & Chews Cotton Rope with Hooves Dog ...,$8.98,"[For the ultimate play time, the USA Bones & C...",[Offers a chewing challenge and a tug toy all ...,[[We needed something to occupy our 8 month ol...
5,USA Bones & Chews Cotton Rope with Bones Dog T...,$8.98,"[For the ultimate play time, the Rope & Bones ...",[Offers a chewing challenge and a tug toy all ...,[[I really wish that the description of this i...
6,Frisco Rope Small to Medium Assorted Dog Toys,$9.98,"[Let the games begin with these knotty, twisty...",[Assorted rope toy set provides the playtime v...,"[[While the price point is quite good, these t..."
7,Otterly Pets Assorted Medium to Large Rope Dog...,$17.95,[Watch as your pup’s tail wags with excitement...,"[Durable dog toys are made from a strong rope,...",[[Fantastic set of tough rope toys for our Gre...
8,Frisco Plush with Inside Rope Squeaking Cow Do...,$7.98,"[Toss, squeak, repeat—bring on every game with...",[Plush cow dog toy features a built-in squeake...,[[I have tried so many dog toys and nothing co...
9,"KONG Tugga Wubba Dog Toy, Color Varies",$7.89,"[The KONG Tugga Wubba is a fun, interactive tu...","[Fun, interactive tug and toss toy brings fun ...",[[I bought this toy for my rescue lab and he l...


In [51]:
# INTERACTIVE TOYS

DRIVER_PATH = '/Users/haleytaft/Downloads/chromedriver'
driver = webdriver.Chrome( executable_path=DRIVER_PATH) 
driver.get("https://www.chewy.com/b/toys-315")

# To first just look at CHEW TOYS
chew_toys_link = driver.find_element_by_link_text('Interactive Toys')
chew_toys_link.click()

interactive_toys = {}

# The interactive toys
element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Treat Toys & Dispensers")))
element.click()

# Checking for number of toys in each subcategory (looking at side bar)
cat_page_source = driver.page_source
interactive_numbers = number_of_toys(cat_page_source)

# Getting all first page links for each subcategory
interactive_links = get_links(cat_page_source)
print(interactive_links)

# Getting links for all pages for treat toys & dispensers
dispenser_pages_links = grab_subcat_links(interactive_links[0], interactive_numbers[0])
all_dispenser_links = grab_toy_links(dispenser_pages_links)

# To look at the dog treat toys & dispenser interactive toys
check = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Interactive Toys")))

# Scraping
scrape_toy_page(interactive_toys, 'treat toys & dispensers', all_dispenser_links)

driver.get('https://www.chewy.com/b/interactive-toys-319')

print('Done with Treat Toys & Dispensers subcategory')

############################################################################################################

# Treat Dispenser Refills
element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Treat Dispenser Refills")))
element.click()

# Getting links for all pages for treat toys & refills
refills_pages_links = grab_subcat_links(interactive_links[1], interactive_numbers[1])
all_refills_links = grab_toy_links(refills_pages_links)

# To look at the dog treat dispensers refills interactive toys
check = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Interactive Toys")))

# Scraping
scrape_toy_page(interactive_toys, 'treat dispenser refills', all_refills_links)

driver.get('https://www.chewy.com/b/interactive-toys-319')

print('Done with Treat Dispenser Refills subcategory!')

########################################################################################################

# Puzzle toys and Games 
element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Puzzle Toys & Games")))
element.click()

# Getting links for all pages for puzzle toys & games
game_pages_links = grab_subcat_links(interactive_links[2], interactive_numbers[2])
all_game_links = grab_toy_links(game_pages_links)

# To look at the dog puzzle toys & games
check = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Interactive Toys")))

# Scraping
scrape_toy_page(interactive_toys, 'puzzle toys & games', all_game_links)

driver.get('https://www.chewy.com/b/interactive-toys-319')

print('Done with Puzzle Toys & Games')

#########################################################################################################

# # Automatic Ball Launchers
# element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Automatic Ball Launchers")))
# element.click()

# # Getting links for all pages for automatic ball launchers
# auto_pages_links = grab_subcat_links(interactive_links[3], interactive_numbers[3])
# all_auto_links = grab_toy_links(auto_pages_links)

# # To look at the dog automatic ball launchers
# check = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "Interactive Toys")))

# interactive_toys_2 = {}

# # Scraping
# scrape_toy_page(interactive_toys_2, 'automatic ball launchers', all_auto_links)

# print("Done with Automatic Ball Launchers subcategory!")

print("Done with Interactive Toys category!")

['https://www.chewy.com/b/treat-toys-dispensers-2336', 'https://www.chewy.com/b/treat-dispenser-refills-11139', 'https://www.chewy.com/b/puzzle-toys-games-2335']
Done with Treat Toys & Dispensers subcategory
Done with Treat Dispenser Refills subcategory!
Done with Puzzle Toys & Games
Done with Interactive Toys category!


In [50]:
interactive_toys

{}

In [None]:
# interactive_toys_2

In [53]:
interactive_toy_list = []
for subcat in ['treat toys & dispensers', 'treat dispenser refills', 'puzzle toys & games']: #, 'automatic ball launchers'
    for index, link in enumerate(interactive_toys[subcat]):
        interactive_toys[subcat][link]['subcat'] = subcat
        interactive_toys[subcat][link]['cat'] = 'interactive toys'
        interactive_toy_list.append(interactive_toys[subcat][link])
interactive_toy_df = pd.DataFrame(interactive_toy_list)

In [54]:
# interactive_df_list = []
# for cat in interactive_toys:
#     for toy in cat:
#         interactive_df_list.append(toy)
# interactive_df = pd.DataFrame(interactive_df_list)
# interactive_df

In [55]:
interactive_toy_df.to_csv('./data/interactivetoy_df.csv')