# Grainger - Category Chart 

Code developed to build the category chart for the Grainger website. The objective is to collect the URL and the name of every product category and subcategory so that we can build the category hierarchy, and then reuse those URLs to scrape all of the part numbers.

## Import all dependencies and start selenium 

We are using Selenium (driven through splinter) and BeautifulSoup for the scraping process.

In [1]:
#Locate the chromedriver executable on the PATH; splinter is pointed at 'chromedriver' when the browser is created
#https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

/usr/local/bin/chromedriver


In [2]:
# Import your newly installed selenium package
from selenium import webdriver
from splinter import Browser
from selenium.webdriver.common.keys import Keys
import time
time.sleep(3)
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup


# Keyword arguments forwarded to splinter so it can find the chromedriver binary
WebDriver = {'executable_path': 'chromedriver'}

# Launch Chrome in a visible (non-headless) window
browser = Browser('chrome', headless=False, **WebDriver)

# Fix the window size at 1750 x 1250
browser.driver.set_window_size(1750, 1250)

# Open the Grainger category landing page
url = 'https://www.grainger.com/category?analytics=nav'
browser.visit(url)

## Start the browser and make the soup

In [3]:
# Take the HTML of the page currently open in the browser and parse it
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

## Initialize searches 

### First we need to create the searches for the initial page that contains all of the Product categories  

#### All Products

Inspecting the website, we found that the product category links live in a `ul` element with the class `categories__list`. The same element is used on the category pages visited below, so the same lookup can be reused on each page to reach that list.

In [5]:
# Find every <ul class="categories__list"> on the current page; the links of
# the first match are collected below.
# find_all is the current BeautifulSoup spelling of the deprecated findAll alias.
products = soup.find_all('ul', class_='categories__list')

# Build the list of category hrefs from the first matching list.
# - `if products` covers the case where no list was found (page not loaded,
#   markup changed): the cell then yields [] instead of raising IndexError.
# - href=True skips <a> tags that carry no href, which would raise KeyError.
all_products_urls = (
    [link['href'] for link in products[0].find_all('a', href=True)]
    if products
    else []
)

# display the results
all_products_urls

['/category/abrasives',
 '/category/adhesives-sealants-and-tape',
 '/category/cleaning-and-janitorial',
 '/category/electrical',
 '/category/electronics-appliances-and-batteries',
 '/category/fasteners',
 '/category/fleet-and-vehicle-maintenance',
 '/category/furniture-hospitality-and-food-service',
 '/category/hvac-and-refrigeration',
 '/category/hardware',
 '/category/hydraulics',
 '/category/lab-supplies',
 '/category/lighting',
 '/category/lubrication',
 '/category/machining',
 '/category/material-handling',
 '/category/motors',
 '/category/office-supplies',
 '/category/outdoor-equipment',
 '/category/paint-equipment-and-supplies',
 '/category/plumbing',
 '/category/pneumatics',
 '/category/power-transmission',
 '/category/pumps',
 '/category/raw-materials',
 '/category/reference-and-learning-supplies',
 '/category/safety',
 '/category/security',
 '/category/test-instruments',
 '/category/tools',
 '/category/welding']

In [6]:
# Open the Safety category page
url2 = 'https://www.grainger.com/category/safety'
browser.visit(url2)

# Re-parse the browser's HTML so `soup` reflects the page just opened
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [7]:
# Find every <ul class="categories__list"> in the current soup; the links of
# the first match are collected below.
# NOTE: despite the "facility" names, the soup parsed just before this cell is
# the Safety category page, so these are Safety sub-category links.
# find_all is the current BeautifulSoup spelling of the deprecated findAll alias.
facilityid = soup.find_all('ul', class_='categories__list')

# Build the list of sub-category hrefs from the first matching list.
# - `if facilityid` covers the case where no list was found: the cell then
#   yields [] instead of raising IndexError.
# - href=True skips <a> tags that carry no href, which would raise KeyError.
# - A comprehension-local name is used so the global `products` result set
#   is no longer overwritten with a string.
facility_id_urls = (
    [anchor['href'] for anchor in facilityid[0].find_all('a', href=True)]
    if facilityid
    else []
)

# display the results
facility_id_urls

['/category/safety/arc-flash-protection',
 '/category/safety/asbestos-abatement',
 '/category/safety/confined-space-equipment',
 '/category/safety/disposable-and-chemical-resistant-clothing',
 '/category/safety/emt-and-rescue-supplies',
 '/category/safety/emergency-eye-wash-and-shower-equipment',
 '/category/safety/emergency-preparedness',
 '/category/safety/emergency-and-exam-room-supplies',
 '/category/safety/ergonomics',
 '/category/safety/exam-room-furnishings',
 '/category/safety/eye-protection-and-accessories',
 '/category/safety/face-protection',
 '/category/safety/fall-protection',
 '/category/safety/fire-fighting-clothing-and-accessories',
 '/category/safety/fire-protection',
 '/category/safety/first-aid-and-wound-care',
 '/category/safety/flame-resistant-and-arc-flash-clothing',
 '/category/safety/footwear-and-footwear-accessories',
 '/category/safety/gas-detection',
 '/category/safety/gloves-and-hand-protection',
 '/category/safety/head-protection',
 '/category/safety/hearin

In [None]:
# Open the hand and arm protection category page
# NOTE(review): this URL points at fastenal.com, while the executed cells in
# this notebook visit grainger.com - confirm which site is intended here.
url4 = 'https://www.fastenal.com/product/safety/hand-and-arm-protection/600681?categoryId=600681&level=2&isExpanded=true'
browser.visit(url4)

# Re-parse the browser's HTML so `soup` reflects the page just opened
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [None]:
# Find every <div> whose class attribute is exactly
# 'col-xs-12 category-list category-body'; the links of the first match are
# collected below.
# find_all is the current BeautifulSoup spelling of the deprecated findAll alias.
handpro = soup.find_all('div', class_='col-xs-12 category-list category-body')

# Build the list of hand protection sub-category hrefs.
# - `if handpro` covers the case where no such div was found: the cell then
#   yields [] instead of raising IndexError.
# - href=True skips <a> tags that carry no href, which would raise KeyError.
# - A comprehension-local name is used so the global `products` is not
#   overwritten with a string.
hand_protection_urls = (
    [anchor['href'] for anchor in handpro[0].find_all('a', href=True)]
    if handpro
    else []
)

# display the results
hand_protection_urls

In [None]:
# Open the work wear category page
# NOTE(review): this URL points at fastenal.com, while the executed cells in
# this notebook visit grainger.com - confirm which site is intended here.
url5 = 'https://www.fastenal.com/product/safety/work-wear/613935?categoryId=613935&level=2&isExpanded=true'
browser.visit(url5)

# Re-parse the browser's HTML so `soup` reflects the page just opened
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [None]:
# Find every <div> whose class attribute is exactly
# 'col-xs-12 category-list category-body'; the links of the first match are
# collected below.
# find_all is the current BeautifulSoup spelling of the deprecated findAll alias.
workwear = soup.find_all('div', class_='col-xs-12 category-list category-body')

# Build the list of work wear sub-category hrefs.
# - `if workwear` covers the case where no such div was found: the cell then
#   yields [] instead of raising IndexError.
# - href=True skips <a> tags that carry no href, which would raise KeyError.
# - A comprehension-local name is used so the global `products` is not
#   overwritten with a string.
work_wear_urls = (
    [anchor['href'] for anchor in workwear[0].find_all('a', href=True)]
    if workwear
    else []
)

# display the results
work_wear_urls

In [None]:
# Open the matting category page
# NOTE(review): this URL points at fastenal.com, while the executed cells in
# this notebook visit grainger.com - confirm which site is intended here.
url6 = 'https://www.fastenal.com/product/safety/matting/600789?categoryId=600789&level=2&isExpanded=true'
browser.visit(url6)

# Re-parse the browser's HTML so `soup` reflects the page just opened
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [None]:
# Find every <div> whose class attribute is exactly
# 'col-xs-12 category-list category-body'; the links of the first match are
# collected below.
# find_all is the current BeautifulSoup spelling of the deprecated findAll alias.
matting = soup.find_all('div', class_='col-xs-12 category-list category-body')

# Build the list of matting sub-category hrefs.
# - `if matting` covers the case where no such div was found: the cell then
#   yields [] instead of raising IndexError.
# - href=True skips <a> tags that carry no href, which would raise KeyError.
# - A comprehension-local name is used so the global `products` is not
#   overwritten with a string.
matting_urls = (
    [anchor['href'] for anchor in matting[0].find_all('a', href=True)]
    if matting
    else []
)

# display the results
matting_urls