# Grainger - Category Chart 

Code developed to build the category chart for the Grainger website. The objective is to collect the URLs and names of every product category and subcategory so we can build the hierarchy, and also to use those URLs to scrape all of the part numbers.

## Import all dependencies and start selenium 

We are using selenium and BeautifulSoup for the scraping process

In [None]:
# Locate the chromedriver executable on the PATH (shell command run from the notebook)
# Reference: https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

In [1]:
# Import your newly installed selenium package
from selenium import webdriver
from splinter import Browser
from selenium.webdriver.common.keys import Keys
import time
time.sleep(3)
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup


# Keyword arguments telling splinter which chromedriver executable to launch
WebDriver = {'executable_path': 'chromedriver'}

# Open a visible (non-headless) Chrome window controlled by splinter
browser = Browser('chrome', headless=False, **WebDriver)

# Fix the browser window size (width 1750, height 1250)
browser.driver.set_window_size(1750, 1250)

# Load the Grainger category landing page used as the starting point
url = 'https://www.grainger.com/category?analytics=nav'
browser.visit(url)

## Start the browser and make the soup

In [2]:
# Parse the HTML currently loaded in the browser with BeautifulSoup
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

## Initialize searches 

### First we need to create the searches for the initial page that contains all of the Product categories  

#### All Products

Inspecting the website, we found that the product category links are inside a `ul` element with the class 'categories__list', and that this is consistent across all the pages, so the same search can be reused on every page to reach that list first.

In [3]:
# Collect the URL of every top-level product category.
# The category links are <a> tags inside the first <ul class="categories__list">.
products = soup.find_all('ul', class_='categories__list')

# Only anchors that actually carry an href are kept, so an <a> tag without
# one cannot abort the extraction with a KeyError.
all_products_urls = [
    anchor['href'] for anchor in products[0].find_all('a', href=True)
]

# Display the results
all_products_urls

['/category/abrasives',
 '/category/adhesives-sealants-and-tape',
 '/category/cleaning-and-janitorial',
 '/category/electrical',
 '/category/electronics-appliances-and-batteries',
 '/category/fasteners',
 '/category/fleet-and-vehicle-maintenance',
 '/category/furniture-hospitality-and-food-service',
 '/category/hvac-and-refrigeration',
 '/category/hardware',
 '/category/hydraulics',
 '/category/lab-supplies',
 '/category/lighting',
 '/category/lubrication',
 '/category/machining',
 '/category/material-handling',
 '/category/motors',
 '/category/office-supplies',
 '/category/outdoor-equipment',
 '/category/paint-equipment-and-supplies',
 '/category/plumbing',
 '/category/pneumatics',
 '/category/power-transmission',
 '/category/pumps',
 '/category/raw-materials',
 '/category/reference-and-learning-supplies',
 '/category/safety',
 '/category/security',
 '/category/test-instruments',
 '/category/tools',
 '/category/welding']

In [4]:
# Collect the display name of every category on the parsed page.

# Each name is the text of a <span class="category-text"> element
spans = soup.findAll('span', class_='category-text')
# Keep the text of each span with surrounding whitespace removed
categories_list = [tag.text.strip() for tag in spans]
# Display the results
categories_list

['Abrasives',
 'Adhesives, Sealants and Tape',
 'Cleaning and Janitorial',
 'Electrical',
 'Electronics, Appliances, and Batteries',
 'Fasteners',
 'Fleet and Vehicle Maintenance',
 'Furniture, Hospitality and Food Service',
 'HVAC and Refrigeration',
 'Hardware',
 'Hydraulics',
 'Lab Supplies',
 'Lighting',
 'Lubrication',
 'Machining',
 'Material Handling',
 'Motors',
 'Office Supplies',
 'Outdoor Equipment',
 'Paint, Equipment and Supplies',
 'Plumbing',
 'Pneumatics',
 'Power Transmission',
 'Pumps',
 'Raw Materials',
 'Reference and Learning Supplies',
 'Safety',
 'Security',
 'Test Instruments',
 'Tools',
 'Welding']

Safety Categories

In [5]:
# Navigate to the Safety category page
url2 = 'https://www.grainger.com/category/safety'
browser.visit(url2)

# Re-parse the page now shown in the browser
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [6]:
# Collect the name of every subcategory on the parsed page.

# Each name is the text of a <span class="category-text"> element
spans = soup.findAll('span', class_='category-text')
# Keep the text of each span with surrounding whitespace removed
subcategories_list = [tag.text.strip() for tag in spans]
# Display the results
subcategories_list

['Arc Flash Protection',
 'Asbestos Abatement',
 'Confined Space Equipment',
 'Disposable and Chemical Resistant Clothing',
 'EMT and Rescue Supplies',
 'Emergency Eye Wash and Shower Equipment',
 'Emergency Preparedness',
 'Emergency and Exam Room Supplies',
 'Ergonomics',
 'Exam Room Furnishings',
 'Eye Protection and Accessories',
 'Face Protection',
 'Fall Protection',
 'Fire Fighting Clothing and Accessories',
 'Fire Protection',
 'First Aid and Wound Care',
 'Flame Resistant and Arc Flash Clothing',
 'Footwear and Footwear Accessories',
 'Gas Detection',
 'Gloves and Hand Protection',
 'Head Protection',
 'Hearing Protection',
 'IV Supplies',
 'Identification Products',
 'Lockout Tagout',
 'Matting',
 'Medical Diagnostics and Monitoring Equipment',
 'Medical Oxygen Delivery and Equipment',
 'Noise Control',
 'Portable Coolers and Beverages',
 'Rainwear',
 'Rehabilitation and Durable Medical Equipment',
 'Replacement Parts',
 'Respiratory',
 'Safety Storage',
 'Signs',
 'Spill Con

Gloves and Hand Protection

In [7]:
# Navigate to the "Gloves and Hand Protection" page under Safety
url2 = 'https://www.grainger.com/category/safety/gloves-and-hand-protection'
browser.visit(url2)

# Re-parse the page now shown in the browser
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [8]:
# Collect the name of every subcategory on the parsed page.

# Each name is the text of a <span class="category-text"> element
spans = soup.findAll('span', class_='category-text')
# Keep the text of each span with surrounding whitespace removed
subcategories_list = [tag.text.strip() for tag in spans]
# Display the results
subcategories_list

['Antistatic Gloves',
 'Arc Flash Gloves',
 'Chemical Resistant Gloves',
 'Coated Gloves',
 'Cold-Condition Gloves',
 'Cryogenic Gloves',
 'Cut-Resistant Gloves',
 'Disposable Gloves',
 'Electrical Glove Accessories',
 'Electrical Gloves',
 'Extrication and Rescue Gloves',
 'Finger Cots',
 'Firefighter Gloves',
 'Flame Retardant Gloves',
 'General Purpose Gloves',
 'Glove Box Gloves',
 'Glove Dispensers',
 'Glove Holder Clips',
 'Hand and Finger Guards',
 'Heat Resistant Gloves',
 'Heated Gloves and Hand Warmers',
 'Impact Resistant and Anti-Vibration Gloves',
 'Inspection Gloves and Glove Liners',
 'Leather Work and Drivers Gloves',
 'Mechanics Gloves',
 'Military, Police and Tactical Gloves',
 'Welding Gloves']

Antistatic Gloves

In [13]:
# Navigate to the "Antistatic Gloves" product listing page
url3 = 'https://www.grainger.com/category/safety/gloves-and-hand-protection/antistatic-gloves'
browser.visit(url3)

# Re-parse the page now shown in the browser
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [14]:
# Locate the product rows of the table on the parsed page.
# Every product is a <tr class="search-table-view__web-parent-table-row">
# and carries its SKU in the `data-sku` attribute.
spans = soup.findAll('tr', class_='search-table-view__web-parent-table-row')
# Start from an empty list that will hold the extracted SKUs
subcategories_list = []
# Display the matched rows
spans

[<tr class="search-table-view__web-parent-table-row" data-automated-test="web-parent-table-row--3JFP3" data-breadcrumb-cat-id="" data-context-type="Primary" data-sku="3JFP3">
 <td class="search-table-view__web-parent-table-row-cell " data-automated-test="item--antistaticgloves" headers="WP5668693-header-id-1 WP5668693-subheader-id-1">
                             Antistatic Gloves
                     </td>
 <td class="search-table-view__web-parent-table-row-cell " data-automated-test="cuff--rolled" headers="WP5668693-header-id-2 WP5668693-subheader-id-1">
                             Rolled
                     </td>
 <td class="search-table-view__web-parent-table-row-cell " data-automated-test="color--graywhite" headers="WP5668693-header-id-3 WP5668693-subheader-id-1">
                             Gray/White
                     </td>
 <td class="search-table-view__web-parent-table-row-cell " data-automated-test="size--xs" headers="WP5668693-header-id-4 WP5668693-subheader-id-1">
   

In [15]:
# Read the SKU stored in each row's `data-sku` attribute and add it to
# the existing list, one entry per row in `spans`
subcategories_list.extend(row.attrs['data-sku'] for row in spans)
# Display the results
subcategories_list

['3JFP3',
 '9WRN3',
 '3JFP4',
 '8CAW1',
 '3NGY2',
 '19L033',
 '19L034',
 '19L035',
 '19L036',
 '19L037',
 '19L038',
 '19L039',
 '19L040',
 '19L041',
 '19L042',
 '19L043',
 '1DPF5',
 '1DPF6',
 '1DPF7',
 '1DPF8',
 '1DPF9',
 '1DPG1']

Search for the SKU and brand together. These two values will be used to build the final URL from which all of the SKU-level details are extracted.

In [28]:
# Build two parallel lists with one entry per product row: brand and SKU.
brand = []
sku = []
product_rows = soup.findAll('tr', class_='search-table-view__web-parent-table-row')
for product_row in product_rows:
    # The brand is the leading text of the row's fifth <td> cell
    cells = product_row.findAll('td')
    brand_name = cells[4].contents[0].strip()
    # The SKU is the text of the first <button> inside the row
    sku_button = product_row.findAll('button')[0]
    brand.append(brand_name)
    sku.append(sku_button.text.strip())
# Display the brands
brand

['ANSELL',
 'ANSELL',
 'ANSELL',
 'ANSELL',
 'ANSELL',
 'CONDOR',
 'CONDOR',
 'CONDOR',
 'CONDOR',
 'CONDOR',
 'CONDOR',
 'CONDOR',
 'CONDOR',
 'CONDOR',
 'CONDOR',
 'CONDOR',
 'HONEYWELL',
 'HONEYWELL',
 'HONEYWELL',
 'HONEYWELL',
 'HONEYWELL',
 'HONEYWELL']

Template to extract all other categories under safety

In [None]:
# Template: navigate to the subcategory page to extract
# (here "Gloves and Hand Protection" under Safety)
url2 = 'https://www.grainger.com/category/safety/gloves-and-hand-protection'
browser.visit(url2)

# Re-parse the page now shown in the browser
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [None]:
# Template: collect the name of every subcategory on the parsed page.

# Each name is the text of a <span class="category-text"> element
spans = soup.findAll('span', class_='category-text')
# Keep the text of each span with surrounding whitespace removed
subcategories_list = [tag.text.strip() for tag in spans]
# Display the results
subcategories_list

In [None]:
# Navigate to the Safety category page to extract its category links
url2 = 'https://www.grainger.com/category/safety'
browser.visit(url2)

# Re-parse the page now shown in the browser
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [None]:
# Collect the URL of every category link on the page parsed into `soup`.
# The links are <a> tags inside the first <ul class="categories__list">.
facilityid = soup.find_all('ul', class_='categories__list')

# Only anchors that actually carry an href are kept, so an <a> tag without
# one cannot abort the extraction with a KeyError. The loop variable has its
# own name so it no longer overwrites the notebook-level `products` result.
facility_id_urls = [
    anchor['href'] for anchor in facilityid[0].find_all('a', href=True)
]

# Display the results
facility_id_urls