# Grainger - Category Chart 

Code developed to build the category chart for the Fastenal website. The objective is to collect all of the urls for each one of the product categories and subcategories and their names so we can build the hierarchy and also use the urls for the data scraping of all of the part numbers 

## Import all dependencies and start selenium 

We are using selenium and BeautifulSoup for the scraping process

In [None]:
#Finding the location of chromedriver.exe
#https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

In [1]:
# Import your newly installed selenium package
from selenium import webdriver
from splinter import Browser
from selenium.webdriver.common.keys import Keys
import time
time.sleep(3)
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup
import pandas as pd

# Now create an 'instance' of your driver
WebDriver = {'executable_path': 'chromedriver'}
# A new Chrome (or other browser) window should open up
browser = Browser('chrome', **WebDriver, headless=False)
# Defining width and height of the browser
browser.driver.set_window_size(1750, 1250)

# Initialize searches 

## All Safety Categories - Level 3

In [2]:
#For Windows users - Opening the target url
url ='https://www.grainger.com/category/safety'
browser.visit(url)

#activating soup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

#Extract all categories from the span tag
spans = soup.findAll('a', class_='route category-link') 
#List comprehension to extract the initial url from the span class
all_safety_category=[span['href'] for span in spans]
#List comprehension to build the end url
all_safety_category_links=[f'https://www.grainger.com/{link}' for link in all_safety_category]
all_safety_category_links

['https://www.grainger.com//category/safety/arc-flash-protection',
 'https://www.grainger.com//category/safety/asbestos-abatement',
 'https://www.grainger.com//category/safety/confined-space-equipment',
 'https://www.grainger.com//category/safety/disposable-and-chemical-resistant-clothing',
 'https://www.grainger.com//category/safety/emt-and-rescue-supplies',
 'https://www.grainger.com//category/safety/emergency-eye-wash-and-shower-equipment',
 'https://www.grainger.com//category/safety/emergency-preparedness',
 'https://www.grainger.com//category/safety/emergency-and-exam-room-supplies',
 'https://www.grainger.com//category/safety/ergonomics',
 'https://www.grainger.com//category/safety/exam-room-furnishings',
 'https://www.grainger.com//category/safety/eye-protection-and-accessories',
 'https://www.grainger.com//category/safety/face-protection',
 'https://www.grainger.com//category/safety/fall-protection',
 'https://www.grainger.com//category/safety/fire-fighting-clothing-and-accesso

## All Safety Categories - Level 4

In [3]:
#Create the empty list that will hold the urls
test_urls=[]
#For loop to iterate through all urls
for x in all_safety_category_links:
    #Opening the browser
    browser.visit(x)
    #setting a sleep time between iterations
    time.sleep(3)
    #activating soup
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    #finding the tags that contain the urls
    spans = soup.findAll('a', class_='route category-link')
    #Nested loop to iterate through the specific tags in each url
    for spa in spans:
        #setting an error handling method
        try: 
            #extracting the url of each category on each url
            all_safety_category=spa['href']
        #identifying the error
        except IndexError:
            #printing error message
            print('#-----------')
            print ('This is the end url')
            
        #appending to the empty list all urls found
        test_urls.append(all_safety_category)
#displaying the results
test_urls
#Only missing URL is https://www.grainger.com//category/safety/respiratory

['/category/safety/arc-flash-protection/arc-protection-blankets',
 '/category/safety/arc-flash-protection/electrical-safety-accessories',
 '/category/safety/arc-flash-protection/electrical-sticks-and-rescue-hooks',
 '/category/safety/arc-flash-protection/grounding-equipment',
 '/category/safety/arc-flash-protection/insulating-line-hoses',
 '/category/safety/arc-flash-protection/shock-protection-blankets',
 '/category/safety/asbestos-abatement/asbestos-disposal-bags',
 '/category/safety/asbestos-abatement/asbestos-encapsulants',
 '/category/safety/confined-space-equipment/air-powered-confined-space-fans-and-blowers',
 '/category/safety/confined-space-equipment/confined-space-carts',
 '/category/safety/confined-space-equipment/confined-space-entry-accessories',
 '/category/safety/confined-space-equipment/confined-space-entry-systems',
 '/category/safety/confined-space-equipment/confined-space-entry-winches',
 '/category/safety/confined-space-equipment/confined-space-fan-and-blower-access

In [5]:
#List comprehension to build the end url
all_urls_level_4=[f'https://www.grainger.com/{link}' for link in test_urls]
all_urls_level_4

['https://www.grainger.com//category/safety/arc-flash-protection/arc-protection-blankets',
 'https://www.grainger.com//category/safety/arc-flash-protection/electrical-safety-accessories',
 'https://www.grainger.com//category/safety/arc-flash-protection/electrical-sticks-and-rescue-hooks',
 'https://www.grainger.com//category/safety/arc-flash-protection/grounding-equipment',
 'https://www.grainger.com//category/safety/arc-flash-protection/insulating-line-hoses',
 'https://www.grainger.com//category/safety/arc-flash-protection/shock-protection-blankets',
 'https://www.grainger.com//category/safety/asbestos-abatement/asbestos-disposal-bags',
 'https://www.grainger.com//category/safety/asbestos-abatement/asbestos-encapsulants',
 'https://www.grainger.com//category/safety/confined-space-equipment/air-powered-confined-space-fans-and-blowers',
 'https://www.grainger.com//category/safety/confined-space-equipment/confined-space-carts',
 'https://www.grainger.com//category/safety/confined-space-

## All Safety Categories - Level 5

In [6]:
#Create the empty list that will hold the urls
urls_level_5=[]
#For loop to iterate through all urls
for x in all_urls_level_4:
    #Opening the browser
    browser.visit(x)
    #setting a sleep time between iterations
    time.sleep(3)
    #activating soup
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    #finding the tags that contain the urls
    spans = soup.findAll('a', class_='route category-link')
    #Nested loop to iterate through the specific tags in each url
    for spa in spans:
        #setting an error handling method
        try: 
            #extracting the url of each category on each url
            all_safety_category=spa['href']
        #identifying the error
        except IndexError:
            #printing error message
            print('#-----------')
            print ('This is the end url')
            
        #appending to the empty list all urls found
        urls_level_5.append(all_safety_category)
#displaying the results
urls_level_5

['/category/safety/emergency-preparedness/emergency-lightsticks-and-accessories/emergency-lightstick-accessories',
 '/category/safety/emergency-preparedness/emergency-lightsticks-and-accessories/emergency-lightstick-station-accessories',
 '/category/safety/emergency-preparedness/emergency-lightsticks-and-accessories/emergency-lightstick-stations',
 '/category/safety/emergency-preparedness/emergency-lightsticks-and-accessories/emergency-lightsticks',
 '/category/safety/eye-protection-and-accessories/eye-protection-accessories/disposable-eye-shields',
 '/category/safety/eye-protection-and-accessories/eye-protection-accessories/eyewear-dispensers',
 '/category/safety/eye-protection-and-accessories/eye-protection-accessories/eyewear-germicidal-cabinets',
 '/category/safety/eye-protection-and-accessories/eye-protection-accessories/eyewear-retainers',
 '/category/safety/eye-protection-and-accessories/eye-protection-accessories/lens-cleaning-station-solution-refills',
 '/category/safety/eye-p

In [7]:
#List comprehension to build the end url
all_urls_level_5=[f'https://www.grainger.com/{link}' for link in urls_level_5]
all_urls_level_5

['https://www.grainger.com//category/safety/emergency-preparedness/emergency-lightsticks-and-accessories/emergency-lightstick-accessories',
 'https://www.grainger.com//category/safety/emergency-preparedness/emergency-lightsticks-and-accessories/emergency-lightstick-station-accessories',
 'https://www.grainger.com//category/safety/emergency-preparedness/emergency-lightsticks-and-accessories/emergency-lightstick-stations',
 'https://www.grainger.com//category/safety/emergency-preparedness/emergency-lightsticks-and-accessories/emergency-lightsticks',
 'https://www.grainger.com//category/safety/eye-protection-and-accessories/eye-protection-accessories/disposable-eye-shields',
 'https://www.grainger.com//category/safety/eye-protection-and-accessories/eye-protection-accessories/eyewear-dispensers',
 'https://www.grainger.com//category/safety/eye-protection-and-accessories/eye-protection-accessories/eyewear-germicidal-cabinets',
 'https://www.grainger.com//category/safety/eye-protection-and-a

In [9]:
df = pd.DataFrame(all_urls_level_4, columns=["colummn"])
df.to_csv('all_urls_level_4.csv', index=False)

In [8]:
df = pd.DataFrame(all_urls_level_5, columns=["colummn"])
df.to_csv('all_urls_level_5.csv', index=False)

In [None]:
#Create dataframe for antistatic gloves
df_antistatic_gloves = pd.DataFrame(
    {'SKU': sku_list,
     'Definition': definition_list,
     'Product Description':product_text,
     'Manufacturer Model':mfr_model_list,
     'Brand': brand_list,
     'Category':cat_link_list,
     'Category Group':cat_group_list,
     'UNSPSC':unspsc_list,
     'Price':price_list,
     'Price Type':web_price_list,
     'Country of Origin':country_of_origin,
     'UOM':uom_list,
     'Compliance and Restrictions':compliance_list,
     'Link':item_link_list,
     'Image Link':image_list
    })
df_antistatic_gloves.to_csv('antistatic_gloves.csv', encoding='utf-8')
df_antistatic_gloves

Testing blocks

In [None]:
url3 ='https://www.grainger.com//category/safety/rehabilitation-and-durable-medical-equipment'
browser.visit(url3)

#activating soup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [None]:
#For Windows users - Opening the target url
spans = soup.findAll('a', class_='route category-link')
cats=[]
for spa in spans:
    cat=spa['href']
    cats.append(cat)
cats

Testing blocks