# Building a dataset of Benjamin Moore paint colors
Scraping the paint color names and color codes from each of Benjamin Moore's [nine color family webpages](https://www.benjaminmoore.com/en-us/paint-colors/color-families).

The information I want lives in the elements with the classes `colorName` and `colorCode` inside every tile on each page. Each color family page is paginated, showing 49 tiles at a time, so the next set of 49 tiles only appears after clicking through to the next page.

In [23]:
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs

In [21]:
# Fetch the first page of the red family as static HTML.
url = "https://www.benjaminmoore.com/en-us/paint-colors/red"
response = requests.get(url, timeout=30)  # bound the wait instead of hanging forever
response.raise_for_status()  # fail loudly on 4xx/5xx rather than parsing an error page
html = response.text
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (the Playwright cells below also use html.parser).
soup = bs(html, "html.parser")

In [22]:
# Attribute selector: matches elements whose class attribute is exactly "colorInfoLink".
tile_selector = "[class='colorInfoLink']"
paints = soup.select(tile_selector)
# Peek at the first tile to see the markup that has to be parsed.
paints[0]

<div class="colorInfoLink"><a class="cdpLink" href="/en-us/paint-colors/color/2074-70/easter-bonnet"><p class="colorName" data-testid="ColorTile_ColorName">Easter Bonnet</p><p class="colorCode">2074-70</p></a></div>

In [7]:
# How many tiles the static page returned (the site shows 49 tiles per page).
len(paints)

49

In [8]:
# Inspect the last tile on the page; a negative index avoids hard-coding the
# tile count (49 when this was scraped, i.e. the same element as paints[48]).
paints[-1]

<div class="colorInfoLink"><a class="cdpLink" href="/en-us/paint-colors/color/2076-10/crushed-velvet"><p class="colorName" data-testid="ColorTile_ColorName">Crushed Velvet</p><p class="colorCode">2076-10</p></a></div>

In [9]:
# Print the stripped text of the first .colorName element inside every tile.
for tile in paints:
    print(tile.select(".colorName")[0].text.strip())

Easter Bonnet
Bunny Nose Pink
Exotic Fuchsia
Lilac Pink
Twilight Magenta
Summer Plum
Elderberry Wine
Charming Pink
Passion Pink
Pink Taffy
Pink Raspberry
Pre-Dawn Sky
Mulberry
Dark Burgundy
Rosemist
Orleans Violet
Cranberry Ice
Melrose Pink
Vintage Claret
Bottle of Bordéaux
Radicchio
Baby Dreams
Misty Rose
Countryside Pink
Fashion Rose
Old Claret
Cascabel Chile
Bewitched
Primrose Petals
Sweet Naivete
Pink Pansy
Begonia
Berry Fizz
Cranberry Cocktail
Raisin Torte
I Love You Pink
Valentine's Day
Pretty Pink
Spring Azalea
Hot Lips
Gypsy Pink
Magenta
Nursery Pink
Dog's Ear
Easter Pink
Raspberry Mousse
Crushed Berries
Royal Flush
Crushed Velvet


In [10]:
# Print the stripped text of the first .colorCode element inside every tile.
for tile in paints:
    print(tile.select(".colorCode")[0].text.strip())

2074-70
2074-60
2074-50
2074-40
2074-30
2074-20
CSP-470
2075-70
2075-60
2075-50
2075-40
2075-30
2075-20
2075-10
1366
1374
1362
1363
1364
1357
CC-32
2083-70
1360
1361
1356
2083-30
CSP-445
CSP-450
1367
2083-60
2083-50
2083-40
CSP-440
2083-20
2083-10
2077-70
2077-60
2077-50
2077-40
2077-30
2077-20
2077-10
2076-70
2076-60
2076-50
2076-40
2076-30
2076-20
2076-10


In [11]:
# Collect one record per tile, then build the frame in a single call.
tile_records = []
for paint in paints:
    name_tag = paint.select(".colorName")[0]
    code_tag = paint.select(".colorCode")[0]
    tile_records.append({
        "color_name": name_tag.text.strip(),
        "color_code": code_tag.text.strip(),
    })

paints_df = pd.DataFrame(tile_records)

paints_df

Unnamed: 0,color_name,color_code
0,Easter Bonnet,2074-70
1,Bunny Nose Pink,2074-60
2,Exotic Fuchsia,2074-50
3,Lilac Pink,2074-40
4,Twilight Magenta,2074-30
5,Summer Plum,2074-20
6,Elderberry Wine,CSP-470
7,Charming Pink,2075-70
8,Passion Pink,2075-60
9,Pink Taffy,2075-50


# Browser automation and pagination

In [25]:
import os
import random
import time
import pandas as pd

from playwright.async_api import async_playwright, expect
import asyncio

In [30]:
# get red colors
# NOTE: if it has a lot of pages, a popup will appear, must manually click "Decline offer."
url = 'https://www.benjaminmoore.com/en-us/paint-colors/red'

async with async_playwright() as p:
    browser = await p.chromium.launch(headless=False)  # Change to headless=True to run in the background
    context = await browser.new_context()
    page = await context.new_page()
    await page.goto(url)
    
    page.wait_for_load_state('domcontentloaded')
    color_names = []
    color_codes = []
    # This is the "next page" button
    next_page_locator = "//ul[@class='Paginationstyles__List-sc-vsxlli-1 fHZOVO']/li[@class='Paginationstyles__ListItem-sc-vsxlli-2 Paginationstyles__Arrow-sc-vsxlli-4 hKHaro kVaZpH']/button[@aria-label='Go to next page']"
    page_num = 1
    
    while page_num < 14:
        print(f'parsing page number {page_num}')
        page.locator("text=Decline Offer").click()
        await page.is_visible('tbody')
        html = await page.inner_html('div.ColorByFamilyDetailstyles__ColorByFamilyDetailWrapper-sc-zolz9i-0.fegvrJ.colorByFamilyDetail') 
        soup = bs(html, 'html.parser')
        container = soup.select('.gridContainer')[0]
        colors = container.find_all(class_='colorInfoContainer')
        # Parse here and append it to the list
        for color in colors:
           color_names.append(color.select(".colorName")[0].text.strip())
           color_codes.append(color.select(".colorCode")[0].text.strip()) 
        # Click next page
        if page_num <13:
           await page.click(next_page_locator)
        page_num+=1
        time.sleep(1)

  page.wait_for_load_state('domcontentloaded')
  page.locator("text=Decline Offer").click()


parsing page number 1
parsing page number 2
parsing page number 3
parsing page number 4
parsing page number 5
parsing page number 6
parsing page number 7
parsing page number 8
parsing page number 9
parsing page number 10
parsing page number 11
parsing page number 12
parsing page number 13


In [47]:
# Sanity check: both lists should report the same number of entries.
name_count = len(color_names)
code_count = len(color_codes)
print(name_count)
print(code_count)
# Last scraped name, for comparison with the final tile on the site.
print(color_names[-1])

602
602
Travers Red


In [61]:
# Build the red DataFrame from the paired name/code lists.
# NOTE(review): the saved output of this cell shows the white family's rows
# (starting with "White Opulence"), although the red scrape printed 602 names.
# color_names/color_codes are shared by every family's scrape, so this cell must
# be run right after the red scrape cell - re-run both before trusting red_colors.
red_rows = list(zip(color_names, color_codes))
red_colors = pd.DataFrame(red_rows, columns=['color_name', 'color_code'])
red_colors

Unnamed: 0,color_name,color_code
0,White Opulence,OC-69
1,Alabaster,OC-129
2,Gardenia,AF-10
3,Pink Damask,OC-72
4,Sand Dollar,OC-71
...,...,...
167,White Diamond,OC-61
168,Harwood Putty,CW-5
169,Capitol White,CW-10
170,Parish White,CW-15


In [62]:
# Tag every row with its color family.
# NOTE(review): the saved output shows white-family rows labelled "red"; confirm
# red_colors was built from a fresh red scrape before relying on this label.
red_colors = red_colors.assign(color_fam='red')
red_colors

Unnamed: 0,color_name,color_code,color_fam
0,White Opulence,OC-69,red
1,Alabaster,OC-129,red
2,Gardenia,AF-10,red
3,Pink Damask,OC-72,red
4,Sand Dollar,OC-71,red
...,...,...,...
167,White Diamond,OC-61,red
168,Harwood Putty,CW-5,red
169,Capitol White,CW-10,red
170,Parish White,CW-15,red


In [68]:
# get orange colors
# NOTE: if it has a lot of pages, a popup will appear, must manually click "Decline offer."
url = 'https://www.benjaminmoore.com/en-us/paint-colors/orange'

async with async_playwright() as p:
    browser = await p.chromium.launch(headless=False)  # Change to headless=True to run in the background
    context = await browser.new_context()
    page = await context.new_page()
    await page.goto(url)
    
    page.wait_for_load_state('domcontentloaded')
    color_names = []
    color_codes = []
    # This is the "next page" button
    next_page_locator = "//ul[@class='Paginationstyles__List-sc-vsxlli-1 fHZOVO']/li[@class='Paginationstyles__ListItem-sc-vsxlli-2 Paginationstyles__Arrow-sc-vsxlli-4 hKHaro kVaZpH']/button[@aria-label='Go to next page']"
    page_num = 1
    
    while page_num < 12:
        print(f'parsing page number {page_num}')
        page.locator("text=Decline Offer").click()
        await page.is_visible('tbody')
        html = await page.inner_html('div.ColorByFamilyDetailstyles__ColorByFamilyDetailWrapper-sc-zolz9i-0.fegvrJ.colorByFamilyDetail') 
        soup = bs(html, 'html.parser')
        container = soup.select('.gridContainer')[0]
        colors = container.find_all(class_='colorInfoContainer')
        # Parse here and append it to the list
        for color in colors:
           color_names.append(color.select(".colorName")[0].text.strip())
           color_codes.append(color.select(".colorCode")[0].text.strip()) 
        # Click next page
        if page_num <11:
           await page.click(next_page_locator)
        page_num+=1
        time.sleep(1)

  page.wait_for_load_state('domcontentloaded')
  page.locator("text=Decline Offer").click()


parsing page number 1
parsing page number 2
parsing page number 3
parsing page number 4
parsing page number 5
parsing page number 6
parsing page number 7
parsing page number 8
parsing page number 9
parsing page number 10
parsing page number 11


In [69]:
# Sanity check: both lists should report the same number of entries.
name_count = len(color_names)
code_count = len(color_codes)
print(name_count)
print(code_count)
# Last scraped name, for comparison with the final tile on the site.
print(color_names[-1])

518
518
Tucker Orange


In [70]:
# Pair each scraped name with its code and load the pairs into a frame.
orange_rows = list(zip(color_names, color_codes))
orange_colors = pd.DataFrame(orange_rows, columns=['color_name', 'color_code'])
orange_colors

Unnamed: 0,color_name,color_code
0,Peach Parfait,2175-70
1,Soft Shell,015
2,Pale Pink Satin,008
3,Bermuda Pink,016
4,Phoenix Sand,017
...,...,...
513,Tavern Ochre,CW-375
514,Coffeehouse Ochre,CW-385
515,English Ochre,CW-290
516,Hale Orange,CW-295


In [71]:
# Tag every row with its color family.
orange_colors = orange_colors.assign(color_fam='orange')
orange_colors

Unnamed: 0,color_name,color_code,color_fam
0,Peach Parfait,2175-70,orange
1,Soft Shell,015,orange
2,Pale Pink Satin,008,orange
3,Bermuda Pink,016,orange
4,Phoenix Sand,017,orange
...,...,...,...
513,Tavern Ochre,CW-375,orange
514,Coffeehouse Ochre,CW-385,orange
515,English Ochre,CW-290,orange
516,Hale Orange,CW-295,orange


In [73]:
# get yellow colors
# NOTE: if it has a lot of pages, a popup will appear, must manually click "Decline offer."
url = 'https://www.benjaminmoore.com/en-us/paint-colors/yellow'

async with async_playwright() as p:
    browser = await p.chromium.launch(headless=False)  # Change to headless=True to run in the background
    context = await browser.new_context()
    page = await context.new_page()
    await page.goto(url)
    
    page.wait_for_load_state('domcontentloaded')
    color_names = []
    color_codes = []
    # This is the "next page" button
    next_page_locator = "//ul[@class='Paginationstyles__List-sc-vsxlli-1 fHZOVO']/li[@class='Paginationstyles__ListItem-sc-vsxlli-2 Paginationstyles__Arrow-sc-vsxlli-4 hKHaro kVaZpH']/button[@aria-label='Go to next page']"
    page_num = 1
    
    while page_num < 6:
        print(f'parsing page number {page_num}')
        page.locator("text=Decline Offer").click()
        await page.is_visible('tbody')
        html = await page.inner_html('div.ColorByFamilyDetailstyles__ColorByFamilyDetailWrapper-sc-zolz9i-0.fegvrJ.colorByFamilyDetail') 
        soup = bs(html, 'html.parser')
        container = soup.select('.gridContainer')[0]
        colors = container.find_all(class_='colorInfoContainer')
        # Parse here and append it to the list
        for color in colors:
           color_names.append(color.select(".colorName")[0].text.strip())
           color_codes.append(color.select(".colorCode")[0].text.strip()) 
        # Click next page
        if page_num <5:
           await page.click(next_page_locator)
        page_num+=1
        time.sleep(1)

  page.wait_for_load_state('domcontentloaded')
  page.locator("text=Decline Offer").click()


parsing page number 1
parsing page number 2
parsing page number 3
parsing page number 4
parsing page number 5


In [74]:
# Sanity check: both lists should report the same number of entries.
name_count = len(color_names)
code_count = len(color_codes)
print(name_count)
print(code_count)
# Last scraped name, for comparison with the final tile on the site.
print(color_names[-1])

245
245
Yellow Oxide


In [75]:
# Pair each scraped name with its code and load the pairs into a frame.
yellow_rows = list(zip(color_names, color_codes))
yellow_colors = pd.DataFrame(yellow_rows, columns=['color_name', 'color_code'])
yellow_colors

Unnamed: 0,color_name,color_code
0,Windham Cream,HC-6
1,Honeywheat,179
2,Country Comfort,305
3,Nacho Cheese,2018-40
4,Oxford Gold,315
...,...,...
240,Barley,199
241,Concord Ivory,HC-12
242,Stuart Gold,HC-10
243,Spicy Mustard,2154-20


In [76]:
# Tag every row with its color family.
yellow_colors = yellow_colors.assign(color_fam='yellow')
yellow_colors

Unnamed: 0,color_name,color_code,color_fam
0,Windham Cream,HC-6,yellow
1,Honeywheat,179,yellow
2,Country Comfort,305,yellow
3,Nacho Cheese,2018-40,yellow
4,Oxford Gold,315,yellow
...,...,...,...
240,Barley,199,yellow
241,Concord Ivory,HC-12,yellow
242,Stuart Gold,HC-10,yellow
243,Spicy Mustard,2154-20,yellow


In [78]:
# get green colors
# NOTE: if it has a lot of pages, a popup will appear, must manually click "Decline offer."
url = 'https://www.benjaminmoore.com/en-us/paint-colors/green'

async with async_playwright() as p:
    browser = await p.chromium.launch(headless=False)  # Change to headless=True to run in the background
    context = await browser.new_context()
    page = await context.new_page()
    await page.goto(url)
    
    page.wait_for_load_state('domcontentloaded')
    color_names = []
    color_codes = []
    # This is the "next page" button
    next_page_locator = "//ul[@class='Paginationstyles__List-sc-vsxlli-1 fHZOVO']/li[@class='Paginationstyles__ListItem-sc-vsxlli-2 Paginationstyles__Arrow-sc-vsxlli-4 hKHaro kVaZpH']/button[@aria-label='Go to next page']"
    page_num = 1
    
    while page_num < 14:
        print(f'parsing page number {page_num}')
        page.locator("text=Decline Offer").click()
        await page.is_visible('tbody')
        html = await page.inner_html('div.ColorByFamilyDetailstyles__ColorByFamilyDetailWrapper-sc-zolz9i-0.fegvrJ.colorByFamilyDetail') 
        soup = bs(html, 'html.parser')
        container = soup.select('.gridContainer')[0]
        colors = container.find_all(class_='colorInfoContainer')
        # Parse here and append it to the list
        for color in colors:
           color_names.append(color.select(".colorName")[0].text.strip())
           color_codes.append(color.select(".colorCode")[0].text.strip()) 
        # Click next page
        if page_num <13:
           await page.click(next_page_locator)
        page_num+=1
        time.sleep(1)

  page.wait_for_load_state('domcontentloaded')
  page.locator("text=Decline Offer").click()


parsing page number 1
parsing page number 2
parsing page number 3
parsing page number 4
parsing page number 5
parsing page number 6
parsing page number 7
parsing page number 8
parsing page number 9
parsing page number 10
parsing page number 11
parsing page number 12
parsing page number 13


In [79]:
# Sanity check: both lists should report the same number of entries.
name_count = len(color_names)
code_count = len(color_codes)
print(name_count)
print(code_count)
# Last scraped name, for comparison with the final tile on the site.
print(color_names[-1])

619
619
Greenhow Moss


In [80]:
# Pair each scraped name with its code and load the pairs into a frame.
green_rows = list(zip(color_names, color_codes))
green_colors = pd.DataFrame(green_rows, columns=['color_name', 'color_code'])
green_colors

Unnamed: 0,color_name,color_code
0,Jasper Opal,387
1,City Scape Morning,368
2,Mulholland Yellow,369
3,Golden Delicious,390
4,Yellow Tone,370
...,...,...
614,Palmer Green,CW-475
615,Windsor Green,CW-505
616,Green Earth,CW-455
617,Green Umber,CW-460


In [81]:
# Tag every row with its color family.
green_colors = green_colors.assign(color_fam='green')
green_colors

Unnamed: 0,color_name,color_code,color_fam
0,Jasper Opal,387,green
1,City Scape Morning,368,green
2,Mulholland Yellow,369,green
3,Golden Delicious,390,green
4,Yellow Tone,370,green
...,...,...,...
614,Palmer Green,CW-475,green
615,Windsor Green,CW-505,green
616,Green Earth,CW-455,green
617,Green Umber,CW-460,green


In [82]:
# get blue colors
# NOTE: if it has a lot of pages, a popup will appear, must manually click "Decline offer."
url = 'https://www.benjaminmoore.com/en-us/paint-colors/blue'

async with async_playwright() as p:
    browser = await p.chromium.launch(headless=False)  # Change to headless=True to run in the background
    context = await browser.new_context()
    page = await context.new_page()
    await page.goto(url)
    
    page.wait_for_load_state('domcontentloaded')
    color_names = []
    color_codes = []
    # This is the "next page" button
    next_page_locator = "//ul[@class='Paginationstyles__List-sc-vsxlli-1 fHZOVO']/li[@class='Paginationstyles__ListItem-sc-vsxlli-2 Paginationstyles__Arrow-sc-vsxlli-4 hKHaro kVaZpH']/button[@aria-label='Go to next page']"
    page_num = 1
    
    while page_num < 10:
        print(f'parsing page number {page_num}')
        page.locator("text=Decline Offer").click()
        await page.is_visible('tbody')
        html = await page.inner_html('div.ColorByFamilyDetailstyles__ColorByFamilyDetailWrapper-sc-zolz9i-0.fegvrJ.colorByFamilyDetail') 
        soup = bs(html, 'html.parser')
        container = soup.select('.gridContainer')[0]
        colors = container.find_all(class_='colorInfoContainer')
        # Parse here and append it to the list
        for color in colors:
           color_names.append(color.select(".colorName")[0].text.strip())
           color_codes.append(color.select(".colorCode")[0].text.strip()) 
        # Click next page
        if page_num <9:
           await page.click(next_page_locator)
        page_num+=1
        time.sleep(1)

  page.wait_for_load_state('domcontentloaded')
  page.locator("text=Decline Offer").click()


parsing page number 1
parsing page number 2
parsing page number 3
parsing page number 4
parsing page number 5
parsing page number 6
parsing page number 7
parsing page number 8
parsing page number 9


In [83]:
# Sanity check: both lists should report the same number of entries.
name_count = len(color_names)
code_count = len(color_codes)
print(name_count)
print(code_count)
# Last scraped name, for comparison with the final tile on the site.
print(color_names[-1])

409
409
Everard Blue


In [84]:
# Pair each scraped name with its code and load the pairs into a frame.
blue_rows = list(zip(color_names, color_codes))
blue_colors = pd.DataFrame(blue_rows, columns=['color_name', 'color_code'])
blue_colors

Unnamed: 0,color_name,color_code
0,White Rain,708
1,Palladian Blue,HC-144
2,Bali,702
3,Wythe Blue,HC-143
4,Stratton Blue,HC-142
...,...,...
404,Williamsburg Wythe Blue,CW-590
405,Azurite,CW-670
406,Anderson Blue,CW-565
407,Mayo Teal,CW-570


In [85]:
# Tag every row with its color family.
blue_colors = blue_colors.assign(color_fam='blue')
blue_colors

Unnamed: 0,color_name,color_code,color_fam
0,White Rain,708,blue
1,Palladian Blue,HC-144,blue
2,Bali,702,blue
3,Wythe Blue,HC-143,blue
4,Stratton Blue,HC-142,blue
...,...,...,...
404,Williamsburg Wythe Blue,CW-590,blue
405,Azurite,CW-670,blue
406,Anderson Blue,CW-565,blue
407,Mayo Teal,CW-570,blue


In [86]:
# get purple colors
# NOTE: if it has a lot of pages, a popup will appear, must manually click "Decline offer."
url = 'https://www.benjaminmoore.com/en-us/paint-colors/purple'

async with async_playwright() as p:
    browser = await p.chromium.launch(headless=False)  # Change to headless=True to run in the background
    context = await browser.new_context()
    page = await context.new_page()
    await page.goto(url)
    
    page.wait_for_load_state('domcontentloaded')
    color_names = []
    color_codes = []
    # This is the "next page" button
    next_page_locator = "//ul[@class='Paginationstyles__List-sc-vsxlli-1 fHZOVO']/li[@class='Paginationstyles__ListItem-sc-vsxlli-2 Paginationstyles__Arrow-sc-vsxlli-4 hKHaro kVaZpH']/button[@aria-label='Go to next page']"
    page_num = 1
    
    while page_num < 6:
        print(f'parsing page number {page_num}')
        page.locator("text=Decline Offer").click()
        await page.is_visible('tbody')
        html = await page.inner_html('div.ColorByFamilyDetailstyles__ColorByFamilyDetailWrapper-sc-zolz9i-0.fegvrJ.colorByFamilyDetail') 
        soup = bs(html, 'html.parser')
        container = soup.select('.gridContainer')[0]
        colors = container.find_all(class_='colorInfoContainer')
        # Parse here and append it to the list
        for color in colors:
           color_names.append(color.select(".colorName")[0].text.strip())
           color_codes.append(color.select(".colorCode")[0].text.strip()) 
        # Click next page
        if page_num <5:
           await page.click(next_page_locator)
        page_num+=1
        time.sleep(1)

  page.wait_for_load_state('domcontentloaded')
  page.locator("text=Decline Offer").click()


parsing page number 1
parsing page number 2
parsing page number 3
parsing page number 4
parsing page number 5


In [87]:
# Sanity check: both lists should report the same number of entries.
name_count = len(color_names)
code_count = len(color_codes)
print(name_count)
print(code_count)
# Last scraped name, for comparison with the final tile on the site.
print(color_names[-1])

233
233
Carter Plum


In [88]:
# Pair each scraped name with its code and load the pairs into a frame.
purple_rows = list(zip(color_names, color_codes))
purple_colors = pd.DataFrame(purple_rows, columns=['color_name', 'color_code'])
purple_colors

Unnamed: 0,color_name,color_code
0,Windmill Wings,2067-60
1,Summer Blue,2067-50
2,Blue Lapis,2067-40
3,Watertown,818
4,Twilight Blue,2067-30
...,...,...
228,Mink Violet,1252
229,Pinch of Spice,1449
230,Fresco Urbain,1253
231,Barrett Brick,CW-350


In [89]:
# Tag every row with its color family.
purple_colors = purple_colors.assign(color_fam='purple')
purple_colors

Unnamed: 0,color_name,color_code,color_fam
0,Windmill Wings,2067-60,purple
1,Summer Blue,2067-50,purple
2,Blue Lapis,2067-40,purple
3,Watertown,818,purple
4,Twilight Blue,2067-30,purple
...,...,...,...
228,Mink Violet,1252,purple
229,Pinch of Spice,1449,purple
230,Fresco Urbain,1253,purple
231,Barrett Brick,CW-350,purple


In [90]:
# get gray colors
# NOTE: if it has a lot of pages, a popup will appear, must manually click "Decline offer."
url = 'https://www.benjaminmoore.com/en-us/paint-colors/gray'

async with async_playwright() as p:
    browser = await p.chromium.launch(headless=False)  # Change to headless=True to run in the background
    context = await browser.new_context()
    page = await context.new_page()
    await page.goto(url)
    
    page.wait_for_load_state('domcontentloaded')
    color_names = []
    color_codes = []
    # This is the "next page" button
    next_page_locator = "//ul[@class='Paginationstyles__List-sc-vsxlli-1 fHZOVO']/li[@class='Paginationstyles__ListItem-sc-vsxlli-2 Paginationstyles__Arrow-sc-vsxlli-4 hKHaro kVaZpH']/button[@aria-label='Go to next page']"
    page_num = 1
    
    while page_num < 10:
        print(f'parsing page number {page_num}')
        page.locator("text=Decline Offer").click()
        await page.is_visible('tbody')
        html = await page.inner_html('div.ColorByFamilyDetailstyles__ColorByFamilyDetailWrapper-sc-zolz9i-0.fegvrJ.colorByFamilyDetail') 
        soup = bs(html, 'html.parser')
        container = soup.select('.gridContainer')[0]
        colors = container.find_all(class_='colorInfoContainer')
        # Parse here and append it to the list
        for color in colors:
           color_names.append(color.select(".colorName")[0].text.strip())
           color_codes.append(color.select(".colorCode")[0].text.strip()) 
        # Click next page
        if page_num <9:
           await page.click(next_page_locator)
        page_num+=1
        time.sleep(1)

  page.wait_for_load_state('domcontentloaded')
  page.locator("text=Decline Offer").click()


parsing page number 1
parsing page number 2
parsing page number 3
parsing page number 4
parsing page number 5
parsing page number 6
parsing page number 7
parsing page number 8
parsing page number 9


In [91]:
# Sanity check: both lists should report the same number of entries.
name_count = len(color_names)
code_count = len(color_codes)
print(name_count)
print(code_count)
# Last scraped name, for comparison with the final tile on the site.
print(color_names[-1])

425
425
Tavern Charcoal


In [92]:
# Pair each scraped name with its code and load the pairs into a frame.
gray_rows = list(zip(color_names, color_codes))
gray_colors = pd.DataFrame(gray_rows, columns=['color_name', 'color_code'])
gray_colors

Unnamed: 0,color_name,color_code
0,White Winged Dove,1457
1,Angelica,AF-665
2,Lilac Hush,CSP-490
3,Gotham,CSP-385
4,Coachman's Cape®,CSP-90
...,...,...
420,Bruton White,CW-710
421,Bone Black,CW-715
422,Geddy Gray,CW-720
423,Randolph Gray,CW-85


In [93]:
# Tag every row with its color family.
gray_colors = gray_colors.assign(color_fam='gray')
gray_colors

Unnamed: 0,color_name,color_code,color_fam
0,White Winged Dove,1457,gray
1,Angelica,AF-665,gray
2,Lilac Hush,CSP-490,gray
3,Gotham,CSP-385,gray
4,Coachman's Cape®,CSP-90,gray
...,...,...,...
420,Bruton White,CW-710,gray
421,Bone Black,CW-715,gray
422,Geddy Gray,CW-720,gray
423,Randolph Gray,CW-85,gray


In [94]:
# get neutral colors
# NOTE: if it has a lot of pages, a popup will appear, must manually click "Decline offer."
url = 'https://www.benjaminmoore.com/en-us/paint-colors/neutral'

async with async_playwright() as p:
    browser = await p.chromium.launch(headless=False)  # Change to headless=True to run in the background
    context = await browser.new_context()
    page = await context.new_page()
    await page.goto(url)
    
    page.wait_for_load_state('domcontentloaded')
    color_names = []
    color_codes = []
    # This is the "next page" button
    next_page_locator = "//ul[@class='Paginationstyles__List-sc-vsxlli-1 fHZOVO']/li[@class='Paginationstyles__ListItem-sc-vsxlli-2 Paginationstyles__Arrow-sc-vsxlli-4 hKHaro kVaZpH']/button[@aria-label='Go to next page']"
    page_num = 1
    
    while page_num < 18:
        print(f'parsing page number {page_num}')
        page.locator("text=Decline Offer").click()
        await page.is_visible('tbody')
        html = await page.inner_html('div.ColorByFamilyDetailstyles__ColorByFamilyDetailWrapper-sc-zolz9i-0.fegvrJ.colorByFamilyDetail') 
        soup = bs(html, 'html.parser')
        container = soup.select('.gridContainer')[0]
        colors = container.find_all(class_='colorInfoContainer')
        # Parse here and append it to the list
        for color in colors:
           color_names.append(color.select(".colorName")[0].text.strip())
           color_codes.append(color.select(".colorCode")[0].text.strip()) 
        # Click next page
        if page_num <17:
           await page.click(next_page_locator)
        page_num+=1
        time.sleep(1)

  page.wait_for_load_state('domcontentloaded')
  page.locator("text=Decline Offer").click()


parsing page number 1
parsing page number 2
parsing page number 3
parsing page number 4
parsing page number 5
parsing page number 6
parsing page number 7
parsing page number 8
parsing page number 9
parsing page number 10
parsing page number 11
parsing page number 12
parsing page number 13
parsing page number 14
parsing page number 15
parsing page number 16
parsing page number 17


In [95]:
# Sanity check: both lists should report the same number of entries.
name_count = len(color_names)
code_count = len(color_codes)
print(name_count)
print(code_count)
# Last scraped name, for comparison with the final tile on the site.
print(color_names[-1])

796
796
Raleigh Sorrel


In [96]:
# Pair each scraped name with its code and load the pairs into a frame.
neutral_rows = list(zip(color_names, color_codes))
neutral_colors = pd.DataFrame(neutral_rows, columns=['color_name', 'color_code'])
neutral_colors

Unnamed: 0,color_name,color_code
0,White Winged Dove,1457
1,Angelica,AF-665
2,Lilac Hush,CSP-490
3,Gotham,CSP-385
4,Coachman's Cape®,CSP-90
...,...,...
791,Franklin White,CW-200
792,Randolph Bisque,CW-185
793,Raleigh Tan,CW-190
794,Chowning's Tan,CW-195


In [97]:
# Tag every row with its color family.
neutral_colors = neutral_colors.assign(color_fam='neutral')
neutral_colors

Unnamed: 0,color_name,color_code,color_fam
0,White Winged Dove,1457,neutral
1,Angelica,AF-665,neutral
2,Lilac Hush,CSP-490,neutral
3,Gotham,CSP-385,neutral
4,Coachman's Cape®,CSP-90,neutral
...,...,...,...
791,Franklin White,CW-200,neutral
792,Randolph Bisque,CW-185,neutral
793,Raleigh Tan,CW-190,neutral
794,Chowning's Tan,CW-195,neutral


In [98]:
# get white colors
# NOTE: if it has a lot of pages, a popup will appear, must manually click "Decline offer."
url = 'https://www.benjaminmoore.com/en-us/paint-colors/white'

async with async_playwright() as p:
    browser = await p.chromium.launch(headless=False)  # Change to headless=True to run in the background
    context = await browser.new_context()
    page = await context.new_page()
    await page.goto(url)
    
    page.wait_for_load_state('domcontentloaded')
    color_names = []
    color_codes = []
    # This is the "next page" button
    next_page_locator = "//ul[@class='Paginationstyles__List-sc-vsxlli-1 fHZOVO']/li[@class='Paginationstyles__ListItem-sc-vsxlli-2 Paginationstyles__Arrow-sc-vsxlli-4 hKHaro kVaZpH']/button[@aria-label='Go to next page']"
    page_num = 1
    
    while page_num < 5:
        print(f'parsing page number {page_num}')
        page.locator("text=Decline Offer").click()
        await page.is_visible('tbody')
        html = await page.inner_html('div.ColorByFamilyDetailstyles__ColorByFamilyDetailWrapper-sc-zolz9i-0.fegvrJ.colorByFamilyDetail') 
        soup = bs(html, 'html.parser')
        container = soup.select('.gridContainer')[0]
        colors = container.find_all(class_='colorInfoContainer')
        # Parse here and append it to the list
        for color in colors:
           color_names.append(color.select(".colorName")[0].text.strip())
           color_codes.append(color.select(".colorCode")[0].text.strip()) 
        # Click next page
        if page_num <4:
           await page.click(next_page_locator)
        page_num+=1
        time.sleep(1)

  page.wait_for_load_state('domcontentloaded')
  page.locator("text=Decline Offer").click()


parsing page number 1
parsing page number 2
parsing page number 3
parsing page number 4


In [99]:
# Sanity check: both lists should report the same number of entries.
name_count = len(color_names)
code_count = len(color_codes)
print(name_count)
print(code_count)
# Last scraped name, for comparison with the final tile on the site.
print(color_names[-1])

172
172
Geddy White


In [100]:
# Pair each scraped name with its code and load the pairs into a frame.
white_rows = list(zip(color_names, color_codes))
white_colors = pd.DataFrame(white_rows, columns=['color_name', 'color_code'])
white_colors

Unnamed: 0,color_name,color_code
0,White Opulence,OC-69
1,Alabaster,OC-129
2,Gardenia,AF-10
3,Pink Damask,OC-72
4,Sand Dollar,OC-71
...,...,...
167,White Diamond,OC-61
168,Harwood Putty,CW-5
169,Capitol White,CW-10
170,Parish White,CW-15


In [101]:
# Tag every row with its color family.
white_colors = white_colors.assign(color_fam='white')
white_colors

Unnamed: 0,color_name,color_code,color_fam
0,White Opulence,OC-69,white
1,Alabaster,OC-129,white
2,Gardenia,AF-10,white
3,Pink Damask,OC-72,white
4,Sand Dollar,OC-71,white
...,...,...,...
167,White Diamond,OC-61,white
168,Harwood Putty,CW-5,white
169,Capitol White,CW-10,white
170,Parish White,CW-15,white


In [104]:
# Stack the nine per-family frames into one table with a fresh 0..n-1 index.
# NOTE(review): in the saved output the first "red" rows are white-family colors
# (e.g. "White Opulence"), so red_colors should be rebuilt before trusting this table.
family_frames = [
    red_colors,
    orange_colors,
    yellow_colors,
    green_colors,
    blue_colors,
    purple_colors,
    gray_colors,
    neutral_colors,
    white_colors,
]
colors_df = pd.concat(family_frames, ignore_index=True, sort=False)
colors_df

Unnamed: 0,color_name,color_code,color_fam
0,White Opulence,OC-69,red
1,Alabaster,OC-129,red
2,Gardenia,AF-10,red
3,Pink Damask,OC-72,red
4,Sand Dollar,OC-71,red
...,...,...,...
3584,White Diamond,OC-61,white
3585,Harwood Putty,CW-5,white
3586,Capitol White,CW-10,white
3587,Parish White,CW-15,white


In [105]:
# Count the distinct color names in the combined table (fewer than its row
# count whenever a name occurs in more than one row).
colors_df['color_name'].nunique()

2796

In [106]:
# Keep the first occurrence of every color name; the original row labels are preserved.
# Note: this rebinds color_names (previously the scrape's list) to a pandas Series.
is_repeat = colors_df['color_name'].duplicated()
color_names = colors_df.loc[~is_repeat, 'color_name']
color_names

0        White Opulence
1             Alabaster
2              Gardenia
3           Pink Damask
4           Sand Dollar
             ...       
3412     Franklin White
3413    Randolph Bisque
3414        Raleigh Tan
3415     Chowning's Tan
3416     Raleigh Sorrel
Name: color_name, Length: 2796, dtype: object

In [107]:
# Turn the Series of unique names into a one-column frame; reset_index moves the
# old row labels into an "index" column and renumbers the rows from 0.
names_frame = color_names.to_frame()
names_df = names_frame.reset_index()
names_df

Unnamed: 0,index,color_name
0,0,White Opulence
1,1,Alabaster
2,2,Gardenia
3,3,Pink Damask
4,4,Sand Dollar
...,...,...
2791,3412,Franklin White
2792,3413,Randolph Bisque
2793,3414,Raleigh Tan
2794,3415,Chowning's Tan


In [108]:
# Discard the leftover "index" column so only color_name remains.
# errors='ignore' keeps this cell re-runnable: on a second run the column is
# already gone and a plain drop would raise KeyError.
names_df = names_df.drop(columns=['index'], errors='ignore')
names_df

Unnamed: 0,color_name
0,White Opulence
1,Alabaster
2,Gardenia
3,Pink Damask
4,Sand Dollar
...,...
2791,Franklin White
2792,Randolph Bisque
2793,Raleigh Tan
2794,Chowning's Tan


In [109]:
# Save the de-duplicated color names to all_colors.csv, omitting the row index.
names_df.to_csv('all_colors.csv', index = False)

In [110]:
# Save the combined per-family table (name, code, family) to family_pages.csv, omitting the row index.
colors_df.to_csv('family_pages.csv', index = False)