# Scraping Links

In [1]:
# Scrape product links from saatva.com

import requests
from bs4 import BeautifulSoup
import re

product_links = {}

for product_type in ['mattresses', 'furniture', 'bedding']:

    url = f"https://www.saatva.com/{product_type}"
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')

    product_type_links = ["https://www.saatva.com" + link['href'] for link in soup.find_all('a', href=re.compile(f'.*/{product_type}/.*'))]

    product_links[product_type] = [i for i in set(product_type_links)]

product_links

{'mattresses': ['https://www.saatva.com/mattresses/saatva-classic',
  'https://www.saatva.com/mattresses/saatva-hd',
  'https://www.saatva.com/mattresses/saatva-rx',
  'https://www.saatva.com/mattresses/saatva-youth',
  'https://www.saatva.com/mattresses/zenhaven',
  'https://www.saatva.com/mattresses/dog-bed',
  'https://www.saatva.com/mattresses/solaire',
  'https://www.saatva.com/mattresses/loom-and-leaf',
  'https://www.saatva.com/mattresses/memory-foam-hybrid',
  'https://www.saatva.com/mattresses/crib-mattress',
  'https://www.saatva.com/mattresses/saatva-latex-hybrid',
  'https://www.saatva.com/mattresses/compare'],
 'furniture': ['https://www.saatva.com/furniture/kanan-bedroom-rug',
  'https://www.saatva.com/furniture/navi-bedroom-rug',
  'https://www.saatva.com/furniture/adamas-bedroom-rug',
  'https://www.saatva.com/furniture/adjustable-base',
  'https://www.saatva.com/furniture/foundation',
  'https://www.saatva.com/furniture/santorini',
  'https://www.saatva.com/furniture/h

# Get basic product info using selenium

some bs I need to run for selenium to work in colab

In [2]:
%%shell
# Ubuntu no longer distributes chromium-browser outside of snap
#
# Proposed solution: https://askubuntu.com/questions/1204571/how-to-install-chromium-without-snap

# Add debian buster
cat > /etc/apt/sources.list.d/debian.list <<'EOF'
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster.gpg] http://deb.debian.org/debian buster main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster-updates.gpg] http://deb.debian.org/debian buster-updates main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-security-buster.gpg] http://deb.debian.org/debian-security buster/updates main
EOF

# Add keys
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys DCC9EFBF77E11517
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 648ACFD622F3D138
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 112695A0E562B32A

apt-key export 77E11517 | gpg --dearmour -o /usr/share/keyrings/debian-buster.gpg
apt-key export 22F3D138 | gpg --dearmour -o /usr/share/keyrings/debian-buster-updates.gpg
apt-key export E562B32A | gpg --dearmour -o /usr/share/keyrings/debian-security-buster.gpg

# Prefer debian repo for chromium* packages only
# Note the double-blank lines between entries
cat > /etc/apt/preferences.d/chromium.pref << 'EOF'
Package: *
Pin: release a=eoan
Pin-Priority: 500


Package: *
Pin: origin "deb.debian.org"
Pin-Priority: 300


Package: chromium*
Pin: origin "deb.debian.org"
Pin-Priority: 700
EOF

# Install chromium and chromium-driver
apt-get update
apt-get install chromium chromium-driver

# Install selenium
pip install selenium

Executing: /tmp/apt-key-gpghome.RpWDJlE00t/gpg.1.sh --keyserver keyserver.ubuntu.com --recv-keys DCC9EFBF77E11517
gpg: key DCC9EFBF77E11517: public key "Debian Stable Release Key (10/buster) <debian-release@lists.debian.org>" imported
gpg: Total number processed: 1
gpg:               imported: 1
Executing: /tmp/apt-key-gpghome.7mquEc6vr2/gpg.1.sh --keyserver keyserver.ubuntu.com --recv-keys 648ACFD622F3D138
gpg: key DC30D7C23CBBABEE: public key "Debian Archive Automatic Signing Key (10/buster) <ftpmaster@debian.org>" imported
gpg: Total number processed: 1
gpg:               imported: 1
Executing: /tmp/apt-key-gpghome.GvNkfOZEcF/gpg.1.sh --keyserver keyserver.ubuntu.com --recv-keys 112695A0E562B32A
gpg: key 4DFAB270CAA96DFA: public key "Debian Security Archive Automatic Signing Key (10/buster) <ftpmaster@debian.org>" imported
gpg: Total number processed: 1
gpg:               imported: 1
Get:1 http://deb.debian.org/debian buster InRelease [122 kB]
Get:2 http://deb.debian.org/debian bust



In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

service = Service(executable_path=r'/usr/bin/chromedriver')
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--disable-blink-features=AutomationControlled')
driver = webdriver.Chrome(service=service, options=options)

In [4]:
all_products_dict = {}

In [5]:
# 19 mins
import time
import numpy as np
bad_links = []

# iterate over the types of products
for product_type in ['mattresses', 'furniture', 'bedding']:

    # iterate over the links for each type of product
    for link in product_links[product_type]:

        if 'compare' in link:
            continue

        # skip the link if it's already in the dictionary
        if link.replace('https://www.saatva.com', '') in all_products_dict:
            print(f'ALREADY DONE: {link}')
            continue

        # load the page of the current and check if it loaded correctly
        print(f'LOADING: {link}')
        driver.get(link)
        soup = BeautifulSoup(driver.page_source, "html.parser")
        try:
            title = soup.find_all('div', class_='productPanel__title--container')[0]
        except IndexError:
            print(f'FAILED: {link}')
            bad_links.append(link)
            continue

        # create product dictionary from data on website
        product_dict = {}

        product_dict['link'] = link
        product_dict['product_name'] = title.find_all('h1')[0].text
        product_dict['product_desc'] = title.find_all('p')[0].text
        product_dict['product_type'] = product_type

        # get rating and num reviews if possible
        try:
            product_dict['product_rating'] = float(title.find_all('span')[0].text.split()[0])
            product_dict['num_reviews'] = int(title.find_all('a')[0].text.split()[0])
        except ValueError:
            product_dict['product_rating'] = np.nan
            product_dict['num_reviews'] = 0

        finally:
            # driver.quit()
            all_products_dict[link.replace('https://www.saatva.com', '')] = product_dict
            # time.sleep(5)
        print(product_dict)

all_products_dict
driver.quit()

LOADING: https://www.saatva.com/mattresses/crib-mattress
{'link': 'https://www.saatva.com/mattresses/crib-mattress', 'product_name': 'Crib Mattress', 'product_desc': 'Our dual-sided nontoxic crib mattress for little ones', 'product_type': 'mattresses', 'product_rating': 4.8, 'num_reviews': 4}
LOADING: https://www.saatva.com/mattresses/saatva-youth
{'link': 'https://www.saatva.com/mattresses/saatva-youth', 'product_name': 'Saatva Youth Mattress', 'product_desc': 'Our versatile innerspring youth mattress designed for growing kids', 'product_type': 'mattresses', 'product_rating': 4.9, 'num_reviews': 18}
LOADING: https://www.saatva.com/mattresses/saatva-hd
{'link': 'https://www.saatva.com/mattresses/saatva-hd', 'product_name': 'Saatva HD Mattress', 'product_desc': 'The first luxury hybrid innerspring for bigger bodies', 'product_type': 'mattresses', 'product_rating': 4.9, 'num_reviews': 119}
LOADING: https://www.saatva.com/mattresses/saatva-classic
{'link': 'https://www.saatva.com/mattress

# Scrape price and size information using selenium

Mattress Additional Info

In [6]:
driver = webdriver.Chrome(service=service, options=options)
# iterate over different sizes and firmesses of mattresses
for size in ['king', 'queen', 'full', 'twin', 'crib', 'split+king','cal+king', 'split+cal+king', 'twin+xl']:

    for firmness in ['plush+soft', 'medium+firm', 'firm']:
        print(size, firmness)

        # use saatva search tool to get all mattresses that fit size and firmness parameters
        link = f'https://www.saatva.com/mattresses?sizes={size}&firmnesses={firmness}'
        driver.get(link)
        soup = BeautifulSoup(driver.page_source, "html.parser")
        relevant_beds = soup.find_all('div', class_="col col--xs-12 col--sm-8 col--md-6 col--lg-4 u-flexAlign--center u-marginBottom--2xl")

        # get strikethrough price and actual price (price with sale) from each bed
        # multi-price represents that the wbsite gave a price range, we will have to go back later to fix this
        for bed in relevant_beds:
            link = bed.find_all('a')[0]['href']
            price = bed.find_all('div', class_="detailProductTile__price")[0].text.replace("$", "").replace(",", "")
            multi_price = False
            try:
                strikethrough_price = bed.find_all('span', class_="detailProductTile__strikeThrough")[0].text.replace("$", "").replace(",", "")
                price = price[:len(price) - len(strikethrough_price)]
            except IndexError:
                strikethrough_price = price

            try:
                price, strikethrough_price = int(price), int(strikethrough_price)
            except ValueError:
                price = price[-4:]
                strikethrough_price = strikethrough_price[-4:]
                price, strikethrough_price = int(price), int(strikethrough_price)
                multi_price = True

            print(link, price)

            all_products_dict[link][f'{size}_{firmness}'] = price, multi_price,

driver.quit()
all_products_dict

king plush+soft
/mattresses/saatva-classic 2195
/mattresses/solaire 3995
king medium+firm
/mattresses/saatva-classic 2195
/mattresses/loom-and-leaf 2595
/mattresses/saatva-latex-hybrid 2245
/mattresses/solaire 3995
/mattresses/memory-foam-hybrid 1895
/mattresses/zenhaven 3545
/mattresses/saatva-hd 3545
/mattresses/saatva-rx 3395
king firm
/mattresses/saatva-classic 2195
/mattresses/loom-and-leaf 2595
/mattresses/solaire 3995
/mattresses/zenhaven 3545
/mattresses/saatva-hd 3545
queen plush+soft
/mattresses/saatva-classic 1695
/mattresses/solaire 3345
queen medium+firm
/mattresses/saatva-classic 1695
/mattresses/loom-and-leaf 2095
/mattresses/saatva-latex-hybrid 1895
/mattresses/solaire 3345
/mattresses/memory-foam-hybrid 1495
/mattresses/zenhaven 2945
/mattresses/saatva-hd 2945
/mattresses/saatva-rx 2945
queen firm
/mattresses/saatva-classic 1695
/mattresses/loom-and-leaf 2095
/mattresses/solaire 3345
/mattresses/zenhaven 2945
/mattresses/saatva-hd 2945
full plush+soft
/mattresses/saatv

{'/mattresses/crib-mattress': {'link': 'https://www.saatva.com/mattresses/crib-mattress',
  'product_name': 'Crib Mattress',
  'product_desc': 'Our dual-sided nontoxic crib mattress for little ones',
  'product_type': 'mattresses',
  'product_rating': 4.8,
  'num_reviews': 4,
  'crib_firm': (355, False)},
 '/mattresses/saatva-youth': {'link': 'https://www.saatva.com/mattresses/saatva-youth',
  'product_name': 'Saatva Youth Mattress',
  'product_desc': 'Our versatile innerspring youth mattress designed for growing kids',
  'product_type': 'mattresses',
  'product_rating': 4.9,
  'num_reviews': 18,
  'full_plush+soft': (855, False),
  'full_medium+firm': (855, False),
  'twin_plush+soft': (795, False),
  'twin_medium+firm': (795, False),
  'twin+xl_plush+soft': (935, True),
  'twin+xl_medium+firm': (935, True)},
 '/mattresses/saatva-hd': {'link': 'https://www.saatva.com/mattresses/saatva-hd',
  'product_name': 'Saatva HD Mattress',
  'product_desc': 'The first luxury hybrid innerspring f

Furniture additional info

In [8]:
size_dict = {'platform+bed+frames': ['king', 'queen', 'cal+king'],
             'standard+bed+frames': ['king', 'queen', 'cal+king'],
             'adjustable+bases': ['king', 'queen', 'full', 'twin', 'split+king','cal+king', 'split+cal+king', 'twin+xl'],
             'mattress+foundation': ['king', 'queen', 'full', 'twin', 'split+king','cal+king', 'split+cal+king', 'twin+xl'],
             'benches+ottomans': [None],
             'bedroom+rugs': [None],
             'chairs': [None]
             }

material_dict = {'platform+bed+frames': ['linen', 'velvet', 'leather'],
             'standard+bed+frames': ['linen', 'velvet'],
             'adjustable+bases': [None],
             'mattress+foundation': [None],
             'benches+ottomans': ['linen', 'velvet'],
             'bedroom+rugs': [None],
             'chairs': ['linen', 'velvet']
             }

# iterate over different types of furniture, sizes of furniture, and materials of furniture
for type_ in ['platform+bed+frames', 'adjustable+bases', 'benches+ottomans', 'standard+bed+frames', 'mattress+foundation', 'chairs', 'bedroom+rugs']:
    # each furniture type only has certain types of sizes and materials, so selectively iterate over those
    # if we did not do that, queries that do not fit any products return all products of that type instead of none
    for size in size_dict[type_]:
        for material in material_dict[type_]:

            # use saatva search tool to get all furniture that fit parameters
            driver = webdriver.Chrome(service=service, options=options)
            print(type_, size, material)
            link = f'https://www.saatva.com/furniture?sizes={size}&types={type_}&materials={material}'
            driver.get(link)
            soup = BeautifulSoup(driver.page_source, "html.parser")
            relevant_beds = soup.find_all('div', class_="col col--xs-12 col--sm-8 col--md-6 col--lg-4 u-flexAlign--center u-marginBottom--2xl")

            # get strikethrough price and actual price (price with sale) from each
            # multi-price represents that the wbsite gave a price range, we will have to go back later to fix this
            # ignore naming, this is just copied from the mattresses section and I was too lazy to change all the names
            for bed in relevant_beds:
                link = bed.find_all('a')[0]['href']
                price = bed.find_all('div', class_="detailProductTile__price")[0].text.replace("$", "").replace(",", "")
                multi_price = False
                try:
                    strikethrough_price = bed.find_all('span', class_="detailProductTile__strikeThrough")[0].text.replace("$", "").replace(",", "")
                    price = price[:len(price) - len(strikethrough_price)]
                except IndexError:
                    strikethrough_price = price

                try:
                    price, strikethrough_price = int(price), int(strikethrough_price)
                except ValueError:
                    price = price[-4:]
                    strikethrough_price = strikethrough_price[-4:]
                    price, strikethrough_price = int(price), int(strikethrough_price)
                    multi_price = True

                print(link, price)

                all_products_dict[link][f'{type_}_{size}_{material}'] = price, multi_price,

            driver.quit()
            time.sleep(5)
    all_products_dict

platform+bed+frames king linen
/furniture/santorini 1395
/furniture/valencia 1595
/furniture/porto 1595
/furniture/halle 1695
/furniture/copenhagen 1395
/furniture/amalfi-platform-bed 995
/furniture/lucerne 1895
/furniture/sydney 1595
/furniture/santorini-platform-bed 995
/furniture/rhodes 1395
/furniture/halle-storage-platform-bed 1396
/furniture/cassis 2095
platform+bed+frames king velvet
/furniture/santorini 1395
/furniture/valencia 1595
/furniture/porto 1595
/furniture/halle 1695
/furniture/copenhagen 1395
/furniture/amalfi-platform-bed 995
/furniture/lucerne 1895
/furniture/sydney 1595
/furniture/santorini-platform-bed 995
/furniture/rhodes 1395
/furniture/halle-with-storage 2095
/furniture/halle-storage-platform-bed 1396
/furniture/cassis 2095
platform+bed+frames king leather
/furniture/siena-leather 2745
platform+bed+frames queen linen
/furniture/santorini 1295
/furniture/valencia 1395
/furniture/porto 1395
/furniture/halle 1495
/furniture/copenhagen 1295
/furniture/amalfi-platf

bedding additional info

In [10]:
size_dict = {'sheets+type': ['king', 'queen', 'full', 'twin', 'split+king','cal+king', 'split+cal+king', 'twin+xl'],
             'duvet+covers': ['king', 'queen', 'full', 'twin', 'split+king','cal+king', 'split+cal+king', 'twin+xl'],
             'blankets+and+quilts': ['king', 'queen', 'full', 'twin', 'cal+king', 'split+cal+king', 'twin+xl'],
             'towels': [None],
             'pillows+type': ['king', 'queen'],
             'comforters': ['king', 'queen', 'full', 'twin', 'cal+king', 'split+cal+king', 'twin+xl'],
             'mattress+pad+and+toppers': ['king', 'queen', 'full', 'twin', 'split+king','cal+king', 'split+cal+king', 'twin+xl'],
             'eye+masks': [None]
             }

material_dict = {'sheets+type': ['sateen', 'linen', 'percale', 'organic+cotton', 'flannel'],
             'duvet+covers': ['sateen', 'linen', 'percale', 'organic+cotton'],
             'blankets+and+quilts':  ['sateen', 'velvet', 'percale', 'flannel'],
             'towels': [None],
             'pillows+type': ['memory+foam+material', 'natural+latex', 'down+alternative', 'organic+cotton'],
             'comforters': ['percale', 'down+alternative', 'organic+cotton'],
             'mattress+pad+and+toppers': ['memory+foam+material', 'natural+latex', 'organic+cotton'],
             'eye+masks': [None]
             }

# iterate over different types of bedding, sizes of bedding, and materials of bedding
for type_ in ['sheets+type', 'duvet+covers', 'blankets+and+quilts', 'pillows+type', 'comforters', 'mattress+pad+and+toppers', 'eye+masks', 'towels']:
    # initialize driver here because bedding searches are faster and would sometimes crash the driver
    driver = webdriver.Chrome(service=service, options=options)

    # each furniture type only has certain types of sizes and materials, so selectively iterate over those
    # if we did not do that, queries that do not fit any products return all products of that type instead of none
    for size in size_dict[type_]:
        for material in material_dict[type_]:

            # use saatva search tool to get all bedding that fit parameters
            print(type_, size, material)
            link = f'https://www.saatva.com/bedding?types={type_}&sizes={size}&materials={material}'
            driver.get(link)
            soup = BeautifulSoup(driver.page_source, "html.parser")
            relevant_beds = soup.find_all('div', class_="col col--xs-12 col--sm-8 col--md-6 col--lg-4 u-flexAlign--center u-marginBottom--2xl")

            # get strikethrough price and actual price (price with sale) from each
            # multi-price represents that the wbsite gave a price range, we will have to go back later to fix this
            # ignore naming, this is just copied from the mattresses section and I was too lazy to change all the names
            for bed in relevant_beds:
                link = bed.find_all('a')[0]['href']
                price = bed.find_all('div', class_="detailProductTile__price")[0].text.replace("$", "").replace(",", "")
                multi_price = False
                try:
                    strikethrough_price = bed.find_all('span', class_="detailProductTile__strikeThrough")[0].text.replace("$", "").replace(",", "")
                    price = price[:len(price) - len(strikethrough_price)]
                except IndexError:
                    strikethrough_price = price

                try:
                    price, strikethrough_price = int(price), int(strikethrough_price)
                except ValueError:
                    price = price[-4:] if not (type_ == 'pillows+type' and size == 'queen') else price[:4]
                    strikethrough_price = strikethrough_price[-4:] if not (type_ == 'pillows+type' and size == 'queen') else strikethrough_price[:4]
                    price, strikethrough_price = int(price), int(strikethrough_price)
                    multi_price = True

                print(link, price, multi_price)

                try:
                    all_products_dict[link][f'{type_}_{size}_{material}'] = price, multi_price,
                except KeyError:
                    bad_links.append(link)
            time.sleep(10)
    driver.quit()
    time.sleep(30)
all_products_dict

# TODO - towels

eye+masks None None
/bedding/silk-eye-mask 60 False
/bedding/weighted-silk-eye-mask 75 False
towels None None
/bedding/plush-towels 175 True
/bedding/waffle-towels 175 True


{'/mattresses/crib-mattress': {'link': 'https://www.saatva.com/mattresses/crib-mattress',
  'product_name': 'Crib Mattress',
  'product_desc': 'Our dual-sided nontoxic crib mattress for little ones',
  'product_type': 'mattresses',
  'product_rating': 4.8,
  'num_reviews': 4,
  'crib_firm': (355, False)},
 '/mattresses/saatva-youth': {'link': 'https://www.saatva.com/mattresses/saatva-youth',
  'product_name': 'Saatva Youth Mattress',
  'product_desc': 'Our versatile innerspring youth mattress designed for growing kids',
  'product_type': 'mattresses',
  'product_rating': 4.9,
  'num_reviews': 18,
  'full_plush+soft': (855, False),
  'full_medium+firm': (855, False),
  'twin_plush+soft': (795, False),
  'twin_medium+firm': (795, False),
  'twin+xl_plush+soft': (935, True),
  'twin+xl_medium+firm': (935, True)},
 '/mattresses/saatva-hd': {'link': 'https://www.saatva.com/mattresses/saatva-hd',
  'product_name': 'Saatva HD Mattress',
  'product_desc': 'The first luxury hybrid innerspring f

Adding in bad links manually

In [11]:
bad_links

['https://www.saatva.com/bedding/organic-cotton-channel-quilt',
 'https://www.saatva.com/bedding/organic-velvet-quilt',
 '/bedding/organic-cotton-channel-quilt',
 '/bedding/organic-velvet-quilt',
 '/bedding/organic-velvet-quilt',
 '/bedding/organic-cotton-channel-quilt',
 '/bedding/organic-velvet-quilt',
 '/bedding/organic-velvet-quilt',
 '/bedding/organic-cotton-channel-quilt',
 '/bedding/organic-velvet-quilt',
 '/bedding/organic-velvet-quilt',
 '/bedding/organic-cotton-channel-quilt',
 '/bedding/organic-cotton-channel-quilt',
 '/bedding/organic-velvet-quilt',
 '/bedding/organic-velvet-quilt',
 '/bedding/organic-cotton-channel-quilt',
 '/bedding/organic-velvet-quilt',
 '/bedding/organic-velvet-quilt',
 '/bedding/organic-cotton-channel-quilt',
 '/bedding/featherbed-mattress-topper',
 '/bedding/featherbed-mattress-topper',
 '/bedding/featherbed-mattress-topper',
 '/bedding/featherbed-mattress-topper',
 '/bedding/featherbed-mattress-topper',
 '/bedding/featherbed-mattress-topper',
 '/bed

In [12]:
# add in product info manually for products that the web scraper didn't work on
# no idea why this happened but it was easier to add them in after than try and find the root of the problem

all_products_dict['/bedding/organic-velvet-quilt'] = {
    'link': 'https://www.saatva.com/bedding/organic-velvet-quilt',
    'product_name': 'Organic Velvet Diamond Quilt',
    'product_desc': 'A luxuriously plush velvet quilt that brings cozy elegance to your bedroom',
    'product_type': 'bedding',
    'product_rating': np.nan,
    'num_reviews': 0,
    'blankets+and+quilts_full_velvet': (345, False),
    'blankets+and+quilts_queen_velvet': (345, False),
    'blankets+and+quilts_king_velvet': (395, False),
    'blankets+and+quilts_cal+king_velvet': (395, False),

}

all_products_dict['/bedding/organic-cotton-channel-quilt'] = {
    'link': 'https://www.saatva.com/bedding/organic-cotton-channel-quilt',
    'product_name': 'Organic Velvet Diamond Quilt',
    'product_desc': 'A silky-soft, lightweight quilt with a relaxed yet refined lived-in look',
    'product_type': 'bedding',
    'product_rating': np.nan,
    'num_reviews': 0,
    'blankets+and+quilts_twin_organic+cotton': (215, False),
    'blankets+and+quilts_twin+xl_organic+cotton': (215, False),
    'blankets+and+quilts_full_organic+cotton': (295, False),
    'blankets+and+quilts_queen_organic+cotton': (295, False),
    'blankets+and+quilts_king_organic+cotton': (325, False),
    'blankets+and+quilts_cal+king_organic+cotton': (325, False),
}

turn scraped disctionary into customization dictionary so it will be easier for the GPT model to process

In [20]:
# quick reformatting of the product dictionary to put all customizations together

all_products_dict
condensed_products_dict = {}

for link, info_dict in all_products_dict.items():
    item_dict = {}
    customization_dict = {}
    for k, v in info_dict.items():

        if k in ['product_name', 'product_desc', 'product_type', 'product_desc', 'product_type', 'product_rating', 'num_reviews']:
            item_dict[k] = v
        else:
            if k in ['link', 'customizations']:
                continue
            customization_dict[k] = v[0]

    item_dict['customizations'] = customization_dict
    condensed_products_dict[info_dict['link']] = item_dict

condensed_products_dict

{'https://www.saatva.com/mattresses/crib-mattress': {'product_name': 'Crib Mattress',
  'product_desc': 'Our dual-sided nontoxic crib mattress for little ones',
  'product_type': 'mattresses',
  'product_rating': 4.8,
  'num_reviews': 4,
  'customizations': {'crib_firm': 355}},
 'https://www.saatva.com/mattresses/saatva-youth': {'product_name': 'Saatva Youth Mattress',
  'product_desc': 'Our versatile innerspring youth mattress designed for growing kids',
  'product_type': 'mattresses',
  'product_rating': 4.9,
  'num_reviews': 18,
  'customizations': {'full_plush+soft': 855,
   'full_medium+firm': 855,
   'twin_plush+soft': 795,
   'twin_medium+firm': 795,
   'twin+xl_plush+soft': 935,
   'twin+xl_medium+firm': 935}},
 'https://www.saatva.com/mattresses/saatva-hd': {'product_name': 'Saatva HD Mattress',
  'product_desc': 'The first luxury hybrid innerspring for bigger bodies',
  'product_type': 'mattresses',
  'product_rating': 4.9,
  'num_reviews': 119,
  'customizations': {'king_med

fix prices for when the scraper did not get the correct prices

In [21]:
# fix for the multi-price issue listed above
# again, don;t know why this happens but it was easier to jsut put it in manually

price_dicts = {
    'https://www.saatva.com/furniture/adjustable-base-plus': {'twin': 1049, 'twin+xl': 1099, 'full': 1199, 'queen': 1299, 'king':1795, 'split+king': 2248, 'cal+king': 2248, 'split+cal+king': 2248},
    'https://www.saatva.com/furniture/marbella': {'queen': 1695, 'king':1895, 'cal+king': 1895},
    'https://www.saatva.com/bedding/organic-percale-duvet-cover-set':{'twin': 235, 'twin+xl': 235, 'full': 315, 'queen': 315, 'king': 365, 'cal+king': 365, 'split+cal+king': np.nan},
    'https://www.saatva.com/bedding/heavyweight-comforter': {'twin': 265, 'twin+xl': 265, 'full': 345, 'queen': 345, 'king': 395, 'cal+king': 395, 'split+cal+king': np.nan},
    'https://www.saatva.com/bedding/banded-percale-duvet-set': {'twin': 215, 'twin+xl': 215, 'full': 245, 'queen': 245, 'king': 265, 'cal+king': 265, 'split+cal+king': np.nan},
    'https://www.saatva.com/bedding/down-alternative-comforter': {'twin': 235, 'twin+xl': 235, 'full': 315, 'queen': 315, 'king': 365, 'cal+king': 365, 'split+cal+king': np.nan},
    'https://www.saatva.com/bedding/lightweight-down-alternative-comforter': {'twin': 205, 'twin+xl': 205, 'full': 265, 'queen': 265, 'king': 315, 'cal+king': 315, 'split+cal+king': np.nan},
    'https://www.saatva.com/bedding/linen-duvet-cover-set': {'twin': 295, 'twin+xl': 295, 'full': 395, 'queen': 395, 'king': 445, 'cal+king': 445, 'split+cal+king': np.nan},
    'https://www.saatva.com/bedding/weighted-blanket': {'twin': 345, 'queen': 445, 'king': 445, 'split+cal+king': np.nan},
    'https://www.saatva.com/bedding/embroidered-sateen-duvet-set': {'twin': 215, 'twin+xl': 215, 'full': 245, 'queen': 245, 'king': 265, 'cal+king': 265, 'split+cal+king': np.nan},
    'https://www.saatva.com/bedding/waffle-knit-duvet-cover-set': {'twin': 225, 'twin+xl': 225, 'full': 275, 'queen': 275, 'king': 295, 'cal+king': 295, 'split+cal+king': np.nan},
    'https://www.saatva.com/bedding/organic-sateen-duvet-cover-set': {'twin': 195, 'twin+xl': 195, 'full': 225, 'queen': 225, 'king': 245, 'cal+king': 245, 'split+cal+king': np.nan},

}

for link in price_dicts:
    output_dict = {}
    dict_to_fix = condensed_products_dict[link]['customizations']

    for customization, price in dict_to_fix.items():
        potential_size = customization.split('_')

        for size in potential_size:
            if size in price_dicts[link].keys() and price_dicts[link][size] == price_dicts[link][size]:
                output_dict[customization] = price_dicts[link][size]

    condensed_products_dict[link]['customizations'] = output_dict

condensed_products_dict['https://www.saatva.com/furniture/adjustable-base-plus']

{'product_name': 'Saatva Adjustable Base Plus',
 'product_desc': 'Our deluxe adjustable base with even more customized comfort options',
 'product_type': 'furniture',
 'product_rating': nan,
 'num_reviews': 0,
 'customizations': {'adjustable+bases_king_None': 1795,
  'adjustable+bases_queen_None': 1299,
  'adjustable+bases_full_None': 1199,
  'adjustable+bases_twin_None': 1049,
  'adjustable+bases_split+king_None': 2248,
  'adjustable+bases_cal+king_None': 2248,
  'adjustable+bases_split+cal+king_None': 2248,
  'adjustable+bases_twin+xl_None': 1099}}

In [25]:
import pandas as pd

condensed_products_df = pd.DataFrame.from_dict(condensed_products_dict, orient='index')
condensed_products_df

Unnamed: 0,product_name,product_desc,product_type,product_rating,num_reviews,customizations
https://www.saatva.com/mattresses/crib-mattress,Crib Mattress,Our dual-sided nontoxic crib mattress for litt...,mattresses,4.8,4,{'crib_firm': 355}
https://www.saatva.com/mattresses/saatva-youth,Saatva Youth Mattress,Our versatile innerspring youth mattress desig...,mattresses,4.9,18,"{'full_plush+soft': 855, 'full_medium+firm': 8..."
https://www.saatva.com/mattresses/saatva-hd,Saatva HD Mattress,The first luxury hybrid innerspring for bigger...,mattresses,4.9,119,"{'king_medium+firm': 3545, 'king_firm': 3545, ..."
https://www.saatva.com/mattresses/saatva-classic,Saatva Classic Mattress,America's best-selling online luxury innerspring,mattresses,4.8,3000,"{'king_plush+soft': 2195, 'king_medium+firm': ..."
https://www.saatva.com/mattresses/saatva-rx,Saatva Rx,The first luxury mattress specially designed f...,mattresses,,0,"{'king_medium+firm': 3395, 'queen_medium+firm'..."
...,...,...,...,...,...,...
https://www.saatva.com/bedding/plush-towels,Plush Towel Collection,Our reinvented take on the classic luxury hote...,bedding,,0,{'towels_None_None': 175}
https://www.saatva.com/bedding/down-alternative-pillow,Down Alternative Pillow,Hypoallergenic with a plush and airy feel that...,bedding,4.8,24,"{'pillows+type_king_down+alternative': 135, 'p..."
https://www.saatva.com/bedding/herringbone-knit-blanket,Herringbone Knit Blanket,A medium-weight cotton blanket in a classic he...,bedding,,0,{}
https://www.saatva.com/bedding/organic-velvet-quilt,Organic Velvet Diamond Quilt,A luxuriously plush velvet quilt that brings c...,bedding,,0,"{'blankets+and+quilts_full_velvet': 345, 'blan..."


#Scrape Metadata using selenium

In [None]:
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome(service=service, options=options)
product_metadata_dict = {}

# iterate over product links to each page
for product_type in ['mattresses', 'furniture', 'bedding']:

    for link in product_links[product_type]:
        if 'compare' in link:
            continue
        a, b, c, d, e, f, g, h = True, True, True, True, True, True, True, True
        metadata_dict = {}
        driver.get(link)

        # from looking at the bebsite, there were 7 different divs that saatva employed to display information about products
        # this function searched each page for those divs and if it finds them it adds them to a metadata dictionary
        try:
            product_info_section = driver.find_element(By.XPATH, './/section[@id="productPanel"]')
            metadata_dict['product_info'] = product_info_section.text
        except:
            a=False

        try:
            product_value_metadata = driver.find_element(By.XPATH, "//section[contains(translate(@class, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'value')]")
            metadata_dict['product_value'] = product_value_metadata.text
        except:
            b=False

        try:
            furniture_overview_metadata = driver.find_element(By.XPATH, "//div[contains(translate(@class, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'furnitureoverview')]")
            metadata_dict['furniture_overview'] = furniture_overview_metadata.text
        except:
            c=False

        try:
            product_overview_metadata = driver.find_element(By.XPATH, "//div[contains(translate(@class, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'productdetails')]")
            metadata_dict['product_overview'] = product_overview_metadata.text
        except:
            d=False

        try:
            fabric_info_metadata = driver.find_element(By.XPATH, "//section[contains(translate(@class, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'fabric')]")
            metadata_dict['fabric_info'] = fabric_info_metadata.text
        except:
            e=False

        try:
            linen_info_metadata = driver.find_element(By.XPATH, "//section[contains(translate(@class, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'linen')]")
            metadata_dict['linen_info'] = linen_info_metadata.text
        except:
            f=False

        try:
            pillow_info_metadata = driver.find_element(By.XPATH, "//section[contains(translate(@class, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'pillow')]")
            metadata_dict['pillow_info'] = pillow_info_metadata.text
        except:
            g=False

        try:
            towel_info_metadata = driver.find_element(By.XPATH, "//div[contains(translate(@class, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'towel')]")
            metadata_dict['towel_info'] = towel_info_metadata.text
        except:
            h=False


        print(a, end = '\t')
        print(b, end = '\t')
        print(c, end = '\t')
        print(d, end = '\t')
        print(e, end = '\t')
        print(f, end = '\t')
        print(g, end = '\t')
        print(h, end = '\t')
        print(product_type, end='\t')
        print(link)

        product_metadata_dict[link] = metadata_dict

driver.quit()

In [None]:
import json
from google.colab import drive
drive.mount('/content/drive')

with open('/content/drive/MyDrive/Job Search 2023/coding challenge/instalily/data/product_metadata.json', 'w') as f:
    json.dump(product_metadata_dict, f, indent=4)

# Combine product data and metadata

In [None]:
import json
from google.colab import drive
drive.mount('/content/drive')

with open('/content/drive/MyDrive/Job Search 2023/coding challenge/instalily/data/product_metadata.json', 'r') as f:
    product_metadata_dict = json.load(f)

len(product_metadata_dict)

In [30]:
condensed_products_df['metadata'] = condensed_products_df.apply(lambda row: product_metadata_dict[row.name], axis=1)
condensed_products_df

Unnamed: 0,product_name,product_desc,product_type,product_rating,num_reviews,customizations,metadata
https://www.saatva.com/mattresses/crib-mattress,Crib Mattress,Our dual-sided nontoxic crib mattress for litt...,mattresses,4.8,4,{'crib_firm': 355},{'product_info': 'Tap to zoom Crib Mattress Ou...
https://www.saatva.com/mattresses/saatva-youth,Saatva Youth Mattress,Our versatile innerspring youth mattress desig...,mattresses,4.9,18,"{'full_plush+soft': 855, 'full_medium+firm': 8...",{'product_info': 'Tap to zoom Saatva Youth Mat...
https://www.saatva.com/mattresses/saatva-hd,Saatva HD Mattress,The first luxury hybrid innerspring for bigger...,mattresses,4.9,119,"{'king_medium+firm': 3545, 'king_firm': 3545, ...",{'product_info': 'Tap to zoom Saatva HD Mattre...
https://www.saatva.com/mattresses/saatva-classic,Saatva Classic Mattress,America's best-selling online luxury innerspring,mattresses,4.8,3000,"{'king_plush+soft': 2195, 'king_medium+firm': ...",{'product_info': 'Tap to zoom Awarded “Best Ov...
https://www.saatva.com/mattresses/saatva-rx,Saatva Rx,The first luxury mattress specially designed f...,mattresses,,0,"{'king_medium+firm': 3395, 'queen_medium+firm'...",{'product_info': 'Tap to zoom NEW Saatva Rx Th...
...,...,...,...,...,...,...,...
https://www.saatva.com/bedding/plush-towels,Plush Towel Collection,Our reinvented take on the classic luxury hote...,bedding,,0,{'towels_None_None': 175},{'product_info': 'NEW Plush Towel Collection O...
https://www.saatva.com/bedding/down-alternative-pillow,Down Alternative Pillow,Hypoallergenic with a plush and airy feel that...,bedding,4.8,24,"{'pillows+type_king_down+alternative': 135, 'p...",{'product_info': 'Down Alternative Pillow Hypo...
https://www.saatva.com/bedding/herringbone-knit-blanket,Herringbone Knit Blanket,A medium-weight cotton blanket in a classic he...,bedding,,0,{},{'product_info': 'Herringbone Knit Blanket A m...
https://www.saatva.com/bedding/organic-velvet-quilt,Organic Velvet Diamond Quilt,A luxuriously plush velvet quilt that brings c...,bedding,,0,"{'blankets+and+quilts_full_velvet': 345, 'blan...",{}


In [51]:
condensed_products_df.to_csv('/content/drive/MyDrive/Job Search 2023/coding challenge/instalily/data/condensed_product_df.csv')

# Build embeddings dataframe

convert to dataframe

In [1]:
# convert dict to dataframe
# for this dataframe, each row represents a type of product, so a queen and a king size of the same mattress are different rows
# this is so that if the user querys about a firmness or size or material, the vector lookup will hopefully \
# only show products that fit those parameters, and then we can use teh metadata for those specific products \
# for the chatbot. And products that fit some of the user's query but not all will hopefully be omitted or \
# at least lower down in the significance.
# For instance, if a user asks about queen size mattresses, this will hopefully allow the lookup to \
# ignore results for mattresses that only come in twin or twinxl

def get_product_info(product_dict, key, price):
    output_dict = product_dict.copy()

    if output_dict['product_type'] == 'mattresses':
        output_dict['size'], output_dict['firmness'] = key.split('_')
        output_dict['price'], output_dict['multi_price'] = price

        if output_dict['size'] == None:
            del output_dict['size']
        if output_dict['firmness'] == None:
            del output_dict['firmness']

    if output_dict['product_type'] == 'furniture':
        output_dict['type'], output_dict['size'], output_dict['material'] = key.split('_')
        output_dict['price'], output_dict['multi_price'] = price

        if output_dict['material'] == 'momery+foam+material':
            return None

        if output_dict['type'] in [None, "None"]:
            del output_dict['type']
        if output_dict['size'] in [None, "None"]:
            del output_dict['size']
        if output_dict['material'] in [None, "None"]:
            del output_dict['material']


    if output_dict['product_type'] == 'bedding':
        output_dict['type'], output_dict['size'], output_dict['material'] = key.split('_')
        output_dict['price'], output_dict['multi_price'] = price

        if output_dict['material'] == 'momery+foam+material':
            return None

        if output_dict['type'] in [None, "None"]:
            del output_dict['type']
        if output_dict['size'] in [None, "None"]:
            del output_dict['size']
        if output_dict['material'] in [None, "None"]:
            del output_dict['material']


    return output_dict

In [24]:
import pandas as pd

product_df_dict = []
for link, data in all_products_dict.items():
    product_dict = {k:v for k, v in data.items() if k == 'link' or 'product' in k or k=='num_reviews'}

    for k in data.keys():
        if k in product_dict.keys():
            continue

        product_info = get_product_info(product_dict, k, data[k])
        if product_info is not None:
            product_df_dict.append(product_info)
        else:
            print(k)

product_df = pd.DataFrame(product_df_dict)
product_df

Unnamed: 0,link,product_name,product_desc,product_type,product_rating,num_reviews,size,firmness,price,multi_price,type,material
0,https://www.saatva.com/mattresses/saatva-classic,Saatva Classic Mattress,America's best-selling online luxury innerspring,mattresses,4.8,3000,king,plush+soft,2195,False,,
1,https://www.saatva.com/mattresses/saatva-classic,Saatva Classic Mattress,America's best-selling online luxury innerspring,mattresses,4.8,3000,king,medium+firm,2195,False,,
2,https://www.saatva.com/mattresses/saatva-classic,Saatva Classic Mattress,America's best-selling online luxury innerspring,mattresses,4.8,3000,king,firm,2195,False,,
3,https://www.saatva.com/mattresses/saatva-classic,Saatva Classic Mattress,America's best-selling online luxury innerspring,mattresses,4.8,3000,queen,plush+soft,1695,False,,
4,https://www.saatva.com/mattresses/saatva-classic,Saatva Classic Mattress,America's best-selling online luxury innerspring,mattresses,4.8,3000,queen,medium+firm,1695,False,,
...,...,...,...,...,...,...,...,...,...,...,...,...
644,https://www.saatva.com/bedding/organic-cotton-...,Organic Velvet Diamond Quilt,"A silky-soft, lightweight quilt with a relaxed...",bedding,,0,twin+xl,,215,False,blankets+and+quilts,organic+cotton
645,https://www.saatva.com/bedding/organic-cotton-...,Organic Velvet Diamond Quilt,"A silky-soft, lightweight quilt with a relaxed...",bedding,,0,full,,295,False,blankets+and+quilts,organic+cotton
646,https://www.saatva.com/bedding/organic-cotton-...,Organic Velvet Diamond Quilt,"A silky-soft, lightweight quilt with a relaxed...",bedding,,0,queen,,295,False,blankets+and+quilts,organic+cotton
647,https://www.saatva.com/bedding/organic-cotton-...,Organic Velvet Diamond Quilt,"A silky-soft, lightweight quilt with a relaxed...",bedding,,0,king,,325,False,blankets+and+quilts,organic+cotton


fix some scraper mistakes

In [25]:
from sqlalchemy.sql.elements import True_
from tables.tests.common import print_versions
def find_relevent_multi_price(row):
    if row['multi_price'] == False:
        return False

    if row['product_type'] == 'mattresses':
        return False

    if row['type'] == 'bedroom+rugs':
        if row['link'] in product_df.query('type in ["adjustable+bases", "standard+bed+frames", "platform+bed+frames", "mattress+foundation"]')['link'].unique():
            return -1

    if row['link'] in [
        'https://www.saatva.com/bedding/essential-sheet-set',
        'https://www.saatva.com/bedding/organic-mattress-pad',
        'https://www.saatva.com/bedding/linen-sheet-set',
        'https://www.saatva.com/bedding/embroidered-hotel-style-sheets',
        'https://www.saatva.com/bedding/banded-percale-sheet-set',
        'https://www.saatva.com/bedding/microcoil-mattress-topper',
        'https://www.saatva.com/bedding/flannel-sheet-set',
        'https://www.saatva.com/bedding/waterproof-mattress-protector',
        'https://www.saatva.com/bedding/organic-cotton-sheets',
        'https://www.saatva.com/bedding/percale-sheet-set',
        'https://www.saatva.com/bedding/graphite-memory-foam-mattress-topper',
        'https://www.saatva.com/bedding/high-density-foam-mattress-topper',
        'https://www.saatva.com/bedding/sateen-cotton-sheet-set',
        'https://www.saatva.com/bedding/latex-mattress-topper',
        'https://www.saatva.com/furniture/adjustable-base'


        ]:
        if row['size'] == 'split+cal+king':
            return -1
        return False

    if row['link'] in [
        'https://www.saatva.com/bedding/the-saatva-pillow',
        'https://www.saatva.com/bedding/cloud-memory-foam-pillow',
        'https://www.saatva.com/bedding/memory-foam-pillow',
        'https://www.saatva.com/bedding/down-alternative-pillow'
    ]:
        return False

    return row['multi_price']

def fix_multi_price_price(row, price=True):

    price_dicts = {
        'https://www.saatva.com/furniture/adjustable-base-plus': {'twin': 1049, 'twin+xl': 1099, 'full': 1199, 'queen': 1299, 'king':1795, 'split+king': 2248, 'cal+king': 2248, 'split+cal+king': 2248},
        'https://www.saatva.com/furniture/marbella': {'queen': 1695, 'king':1895, 'cal+king': 1895},
        'https://www.saatva.com/bedding/organic-percale-duvet-cover-set':{'twin': 235, 'twin+xl': 235, 'full': 315, 'queen': 315, 'king': 365, 'cal+king': 365, 'split+cal+king': np.nan},
        'https://www.saatva.com/bedding/heavyweight-comforter': {'twin': 265, 'twin+xl': 265, 'full': 345, 'queen': 345, 'king': 395, 'cal+king': 395, 'split+cal+king': np.nan},
        'https://www.saatva.com/bedding/banded-percale-duvet-set': {'twin': 215, 'twin+xl': 215, 'full': 245, 'queen': 245, 'king': 265, 'cal+king': 265, 'split+cal+king': np.nan},
        'https://www.saatva.com/bedding/down-alternative-comforter': {'twin': 235, 'twin+xl': 235, 'full': 315, 'queen': 315, 'king': 365, 'cal+king': 365, 'split+cal+king': np.nan},
        'https://www.saatva.com/bedding/lightweight-down-alternative-comforter': {'twin': 205, 'twin+xl': 205, 'full': 265, 'queen': 265, 'king': 315, 'cal+king': 315, 'split+cal+king': np.nan},
        'https://www.saatva.com/bedding/linen-duvet-cover-set': {'twin': 295, 'twin+xl': 295, 'full': 395, 'queen': 395, 'king': 445, 'cal+king': 445, 'split+cal+king': np.nan},
        'https://www.saatva.com/bedding/weighted-blanket': {'twin': 345, 'queen': 445, 'king': 445, 'split+cal+king': np.nan},
        'https://www.saatva.com/bedding/embroidered-sateen-duvet-set': {'twin': 215, 'twin+xl': 215, 'full': 245, 'queen': 245, 'king': 265, 'cal+king': 265, 'split+cal+king': np.nan},
        'https://www.saatva.com/bedding/waffle-knit-duvet-cover-set': {'twin': 225, 'twin+xl': 225, 'full': 275, 'queen': 275, 'king': 295, 'cal+king': 295, 'split+cal+king': np.nan},
        'https://www.saatva.com/bedding/organic-sateen-duvet-cover-set': {'twin': 195, 'twin+xl': 195, 'full': 225, 'queen': 225, 'king': 245, 'cal+king': 245, 'split+cal+king': np.nan},

    }
    if row['link'] in price_dicts:
        if not price:
            return False
        price_dicts = price_dicts[row['link']]
        try:
            return price_dicts[row['size']]
        except:
            return np.nan

    if not price:
        return row['multi_price']
    return row['price']


product_df['multi_price'] = product_df.apply(find_relevent_multi_price, axis=1)
product_df['multi_price'] = product_df.apply(lambda row: fix_multi_price_price(row, False), axis=1)
product_df['price'] = product_df.apply(lambda row: fix_multi_price_price(row, True), axis=1)
product_df

product_df.query('multi_price == True')

Unnamed: 0,link,product_name,product_desc,product_type,product_rating,num_reviews,size,firmness,price,multi_price,type,material
120,https://www.saatva.com/furniture/adamas-bedroo...,Adamas Handcrafted Bedroom Rug,A versatile rug with a timelessly stylish pattern,furniture,,0,,,2345.0,True,bedroom+rugs,
121,https://www.saatva.com/furniture/myra-bedroom-rug,Myra Handcrafted Bedroom Rug,A handsomely modern rug with a sophisticated y...,furniture,,0,,,2445.0,True,bedroom+rugs,
125,https://www.saatva.com/furniture/cirrus-bedroo...,Cirrus Handcrafted Bedroom Rug,A contemporary & versatile rug with timeless a...,furniture,,0,,,2445.0,True,bedroom+rugs,
154,https://www.saatva.com/furniture/navi-bedroom-rug,Navi Handcrafted Bedroom Rug,A cozy & on-trend rug with a shearling look,furniture,,0,,,1995.0,True,bedroom+rugs,
176,https://www.saatva.com/furniture/kanan-bedroom...,Kanan Handcrafted Bedroom Rug,A modern rug with a grounding texture and an u...,furniture,,0,,,1995.0,True,bedroom+rugs,
177,https://www.saatva.com/furniture/terna-bedroom...,Terna Handcrafted Bedroom Rug,A dimensional area rug brimming with global-in...,furniture,,0,,,1495.0,True,bedroom+rugs,
447,https://www.saatva.com/bedding/waffle-towels,Waffle Towel Collection,Our light & airy towel inspired by the world’s...,bedding,,0,,,175.0,True,towels,
524,https://www.saatva.com/bedding/plush-towels,Plush Towel Collection,Our reinvented take on the classic luxury hote...,bedding,,0,,,175.0,True,towels,


In [26]:
product_df = product_df.query('price == price and multi_price in [True, False]')
product_df

Unnamed: 0,link,product_name,product_desc,product_type,product_rating,num_reviews,size,firmness,price,multi_price,type,material
0,https://www.saatva.com/mattresses/saatva-classic,Saatva Classic Mattress,America's best-selling online luxury innerspring,mattresses,4.8,3000,king,plush+soft,2195.0,False,,
1,https://www.saatva.com/mattresses/saatva-classic,Saatva Classic Mattress,America's best-selling online luxury innerspring,mattresses,4.8,3000,king,medium+firm,2195.0,False,,
2,https://www.saatva.com/mattresses/saatva-classic,Saatva Classic Mattress,America's best-selling online luxury innerspring,mattresses,4.8,3000,king,firm,2195.0,False,,
3,https://www.saatva.com/mattresses/saatva-classic,Saatva Classic Mattress,America's best-selling online luxury innerspring,mattresses,4.8,3000,queen,plush+soft,1695.0,False,,
4,https://www.saatva.com/mattresses/saatva-classic,Saatva Classic Mattress,America's best-selling online luxury innerspring,mattresses,4.8,3000,queen,medium+firm,1695.0,False,,
...,...,...,...,...,...,...,...,...,...,...,...,...
644,https://www.saatva.com/bedding/organic-cotton-...,Organic Velvet Diamond Quilt,"A silky-soft, lightweight quilt with a relaxed...",bedding,,0,twin+xl,,215.0,False,blankets+and+quilts,organic+cotton
645,https://www.saatva.com/bedding/organic-cotton-...,Organic Velvet Diamond Quilt,"A silky-soft, lightweight quilt with a relaxed...",bedding,,0,full,,295.0,False,blankets+and+quilts,organic+cotton
646,https://www.saatva.com/bedding/organic-cotton-...,Organic Velvet Diamond Quilt,"A silky-soft, lightweight quilt with a relaxed...",bedding,,0,queen,,295.0,False,blankets+and+quilts,organic+cotton
647,https://www.saatva.com/bedding/organic-cotton-...,Organic Velvet Diamond Quilt,"A silky-soft, lightweight quilt with a relaxed...",bedding,,0,king,,325.0,False,blankets+and+quilts,organic+cotton


In [None]:
product_df.to_csv('/content/drive/MyDrive/Job Search 2023/coding challenge/instalily/data/product_df.csv')