### Walmart Webscraping

# Importing required packages

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

### Hitting the website link from where the information needs to be scraped

In [2]:
response = requests.get('https://www.walmart.com/search/?query=bath%20and%20body%20works%20body%20mist&cat_id=1085666&typeahead=bath%20and%20bod')

### Using beautiful soup to convert the content from the above information into a readable format

In [3]:
soup = BeautifulSoup(response.content, 'html.parser')

### Finding the related information from the html id

In [4]:
# Get the product search result
psearch = soup.find(id='searchProductResult')

In [5]:
# Finding all the external links in the search catalog
out1 = psearch.find_all(href=True)    

In [6]:
# Fetch the product description, image reference and product link
cnt = 0
prod_desc = []
prod_img = []
prod_link = []
while(cnt < len(out1)):
    prod_img.append(out1[cnt].find('img')['data-image-src'])
    prod_desc.append(out1[cnt].find('img', alt=True)['alt'])
    prod_link.append('https://walmart.com' + out1[cnt]['href'])
    cnt += 3

In [7]:
#Create a brand variable
prod_brand = []
for item in prod_desc:
    if 'Bath' in item and 'Body' in item and 'Works' in item:
        prod_brand.append("Bath & Body Works")
    else:
        prod_brand.append("NA")

In [8]:
# Fetch the relevant attributes from the product
prod_attributes = []
for item in prod_desc:
    temp = item.split('Works')[1].split(" ")
    cnt = 0
    attr_string = ""
    for attr in temp:
        if len(attr) > 1 and attr.isalpha() and attr not in ('oz', 'At', 'The', 'for'):
            attr_string += "," + attr
        cnt += 1
        if cnt > 4:
            break
    prod_attributes.append(attr_string[1:])

In [9]:
#looking at product attributes
prod_attributes

['Women',
 'Warm,Vanilla,Sugar,Fine',
 'Georgia,Peach,Sweet',
 'Paris,Amour,Fine,Fragrance',
 'Champagne,Apple,Honey',
 'Fine,Fragrance,Mist',
 'Fine,Fragrance,Dark',
 'Sweet,Pea',
 'Signature,Collection,Fragrance,Mist',
 'Twilight,Woods']

In [10]:
# Fetch the quantity of the product (in oz)
prod_quantity = []
for item in prod_desc:
    if '8' in item:
        prod_quantity.append(8.0)
    else:
        prod_quantity.append(4.9)

In [11]:
# Fetching Product reviews 
out2 = psearch.find_all('div', 'search-result-product-rating')
cust_reviews = []
review_links = []
for item in out2:
    review_links.append(item.find('div', 'stars').find(href=True)['href'])
    cust_reviews.append(item.find('div', 'stars').text)

In [12]:
#looking at customer reviews
cust_reviews

['Average rating:4out of5stars, based on7reviews7ratings',
 'Average rating:0out of5stars, based on0reviews',
 'Average rating:0out of5stars, based on0reviews',
 'Average rating:0out of5stars, based on0reviews',
 'Average rating:0out of5stars, based on0reviews',
 'Average rating:0out of5stars, based on0reviews',
 'Average rating:0out of5stars, based on0reviews',
 'Average rating:0out of5stars, based on0reviews',
 'Average rating:0out of5stars, based on0reviews',
 'Average rating:3out of5stars, based on1reviews1ratings']

In [13]:
# Extract ratings and number of reviews
rating_out_of_5 = []
no_of_reviews = []
for item in cust_reviews:
    rating_out_of_5.append(int(item.split(":")[1][:1])) 
    no_of_reviews.append(int(item.split(" on")[1][:1]))

In [14]:
# Product price information
plist = psearch.find_all('div', 'product-price-with-fulfillment')

In [15]:
# Shipping information of a product
plist[0].find('div', 'free-shipping').text

'Free delivery'

In [16]:
# Other attributes of the product such as price, merchant sold by, currency type, type of delivery

prod_price = []
sold_by = []
prod_currency = []
delivery_type = []
for item in plist:
    prod_price.append(float(item.find_all('span', attrs={'class': 'visuallyhidden'})[1].text[1:]))
    sold_by.append(item.find_all('span', attrs={'class': 'marketplace-sold-by-company-name'})[0].text)
    if item.find('div', 'free-shipping'):
        delivery_type.append(item.find('div', 'free-shipping').text)
    else:
        delivery_type.append('Paid Delivery')
    if item.find_all('span', attrs={'class': 'price-currency'})[0].text == '$':
        prod_currency.append('USD')

In [17]:
#creating variables
prod_dict = {'prod-desc': {}} 

In [18]:
#searching for price description to provide information on the cost
psearch.find_all('div', 'search-result-gridview-item')[2].find('div', class_='prod-FlagList-container').text

'Reduced Price'

In [19]:
# Normal price vs price reduced

prod_price_reduced = []
for item in psearch.find_all('div', 'search-result-gridview-item'):
    if item.find('div', class_='prod-FlagList-container'):
        prod_price_reduced.append(item.find('div', class_='prod-FlagList-container').text)
    else:
        prod_price_reduced.append('Normal price')

In [20]:
#looking at variables to see if code worked
prod_price_reduced

['Normal price',
 'Normal price',
 'Reduced Price',
 'Normal price',
 'Reduced Price',
 'Normal price',
 'Normal price',
 'Normal price',
 'Reduced Price',
 'Normal price']

In [21]:
#Cleaning data
product_ids = [i.split("/")[-1].split("#")[0] for i in review_links]

In [22]:
#Removing Bath & Body Works
for i in range(0, len(prod_desc)):
    prod_desc[i] = prod_desc[i].replace("Bath & Body Works", '').replace("by", '').replace("   ",'')

Description = prod_desc

In [23]:
#Creating a dataframe
df = pd.DataFrame.from_dict({ 'Product Id' : product_ids,
                            'Description':prod_desc,
                             'Brand' : prod_brand,
                             'Price': prod_price,
                             'Price_Type': prod_price_reduced,
                             'Currency': prod_currency,
                             'Product_qty_in_oz' : prod_quantity,
                             'Product_Attributes' : prod_attributes,
                             'Sold By': sold_by,
                             'Delivery Type': delivery_type,
                             'Product Link': prod_link,
                             'Product Image Link': prod_img,
                             'Rating_out_of_5' : rating_out_of_5,
                             'Number_of_reviews' : no_of_reviews,
                             'Review Links': review_links,})

# printing it in a proper dataframe

In [24]:
df

Unnamed: 0,Product Id,Description,Brand,Price,Price_Type,Currency,Product_qty_in_oz,Product_Attributes,Sold By,Delivery Type,Product Link,Product Image Link,Rating_out_of_5,Number_of_reviews,Review Links
0,148116836,A Thousand Wishesfor Women - 8 oz Fine Fragran...,Bath & Body Works,12.65,Normal price,USD,8.0,Women,VivaBella,Free delivery,https://walmart.com/ip/A-Thousand-Wishes-by-Ba...,https://i5.walmartimages.com/asr/ecdd0f7c-6f4f...,4,7,/ip/A-Thousand-Wishes-by-Bath-Body-Works-for-W...
1,515406751,Warm Vanilla Sugar Fine Fragrance Mist For Wo...,Bath & Body Works,12.65,Normal price,USD,8.0,"Warm,Vanilla,Sugar,Fine",VivaBella,Free delivery,https://walmart.com/ip/Bath-Body-Works-Warm-Va...,https://i5.walmartimages.com/asr/8cb72467-9319...,0,0,/ip/Bath-Body-Works-Warm-Vanilla-Sugar-Fine-Fr...
2,797145954,Georgia Peach & Sweet Tea 8.0 oz Fine Fragran...,Bath & Body Works,11.01,Reduced Price,USD,8.0,"Georgia,Peach,Sweet",Beauty Encounter,Free delivery,https://walmart.com/ip/Bath-Body-Works-Georgia...,https://i5.walmartimages.com/asr/6938797c-e534...,0,0,/ip/Bath-Body-Works-Georgia-Peach-Sweet-Tea-8-...
3,851317757,Paris Amour Fine Fragrance Mist 8 oz (New Look),Bath & Body Works,15.82,Normal price,USD,8.0,"Paris,Amour,Fine,Fragrance",AMI Ventures Inc,Free delivery,https://walmart.com/ip/Bath-Body-Works-Paris-A...,https://i5.walmartimages.com/asr/b8066280-b2e1...,0,0,/ip/Bath-Body-Works-Paris-Amour-Fine-Fragrance...
4,285320769,Champagne Apple & Honey 4.9 oz Illuminating F...,Bath & Body Works,14.49,Reduced Price,USD,4.9,"Champagne,Apple,Honey",Beauty Encounter,Free delivery,https://walmart.com/ip/Bath-Body-Works-Champag...,https://i5.walmartimages.com/asr/9e00ab88-dd8a...,0,0,/ip/Bath-Body-Works-Champagne-Apple-Honey-4-9-...
5,144436548,Cucumber MelonFine Fragrance Mist 8 oz for Women,Bath & Body Works,18.74,Normal price,USD,8.0,"Fine,Fragrance,Mist",the amazin' jungle,Paid Delivery,https://walmart.com/ip/Cucumber-Melon-by-Bath-...,https://i5.walmartimages.com/asr/e1625a49-83cf...,0,0,/ip/Cucumber-Melon-by-Bath-Body-Works-Fine-Fra...
6,423380663,"Bath and Body Works Fine Fragrance Mist, Dark ...",Bath & Body Works,21.24,Normal price,USD,8.0,"Fine,Fragrance,Dark",the amazin' jungle,Paid Delivery,https://walmart.com/ip/Bath-and-Body-Works-Fin...,https://i5.walmartimages.com/asr/b3d761b2-90b9...,0,0,/ip/Bath-and-Body-Works-Fine-Fragrance-Mist-Da...
7,574955735,Bath Body Works Sweet Pea 8.0 oz Fine Fragranc...,Bath & Body Works,24.99,Normal price,USD,8.0,"Sweet,Pea",the amazin' jungle,Paid Delivery,https://walmart.com/ip/Bath-Body-Works-Sweet-P...,https://i5.walmartimages.com/asr/fbf8164b-9aa3...,0,0,/ip/Bath-Body-Works-Sweet-Pea-8-0-oz-Fine-Frag...
8,983667229,Signature Collection Fragrance Mist Coconut L...,Bath & Body Works,10.75,Reduced Price,USD,8.0,"Signature,Collection,Fragrance,Mist",Beauty Encounter,Paid Delivery,https://walmart.com/ip/Bath-Body-Works-Signatu...,https://i5.walmartimages.com/asr/1250439e-043b...,0,0,/ip/Bath-Body-Works-Signature-Collection-Fragr...
9,352004376,Bath Body Works Twilight Woods 8.0 oz Fine Fra...,Bath & Body Works,18.74,Normal price,USD,8.0,"Twilight,Woods",the amazin' jungle,Free delivery,https://walmart.com/ip/Bath-Body-Works-Twiligh...,https://i5.walmartimages.com/asr/ca721ee3-f9a7...,3,1,/ip/Bath-Body-Works-Twilight-Woods-8-0-oz-Fine...


In [25]:
#Demonstating the success of obtaining 15 variables.
df.shape

(10, 15)