In [46]:
import json
import os
import random
import time
from datetime import datetime, timedelta
from urllib.parse import quote, urljoin
from urllib.request import Request, urlopen

import numpy as np
from bs4 import BeautifulSoup
from selenium import webdriver

In [50]:
class MyWalmart:
    """Small scraper for Walmart search results and product pages.

    Typical use is ``MyWalmart().get_products('jeans')``, which prints one
    JSON object per product found on the first search-result page.
    """

    BASE_URL = "https://www.walmart.com"
    SEARCH_URL = BASE_URL + "/search?q="

    def get_soup(self, url):
        """
        Fetch a webpage and parse it into a BeautifulSoup object.

        Args:
            url (str): the link string of the webpage

        Returns:
            soup (obj): BeautifulSoup object built with "html.parser"

        Raises:
            urllib.error.URLError: if the page cannot be fetched
                (HTTPError for non-success status codes).
        """
        request = Request(url, headers={'User-Agent': 'Resistance is futile'})
        # Parse while the connection is still open, then release it
        # deterministically instead of waiting for garbage collection.
        with urlopen(request) as response:
            return BeautifulSoup(response, "html.parser")

    def get_product_links_from_page(self, url):
        """
        Collect the product links found on a product search page.

        An anchor counts as a product link when its markup contains both
        'link-identifier=' and '/ip/'.

        Args:
            url (str): link to the search-result page

        Returns:
            links (list): absolute links to individual products, in page
                          order, without duplicates
        """
        soup = self.get_soup(url)
        links = []
        seen = set()
        for item in soup.find_all("a", href=True):
            markup = str(item)  # serialize the tag once, test it twice
            if 'link-identifier=' not in markup or '/ip/' not in markup:
                continue
            # urljoin leaves already-absolute hrefs untouched and inserts
            # the missing "/" for relative ones; plain concatenation did not.
            link = urljoin(self.BASE_URL, item['href'])
            if link not in seen:
                # The same href appearing twice would otherwise trigger a
                # second, identical product-page request.
                seen.add(link)
                links.append(link)
        return links

    @staticmethod
    def _build_search_url(keyword):
        """Return the search URL for *keyword*, words joined by '%20'."""
        # safe="" so that characters such as "/" and "&" cannot alter the
        # structure of the query string.
        terms = (quote(word, safe="") for word in keyword.split())
        return MyWalmart.SEARCH_URL + "%20".join(terms)

    def get_products(self, keyword, gap_min=5, gap_max=10):
        """
        Search for a keyword and print the info of every product found.

        Each product page is fetched after a random pause, and its data is
        printed as one JSON object per line.

        Args:
            keyword (str): example: 'Christmas tree'
            gap_min (float): min sleep time in seconds between requests
            gap_max (float): max sleep time in seconds between requests

        Returns:
            None
        """
        url = self._build_search_url(keyword)

        # get links of individual products with the keyword
        links = self.get_product_links_from_page(url)

        # get information of individual products
        for link in links:
            # Random pause so requests are not fired at a fixed rate.
            time.sleep(np.random.uniform(gap_min, gap_max))
            data = self.get_info_from_product_page(link)
            print(json.dumps(data))

    def get_info_from_product_page(self, url):
        """
        Get info from the webpage of an individual product.

        Extraction is best-effort: if an expected element is missing, the
        fields collected so far are returned (possibly an empty dict).

        Args:
            url (str): link to the product webpage

        Returns:
            data (dict): dictionary with keywords:
                         item, price, link
        """
        soup = self.get_soup(url)
        data = {}

        try:
            data['item'] = soup.find('h1', itemprop="name").getText()
            data["price"] = soup.find('span', itemprop="price").getText()
            data["link"] = str(url)
        except AttributeError:
            # soup.find() returned None because the element is not on the
            # page; keep whatever was extracted before the miss.
            pass
        return data


In [51]:
# Create the scraper instance; constructing it performs no network access.
w = MyWalmart()

In [59]:
# Search for 'jeans' and print one JSON object per product found.
# Each product page is fetched after a random 5-10 second pause.
w.get_products('jeans')

{"item": "George Men's Regular Fit Jean", "price": "$10.44", "link": "https://www.walmart.com/ip/George-Men-s-Regular-Fit-Jean/701236486?athbdg=L1600"}
{"item": "George Men's Regular Fit Jean", "price": "$10.44", "link": "https://www.walmart.com/ip/George-Men-s-Regular-Fit-Jean/974623932?variantFieldId=actual_color"}
{"item": "George Men's Regular Fit Jean", "price": "$10.44", "link": "https://www.walmart.com/ip/George-Men-s-Regular-Fit-Jean/701236486?variantFieldId=actual_color"}
{"item": "George Men's Regular Fit Jean", "price": "$10.44", "link": "https://www.walmart.com/ip/George-Men-s-Regular-Fit-Jean/958341066?variantFieldId=actual_color"}
{"item": "No Boundaries Juniors' High Rise Skinny Jeans", "price": "$9.88", "link": "https://www.walmart.com/ip/No-Boundaries-Juniors-High-Rise-Skinny-Jeans/768337976"}
{"item": "No Boundaries Juniors' High Rise Skinny Jeans", "price": "$9.88", "link": "https://www.walmart.com/ip/No-Boundaries-Juniors-High-Rise-Skinny-Jeans/961358752?variantFiel

{"item": "Lee Women's Midrise Straight Leg Jean", "price": "$21.94", "link": "https://www.walmart.com/ip/Lee-Women-s-Midrise-Straight-Leg-Jean/178550486"}
{"item": "Lee Women's Midrise Straight Jean", "price": "$21.94", "link": "https://www.walmart.com/ip/Lee-Women-s-Midrise-Straight-Jean/849178050?variantFieldId=actual_color"}
{"item": "Lee Women's Midrise Straight Leg Jean", "price": "$21.94", "link": "https://www.walmart.com/ip/Lee-Women-s-Midrise-Straight-Leg-Jean/288602017?variantFieldId=actual_color"}
{"item": "Lee Women's Midrise Straight Leg Jean", "price": "$21.94", "link": "https://www.walmart.com/ip/Lee-Women-s-Midrise-Straight-Leg-Jean/178550486?variantFieldId=actual_color"}
