## Scraping of Walmart Customer Reviews (while loop)

In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Reviews listing of a single product; replace the numeric ID with the desired product ID.
url = 'https://www.walmart.com/reviews/product/3053001212?page=1'

# Browser-like request headers sent with every page request.
# https://httpbin.org/get is handy for inspecting the headers a client sends.
# NOTE(review): "br" and "zstd" are advertised here; confirm the installed
# requests/urllib3 stack can decode them, otherwise the response text may be unreadable.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/126.0.0.0 Safari/537.36"
    ),
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.7"
    ),
}

# Accumulates one dict per scraped review across all pages.
reviewlist = list()


def get_soup(url, timeout=30):
  """Download ``url`` and parse the response body as HTML.

  Args:
    url: Address of the page to fetch.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout ``requests`` can block indefinitely on a stalled connection.

  Returns:
    A ``BeautifulSoup`` tree built from the response text with the
    built-in ``html.parser``.

  Raises:
    requests.exceptions.RequestException: If the request fails or times out.
  """
  # The module-level ``header`` dict supplies the browser-like request headers.
  page = requests.get(url, headers=header, timeout=timeout)
  return BeautifulSoup(page.text, 'html.parser')


def get_reviews(soup):
  """Collect the reviews found on one parsed page and locate the next page.

  Every review that can be fully parsed is appended to the module-level
  ``reviewlist`` as a dict with the keys ``name``, ``rating``, ``verified``,
  ``date``, ``title`` and ``body``.

  Args:
    soup: Parsed HTML of a reviews page (as returned by ``get_soup``).

  Returns:
    The absolute URL of the next reviews page, or ``None`` when the page has
    no usable 'Next Page' link.
  """
  # Function-scope import keeps this block self-contained.
  from urllib.parse import urljoin

  reviews = soup.find_all('li', {'class': "dib w-100 mb3"})
  print(f'Found {len(reviews)} reviews on this page.')
  for item in reviews:
    # Guard each review on its own: ``find`` returns None for a missing
    # element, and a single incomplete review must not discard the
    # remaining reviews on the page.
    try:
      review = {
          'name': item.find('div', {'class': "f6 gray pr2 mb2"}).text.strip(),
          'rating': item.find('span', {'class': "w_iUH7"}).text.replace(' out of 5 stars review', '').strip(),
          'verified': bool(item.find('span', {'class': "green b mr1"})),
          'date': item.find('div', {'class': "f7 gray mt1"}).string,
          'title': item.find('h3').text.strip(),
          'body': item.find('span', {'class': "tl-m mb3 db-m"}).text.strip(),
      }
    except (AttributeError, KeyError):
      continue
    reviewlist.append(review)

  # Look for the "Next Page" link; ``string=`` replaces the deprecated ``text=``.
  next_page_button = soup.find('a', string='Next Page')
  if next_page_button and next_page_button.get('href'):
    # The href may be relative, so resolve it against the page currently
    # being scraped (the module-level ``url``).
    return urljoin(url, next_page_button['href'])

  print('Reached last page.')
  return None


# Loop through all review pages
page_number = 1
while True:
  soup = get_soup(url)
  print(f'Getting page: {page_number}')
  next_page_url = get_reviews(soup)
  page_number += 1
  # Exit loop if there is no next page, or if the link points back at the
  # page just scraped (which would otherwise loop forever).
  if not next_page_url or next_page_url == url:
    break
  url = next_page_url

# Turn the collected review dicts into a table and write it to an Excel workbook.
output_file = 'walmart_reviews.xlsx'
df = pd.DataFrame(data=reviewlist)
df.to_excel(output_file, index=False)
# CSV alternative: df.to_csv(r'/data/customer_reviews.csv', index = False)
print('Data has been exported successfully...')
