In [1]:
import requests
from bs4 import BeautifulSoup
import time
import pandas as pd

In [5]:
def scrape_page(page_number):
    url = f"https://www.trustpilot.com/review/www.amazon.com?page={page_number}"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html.parser")

    reviews = []

    # Find all review containers
    review_divs = soup.find_all('div', class_='styles_reviewCardInner__EwDq2')

    for review_div in review_divs:
        # Extract reviewer name
        reviewer_name_tag = review_div.find('span', class_='typography_heading-xxs__QKBS8', attrs={"data-consumer-name-typography": "true"})
        reviewer_name = reviewer_name_tag.get_text(strip=True) if reviewer_name_tag else "Name not found"

        # Extract reviewer profile link
        reviewer_profile_link_tag = review_div.find('a', class_='link_internal__7XN06', attrs={"data-consumer-profile-link": "true"})
        reviewer_profile_link = reviewer_profile_link_tag['href'] if reviewer_profile_link_tag else "Profile link not found"

        # Extract reviewer country
        reviewer_country_tag = review_div.find('div', class_='typography_body-m__xgxZ_ typography_appearance-subtle__8_H2l styles_detailsIcon__Fo_ua')
        reviewer_country = reviewer_country_tag.find('span').text if reviewer_country_tag else "Country not found"

        # Extract review count
        review_count_tag = review_div.find('span', class_='typography_body-m__xgxZ_ typography_appearance-subtle__8_H2l', attrs={'data-consumer-reviews-count-typography': 'true'})
        review_count = review_count_tag.text.strip() if review_count_tag else "Review count not found"

        # Extract review date
        review_date_tag = review_div.find('time', attrs={'data-service-review-date-time-ago': 'true'})
        review_date = review_date_tag['datetime'] if review_date_tag else "Review date not found"

        # Extract rating
        rating_tag = review_div.find('div', class_='star-rating_starRating__4rrcf')

        # Extract review title
        review_title_tag = review_div.find('h2', class_='typography_heading-s__f7029')
        review_title = review_title_tag.text if review_title_tag else "Review title not found"

        # Extract review text
        review_text_tag = review_div.find('p', class_='typography_body-l__KUYFJ typography_appearance-default__AAY17 typography_color-black__5LYEn')
        review_text = review_text_tag.text if review_text_tag else "Review text not found"

        # Extract date of experience
        date_experience_tag = review_div.find('p', class_='typography_body-m__xgxZ_ typography_appearance-default__AAY17', attrs={'data-service-review-date-of-experience-typography': 'true'})
        date_experience = date_experience_tag.get_text(strip=True).split(':', 1)[-1].strip() if date_experience_tag else "Date of experience not found"

        reviews.append({
            "Reviewer Name": reviewer_name,
            "Profile Link": reviewer_profile_link,
            "Country": reviewer_country,
            "Review Count": review_count,
            "Review Date": review_date,
            "Review Title": review_title,
            "Review Text": review_text,
            "Date of Experience": date_experience
        })

    return reviews

In [3]:
def scrape_pages(start_page, end_page):
    all_reviews = []
    for page_number in range(start_page, end_page + 1):
        reviews = scrape_page(page_number)
        all_reviews.extend(reviews)

        # Delay after every 10 pages
        if page_number % 10 == 0:
            print(f"Scraped {page_number} pages. Waiting for 2 seconds...")
            time.sleep(2)

        # Delay after every 100 pages
        if page_number % 100 == 0:
            print(f"Scraped {page_number} pages. Waiting for 5 minutes...")
            time.sleep(300)

    return all_reviews

In [6]:
start_page = 1
end_page = 5
Amazon_reviews = scrape_pages(start_page, end_page)
df1 = pd.DataFrame(Amazon_reviews)

In [7]:
df1.head()

Unnamed: 0,Reviewer Name,Profile Link,Country,Review Count,Review Date,Review Title,Review Text,Date of Experience
0,Floscelia Dixon,/users/6700541cabc8f2c807f63ae7,US,2 reviews,2024-10-04T23:49:29.000Z,Amazon is worthy for all shopping needs!,"the variety, reasonable prices amazon has item...","October 04, 2024"
1,Antonia Clarke,/users/64ce0da54861160012604e19,GB,3 reviews,2024-10-04T10:16:51.000Z,I’ll never use Amazon over £10 again…,I use Amazon all the time and there great as l...,"July 31, 2024"
2,Doc Holiday,/users/5f2dfe25a2b0e2aa4bbf0848,US,4 reviews,2024-10-04T03:33:37.000Z,AMAZON SCREWED US/ and themselves !!!,Jeff Bezo took a page out of ol Sam Waltons ha...,"October 04, 2024"
3,C L Mosley,/users/623a2651f35cf600135c9d09,US,2 reviews,2024-10-04T09:10:10.000Z,Placed an order to be delivered same…,Placed an order to be delivered same day. It ...,"October 01, 2024"
4,Margaret Davies,/users/536b90dc00006400017261db,GB,38 reviews,2024-10-03T20:21:54.000Z,Amazon should be ashamed of themselves,Ordered two items 29/9/24 prime member so deli...,"September 30, 2024"
