# Trustpilot Scraper
*Not for commercial use, per Trustpilot's Terms of Use*

In [12]:
import requests
import json
import math
import time
import pandas as pd
from bs4 import BeautifulSoup

### Configurations

In [13]:
# --- Target and output -------------------------------------------------
# Trustpilot listing page whose reviews are scraped
url = 'https://www.trustpilot.com/review/www.debenhams.com'

# CSV file the collected reviews are written to
save_datafile = 'debenhams_trust_reviews.csv'

# Row accumulator: one entry per review, later turned into the dataframe
final_list = []

# --- Pagination --------------------------------------------------------
# total_pages is the page count used by the scraper; when
# run_pagination_finder is on, it is recomputed from the review count
# divided by results_per_page.
total_pages = 1
run_pagination_finder = True
results_per_page = 20

# --- Throttling --------------------------------------------------------
# When throttle is on, the scraper pauses sleep_time seconds between page
# requests to avoid spamming the site.
sleep_time = 2
throttle = False

# Echo the active settings so the run is self-describing
print(
    f'Scraper set for {url} \nSaving results to {save_datafile}'
    f'\nRun Pagination Finder: {run_pagination_finder} \nThrottling On: {throttle}'
)

Scraper set for https://www.trustpilot.com/review/www.debenhams.com 
Saving results to debenhams_trust_reviews.csv
Run Pagination Finder: True 
Throttling On: False


### Pagination Finder
Get Total Number of Pages of Reviews

In [14]:
## Count amount of pages to scrape
if run_pagination_finder:
    # Fetch the first listing page. The timeout keeps a stalled connection
    # from hanging the notebook, and raise_for_status() surfaces an HTTP
    # error here instead of as a confusing parse failure further down.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'lxml')

    # Get total number of reviews. find() returns None when the element is
    # absent, so fail with an explicit message rather than an AttributeError.
    count_tag = soup.find('span', class_='headline__review-count')
    if count_tag is None:
        raise RuntimeError(
            f'Could not find the review count on {url}; '
            'the page markup may have changed or the request was blocked'
        )
    # The count is rendered with thousands separators (e.g. "2,531")
    rating_count = int(count_tag.text.replace(',', ''))

    # Total pages to scrape: round up so a partially filled last page counts
    total_pages = math.ceil(rating_count / results_per_page)

    print(f'Found total of {total_pages} pages to scrape')

Found total of 127 pages to scrape


### Run Scraper

In [15]:
# Seconds to wait for a single page before giving up on it
request_timeout = 30


def _parse_review(section):
    """Extract one review from a ``review__content`` section.

    Returns ``[title, content, date, rating]``, or ``None`` when the section
    has no review text (such sections are not recorded). A missing or
    unparsable title, date or rating is recorded as an empty string, so one
    malformed review cannot abort the whole run.
    """
    body = section.find('p', class_='review-content__text')
    if body is None:
        return None
    content = body.text.strip()

    # Title: prefer the link text; fall back to the heading's own text when
    # the heading carries no link.
    title = ''
    title_section = section.find('h2', class_='review-content__title')
    if title_section is not None:
        title_link = title_section.find('a')
        title_node = title_link if title_link is not None else title_section
        title = title_node.text.strip()

    # Posted date: the dates element holds JSON; keep only the calendar date
    # part of 'publishedDate' (everything before the 'T').
    date = ''
    dates_node = section.find('div', class_='review-content-header__dates')
    if dates_node is not None:
        try:
            date = json.loads(dates_node.text)['publishedDate'].split('T')[0]
        except (ValueError, KeyError, TypeError, AttributeError):
            date = ''

    # Rating: first character of the star image's alt text
    rating = ''
    stars = section.find('div', class_='star-rating')
    star_img = stars.find('img') if stars is not None else None
    if star_img is not None:
        rating = (star_img.get('alt') or '')[:1]

    return [title, content, date, rating]


# NOTE: rows are appended to final_list, so re-running this cell without
# resetting final_list first adds every review a second time.
for page_number in range(1, total_pages + 1):
    page_url = f'{url}?page={page_number}'

    try:
        r = requests.get(page_url, timeout=request_timeout)
        # An error response would otherwise be parsed as a page with zero
        # reviews, silently dropping that page's data.
        r.raise_for_status()
    except requests.RequestException as err:
        # Report and move on so one bad page does not lose the rest
        print(f'Skipping {page_url}: {err}')
    else:
        soup = BeautifulSoup(r.text, 'lxml')
        for paragraph in soup.find_all('section', class_='review__content'):
            review = _parse_review(paragraph)
            if review is not None:
                final_list.append(review)

    # Pause between requests only; nothing follows the last page
    if throttle and page_number < total_pages:
        time.sleep(sleep_time)

# Save to pandas dataframe
df = pd.DataFrame(final_list, columns=['Title', 'Content', 'Date', 'Rating'])

### Pretty Print Results

In [16]:
# Bare expression: as the last statement of the cell, the notebook renders
# the dataframe of scraped reviews as the cell output.
df

Unnamed: 0,Title,Content,Date,Rating
0,Joke,"One big joke,am sell the car they chargd me me...",2020-04-06,1
1,Missing delivery,My order was allegedly dispatched as per infor...,2020-04-06,1
2,I made an order on 28th of March now…,I made an order on 28th of March now it's 3th ...,2020-04-03,1
3,Great company with fantastic offer,Great company with fantastic offer. They alway...,2020-04-03,5
4,Debenhams has been my favourite…,Debenhams has been my favourite department sto...,2020-03-31,5
5,Excellent service for online order,Excellent service during a difficult period.,2020-03-30,5
6,Regular shopper instore and online,I am a regular shopper on the website and in s...,2020-03-30,4
7,Lovely service,Lovely service. Because of recent events I nee...,2020-03-27,5
8,Luckily I have not had to many problems…,"Luckily I have not had to many problems , love...",2020-03-24,4
9,If u want to make your wedding day…,If u want to make your wedding day special I s...,2020-03-21,1


### Save dataframe to csv

In [17]:
# Write the scraped reviews to the configured CSV file as UTF-8, leaving
# out the dataframe's row index column.
df.to_csv(
    save_datafile,
    index=False,
    encoding='utf-8',
)