# Trustpilot Scraper
*Not for commercial use, per Trustpilot's Terms of Use.*

In [32]:
import requests
import json
import math
import time
import pandas as pd
from bs4 import BeautifulSoup
from IPython.display import clear_output, display

### Configurations

In [33]:
# --- Target and output ------------------------------------------------------
# Trustpilot review listing that the scraper requests (a "?page=N" suffix is
# appended per page by the scraper cell).
url = 'https://www.trustpilot.com/review/www.mountainwarehouse.com'

# CSV file the collected reviews are written to.
# NOTE(review): the file name says "mango" while `url` points at
# mountainwarehouse -- confirm this is the intended output name.
save_datafile = 'mango_trust_reviews.csv'

# --- Pagination -------------------------------------------------------------
# Reviews assumed per listing page; used to turn the review count into a
# page count.
results_per_page = 20
# When True, the pagination cell fetches the review count and overwrites
# total_pages; when False, total_pages below is used as-is.
run_pagination_finder = True
total_pages = 1

# --- Throttling -------------------------------------------------------------
# When enabled, the scraper sleeps sleep_time seconds after every page
# request to avoid spamming the site.
throttle = False
sleep_time = 1

# --- Row accumulator --------------------------------------------------------
# One [title, content, date, rating] list per review; becomes the dataframe.
final_list = []

# Echo the effective settings (trailing spaces kept so output is unchanged).
print(
    f'Scraper set for {url} ',
    f'Saving results to {save_datafile}',
    f'Run Pagination Finder: {run_pagination_finder} ',
    f'Throttling On: {throttle}',
    sep='\n',
)

Scraper set for https://www.trustpilot.com/review/www.mountainwarehouse.com 
Saving results to mango_trust_reviews.csv
Run Pagination Finder: True 
Throttling On: False


### Pagination Finder
Get Total Number of Pages of Reviews

In [34]:
## Count amount of pages to scrape
# Reads the total review count from the first listing page and converts it
# into a number of pages. When run_pagination_finder is False this cell does
# nothing and total_pages keeps the value set in the configuration cell.
if run_pagination_finder:
    # Get page. The timeout stops a stalled connection from hanging the
    # kernel indefinitely, and raise_for_status() reports an HTTP error here
    # instead of letting it surface later as a confusing parse failure.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'lxml')

    # Get total number of reviews
    count_tag = soup.find('span', class_='headline__review-count')
    if count_tag is None:
        # find() returns None when nothing matches; fail with an actionable
        # message rather than an AttributeError on None.
        raise RuntimeError(
            f'Could not find the review count on {url}; the page markup may '
            'have changed. Set run_pagination_finder = False and set '
            'total_pages manually to continue.'
        )
    # Drop comma separators so the text parses as an integer.
    rating_count = int(count_tag.text.replace(',', '').strip())

    # Total pages to scrape (round up so a partial last page is included)
    total_pages = math.ceil(rating_count / results_per_page)

    print(f'Found total of {total_pages} pages to scrape')

Found total of 51 pages to scrape


### Run Scraper

In [35]:
def _parse_review(section):
    """Extract one review row from a ``review__content`` section.

    Returns ``[title, content, date, rating]``, or ``None`` when the section
    has no review text (such reviews are skipped). ``title`` is ``''`` when
    there is no title heading. ``date`` and ``rating`` are ``None`` when the
    corresponding markup is missing or malformed, so one odd review does not
    abort the whole scrape.
    """
    # Review text is mandatory: without it the review is not recorded.
    content_tag = section.find('p', class_='review-content__text')
    if content_tag is None:
        return None
    content = content_tag.text.strip()

    # Title: prefer the link inside the heading, fall back to the heading
    # text itself if the link is absent.
    title_tag = section.find('h2', class_='review-content__title')
    if title_tag is None:
        title = ''
    else:
        title_link = title_tag.find('a')
        title = (title_link if title_link is not None else title_tag).text.strip()

    # Posted date: the dates block holds JSON text; keep only the part of
    # publishedDate before the 'T'.
    date = None
    dates_tag = section.find('div', class_='review-content-header__dates')
    if dates_tag is not None:
        try:
            date = json.loads(dates_tag.text)['publishedDate'].split('T')[0]
        except (ValueError, KeyError, TypeError, AttributeError):
            date = None

    # Rating: first character of the star image's alt text (kept as a string).
    rating = None
    rating_tag = section.find('div', class_='star-rating')
    rating_img = rating_tag.find('img') if rating_tag is not None else None
    alt_text = rating_img.get('alt') if rating_img is not None else None
    if alt_text:
        rating = alt_text[0]

    return [title, content, date, rating]


# Start from an empty list so that re-running this cell does not append a
# second copy of every review to rows collected by an earlier run.
final_list = []
# (page number, HTTP status) for every page that did not return a 2xx/3xx.
failed_pages = []

for page_num in range(1, total_pages + 1):
    page = url + '?page=' + str(page_num)
    # Timeout so a stalled connection cannot hang the kernel indefinitely.
    r = requests.get(page, timeout=30)

    if r.ok:
        soup = BeautifulSoup(r.text, 'lxml')
        for paragraph in soup.find_all('section', class_='review__content'):
            row = _parse_review(paragraph)
            if row is not None:
                final_list.append(row)
    else:
        # Record the failure instead of silently parsing an error page.
        failed_pages.append((page_num, r.status_code))

    # print progress (replaces the previous progress line in the output)
    clear_output(wait=True)
    print(f'scraped page {page_num} of {total_pages}')

    if throttle:
        time.sleep(sleep_time)

# Reported after the loop because clear_output() would wipe earlier messages.
if failed_pages:
    print(f'Skipped pages with HTTP errors (page, status): {failed_pages}')

# Save to pandas dataframe
df = pd.DataFrame(final_list, columns=['Title', 'Content', 'Date', 'Rating'])

scraped page 51 of 51


### Pretty Print Results

In [23]:
# Preview only the first rows; the full table is written to CSV below.
df.head(10)

Unnamed: 0,Title,Content,Date,Rating
0,Customer service diabolical,Customer service diabolical. My £90 order had ...,2020-05-21,1
1,Very efficient returns service,My online shopping experience from Australia w...,2020-05-21,4
2,Just happy refund is received,Item returned on 17April and refund finally we...,2020-05-21,3
3,I was just about to place an order with Mango,"I was just about to place an order, until I re...",2020-05-19,1
4,Horrible customer treatment/service,It is impossible to get in touch with their cu...,2020-05-18,1
5,I purchase a jumpsuit and can't get a…,I purchase a jumpsuit and can't get a confirma...,2020-05-18,1
6,Shopping from Mango is like trying to…,Shopping from Mango is like trying to back wit...,2020-05-18,1
7,I guess I'm not the only one having…,I guess I'm not the only one having issues wit...,2020-05-18,1
8,You cant use gift vouchers online,You cant use gift vouchers online? What a stup...,2020-05-17,1
9,Happy with service and product,"I ordered some clothes about seven days ago, r...",2020-05-15,4


### Save dataframe to csv

In [24]:
# Write the reviews to the configured CSV path as UTF-8, without the
# dataframe index column.
df.to_csv(
    save_datafile,
    index=False,
    encoding='utf-8',
)