### Importing Packages 

In [None]:
from bs4 import BeautifulSoup
import time

import requests 
import pandas as pd
import pickle

 ### Metacritic Reviews WebScraper 
 
This scraper appends information to a dictionary, which is intended to be converted into a pandas dataframe. It collects info on four features: 

    1) name - name of reviewer
    2) date - date review was posted 
    3) rating - Metacritic score that was given
    4) review - text of the written review 

It uses BeautifulSoup to find the `div` classes where the info is contained. Longer reviews have an 'Expand' option, and their full text is contained within `<span class="blurb blurb_expanded">`. Shorter reviews do not have this span class.
    
The `break` statement stops processing a page as soon as a review block without a reviewer name is encountered, so no error is raised when there are no more reviews to scrape on the page.

In [20]:
# Scrape Metacritic user reviews into a dict of four equal-length column
# lists (name, date, rating, review) that the next cell turns into a DataFrame.
review_dict = {'name':[], 'date':[], 'rating':[], 'review':[]}

# Request settings do not change between pages, so build them once.
# Swap the active `base_url` line to scrape the other game.
#base_url = 'https://www.metacritic.com/game/switch/pokemon-sword/user-reviews?page='
base_url = 'https://www.metacritic.com/game/switch/pokemon-shield/user-reviews?page='
user_agent = {'User-agent': 'Mozilla/5.0'}

for page in range(0,9):
    url = base_url + str(page)
    # Without a timeout, requests waits forever on a stalled connection.
    response = requests.get(url, headers=user_agent, timeout=30)
    # Uncomment to throttle requests between pages.
    #time.sleep(5)
    if not response.ok:
        # An error page contains no reviews; report it instead of silently parsing it.
        print('Skipping page {}: HTTP {}'.format(page, response.status_code))
        continue
    soup = BeautifulSoup(response.text, 'html.parser')
    for review in soup.find_all('div', class_='review_content'):
        name_div = review.find('div', class_='name')
        if name_div is None:
            # A block without a reviewer name ends the reviews on this page.
            break

        # Extract every field BEFORE appending anything, so a parsing error
        # can never leave the four column lists with different lengths.
        name = name_div.find('a').text
        date = review.find('div', class_='date').text
        rating = review.find('div', class_='review_grade').find_all('div')[0].text
        # Long reviews keep their full text in the expanded blurb span;
        # short reviews only have a plain span inside the review body.
        expanded = review.find('span', class_='blurb blurb_expanded')
        if expanded is not None:
            text = expanded.text
        else:
            text = review.find('div', class_='review_body').find('span').text

        review_dict['name'].append(name)
        review_dict['date'].append(date)
        review_dict['rating'].append(rating)
        review_dict['review'].append(text)
    


### Converting dictionary into pandas DataFrame 

In [21]:
# Build the reviews table from the scraped columns: each key of
# `review_dict` becomes a column and each scraped review becomes a row.
reviews = pd.DataFrame(data=review_dict)
# Bare trailing expression so the notebook renders the frame.
reviews

Unnamed: 0,name,date,rating,review
0,Metagrass,"Nov 15, 2019",2,"I have also done a review for Pokemon Sword, b..."
1,NintendoGuy64,"Nov 15, 2019",0,"As a lifelong fan of Pokemon games, I was ecst..."
2,Drakhis,"Nov 15, 2019",2,"Even as a fan, this game has a lot of problems..."
3,Ok_Then,"Nov 15, 2019",4,"This game feels like a port from the 3DS, not ..."
4,Otonaburu,"Nov 15, 2019",4,What should have been a giant leap to signific...
...,...,...,...,...
883,gooblaster,"Nov 20, 2019",6,"Does a lot of stuff really well,but what it do..."
884,Cassichu,"Nov 20, 2019",7,I have been a long time pokemon fan since befo...
885,SayoNightclaw,"Nov 21, 2019",8,most of the negative reviews are people just m...
886,Guandaside,"Nov 21, 2019",0,"The game is good overall, but there are two ma..."


### Adding feature to indicate if Pokemon Sword or Shield 

In [12]:
# Tag every scraped row with the game it came from. Uncomment the one line
# that matches the URL active in the scraping cell (sword or shield).
#reviews['game'] = 'shield'
#reviews['game'] = 'sword'

### Final product! 
After merging scraped data for Sword and Shield.

In [13]:
# Save the scraped frame to disk.
# NOTE(review): `pokemon_sword` is not defined in any visible cell; presumably
# it is the sword reviews frame. Confirm before re-enabling.
#pokemon_sword.to_pickle('sword.pickle')
# Reload the merged data set. Only unpickle files you created yourself:
# pickle.load can execute arbitrary code from an untrusted file.
# with open('pokemon.pickle','rb') as read_file:
#     pokemon = pickle.load(read_file)
