# Packages

In [1]:
import os
import re
import time

import numpy as np
import omdb
import pandas as pd
import requests
from bs4 import BeautifulSoup
from requests import TooManyRedirects

# API Set-up

In [2]:
# Read the OMDb key from the OMDB_API_KEY environment variable so it can be
# supplied without editing the notebook. The literal fallback is the key that
# was already committed here (Jimmy's key - limit 1000 calls a day); it is kept
# only so the notebook still runs when the variable is unset or empty.
# NOTE(review): a key committed to a shared notebook should be rotated and this
# fallback removed.
api_key = os.environ.get("OMDB_API_KEY") or "2857b99a"
omdb.set_default('apikey', api_key)

# include tomatoes data by default
#omdb.set_default('tomatoes', True)

# Pulling Data from API

In [3]:
# Ask OMDb for the complete 'Logan' (2017) record, including the Rotten
# Tomatoes fields; the request is abandoned after 5 seconds.
logan_query = dict(title='Logan', year=2017, fullplot=True, tomatoes=True, timeout=5)
# NOTE(review): the rendered output shows three near-identical rows for a single
# movie - presumably a list-valued field is being broadcast by pd.DataFrame.
# Confirm whether one row per movie was intended.
logan_df = pd.DataFrame(omdb.get(**logan_query))
logan_df

Unnamed: 0,title,year,rated,released,runtime,genre,director,writer,actors,plot,...,tomato_consensus,tomato_user_meter,tomato_user_rating,tomato_user_reviews,tomato_url,dvd,box_office,production,website,response
0,Logan,2017,R,03 Mar 2017,137 min,"Action, Drama, Sci-Fi",James Mangold,"James Mangold, Scott Frank, Michael Green","Hugh Jackman, Patrick Stewart, Dafne Keen",In 2029 the mutant population has shrunken sig...,...,,,,,https://www.rottentomatoes.com/m/logan_2017,20 Jun 2017,"$226,277,068",,,True
1,Logan,2017,R,03 Mar 2017,137 min,"Action, Drama, Sci-Fi",James Mangold,"James Mangold, Scott Frank, Michael Green","Hugh Jackman, Patrick Stewart, Dafne Keen",In 2029 the mutant population has shrunken sig...,...,,,,,https://www.rottentomatoes.com/m/logan_2017,20 Jun 2017,"$226,277,068",,,True
2,Logan,2017,R,03 Mar 2017,137 min,"Action, Drama, Sci-Fi",James Mangold,"James Mangold, Scott Frank, Michael Green","Hugh Jackman, Patrick Stewart, Dafne Keen",In 2029 the mutant population has shrunken sig...,...,,,,,https://www.rottentomatoes.com/m/logan_2017,20 Jun 2017,"$226,277,068",,,True


In [4]:
# List the fields of the OMDb response that became DataFrame columns.
logan_df.columns

Index(['title', 'year', 'rated', 'released', 'runtime', 'genre', 'director',
       'writer', 'actors', 'plot', 'language', 'country', 'awards', 'poster',
       'ratings', 'metascore', 'imdb_rating', 'imdb_votes', 'imdb_id', 'type',
       'tomato_meter', 'tomato_image', 'tomato_rating', 'tomato_reviews',
       'tomato_fresh', 'tomato_rotten', 'tomato_consensus',
       'tomato_user_meter', 'tomato_user_rating', 'tomato_user_reviews',
       'tomato_url', 'dvd', 'box_office', 'production', 'website', 'response'],
      dtype='object')

In [5]:
# Inspect the Rotten Tomatoes user-review field returned by OMDb. The output
# shows only 'N/A', which is presumably why the reviews are scraped directly
# from Rotten Tomatoes in the next section.
logan_df['tomato_user_reviews']

0    N/A
1    N/A
2    N/A
Name: tomato_user_reviews, dtype: object

# Scraping the Data from Rotten Tomatoes

In [6]:
def make_soup(url, timeout=10):
    """Fetch ``url`` and parse the response body as HTML.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests.get`` can block indefinitely on an unresponsive host.

    Returns
    -------
    BeautifulSoup or str
        The parsed document, or the empty string ``''`` when the request
        fails with ``TooManyRedirects``. Callers must check for ``''`` before
        calling BeautifulSoup methods on the result.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except TooManyRedirects:
        # Redirect loops are treated as "no page"; the '' sentinel is kept
        # for backward compatibility with existing callers.
        return ''
    return BeautifulSoup(response.content, 'html.parser')

In [7]:
# Download the audience-review page for Logan and pull the text of every
# paragraph tagged "review-text" out of the audience-reviews container.
logan_user_reviews_url = "https://www.rottentomatoes.com/m/logan_2017/reviews?type=user"
soup = make_soup(logan_user_reviews_url)
audience_section = soup.find(class_="audience-reviews")
# The regex runs over the serialized HTML of that one container only.
re.findall(r'\"review-text\">([^<]*)<\/p>', str(audience_section))

['The Wolverine we should of had this whole time. I loved the fact that we were able to get an R rated Wolverine movie. The storyline was awesome and we had a good enclosure to the end of an era. \n\nThank You!!!',
 'filmaço.\n\num filme que meche com as emoções, surpreendente oque o logan passa e como ele está no filme, muito inteligente também a forma que a trama foi feita e introduzida.\nmuito bom',
 'The best x-men movie, the best marvel movie, the best movie',
 'sad but very good movie overall',
 'One of the best CBM movies, great acting, action, story, vfx, emotional',
 'Great finale to the Logan – Wolverine story\nThe violence and gore was top notch\nWolverine is a legend',
 "For certain segments of the film, some things felt pretty slow and pointless with regard to the plot. I mostly liked the movie for the acting and performances that I saw from Hugh Jackman, Dafne Keen, and the final scene in the forest where I saw a young mutant literally explode a guy with their powers from

In [8]:
# Module-level HTTP session; get_reviews() sends its paginated API requests through it.
s = requests.Session()
        
def get_reviews(url, endpoint="criticsReviews/all", timeout=10):
    """Download every review of a Rotten Tomatoes movie through its paginated JSON API.

    The movie's reviews page is fetched first to extract the internal
    ``movieId``. That id is then used to request
    ``/napi/movie/<movieId>/<endpoint>`` repeatedly, following the
    ``endCursor`` returned with each page until ``hasNextPage`` is false.

    Parameters
    ----------
    url : str
        Reviews page of the movie, e.g.
        ``https://www.rottentomatoes.com/m/interstellar_2014/reviews``.
    endpoint : str, optional
        API path that follows the movie id. The default
        ``"criticsReviews/all"`` returns critic reviews; according to the
        original author's note ``"reviews/user"`` returns user reviews.
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    list of dict
        The raw review records of every page, including the last one.

    Raises
    ------
    IndexError
        If no ``movieId`` can be found in the downloaded page.
    """
    page = s.get(url, timeout=timeout)

    # Non-greedy capture: stop at the first `","type` after the id instead of
    # the last one on the line.
    id_match = re.search(r'(?<=movieId":")(.*?)(?=","type)', page.text)
    if id_match is None:
        raise IndexError(f"no movieId found in page at {url!r}")
    movie_id = id_match.group(1)

    api_url = f"https://www.rottentomatoes.com/napi/movie/{movie_id}/{endpoint.strip('/')}"

    payload = {
        'direction': 'next',
        'endCursor': '',
        'startCursor': '',
    }

    review_data = []

    while True:
        data = s.get(api_url, params=payload, timeout=timeout).json()

        # Collect this page BEFORE deciding whether to stop; checking
        # hasNextPage first would silently drop the final page (and return
        # nothing at all for movies whose reviews fit on a single page).
        review_data.extend(data.get('reviews', []))

        page_info = data['pageInfo']
        if not page_info['hasNextPage']:
            break

        payload['endCursor'] = page_info['endCursor']
        payload['startCursor'] = page_info.get('startCursor') or ''

        # Pause between page requests (presumably to avoid hammering the server).
        time.sleep(1)

    return review_data

# Collect the reviews for Interstellar (2014) and flatten the nested JSON
# records into one DataFrame row per review.
interstellar_reviews_url = 'https://www.rottentomatoes.com/m/interstellar_2014/reviews'
data = get_reviews(interstellar_reviews_url)
df = pd.json_normalize(data)

In [9]:
# Preview up to 15 random reviews. The fixed random_state makes the preview
# identical on every Restart & Run All, and capping n at len(df) avoids the
# ValueError that sample() raises when fewer than 15 reviews were collected.
df.sample(n=min(15, len(df)), random_state=42)

Unnamed: 0,creationDate,isFresh,isRotten,isRtUrl,isTop,reviewUrl,quote,reviewId,scoreOri,scoreSentiment,critic.name,critic.criticPictureUrl,critic.vanity,publication.id,publication.name
217,"Nov 6, 2014",False,True,False,False,http://www.u.tv/blogs/B-H-Martin/Sci-Fi-to-mak...,"In space no-one can you hear you scream, but t...",2232109,5/10,NEGATIVE,Brian Henry Martin,http://resizing.flixster.com/0ulshnvLW7gQBLls5...,brian-henry-martin,2445,UTV
262,"Nov 5, 2014",False,True,False,False,http://www.oregonlive.com/movies/index.ssf/201...,It's frustrating because there's so much to lo...,2231877,B-,NEGATIVE,Jeff Baker,https://images.fandango.com/cms/assets/5b6ff50...,jeff-baker,608,Oregonian
264,"Nov 5, 2014",True,False,False,False,http://www.kaplanvskaplan.com/new-releases/int...,There are genuinely spectacular and tension-fi...,2231875,,POSITIVE,David Kaplan,http://resizing.flixster.com/x7K3nQSl3uu6u018k...,david-kaplan,1623,Kaplan vs. Kaplan
68,"Jun 6, 2016",False,True,False,False,http://www.independent.ie/entertainment/movies...,Perhaps it would have been wiser to trim back ...,2331904,3/5,NEGATIVE,George Byrne,https://images.fandango.com/cms/assets/5b6ff50...,george-byrne,1803,The Herald (Ireland)
284,"Nov 4, 2014",True,False,False,True,http://www.reelviews.net/php_review_template.p...,It's an amazing achievement that deserves to b...,2231836,4/4,POSITIVE,James Berardinelli,http://resizing.flixster.com/TVz8uQPRnlgIRTI0x...,james-berardinelli,387,ReelViews
258,"Nov 5, 2014",True,False,False,False,http://www.screenit.com/ourtake/2014/interstel...,An intense epic that seeks to deliver both an ...,2231881,,POSITIVE,Teddy Durgin,https://images.fandango.com/cms/assets/5b6ff50...,teddy-durgin,411,Screen It!
214,"Nov 6, 2014",False,True,False,True,http://www.newstatesman.com/film/2014/11/home-...,If Nolan were as deft a director when the acti...,2232115,,NEGATIVE,Ryan Gilbey,https://images.fandango.com/cms/assets/5b6ff50...,ryan-gilbey,2433,New Statesman
20,"Oct 27, 2019",True,False,False,False,https://www.businessinsider.com/interstellar-r...,The space voyage is easily one of Nolan's most...,2639670,B,POSITIVE,Kirsten Acuna,http://resizing.flixster.com/uhvBxI7H9-B1Fxk4_...,kirsten-acuna,2877,Business Insider
177,"Nov 7, 2014",False,True,False,True,http://www.salon.com/2014/11/05/interstellar_c...,Interstellar is thrilling to watch across its ...,2232555,,NEGATIVE,Andrew O'Hehir,http://resizing.flixster.com/XeNsC9DNh9ypUGXQp...,andrew-ohehir,400,Salon.com
140,"Nov 11, 2014",True,False,False,False,http://www.darkhorizons.com/reviews/1375/inter...,While the ride is a bumpy one and the destinat...,2233038,,POSITIVE,Josh Hylton,https://images.fandango.com/cms/assets/5b6ff50...,josh-hylton,491,Dark Horizons


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=53ef1faa-b7a9-4637-b54c-3b067e58670d' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>