# Packages

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
from requests import TooManyRedirects
import re
import omdb
import time

# API Set-up

In [2]:
api_key = "2857b99a" # Jimmy's key - limit 1000 calls a day
omdb.set_default('apikey', api_key)

# include tomatoes data by default
#omdb.set_default('tomatoes', True)

# Pulling Data from API

In [3]:
# set timeout of 5 seconds for this request
logan_df = pd.DataFrame(omdb.get(title='Logan', year=2017, fullplot=True, tomatoes=True, timeout=5))
logan_df

Unnamed: 0,title,year,rated,released,runtime,genre,director,writer,actors,plot,...,tomato_consensus,tomato_user_meter,tomato_user_rating,tomato_user_reviews,tomato_url,dvd,box_office,production,website,response
0,Logan,2017,R,03 Mar 2017,137 min,"Action, Drama, Sci-Fi",James Mangold,"James Mangold, Scott Frank, Michael Green","Hugh Jackman, Patrick Stewart, Dafne Keen",In 2029 the mutant population has shrunken sig...,...,,,,,https://www.rottentomatoes.com/m/logan_2017,20 Jun 2017,"$226,277,068",,,True
1,Logan,2017,R,03 Mar 2017,137 min,"Action, Drama, Sci-Fi",James Mangold,"James Mangold, Scott Frank, Michael Green","Hugh Jackman, Patrick Stewart, Dafne Keen",In 2029 the mutant population has shrunken sig...,...,,,,,https://www.rottentomatoes.com/m/logan_2017,20 Jun 2017,"$226,277,068",,,True
2,Logan,2017,R,03 Mar 2017,137 min,"Action, Drama, Sci-Fi",James Mangold,"James Mangold, Scott Frank, Michael Green","Hugh Jackman, Patrick Stewart, Dafne Keen",In 2029 the mutant population has shrunken sig...,...,,,,,https://www.rottentomatoes.com/m/logan_2017,20 Jun 2017,"$226,277,068",,,True


In [4]:
logan_df.columns

Index(['title', 'year', 'rated', 'released', 'runtime', 'genre', 'director',
       'writer', 'actors', 'plot', 'language', 'country', 'awards', 'poster',
       'ratings', 'metascore', 'imdb_rating', 'imdb_votes', 'imdb_id', 'type',
       'tomato_meter', 'tomato_image', 'tomato_rating', 'tomato_reviews',
       'tomato_fresh', 'tomato_rotten', 'tomato_consensus',
       'tomato_user_meter', 'tomato_user_rating', 'tomato_user_reviews',
       'tomato_url', 'dvd', 'box_office', 'production', 'website', 'response'],
      dtype='object')

In [5]:
logan_df['tomato_user_reviews']

0    N/A
1    N/A
2    N/A
Name: tomato_user_reviews, dtype: object

# Scraping the Data from Rotten Tomatoes

In [6]:
def make_soup(url):
    try:
        r = requests.get(url)
        soup = BeautifulSoup(r.content, 'html.parser')
    except TooManyRedirects:
        soup = ''
    return soup

In [7]:
soup = make_soup("https://www.rottentomatoes.com/m/logan_2017/reviews?type=user")
re.findall(r'\"review-text\">([^<]*)<\/p>', str(soup.find(class_ ="audience-reviews")))

['The Wolverine we should of had this whole time. I loved the fact that we were able to get an R rated Wolverine movie. The storyline was awesome and we had a good enclosure to the end of an era. \n\nThank You!!!',
 'filmaço.\n\num filme que meche com as emoções, surpreendente oque o logan passa e como ele está no filme, muito inteligente também a forma que a trama foi feita e introduzida.\nmuito bom',
 'The best x-men movie, the best marvel movie, the best movie',
 'sad but very good movie overall',
 'One of the best CBM movies, great acting, action, story, vfx, emotional',
 'Great finale to the Logan – Wolverine story\nThe violence and gore was top notch\nWolverine is a legend',
 "For certain segments of the film, some things felt pretty slow and pointless with regard to the plot. I mostly liked the movie for the acting and performances that I saw from Hugh Jackman, Dafne Keen, and the final scene in the forest where I saw a young mutant literally explode a guy with their powers from

In [8]:
s = requests.Session()
        
def get_reviews(url):
    r = requests.get(url)
    movie_id = re.findall(r'(?<=movieId":")(.*)(?=","type)',r.text)[0]

    api_url = f"https://www.rottentomatoes.com/napi/movie/{movie_id}/criticsReviews/all" 
    #use reviews/userfor user reviews
    # use criticsReviews/all for critics reviews
    
    payload = {
        'direction': 'next',
        'endCursor': '',
        'startCursor': '',
    }
    
    review_data = []
    
    while True:
        r = s.get(api_url, params=payload)
        data = r.json()

        if not data['pageInfo']['hasNextPage']:
            break

        payload['endCursor'] = data['pageInfo']['endCursor']
        payload['startCursor'] = data['pageInfo']['startCursor'] if data['pageInfo'].get('startCursor') else ''

        review_data.extend(data['reviews'])
        time.sleep(1)
    
    return review_data

# movie from 2016-2018 url
data = get_reviews('https://www.rottentomatoes.com/m/arrival_2016/reviews')
df = pd.json_normalize(data)
df.shape

(420, 15)

In [9]:
df.sample(15)

Unnamed: 0,creationDate,isFresh,isRotten,isRtUrl,isTop,reviewUrl,quote,reviewId,scoreOri,scoreSentiment,critic.name,critic.criticPictureUrl,critic.vanity,publication.id,publication.name
30,"Aug 20, 2019",True,False,False,False,https://www.espinof.com/criticas/la-llegada-od...,"Implausibilities aside, the development of the...",2617463,,POSITIVE,Jorge Loser,https://images.fandango.com/cms/assets/5b6ff50...,jorge-loser,2621,Espinof
7,"Apr 6, 2021",True,False,False,False,https://www.firstpost.com/entertainment/the-pr...,"True to its theme, Arrival itself seems like a...",2782384,,POSITIVE,Rahul Desai,http://resizing.flixster.com/Mj70CflEwV9U1OLCJ...,rahul-desai,2784,Firstpost
347,"Nov 9, 2016",True,False,False,False,http://themovieminute.com/Arrival.html,"Elegant and ambitious, this sci-fi mystery is ...",2361064,,POSITIVE,Joanna Langfield,http://resizing.flixster.com/8VzzG31m0z40eh9rw...,joanna-langfield,2201,The Movie Minute
378,"Nov 7, 2016",True,False,False,True,http://lwlies.com/reviews/arrival/,Given that personal trauma is the prevailing t...,2360723,3/5,POSITIVE,Adam Woodward,https://images.fandango.com/cms/assets/5b6ff50...,adam-woodward,1822,Little White Lies
409,"Sep 12, 2016",True,False,False,False,http://www.theverge.com/2016/9/10/12870314/arr...,The extraordinary success of Arrival is that i...,2350175,,POSITIVE,Bryan Bishop,http://resizing.flixster.com/ntt4fYk5DMf1tDPOj...,bryan-bishop,2525,The Verge
229,"Nov 11, 2016",True,False,False,True,http://www.mtv.com/news/author/nicholsona/,Learning Heptapodese seems easy compared to th...,2361720,B+,POSITIVE,Amy Nicholson,http://resizing.flixster.com/NqxMYD6-eGlrupjRq...,amy-nicholson,1372,MTV
381,"Nov 5, 2016",True,False,False,False,http://www.urbancinefile.com.au/home/view.asp?...,"In a breathtaking performance, Amy Adams groun...",2360634,,POSITIVE,Louise Keller,http://resizing.flixster.com/5_yfDRypLjWlCsGhR...,louise-keller,462,Urban Cinefile
107,"Jun 28, 2017",True,False,False,True,http://www.refinery29.com/2016/11/128299/arriv...,Arrival isn't exactly your typical alien movie...,2402260,,POSITIVE,Anne Cohen,https://images.fandango.com/cms/assets/5b6ff50...,anne-cohen,2859,Refinery29
88,"Jan 27, 2018",True,False,False,False,https://www.youtube.com/watch?v=ja9Yffx0EEI&in...,Arrival is one of the most intriguing movies I...,2453533,,POSITIVE,Jeremy Jahns,https://images.fandango.com/cms/assets/5b6ff50...,jeremy-jahns,2955,JeremyJahns.com
323,"Nov 10, 2016",True,False,False,True,http://www.azcentral.com/story/entertainment/m...,"[A] terrific, haunting film that plays with ou...",2361223,4.5/5,POSITIVE,Bill Goodykoontz,http://resizing.flixster.com/5CcULoGGAqPYWYmVt...,bill-goodykoontz,642,Arizona Republic


In [10]:
df[['quote', 'scoreSentiment']]

Unnamed: 0,quote,scoreSentiment
0,"Both cerebral and achingly emotional, Arrival ...",POSITIVE
1,"One of the most beautiful, emotional and origi...",POSITIVE
2,The best film of 2016. A motion picture that t...,POSITIVE
3,Arrival just might be the overall best movie o...,POSITIVE
4,Hypnotic and strange and beautiful - have I sa...,POSITIVE
...,...,...
415,A film that forces viewers to reconsider that ...,POSITIVE
416,A cerebral and emotionally resonant examinatio...,POSITIVE
417,This is hard science fiction made by an artist...,POSITIVE
418,It's heartening to encounter a science fiction...,POSITIVE


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=53ef1faa-b7a9-4637-b54c-3b067e58670d' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>