# Packages

In [1]:
import pandas as pd
import numpy as np
import requests
from requests import TooManyRedirects
import re
import time

# Scraping Audience Reviews Data from Rotten Tomatoes

In [2]:
s = requests.Session()
        
def get_reviews(url):
    r = requests.get(url)
    movie_id = re.findall(r'(?<=movieId":")(.*)(?=","type)',r.text)[0]

    api_url = f"https://www.rottentomatoes.com/napi/movie/{movie_id}/reviews/user" 
    #use reviews/user for user reviews
    # use criticsReviews/all for critics reviews
    
    payload = {
        'direction': 'next',
        'endCursor': '',
        'startCursor': '',
    }
    
    review_data = []
    
    while True:
        r = s.get(api_url, params=payload)
        data = r.json()

        if not data['pageInfo']['hasNextPage']:
            break

        payload['endCursor'] = data['pageInfo']['endCursor']
        payload['startCursor'] = data['pageInfo']['startCursor'] if data['pageInfo'].get('startCursor') else ''

        review_data.extend(data['reviews'])
        time.sleep(1)
    
    return review_data

# Popular Space Movies from 2010s

## Interstellar 2014

In [3]:
# audience reviews
data = get_reviews('https://www.rottentomatoes.com/m/interstellar_2014/reviews')
interstellar_df = pd.json_normalize(data)
interstellar_df['title'] = 'Interstellar'
interstellar_df.to_csv("audience reviews/interstellar_2014.csv", index=False)
interstellar_df.shape

(12509, 15)

In [4]:
interstellar_df[['title', 'review', 'rating']].sample(15)

Unnamed: 0,title,review,rating
3363,Interstellar,Interstellar is a movie starring Matthew McCon...,4.5
10609,Interstellar,I don't know what bias I might have that would...,5.0
4652,Interstellar,This was the most anticipated movie of the yea...,5.0
6074,Interstellar,Don't quite get the adulation for this film. Y...,2.5
4447,Interstellar,"Great, like the rest of Nolan's films.",4.5
844,Interstellar,While the ending of this film is incredibly st...,4.0
3500,Interstellar,"A quite divisive film, I must say I fall into ...",4.5
7504,Interstellar,Go watch it. Great movie to nerd out on.,5.0
10686,Interstellar,A breathtaking and thrilling film that shows t...,4.5
8086,Interstellar,"Given the grand scope of this film, it has a v...",4.0


## The Martian 2015

In [5]:
# audience reviews
data = get_reviews('https://www.rottentomatoes.com/m/the_martian/reviews')
martian_df = pd.json_normalize(data)
martian_df['title'] = 'The Martian'
martian_df.to_csv("audience reviews/the_martian_2015.csv", index=False)
martian_df.shape

(7105, 15)

In [6]:
martian_df[['title', 'review', 'rating']].sample(15)

Unnamed: 0,title,review,rating
4069,The Martian,Hahaha I just love how he used his own feces t...,5.0
665,The Martian,"Aside from a few inconsistencies, the plot of ...",4.5
1166,The Martian,Terrific inspiring sci-fi. Best PR move that s...,4.5
1310,The Martian,The Martian is such a great space exploration ...,5.0
4773,The Martian,Great movie. It was the right amount of humor ...,5.0
6419,The Martian,Great film wrong Actor :(,4.0
1560,The Martian,smart witty funny intelligent a wonderful film...,5.0
2708,The Martian,"Loved the movie, though I sort of found it too...",4.0
311,The Martian,While I'm giving this a three star rating it's...,3.0
5837,The Martian,Ridley Scott proves once again why he is an ex...,4.5


## Arrival 2016 

In [7]:
# audience reviews
data = get_reviews('https://www.rottentomatoes.com/m/arrival_2016/reviews')
arrival_df = pd.json_normalize(data)
arrival_df['title'] = 'Arrival'
arrival_df.to_csv("audience reviews/arrival_2016.csv", index=False)
arrival_df.shape

(5936, 14)

In [9]:
arrival_df[['title', 'review', 'rating']].sample(15)

Unnamed: 0,title,review,rating
4289,Arrival,Yikes I'm not sure why this thing did so well,2.5
1002,Arrival,"Great film, piece of art",5.0
3008,Arrival,One of the best movies this year.,4.5
3149,Arrival,I'm a sci-fi buff and I was ready to walk out ...,0.5
4557,Arrival,Arrival is a phenomenal film with beautiful ci...,5.0
339,Arrival,Some amazing concepts are there.Very sad.But v...,5.0
5710,Arrival,"Smart, beautiful, and poignant. Arrival is one...",4.5
5764,Arrival,Arrival squanders it's beautiful premise with ...,3.0
4745,Arrival,Arrival is an intelligent work of art that dra...,5.0
5420,Arrival,The way this was a massive and personal story ...,4.5


## Collection of Space Movies Audience Reviews

In [10]:
# all movies
frames = [interstellar_df, martian_df, arrival_df]
all_movies_df = pd.concat(frames)
all_movies_df.to_csv("audience reviews/all_movies.csv", index = False)
all_movies_df[['title', 'review', 'rating']].sample(15)

Unnamed: 0,title,review,rating
427,Arrival,Melhor filme de ficÃ§Ã£o dos Ãºltimos anos!,4.0
3175,Arrival,Films don't get better than this. First contac...,5.0
6614,Interstellar,An amazing movie. Despite it being a 3 hour fi...,4.5
5408,Interstellar,I may need to watch it again when I get my doc...,5.0
2285,Interstellar,"Amazing film, terrible soundtrack.\n\nSo who g...",5.0
4076,Arrival,Awesome! I need to watch it again...,4.0
5287,The Martian,"Wife says its a five, so it's a five. Loved th...",5.0
5414,The Martian,This year's Gravity. Ridley Scott is on his ga...,5.0
5425,The Martian,I loved the book because of the science and th...,3.0
9884,Interstellar,Saw it today in IMAX 70 mm. Mind blowing exper...,5.0
