In [2]:
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
import os
import re
import requests
import time
import datetime
# Notebook-wide display settings: show up to 1000 rows and never truncate cell text.
# The fully-qualified key is required: the short pattern 'max_rows' matches more than
# one option on newer pandas releases and raises OptionError.
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_colwidth', None)

In [3]:
def MatchDescr(x):

    '''Extract the consensus description from a movie card.

    Looks for a ``<span class="media-list__consensus-text">`` tag in ``str(x)``
    and returns the text on the line that follows it, stripped of surrounding
    whitespace.

    Parameters
    ----------
    x : object
        Anything whose ``str()`` is the HTML of one movie card.

    Returns
    -------
    str
        The description, or ``"No Description"`` when the span is not present.
    '''

    html = str(x)
    # The description sits on its own line right after the opening span tag.
    found = re.search(r'<span class="media-list__consensus-text">\n(.*?)\n', html)
    if found is None:
        # Only the "no consensus text" case is treated as a soft failure;
        # unrelated errors (and Ctrl-C) are no longer swallowed.
        return "No Description"
    return found.group(1).strip()

def MatchTitle(x):

    '''Extract the movie slug from the first ``<a href="/m/...">`` link of a card.

    Parameters
    ----------
    x : object
        Anything whose ``str()`` is the HTML of one movie card.

    Returns
    -------
    str
        The part of the link target after ``/m/`` (e.g. ``black_widow_2021``).

    Raises
    ------
    AttributeError
        When the card contains no ``<a href="/m/...">`` link. The exception
        type matches what the previous implementation raised in that case.
    '''

    html = str(x)
    # Capture the slug inside the anchor itself. Searching the whole card for
    # "m/" would also hit unrelated URLs (".com/...") and a greedy match would
    # run on to the last '">' of the line.
    found = re.search(r'<a href="/m/([^"]*)"', html)
    if found is None:
        raise AttributeError('no <a href="/m/..."> link found in movie card')
    return found.group(1)

In [11]:
def GrabPage(page):

    '''Scrape a Rotten Tomatoes browse page into a table of movies.

    Parameters
    ----------
    page : str
        Which browse page to fetch; ``'opening'`` or ``'upcoming'``
        (case-insensitive). ``'opening'`` is fetched with ``requests``;
        ``'upcoming'`` is loaded in Firefox through Selenium and scrolled to
        the bottom before the page source is read.

    Returns
    -------
    pandas.DataFrame or None
        One row per ``div.media-list__movie-info`` element with columns
        ``title`` (from ``MatchTitle``) and ``descr`` (from ``MatchDescr``).
        ``None`` when ``page`` is not a supported value.
    '''

    section = page.lower()

    if section == 'opening':
        print('fetching...')

        # Use the normalised name so 'Opening' and 'opening' hit the same URL,
        # and bound the wait so a stalled connection cannot hang the notebook.
        response = requests.get(f"https://www.rottentomatoes.com/browse/{section}", timeout=30)
        content = response.content

    elif section == 'upcoming':
        print('fetching...')

        driver = webdriver.Firefox(executable_path=os.getenv('seleniumDriver'))
        try:
            driver.get("https://www.rottentomatoes.com/browse/upcoming")
            time.sleep(2)  # Allow 2 seconds for the web page to open
            scroll_pause_time = 2
            # Height of the screen, used as the scroll step
            screen_height = driver.execute_script("return window.screen.height;")
            i = 1

            while True:
                # Scroll one screen height each time
                driver.execute_script(f"window.scrollTo(0, {screen_height}*{i});")
                i += 1
                time.sleep(scroll_pause_time)
                # Re-read the scroll height after every scroll, as it can change once we have scrolled
                scroll_height = driver.execute_script("return document.body.scrollHeight;")
                # Stop once the next scroll target lies beyond the total scroll height
                if screen_height * i > scroll_height:
                    break
            print("Done")
            content = driver.page_source
        finally:
            # Always shut the browser down, even when scrolling fails part-way.
            driver.quit()

    else:
        print(f"{page} not supported")
        return None

    soup = BeautifulSoup(content, "html.parser")
    cards = soup.find_all("div", attrs={"class": "media-list__movie-info"})
    # Some movies have no tomato meter, so scores are not collected here.
    return pd.DataFrame({
        "title": [MatchTitle(card) for card in cards],
        "descr": [MatchDescr(card) for card in cards],
    })


In [5]:
def GrabReview(title):

    '''Download and clean the critic reviews of one movie.

    Parameters
    ----------
    title : str
        Movie slug as used in the ``/m/<title>/reviews`` URL.

    Returns
    -------
    list of str or None
        One cleaned string per ``div.the_review`` element: HTML tags and every
        character other than ASCII letters, digits and spaces are removed, then
        the text is stripped. The list is empty when the page has no such
        elements. ``None`` when the HTTP request itself fails.
    '''

    try:
        review_response = requests.get(
            f"https://www.rottentomatoes.com/m/{title}/reviews", timeout=30
        )
    except requests.RequestException:
        # Network-level failure: keep the original "no result" signal. Other
        # errors are allowed to surface instead of being silently hidden.
        return None

    soup = BeautifulSoup(review_response.content, "html.parser")
    reviews = soup.find_all("div", attrs={"class": "the_review"})
    return [
        re.sub(r"<.*?>|[^a-zA-Z0-9 ]+", '', str(review)).strip()
        for review in reviews
    ]

In [6]:
def MovieReview(title_df):

    '''Collect the reviews of every movie in ``title_df`` into one long table.

    Prints a timestamp header, then calls ``GrabReview`` for each entry of
    ``title_df.title``. Every 100th review of a movie (starting with the
    first) is printed as a sample.

    Parameters
    ----------
    title_df : pandas.DataFrame
        Must expose a ``title`` column of movie slugs.

    Returns
    -------
    pandas.DataFrame
        Columns ``title`` (slug with underscores replaced by spaces) and
        ``reviews`` (one review per row). Movies for which ``GrabReview``
        returns ``None`` or an empty list contribute no rows.
    '''

    now = datetime.datetime.now()
    # NOTE(review): this is the machine's local time; the "WAT" label is only
    # correct when the notebook runs in that time zone - confirm.
    print("On %d/%d/%d as at %d:%02d WAT\n" % (now.day, now.month, now.year, now.hour, now.minute))

    # Accumulate plain records and build the frame once; growing a DataFrame
    # row by row is quadratic and DataFrame.append no longer exists in pandas 2.
    records = []
    for title in title_df.title:
        # GrabReview signals a failed download with None; treat it as "no reviews".
        reviews = GrabReview(title) or []
        display_title = title.replace('_', ' ')
        for i, review in enumerate(reviews):
            if i % 100 == 0:
                print(f"Sample Review:\n{title}: {review}\n\n")
            records.append({'title': display_title, 'reviews': review})

    return pd.DataFrame(records, columns=['title', 'reviews'])

In [9]:
# Titles and descriptions scraped from the "opening" browse page.
opening_movies = GrabPage('opening')

fetching...


In [12]:
# Titles and descriptions scraped from the "upcoming" browse page (Selenium-driven).
upcoming_movies = GrabPage('upcoming')

fetching...
Done


In [14]:
# One row per review for every opening movie; prints sample reviews while running.
openingM_review = MovieReview(opening_movies)

On 6/7/2021 as at 11:4 WAT
Sample Review:
black_widow_2021: Compared to the clunking overblown efforts in some previous Avengers films Black Widow has shape coherence and emotional heft thanks to Australian director Cate Shortland


Sample Review:
the_nest_2021: Very effective tension with a long slow creep up to a truly chilling ending


Sample Review:
the_price_of_freedom_2021: What makes The Price of Freedom a film that should be on your mustsee list is the way Ehrlich presents all of this


Sample Review:
the_woman_who_ran: A funny wellwritten script and excellent performances from the central cast


Sample Review:
dachra: a strong debut artful and eerie


Sample Review:
meander: MEANDER is a highstakes tension filled ride carried by Gaia Weiss performance that will have you on the edge of your seat


Sample Review:
summertime_2021: An intoxicating conjunction of themes and cultures whose passion you can feel in every rhyme and every note Full review in Spanish


Sample Review:
the

In [15]:
# One row per review for every upcoming movie; prints sample reviews while running.
upcomingM_review = MovieReview(upcoming_movies)

On 6/7/2021 as at 11:7 WAT
Sample Review:
escape_room_tournament_of_champions: It really is more of the same just a bit bigger and shinier and  to my eye anyway  less gruesome than the original


Sample Review:
roadrunner_a_film_about_anthony_bourdain: The film is an unflinching look at Bourdain and is not hesitant to explore facets of his life and personality while also maintaining his humanity


Sample Review:
great_white: It has fun playing with the defying laws of logic on occasion but never enough to dismiss its overall substance as a survivalist effort that works with its limitations


Sample Review:
the_sleepless_unrest: Lets just say that what these folks saw and experienced was intriguing just interesting enough occasionally not often which makes the documentary dull to make you go Damn What did THAT


Sample Review:
mama_weed: Hupperts arresting charisma rules in this daring Economic Crisis Cinema gem Flaunting a subversive femalecentric literally undercover fashion statement

# Sentiment Analysis

In [16]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [17]:
# Shared VADER analyzer; get_sentiments reads it through the module-level name `sid`.
# NOTE(review): presumably needs the NLTK `vader_lexicon` resource to be installed
# beforehand (e.g. via nltk.download) - confirm on a fresh environment.
sid = SentimentIntensityAnalyzer()

In [18]:
def get_sentiments(df):

    '''Score reviews with VADER, per review and per movie.

    Uses the module-level analyzer ``sid`` and keeps only the ``compound``
    value of ``sid.polarity_scores``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``title`` and a ``reviews`` column (one review per row).
        A ``compound_score`` column is added to this frame in place.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        * ``df`` itself, now carrying ``compound_score`` for every review.
        * A per-movie frame with columns ``title`` and ``sentiment``, where
          ``sentiment`` is the compound score of all of the movie's reviews
          joined with ``", "``, sorted from highest to lowest.
    '''

    def compound(text):
        return sid.polarity_scores(text)['compound']

    # Build all per-movie records first and create the frame once;
    # DataFrame.append was removed in pandas 2 and is quadratic in a loop.
    records = [
        {
            'title': title,
            'reviews': ", ".join(df.loc[df.title == title, 'reviews'].tolist()),
        }
        for title in df.title.unique()
    ]
    movie_sentiment = pd.DataFrame(records, columns=['title', 'reviews'])
    movie_sentiment['sentiment'] = movie_sentiment['reviews'].apply(compound)

    # Per-review score; written onto the caller's frame, as before.
    df['compound_score'] = df.reviews.apply(compound)

    ranking = (
        movie_sentiment[['title', 'sentiment']]
        .sort_values(by='sentiment', ascending=False)
        .reset_index(drop=True)
    )
    return df, ranking

In [19]:
# op_score: per-review scores; op_sent: per-movie ranking (opening movies).
op_score, op_sent = get_sentiments(openingM_review)

In [20]:
# up_score: per-review scores; up_sent: per-movie ranking (upcoming movies).
up_score, up_sent = get_sentiments(upcomingM_review)

Which movies should you be looking to watch?

In [21]:
# Opening movies ranked by the sentiment of their combined reviews, highest first.
op_sent

Unnamed: 0,title,sentiment
0,summertime 2021,0.9979
1,the woman who ran,0.9972
2,black widow 2021,0.9963
3,scales,0.9661
4,the loneliest whale the search for 52,0.4497
5,the nest 2021,-0.022
6,the price of freedom 2021,-0.1027
7,meander,-0.8074
8,dachra,-0.9714


In [22]:
# Upcoming movies ranked by the sentiment of their combined reviews, highest first.
up_sent

Unnamed: 0,title,sentiment
0,nine days,0.9981
1,ailey,0.9976
2,how it ends 2021,0.9974
3,no ordinary man,0.9969
4,the comeback trail 2020,0.9926
5,joe bell,0.9926
6,roadrunner a film about anthony bourdain,0.9906
7,great white,0.99
8,mandibles,0.9834
9,escape room tournament of champions,0.8627


Sentiment of each individual review

In [23]:
# Every opening-movie review with its own compound score.
op_score

Unnamed: 0,title,reviews,compound_score
0,black widow 2021,Compared to the clunking overblown efforts in some previous Avengers films Black Widow has shape coherence and emotional heft thanks to Australian director Cate Shortland,0.5423
1,black widow 2021,Especially as relates to its genrespecific weaknesses Black Widow is all too familiar,-0.3612
2,black widow 2021,The action scenes those scenes are fine especially the prison break but they will likely leave you waiting a little impatiently for the next moments with the family,-0.6485
3,black widow 2021,Black Widow is worth a view However it wouldve been better had the movie come out after Civil War or even after Avengers Infinity War,-0.6124
4,black widow 2021,Black Widow is not a bad film nevertheless it feels like a wasted opportunity,0.6059
5,black widow 2021,A lethargic clunky and overlong bore thats low on thrills suspense and excitement,0.1027
6,black widow 2021,Surprising exactly no one Florence Pugh steals the show,-0.5267
7,black widow 2021,Florence Pugh delivers a terrific performance in a fun if inconsequential entry into Hollywoods most successful franchise,0.8883
8,black widow 2021,The films truer purpose seems to serve as a setup for the next Black Widow character Pughs Yelena Bolava wholl be taking over the franchise but that couldve been done in many ways other than this approach,0.1901
9,black widow 2021,A not your average Marvel comic book action movie,-0.3252


In [24]:
# Every upcoming-movie review with its own compound score.
up_score

Unnamed: 0,title,reviews,compound_score
0,escape room tournament of champions,It really is more of the same just a bit bigger and shinier and to my eye anyway less gruesome than the original,0.3182
1,escape room tournament of champions,Theres just enough cheap thrills and genuine fascination to render this excursion worthy,0.8078
2,roadrunner a film about anthony bourdain,The film is an unflinching look at Bourdain and is not hesitant to explore facets of his life and personality while also maintaining his humanity,0.1877
3,roadrunner a film about anthony bourdain,A testimonial to the mans strengths flaws and frailties,0.4019
4,roadrunner a film about anthony bourdain,Watching Roadrunner feels like engaging in a kind of collective mourning a desperate bid to understand a man who meant so much to so many even if we never met him,-0.0772
5,roadrunner a film about anthony bourdain,for those who admired Bourdain his absence is still felt as acutely as a searing burn from working the stove,0.5106
6,roadrunner a film about anthony bourdain,Morgan Neville captures the complicated intense life of a man who was mentally tortured and globally adored with a beautiful cinematic landscape as vast and varied as the subjet himself,0.5267
7,roadrunner a film about anthony bourdain,Roadrunner is a wonderfully engaging and revealing biography that may alter the way you perceive Bourdain for better and worse,0.7269
8,roadrunner a film about anthony bourdain,An entertaining representative and very fair representation of Bourdains life even if it doesnt introduce a ton of information that wont already be familiar to Bourdain diehards,0.6697
9,roadrunner a film about anthony bourdain,The conversations and confessions of Bourdains closest kin propel the story forward,0.0
